Mathematical Problems in Engineering

Research Article

GPU Preconditioning for Block Linear Systems Using Block Incomplete Sparse Approximate Inverses

GPU kernel for solving LGSBTS.

(1)	__shared__ double sm[warps_per_block][warp_size];
(2)	__shared__ double inv[warps_per_block][bsz][bsz];
(3)	__shared__ double ssol[warps_per_block][bsz][bsz];
(4)	tid = threadIdx.x + blockIdx.x blockDim.x;
(5)	wpid = threadIdx.x/warp_size;
(6)	gwpid = tid/warp_size;
(7)	rpos = threadIdx.x%warp size;
(8)	range = ;
(9)	if gwpid < nbr then
(10)	if rpos < range then
(11)	r = rpos%bsz;
(12)	c = (rpos/bsz)%bsz;
(13)	InvStIdx = NLColPtr[gwpid];
(14)	InvEdIdx = NLColPtr[gwpid + 1];
(15)	for idx = InvStIdx to InvEdIdx − 1 do
(16)	irow = NLRowVal[idx];
(17)	if rpos < then
(18)	tmpidx = LRowPtr[irow + 1] − 1;
(19)	ssol[wpid][r][c] = L[tmpidx + rpos];
(20)	inv[wpid][r][c] = 1.0/det ;
(21)	sm[wpid][rpos] = Rhs[idx + rpos];
(22)	ssol[wpid][r][c] = ;
(23)	NL[idx + rpos] = ssol[wpid][r][c];
(24)	end if
(25)	myidx = idx + 1 + ;
(26)	triEdIdx = triStIdx + (n − icol) ;
(27)	solIdx = solStIdx + + rpos;
(28)	while myidx < InvEdIdx do
(29)	sm[wpid][rpos] = 0.0;
(30)	myrow = NLRowVal[myidx];
(31)	tmpidx = LRowPtr[myrow + 1] − 1;
(32)	while tmpidx LRowPtr[myrow] && LColVal[tmpidx] == irow do
(33)	tmpidx−−;
(34)	end while
(35)	if tmpidx LRowPtr[myrow] && LColVal[tmpidx] == irow then
(36)	s[wpid][lane] = L[tmpidx + rpos%];
(37)	end if
(38)	Rhs[myidx + lane%]− = ;
(39)	myidx+ = warp_size/;
(40)	end while
(41)	end for
(42)	end if
(43)	end if