Research Article
Multi-GPU Support on Single Node Using Directive-Based Programming Model
Algorithm 4
Single GPU implementation of 2D heat equation.
| void step_kernel{…} | | { | | #pragma acc parallel present(temp_in[0:ni*nj], temp_out[0:ni*nj]) | | num_gangs(32) vector_length(32) | | { | | // loop over all points in domain (except boundary) | | #pragma acc loop gang | | for (j=1; j < nj-1; j++) { | | #pragma acc loop vector | | for (i=1; i < ni-1; i++) { | | // find indices into linear memory | | // for central point and neighbours | | i00 = I2D(ni, i, j); | | im10 = I2D(ni, i-1, j); | | ip10 = I2D(ni, i+1, j); | | i0m1 = I2D(ni, i, j-1); | | i0p1 = I2D(ni, i, j+1); | | // evaluate derivatives | | d2tdx2 = temp_in[im10]-2*temp_in[i00]+temp_in[ip10]; | | d2tdy2 = temp_in[i0m1]-2*temp_in[i00]+temp_in[i0p1]; | | // update temperatures | | temp_out[i00] = temp_in[i00]+tfac*(d2tdx2 + d2tdy2); | | } | | } | | } | | } | | #pragma acc data copy(temp1[0:ni*nj]) | | copyin(temp2[0:ni*nj]) | | { | | for (istep=0; istep < nstep; istep++) { | | step_kernel(ni, nj, tfac, temp1, temp2); | | // swap the temp pointers | | temp = temp1; | | temp1 = temp2; | | temp2 = temp; | | } | | } |
|