Research Article
Effective SIMD Vectorization for Intel Xeon Phi Coprocessors
Algorithm 5: An example of combined OpenMP `parallel for` and SIMD usage.
```c
#pragma omp declare simd uniform(max_iter) simdlen(32)
uint32_t mandel(fcomplex c, uint32_t max_iter)
{
    // Computes number of iterations (count variable)
    // that it takes for parameter c to be known to
    // be outside mandelbrot set
    uint32_t count = 1; fcomplex z = c;
    for (int32_t i = 0; i < max_iter; i += 1) {
        z = z * z + c;
        int t = (cabsf(z) < 2.0f);
        count += t;
        if (t == 0) { break; }
    }
    return count;
}

// Caller site code:
int main() {
    … … …
    #pragma omp parallel for schedule(guided)
    for (int32_t y = 0; y < ImageHeight; ++y) {
        float c_im = max_imag - y * imag_factor;
        #pragma omp simd safelen(32)
        for (int32_t x = 0; x < ImageWidth; ++x) {
            fcomplex in_val;
            in_val = (min_real + x * real_factor) + (c_im * 1.0iF);
            count[y][x] = mandel(in_val, max_iter);
        }
    }
    … … …
}
```