/*
 * OpenMP SIMD demo: fills an aligned array of doubles and then adds to each
 * element the value four positions ahead of it, using `#pragma omp simd`
 * loops. The vectorization reports of GCC and ICC, with and without OpenMP,
 * are recorded in the comments at the end of the file.
 */
#include <stdlib.h> // aligned_alloc, free, EXIT_FAILURE, EXIT_SUCCESS
#include <stdio.h>  // printf, fprintf

// Alignment (in bytes) requested from the allocator and promised to the
// compiler through the `aligned` clauses below.
#define alignment_bytes 64

int main(int argc, char *argv[]) {
    // Command-line arguments are not used by this demo.
    (void) argc;
    (void) argv;

    size_t size = 100;

    // allocate aligned memory
    // C11 requires the size passed to aligned_alloc to be an integral
    // multiple of the alignment, so round the byte count up (800 -> 832).
    const size_t bytes = size * sizeof(double);
    const size_t padded_bytes =
        (bytes + alignment_bytes - 1) / alignment_bytes * alignment_bytes;

    double *a = aligned_alloc(alignment_bytes, padded_bytes);
    if (a == NULL) {
        fprintf(stderr, "aligned_alloc of %zu bytes failed\n", padded_bytes);
        return EXIT_FAILURE;
    }

    // vectorized loops
    // The trailing "line N" comments give the position each statement had in
    // the listing the compiler reports at the end of this file were made from.
    #pragma omp simd aligned (a : alignment_bytes)
    for (size_t i = 0; i < size; i++) { // line 14
        a[i] = 1.0;                     // line 15
    }

    // Each iteration reads the element 4 positions ahead, so the loop stops
    // 4 elements before the end. The bound is kept as `i < size - 4` (size is
    // fixed at 100, so the unsigned subtraction cannot wrap) because the
    // OpenMP canonical loop form wants the loop variable alone on one side.
    #pragma omp simd aligned (a : alignment_bytes) safelen(4)
    for (size_t i = 0; i < size - 4; i++) { // line 18
        a[i] += a[i + 4];                   // line 19
    }

    printf( "a[ 0]: %f\n", a[0] );               // 2.000000
    printf( "a[size - 1]: %f\n", a[size - 1] );  // 1.000000

    // free allocated memory
    free(a);

    return EXIT_SUCCESS;
}

/* GCC
   (line/column numbers refer to the original listing; see the "line N"
   comments on the loops above)

$ module add compiler/gnu/7.1
$ gcc -O2 -fopenmp -std=c11 \
      -fopt-info-vec \
      openmp_simd.c -o openmp_simd
openmp_simd.c:19:14: note: loop vectorized
openmp_simd.c:15:10: note: loop vectorized

$ gcc -O2 -std=c11 \
      -fopt-info-vec \
      openmp_simd.c -o openmp_simd
-> No vectorization without OpenMP
*/

/* Intel
   (line/column numbers refer to the original listing; see the "line N"
   comments on the loops above)

$ module add compiler/intel/18.0
$ icc -O2 -qopenmp -std=c11 \
      -qopt-report -qopt-report-phase=vec -qopt-report-stdout \
      openmp_simd.c -o openmp_simd
...
LOOP BEGIN at openmp_simd.c(14,19)
   remark #15301: OpenMP SIMD LOOP WAS VECTORIZED
LOOP END

LOOP BEGIN at openmp_simd.c(14,19)
<Remainder loop for vectorization>
LOOP END

LOOP BEGIN at openmp_simd.c(18,19)
   remark #15301: OpenMP SIMD LOOP WAS VECTORIZED
LOOP END

$ icc -O2 -std=c11 \
      -qopt-report -qopt-report-phase=vec -qopt-report-stdout \
      openmp_simd.c -o openmp_simd
...
LOOP BEGIN at openmp_simd.c(14,5)
<Peeled loop for vectorization>
LOOP END

LOOP BEGIN at openmp_simd.c(14,5)
   remark #15300: LOOP WAS VECTORIZED
LOOP END

LOOP BEGIN at openmp_simd.c(14,5)
<Remainder loop for vectorization>
LOOP END

LOOP BEGIN at openmp_simd.c(18,5)
<Peeled loop for vectorization>
LOOP END

LOOP BEGIN at openmp_simd.c(18,5)
   remark #15300: LOOP WAS VECTORIZED
LOOP END

LOOP BEGIN at openmp_simd.c(18,5)
<Remainder loop for vectorization>
   remark #15301: REMAINDER LOOP WAS VECTORIZED
LOOP END

LOOP BEGIN at openmp_simd.c(18,5)
<Remainder loop for vectorization>
LOOP END

-> Extra peeling loop without OpenMP
*/