wiki:performance/compiler_optionen/gcc/example_vec_report_stream

Example: GCC compiler optimization report for the STREAM benchmark

  • Code fragments from the STREAM benchmark (stream.c); the trailing // L.nnn comments give the original line numbers in stream.c
    // STREAM "Scale" kernel: stores b[j] = scalar * c[j] for every j in
    // [0, STREAM_ARRAY_SIZE_thread).
    //
    // The loop runs inside an OpenMP parallel region with `scalar` shared. It is
    // annotated with `omp simd` only (no worksharing `for`), so the loop as
    // written is executed in full by each thread of the region.
    // NOTE(review): presumably b and c refer to per-thread data here -- confirm
    // against their declarations in stream.c (not shown in this excerpt).
    //
    // The `aligned` clause tells the compiler that b and c are aligned to
    // `alignment_bytes`; that value is defined outside this excerpt.
    // With the Intel compiler, the loop is additionally marked for non-temporal
    // (streaming) stores.
    void inline tuned_STREAM_Scale(STREAM_TYPE scalar) {                                   // L.557
        #pragma omp parallel shared(scalar)                                                // L.558
        {                                                                                  // L.559
            #ifdef __INTEL_COMPILER                                                        // L.560
                // Instructs the compiler to use non-temporal (that is, streaming) stores  // L.561
                #pragma vector nontemporal                                                 // L.562
            #endif                                                                         // L.563
            #pragma omp simd aligned (b, c : alignment_bytes)                              // L.564
            for (long int j = 0; j < STREAM_ARRAY_SIZE_thread; j++)                        // L.565
                b[j] = scalar*c[j];                                                        // L.566
        }                                                                                  // L.567
    }                                                                                      // L.568
                                                                                           // L.569
    // STREAM "Add" kernel: stores c[j] = a[j] + b[j] for every j in
    // [0, STREAM_ARRAY_SIZE_thread).
    //
    // The loop runs inside an OpenMP parallel region. It is annotated with
    // `omp simd` only (no worksharing `for`), so the loop as written is executed
    // in full by each thread of the region.
    // NOTE(review): presumably a, b and c refer to per-thread data here --
    // confirm against their declarations in stream.c (not shown in this excerpt).
    //
    // The `aligned` clause tells the compiler that a, b and c are aligned to
    // `alignment_bytes`; that value is defined outside this excerpt.
    // With the Intel compiler, the loop is additionally marked for non-temporal
    // (streaming) stores.
    void inline tuned_STREAM_Add() {                                                       // L.570
        #pragma omp parallel                                                               // L.571
        {                                                                                  // L.572
            #ifdef __INTEL_COMPILER                                                        // L.573
                // Instructs the compiler to use non-temporal (that is, streaming) stores  // L.574
                #pragma vector nontemporal                                                 // L.575
            #endif                                                                         // L.576
            #pragma omp simd aligned (a, b, c : alignment_bytes)                           // L.577
            for (long int j = 0; j < STREAM_ARRAY_SIZE_thread; j++)                        // L.578
                c[j] = a[j] + b[j];                                                        // L.579
        }                                                                                  // L.580
    }                                                                                      // L.581
    
  • Compile the benchmark with the vectorization report enabled
    # Load the GNU compiler module, then build stream.c with OpenMP enabled.
    # -fopt-info-vec makes GCC print a note for each loop it vectorized
    # (the "loop vectorized" lines shown under Output).
    module add compiler/gnu
    gcc -std=c11 -Ofast -march=native -flto -fopenmp \
        -fopt-info-vec \
        stream.c
    
  • Output
    ...
    stream.c:566:28: note: loop vectorized
    stream.c:579:21: note: loop vectorized
    ...
    
Last modified 12 months ago (on Apr 9, 2018, 4:13:47 PM)