#ifdef _OPENACC
    #include <openacc.h>
#else
    #error "OpenACC support required"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <err.h>

/*
 * Minimal OpenACC offload walk-through.
 *
 * Prints information about the selected OpenACC device, fills a host array
 * with 1.0, copies it to the device, increments every element there, copies
 * the result back to the host and verifies that every element equals 2.0.
 *
 * The command-line arguments are not used.
 *
 * Exit status:
 *   0  success
 *   1  host allocation failed (reported through errx)
 *   2  at least one element had an unexpected value after the device
 *      computation (reported through errx)
 */
int main(int arg, char *argv[]) {
    // Command-line arguments are intentionally ignored.
    (void) arg;
    (void) argv;

    // Number of array elements processed on the device.
    const unsigned int size = 1 << 16;

    // Platform information
    acc_device_t devicetype = acc_device_default;
    int num_devices = acc_get_num_devices(devicetype);
    int device_num  = acc_get_device_num(devicetype);
    acc_set_device_num(device_num, devicetype);
    // Mapping of the _OPENACC macro value to the specification version:
    // 201306 -> 2.0
    // 201510 -> 2.5
    // 201711 -> 2.6
    printf("OpenACC version (yyyymm): %i\n", _OPENACC);
    printf("Number of OpenACC devices: %i\n", num_devices);
    printf("OpenACC Device number: %i\n", device_num);
    // acc_get_property, acc_get_property_string introduced with OpenACC 2.6
    #if _OPENACC >= 201711
    // acc_get_property returns size_t; keep that type (and print it with %zu)
    // so that large memory sizes are neither narrowed nor sign-converted.
    size_t      mem             = acc_get_property(       device_num, acc_device_current, acc_property_memory);
    size_t      free_mem        = acc_get_property(       device_num, acc_device_current, acc_property_free_memory);
    const char *property_name   = acc_get_property_string(device_num, acc_device_current, acc_property_name);
    const char *property_vendor = acc_get_property_string(device_num, acc_device_current, acc_property_vendor);
    const char *property_driver = acc_get_property_string(device_num, acc_device_current, acc_property_driver);
    printf("Memory on OpenACC device: %zu\n", mem);
    printf("Free Memory on OpenACC device: %zu\n", free_mem);
    // The string properties are only printed when the runtime returned one.
    if (property_name != NULL) {
        printf("OpenACC device name: %s\n", property_name);
    }
    if (property_vendor != NULL) {
        printf("OpenACC device vendor: %s\n", property_vendor);
    }
    if (property_driver != NULL) {
        printf("OpenACC device driver: %s\n", property_driver);
    }
    #endif

    printf("* Allocate memory on the host\n");
    double *a = malloc(size * sizeof *a);
    if (a == NULL) {
        errx(1, "malloc a[] failed");
    }

    printf("* Pre-process / initialize data on the host\n");
    printf("  e.g. read data from storage\n");
    // The index type matches `size` to avoid a signed/unsigned comparison.
    for (unsigned int i = 0; i < size; i++) {
        a[i] = 1.;
    }

    printf("* Automatically allocate memory on the device\n");
    printf("* Copy data from the host to the device\n");
    // Standalone directive; the empty block that follows is a no-op statement.
    #pragma acc enter data copyin(a[0:size])
    {}

    printf("* Compute on the device\n");
    // present(): the array was already placed on the device by the
    // enter data directive above, so no additional transfer is requested here.
    #pragma acc parallel loop present(a[0:size]) wait
    for (unsigned int i = 0; i < size; i++) {
        a[i]++;
    }

    printf("* Transfer data back from the device to the host\n");
    #pragma acc update host(a[0:size])
    {}

    printf("* Delete data on the device\n");
    #pragma acc exit data delete(a[0:size])
    {}

    printf("* Post-process data on the host\n");
    printf("  e.g. write data to storage\n");
    // Every element started at 1.0 and was incremented once, so anything
    // other than exactly 2.0 means the device computation went wrong.
    for (unsigned int i = 0; i < size; i++) {
        if (a[i] != 2.) {
            errx(2, "Computation on GPU failed");
        }
    }

    printf("* Free memory on the host\n");
    free(a);

    return 0;
}