#include <algorithm> #include <err.h> #include <execution> #include <iostream> #include <ranges> using std::cout, std::endl; int main(int arg, char *argv[]) { const std::size_t size = 1 << 16; cout << "* Allocate memory on the host" << endl; double *a = (double *) malloc(size * sizeof(double)); if (a == nullptr) { errx(1, "malloc a[] failed"); } cout << "* Pre-process / initialize data on the host" << endl; cout << " e.g. read data from storage" << endl; for (std::size_t i = 0; i < size; i++) { a[i] = 1.; } cout << "* Automatically allocate memory on the device" << endl; cout << "* Automatically copy data from the host to the device" << endl; cout << "* Compute on the device" << endl; // Without access to vector index std::for_each_n( std::execution::par_unseq, // parallel, unsequenced order a, size, // kernel expressed as lambda expression [](double &a_i) { a_i++; }); // With access to vector index std::for_each_n( std::execution::par_unseq, // parallel, unsequenced order std::views::iota(0).begin(), size, // kernel expressed as lambda expression [&a](int i) { a[i]++; }); cout << "* Automatically transfer data back from the device to the host" << endl; cout << "* Automatically delete data on the device" << endl; cout << "* Post-process data on the host" << endl; cout << " e.g. write data to storage" << endl; for (int i = 0; i < size; i++) { if (a[i] != 3.) { cout << "a[" << i << "] = " << a[i] << endl; errx(2, "Computation on GPU failed"); } } cout << "* Free memory on the host" << endl; free(a); }