#include <Kokkos_Core.hpp>
#include <err.h>
#include <exception>
#include <iostream>

using std::cout, std::endl;

void mainKokkos() {
    const int size = 1 << 16;

    // Platform information
    Kokkos::print_configuration(cout);

    // Allocate memory
    cout << "* Allocate memory on the device" << endl;
    Kokkos::View<double *> device_a(Kokkos::ViewAllocateWithoutInitializing("device_a"), size);

    cout << "* Allocate memory on the host" << endl;
    auto a = Kokkos::create_mirror_view(device_a);

    cout << "* Pre-process / initialize data on the host" << endl;
    cout << "  e.g. read data from storage" << endl;
    for (int i = 0; i < size; i++) {
        a(i) = 1.;
    };

    cout << "* Copy data from the host to the device" << endl;
    Kokkos::deep_copy(device_a, a);

    cout << "* Compute on the device" << endl;
    Kokkos::parallel_for(
        "Increment a[] on device", size,
        KOKKOS_LAMBDA(int i) {
            device_a(i)++;
        });

    cout << "* Transfer data back from the device to the host" << endl;
    Kokkos::deep_copy(a, device_a);

    cout << "* Post-process data on the host" << endl;
    cout << "  e.g. write data to storage or perform consistency checks" << endl;
    for (int i = 0; i < size; i++) {
        if (a[i] != 2.) {
            cout << "a[" << i << "] = " << a[i] << endl;
            errx(2, "Computation on GPU failed");
        }
    }
}

/**
 * Program entry point: sets up the Kokkos runtime, runs the example and
 * tears the runtime down again.
 *
 * @param argc number of command-line arguments (forwarded to Kokkos)
 * @param argv command-line arguments (forwarded to Kokkos)
 * @return 0 on success, 1 if mainKokkos() throws a std::exception
 */
int main(int argc, char **argv) {
    // RAII guard: initializes Kokkos here and finalizes it when the guard is
    // destroyed, i.e. on every return path out of main, including the error
    // path below.
    Kokkos::ScopeGuard kokkos_scope(argc, argv);

    try {
        // All Kokkos objects are bundled in mainKokkos so that their destructors
        // run before the guard finalizes Kokkos.
        mainKokkos();
    } catch (const std::exception &e) {
        // Catching here lets the stack unwind, so Kokkos is finalized cleanly
        // instead of the process terminating on an uncaught exception.
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}