#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <err.h>
#include <iostream>

using std::cout, std::endl;

/// Runs a complete host -> device -> host round trip on a Kokkos::DualView.
///
/// The function prints the Kokkos configuration, fills a vector with 1 on the
/// host, synchronizes it to the device, increments every element in a device
/// kernel, synchronizes it back and checks on the host that every element is 2.
/// On the first mismatch it prints the offending element and terminates the
/// process through errx() with exit status 2.
///
/// All Kokkos objects are local to this function so that they are destroyed
/// before the caller invokes Kokkos::finalize().
void mainKokkos() {
    const unsigned int size = 1 << 16;

    // Platform information
    Kokkos::print_configuration(cout);

    // Allocate memory. The allocation is requested without initialization, so
    // every element is written on the host below before it is used.
    cout << "* Allocate memory on the host and the device" << endl;
    Kokkos::DualView<double *> aDualView(Kokkos::ViewAllocateWithoutInitializing("vector a"), size);
    // Use the accessor functions rather than the h_view / d_view data members.
    auto a = aDualView.view_host();
    auto device_a = aDualView.view_device();

    cout << "* Pre-process / initialize data on the host" << endl;
    cout << "  e.g. read data from storage" << endl;
    // The index has the same unsigned type as `size` to avoid a signed/unsigned comparison.
    for (unsigned int i = 0; i < size; ++i) {
        a(i) = 1;
    }

    cout << "* Copy data from the host to the device" << endl;
    // Flag the host copy as modified, then let the DualView bring the device copy up to date.
    aDualView.modify_host();
    aDualView.sync_device();

    cout << "* Compute on the device" << endl;
    // The lambda captures the device view by value (KOKKOS_LAMBDA), not the DualView itself.
    Kokkos::parallel_for(
        "Increment a[] on device", size,
        KOKKOS_LAMBDA(int i) {
            device_a(i)++;
        });

    cout << "* Transfer data back from the device to the host" << endl;
    // Flag the device copy as modified, then let the DualView bring the host copy up to date.
    aDualView.modify_device();
    aDualView.sync_host();

    cout << "* Post-process data on the host" << endl;
    cout << "  e.g. write data to storage or perform consistency checks" << endl;
    for (unsigned int i = 0; i < size; ++i) {
        // Exact comparison is intended: 1.0 + 1.0 is exactly representable as a double.
        if (a(i) != 2.) {
            cout << "a[" << i << "] = " << a(i) << endl;
            errx(2, "Computation on GPU failed");
        }
    }
}

// Program entry point: all Kokkos work happens between initialize() and finalize().
int main(int argc, char **argv) {
    Kokkos::initialize(argc, argv);
    {
        // Every Kokkos object is created inside mainKokkos, so each of them has
        // already been destroyed by the time this call returns.
        mainKokkos();
    }
    // No Kokkos object may still be alive when finalize() runs.
    Kokkos::finalize();
    return 0;
}