#include <err.h>
#include <iostream>
#include <sycl/sycl.hpp>

using std::cout, std::endl;

// Minimal SYCL buffer/accessor example.
//
// Steps performed:
//   1. Create a queue on the default device and print platform/device info.
//   2. Create a 1-D buffer of `size` doubles and fill it with 1.0 on the host.
//   3. Submit a kernel that increments every element.
//   4. Read the buffer back on the host and verify that every element is 2.0.
//
// The command-line arguments are accepted but not used.
// Exits with status 2 (via errx) if any element has an unexpected value.
int main(int arg, char *argv[]) {
    const size_t size = 1 << 16;

    // Create a queue on the default device
    // A list of devices can be obtained by "sycl-ls --verbose"
    // Device selection can be done by setting environment variables SYCL_DEVICE_FILTER or ONEAPI_DEVICE_SELECTOR
    sycl::queue queue;
    auto device = queue.get_device();
    auto platform = device.get_platform();

    // Platform information
    cout << "SYCL Platform: "
         << platform.get_info<sycl::info::platform::name>()
         << endl;
    cout << "SYCL Device name: "
         << device.get_info<sycl::info::device::name>()
         << endl;
    cout << "SYCL Driver version: "
         << device.get_info<sycl::info::device::driver_version>()
         << endl;
    // global_mem_size is reported in bytes; dividing by 1e9 prints decimal gigabytes
    cout << "Global memory size: "
         << device.get_info<sycl::info::device::global_mem_size>() / 1000000000. << " GB"
         << endl;

    cout << "* Create buffer" << endl;
    cout << "* Automatically allocate memory on the host" << endl;
    cout << "* Automatically allocate memory on the device" << endl;
    sycl::buffer<double, 1> a{size};

    cout << "* Pre-process / initialize data on the host" << endl;
    cout << "  e.g. read data from storage" << endl;
    {
        // The host accessor lives only inside this scope so that it is
        // destroyed before the kernel below requests access to the buffer.
        auto host_access_a = a.get_host_access(sycl::write_only);
        for (size_t i = 0; i < size; i++) {
            host_access_a[i] = 1.;
        }
    }

    queue.submit(
        // command group expressed as lambda expression
        // (the messages printed here are emitted by host code while the
        // command group is being set up, not by the kernel itself)
        [&](sycl::handler &handler) {
            cout << "* Automatically copy data from the host to the device" << endl;
            auto device_access_a = a.get_access(handler, sycl::read_write);

            cout << "* Compute on the device" << endl;
            handler.parallel_for(
                size,
                // kernel expressed as lambda expression
                [=](sycl::id<1> idx) {
                    device_access_a[idx]++;
                });
        });
    // Block until the kernel has finished and surface any asynchronous
    // errors now, before the results are inspected on the host.
    queue.wait_and_throw();

    cout << "* Automatically transfer data back from the device to the host" << endl;
    auto host_access_a = a.get_host_access(sycl::read_only);

    cout << "* Post-process data on the host" << endl;
    cout << "  e.g. write data to storage" << endl;
    // size_t index matches the type of `size` (no signed/unsigned comparison).
    // Exact comparison is intended: 1.0 + 1.0 is exactly representable as 2.0.
    for (size_t i = 0; i < size; i++) {
        if (host_access_a[i] != 2.) {
            cout << "a[" << i << "] = " << host_access_a[i] << endl;
            errx(2, "Computation on GPU failed");
        }
    }
}