#include "hip/hip_runtime.h"
#include <cstdlib>
#include <err.h>
#include <iostream>

using std::cout, std::endl;

// Increment one element of a device array by 1.
//
// Each thread computes a flat 1D index from its block and thread
// coordinates (x dimension only) and bumps the element at that index.
// There is no bounds check, so the caller must launch no more threads
// in total than device_a has elements.
__global__ void inc_kernel(double *device_a) {
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;
    double *const slot = device_a + idx;
    *slot += 1.0;
}

int main(int arg, char *argv[]) {
    const unsigned int size = 1 << 16;

    hipError_t error_id;

    // Platform information
    int runtime_version = 0;
    error_id = hipRuntimeGetVersion(&runtime_version);
    cout << "HIP version: "
         << runtime_version / 1000 << "." << (runtime_version % 100) / 10
         << endl;

    int driver_version = 0;
    error_id = hipDriverGetVersion(&driver_version);
    cout << "HIP driver version: "
         << driver_version / 1000 << "." << (driver_version % 100) / 10
         << endl;

    int num_devices = 0;
    error_id = hipGetDeviceCount(&num_devices);
    if (error_id == hipErrorNoDevice || num_devices == 0) {
        errx(1, "No HIP device found");
    }
    cout << "Number of HIP devices: " << num_devices << endl;

    int device_num = 0;
    error_id = hipSetDevice(device_num);
    cout << "HIP Device number: " << device_num << endl;

    size_t memory_free = 0, memory_total = 0;
    error_id = hipMemGetInfo(&memory_free, &memory_total);
    cout << "Memory on HIP device: "
         << memory_total / (1024. * 1024. * 1024.) << " GiB"
         << endl;
    cout << "Free Memory on HIP device: "
         << memory_free / (1024. * 1024. * 1024.) << " GiB"
         << endl;

    hipDeviceProp_t device_properties;
    error_id = hipGetDeviceProperties(&device_properties, device_num);
    cout << "HIP device name: "
         << device_properties.name
         << endl;
    cout << "HIP device capability: "
         << device_properties.major << "." << device_properties.minor
         << endl;
    cout << "HIP device max clock rate: "
         << device_properties.clockRate / 1000000. << " GHz"
         << endl;
    cout << "HIP device max memory clock rate: "
         << device_properties.memoryClockRate / 1000000. << " GHz"
         << endl;
    cout << "HIP device compute mode: "
         << device_properties.computeMode
         << endl;

    if (!device_properties.arch.hasDoubles) {
        errx(1, "HIP device does not support doubles");
    }

    cout << "* Allocate memory on the host" << endl;
    double *a = (double *) malloc(size * sizeof(double));
    if (a == NULL) {
        errx(1, "malloc a[] failed");
    }

    cout << "* Allocate memory on the device" << endl;
    double *device_a;
    if (hipMalloc(&device_a, size * sizeof(double)) != hipSuccess) {
        errx(1, "hipMalloc device_a[] failed");
    }

    cout << "* Pre-process / initialize data on the host" << endl;
    cout << "  e.g. read data from storage" << endl;
    for (int i = 0; i < size; i++) {
        a[i] = 1.;
    }

    cout << "* Copy data from the host to the device" << endl;
    error_id = hipMemcpy(device_a, a, size * sizeof(double), hipMemcpyHostToDevice);

    cout << "* Compute on the device" << endl;
    inc_kernel<<<size / 256, 256>>>(device_a);

    cout << "* Transfer data back from the device to the host" << endl;
    error_id = hipMemcpy(a, device_a, size * sizeof(double), hipMemcpyDeviceToHost);

    cout << "* Delete data on the device" << endl;
    error_id = hipFree(device_a);

    cout << "* Post-process data on the host" << endl;
    cout << "  e.g. write data to storage" << endl;
    for (int i = 0; i < size; i++) {
        if (a[i] != 2.) {
            errx(2, "Computation on GPU failed");
        }
    }

    cout << "* Free memory on the host" << endl;
    free(a);

    return 0;
}