#include "hip/hip_runtime.h"

#include <cstdlib>
#include <err.h>
#include <iostream>

using std::cout, std::endl;

// Abort with file/line, the failing expression and the HIP error string
// whenever a HIP runtime call does not return hipSuccess.
#define HIP_CHECK(call)                                                      \
    do {                                                                     \
        const hipError_t hip_check_err_ = (call);                            \
        if (hip_check_err_ != hipSuccess) {                                  \
            errx(1, "%s:%d: %s failed: %s", __FILE__, __LINE__, #call,       \
                 hipGetErrorString(hip_check_err_));                         \
        }                                                                    \
    } while (0)

// Increment each of the first `n` elements of `device_a` by one.
//
// Expects a 1D launch; each thread handles at most one element, so the grid
// must provide at least `n` threads in total. Threads whose global index is
// >= n do nothing, which makes launches with a rounded-up grid safe.
__global__ void inc_kernel(double *device_a, size_t n)
{
    const size_t i = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;
    if (i < n) {
        device_a[i]++;
    }
}

// Prints platform/device information, then runs a minimal
// allocate -> copy in -> compute -> copy out -> verify round trip.
// Returns 0 on success; exits with status 1 on a setup/runtime error and
// status 2 if the values read back from the device are wrong.
int main()
{
    constexpr size_t size = size_t{1} << 16;
    constexpr unsigned int threads_per_block = 256;

    // Platform information
    // NOTE(review): the major/minor decode below follows the CUDA-style
    // 1000*major + 10*minor encoding; HIP on AMD platforms may encode its
    // version numbers differently -- confirm against the HIP documentation.
    int runtime_version = 0;
    if (hipRuntimeGetVersion(&runtime_version) != hipSuccess) {
        // Informational only: warn instead of aborting.
        warnx("hipRuntimeGetVersion failed; reported version is unreliable");
    }
    cout << "HIP version: " << runtime_version / 1000 << "."
         << (runtime_version % 100) / 10 << endl;

    int driver_version = 0;
    if (hipDriverGetVersion(&driver_version) != hipSuccess) {
        warnx("hipDriverGetVersion failed; reported version is unreliable");
    }
    cout << "HIP driver version: " << driver_version / 1000 << "."
         << (driver_version % 100) / 10 << endl;

    int num_devices = 0;
    const hipError_t count_status = hipGetDeviceCount(&num_devices);
    if (count_status == hipErrorNoDevice || num_devices == 0) {
        errx(1, "No HIP device found");
    }
    // Any other failure of the device query is fatal as well.
    HIP_CHECK(count_status);
    cout << "Number of HIP devices: " << num_devices << endl;

    const int device_num = 0;
    HIP_CHECK(hipSetDevice(device_num));
    cout << "HIP Device number: " << device_num << endl;

    size_t memory_free = 0, memory_total = 0;
    HIP_CHECK(hipMemGetInfo(&memory_free, &memory_total));
    cout << "Memory on HIP device: " << memory_total / (1024. * 1024. * 1024.)
         << " GiB" << endl;
    cout << "Free Memory on HIP device: "
         << memory_free / (1024. * 1024. * 1024.) << " GiB" << endl;

    hipDeviceProp_t device_properties;
    HIP_CHECK(hipGetDeviceProperties(&device_properties, device_num));
    cout << "HIP device name: " << device_properties.name << endl;
    cout << "HIP device capability: " << device_properties.major << "."
         << device_properties.minor << endl;
    cout << "HIP device max clock rate: "
         << device_properties.clockRate / 1000000. << " GHz" << endl;
    cout << "HIP device max memory clock rate: "
         << device_properties.memoryClockRate / 1000000. << " GHz" << endl;
    cout << "HIP device compute mode: " << device_properties.computeMode
         << endl;
    if (!device_properties.arch.hasDoubles) {
        errx(1, "HIP device does not support doubles");
    }

    cout << "* Allocate memory on the host" << endl;
    double *a = static_cast<double *>(malloc(size * sizeof(double)));
    if (a == NULL) {
        errx(1, "malloc a[] failed");
    }

    cout << "* Allocate memory on the device" << endl;
    double *device_a = nullptr;
    if (hipMalloc(&device_a, size * sizeof(double)) != hipSuccess) {
        errx(1, "hipMalloc device_a[] failed");
    }

    cout << "* Pre-process / initialize data on the host" << endl;
    cout << " e.g. read data from storage" << endl;
    for (size_t i = 0; i < size; i++) {
        a[i] = 1.;
    }

    cout << "* Copy data from the host to the device" << endl;
    HIP_CHECK(hipMemcpy(device_a, a, size * sizeof(double),
                        hipMemcpyHostToDevice));

    cout << "* Compute on the device" << endl;
    // Round the grid up so every element is covered even when `size` is not
    // a multiple of the block size; the kernel guards the tail.
    const unsigned int num_blocks = static_cast<unsigned int>(
        (size + threads_per_block - 1) / threads_per_block);
    inc_kernel<<<num_blocks, threads_per_block>>>(device_a, size);
    // A launch does not return a status: fetch launch-configuration errors
    // explicitly, then synchronize so execution errors are reported here
    // rather than by the following copy.
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(hipDeviceSynchronize());

    cout << "* Transfer data back from the device to the host" << endl;
    HIP_CHECK(hipMemcpy(a, device_a, size * sizeof(double),
                        hipMemcpyDeviceToHost));

    cout << "* Delete data on the device" << endl;
    HIP_CHECK(hipFree(device_a));

    cout << "* Post-process data on the host" << endl;
    cout << " e.g. write data to storage" << endl;
    for (size_t i = 0; i < size; i++) {
        // Every element started at 1.0 and was incremented once; 2.0 is
        // exactly representable, so an exact comparison is appropriate.
        if (a[i] != 2.) {
            errx(2, "Computation on GPU failed");
        }
    }

    cout << "* Free memory on the host" << endl;
    free(a);

    return 0;
}