1 #ifndef EIGEN_TEST_GPU_COMMON_H
2 #define EIGEN_TEST_GPU_COMMON_H
5 #include <hip/hip_runtime.h>
6 #include <hip/hip_runtime_api.h>
9 #include <cuda_runtime.h>
10 #include <cuda_runtime_api.h>
15 #if !defined(__CUDACC__) && !defined(__HIPCC__)
19 template <
typename Kernel,
typename Input,
typename Output>
21 for (
int i = 0;
i <
n;
i++) ker(
i, in.data(),
out.data());
24 template <
typename Kernel,
typename Input,
typename Output>
33 template <
typename Kernel,
typename Input,
typename Output>
37 std::ptrdiff_t in_bytes = in.size() *
sizeof(
typename Input::Scalar);
40 gpuMalloc((
void**)(&d_in), in_bytes);
41 gpuMalloc((
void**)(&d_out), out_bytes);
43 gpuMemcpy(d_in, in.data(), in_bytes, gpuMemcpyHostToDevice);
44 gpuMemcpy(d_out,
out.data(), out_bytes, gpuMemcpyHostToDevice);
49 dim3 Grids((
n +
int(Blocks.x) - 1) /
int(Blocks.x));
51 gpuDeviceSynchronize();
55 typename std::decay<decltype(*d_out)>::
type>),
56 dim3(Grids), dim3(Blocks), 0, 0, ker,
n, d_in, d_out);
60 run_on_gpu_meta_kernel<<<Grids, Blocks>>>(ker,
n, d_in, d_out);
64 gpuError_t err = gpuGetLastError();
65 if (err != gpuSuccess) {
66 printf(
"%s: %s\n", gpuGetErrorName(err), gpuGetErrorString(err));
71 err = gpuDeviceSynchronize();
72 if (err != gpuSuccess) {
73 printf(
"%s: %s\n", gpuGetErrorName(err), gpuGetErrorString(err));
78 gpuMemcpy(
const_cast<typename
Input::Scalar*
>(in.data()), d_in, in_bytes, gpuMemcpyDeviceToHost);
79 gpuMemcpy(
out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost);
85 template <
typename Kernel,
typename Input,
typename Output>
88 Output out_ref, out_gpu;
89 #if !defined(EIGEN_GPU_COMPILE_PHASE)
91 out_ref = out_gpu =
out;
98 #if !defined(EIGEN_GPU_COMPILE_PHASE)
108 #if defined(__CUDA_ARCH__)
109 info[0] =
int(__CUDA_ARCH__ + 0);
111 #if defined(EIGEN_HIP_DEVICE_COMPILE)
112 info[1] =
int(EIGEN_HIP_DEVICE_COMPILE + 0);
120 gpuDeviceProp_t deviceProp;
121 gpuGetDeviceProperties(&deviceProp, device);
123 ArrayXi dummy(1),
info(10);
127 std::cout <<
"GPU compile-time info:\n";
130 std::cout <<
" EIGEN_CUDACC: " <<
int(EIGEN_CUDACC) <<
"\n";
133 #ifdef EIGEN_CUDA_SDK_VER
142 std::cout <<
" EIGEN_HIPCC: " <<
int(EIGEN_HIPCC) <<
"\n";
145 std::cout <<
" EIGEN_CUDA_ARCH: " <<
info[0] <<
"\n";
146 std::cout <<
" EIGEN_HIP_DEVICE_COMPILE: " <<
info[1] <<
"\n";
148 std::cout <<
"GPU device info:\n";
149 std::cout <<
" name: " << deviceProp.name <<
"\n";
150 std::cout <<
" capability: " << deviceProp.major <<
"." << deviceProp.minor <<
"\n";
151 std::cout <<
" multiProcessorCount: " << deviceProp.multiProcessorCount <<
"\n";
152 std::cout <<
" maxThreadsPerMultiProcessor: " << deviceProp.maxThreadsPerMultiProcessor <<
"\n";
153 std::cout <<
" warpSize: " << deviceProp.warpSize <<
"\n";
154 std::cout <<
" regsPerBlock: " << deviceProp.regsPerBlock <<
"\n";
155 std::cout <<
" concurrentKernels: " << deviceProp.concurrentKernels <<
"\n";
156 std::cout <<
" clockRate: " << deviceProp.clockRate <<
"\n";
157 std::cout <<
" canMapHostMemory: " << deviceProp.canMapHostMemory <<
"\n";
158 std::cout <<
" computeMode: " << deviceProp.computeMode <<
"\n";
int i
Definition: BiCGSTAB_step_by_step.cpp:9
const unsigned n
Definition: CG3DPackingUnitTest.cpp:11
#define EIGEN_CUDA_SDK_VER
Definition: Macros.h:542
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:966
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define EIGEN_HIP_LAUNCH_BOUNDS_1024
Definition: Macros.h:576
#define EIGEN_COMP_NVCC
Definition: Macros.h:143
SCALAR Scalar
Definition: bench_gemm.cpp:45
dim3 threadIdx
Definition: gpu_common.h:16
dim3 blockDim
Definition: gpu_common.h:16
void ei_test_init_gpu()
Definition: gpu_common.h:118
void run_and_compare_to_gpu(const Kernel &ker, int n, const Input &in, Output &out)
Definition: gpu_common.h:86
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void run_on_gpu_meta_kernel(const Kernel ker, int n, const Input *in, Output *out)
Definition: gpu_common.h:25
dim3 blockIdx
Definition: gpu_common.h:16
void run_on_cpu(const Kernel &ker, int n, const Input &in, Output &out)
Definition: gpu_common.h:20
void run_on_gpu(const Kernel &ker, int n, const Input &in, Output &out)
Definition: gpu_common.h:34
#define VERIFY_IS_APPROX(a, b)
Definition: integer_types.cpp:13
int info
Definition: level2_cplx_impl.h:39
type
Definition: compute_granudrum_aor.py:141
Definition: gpu_common.h:104
EIGEN_DEVICE_FUNC void operator()(int i, const int *, int *info) const
Definition: gpu_common.h:105
std::ofstream out("Result.txt")