29 std::cout <<
"Testing for (" << m_size <<
"," << k_size <<
"," << n_size <<
" consume_dim : " << consume_dim <<
")"
36 std::size_t t_input_bytes = t_input.size() *
sizeof(DataType);
37 std::size_t t_result_bytes = t_result.size() *
sizeof(DataType);
39 DataType* gpu_data_in =
static_cast<DataType*
>(sycl_device.allocate(t_input_bytes));
40 DataType* gpu_data_out =
static_cast<DataType*
>(sycl_device.allocate(t_result_bytes));
45 sycl_device.memcpyHostToDevice(gpu_data_in, t_input.data(), t_input_bytes);
46 sycl_device.memcpyHostToDevice(gpu_data_out, t_input.data(), t_input_bytes);
48 gpu_t_result.device(sycl_device) = gpu_t_input.cumsum(consume_dim, exclusive);
50 t_result = t_input.cumsum(consume_dim, exclusive);
52 sycl_device.memcpyDeviceToHost(t_result_gpu.data(), gpu_data_out, t_result_bytes);
53 sycl_device.synchronize();
55 for (IndexType
i = 0;
i < t_result.size();
i++) {
62 std::cout <<
"mismatch detected at index " <<
i <<
" CPU : " << t_result(
i) <<
" vs SYCL : " << t_result_gpu(
i)
66 sycl_device.deallocate(gpu_data_in);
67 sycl_device.deallocate(gpu_data_out);
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define assert(e,...)
Definition: Logger.h:744
A tensor expression mapping an existing array of data.
Definition: TensorMap.h:33
The tensor class.
Definition: Tensor.h:68
static const float error_threshold
Definition: cxx11_tensor_convolution_sycl.cpp:32
EIGEN_DEVICE_FUNC bool isApprox(const Scalar &x, const Scalar &y, const typename NumTraits< Scalar >::Real &precision=NumTraits< Scalar >::dummy_precision())
Definition: MathFunctions.h:1923
std::array< T, N > array
Definition: EmulateArray.h:231
Real fabs(const Real &a)
Definition: boostmultiprec.cpp:117