cxx11_tensor_reverse_sycl.cpp File Reference
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

Macros

#define EIGEN_TEST_NO_LONGDOUBLE
 
#define EIGEN_TEST_NO_COMPLEX
 
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t
 
#define EIGEN_USE_SYCL
 

Functions

template<typename DataType, int DataLayout, typename IndexType>
static void test_simple_reverse(const Eigen::SyclDevice &sycl_device)
 
template<typename DataType, int DataLayout, typename IndexType>
static void test_expr_reverse(const Eigen::SyclDevice &sycl_device, bool LValue)
 
template<typename DataType>
void sycl_reverse_test_per_device(const cl::sycl::device &d)
 
EIGEN_DECLARE_TEST(cxx11_tensor_reverse_sycl)
 

Macro Definition Documentation

◆ EIGEN_DEFAULT_DENSE_INDEX_TYPE

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t

◆ EIGEN_TEST_NO_COMPLEX

#define EIGEN_TEST_NO_COMPLEX

◆ EIGEN_TEST_NO_LONGDOUBLE

#define EIGEN_TEST_NO_LONGDOUBLE

◆ EIGEN_USE_SYCL

#define EIGEN_USE_SYCL
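
All four macros must be defined before the headers are included to take effect: EIGEN_TEST_NO_LONGDOUBLE and EIGEN_TEST_NO_COMPLEX trim the scalar types exercised by the test harness in main.h, EIGEN_DEFAULT_DENSE_INDEX_TYPE widens Eigen's default dense index type to int64_t, and EIGEN_USE_SYCL enables the SYCL backend of the Tensor module. The file's preamble therefore reads:

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>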

Function Documentation

◆ EIGEN_DECLARE_TEST()

EIGEN_DECLARE_TEST(cxx11_tensor_reverse_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    std::cout << "Running on " << device.get_info<cl::sycl::info::device::name>() << std::endl;
    CALL_SUBTEST_1(sycl_reverse_test_per_device<short>(device));
    CALL_SUBTEST_2(sycl_reverse_test_per_device<int>(device));
    CALL_SUBTEST_3(sycl_reverse_test_per_device<unsigned int>(device));
#ifdef EIGEN_SYCL_DOUBLE_SUPPORT
    CALL_SUBTEST_4(sycl_reverse_test_per_device<double>(device));
#endif
    CALL_SUBTEST_5(sycl_reverse_test_per_device<half>(device));
    CALL_SUBTEST_6(sycl_reverse_test_per_device<float>(device));
  }
}

References CALL_SUBTEST_1, CALL_SUBTEST_2, CALL_SUBTEST_3, CALL_SUBTEST_4, CALL_SUBTEST_5, and CALL_SUBTEST_6.
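
The CALL_SUBTEST_n macros come from Eigen's generated split_test_helper.h: each one forwards to CALL_SUBTEST only when the matching EIGEN_TEST_PART_n (or EIGEN_TEST_PART_ALL) is defined, so the subtests can be compiled and run as independent binaries. A rough sketch of the pattern for one slot (the generated header repeats it for every n):

#if defined(EIGEN_TEST_PART_1) || defined(EIGEN_TEST_PART_ALL)
#define CALL_SUBTEST_1(FUNC) CALL_SUBTEST(FUNC)
#else
#define CALL_SUBTEST_1(FUNC)  // compiled out in other test parts
#endif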

◆ sycl_reverse_test_per_device()

template<typename DataType>
void sycl_reverse_test_per_device(const cl::sycl::device& d) {
  QueueInterface queueInterface(d);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  // Exercise both layouts, and both the rvalue (false) and lvalue (true) forms.
  test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device);
  test_simple_reverse<DataType, ColMajor, int64_t>(sycl_device);
  test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, false);
  test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, false);
  test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, true);
  test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, true);
}
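
For standalone experiments outside the test harness, the same plumbing can drive a single reverse on a SYCL device. A minimal sketch, assuming the Eigen SYCL backend headers are available; the buffer names (d_in, d_out) and shapes are illustrative, not part of the test:

#define EIGEN_USE_SYCL
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  // Pick the first supported SYCL device and wrap it in an Eigen device.
  auto devices = Eigen::get_sycl_supported_devices();
  Eigen::QueueInterface queueInterface(devices[0]);
  Eigen::SyclDevice sycl_device(&queueInterface);

  Eigen::Tensor<float, 2, Eigen::RowMajor, int64_t> in(4, 8), out(4, 8);
  in.setRandom();
  Eigen::array<bool, 2> rev{{false, true}};  // reverse the second dimension only

  float* d_in = static_cast<float*>(sycl_device.allocate(in.size() * sizeof(float)));
  float* d_out = static_cast<float*>(sycl_device.allocate(out.size() * sizeof(float)));
  Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor, int64_t> > gpu_in(d_in, 4, 8);
  Eigen::TensorMap<Eigen::Tensor<float, 2, Eigen::RowMajor, int64_t> > gpu_out(d_out, 4, 8);

  sycl_device.memcpyHostToDevice(d_in, in.data(), in.size() * sizeof(float));
  gpu_out.device(sycl_device) = gpu_in.reverse(rev);  // kernel runs on the device
  sycl_device.memcpyDeviceToHost(out.data(), d_out, out.size() * sizeof(float));

  sycl_device.deallocate(d_in);
  sycl_device.deallocate(d_out);
  return 0;
}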

◆ test_expr_reverse()

template<typename DataType, int DataLayout, typename IndexType>
static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, bool LValue)
{
  IndexType dim1 = 2;
  IndexType dim2 = 3;
  IndexType dim3 = 5;
  IndexType dim4 = 7;

  array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
  Tensor<DataType, 4, DataLayout, IndexType> expected(tensorRange);
  Tensor<DataType, 4, DataLayout, IndexType> result(tensorRange);
  tensor.setRandom();

  array<bool, 4> dim_rev;
  dim_rev[0] = false;
  dim_rev[1] = true;
  dim_rev[2] = false;
  dim_rev[3] = true;

  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data_expected =
      static_cast<DataType*>(sycl_device.allocate(expected.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data_result =
      static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected(gpu_out_data_expected, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result(gpu_out_data_result, tensorRange);

  sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(), (tensor.dimensions().TotalSize()) * sizeof(DataType));

  // Reference result: reverse the whole tensor in one shot, either as an
  // lvalue (writing through a reversed view) or as an rvalue expression.
  if (LValue) {
    out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu;
  } else {
    out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev);
  }
  sycl_device.memcpyDeviceToHost(expected.data(), gpu_out_data_expected,
                                 expected.dimensions().TotalSize() * sizeof(DataType));

  array<IndexType, 4> src_slice_dim;
  src_slice_dim[0] = 2;
  src_slice_dim[1] = 3;
  src_slice_dim[2] = 1;
  src_slice_dim[3] = 7;
  array<IndexType, 4> src_slice_start;
  src_slice_start[0] = 0;
  src_slice_start[1] = 0;
  src_slice_start[2] = 0;
  src_slice_start[3] = 0;
  array<IndexType, 4> dst_slice_dim = src_slice_dim;
  array<IndexType, 4> dst_slice_start = src_slice_start;

  // Same reverse, applied slice by slice along dimension 2.
  for (IndexType i = 0; i < 5; ++i) {
    if (LValue) {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) =
          in_gpu.slice(src_slice_start, src_slice_dim);
    } else {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
          in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev);
    }
    src_slice_start[2] += 1;
    dst_slice_start[2] += 1;
  }
  sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result,
                                 result.dimensions().TotalSize() * sizeof(DataType));

  for (IndexType i = 0; i < expected.dimension(0); ++i) {
    for (IndexType j = 0; j < expected.dimension(1); ++j) {
      for (IndexType k = 0; k < expected.dimension(2); ++k) {
        for (IndexType l = 0; l < expected.dimension(3); ++l) {
          VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l));
        }
      }
    }
  }

  // Repeat the slice-wise check, this time applying reverse before slicing
  // on the rvalue path; the result must still match the full reverse.
  dst_slice_start[2] = 0;
  result.setRandom();
  sycl_device.memcpyHostToDevice(gpu_out_data_result, result.data(),
                                 (result.dimensions().TotalSize()) * sizeof(DataType));
  for (IndexType i = 0; i < 5; ++i) {
    if (LValue) {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) =
          in_gpu.slice(dst_slice_start, dst_slice_dim);
    } else {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
          in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
    }
    dst_slice_start[2] += 1;
  }
  sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result,
                                 result.dimensions().TotalSize() * sizeof(DataType));

  for (IndexType i = 0; i < expected.dimension(0); ++i) {
    for (IndexType j = 0; j < expected.dimension(1); ++j) {
      for (IndexType k = 0; k < expected.dimension(2); ++k) {
        for (IndexType l = 0; l < expected.dimension(3); ++l) {
          VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l));
        }
      }
    }
  }

  // Release the device buffers (mirrors test_simple_reverse below).
  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data_expected);
  sycl_device.deallocate(gpu_out_data_result);
}

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimension(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), Eigen::TensorBase< Derived, AccessLevel >::reverse(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::TensorBase< Derived, AccessLevel >::slice(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), and VERIFY_IS_EQUAL.
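
The LValue flag selects where the reverse appears: on the right of the assignment it is an ordinary rvalue expression, on the left it writes through a reversed view of the output; both forms must yield the same tensor. A minimal host-side (CPU-only) sketch of the two forms, with illustrative names:

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 2> src(2, 3), as_rvalue(2, 3), as_lvalue(2, 3);
  src.setRandom();
  Eigen::array<bool, 2> rev{{false, true}};  // reverse the second dimension

  as_rvalue = src.reverse(rev);   // reverse as an rvalue expression
  as_lvalue.reverse(rev) = src;   // reverse as an lvalue: write through the reversed view

  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j)
      assert(as_rvalue(i, j) == as_lvalue(i, j));
  return 0;
}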

◆ test_simple_reverse()

template<typename DataType, int DataLayout, typename IndexType>
static void test_simple_reverse(const Eigen::SyclDevice& sycl_device)
{
  IndexType dim1 = 2;
  IndexType dim2 = 3;
  IndexType dim3 = 5;
  IndexType dim4 = 7;

  array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
  Tensor<DataType, 4, DataLayout, IndexType> reversed_tensor(tensorRange);
  tensor.setRandom();

  array<bool, 4> dim_rev;
  dim_rev[0] = false;
  dim_rev[1] = true;
  dim_rev[2] = true;
  dim_rev[3] = false;

  DataType* gpu_in_data =
      static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data =
      static_cast<DataType*>(sycl_device.allocate(reversed_tensor.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu(gpu_out_data, tensorRange);

  sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(), (tensor.dimensions().TotalSize()) * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
  sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data,
                                 reversed_tensor.dimensions().TotalSize() * sizeof(DataType));
  // Check that the device-side reverse matches the expected index mapping
  // (dimensions 1 and 2 reversed).
  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 5; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(tensor(i, j, k, l), reversed_tensor(i, 2 - j, 4 - k, l));
        }
      }
    }
  }
  // Reverse only the first dimension.
  dim_rev[0] = true;
  dim_rev[1] = false;
  dim_rev[2] = false;
  dim_rev[3] = false;

  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
  sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data,
                                 reversed_tensor.dimensions().TotalSize() * sizeof(DataType));

  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 5; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(tensor(i, j, k, l), reversed_tensor(1 - i, j, k, l));
        }
      }
    }
  }

  // Reverse the first and last dimensions.
  dim_rev[0] = true;
  dim_rev[1] = false;
  dim_rev[2] = false;
  dim_rev[3] = true;
  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
  sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data,
                                 reversed_tensor.dimensions().TotalSize() * sizeof(DataType));

  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 5; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(tensor(i, j, k, l), reversed_tensor(1 - i, j, k, 6 - l));
        }
      }
    }
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), Eigen::TensorBase< Derived, AccessLevel >::reverse(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), and VERIFY_IS_EQUAL.
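
The verification loops encode the index mapping of reverse: every dimension d flagged true in dim_rev maps index i_d to dim_d - 1 - i_d, while flagged-false dimensions pass through unchanged. The same check on the host, as a minimal CPU-only sketch using the first mask above:

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 4> t(2, 3, 5, 7);
  t.setRandom();
  Eigen::array<bool, 4> rev{{false, true, true, false}};  // same mask as the first check
  Eigen::Tensor<float, 4> r = t.reverse(rev);

  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j)
      for (int k = 0; k < 5; ++k)
        for (int l = 0; l < 7; ++l)
          // dims 1 and 2 are reversed: j -> 2 - j, k -> 4 - k; i and l unchanged
          assert(t(i, j, k, l) == r(i, 2 - j, 4 - k, l));
  return 0;
}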