cxx11_tensor_argmax_sycl.cpp File Reference
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

Macros

#define EIGEN_TEST_NO_LONGDOUBLE
 
#define EIGEN_TEST_NO_COMPLEX
 
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t
 
#define EIGEN_USE_SYCL
 

Functions

template<typename DataType , int Layout, typename DenseIndex >
static void test_sycl_simple_argmax (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , int DataLayout, typename DenseIndex >
static void test_sycl_argmax_dim (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , int DataLayout, typename DenseIndex >
static void test_sycl_argmin_dim (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , typename Device_Selector >
void sycl_argmax_test_per_device (const Device_Selector &d)
 
 EIGEN_DECLARE_TEST (cxx11_tensor_argmax_sycl)
 

Macro Definition Documentation

◆ EIGEN_DEFAULT_DENSE_INDEX_TYPE

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t

◆ EIGEN_TEST_NO_COMPLEX

#define EIGEN_TEST_NO_COMPLEX

◆ EIGEN_TEST_NO_LONGDOUBLE

#define EIGEN_TEST_NO_LONGDOUBLE

◆ EIGEN_USE_SYCL

#define EIGEN_USE_SYCL

Function Documentation

◆ EIGEN_DECLARE_TEST()

// Test entry point: enumerates every SYCL device the Eigen runtime can see
// and runs the full argmax/argmin suite on each one, for both half and
// float element types.
// NOTE(review): the doc extraction fused source line numbers ("253", "254",
// ...) into the snippet; they are stripped here so the code is readable.
EIGEN_DECLARE_TEST(cxx11_tensor_argmax_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_argmax_test_per_device<half>(device));
    CALL_SUBTEST(sycl_argmax_test_per_device<float>(device));
  }
}
#define CALL_SUBTEST(FUNC)
Definition: main.h:382

References CALL_SUBTEST.

◆ sycl_argmax_test_per_device()

template<typename DataType , typename Device_Selector >
void sycl_argmax_test_per_device ( const Device_Selector &  d)
242  {
243  QueueInterface queueInterface(d);
244  auto sycl_device = Eigen::SyclDevice(&queueInterface);
245  test_sycl_simple_argmax<DataType, RowMajor, int64_t>(sycl_device);
246  test_sycl_simple_argmax<DataType, ColMajor, int64_t>(sycl_device);
247  test_sycl_argmax_dim<DataType, ColMajor, int64_t>(sycl_device);
248  test_sycl_argmax_dim<DataType, RowMajor, int64_t>(sycl_device);
249  test_sycl_argmin_dim<DataType, ColMajor, int64_t>(sycl_device);
250  test_sycl_argmin_dim<DataType, RowMajor, int64_t>(sycl_device);
251 }

◆ test_sycl_argmax_dim()

template<typename DataType , int DataLayout, typename DenseIndex >
static void test_sycl_argmax_dim ( const Eigen::SyclDevice &  sycl_device)
static
67  {
68  DenseIndex sizeDim0 = 9;
69  DenseIndex sizeDim1 = 3;
70  DenseIndex sizeDim2 = 5;
71  DenseIndex sizeDim3 = 7;
72  Tensor<DataType, 4, DataLayout, DenseIndex> tensor(sizeDim0, sizeDim1, sizeDim2, sizeDim3);
73 
74  std::vector<DenseIndex> dims;
75  dims.push_back(sizeDim0);
76  dims.push_back(sizeDim1);
77  dims.push_back(sizeDim2);
78  dims.push_back(sizeDim3);
79  for (DenseIndex dim = 0; dim < 4; ++dim) {
80  array<DenseIndex, 3> out_shape;
81  for (DenseIndex d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d + 1];
82 
84 
86  for (DenseIndex i = 0; i < sizeDim0; ++i) {
87  for (DenseIndex j = 0; j < sizeDim1; ++j) {
88  for (DenseIndex k = 0; k < sizeDim2; ++k) {
89  for (DenseIndex l = 0; l < sizeDim3; ++l) {
90  ix[0] = i;
91  ix[1] = j;
92  ix[2] = k;
93  ix[3] = l;
94  // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l)
95  // = 10.0
96  tensor(ix) = static_cast<DataType>((ix[dim] != 0) ? -1.0 : 10.0);
97  }
98  }
99  }
100  }
101 
102  std::size_t in_bytes = tensor.size() * sizeof(DataType);
103  std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
104 
105  DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
106  DenseIndex* d_out = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
107 
109  d_in, Eigen::array<DenseIndex, 4>{{sizeDim0, sizeDim1, sizeDim2, sizeDim3}});
111 
112  sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
113  gpu_out.device(sycl_device) = gpu_in.argmax(dim);
114  sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
115 
116  VERIFY_IS_EQUAL(static_cast<size_t>(tensor_arg.size()),
117  size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.dimension(dim)));
118 
119  for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
120  // Expect max to be in the first index of the reduced dimension
121  VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
122  }
123 
124  sycl_device.synchronize();
125 
126  for (DenseIndex i = 0; i < sizeDim0; ++i) {
127  for (DenseIndex j = 0; j < sizeDim1; ++j) {
128  for (DenseIndex k = 0; k < sizeDim2; ++k) {
129  for (DenseIndex l = 0; l < sizeDim3; ++l) {
130  ix[0] = i;
131  ix[1] = j;
132  ix[2] = k;
133  ix[3] = l;
134  // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0
135  tensor(ix) = static_cast<DataType>((ix[dim] != tensor.dimension(dim) - 1) ? -1.0 : 20.0);
136  }
137  }
138  }
139  }
140 
141  sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
142  gpu_out.device(sycl_device) = gpu_in.argmax(dim);
143  sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
144 
145  for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
146  // Expect max to be in the last index of the reduced dimension
147  VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
148  }
149  sycl_device.deallocate(d_in);
150  sycl_device.deallocate(d_out);
151  }
152 }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
const unsigned n
Definition: CG3DPackingUnitTest.cpp:11
A tensor expression mapping an existing array of data.
Definition: TensorMap.h:33
The tensor class.
Definition: Tensor.h:68
char char char int int * k
Definition: level2_impl.h:374
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:367
std::array< T, N > array
Definition: EmulateArray.h:231
EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex
Definition: Meta.h:75
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimension(), i, j, k, n, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_EQUAL.

◆ test_sycl_argmin_dim()

template<typename DataType , int DataLayout, typename DenseIndex >
static void test_sycl_argmin_dim ( const Eigen::SyclDevice &  sycl_device)
static
155  {
156  DenseIndex sizeDim0 = 9;
157  DenseIndex sizeDim1 = 3;
158  DenseIndex sizeDim2 = 5;
159  DenseIndex sizeDim3 = 7;
160  Tensor<DataType, 4, DataLayout, DenseIndex> tensor(sizeDim0, sizeDim1, sizeDim2, sizeDim3);
161 
162  std::vector<DenseIndex> dims;
163  dims.push_back(sizeDim0);
164  dims.push_back(sizeDim1);
165  dims.push_back(sizeDim2);
166  dims.push_back(sizeDim3);
167  for (DenseIndex dim = 0; dim < 4; ++dim) {
168  array<DenseIndex, 3> out_shape;
169  for (DenseIndex d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d + 1];
170 
171  Tensor<DenseIndex, 3, DataLayout, DenseIndex> tensor_arg(out_shape);
172 
174  for (DenseIndex i = 0; i < sizeDim0; ++i) {
175  for (DenseIndex j = 0; j < sizeDim1; ++j) {
176  for (DenseIndex k = 0; k < sizeDim2; ++k) {
177  for (DenseIndex l = 0; l < sizeDim3; ++l) {
178  ix[0] = i;
179  ix[1] = j;
180  ix[2] = k;
181  ix[3] = l;
182  // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0
183  tensor(ix) = static_cast<DataType>((ix[dim] != 0) ? 1.0 : -10.0);
184  }
185  }
186  }
187  }
188 
189  std::size_t in_bytes = tensor.size() * sizeof(DataType);
190  std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
191 
192  DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
193  DenseIndex* d_out = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
194 
196  d_in, Eigen::array<DenseIndex, 4>{{sizeDim0, sizeDim1, sizeDim2, sizeDim3}});
198 
199  sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
200  gpu_out.device(sycl_device) = gpu_in.argmin(dim);
201  sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
202 
203  VERIFY_IS_EQUAL(static_cast<size_t>(tensor_arg.size()),
204  size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.dimension(dim)));
205 
206  for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
207  // Expect max to be in the first index of the reduced dimension
208  VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
209  }
210 
211  sycl_device.synchronize();
212 
213  for (DenseIndex i = 0; i < sizeDim0; ++i) {
214  for (DenseIndex j = 0; j < sizeDim1; ++j) {
215  for (DenseIndex k = 0; k < sizeDim2; ++k) {
216  for (DenseIndex l = 0; l < sizeDim3; ++l) {
217  ix[0] = i;
218  ix[1] = j;
219  ix[2] = k;
220  ix[3] = l;
221  // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0
222  tensor(ix) = static_cast<DataType>((ix[dim] != tensor.dimension(dim) - 1) ? 1.0 : -20.0);
223  }
224  }
225  }
226  }
227 
228  sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
229  gpu_out.device(sycl_device) = gpu_in.argmin(dim);
230  sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
231 
232  for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
233  // Expect max to be in the last index of the reduced dimension
234  VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
235  }
236  sycl_device.deallocate(d_in);
237  sycl_device.deallocate(d_out);
238  }
239 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimension(), i, j, k, n, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_EQUAL.

◆ test_sycl_simple_argmax()

template<typename DataType , int Layout, typename DenseIndex >
static void test_sycl_simple_argmax ( const Eigen::SyclDevice &  sycl_device)
static
30  {
34  in.setRandom();
35  in *= in.constant(static_cast<DataType>(100.0));
36  in(0, 0, 0) = static_cast<DataType>(-1000.0);
37  in(1, 1, 1) = static_cast<DataType>(1000.0);
38 
39  std::size_t in_bytes = in.size() * sizeof(DataType);
40  std::size_t out_bytes = out_max.size() * sizeof(DenseIndex);
41 
42  DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
43  DenseIndex* d_out_max = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
44  DenseIndex* d_out_min = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
45 
47  Eigen::array<DenseIndex, 3>{{2, 2, 2}});
50  sycl_device.memcpyHostToDevice(d_in, in.data(), in_bytes);
51 
52  gpu_out_max.device(sycl_device) = gpu_in.argmax();
53  gpu_out_min.device(sycl_device) = gpu_in.argmin();
54 
55  sycl_device.memcpyDeviceToHost(out_max.data(), d_out_max, out_bytes);
56  sycl_device.memcpyDeviceToHost(out_min.data(), d_out_min, out_bytes);
57 
58  VERIFY_IS_EQUAL(out_max(), 2 * 2 * 2 - 1);
59  VERIFY_IS_EQUAL(out_min(), 0);
60 
61  sycl_device.deallocate(d_in);
62  sycl_device.deallocate(d_out_max);
63  sycl_device.deallocate(d_out_min);
64 }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & setRandom()
Definition: TensorBase.h:1049
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
Definition: Tensor.h:101
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
Definition: Tensor.h:102

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_EQUAL.