cxx11_tensor_concatenation_sycl.cpp File Reference
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

Macros

#define EIGEN_TEST_NO_LONGDOUBLE
 
#define EIGEN_TEST_NO_COMPLEX
 
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t
 
#define EIGEN_USE_SYCL
 

Functions

template<typename DataType , int DataLayout, typename IndexType >
static void test_simple_concatenation (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , int DataLayout, typename IndexType >
static void test_concatenation_as_lvalue (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , typename Dev_selector >
void tensorConcat_perDevice (Dev_selector s)
 
 EIGEN_DECLARE_TEST (cxx11_tensor_concatenation_sycl)
 

Macro Definition Documentation

◆ EIGEN_DEFAULT_DENSE_INDEX_TYPE

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t

◆ EIGEN_TEST_NO_COMPLEX

#define EIGEN_TEST_NO_COMPLEX

◆ EIGEN_TEST_NO_LONGDOUBLE

#define EIGEN_TEST_NO_LONGDOUBLE

◆ EIGEN_USE_SYCL

#define EIGEN_USE_SYCL

Function Documentation

◆ EIGEN_DECLARE_TEST()

// Test entry point. For every device returned by
// Eigen::get_sycl_supported_devices(), runs tensorConcat_perDevice twice,
// once with DataType = half and once with DataType = float; each run is
// wrapped in CALL_SUBTEST.
EIGEN_DECLARE_TEST ( cxx11_tensor_concatenation_sycl  )
186  {
187  for (const auto& device : Eigen::get_sycl_supported_devices()) {
188  CALL_SUBTEST(tensorConcat_perDevice<half>(device));
189  CALL_SUBTEST(tensorConcat_perDevice<float>(device));
190  }
191 }
#define CALL_SUBTEST(FUNC)
Definition: main.h:382

References CALL_SUBTEST.

◆ tensorConcat_perDevice()

// Builds a QueueInterface from the device selector `s`, wraps it in an
// Eigen::SyclDevice, and runs the concatenation tests on that device for the
// given DataType, always with int64_t as the index type.
// NOTE(review): test_simple_concatenation is run for both RowMajor and
// ColMajor, but test_concatenation_as_lvalue is only instantiated for
// ColMajor - confirm whether leaving out the RowMajor case is intentional.
template<typename DataType , typename Dev_selector >
void tensorConcat_perDevice ( Dev_selector  s)
179  {
180  QueueInterface queueInterface(s);
181  auto sycl_device = Eigen::SyclDevice(&queueInterface);
182  test_simple_concatenation<DataType, RowMajor, int64_t>(sycl_device);
183  test_simple_concatenation<DataType, ColMajor, int64_t>(sycl_device);
184  test_concatenation_as_lvalue<DataType, ColMajor, int64_t>(sycl_device);
185 }
RealScalar s
Definition: level1_cplx_impl.h:130

References s.

◆ test_concatenation_as_lvalue()

// Checks that a concatenation expression can be used as an assignment target
// on a SYCL device: gpu_out is assigned to gpu_in1.concatenate(gpu_in2, 0),
// the two device input buffers are copied back into the host tensors `left`
// and `right`, and each is compared element-wise with the matching rows of
// `result` (rows 0-1 against left, rows 2-3 against right).
template<typename DataType , int DataLayout, typename IndexType >
static void test_concatenation_as_lvalue ( const Eigen::SyclDevice &  sycl_device)
static
126  {
127  IndexType leftDim1 = 2;
128  IndexType leftDim2 = 3;
129  Eigen::array<IndexType, 2> leftRange = {{leftDim1, leftDim2}};
130 
131  IndexType rightDim1 = 2;
132  IndexType rightDim2 = 3;
133  Eigen::array<IndexType, 2> rightRange = {{rightDim1, rightDim2}};
134 
135  IndexType concatDim1 = 4;
136  IndexType concatDim2 = 3;
137  Eigen::array<IndexType, 2> resRange = {{concatDim1, concatDim2}};
138 
// NOTE(review): listing lines 139-141 are missing from this extract. The host
// tensors `left`, `right` and `result` used below are presumably declared
// there (from leftRange, rightRange and resRange) - confirm against the source.
142 
143  left.setRandom();
144  right.setRandom();
145  result.setRandom();
146 
// One device buffer per host tensor, sized from that tensor's element count.
147  DataType* gpu_in1_data =
148  static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize() * sizeof(DataType)));
149  DataType* gpu_in2_data =
150  static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize() * sizeof(DataType)));
151  DataType* gpu_out_data =
152  static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize() * sizeof(DataType)));
153 
// NOTE(review): listing lines 154-156 are missing from this extract. gpu_in1,
// gpu_in2 and gpu_out used below are presumably declared there as views over
// the three device buffers - confirm against the source.
157 
158  sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(), (left.dimensions().TotalSize()) * sizeof(DataType));
159  sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(), (right.dimensions().TotalSize()) * sizeof(DataType));
160  sycl_device.memcpyHostToDevice(gpu_out_data, result.data(), (result.dimensions().TotalSize()) * sizeof(DataType));
161 
162  // t1.concatenate(t2, 0) = result;
163  gpu_in1.concatenate(gpu_in2, 0).device(sycl_device) = gpu_out;
// The assignment writes into the two input buffers, so those (not the output
// buffer) are copied back to the host for checking.
164  sycl_device.memcpyDeviceToHost(left.data(), gpu_in1_data, (left.dimensions().TotalSize()) * sizeof(DataType));
165  sycl_device.memcpyDeviceToHost(right.data(), gpu_in2_data, (right.dimensions().TotalSize()) * sizeof(DataType));
166 
// The loop bounds 2 and 3 are literals equal to the leftDim1 / leftDim2 values
// set above; the +2 row offset selects the second half of `result`.
167  for (IndexType i = 0; i < 2; ++i) {
168  for (IndexType j = 0; j < 3; ++j) {
169  VERIFY_IS_EQUAL(left(i, j), result(i, j));
170  VERIFY_IS_EQUAL(right(i, j), result(i + 2, j));
171  }
172  }
173  sycl_device.deallocate(gpu_in1_data);
174  sycl_device.deallocate(gpu_in2_data);
175  sycl_device.deallocate(gpu_out_data);
176 }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
A tensor expression mapping an existing array of data.
Definition: TensorMap.h:33
The tensor class.
Definition: Tensor.h:68
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:367
std::array< T, N > array
Definition: EmulateArray.h:231
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

References Eigen::TensorBase< Derived, AccessLevel >::concatenate(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, j, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), and VERIFY_IS_EQUAL.

◆ test_simple_concatenation()

// Concatenates two tensors on a SYCL device along dimension 0, then 1, then 2.
// For each dimension the result is computed into a fresh device buffer, copied
// back into a host tensor (concatenation1/2/3), and checked for both its
// dimensions and its elements against the host inputs `left` and `right`.
// Both input ranges are {2, 3, 1}.
template<typename DataType , int DataLayout, typename IndexType >
static void test_simple_concatenation ( const Eigen::SyclDevice &  sycl_device)
static
26  {
27  IndexType leftDim1 = 2;
28  IndexType leftDim2 = 3;
29  IndexType leftDim3 = 1;
30  Eigen::array<IndexType, 3> leftRange = {{leftDim1, leftDim2, leftDim3}};
31  IndexType rightDim1 = 2;
32  IndexType rightDim2 = 3;
33  IndexType rightDim3 = 1;
34  Eigen::array<IndexType, 3> rightRange = {{rightDim1, rightDim2, rightDim3}};
35 
36  // IndexType concatDim1 = 3;
37  // IndexType concatDim2 = 3;
38  // IndexType concatDim3 = 1;
39  // Eigen::array<IndexType, 3> concatRange = {{concatDim1, concatDim2, concatDim3}};
40 
// NOTE(review): listing lines 41-42 are missing from this extract. The host
// tensors `left` and `right` used below are presumably declared there (from
// leftRange and rightRange) - confirm against the source.
43  left.setRandom();
44  right.setRandom();
45 
46  DataType* gpu_in1_data =
47  static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize() * sizeof(DataType)));
48  DataType* gpu_in2_data =
49  static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize() * sizeof(DataType)));
50 
// NOTE(review): listing lines 51-52 are missing from this extract. gpu_in1 and
// gpu_in2 used below are presumably declared there as views over gpu_in1_data
// and gpu_in2_data - confirm against the source.
53  sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(), (left.dimensions().TotalSize()) * sizeof(DataType));
54  sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(), (right.dimensions().TotalSize()) * sizeof(DataType));
// NOTE(review): listing line 55 is missing from this extract; its content
// cannot be determined from here.
// --- Case 1: concatenate along dimension 0 ---
56  Tensor<DataType, 3, DataLayout, IndexType> concatenation1(leftDim1 + rightDim1, leftDim2, leftDim3);
57  DataType* gpu_out_data1 =
58  static_cast<DataType*>(sycl_device.allocate(concatenation1.dimensions().TotalSize() * sizeof(DataType)));
// NOTE(review): listing line 59 is missing from this extract. Line 60 is the
// tail of a statement that starts there, presumably the declaration of
// gpu_out1 over gpu_out_data1 - confirm against the source.
60  concatenation1.dimensions());
61 
62  // concatenation = left.concatenate(right, 0);
63  gpu_out1.device(sycl_device) = gpu_in1.concatenate(gpu_in2, 0);
64  sycl_device.memcpyDeviceToHost(concatenation1.data(), gpu_out_data1,
65  (concatenation1.dimensions().TotalSize()) * sizeof(DataType));
66 
67  VERIFY_IS_EQUAL(concatenation1.dimension(0), 4);
68  VERIFY_IS_EQUAL(concatenation1.dimension(1), 3);
69  VERIFY_IS_EQUAL(concatenation1.dimension(2), 1);
// Rows 0-1 of the result must equal `left`, rows 2-3 must equal `right`.
70  for (IndexType j = 0; j < 3; ++j) {
71  for (IndexType i = 0; i < 2; ++i) {
72  VERIFY_IS_EQUAL(concatenation1(i, j, 0), left(i, j, 0));
73  }
74  for (IndexType i = 2; i < 4; ++i) {
75  VERIFY_IS_EQUAL(concatenation1(i, j, 0), right(i - 2, j, 0));
76  }
77  }
78 
// Each output buffer is released before the next case allocates its own.
79  sycl_device.deallocate(gpu_out_data1);
// --- Case 2: concatenate along dimension 1 ---
80  Tensor<DataType, 3, DataLayout, IndexType> concatenation2(leftDim1, leftDim2 + rightDim2, leftDim3);
81  DataType* gpu_out_data2 =
82  static_cast<DataType*>(sycl_device.allocate(concatenation2.dimensions().TotalSize() * sizeof(DataType)));
// NOTE(review): listing line 83 is missing from this extract. Line 84 is the
// tail of a statement that starts there, presumably the declaration of
// gpu_out2 over gpu_out_data2 - confirm against the source.
84  concatenation2.dimensions());
85  gpu_out2.device(sycl_device) = gpu_in1.concatenate(gpu_in2, 1);
86  sycl_device.memcpyDeviceToHost(concatenation2.data(), gpu_out_data2,
87  (concatenation2.dimensions().TotalSize()) * sizeof(DataType));
88 
89  // concatenation = left.concatenate(right, 1);
90  VERIFY_IS_EQUAL(concatenation2.dimension(0), 2);
91  VERIFY_IS_EQUAL(concatenation2.dimension(1), 6);
92  VERIFY_IS_EQUAL(concatenation2.dimension(2), 1);
// Columns 0-2 of the result must equal `left`, columns 3-5 must equal `right`.
93  for (IndexType i = 0; i < 2; ++i) {
94  for (IndexType j = 0; j < 3; ++j) {
95  VERIFY_IS_EQUAL(concatenation2(i, j, 0), left(i, j, 0));
96  }
97  for (IndexType j = 3; j < 6; ++j) {
98  VERIFY_IS_EQUAL(concatenation2(i, j, 0), right(i, j - 3, 0));
99  }
100  }
101  sycl_device.deallocate(gpu_out_data2);
// --- Case 3: concatenate along dimension 2 ---
102  Tensor<DataType, 3, DataLayout, IndexType> concatenation3(leftDim1, leftDim2, leftDim3 + rightDim3);
103  DataType* gpu_out_data3 =
104  static_cast<DataType*>(sycl_device.allocate(concatenation3.dimensions().TotalSize() * sizeof(DataType)));
// NOTE(review): listing line 105 is missing from this extract. Line 106 is the
// tail of a statement that starts there, presumably the declaration of
// gpu_out3 over gpu_out_data3 - confirm against the source.
106  concatenation3.dimensions());
107  gpu_out3.device(sycl_device) = gpu_in1.concatenate(gpu_in2, 2);
108  sycl_device.memcpyDeviceToHost(concatenation3.data(), gpu_out_data3,
109  (concatenation3.dimensions().TotalSize()) * sizeof(DataType));
110 
111  // concatenation = left.concatenate(right, 2);
112  VERIFY_IS_EQUAL(concatenation3.dimension(0), 2);
113  VERIFY_IS_EQUAL(concatenation3.dimension(1), 3);
114  VERIFY_IS_EQUAL(concatenation3.dimension(2), 2);
// Slice 0 along the last dimension must equal `left`, slice 1 must equal `right`.
115  for (IndexType i = 0; i < 2; ++i) {
116  for (IndexType j = 0; j < 3; ++j) {
117  VERIFY_IS_EQUAL(concatenation3(i, j, 0), left(i, j, 0));
118  VERIFY_IS_EQUAL(concatenation3(i, j, 1), right(i, j, 0));
119  }
120  }
121  sycl_device.deallocate(gpu_out_data3);
122  sycl_device.deallocate(gpu_in1_data);
123  sycl_device.deallocate(gpu_in2_data);
124 }

References Eigen::TensorBase< Derived, AccessLevel >::concatenate(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimension(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, j, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), and VERIFY_IS_EQUAL.