cxx11_tensor_convolution_sycl.cpp File Reference
#include <iostream>
#include <chrono>
#include <ctime>
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <iomanip>

Macros

#define EIGEN_TEST_NO_LONGDOUBLE
 
#define EIGEN_TEST_NO_COMPLEX
 
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t
 
#define EIGEN_USE_SYCL
 

Functions

template<typename DataType , int DataLayout, typename IndexType >
static void test_larg_expr1D (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , int DataLayout, typename IndexType >
static void test_larg_expr2D (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , int DataLayout, typename IndexType >
static void test_larg_expr3D (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , int DataLayout, typename IndexType >
static void test_evals (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , int DataLayout, typename IndexType >
static void test_expr (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , int DataLayout, typename IndexType >
static void test_modes (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , int DataLayout, typename IndexType >
static void test_strides (const Eigen::SyclDevice &sycl_device)
 
template<typename Dev_selector >
void tensorConvolutionPerDevice (Dev_selector &s)
 
 EIGEN_DECLARE_TEST (cxx11_tensor_convolution_sycl)
 

Variables

static const float error_threshold = 1e-4f
 

Macro Definition Documentation

◆ EIGEN_DEFAULT_DENSE_INDEX_TYPE

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t

◆ EIGEN_TEST_NO_COMPLEX

#define EIGEN_TEST_NO_COMPLEX

◆ EIGEN_TEST_NO_LONGDOUBLE

#define EIGEN_TEST_NO_LONGDOUBLE

◆ EIGEN_USE_SYCL

#define EIGEN_USE_SYCL
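These macros take effect only if they are defined before main.h and the Tensor header are included: they disable the long double and complex instantiations of the test harness, switch Eigen's dense index type to int64_t, and enable the SYCL backend. A minimal configuration preamble, reconstructed from the macro and include lists above rather than copied verbatim from the file, would look like:

  #define EIGEN_TEST_NO_LONGDOUBLE
  #define EIGEN_TEST_NO_COMPLEX
  #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
  #define EIGEN_USE_SYCL

  #include "main.h"
  #include <unsupported/Eigen/CXX11/Tensor>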

Function Documentation

◆ EIGEN_DECLARE_TEST()

EIGEN_DECLARE_TEST ( cxx11_tensor_convolution_sycl  )
454  {
455  for (const auto& device : Eigen::get_sycl_supported_devices()) {
456  CALL_SUBTEST(tensorConvolutionPerDevice(device));
457  }
458 }

References CALL_SUBTEST, and tensorConvolutionPerDevice().

◆ tensorConvolutionPerDevice()

template<typename Dev_selector >
void tensorConvolutionPerDevice ( Dev_selector &  s)
435  {
436  QueueInterface queueInterface(s);
437  auto sycl_device = Eigen::SyclDevice(&queueInterface);
438  test_larg_expr1D<float, RowMajor, int64_t>(sycl_device);
439  test_larg_expr1D<float, ColMajor, int64_t>(sycl_device);
440  test_larg_expr2D<float, RowMajor, int64_t>(sycl_device);
441  test_larg_expr2D<float, ColMajor, int64_t>(sycl_device);
442  test_larg_expr3D<float, RowMajor, int64_t>(sycl_device);
443  test_larg_expr3D<float, ColMajor, int64_t>(sycl_device);
444  test_evals<float, ColMajor, int64_t>(sycl_device);
445  test_evals<float, RowMajor, int64_t>(sycl_device);
446  test_expr<float, ColMajor, int64_t>(sycl_device);
447  test_expr<float, RowMajor, int64_t>(sycl_device);
448  test_modes<float, ColMajor, int64_t>(sycl_device);
449  test_modes<float, RowMajor, int64_t>(sycl_device);
450  test_strides<float, ColMajor, int64_t>(sycl_device);
451  test_strides<float, RowMajor, int64_t>(sycl_device);
452 }


Referenced by EIGEN_DECLARE_TEST().

◆ test_evals()

template<typename DataType , int DataLayout, typename IndexType >
static void test_evals ( const Eigen::SyclDevice &  sycl_device)
static
215  {
216  Eigen::array<IndexType, 2> input_dims = {{3, 3}};
217  Eigen::array<IndexType, 1> kernel_dims = {{2}};
218  Eigen::array<IndexType, 2> result_dims = {{2, 3}};
219 
220  Tensor<DataType, 2, DataLayout, IndexType> input(input_dims);
221  Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
222  Tensor<DataType, 2, DataLayout, IndexType> result(result_dims);
223 
224  Eigen::array<IndexType, 1> dims3{{0}};
225 
226  input.setRandom();
227  kernel.setRandom();
228  result.setZero();
229 
230  std::size_t input_bytes = input.size() * sizeof(DataType);
231  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
232  std::size_t result_bytes = result.size() * sizeof(DataType);
233 
234  DataType* d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
235  DataType* d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
236  DataType* d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
237 
238  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_input(d_input, input_dims);
239  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
240  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_result(d_result, result_dims);
241  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
242  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
243 
244  gpu_result.device(sycl_device) = gpu_input.convolve(gpu_kernel, dims3);
245  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
246 
247  VERIFY_IS_APPROX(result(0, 0), input(0, 0) * kernel(0) + input(1, 0) * kernel(1)); // index 0
248  VERIFY_IS_APPROX(result(0, 1), input(0, 1) * kernel(0) + input(1, 1) * kernel(1)); // index 2
249  VERIFY_IS_APPROX(result(0, 2), input(0, 2) * kernel(0) + input(1, 2) * kernel(1)); // index 4
250  VERIFY_IS_APPROX(result(1, 0), input(1, 0) * kernel(0) + input(2, 0) * kernel(1)); // index 1
251  VERIFY_IS_APPROX(result(1, 1), input(1, 1) * kernel(0) + input(2, 1) * kernel(1)); // index 3
252  VERIFY_IS_APPROX(result(1, 2), input(1, 2) * kernel(0) + input(2, 2) * kernel(1)); // index 5
253 
254  sycl_device.deallocate(d_input);
255  sycl_device.deallocate(d_kernel);
256  sycl_device.deallocate(d_result);
257 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::TensorBase< Derived, AccessLevel >::setZero(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_APPROX.
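The six VERIFY_IS_APPROX checks spell out a host-side "valid" 1D convolution of the 3x3 input along dimension 0 with the length-2 kernel. A sketch of the equivalent reference loop, using a hypothetical host tensor named expected (not part of the test) and assuming it runs inside the same function scope:

  Tensor<DataType, 2, DataLayout, IndexType> expected(result_dims);
  for (IndexType j = 0; j < 3; ++j) {
    for (IndexType i = 0; i < 2; ++i) {
      // convolve along dimension 0: output row i combines input rows i and i + 1
      expected(i, j) = input(i, j) * kernel(0) + input(i + 1, j) * kernel(1);
    }
  }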

◆ test_expr()

template<typename DataType , int DataLayout, typename IndexType >
static void test_expr ( const Eigen::SyclDevice &  sycl_device)
static
260  {
261  Eigen::array<IndexType, 2> input_dims = {{3, 3}};
262  Eigen::array<IndexType, 2> kernel_dims = {{2, 2}};
263  Eigen::array<IndexType, 2> result_dims = {{2, 2}};
264 
265  Tensor<DataType, 2, DataLayout, IndexType> input(input_dims);
266  Tensor<DataType, 2, DataLayout, IndexType> kernel(kernel_dims);
267  Tensor<DataType, 2, DataLayout, IndexType> result(result_dims);
268 
269  input.setRandom();
270  kernel.setRandom();
271  Eigen::array<IndexType, 2> dims;
272  dims[0] = 0;
273  dims[1] = 1;
274 
275  std::size_t input_bytes = input.size() * sizeof(DataType);
276  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
277  std::size_t result_bytes = result.size() * sizeof(DataType);
278 
279  DataType* d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
280  DataType* d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
281  DataType* d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
282 
283  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_input(d_input, input_dims);
284  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
285  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_result(d_result, result_dims);
286  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
287  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
288 
289  gpu_result.device(sycl_device) = gpu_input.convolve(gpu_kernel, dims);
290  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
291 
292  VERIFY_IS_APPROX(result(0, 0), input(0, 0) * kernel(0, 0) + input(0, 1) * kernel(0, 1) + input(1, 0) * kernel(1, 0) +
293  input(1, 1) * kernel(1, 1));
294  VERIFY_IS_APPROX(result(0, 1), input(0, 1) * kernel(0, 0) + input(0, 2) * kernel(0, 1) + input(1, 1) * kernel(1, 0) +
295  input(1, 2) * kernel(1, 1));
296  VERIFY_IS_APPROX(result(1, 0), input(1, 0) * kernel(0, 0) + input(1, 1) * kernel(0, 1) + input(2, 0) * kernel(1, 0) +
297  input(2, 1) * kernel(1, 1));
298  VERIFY_IS_APPROX(result(1, 1), input(1, 1) * kernel(0, 0) + input(1, 2) * kernel(0, 1) + input(2, 1) * kernel(1, 0) +
299  input(2, 2) * kernel(1, 1));
300 
301  sycl_device.deallocate(d_input);
302  sycl_device.deallocate(d_kernel);
303  sycl_device.deallocate(d_result);
304 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_APPROX.
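Convolving over both dimensions {0, 1} turns the 3x3 input and 2x2 kernel into a 2x2 result, and each check above expands the corresponding 2D valid-convolution sum. A sketch of that reference computation with a hypothetical host tensor expected, assuming the same function scope:

  Tensor<DataType, 2, DataLayout, IndexType> expected(result_dims);
  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 2; ++j) {
      expected(i, j) = DataType(0);
      for (IndexType m = 0; m < 2; ++m) {
        for (IndexType n = 0; n < 2; ++n) {
          expected(i, j) += input(i + m, j + n) * kernel(m, n);
        }
      }
    }
  }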

◆ test_larg_expr1D()

template<typename DataType , int DataLayout, typename IndexType >
static void test_larg_expr1D ( const Eigen::SyclDevice &  sycl_device)
static
35  {
36  IndexType indim0 = 53;
37  IndexType indim1 = 55;
38  IndexType indim2 = 51;
39  IndexType outdim0 = 50;
40  IndexType outdim1 = 55;
41  IndexType outdim2 = 51;
42  Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
43  Eigen::array<IndexType, 1> kernel_dims = {{4}};
44  Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
45 
46  Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
47  Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
48  Tensor<DataType, 3, DataLayout, IndexType> result(result_dims);
49  Tensor<DataType, 3, DataLayout, IndexType> result_host(result_dims);
50 
51  Eigen::array<IndexType, 1> dims3{{0}};
52 
53  input.setRandom();
54  kernel.setRandom();
55  result.setZero();
56  result_host.setZero();
57 
58  std::size_t input_bytes = input.size() * sizeof(DataType);
59  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
60  std::size_t result_bytes = result.size() * sizeof(DataType);
61 
62  DataType* d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
63  DataType* d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
64  DataType* d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
65 
66  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
67  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
68  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
69  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
70  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
71 
72  gpu_result.device(sycl_device) = gpu_input.convolve(gpu_kernel, dims3);
73  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
74 
75  result_host = input.convolve(kernel, dims3);
76 
77  for (IndexType i = 0; i < outdim0; i++) {
78  for (IndexType j = 0; j < outdim1; j++) {
79  for (IndexType k = 0; k < outdim2; k++) {
80  if (!(Eigen::internal::isApprox(result(i, j, k), result_host(i, j, k), error_threshold))) {
81  std::cout << std::setprecision(16) << "mismatch detected at index ( " << i << " , " << j << ", " << k
82  << " ) "
83  << " \t " << result(i, j, k) << " vs " << result_host(i, j, k) << std::endl;
84  assert(false);
85  }
86  }
87  }
88  }
89  sycl_device.deallocate(d_input);
90  sycl_device.deallocate(d_kernel);
91  sycl_device.deallocate(d_result);
92 }

References assert, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), error_threshold, i, Eigen::internal::isApprox(), j, k, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::TensorBase< Derived, AccessLevel >::setZero(), and Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size().
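Only the convolved dimension shrinks, following the valid-convolution rule outdim = indim - kernel_size + 1, i.e. 53 - 4 + 1 = 50, while dimensions 1 and 2 keep their input extents (55 and 51). The device result is then compared element-wise against the host-side input.convolve(kernel, dims3) to within error_threshold.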

◆ test_larg_expr2D()

template<typename DataType , int DataLayout, typename IndexType >
static void test_larg_expr2D ( const Eigen::SyclDevice &  sycl_device)
static
95  {
96  IndexType indim0 = 53;
97  IndexType indim1 = 55;
98  IndexType indim2 = 51;
99  IndexType outdim0 = 50;
100  IndexType outdim1 = 51;
101  IndexType outdim2 = 51;
102  Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
103  Eigen::array<IndexType, 2> kernel_dims = {{4, 5}};
104  Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
105 
106  Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
107  Tensor<DataType, 2, DataLayout, IndexType> kernel(kernel_dims);
108  Tensor<DataType, 3, DataLayout, IndexType> result(result_dims);
109  Tensor<DataType, 3, DataLayout, IndexType> result_host(result_dims);
110 
111  Eigen::array<IndexType, 2> dims3{{0, 1}};
112 
113  input.setRandom();
114  kernel.setRandom();
115  result.setZero();
116  result_host.setZero();
117 
118  std::size_t input_bytes = input.size() * sizeof(DataType);
119  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
120  std::size_t result_bytes = result.size() * sizeof(DataType);
121 
122  DataType* d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
123  DataType* d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
124  DataType* d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
125 
126  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
127  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
128  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
129  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
130  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
131 
132  gpu_result.device(sycl_device) = gpu_input.convolve(gpu_kernel, dims3);
133  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
134 
135  result_host = input.convolve(kernel, dims3);
136 
137  for (IndexType i = 0; i < outdim0; i++) {
138  for (IndexType j = 0; j < outdim1; j++) {
139  for (IndexType k = 0; k < outdim2; k++) {
140  if (!(Eigen::internal::isApprox(result(i, j, k), result_host(i, j, k), error_threshold))) {
141  std::cout << std::setprecision(16) << "mismatch detected at index ( " << i << " , " << j << ", " << k
142  << " ) "
143  << " \t " << result(i, j, k) << " vs " << result_host(i, j, k) << std::endl;
144  assert(false);
145  }
146  }
147  }
148  }
149  sycl_device.deallocate(d_input);
150  sycl_device.deallocate(d_kernel);
151  sycl_device.deallocate(d_result);
152 }

References assert, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), error_threshold, i, Eigen::internal::isApprox(), j, k, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::TensorBase< Derived, AccessLevel >::setZero(), and Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size().
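Here the kernel spans dimensions {0, 1}, so both shrink: 53 - 4 + 1 = 50 and 55 - 5 + 1 = 51, while dimension 2 stays at 51; the element-wise comparison against the host-side convolve is otherwise the same as in the 1D case.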

◆ test_larg_expr3D()

template<typename DataType , int DataLayout, typename IndexType >
static void test_larg_expr3D ( const Eigen::SyclDevice &  sycl_device)
static
155  {
156  IndexType indim0 = 53;
157  IndexType indim1 = 55;
158  IndexType indim2 = 51;
159  IndexType outdim0 = 50;
160  IndexType outdim1 = 51;
161  IndexType outdim2 = 49;
162  Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
163  Eigen::array<IndexType, 3> kernel_dims = {{4, 5, 3}};
164  Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
165 
166  Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
167  Tensor<DataType, 3, DataLayout, IndexType> kernel(kernel_dims);
168  Tensor<DataType, 3, DataLayout, IndexType> result(result_dims);
169  Tensor<DataType, 3, DataLayout, IndexType> result_host(result_dims);
170 
171  Eigen::array<IndexType, 3> dims3{{0, 1, 2}};
172 
173  input.setRandom();
174  kernel.setRandom();
175  result.setZero();
176  result_host.setZero();
177 
178  std::size_t input_bytes = input.size() * sizeof(DataType);
179  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
180  std::size_t result_bytes = result.size() * sizeof(DataType);
181 
182  DataType* d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
183  DataType* d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
184  DataType* d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
185 
186  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
187  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
188  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
189  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
190  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
191 
192  gpu_result.device(sycl_device) = gpu_input.convolve(gpu_kernel, dims3);
193  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
194 
195  result_host = input.convolve(kernel, dims3);
196 
197  for (IndexType i = 0; i < outdim0; i++) {
198  for (IndexType j = 0; j < outdim1; j++) {
199  for (IndexType k = 0; k < outdim2; k++) {
200  if (!(Eigen::internal::isApprox(result(i, j, k), result_host(i, j, k), error_threshold))) {
201  std::cout << std::setprecision(16) << "mismatch detected at index ( " << i << " , " << j << ", " << k
202  << " ) "
203  << " \t " << result(i, j, k) << " vs " << result_host(i, j, k) << std::endl;
204  assert(false);
205  }
206  }
207  }
208  }
209  sycl_device.deallocate(d_input);
210  sycl_device.deallocate(d_kernel);
211  sycl_device.deallocate(d_result);
212 }

References assert, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), error_threshold, i, Eigen::internal::isApprox(), j, k, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::TensorBase< Derived, AccessLevel >::setZero(), and Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size().
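With a kernel covering all three dimensions, every extent shrinks by kernel_size - 1: 53 - 4 + 1 = 50, 55 - 5 + 1 = 51 and 51 - 3 + 1 = 49, which is exactly result_dims above.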

◆ test_modes()

template<typename DataType , int DataLayout, typename IndexType >
static void test_modes ( const Eigen::SyclDevice &  sycl_device)
static
307  {
308  Eigen::array<IndexType, 1> input_dims = {{3}};
309  Eigen::array<IndexType, 1> kernel_dims = {{3}};
310 
311  Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
312  Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
313 
314  input.setRandom();
315  kernel.setRandom();
316  Eigen::array<IndexType, 1> dims;
317  dims[0] = 0;
318 
319  input(0) = 1.0f;
320  input(1) = 2.0f;
321  input(2) = 3.0f;
322  kernel(0) = 0.5f;
323  kernel(1) = 1.0f;
324  kernel(2) = 0.0f;
325 
326  Eigen::array<std::pair<IndexType, IndexType>, 1> padding;
327 
328  // Emulate VALID mode (as defined in
329  // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
330  padding[0] = std::make_pair(0, 0);
331  Tensor<DataType, 1, DataLayout, IndexType> valid(1);
332 
333  std::size_t input_bytes = input.size() * sizeof(DataType);
334  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
335  std::size_t valid_bytes = valid.size() * sizeof(DataType);
336 
337  DataType* d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
338  DataType* d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
339  DataType* d_valid = static_cast<DataType*>(sycl_device.allocate(valid_bytes));
340 
342  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
343  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_valid(d_valid, valid.dimensions());
344  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
345  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
346 
347  gpu_valid.device(sycl_device) = gpu_input.pad(padding).convolve(gpu_kernel, dims);
348  sycl_device.memcpyDeviceToHost(valid.data(), d_valid, valid_bytes);
349 
350  VERIFY_IS_EQUAL(valid.dimension(0), 1);
351  VERIFY_IS_APPROX(valid(0), 2.5f);
352 
353  // Emulate SAME mode (as defined in
354  // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
355  padding[0] = std::make_pair(1, 1);
356  Tensor<DataType, 1, DataLayout, IndexType> same(3);
357  std::size_t same_bytes = same.size() * sizeof(DataType);
358  DataType* d_same = static_cast<DataType*>(sycl_device.allocate(same_bytes));
359  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_same(d_same, same.dimensions());
360  gpu_same.device(sycl_device) = gpu_input.pad(padding).convolve(gpu_kernel, dims);
361  sycl_device.memcpyDeviceToHost(same.data(), d_same, same_bytes);
362 
363  VERIFY_IS_EQUAL(same.dimension(0), 3);
364  VERIFY_IS_APPROX(same(0), 1.0f);
365  VERIFY_IS_APPROX(same(1), 2.5f);
366  VERIFY_IS_APPROX(same(2), 4.0f);
367 
368  // Emulate FULL mode (as defined in
369  // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
370  padding[0] = std::make_pair(2, 2);
371 
372  Tensor<DataType, 1, DataLayout, IndexType> full(5);
373  std::size_t full_bytes = full.size() * sizeof(DataType);
374  DataType* d_full = static_cast<DataType*>(sycl_device.allocate(full_bytes));
375  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_full(d_full, full.dimensions());
376  gpu_full.device(sycl_device) = gpu_input.pad(padding).convolve(gpu_kernel, dims);
377  sycl_device.memcpyDeviceToHost(full.data(), d_full, full_bytes);
378 
379  VERIFY_IS_EQUAL(full.dimension(0), 5);
380  VERIFY_IS_APPROX(full(0), 0.0f);
381  VERIFY_IS_APPROX(full(1), 1.0f);
382  VERIFY_IS_APPROX(full(2), 2.5f);
383  VERIFY_IS_APPROX(full(3), 4.0f);
384  VERIFY_IS_APPROX(full(4), 1.5f);
385 
386  sycl_device.deallocate(d_input);
387  sycl_device.deallocate(d_kernel);
388  sycl_device.deallocate(d_valid);
389  sycl_device.deallocate(d_same);
390  sycl_device.deallocate(d_full);
391 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimension(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), VERIFY_IS_APPROX, and VERIFY_IS_EQUAL.
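All three modes reuse the same valid convolution and differ only in how much the length-3 input is padded first; the output length is (N + pad_before + pad_after) - K + 1 with N = K = 3, giving 3 + 0 + 0 - 3 + 1 = 1 (VALID), 3 + 1 + 1 - 3 + 1 = 3 (SAME) and 3 + 2 + 2 - 3 + 1 = 5 (FULL), matching the dimension checks above. For SAME mode, a sketch of the host-side equivalent of the device expression (hypothetical same_host tensor, same function scope):

  Eigen::array<std::pair<IndexType, IndexType>, 1> pad;
  pad[0] = std::make_pair(1, 1);
  Tensor<DataType, 1, DataLayout, IndexType> same_host(3);
  same_host = input.pad(pad).convolve(kernel, dims);
  // expected values: 1.0f, 2.5f, 4.0f -- the same ones checked on the device result above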

◆ test_strides()

template<typename DataType , int DataLayout, typename IndexType >
static void test_strides ( const Eigen::SyclDevice &  sycl_device)
static
394  {
395  Eigen::array<IndexType, 1> input_dims = {{13}};
396  Eigen::array<IndexType, 1> kernel_dims = {{3}};
397 
398  Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
399  Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
400  Tensor<DataType, 1, DataLayout, IndexType> result(2);
401 
402  input.setRandom();
403  kernel.setRandom();
404  Eigen::array<IndexType, 1> dims;
405  dims[0] = 0;
406 
407  Eigen::array<IndexType, 1> stride_of_3;
408  stride_of_3[0] = 3;
409  Eigen::array<IndexType, 1> stride_of_2;
410  stride_of_2[0] = 2;
411 
412  std::size_t input_bytes = input.size() * sizeof(DataType);
413  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
414  std::size_t result_bytes = result.size() * sizeof(DataType);
415 
416  DataType* d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
417  DataType* d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
418  DataType* d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
419 
420  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_input(d_input, input_dims);
421  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
422  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_result(d_result, result.dimensions());
423  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
424  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
425 
426  gpu_result.device(sycl_device) = gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2);
427  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
428 
429  VERIFY_IS_EQUAL(result.dimension(0), 2);
430  VERIFY_IS_APPROX(result(0), (input(0) * kernel(0) + input(3) * kernel(1) + input(6) * kernel(2)));
431  VERIFY_IS_APPROX(result(1), (input(6) * kernel(0) + input(9) * kernel(1) + input(12) * kernel(2)));
432 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimension(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), Eigen::TensorBase< Derived, AccessLevel >::stride(), VERIFY_IS_APPROX, and VERIFY_IS_EQUAL.
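The expression is easiest to read inside-out: striding the 13-element input by 3 keeps the five samples at indices {0, 3, 6, 9, 12}; a valid convolution with the length-3 kernel then yields three values; and striding that result by 2 keeps outputs 0 and 2, hence result.dimension(0) == 2 and the two expanded sums in the VERIFY_IS_APPROX checks above.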

Variable Documentation

◆ error_threshold