cxx11_tensor_broadcast_sycl.cpp File Reference
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

Macros

#define EIGEN_TEST_NO_LONGDOUBLE
 
#define EIGEN_TEST_NO_COMPLEX
 
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t
 
#define EIGEN_USE_SYCL
 

Functions

template<typename DataType , int DataLayout, typename IndexType >
static void test_broadcast_sycl_fixed (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType , int DataLayout, typename IndexType >
static void test_broadcast_sycl (const Eigen::SyclDevice &sycl_device)
 
template<typename DataType >
void sycl_broadcast_test_per_device (const cl::sycl::device &d)
 
 EIGEN_DECLARE_TEST (cxx11_tensor_broadcast_sycl)
 

Macro Definition Documentation

◆ EIGEN_DEFAULT_DENSE_INDEX_TYPE

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t

◆ EIGEN_TEST_NO_COMPLEX

#define EIGEN_TEST_NO_COMPLEX

◆ EIGEN_TEST_NO_LONGDOUBLE

#define EIGEN_TEST_NO_LONGDOUBLE

◆ EIGEN_USE_SYCL

#define EIGEN_USE_SYCL

Function Documentation

◆ EIGEN_DECLARE_TEST()

EIGEN_DECLARE_TEST ( cxx11_tensor_broadcast_sycl  )
135  {
136  for (const auto& device : Eigen::get_sycl_supported_devices()) {
137  CALL_SUBTEST(sycl_broadcast_test_per_device<half>(device));
138  CALL_SUBTEST(sycl_broadcast_test_per_device<float>(device));
139  }
140 }
#define CALL_SUBTEST(FUNC)
Definition: main.h:382

References CALL_SUBTEST.

◆ sycl_broadcast_test_per_device()

template<typename DataType >
void sycl_broadcast_test_per_device ( const cl::sycl::device &  d)
125  {
126  std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
127  QueueInterface queueInterface(d);
128  auto sycl_device = Eigen::SyclDevice(&queueInterface);
129  test_broadcast_sycl<DataType, RowMajor, int64_t>(sycl_device);
130  test_broadcast_sycl<DataType, ColMajor, int64_t>(sycl_device);
131  test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device);
132  test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device);
133 }

◆ test_broadcast_sycl()

template<typename DataType , int DataLayout, typename IndexType >
static void test_broadcast_sycl ( const Eigen::SyclDevice &  sycl_device)
static
77  {
78  // BROADCAST test:
79  IndexType inDim1 = 2;
80  IndexType inDim2 = 3;
81  IndexType inDim3 = 5;
82  IndexType inDim4 = 7;
83  IndexType bDim1 = 2;
84  IndexType bDim2 = 3;
85  IndexType bDim3 = 1;
86  IndexType bDim4 = 4;
87  array<IndexType, 4> in_range = {{inDim1, inDim2, inDim3, inDim4}};
88  array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}};
89  array<IndexType, 4> out_range; // = in_range * broadcasts
90  for (size_t i = 0; i < out_range.size(); ++i) out_range[i] = in_range[i] * broadcasts[i];
91 
 92  Tensor<DataType, 4, DataLayout, IndexType> input(in_range);
 93  Tensor<DataType, 4, DataLayout, IndexType> out(out_range);
 94 
95  for (size_t i = 0; i < in_range.size(); ++i) VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
96 
97  for (IndexType i = 0; i < input.size(); ++i) input(i) = static_cast<DataType>(i);
98 
99  DataType* gpu_in_data =
100  static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize() * sizeof(DataType)));
101  DataType* gpu_out_data =
102  static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize() * sizeof(DataType)));
103 
104  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range);
105  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range);
106  sycl_device.memcpyHostToDevice(gpu_in_data, input.data(), (input.dimensions().TotalSize()) * sizeof(DataType));
107  gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
108  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, (out.dimensions().TotalSize()) * sizeof(DataType));
109 
110  for (IndexType i = 0; i < inDim1 * bDim1; ++i) {
111  for (IndexType j = 0; j < inDim2 * bDim2; ++j) {
112  for (IndexType k = 0; k < inDim3 * bDim3; ++k) {
113  for (IndexType l = 0; l < inDim4 * bDim4; ++l) {
114  VERIFY_IS_APPROX(input(i % inDim1, j % inDim2, k % inDim3, l % inDim4), out(i, j, k, l));
115  }
116  }
117  }
118  }
119  printf("Broadcast Test Passed\n");
120  sycl_device.deallocate(gpu_in_data);
121  sycl_device.deallocate(gpu_out_data);
122 }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
A tensor expression mapping an existing array of data.
Definition: TensorMap.h:33
The tensor class.
Definition: Tensor.h:68
#define VERIFY_IS_APPROX(a, b)
Definition: integer_types.cpp:13
char char char int int * k
Definition: level2_impl.h:374
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:367
std::array< T, N > array
Definition: EmulateArray.h:231
std::ofstream out("Result.txt")
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, j, k, out(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), VERIFY_IS_APPROX, and VERIFY_IS_EQUAL.

◆ test_broadcast_sycl_fixed()

template<typename DataType , int DataLayout, typename IndexType >
static void test_broadcast_sycl_fixed ( const Eigen::SyclDevice &  sycl_device)
static
29  {
30  // BROADCAST test:
31  IndexType inDim1 = 2;
32  IndexType inDim2 = 3;
33  IndexType inDim3 = 5;
34  IndexType inDim4 = 7;
35  IndexType bDim1 = 2;
36  IndexType bDim2 = 3;
37  IndexType bDim3 = 1;
38  IndexType bDim4 = 4;
39  array<IndexType, 4> in_range = {{inDim1, inDim2, inDim3, inDim4}};
40  array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}};
41  array<IndexType, 4> out_range; // = in_range * broadcasts
42  for (size_t i = 0; i < out_range.size(); ++i) out_range[i] = in_range[i] * broadcasts[i];
43 
 44  TensorFixedSize<DataType, Sizes<2, 3, 5, 7>, DataLayout, IndexType> input;
 45  Tensor<DataType, 4, DataLayout, IndexType> out(out_range);
 46 
47  for (size_t i = 0; i < in_range.size(); ++i) VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
48 
49  for (IndexType i = 0; i < input.size(); ++i) input(i) = static_cast<DataType>(i);
50 
51  DataType* gpu_in_data =
52  static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize() * sizeof(DataType)));
53  DataType* gpu_out_data =
54  static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize() * sizeof(DataType)));
55 
56  TensorMap<TensorFixedSize<DataType, Sizes<2, 3, 5, 7>, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range);
57  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range);
58  sycl_device.memcpyHostToDevice(gpu_in_data, input.data(), (input.dimensions().TotalSize()) * sizeof(DataType));
59  gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
60  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, (out.dimensions().TotalSize()) * sizeof(DataType));
61 
62  for (IndexType i = 0; i < inDim1 * bDim1; ++i) {
63  for (IndexType j = 0; j < inDim2 * bDim2; ++j) {
64  for (IndexType k = 0; k < inDim3 * bDim3; ++k) {
65  for (IndexType l = 0; l < inDim4 * bDim4; ++l) {
66  VERIFY_IS_APPROX(input(i % 2, j % 3, k % 5, l % 7), out(i, j, k, l));
67  }
68  }
69  }
70  }
71  printf("Broadcast Test with fixed size Passed\n");
72  sycl_device.deallocate(gpu_in_data);
73  sycl_device.deallocate(gpu_out_data);
74 }
static const int DataLayout
Definition: cxx11_tensor_image_patch_sycl.cpp:24

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), DataLayout, Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, j, k, out(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), VERIFY_IS_APPROX, and VERIFY_IS_EQUAL.