sycl_basic.cpp File Reference
#include "main.h"
#include <Eigen/Dense>

Macros

#define EIGEN_TEST_NO_LONGDOUBLE
 
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int
 
#define EIGEN_USE_SYCL
 

Functions

template<bool verifyNan = false, bool singleTask = false, typename Operation , typename Input , typename Output >
void run_and_verify (Operation &ope, size_t num_elements, const Input &in, Output &out)
 
template<typename DataType , typename Input , typename Output >
void test_coeff_wise (size_t num_elements, const Input &in, Output &out)
 
template<typename DataType , typename Input , typename Output >
void test_complex_sqrt (size_t num_elements, const Input &in, Output &out)
 
template<typename DataType , typename Input , typename Output >
void test_complex_operators (size_t num_elements, const Input &in, Output &out)
 
template<typename DataType , typename Input , typename Output >
void test_redux (size_t num_elements, const Input &in, Output &out)
 
template<typename DataType , typename Input , typename Output >
void test_replicate (size_t num_elements, const Input &in, Output &out)
 
template<typename DataType1 , typename DataType2 , typename Input , typename Output >
void test_product (size_t num_elements, const Input &in, Output &out)
 
template<typename DataType1 , typename DataType2 , typename Input , typename Output >
void test_diagonal (size_t num_elements, const Input &in, Output &out)
 
template<typename DataType , typename Input , typename Output >
void test_eigenvalues_direct (size_t num_elements, const Input &in, Output &out)
 
template<typename DataType , typename Input , typename Output >
void test_matrix_inverse (size_t num_elements, const Input &in, Output &out)
 
template<typename DataType , typename Input , typename Output >
void test_numeric_limits (const Input &in, Output &out)
 
 EIGEN_DECLARE_TEST (sycl_basic)
 

Macro Definition Documentation

◆ EIGEN_DEFAULT_DENSE_INDEX_TYPE

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int

◆ EIGEN_TEST_NO_LONGDOUBLE

#define EIGEN_TEST_NO_LONGDOUBLE

◆ EIGEN_USE_SYCL

#define EIGEN_USE_SYCL

Function Documentation

◆ EIGEN_DECLARE_TEST()

EIGEN_DECLARE_TEST ( sycl_basic  )
341  {
342  Eigen::VectorXf in, out;
343  Eigen::VectorXcf cfin, cfout;
344 
345  constexpr size_t num_elements = 100;
346  constexpr size_t data_size = num_elements * 512;
347  in.setRandom(data_size);
348  out.setConstant(data_size, -1);
349  cfin.setRandom(data_size);
350  cfout.setConstant(data_size, -1);
351 
352  CALL_SUBTEST(test_coeff_wise<Vector3f>(num_elements, in, out));
353  CALL_SUBTEST(test_coeff_wise<Array44f>(num_elements, in, out));
354 
355  CALL_SUBTEST(test_complex_operators<Vector3cf>(num_elements, cfin, cfout));
356  CALL_SUBTEST(test_complex_sqrt<Vector3cf>(num_elements, cfin, cfout));
357 
358  CALL_SUBTEST(test_redux<Array4f>(num_elements, in, out));
359  CALL_SUBTEST(test_redux<Matrix3f>(num_elements, in, out));
360 
361  CALL_SUBTEST(test_replicate<Array4f>(num_elements, in, out));
362  CALL_SUBTEST(test_replicate<Array33f>(num_elements, in, out));
363 
364  auto test_prod_mm = [&]() { test_product<Matrix3f, Matrix3f>(num_elements, in, out); };
365  auto test_prod_mv = [&]() { test_product<Matrix4f, Vector4f>(num_elements, in, out); };
366  CALL_SUBTEST(test_prod_mm());
367  CALL_SUBTEST(test_prod_mv());
368 
369  auto test_diagonal_mv3f = [&]() { test_diagonal<Matrix3f, Vector3f>(num_elements, in, out); };
370  auto test_diagonal_mv4f = [&]() { test_diagonal<Matrix4f, Vector4f>(num_elements, in, out); };
371  CALL_SUBTEST(test_diagonal_mv3f());
372  CALL_SUBTEST(test_diagonal_mv4f());
373 
374  CALL_SUBTEST(test_eigenvalues_direct<Matrix3f>(num_elements, in, out));
375  CALL_SUBTEST(test_eigenvalues_direct<Matrix2f>(num_elements, in, out));
376 
377  CALL_SUBTEST(test_matrix_inverse<Matrix2f>(num_elements, in, out));
378  CALL_SUBTEST(test_matrix_inverse<Matrix3f>(num_elements, in, out));
379  CALL_SUBTEST(test_matrix_inverse<Matrix4f>(num_elements, in, out));
380 
381  CALL_SUBTEST(test_numeric_limits<Vector3f>(in, out));
382 }
#define CALL_SUBTEST(FUNC)
Definition: main.h:382
std::ofstream out("Result.txt")

References CALL_SUBTEST, and out().

◆ run_and_verify()

template<bool verifyNan = false, bool singleTask = false, typename Operation , typename Input , typename Output >
void run_and_verify ( Operation &  ope,
size_t  num_elements,
const Input &  in,
Output &  out 
)
22  {
23  Output out_gpu, out_cpu;
24  out_gpu = out_cpu = out;
25  auto queue = sycl::queue{sycl::default_selector_v};
26 
27  auto in_size_bytes = sizeof(typename Input::Scalar) * in.size();
28  auto out_size_bytes = sizeof(typename Output::Scalar) * out.size();
29  auto in_d = sycl::malloc_device<typename Input::Scalar>(in.size(), queue);
30  auto out_d = sycl::malloc_device<typename Output::Scalar>(out.size(), queue);
31 
32  queue.memcpy(in_d, in.data(), in_size_bytes).wait();
33  queue.memcpy(out_d, out.data(), out_size_bytes).wait();
34 
35  if constexpr (singleTask) {
36  queue.single_task([=]() { ope(in_d, out_d); }).wait();
37  } else {
38  queue
39  .parallel_for(sycl::range{num_elements},
40  [=](sycl::id<1> idx) {
41  auto id = idx[0];
42  ope(id, in_d, out_d);
43  })
44  .wait();
45  }
46 
47  queue.memcpy(out_gpu.data(), out_d, out_size_bytes).wait();
48 
49  sycl::free(in_d, queue);
50  sycl::free(out_d, queue);
51 
52  queue.throw_asynchronous();
53 
54  // Run on CPU and compare the output
55  if constexpr (singleTask == 1) {
56  ope(in.data(), out_cpu.data());
57  } else {
58  for (size_t i = 0; i < num_elements; ++i) {
59  ope(i, in.data(), out_cpu.data());
60  }
61  }
62  if constexpr (verifyNan) {
63  VERIFY_IS_CWISE_APPROX(out_gpu, out_cpu);
64  } else {
65  VERIFY_IS_APPROX(out_gpu, out_cpu);
66  }
67 }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
SCALAR Scalar
Definition: bench_gemm.cpp:45
#define VERIFY_IS_APPROX(a, b)
Definition: integer_types.cpp:13
#define VERIFY_IS_CWISE_APPROX(a, b)
Definition: main.h:376

References i, out(), VERIFY_IS_APPROX, and VERIFY_IS_CWISE_APPROX.

Referenced by test_coeff_wise(), test_diagonal(), test_eigenvalues_direct(), test_matrix_inverse(), test_product(), test_redux(), and test_replicate().

◆ test_coeff_wise()

template<typename DataType , typename Input , typename Output >
void test_coeff_wise ( size_t  num_elements,
const Input &  in,
Output &  out 
)
70  {
71  auto operation = [](size_t i, const typename DataType::Scalar* in, typename DataType::Scalar* out) {
72  DataType x1(in + i);
73  DataType x2(in + i + 1);
74  DataType x3(in + i + 2);
75  Map<DataType> res(out + i * DataType::MaxSizeAtCompileTime);
76 
77  res.array() += (in[0] * x1 + x2).array() * x3.array();
78  };
79 
80  run_and_verify(operation, num_elements, in, out);
81 }
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition: PartialRedux_count.cpp:3
A matrix or vector expression mapping an existing array of data.
Definition: Map.h:96
std::array< T, N > array
Definition: EmulateArray.h:231
Vector< double > x1(const Vector< double > &coord)
Cartesian coordinates centered at the point (0.5,1)
Definition: poisson/poisson_with_singularity/two_d_poisson.cc:86
Vector< double > x2(const Vector< double > &coord)
Cartesian coordinates centered at the point (1.5,1)
Definition: poisson/poisson_with_singularity/two_d_poisson.cc:102
void run_and_verify(Operation &ope, size_t num_elements, const Input &in, Output &out)
Definition: sycl_basic.cpp:22

References i, out(), res, run_and_verify(), Global_parameters::x1(), and Global_parameters::x2().

◆ test_complex_operators()

template<typename DataType , typename Input , typename Output >
void test_complex_operators ( size_t  num_elements,
const Input &  in,
Output &  out 
)
129  {
130  auto operation = [](size_t i, const typename DataType::Scalar* in, typename DataType::Scalar* out) {
131  using namespace Eigen;
132  typedef typename DataType::Scalar ComplexType;
133  typedef typename DataType::Scalar::value_type ValueType;
134  const int num_scalar_operators = 24;
135  const int num_vector_operators = 23; // no unary + operator.
136  size_t out_idx = i * (num_scalar_operators + num_vector_operators * DataType::MaxSizeAtCompileTime);
137 
138  // Scalar operators.
139  const ComplexType a = in[i];
140  const ComplexType b = in[i + 1];
141 
142  out[out_idx++] = +a;
143  out[out_idx++] = -a;
144 
145  out[out_idx++] = a + b;
146  out[out_idx++] = a + numext::real(b);
147  out[out_idx++] = numext::real(a) + b;
148  out[out_idx++] = a - b;
149  out[out_idx++] = a - numext::real(b);
150  out[out_idx++] = numext::real(a) - b;
151  out[out_idx++] = a * b;
152  out[out_idx++] = a * numext::real(b);
153  out[out_idx++] = numext::real(a) * b;
154  out[out_idx++] = a / b;
155  out[out_idx++] = a / numext::real(b);
156  out[out_idx++] = numext::real(a) / b;
157 
158  out[out_idx] = a;
159  out[out_idx++] += b;
160  out[out_idx] = a;
161  out[out_idx++] -= b;
162  out[out_idx] = a;
163  out[out_idx++] *= b;
164  out[out_idx] = a;
165  out[out_idx++] /= b;
166 
167  const ComplexType true_value = ComplexType(ValueType(1), ValueType(0));
168  const ComplexType false_value = ComplexType(ValueType(0), ValueType(0));
169  out[out_idx++] = (a == b ? true_value : false_value);
170  out[out_idx++] = (a == numext::real(b) ? true_value : false_value);
171  out[out_idx++] = (numext::real(a) == b ? true_value : false_value);
172  out[out_idx++] = (a != b ? true_value : false_value);
173  out[out_idx++] = (a != numext::real(b) ? true_value : false_value);
174  out[out_idx++] = (numext::real(a) != b ? true_value : false_value);
175 
176  // Vector versions.
177  DataType x1(in + i);
178  DataType x2(in + i + 1);
179  const int res_size = DataType::MaxSizeAtCompileTime * num_scalar_operators;
180  const int size = DataType::MaxSizeAtCompileTime;
181  int block_idx = 0;
182 
183  Map<VectorX<ComplexType>> res(out + out_idx, res_size);
184  res.segment(block_idx, size) = -x1;
185  block_idx += size;
186 
187  res.segment(block_idx, size) = x1 + x2;
188  block_idx += size;
189  res.segment(block_idx, size) = x1 + x2.real();
190  block_idx += size;
191  res.segment(block_idx, size) = x1.real() + x2;
192  block_idx += size;
193  res.segment(block_idx, size) = x1 - x2;
194  block_idx += size;
195  res.segment(block_idx, size) = x1 - x2.real();
196  block_idx += size;
197  res.segment(block_idx, size) = x1.real() - x2;
198  block_idx += size;
199  res.segment(block_idx, size) = x1.array() * x2.array();
200  block_idx += size;
201  res.segment(block_idx, size) = x1.array() * x2.real().array();
202  block_idx += size;
203  res.segment(block_idx, size) = x1.real().array() * x2.array();
204  block_idx += size;
205  res.segment(block_idx, size) = x1.array() / x2.array();
206  block_idx += size;
207  res.segment(block_idx, size) = x1.array() / x2.real().array();
208  block_idx += size;
209  res.segment(block_idx, size) = x1.real().array() / x2.array();
210  block_idx += size;
211 
212  res.segment(block_idx, size) = x1;
213  res.segment(block_idx, size) += x2;
214  block_idx += size;
215  res.segment(block_idx, size) = x1;
216  res.segment(block_idx, size) -= x2;
217  block_idx += size;
218  res.segment(block_idx, size) = x1;
219  res.segment(block_idx, size).array() *= x2.array();
220  block_idx += size;
221  res.segment(block_idx, size) = x1;
222  res.segment(block_idx, size).array() /= x2.array();
223  block_idx += size;
224 
225  const DataType true_vector = DataType::Constant(true_value);
226  const DataType false_vector = DataType::Constant(false_value);
227  res.segment(block_idx, size) = (x1 == x2 ? true_vector : false_vector);
228  block_idx += size;
229  res.segment(block_idx, size) = (x1 == x2.real() ? true_vector : false_vector);
230  block_idx += size;
231  // res.segment(block_idx, size) = (x1.real() == x2) ? true_vector : false_vector;
232  // block_idx += size;
233  res.segment(block_idx, size) = (x1 != x2 ? true_vector : false_vector);
234  block_idx += size;
235  res.segment(block_idx, size) = (x1 != x2.real() ? true_vector : false_vector);
236  block_idx += size;
237  // res.segment(block_idx, size) = (x1.real() != x2 ? true_vector : false_vector);
238  // block_idx += size;
239  };
240  run_and_verify<true>(operation, num_elements, in, out);
241 }
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
Scalar * b
Definition: benchVecAdd.cpp:17
float real
Definition: datatypes.h:10
const Scalar * a
Definition: level2_cplx_impl.h:32
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70

References a, b, i, out(), res, size, Global_parameters::x1(), and Global_parameters::x2().

◆ test_complex_sqrt()

template<typename DataType , typename Input , typename Output >
void test_complex_sqrt ( size_t  num_elements,
const Input &  in,
Output &  out 
)
84  {
85  auto operation = [](size_t i, const typename DataType::Scalar* in, typename DataType::Scalar* out) {
86  using namespace Eigen;
87  typedef typename DataType::Scalar ComplexType;
88  typedef typename DataType::Scalar::value_type ValueType;
89  const int num_special_inputs = 18;
90 
91  if (i == 0) {
92  const ValueType nan = std::numeric_limits<ValueType>::quiet_NaN();
94  SpecialInputs special_in;
95  special_in.setZero();
96  int idx = 0;
97  special_in[idx++] = ComplexType(0, 0);
98  special_in[idx++] = ComplexType(-0, 0);
99  special_in[idx++] = ComplexType(0, -0);
100  special_in[idx++] = ComplexType(-0, -0);
101  const ValueType inf = std::numeric_limits<ValueType>::infinity();
102  special_in[idx++] = ComplexType(1.0, inf);
103  special_in[idx++] = ComplexType(nan, inf);
104  special_in[idx++] = ComplexType(1.0, -inf);
105  special_in[idx++] = ComplexType(nan, -inf);
106  special_in[idx++] = ComplexType(-inf, 1.0);
107  special_in[idx++] = ComplexType(inf, 1.0);
108  special_in[idx++] = ComplexType(-inf, -1.0);
109  special_in[idx++] = ComplexType(inf, -1.0);
110  special_in[idx++] = ComplexType(-inf, nan);
111  special_in[idx++] = ComplexType(inf, nan);
112  special_in[idx++] = ComplexType(1.0, nan);
113  special_in[idx++] = ComplexType(nan, 1.0);
114  special_in[idx++] = ComplexType(nan, -1.0);
115  special_in[idx++] = ComplexType(nan, nan);
116 
117  Map<SpecialInputs> special_out(out);
118  special_out = special_in.cwiseSqrt();
119  }
120 
121  DataType x1(in + i);
122  Map<DataType> res(out + num_special_inputs + i * DataType::MaxSizeAtCompileTime);
123  res = x1.cwiseSqrt();
124  };
125  run_and_verify<true>(operation, num_elements, in, out);
126 }
The matrix class, also used for vectors and row-vectors.
Definition: Eigen/Eigen/src/Core/Matrix.h:186
EIGEN_DEVICE_FUNC Derived & setZero(Index size)
Definition: CwiseNullaryOp.h:569
const Mdouble inf
Definition: GeneralDefine.h:23

References i, constants::inf, out(), res, Eigen::PlainObjectBase< Derived >::setZero(), and Global_parameters::x1().

◆ test_diagonal()

template<typename DataType1 , typename DataType2 , typename Input , typename Output >
void test_diagonal ( size_t  num_elements,
const Input &  in,
Output &  out 
)
292  {
293  auto operation = [](size_t i, const typename DataType1::Scalar* in, typename DataType1::Scalar* out) {
294  using namespace Eigen;
295  DataType1 x1(in + i);
296  Map<DataType2> res(out + i * DataType2::MaxSizeAtCompileTime);
297  res += x1.diagonal();
298  };
299  run_and_verify(operation, num_elements, in, out);
300 }

References i, out(), res, run_and_verify(), and Global_parameters::x1().

◆ test_eigenvalues_direct()

template<typename DataType , typename Input , typename Output >
void test_eigenvalues_direct ( size_t  num_elements,
const Input &  in,
Output &  out 
)
303  {
304  auto operation = [](size_t i, const typename DataType::Scalar* in, typename DataType::Scalar* out) {
305  using namespace Eigen;
307  DataType M(in + i);
308  Map<Vec> res(out + i * Vec::MaxSizeAtCompileTime);
309  DataType A = M * M.adjoint();
311  eig.computeDirect(A);
312  res = eig.eigenvalues();
313  };
314  run_and_verify(operation, num_elements, in, out);
315 }
Matrix< RealScalar, Dynamic, Dynamic > M
Definition: bench_gemm.cpp:50
Computes eigenvalues and eigenvectors of selfadjoint matrices.
Definition: SelfAdjointEigenSolver.h:82
EIGEN_DEVICE_FUNC SelfAdjointEigenSolver & computeDirect(const MatrixType &matrix, int options=ComputeEigenvectors)
Computes eigendecomposition of given matrix using a closed-form algorithm.
Definition: SelfAdjointEigenSolver.h:777
EIGEN_DEVICE_FUNC const RealVectorType & eigenvalues() const
Returns the eigenvalues of given matrix.
Definition: SelfAdjointEigenSolver.h:300
Matrix< Scalar, Dynamic, 1 > Vec
Definition: gemv_common.h:17

References Eigen::SelfAdjointEigenSolver< MatrixType_ >::computeDirect(), Eigen::SelfAdjointEigenSolver< MatrixType_ >::eigenvalues(), i, out(), res, and run_and_verify().

◆ test_matrix_inverse()

template<typename DataType , typename Input , typename Output >
void test_matrix_inverse ( size_t  num_elements,
const Input &  in,
Output &  out 
)
318  {
319  auto operation = [](size_t i, const typename DataType::Scalar* in, typename DataType::Scalar* out) {
320  using namespace Eigen;
321  DataType M(in + i);
322  Map<DataType> res(out + i * DataType::MaxSizeAtCompileTime);
323  res = M.inverse();
324  };
325  run_and_verify(operation, num_elements, in, out);
326 }

References i, out(), res, and run_and_verify().

◆ test_numeric_limits()

template<typename DataType , typename Input , typename Output >
void test_numeric_limits ( const Input &  in,
Output &  out 
)
329  {
330  auto operation = [](const typename DataType::Scalar* in, typename DataType::Scalar* out) {
335  out[3] = numext::numeric_limits<float>::infinity();
336  out[4] = numext::numeric_limits<float>::quiet_NaN();
337  };
338  run_and_verify<true, true>(operation, 1, in, out);
339 }
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:966
#define min(a, b)
Definition: datatypes.h:22
#define max(a, b)
Definition: datatypes.h:23
double epsilon
Definition: osc_ring_sarah_asymptotics.h:43

References EIGEN_UNUSED_VARIABLE, oomph::SarahBL::epsilon, max, min, and out().

◆ test_product()

template<typename DataType1 , typename DataType2 , typename Input , typename Output >
void test_product ( size_t  num_elements,
const Input &  in,
Output &  out 
)
279  {
280  auto operation = [](size_t i, const typename DataType1::Scalar* in, typename DataType1::Scalar* out) {
281  using namespace Eigen;
283  DataType1 x1(in + i);
284  DataType2 x2(in + i + 1);
285  Map<DataType3> res(out + i * DataType3::MaxSizeAtCompileTime);
286  res += in[i] * x1 * x2;
287  };
288  run_and_verify(operation, num_elements, in, out);
289 }

References i, out(), res, run_and_verify(), Global_parameters::x1(), and Global_parameters::x2().

◆ test_redux()

template<typename DataType , typename Input , typename Output >
void test_redux ( size_t  num_elements,
const Input &  in,
Output &  out 
)
244  {
245  auto operation = [](size_t i, const typename DataType::Scalar* in, typename DataType::Scalar* out) {
246  using namespace Eigen;
247  int N = 10;
248  DataType x1(in + i);
249  out[i * N + 0] = x1.minCoeff();
250  out[i * N + 1] = x1.maxCoeff();
251  out[i * N + 2] = x1.sum();
252  out[i * N + 3] = x1.prod();
253  out[i * N + 4] = x1.matrix().squaredNorm();
254  out[i * N + 5] = x1.matrix().norm();
255  out[i * N + 6] = x1.colwise().sum().maxCoeff();
256  out[i * N + 7] = x1.rowwise().maxCoeff().sum();
257  out[i * N + 8] = x1.matrix().colwise().squaredNorm().sum();
258  };
259  run_and_verify(operation, num_elements, in, out);
260 }
@ N
Definition: constructor.cpp:22

References i, N, out(), run_and_verify(), and Global_parameters::x1().

◆ test_replicate()

template<typename DataType , typename Input , typename Output >
void test_replicate ( size_t  num_elements,
const Input &  in,
Output &  out 
)
263  {
264  auto operation = [](size_t i, const typename DataType::Scalar* in, typename DataType::Scalar* out) {
265  using namespace Eigen;
266  DataType x1(in + i);
267  int step = x1.size() * 4;
268  int stride = 3 * step;
269 
271  MapType(out + i * stride + 0 * step, x1.rows() * 2, x1.cols() * 2) = x1.replicate(2, 2);
272  MapType(out + i * stride + 1 * step, x1.rows() * 3, x1.cols()) = in[i] * x1.colwise().replicate(3);
273  MapType(out + i * stride + 2 * step, x1.rows(), x1.cols() * 3) = in[i] * x1.rowwise().replicate(3);
274  };
275  run_and_verify(operation, num_elements, in, out);
276 }
Map< MatrixType > MapType
Definition: Tutorial_Map_using.cpp:2

References i, out(), run_and_verify(), and Global_parameters::x1().