cxx11_tensor_contract_sycl.cpp File Reference
#include <algorithm>
#include <chrono>
#include <ctime>
#include <iostream>
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

Macros

#define EIGEN_TEST_NO_LONGDOUBLE
 
#define EIGEN_TEST_NO_COMPLEX
 
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t
 
#define EIGEN_USE_SYCL
 

Functions

template<int DataLayout, typename DataType , typename IndexType , typename Device >
static void test_sycl_contraction (const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
 
template<int DataLayout, typename DataType , typename IndexType , typename Device >
void test_sycl_contraction_m (const Device &sycl_device)
 
template<int DataLayout, typename DataType , typename IndexType , typename Device >
void test_sycl_contraction_k (const Device &sycl_device)
 
template<int DataLayout, typename DataType , typename IndexType , typename Device >
void test_sycl_contraction_n (const Device &sycl_device)
 
template<int DataLayout, typename DataType , typename IndexType , typename Device >
void test_sycl_contraction_sizes (const Device &sycl_device)
 
template<int DataLayout, typename DataType , typename IndexType , typename Device >
static void test_no_out_of_bounds (const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
 
template<int DataLayout, typename DataType , typename IndexType , typename Device >
void test_scalar (const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
 
template<int DataLayout, typename DataType , typename IndexType , typename Device >
void contraction_batch (const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size, IndexType m_batch, IndexType start, IndexType limit)
 
template<int DataLayout, typename DataType , typename IndexType , typename Device >
void contraction_rhs_transposed (const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
 
template<int DataLayout, typename DataType , typename IndexType , typename Device >
void contraction_lhs_transposed (const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
 
template<int DataLayout, typename DataType , typename IndexType , typename Device >
void contraction_both_transposed (const Device &sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
 
template<typename Dev >
void tensorOutofBound (const Dev &sycl_device)
 
template<typename Dev >
void tensorTensor (const Dev &sycl_device)
 
template<typename Dev >
void tensorTensor_m (const Dev &sycl_device)
 
template<typename Dev >
void tensorTensor_n (const Dev &sycl_device)
 
template<typename Dev >
void tensorTensor_k (const Dev &sycl_device)
 
template<typename Dev >
void tensorTensor_sizes (const Dev &sycl_device)
 
template<typename Dev >
void vectorVector (const Dev &sycl_device)
 
template<typename Dev >
void vectorTensor (const Dev &sycl_device)
 
template<typename Dev >
void tensorVector (const Dev &sycl_device)
 
template<typename Dev >
void tensorScalar (const Dev &sycl_device)
 
template<typename Dev >
void skinnyTensor_row (const Dev &sycl_device)
 
template<typename Dev >
void skinnyTensor_col (const Dev &sycl_device)
 
template<typename Dev >
void tensor_contraction_batch_per_device (const Dev &sycl_device)
 
template<typename Dev >
void tensor_contraction_lhs_transposed_per_device (const Dev &sycl_device)
 
template<typename Dev >
void tensor_contraction_rhs_transposed_per_device (const Dev &sycl_device)
 
template<typename Dev >
void tensor_contraction_both_transposed_per_device (const Dev &sycl_device)
 
 EIGEN_DECLARE_TEST (cxx11_tensor_contract_sycl)
 

Macro Definition Documentation

◆ EIGEN_DEFAULT_DENSE_INDEX_TYPE

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE   int64_t

◆ EIGEN_TEST_NO_COMPLEX

#define EIGEN_TEST_NO_COMPLEX

◆ EIGEN_TEST_NO_LONGDOUBLE

#define EIGEN_TEST_NO_LONGDOUBLE

◆ EIGEN_USE_SYCL

#define EIGEN_USE_SYCL

Function Documentation

◆ contraction_batch()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
void contraction_batch ( const Device &  sycl_device,
IndexType  m_size,
IndexType  k_size,
IndexType  n_size,
IndexType  m_batch,
IndexType  start,
IndexType  limit 
)
271  {
272  typedef typename Tensor<DataType, 3, DataLayout, IndexType>::DimensionPair DimPair;
273  static const DataType error_threshold = DataType(1e-4);
274  typedef Eigen::array<IndexType, 3> TensorDim;
275  typedef Eigen::Tensor<DataType, 3, DataLayout, IndexType> TensorType;
276  TensorDim left_dims = {{m_batch, k_size, m_size}};
277  TensorDim right_dims = {{m_batch, n_size, k_size}};
278  TensorDim res_dims = {{m_batch, m_size, n_size}};
279  Eigen::array<DimPair, 1> contract_pairs = {{DimPair(0, 1)}};
280 
281  TensorType t_left(left_dims);
282  TensorType t_right(right_dims);
283  TensorType t_result_gpu(res_dims);
284  TensorType t_result(res_dims);
285 
286  t_left.setRandom();
287  t_right.setRandom();
288 
289  std::size_t t_left_bytes = t_left.size() * sizeof(DataType);
290  std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
291  std::size_t t_result_bytes = t_result.size() * sizeof(DataType);
292 
293  DataType *d_t_left = static_cast<DataType *>(sycl_device.allocate(t_left_bytes));
294  DataType *d_t_right = static_cast<DataType *>(sycl_device.allocate(t_right_bytes));
295  DataType *d_t_result = static_cast<DataType *>(sycl_device.allocate(t_result_bytes));
296 
297  Eigen::TensorMap<TensorType> gpu_t_left(d_t_left, left_dims);
298  Eigen::TensorMap<TensorType> gpu_t_right(d_t_right, right_dims);
299  Eigen::TensorMap<TensorType> gpu_t_result(d_t_result, res_dims);
300 
301  sycl_device.memcpyHostToDevice(d_t_left, t_left.data(), t_left_bytes);
302  sycl_device.memcpyHostToDevice(d_t_right, t_right.data(), t_right_bytes);
303  for (int i = start; i < limit; ++i) {
304  auto x = gpu_t_left.template chip<0>(i);
305  auto y = gpu_t_right.template chip<0>(i);
306  auto z = gpu_t_result.template chip<0>(i);
307  z.device(sycl_device) = x.contract(y, contract_pairs);
308  }
309  sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
310 
311  for (int i = start; i < limit; ++i) {
312  auto x = t_left.template chip<0>(i);
313  auto y = t_right.template chip<0>(i);
314  auto z = t_result.template chip<0>(i);
315  z = x.contract(y, contract_pairs);
316  }
317 
318  for (IndexType i = 0; i < t_result.size(); i++) {
319  if (static_cast<DataType>(std::fabs(static_cast<DataType>(t_result(i) - t_result_gpu(i)))) < error_threshold) {
320  continue;
321  }
322  if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) {
323  continue;
324  }
325  std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i) << " vs " << t_result_gpu(i)
326  << std::endl;
327  VERIFY_IS_APPROX(t_result_gpu(i), t_result(i));
328  }
329  sycl_device.deallocate(d_t_left);
330  sycl_device.deallocate(d_t_right);
331  sycl_device.deallocate(d_t_result);
332 }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
Array< double, 1, 3 > e(1./3., 0.5, 2.)
A tensor expression mapping an existing array of data.
Definition: TensorMap.h:33
The tensor class.
Definition: Tensor.h:68
Tensor< float, 1 >::DimensionPair DimPair
Definition: cxx11_tensor_contraction.cpp:17
static const float error_threshold
Definition: cxx11_tensor_convolution_sycl.cpp:32
#define VERIFY_IS_APPROX(a, b)
Definition: integer_types.cpp:13
Scalar * y
Definition: level1_cplx_impl.h:128
EIGEN_DEVICE_FUNC bool isApprox(const Scalar &x, const Scalar &y, const typename NumTraits< Scalar >::Real &precision=NumTraits< Scalar >::dummy_precision())
Definition: MathFunctions.h:1923
std::array< T, N > array
Definition: EmulateArray.h:231
Real fabs(const Real &a)
Definition: boostmultiprec.cpp:117
void start(const unsigned &i)
(Re-)start i-th timer
Definition: oomph_utilities.cc:243
list x
Definition: plotDoE.py:28

References e(), error_threshold, boost::multiprecision::fabs(), i, Eigen::internal::isApprox(), oomph::CumulativeTimings::start(), VERIFY_IS_APPROX, plotDoE::x, and y.

◆ contraction_both_transposed()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
void contraction_both_transposed ( const Device &  sycl_device,
IndexType  m_size,
IndexType  k_size,
IndexType  n_size 
)
445  {
446  typedef typename Tensor<DataType, 2, DataLayout, IndexType>::DimensionPair DimPair;
447  static const DataType error_threshold = DataType(1e-4);
448  Eigen::array<IndexType, 2> left_dims = {{k_size, m_size}};
449  Eigen::array<IndexType, 2> right_dims = {{n_size, k_size}};
450  Eigen::array<IndexType, 2> res_dims = {{m_size, n_size}};
451  Eigen::array<DimPair, 1> dims = {{DimPair(0, 1)}};
452 
453  Tensor<DataType, 2, DataLayout, IndexType> t_left(left_dims);
454  Tensor<DataType, 2, DataLayout, IndexType> t_right(right_dims);
455  Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(res_dims);
456  Tensor<DataType, 2, DataLayout, IndexType> t_result(res_dims);
457 
458  t_left.setRandom();
459  t_right.setRandom();
460 
461  std::size_t t_left_bytes = t_left.size() * sizeof(DataType);
462  std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
463  std::size_t t_result_bytes = t_result.size() * sizeof(DataType);
464 
465  DataType *d_t_left = static_cast<DataType *>(sycl_device.allocate(t_left_bytes));
466  DataType *d_t_right = static_cast<DataType *>(sycl_device.allocate(t_right_bytes));
467  DataType *d_t_result = static_cast<DataType *>(sycl_device.allocate(t_result_bytes));
468 
469  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_left(d_t_left, left_dims);
470  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_right(d_t_right, right_dims);
471  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_result(d_t_result, res_dims);
472 
473  sycl_device.memcpyHostToDevice(d_t_left, t_left.data(), t_left_bytes);
474  sycl_device.memcpyHostToDevice(d_t_right, t_right.data(), t_right_bytes);
475 
476  gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
477  sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
478 
479  t_result = t_left.contract(t_right, dims);
480 
481  for (IndexType i = 0; i < t_result.size(); i++) {
482  if (static_cast<DataType>(std::fabs(static_cast<DataType>(t_result(i) - t_result_gpu(i)))) < error_threshold) {
483  continue;
484  }
485  if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) {
486  continue;
487  }
488  std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size << ", mismatch detected at IndexType "
489  << i << ": " << t_result(i) << " vs " << t_result_gpu(i) << std::endl;
490 
491  VERIFY_IS_APPROX(t_result_gpu(i), t_result(i));
492  }
493  sycl_device.deallocate(d_t_left);
494  sycl_device.deallocate(d_t_right);
495  sycl_device.deallocate(d_t_result);
496 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), e(), error_threshold, boost::multiprecision::fabs(), i, Eigen::internal::isApprox(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_APPROX.

◆ contraction_lhs_transposed()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
void contraction_lhs_transposed ( const Device &  sycl_device,
IndexType  m_size,
IndexType  k_size,
IndexType  n_size 
)
392  {
393  typedef typename Tensor<DataType, 2, DataLayout, IndexType>::DimensionPair DimPair;
394  static const DataType error_threshold = DataType(1e-4);
395  Eigen::array<IndexType, 2> left_dims = {{k_size, m_size}};
396  Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}};
397  Eigen::array<IndexType, 2> res_dims = {{m_size, n_size}};
398  Eigen::array<DimPair, 1> dims = {{DimPair(0, 0)}};
399 
400  Tensor<DataType, 2, DataLayout, IndexType> t_left(left_dims);
401  Tensor<DataType, 2, DataLayout, IndexType> t_right(right_dims);
402  Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(res_dims);
403  Tensor<DataType, 2, DataLayout, IndexType> t_result(res_dims);
404 
405  t_left.setRandom();
406  t_right.setRandom();
407 
408  std::size_t t_left_bytes = t_left.size() * sizeof(DataType);
409  std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
410  std::size_t t_result_bytes = t_result.size() * sizeof(DataType);
411 
412  DataType *d_t_left = static_cast<DataType *>(sycl_device.allocate(t_left_bytes));
413  DataType *d_t_right = static_cast<DataType *>(sycl_device.allocate(t_right_bytes));
414  DataType *d_t_result = static_cast<DataType *>(sycl_device.allocate(t_result_bytes));
415 
416  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_left(d_t_left, left_dims);
417  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_right(d_t_right, right_dims);
418  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_result(d_t_result, res_dims);
419 
420  sycl_device.memcpyHostToDevice(d_t_left, t_left.data(), t_left_bytes);
421  sycl_device.memcpyHostToDevice(d_t_right, t_right.data(), t_right_bytes);
422 
423  gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
424  sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
425 
426  t_result = t_left.contract(t_right, dims);
427 
428  for (IndexType i = 0; i < t_result.size(); i++) {
429  if (static_cast<DataType>(std::fabs(static_cast<DataType>(t_result(i) - t_result_gpu(i)))) < error_threshold) {
430  continue;
431  }
432  if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) {
433  continue;
434  }
435  std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size << ", mismatch detected at IndexType "
436  << i << ": " << t_result(i) << " vs " << t_result_gpu(i) << std::endl;
437  VERIFY_IS_APPROX(t_result_gpu(i), t_result(i));
438  }
439  sycl_device.deallocate(d_t_left);
440  sycl_device.deallocate(d_t_right);
441  sycl_device.deallocate(d_t_result);
442 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), e(), error_threshold, boost::multiprecision::fabs(), i, Eigen::internal::isApprox(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_APPROX.

◆ contraction_rhs_transposed()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
void contraction_rhs_transposed ( const Device &  sycl_device,
IndexType  m_size,
IndexType  k_size,
IndexType  n_size 
)
335  {
336  typedef typename Tensor<DataType, 2, DataLayout, IndexType>::DimensionPair DimPair;
337  static const DataType error_threshold = DataType(1e-4);
338  Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}};
339  Eigen::array<IndexType, 2> right_dims = {{n_size, k_size}};
340  Eigen::array<IndexType, 2> res_dims = {{m_size, n_size}};
341  Eigen::array<DimPair, 1> dims = {{DimPair(1, 1)}};
342 
343  Tensor<DataType, 2, DataLayout, IndexType> t_left(left_dims);
344  Tensor<DataType, 2, DataLayout, IndexType> t_right(right_dims);
345  Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(res_dims);
346  Tensor<DataType, 2, DataLayout, IndexType> t_result(res_dims);
347 
348  t_left.setRandom();
349  t_right.setRandom();
350 
351  std::size_t t_left_bytes = t_left.size() * sizeof(DataType);
352  std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
353  std::size_t t_result_bytes = t_result.size() * sizeof(DataType);
354 
355  DataType *d_t_left = static_cast<DataType *>(sycl_device.allocate(t_left_bytes));
356  DataType *d_t_right = static_cast<DataType *>(sycl_device.allocate(t_right_bytes));
357  DataType *d_t_result = static_cast<DataType *>(sycl_device.allocate(t_result_bytes));
358 
359  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_left(d_t_left, left_dims);
360  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_right(d_t_right, right_dims);
361  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_result(d_t_result, res_dims);
362 
363  sycl_device.memcpyHostToDevice(d_t_left, t_left.data(), t_left_bytes);
364  sycl_device.memcpyHostToDevice(d_t_right, t_right.data(), t_right_bytes);
365 
366  gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
367  sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
368 
369  t_result = t_left.contract(t_right, dims);
370 
371  for (IndexType j = 0; j < m_size; j++) {
372  for (IndexType i = 0; i < n_size; i++) {
373  if (static_cast<DataType>(std::fabs(static_cast<DataType>(t_result(j, i) - t_result_gpu(j, i)))) <
374  error_threshold) {
375  continue;
376  }
377  if (Eigen::internal::isApprox(t_result(j, i), t_result_gpu(j, i), error_threshold)) {
378  continue;
379  }
380  std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size
381  << ", mismatch detected at IndexType m: " << j << " n: " << i << " CPU : " << t_result(j, i)
382  << " vs SYCL:" << t_result_gpu(j, i) << std::endl;
383  VERIFY_IS_APPROX(t_result_gpu(j, i), t_result(j, i));
384  }
385  }
386  sycl_device.deallocate(d_t_left);
387  sycl_device.deallocate(d_t_right);
388  sycl_device.deallocate(d_t_result);
389 }
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), e(), error_threshold, boost::multiprecision::fabs(), i, Eigen::internal::isApprox(), j, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_APPROX.

◆ EIGEN_DECLARE_TEST()

EIGEN_DECLARE_TEST ( cxx11_tensor_contract_sycl  )
824  {
825  for (const auto &device : Eigen::get_sycl_supported_devices()) {
826  std::cout << "Running on " << device.template get_info<cl::sycl::info::device::name>() << std::endl;
827  QueueInterface queueInterface(device);
828  auto sycl_device = Eigen::SyclDevice(&queueInterface);
829  CALL_SUBTEST_1(tensorOutofBound(sycl_device));
830  CALL_SUBTEST_2(tensorTensor(sycl_device));
831  CALL_SUBTEST_2(tensorTensor_m(sycl_device));
832  CALL_SUBTEST_2(tensorTensor_n(sycl_device));
833  CALL_SUBTEST_2(tensorTensor_k(sycl_device));
834  CALL_SUBTEST_2(tensorTensor_sizes(sycl_device));
835  CALL_SUBTEST_3(vectorVector(sycl_device));
836  CALL_SUBTEST_4(vectorTensor(sycl_device));
837  CALL_SUBTEST_5(tensorVector(sycl_device));
838  CALL_SUBTEST_6(tensorScalar(sycl_device));
839  CALL_SUBTEST_7(skinnyTensor_row(sycl_device));
840  CALL_SUBTEST_7(skinnyTensor_col(sycl_device));
845  }
846 }
void tensorTensor_n(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:556
void tensor_contraction_batch_per_device(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:749
void vectorVector(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:605
void tensorTensor_k(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:573
void skinnyTensor_col(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:730
void tensorTensor_sizes(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:589
void vectorTensor(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:626
void tensorVector(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:655
void skinnyTensor_row(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:711
void tensorTensor_m(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:539
void tensor_contraction_lhs_transposed_per_device(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:765
void tensorOutofBound(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:499
void tensor_contraction_rhs_transposed_per_device(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:786
void tensorTensor(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:523
void tensorScalar(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:690
void tensor_contraction_both_transposed_per_device(const Dev &sycl_device)
Definition: cxx11_tensor_contract_sycl.cpp:808
#define CALL_SUBTEST_6(FUNC)
Definition: split_test_helper.h:34
#define CALL_SUBTEST_3(FUNC)
Definition: split_test_helper.h:16
#define CALL_SUBTEST_1(FUNC)
Definition: split_test_helper.h:4
#define CALL_SUBTEST_8(FUNC)
Definition: split_test_helper.h:46
#define CALL_SUBTEST_5(FUNC)
Definition: split_test_helper.h:28
#define CALL_SUBTEST_11(FUNC)
Definition: split_test_helper.h:64
#define CALL_SUBTEST_2(FUNC)
Definition: split_test_helper.h:10
#define CALL_SUBTEST_7(FUNC)
Definition: split_test_helper.h:40
#define CALL_SUBTEST_4(FUNC)
Definition: split_test_helper.h:22
#define CALL_SUBTEST_9(FUNC)
Definition: split_test_helper.h:52
#define CALL_SUBTEST_10(FUNC)
Definition: split_test_helper.h:58

References CALL_SUBTEST_1, CALL_SUBTEST_10, CALL_SUBTEST_11, CALL_SUBTEST_2, CALL_SUBTEST_3, CALL_SUBTEST_4, CALL_SUBTEST_5, CALL_SUBTEST_6, CALL_SUBTEST_7, CALL_SUBTEST_8, CALL_SUBTEST_9, skinnyTensor_col(), skinnyTensor_row(), tensor_contraction_batch_per_device(), tensor_contraction_both_transposed_per_device(), tensor_contraction_lhs_transposed_per_device(), tensor_contraction_rhs_transposed_per_device(), tensorOutofBound(), tensorScalar(), tensorTensor(), tensorTensor_k(), tensorTensor_m(), tensorTensor_n(), tensorTensor_sizes(), tensorVector(), vectorTensor(), and vectorVector().

◆ skinnyTensor_col()

template<typename Dev >
void skinnyTensor_col ( const Dev &  sycl_device)
inline
730  {
731  typedef float DataType;
732  typedef int64_t IndexType;
733  std::chrono::time_point<std::chrono::system_clock> start, end;
734  start = std::chrono::system_clock::now();
735  // Tensor Tensor Contraction
736  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 16, 4, 16);
737  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 257, 131073, 257);
738  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 256, 131072, 256);
739  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 16, 131073, 16);
740  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 17, 131072, 17);
741  end = std::chrono::system_clock::now();
742  std::chrono::duration<double> elapsed_seconds = end - start;
743  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
744  std::cout << "finished computation at " << std::ctime(&end_time) << "elapsed time: " << elapsed_seconds.count()
745  << "s\n";
746 }
static constexpr lastp1_t end
Definition: IndexedViewHelper.h:79
std::int64_t int64_t
Definition: Meta.h:43

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ skinnyTensor_row()

template<typename Dev >
void skinnyTensor_row ( const Dev &  sycl_device)
inline
711  {
712  typedef float DataType;
713  typedef int64_t IndexType;
714  std::chrono::time_point<std::chrono::system_clock> start, end;
715  start = std::chrono::system_clock::now();
716  // Tensor Tensor Contraction
717  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 16, 4, 16);
718  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 257, 131073, 257);
719  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 256, 131072, 256);
720  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 16, 131073, 16);
721  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 17, 131072, 17);
722  end = std::chrono::system_clock::now();
723  std::chrono::duration<double> elapsed_seconds = end - start;
724  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
725  std::cout << "finished computation at " << std::ctime(&end_time) << "elapsed time: " << elapsed_seconds.count()
726  << "s\n";
727 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensor_contraction_batch_per_device()

template<typename Dev >
void tensor_contraction_batch_per_device ( const Dev &  sycl_device)
inline
749  {
750  typedef float DataType;
751  typedef int64_t IndexType;
752  std::chrono::time_point<std::chrono::system_clock> start, end;
753  start = std::chrono::system_clock::now();
754 
755  contraction_batch<RowMajor, DataType, IndexType>(sycl_device, 64, 75, 30, 4, 0, 4);
756  contraction_batch<ColMajor, DataType, IndexType>(sycl_device, 64, 75, 30, 4, 0, 4);
757  end = std::chrono::system_clock::now();
758  std::chrono::duration<double> elapsed_seconds = end - start;
759  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
760  std::cout << "finished computation at " << std::ctime(&end_time) << "elapsed time: " << elapsed_seconds.count()
761  << "s\n";
762 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensor_contraction_both_transposed_per_device()

template<typename Dev >
void tensor_contraction_both_transposed_per_device ( const Dev &  sycl_device)
inline
808  {
809  typedef float DataType;
810  typedef int64_t IndexType;
811  std::chrono::time_point<std::chrono::system_clock> start, end;
812  start = std::chrono::system_clock::now();
813 
814  contraction_both_transposed<RowMajor, DataType, IndexType>(sycl_device, 17, 5, 17);
815  contraction_both_transposed<RowMajor, DataType, IndexType>(sycl_device, 32, 8, 32);
816  contraction_both_transposed<RowMajor, DataType, IndexType>(sycl_device, 64, 16, 64);
817  end = std::chrono::system_clock::now();
818  std::chrono::duration<double> elapsed_seconds = end - start;
819  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
820  std::cout << "finished computation at " << std::ctime(&end_time) << "elapsed time: " << elapsed_seconds.count()
821  << "s\n";
822 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensor_contraction_lhs_transposed_per_device()

template<typename Dev >
void tensor_contraction_lhs_transposed_per_device ( const Dev &  sycl_device)
inline
765  {
766  typedef float DataType;
767  typedef int64_t IndexType;
768  std::chrono::time_point<std::chrono::system_clock> start, end;
769  start = std::chrono::system_clock::now();
770 
771  contraction_lhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 8, 4, 8);
772  contraction_lhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 32, 8, 32);
773  contraction_lhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 64, 16, 64);
774  contraction_lhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 784, 2048, 1024);
775  contraction_lhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 1024, 10, 1024);
776  contraction_lhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 4096, 1024, 1024);
777  contraction_lhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 2048, 4096, 1024);
778  end = std::chrono::system_clock::now();
779  std::chrono::duration<double> elapsed_seconds = end - start;
780  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
781  std::cout << "finished computation at " << std::ctime(&end_time) << "elapsed time: " << elapsed_seconds.count()
782  << "s\n";
783 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensor_contraction_rhs_transposed_per_device()

template<typename Dev >
void tensor_contraction_rhs_transposed_per_device ( const Dev &  sycl_device)
inline
786  {
787  typedef float DataType;
788  typedef int64_t IndexType;
789  std::chrono::time_point<std::chrono::system_clock> start, end;
790  start = std::chrono::system_clock::now();
791 
792  contraction_rhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 16, 4, 16);
793  contraction_rhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 17, 5, 17);
794  contraction_rhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 32, 8, 32);
795  contraction_rhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 64, 16, 64);
796  contraction_rhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 10, 1024, 1024);
797  contraction_rhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 1024, 1024, 4096);
798  contraction_rhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 4096, 1024, 2048);
799  contraction_rhs_transposed<RowMajor, DataType, IndexType>(sycl_device, 2048, 1024, 784);
800  end = std::chrono::system_clock::now();
801  std::chrono::duration<double> elapsed_seconds = end - start;
802  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
803  std::cout << "finished computation at " << std::ctime(&end_time) << "elapsed time: " << elapsed_seconds.count()
804  << "s\n";
805 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensorOutofBound()

template<typename Dev >
void tensorOutofBound ( const Dev &  sycl_device)
inline
499  {
500  typedef float DataType;
501  typedef int64_t IndexType;
502  std::chrono::time_point<std::chrono::system_clock> start, end;
503  start = std::chrono::system_clock::now();
504  // Test out of bound for Tensor-Tensor
505  test_no_out_of_bounds<RowMajor, DataType, IndexType>(sycl_device, 10, 1024, 1024);
506  test_no_out_of_bounds<RowMajor, DataType, IndexType>(sycl_device, 1024, 1024, 4096);
507  test_no_out_of_bounds<RowMajor, DataType, IndexType>(sycl_device, 4096, 1024, 2048);
508  test_no_out_of_bounds<ColMajor, DataType, IndexType>(sycl_device, 784, 2048, 1024);
509  test_no_out_of_bounds<ColMajor, DataType, IndexType>(sycl_device, 2048, 1024, 784);
510  test_no_out_of_bounds<RowMajor, DataType, IndexType>(sycl_device, 10, 1024, 10);
511  test_no_out_of_bounds<RowMajor, DataType, IndexType>(sycl_device, 513, 4096, 513);
512  test_no_out_of_bounds<RowMajor, DataType, IndexType>(sycl_device, 783, 1024, 783);
513  test_no_out_of_bounds<ColMajor, DataType, IndexType>(sycl_device, 784, 2048, 784);
514  test_no_out_of_bounds<ColMajor, DataType, IndexType>(sycl_device, 11, 1024, 11);
515  end = std::chrono::system_clock::now();
516  std::chrono::duration<double> elapsed_seconds = end - start;
517  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
518  std::cout << "tensor out of bound tests finished computation at " << std::ctime(&end_time)
519  << "elapsed time: " << elapsed_seconds.count() << "s\n";
520 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensorScalar()

template<typename Dev >
void tensorScalar ( const Dev &  sycl_device)
inline
690  {
691  typedef float DataType;
692  typedef int64_t IndexType;
693  std::chrono::time_point<std::chrono::system_clock> start, end;
694  start = std::chrono::system_clock::now();
695  // SCALAR Contraction
696  test_scalar<ColMajor, DataType, IndexType>(sycl_device, 127, 127, 127);
697  test_scalar<RowMajor, DataType, IndexType>(sycl_device, 127, 127, 127);
698  test_scalar<ColMajor, DataType, IndexType>(sycl_device, 128, 128, 128);
699  test_scalar<RowMajor, DataType, IndexType>(sycl_device, 128, 128, 128);
700  test_scalar<ColMajor, DataType, IndexType>(sycl_device, 129, 129, 129);
701  test_scalar<RowMajor, DataType, IndexType>(sycl_device, 129, 129, 129);
702 
703  end = std::chrono::system_clock::now();
704  std::chrono::duration<double> elapsed_seconds = end - start;
705  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
706  std::cout << "finished computation at " << std::ctime(&end_time) << "elapsed time: " << elapsed_seconds.count()
707  << "s\n";
708 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensorTensor()

template<typename Dev >
void tensorTensor ( const Dev &  sycl_device)
inline
523  {
524  typedef float DataType;
525  typedef int64_t IndexType;
526  std::chrono::time_point<std::chrono::system_clock> start, end;
527  start = std::chrono::system_clock::now();
528  // Tensor Tensor Contraction
529  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 128, 128, 128);
530  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 128, 128, 128);
531  end = std::chrono::system_clock::now();
532  std::chrono::duration<double> elapsed_seconds = end - start;
533  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
534  std::cout << "tensor tensor tests finished computation at " << std::ctime(&end_time)
535  << "elapsed time: " << elapsed_seconds.count() << "s\n";
536 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensorTensor_k()

template<typename Dev >
void tensorTensor_k ( const Dev &  sycl_device)
inline
573  {
574  typedef float DataType;
575  typedef int64_t IndexType;
576  std::chrono::time_point<std::chrono::system_clock> start, end;
577  start = std::chrono::system_clock::now();
578  test_sycl_contraction_k<ColMajor, DataType, IndexType>(sycl_device);
579  test_sycl_contraction_k<RowMajor, DataType, IndexType>(sycl_device);
580 
581  end = std::chrono::system_clock::now();
582  std::chrono::duration<double> elapsed_seconds = end - start;
583  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
584  std::cout << "tensor tensor tests finished computation at " << std::ctime(&end_time)
585  << "elapsed time: " << elapsed_seconds.count() << "s\n";
586 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensorTensor_m()

template<typename Dev >
void tensorTensor_m ( const Dev &  sycl_device)
inline
539  {
540  typedef float DataType;
541  typedef int64_t IndexType;
542  std::chrono::time_point<std::chrono::system_clock> start, end;
543  start = std::chrono::system_clock::now();
544  // Tensor Tensor Contraction
545  test_sycl_contraction_m<ColMajor, DataType, IndexType>(sycl_device);
546  test_sycl_contraction_m<RowMajor, DataType, IndexType>(sycl_device);
547 
548  end = std::chrono::system_clock::now();
549  std::chrono::duration<double> elapsed_seconds = end - start;
550  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
551  std::cout << "tensor tensor tests finished computation at " << std::ctime(&end_time)
552  << "elapsed time: " << elapsed_seconds.count() << "s\n";
553 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensorTensor_n()

template<typename Dev >
void tensorTensor_n ( const Dev &  sycl_device)
inline
556  {
557  typedef float DataType;
558  typedef int64_t IndexType;
559  std::chrono::time_point<std::chrono::system_clock> start, end;
560  start = std::chrono::system_clock::now();
561  // Tensor Tensor Contraction
562  test_sycl_contraction_n<ColMajor, DataType, IndexType>(sycl_device);
563  test_sycl_contraction_n<RowMajor, DataType, IndexType>(sycl_device);
564 
565  end = std::chrono::system_clock::now();
566  std::chrono::duration<double> elapsed_seconds = end - start;
567  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
568  std::cout << "tensor tensor tests finished computation at " << std::ctime(&end_time)
569  << "elapsed time: " << elapsed_seconds.count() << "s\n";
570 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensorTensor_sizes()

template<typename Dev >
void tensorTensor_sizes ( const Dev &  sycl_device)
inline
589  {
590  typedef float DataType;
591  typedef int64_t IndexType;
592  std::chrono::time_point<std::chrono::system_clock> start, end;
593  start = std::chrono::system_clock::now();
594  // Tensor Tensor Contraction
595  test_sycl_contraction_sizes<ColMajor, DataType, IndexType>(sycl_device);
596  test_sycl_contraction_sizes<RowMajor, DataType, IndexType>(sycl_device);
597 
598  end = std::chrono::system_clock::now();
599  std::chrono::duration<double> elapsed_seconds = end - start;
600  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
601  std::cout << "tensor tensor tests finished computation at " << std::ctime(&end_time)
602  << "elapsed time: " << elapsed_seconds.count() << "s\n";
603 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ tensorVector()

template<typename Dev >
void tensorVector ( const Dev &  sycl_device)
inline
655  {
656  typedef float DataType;
657  typedef int64_t IndexType;
658  std::chrono::time_point<std::chrono::system_clock> start, end;
659  start = std::chrono::system_clock::now();
660  // Matrix-Vector
661  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1025, 1025, 1);
662  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1125, 1025, 1);
663  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1224, 1024, 1);
664  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1024, 1024, 1);
665  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1023, 1023, 1);
666  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1023, 1023, 1);
667  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 4097, 4197, 1);
668  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 4097, 4097, 1);
669  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 4096, 4096, 1);
670  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 4096, 8196, 1);
671  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 4095, 4095, 1);
672  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 4095, 4095, 1);
673 // If the GEMV disabled it will creates one kernel to calculate the contraction.
674 // Therefore the acumuation of float number will overflow the precision
675 // threshold for float and cause the test to fail. While it the GMV multiple
676 // kernel will be created and each one run the overflow of accumutation breaks
677 // among the kernels.
678 #ifndef EIGEN_SYCL_DISABLE_GEMV
679  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 32, 802032, 1);
680 #endif
681 
682  end = std::chrono::system_clock::now();
683  std::chrono::duration<double> elapsed_seconds = end - start;
684  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
685  std::cout << "finished computation at " << std::ctime(&end_time) << "elapsed time: " << elapsed_seconds.count()
686  << "s\n";
687 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ test_no_out_of_bounds()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
static void test_no_out_of_bounds ( const Device &  sycl_device,
IndexType  m_size,
IndexType  k_size,
IndexType  n_size 
)
static
129  {
131  static const DataType error_threshold = DataType(1e-4);
132  Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size);
133  Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size);
134  Tensor<DataType, 2, DataLayout, IndexType> t_result(m_size, n_size);
135 
136  Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
137  Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}};
138  Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}};
139  Eigen::array<IndexType, 2> result_dims = {{m_size, n_size}};
140 
141  t_left.setRandom();
142  t_right.setRandom();
143 
144  // Allocate buffers twice as big to check for invalid read and write
145  auto padded_left_size = 2 * t_left.size();
146  auto padded_right_size = 2 * t_right.size();
147  auto padded_result_size = 2 * t_result.size();
148 
149  std::size_t t_left_bytes = padded_left_size * sizeof(DataType);
150  std::size_t t_right_bytes = padded_right_size * sizeof(DataType);
151  std::size_t t_result_bytes = padded_result_size * sizeof(DataType);
152 
153  DataType *d_t_left = static_cast<DataType *>(sycl_device.allocate(t_left_bytes));
154  DataType *d_t_right = static_cast<DataType *>(sycl_device.allocate(t_right_bytes));
155  DataType *d_t_result = static_cast<DataType *>(sycl_device.allocate(t_result_bytes));
156 
157  // TensorMaps are still of the same size than the Tensors
159  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_right(d_t_right, right_dims);
160  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_result(d_t_result, result_dims);
161 
162  // Write nan after the actual buffer to propagate nans everywhere in case of
163  // invalid reads
164  DataType nan = std::numeric_limits<DataType>::quiet_NaN();
165  auto host_left_data = new DataType[padded_left_size];
166  std::copy_n(t_left.data(), t_left.size(), host_left_data);
167  std::fill_n(host_left_data + t_left.size(), t_left.size(), nan);
168  auto host_right_data = new DataType[padded_right_size];
169  std::copy_n(t_right.data(), t_right.size(), host_right_data);
170  std::fill_n(host_right_data + t_right.size(), t_right.size(), nan);
171  auto host_result_data = new DataType[padded_result_size];
172  std::fill_n(host_result_data, padded_result_size, nan);
173 
174  sycl_device.memcpyHostToDevice(d_t_left, host_left_data, t_left_bytes);
175  sycl_device.memcpyHostToDevice(d_t_right, host_right_data, t_right_bytes);
176  sycl_device.memcpyHostToDevice(d_t_result, host_result_data, t_result_bytes);
177 
178  gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
179  sycl_device.memcpyDeviceToHost(host_result_data, d_t_result, t_result_bytes);
180 
181  t_result = t_left.contract(t_right, dims);
182 
183  for (IndexType i = 0; i < t_result.size(); i++) {
184  if (static_cast<DataType>(std::fabs(static_cast<DataType>(t_result(i) - host_result_data[i]))) < error_threshold) {
185  continue;
186  }
187  if (Eigen::internal::isApprox(t_result(i), host_result_data[i], error_threshold)) {
188  continue;
189  }
190  if (std::isnan(host_result_data[i])) {
191  std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size
192  << ", invalid read detected at IndexType " << i << ": " << t_result(i) << " vs " << host_result_data[i]
193  << std::endl;
194  } else {
195  std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size << ", mismatch detected at IndexType "
196  << i << ": " << t_result(i) << " vs " << host_result_data[i] << std::endl;
197  }
198  VERIFY_IS_APPROX(host_result_data[i], t_result(i));
199  }
200  // Make sure that the rest of the result is still nans
201  for (IndexType i = t_result.size(); i < padded_result_size; i++) {
202  if (std::isnan(host_result_data[i])) {
203  continue;
204  }
205  std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size
206  << ", invalid write detected at IndexType " << i << ": " << host_result_data[i] << std::endl;
207  VERIFY_IS_APPROX(host_result_data[i], t_result(i));
208  }
209  sycl_device.deallocate(d_t_left);
210  sycl_device.deallocate(d_t_right);
211  sycl_device.deallocate(d_t_result);
212 
213  delete[] host_left_data;
214  delete[] host_right_data;
215  delete[] host_result_data;
216 }
#define isnan(X)
Definition: main.h:109

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), e(), error_threshold, boost::multiprecision::fabs(), i, Eigen::internal::isApprox(), isnan, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_APPROX.

◆ test_scalar()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
void test_scalar ( const Device &  sycl_device,
IndexType  m_size,
IndexType  k_size,
IndexType  n_size 
)
219  {
220  // std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size <<
221  // ")" << std::endl;
222  // with these dimensions, the output has 300 * 140 elements, which is
223  // more than 30 * 1024, which is the number of threads in blocks on
224  // a 15 SM GK110 GPU
226  static const DataType error_threshold = DataType(1e-4);
227  Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size);
228  Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size);
231  Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(1, 1)}};
232  Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}};
233  Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}};
234  t_left.setRandom();
235  t_right.setRandom();
236 
237  std::size_t t_left_bytes = t_left.size() * sizeof(DataType);
238  std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
239  std::size_t t_result_bytes = sizeof(DataType);
240 
241  DataType *d_t_left = static_cast<DataType *>(sycl_device.allocate(t_left_bytes));
242  DataType *d_t_right = static_cast<DataType *>(sycl_device.allocate(t_right_bytes));
243  DataType *d_t_result = static_cast<DataType *>(sycl_device.allocate(t_result_bytes));
244 
246  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_right(d_t_right, right_dims);
248 
249  sycl_device.memcpyHostToDevice(d_t_left, t_left.data(), t_left_bytes);
250  sycl_device.memcpyHostToDevice(d_t_right, t_right.data(), t_right_bytes);
251 
252  gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
253  sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
254 
255  t_result = t_left.contract(t_right, dims);
256 
257  if (static_cast<DataType>(std::fabs(static_cast<DataType>(t_result() - t_result_gpu()))) > error_threshold &&
258  !Eigen::internal::isApprox(t_result(), t_result_gpu(), error_threshold)) {
259  std::cout << "K: " << k_size << ", N: " << n_size << ", M: " << m_size << " : mismatch detected: " << t_result()
260  << " vs " << t_result_gpu() << std::endl;
261  VERIFY_IS_APPROX(t_result_gpu(), t_result());
262  }
263 
264  sycl_device.deallocate(d_t_left);
265  sycl_device.deallocate(d_t_right);
266  sycl_device.deallocate(d_t_result);
267 }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
Definition: Tensor.h:102

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), e(), error_threshold, boost::multiprecision::fabs(), Eigen::internal::isApprox(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_APPROX.

◆ test_sycl_contraction()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
static void test_sycl_contraction ( const Device &  sycl_device,
IndexType  m_size,
IndexType  k_size,
IndexType  n_size 
)
static
35  {
37  static const DataType error_threshold = DataType(1e-4);
38  // with these dimensions, the output has 300 * 140 elements, which is
39  // more than 30 * 1024, which is the number of threads in blocks on
40  // a 15 SM GK110 GPU
41  Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size);
42  Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size);
43  Tensor<DataType, 2, DataLayout, IndexType> t_result(m_size, n_size);
44  Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(m_size, n_size);
45  Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
46  Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}};
47  Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}};
48  Eigen::array<IndexType, 2> result_dims = {{m_size, n_size}};
49 
50  t_left.setRandom();
51  t_right.setRandom();
52 
53  std::size_t t_left_bytes = t_left.size() * sizeof(DataType);
54  std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
55  std::size_t t_result_bytes = t_result.size() * sizeof(DataType);
56 
57  DataType *d_t_left = static_cast<DataType *>(sycl_device.allocate(t_left_bytes));
58  DataType *d_t_right = static_cast<DataType *>(sycl_device.allocate(t_right_bytes));
59  DataType *d_t_result = static_cast<DataType *>(sycl_device.allocate(t_result_bytes));
60 
63  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_t_result(d_t_result, result_dims);
64 
65  sycl_device.memcpyHostToDevice(d_t_left, t_left.data(), t_left_bytes);
66  sycl_device.memcpyHostToDevice(d_t_right, t_right.data(), t_right_bytes);
67 
68  gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
69  sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
70 
71  t_result = t_left.contract(t_right, dims);
72 
73  for (IndexType i = 0; i < t_result.size(); i++) {
74  if (static_cast<DataType>(std::fabs(static_cast<DataType>(t_result(i) - t_result_gpu(i)))) < error_threshold) {
75  continue;
76  }
77  if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) {
78  continue;
79  }
80 
81  std::cout << "M : " << m_size << ", N : " << n_size << ", K : " << k_size << ", mismatch detected at IndexType "
82  << i << ": " << t_result(i) << " vs " << t_result_gpu(i) << std::endl;
83  VERIFY_IS_APPROX(t_result_gpu(i), t_result(i));
84  }
85  sycl_device.deallocate(d_t_left);
86  sycl_device.deallocate(d_t_right);
87  sycl_device.deallocate(d_t_result);
88 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), e(), error_threshold, boost::multiprecision::fabs(), i, Eigen::internal::isApprox(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_APPROX.

◆ test_sycl_contraction_k()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
void test_sycl_contraction_k ( const Device &  sycl_device)
98  {
99  for (IndexType k = 32; k < 256; k++) {
100  test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, 128, k, 128);
101  }
102 }
char char char int int * k
Definition: level2_impl.h:374

References k.

◆ test_sycl_contraction_m()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
void test_sycl_contraction_m ( const Device &  sycl_device)
91  {
92  for (IndexType k = 32; k < 256; k++) {
93  test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, k, 128, 128);
94  }
95 }

References k.

◆ test_sycl_contraction_n()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
void test_sycl_contraction_n ( const Device &  sycl_device)
105  {
106  for (IndexType k = 32; k < 256; k++) {
107  test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, 128, 128, k);
108  }
109 }

References k.

◆ test_sycl_contraction_sizes()

template<int DataLayout, typename DataType , typename IndexType , typename Device >
void test_sycl_contraction_sizes ( const Device &  sycl_device)
112  {
113  IndexType m_sizes[] = {31, 39, 63, 64, 65, 127, 129, 255, 257, 511, 512, 513, 1023, 1024, 1025};
114 
115  IndexType n_sizes[] = {31, 39, 63, 64, 65, 127, 129, 255, 257, 511, 512, 513, 1023, 1024, 1025};
116 
117  IndexType k_sizes[] = {31, 39, 63, 64, 65, 95, 96, 127, 129, 255, 257, 511, 512, 513, 1023, 1024, 1025};
118 
119  for (IndexType i = 0; i < 15; i++) {
120  for (IndexType j = 0; j < 15; j++) {
121  for (IndexType k = 0; k < 17; k++) {
122  test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, m_sizes[i], n_sizes[j], k_sizes[k]);
123  }
124  }
125  }
126 }

References i, j, and k.

◆ vectorTensor()

template<typename Dev >
void vectorTensor ( const Dev &  sycl_device)
inline
626  {
627  typedef float DataType;
628  typedef int64_t IndexType;
629  std::chrono::time_point<std::chrono::system_clock> start, end;
630  start = std::chrono::system_clock::now();
631  // Vector-Tensor
632  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1, 1025, 1025);
633  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1, 1025, 1025);
634  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1, 1024, 1024);
635  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1, 1024, 1024);
636  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1, 1023, 1023);
637  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1, 1023, 1023);
638 
639  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1, 4097, 4097);
640  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1, 4097, 4097);
641  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1, 4096, 4096);
642  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1, 4096, 4096);
643  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1, 4095, 4095);
644  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1, 4095, 4095);
645  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1, 802816, 32);
646 
647  end = std::chrono::system_clock::now();
648  std::chrono::duration<double> elapsed_seconds = end - start;
649  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
650  std::cout << "finished computation at " << std::ctime(&end_time) << "elapsed time: " << elapsed_seconds.count()
651  << "s\n";
652 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().

◆ vectorVector()

template<typename Dev >
void vectorVector ( const Dev &  sycl_device)
inline
605  {
606  typedef float DataType;
607  typedef int64_t IndexType;
608  std::chrono::time_point<std::chrono::system_clock> start, end;
609  start = std::chrono::system_clock::now();
610  // VECTOR-VECTOR
611  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1025, 1, 1025);
612  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1025, 1, 1025);
613  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1024, 1, 1024);
614  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1024, 1, 1024);
615  test_sycl_contraction<ColMajor, DataType, IndexType>(sycl_device, 1023, 1, 1023);
616  test_sycl_contraction<RowMajor, DataType, IndexType>(sycl_device, 1023, 1, 1023);
617 
618  end = std::chrono::system_clock::now();
619  std::chrono::duration<double> elapsed_seconds = end - start;
620  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
621  std::cout << "contracted tensor tests finished computation at " << std::ctime(&end_time)
622  << "elapsed time: " << elapsed_seconds.count() << "s\n";
623 }

References Eigen::placeholders::end, and oomph::CumulativeTimings::start().

Referenced by EIGEN_DECLARE_TEST().