cxx11_tensor_executor.cpp File Reference
#include "main.h"
#include <Eigen/CXX11/Tensor>

Classes

struct  test_execute_chipping_rvalue_runner< T, NumDims, Device, Vectorizable, Tiling, Layout >
 
struct  test_execute_chipping_lvalue_runner< T, NumDims, Device, Vectorizable, Tiling, Layout >
 
struct  DummyGenerator< T, NumDims >
 

Macros

#define EIGEN_USE_THREADS
 
#define EIGEN_DONT_VECTORIZE   0
 
#define VECTORIZABLE(T, VAL)   !EIGEN_DONT_VECTORIZE && Eigen::internal::packet_traits<T>::Vectorizable && VAL
 
#define CALL_SUBTEST_PART(PART)   CALL_SUBTEST_##PART
 
#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS)
 
#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS)
 

Functions

template<typename Dst , typename Expr >
void DefaultAssign (Dst &dst, Expr expr)
 
template<bool Vectorizable, TiledEvaluation Tiling, typename Device , typename Dst , typename Expr >
void DeviceAssign (Device &d, Dst &dst, Expr expr)
 
template<int NumDims>
static array< Index, NumDims > RandomDims (int min_dim=1, int max_dim=20)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_unary_expr (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_binary_expr (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_broadcasting (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_chipping_rvalue (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_chipping_lvalue (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_shuffle_rvalue (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_shuffle_lvalue (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_reshape (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_slice_rvalue (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_slice_lvalue (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_broadcasting_of_forced_eval (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_generator_op (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_reverse_rvalue (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_async_execute_unary_expr (Device d)
 
template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_async_execute_binary_expr (Device d)
 
 EIGEN_DECLARE_TEST (cxx11_tensor_executor)
 

Macro Definition Documentation

◆ CALL_ASYNC_SUBTEST_COMBINATIONS

#define CALL_ASYNC_SUBTEST_COMBINATIONS (   PART,
  NAME,
  T,
  NUM_DIMS 
)
Value:
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(tp_device)))
#define CALL_SUBTEST_PART(PART)
Definition: cxx11_tensor_executor.cpp:583

◆ CALL_SUBTEST_COMBINATIONS

#define CALL_SUBTEST_COMBINATIONS (   PART,
  NAME,
  T,
  NUM_DIMS 
)
Value:
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART) \
((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(tp_device)))

◆ CALL_SUBTEST_PART

#define CALL_SUBTEST_PART (   PART)    CALL_SUBTEST_##PART

◆ EIGEN_DONT_VECTORIZE

#define EIGEN_DONT_VECTORIZE   0

◆ EIGEN_USE_THREADS

#define EIGEN_USE_THREADS

◆ VECTORIZABLE

#define VECTORIZABLE (   T,
  VAL 
)    !EIGEN_DONT_VECTORIZE && Eigen::internal::packet_traits<T>::Vectorizable && VAL

Function Documentation

◆ DefaultAssign()

template<typename Dst , typename Expr >
void DefaultAssign ( Dst &  dst,
Expr  expr 
)
27  {
29  using Executor = Eigen::internal::TensorExecutor<const Assign, DefaultDevice,
30  /*Vectorizable=*/false,
31  /*Tiling=*/TiledEvaluation::Off>;
32 
33  Executor::run(Assign(dst, expr), DefaultDevice());
34 }
Definition: TensorAssign.h:57
Definition: TensorExecutor.h:78
@ Off
Definition: TensorForwardDeclarations.h:187
Definition: TensorDeviceDefault.h:19
void run(const string &dir_name, LinearSolver *linear_solver_pt, const unsigned nel_1d, bool mess_up_order)
Definition: two_d_poisson_compare_solvers.cc:317

References Eigen::internal::Off, and run().

Referenced by test_execute_shuffle_lvalue(), and test_execute_shuffle_rvalue().

◆ DeviceAssign()

template<bool Vectorizable, TiledEvaluation Tiling, typename Device , typename Dst , typename Expr >
void DeviceAssign ( Device &  d,
Dst &  dst,
Expr  expr 
)
38  {
41 
42  Executor::run(Assign(dst, expr), d);
43 }

References run().

◆ EIGEN_DECLARE_TEST()

EIGEN_DECLARE_TEST ( cxx11_tensor_executor  )
626  {
627  Eigen::DefaultDevice default_device;
628  // Default device is unused in ASYNC tests.
629  EIGEN_UNUSED_VARIABLE(default_device);
630 
631  const auto num_threads = internal::random<int>(20, 24);
632  Eigen::ThreadPool tp(num_threads);
633  Eigen::ThreadPoolDevice tp_device(&tp, num_threads);
634 
638 
642 
646 
650 
654 
658 
662 
667 
672 
677 
682 
687 
693 
697 
701 
702  // Force CMake to split this test.
703  // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16
704 }
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:966
Definition: NonBlockingThreadPool.h:19
void test_execute_binary_expr(Device d)
Definition: cxx11_tensor_executor.cpp:80
void test_execute_shuffle_rvalue(Device d)
Definition: cxx11_tensor_executor.cpp:233
void test_execute_shuffle_lvalue(Device d)
Definition: cxx11_tensor_executor.cpp:269
#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS)
Definition: cxx11_tensor_executor.cpp:612
void test_execute_slice_rvalue(Device d)
Definition: cxx11_tensor_executor.cpp:340
void test_execute_generator_op(Device d)
Definition: cxx11_tensor_executor.cpp:456
void test_execute_chipping_rvalue(Device d)
Definition: cxx11_tensor_executor.cpp:175
#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS)
Definition: cxx11_tensor_executor.cpp:585
void test_execute_unary_expr(Device d)
Definition: cxx11_tensor_executor.cpp:55
void test_execute_chipping_lvalue(Device d)
Definition: cxx11_tensor_executor.cpp:228
void test_execute_reshape(Device d)
Definition: cxx11_tensor_executor.cpp:303
void test_execute_broadcasting_of_forced_eval(Device d)
Definition: cxx11_tensor_executor.cpp:416
void test_execute_reverse_rvalue(Device d)
Definition: cxx11_tensor_executor.cpp:484
void test_execute_broadcasting(Device d)
Definition: cxx11_tensor_executor.cpp:108
void test_async_execute_binary_expr(Device d)
Definition: cxx11_tensor_executor.cpp:546
void test_async_execute_unary_expr(Device d)
Definition: cxx11_tensor_executor.cpp:516
void test_execute_slice_lvalue(Device d)
Definition: cxx11_tensor_executor.cpp:376

References CALL_ASYNC_SUBTEST_COMBINATIONS, CALL_SUBTEST_COMBINATIONS, EIGEN_UNUSED_VARIABLE, test_async_execute_binary_expr(), test_async_execute_unary_expr(), test_execute_binary_expr(), test_execute_broadcasting(), test_execute_broadcasting_of_forced_eval(), test_execute_chipping_lvalue(), test_execute_chipping_rvalue(), test_execute_generator_op(), test_execute_reshape(), test_execute_reverse_rvalue(), test_execute_shuffle_lvalue(), test_execute_shuffle_rvalue(), test_execute_slice_lvalue(), test_execute_slice_rvalue(), and test_execute_unary_expr().

◆ RandomDims()

template<int NumDims>
static array<Index, NumDims> RandomDims ( int  min_dim = 1,
int  max_dim = 20 
)
static
46  {
48  for (int i = 0; i < NumDims; ++i) {
49  dims[i] = internal::random<int>(min_dim, max_dim);
50  }
51  return dims;
52 }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
std::array< T, N > array
Definition: EmulateArray.h:231

References i.

◆ test_async_execute_binary_expr()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_async_execute_binary_expr ( Device  d)
546  {
547  static constexpr int Options = 0 | Layout;
548 
549  // Pick a large enough tensor size to bypass small tensor block evaluation
550  // optimization.
551  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
552 
556 
557  lhs.setRandom();
558  rhs.setRandom();
559 
560  const auto expr = lhs + rhs;
561 
562  Eigen::Barrier done(1);
563  auto on_done = [&done]() { done.Notify(); };
564 
565  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
566  using DoneCallback = decltype(on_done);
567  using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback, Vectorizable, Tiling>;
568 
569  Executor::runAsync(Assign(dst, expr), d, on_done);
570  done.Wait();
571 
572  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
573  T sum = lhs.coeff(i) + rhs.coeff(i);
574  VERIFY_IS_EQUAL(sum, dst.coeff(i));
575  }
576 }
Definition: Barrier.h:21
The tensor class.
Definition: Tensor.h:68
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:367
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::Barrier::Notify(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), VERIFY_IS_EQUAL, and Eigen::Barrier::Wait().

Referenced by EIGEN_DECLARE_TEST().

◆ test_async_execute_unary_expr()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_async_execute_unary_expr ( Device  d)
516  {
517  static constexpr int Options = 0 | Layout;
518 
519  // Pick a large enough tensor size to bypass small tensor block evaluation
520  // optimization.
521  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
522 
525 
526  src.setRandom();
527  const auto expr = src.square();
528 
529  Eigen::Barrier done(1);
530  auto on_done = [&done]() { done.Notify(); };
531 
532  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
533  using DoneCallback = decltype(on_done);
534  using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback, Vectorizable, Tiling>;
535 
536  Executor::runAsync(Assign(dst, expr), d, on_done);
537  done.Wait();
538 
539  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
540  T square = src.coeff(i) * src.coeff(i);
541  VERIFY_IS_EQUAL(square, dst.coeff(i));
542  }
543 }
Coefficient-wise square (power 2); see also ArrayBase::abs2, MatrixBase::cwiseAbs2.

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::Barrier::Notify(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::square(), VERIFY_IS_EQUAL, and Eigen::Barrier::Wait().

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_binary_expr()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_binary_expr ( Device  d)
80  {
81  static constexpr int Options = 0 | Layout;
82 
83  // Pick a large enough tensor size to bypass small tensor block evaluation
84  // optimization.
85  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
86 
90 
91  lhs.setRandom();
92  rhs.setRandom();
93 
94  const auto expr = lhs + rhs;
95 
96  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
97  using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
98 
99  Executor::run(Assign(dst, expr), d);
100 
101  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
102  T sum = lhs.coeff(i) + rhs.coeff(i);
103  VERIFY_IS_EQUAL(sum, dst.coeff(i));
104  }
105 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, run(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_broadcasting()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_broadcasting ( Device  d)
108  {
109  static constexpr int Options = 0 | Layout;
110 
111  auto dims = RandomDims<NumDims>(1, 10);
113  src.setRandom();
114 
115  const auto broadcasts = RandomDims<NumDims>(1, 7);
116  const auto expr = src.broadcast(broadcasts);
117 
118  // We assume that broadcasting on a default device is tested and correct, so
119  // we can rely on it to verify correctness of tensor executor and tiling.
121  golden = expr;
122 
123  // Now do the broadcasting using configured tensor executor.
125 
126  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
127  using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
128 
129  Executor::run(Assign(dst, expr), d);
130 
131  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
132  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
133  }
134 }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: Tensor.h:100
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar & coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
Definition: Tensor.h:112

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, run(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_broadcasting_of_forced_eval()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_broadcasting_of_forced_eval ( Device  d)
416  {
417  static constexpr int Options = 0 | Layout;
418 
419  auto dims = RandomDims<NumDims>(1, 10);
421  src.setRandom();
422 
423  const auto broadcasts = RandomDims<NumDims>(1, 7);
424  const auto expr = src.square().eval().broadcast(broadcasts);
425 
426  // We assume that broadcasting on a default device is tested and correct, so
427  // we can rely on it to verify correctness of tensor executor and tiling.
429  golden = expr;
430 
431  // Now do the broadcasting using configured tensor executor.
433 
434  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
435  using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
436 
437  Executor::run(Assign(dst, expr), d);
438 
439  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
440  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
441  }
442 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, run(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_chipping_lvalue()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_chipping_lvalue ( Device  d)

◆ test_execute_chipping_rvalue()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_chipping_rvalue ( Device  d)

◆ test_execute_generator_op()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_generator_op ( Device  d)
456  {
457  static constexpr int Options = 0 | Layout;
458 
459  auto dims = RandomDims<NumDims>(20, 30);
461  src.setRandom();
462 
463  const auto expr = src.generate(DummyGenerator<T, NumDims>());
464 
465  // We assume that generator on a default device is tested and correct, so
466  // we can rely on it to verify correctness of tensor executor and tiling.
468  golden = expr;
469 
470  // Now apply the generator op using the configured tensor executor.
472 
473  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
474  using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
475 
476  Executor::run(Assign(dst, expr), d);
477 
478  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
479  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
480  }
481 }
Definition: cxx11_tensor_executor.cpp:445

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, run(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_reshape()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_reshape ( Device  d)
303  {
304  static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
305 
306  static constexpr int ReshapedDims = NumDims - 1;
307  static constexpr int Options = 0 | Layout;
308 
309  auto dims = RandomDims<NumDims>(5, 10);
311  src.setRandom();
312 
313  // Multiply the 0th and 1st dimensions together, and then shuffle.
314  std::vector<Index> shuffle;
315  for (int i = 0; i < ReshapedDims; ++i) shuffle.push_back(i);
316  std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
317 
318  DSizes<Index, ReshapedDims> reshaped_dims;
319  reshaped_dims[shuffle[0]] = dims[0] * dims[1];
320  for (int i = 1; i < ReshapedDims; ++i) reshaped_dims[shuffle[i]] = dims[i + 1];
321 
322  Tensor<T, ReshapedDims, Options, Index> golden = src.reshape(reshaped_dims);
323 
324  // Now reshape using configured tensor executor.
326 
327  auto expr = src.reshape(reshaped_dims);
328 
329  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
330  using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
331 
332  Executor::run(Assign(dst, expr), d);
333 
334  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
335  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
336  }
337 }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReshapingOp< const NewDimensions, const Derived > reshape(const NewDimensions &newDimensions) const
Definition: TensorBase.h:1106
EIGEN_STRONG_INLINE Packet2d shuffle(const Packet2d &m, const Packet2d &n, int mask)
Definition: LSX/PacketMath.h:150
Definition: TensorDimensions.h:161

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::reshape(), run(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::internal::shuffle(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_reverse_rvalue()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_reverse_rvalue ( Device  d)
484  {
485  static constexpr int Options = 0 | Layout;
486 
487  auto dims = RandomDims<NumDims>(1, numext::pow(1000000.0, 1.0 / NumDims));
489  src.setRandom();
490 
491  // Reverse half of the dimensions.
493  for (int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();
494 
495  const auto expr = src.reverse(reverse);
496 
497  // We assume that reversing on a default device is tested and correct, so
498  // we can rely on it to verify correctness of tensor executor and tiling.
500  golden = expr;
501 
502  // Now do the reversing using configured tensor executor.
504 
505  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
506  using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
507 
508  Executor::run(Assign(dst, expr), d);
509 
510  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
511  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
512  }
513 }
void reverse(const MatrixType &m)
Definition: array_reverse.cpp:17
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:625

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::bfloat16_impl::pow(), reverse(), Eigen::TensorBase< Derived, AccessLevel >::reverse(), run(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_shuffle_lvalue()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_shuffle_lvalue ( Device  d)
269  {
270  static constexpr int Options = 0 | Layout;
271 
272  auto dims = RandomDims<NumDims>(5, 10);
274  src.setRandom();
275 
277  for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
278 
279  // Test all possible shuffle permutations.
280  do {
281  DSizes<Index, NumDims> shuffled_dims;
282  for (int i = 0; i < NumDims; ++i) shuffled_dims[shuffle[i]] = dims[i];
283 
284  // We assume that shuffling on a default device is tested and correct, so
285  // we can rely on it to verify correctness of tensor executor and tiling.
286  Tensor<T, NumDims, Options, Index> golden(shuffled_dims);
287  auto golden_shuffle = golden.shuffle(shuffle);
288  DefaultAssign(golden_shuffle, src);
289 
290  // Now do the shuffling using configured tensor executor.
291  Tensor<T, NumDims, Options, Index> dst(shuffled_dims);
292  auto dst_shuffle = dst.shuffle(shuffle);
293  DeviceAssign<Vectorizable, Tiling>(d, dst_shuffle, src);
294 
295  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
296  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
297  }
298 
299  } while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
300 }
void DefaultAssign(Dst &dst, Expr expr)
Definition: cxx11_tensor_executor.cpp:27

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), DefaultAssign(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::internal::shuffle(), Eigen::TensorBase< Derived, AccessLevel >::shuffle(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_shuffle_rvalue()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_shuffle_rvalue ( Device  d)
233  {
234  static constexpr int Options = 0 | Layout;
235 
236  auto dims = RandomDims<NumDims>(1, 10);
238  src.setRandom();
239 
241  for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
242 
243  // Test all possible shuffle permutations.
244  do {
245  DSizes<Index, NumDims> shuffled_dims;
246  for (int i = 0; i < NumDims; ++i) {
247  shuffled_dims[i] = dims[shuffle[i]];
248  }
249 
250  const auto expr = src.shuffle(shuffle);
251 
252  // We assume that shuffling on a default device is tested and correct, so
253  // we can rely on it to verify correctness of tensor executor and tiling.
254  Tensor<T, NumDims, Options, Index> golden(shuffled_dims);
255  DefaultAssign(golden, expr);
256 
257  // Now do the shuffling using configured tensor executor.
258  Tensor<T, NumDims, Options, Index> dst(shuffled_dims);
259  DeviceAssign<Vectorizable, Tiling>(d, dst, expr);
260 
261  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
262  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
263  }
264 
265  } while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
266 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), DefaultAssign(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::internal::shuffle(), Eigen::TensorBase< Derived, AccessLevel >::shuffle(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_slice_lvalue()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_slice_lvalue ( Device  d)
376  {
377  static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
378  static constexpr int Options = 0 | Layout;
379 
380  auto dims = RandomDims<NumDims>(5, 10);
382  src.setRandom();
383 
384  // Pick a random slice of src tensor.
385  auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));
386  auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));
387 
388  // Make sure that slice start + size do not overflow tensor dims.
389  for (int i = 0; i < NumDims; ++i) {
390  slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
391  slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
392  }
393 
394  Tensor<T, NumDims, Options, Index> slice(slice_size);
395  slice.setRandom();
396 
397  // Assign a slice using default executor.
399  golden.slice(slice_start, slice_size) = slice;
400 
401  // And using configured execution strategy.
403  auto expr = dst.slice(slice_start, slice_size);
404 
405  using Assign = TensorAssignOp<decltype(expr), const decltype(slice)>;
406  using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
407 
408  Executor::run(Assign(expr, slice), d);
409 
410  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
411  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
412  }
413 }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorSlicingOp< const StartIndices, const Sizes, const Derived > slice(const StartIndices &startIndices, const Sizes &sizes) const
Definition: TensorBase.h:1117
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
Definition: MathFunctions.h:920

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::numext::mini(), run(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::TensorBase< Derived, AccessLevel >::slice(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_slice_rvalue()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_slice_rvalue ( Device  d)
340  {
341  static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
342  static constexpr int Options = 0 | Layout;
343 
344  auto dims = RandomDims<NumDims>(5, 10);
346  src.setRandom();
347 
348  // Pick a random slice of src tensor.
349  auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>());
350  auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>());
351 
352  // Make sure that slice start + size do not overflow tensor dims.
353  for (int i = 0; i < NumDims; ++i) {
354  slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
355  slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
356  }
357 
358  Tensor<T, NumDims, Options, Index> golden = src.slice(slice_start, slice_size);
359 
360  // Now take the slice using the configured tensor executor.
362 
363  auto expr = src.slice(slice_start, slice_size);
364 
365  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
366  using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
367 
368  Executor::run(Assign(dst, expr), d);
369 
370  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
371  VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
372  }
373 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::numext::mini(), run(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::TensorBase< Derived, AccessLevel >::slice(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_execute_unary_expr()

template<typename T , int NumDims, typename Device , bool Vectorizable, TiledEvaluation Tiling, int Layout>
void test_execute_unary_expr ( Device  d)
55  {
56  static constexpr int Options = 0 | Layout;
57 
58  // Pick a large enough tensor size to bypass small tensor block evaluation
59  // optimization.
60  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
61 
64 
65  src.setRandom();
66  const auto expr = src.square();
67 
68  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
69  using Executor = internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
70 
71  Executor::run(Assign(dst, expr), d);
72 
73  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
74  T square = src.coeff(i) * src.coeff(i);
75  VERIFY_IS_EQUAL(square, dst.coeff(i));
76  }
77 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::coeff(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, run(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::square(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().