cxx11_tensor_thread_pool.cpp File Reference
#include "main.h"
#include <iostream>
#include <Eigen/CXX11/Tensor>

Classes

class  TestAllocator
 
struct  SqrtOutputKernel
 

Macros

#define EIGEN_USE_THREADS
 

Functions

void test_multithread_elementwise ()
 
void test_async_multithread_elementwise ()
 
void test_multithread_chip ()
 
void test_async_multithread_chip ()
 
void test_multithread_volume_patch ()
 
void test_async_multithread_volume_patch ()
 
void test_multithread_compound_assignment ()
 
template<int DataLayout>
void test_multithread_contraction ()
 
template<int DataLayout>
void test_contraction_corner_cases ()
 
template<int DataLayout>
void test_multithread_contraction_agrees_with_singlethread ()
 
template<int DataLayout>
static void test_multithread_contraction_with_output_kernel ()
 
template<int DataLayout>
void test_async_multithread_contraction_agrees_with_singlethread ()
 
template<int DataLayout>
static void test_sharded_by_inner_dim_contraction ()
 
template<int DataLayout>
static void test_sharded_by_inner_dim_contraction_with_output_kernel ()
 
template<int DataLayout>
static void test_async_sharded_by_inner_dim_contraction ()
 
template<int DataLayout>
static void test_async_sharded_by_inner_dim_contraction_with_output_kernel ()
 
template<int DataLayout>
void test_full_contraction ()
 
template<int DataLayout>
void test_multithreaded_reductions ()
 
void test_memcpy ()
 
void test_multithread_random ()
 
template<int DataLayout>
void test_multithread_shuffle (Allocator *allocator)
 
void test_threadpool_allocate (TestAllocator *allocator)
 
 EIGEN_DECLARE_TEST (cxx11_tensor_thread_pool)
 

Macro Definition Documentation

◆ EIGEN_USE_THREADS

#define EIGEN_USE_THREADS

Function Documentation

◆ EIGEN_DECLARE_TEST()

EIGEN_DECLARE_TEST ( cxx11_tensor_thread_pool  )
723  {
727 
728  CALL_SUBTEST_2(test_multithread_contraction<ColMajor>());
729  CALL_SUBTEST_2(test_multithread_contraction<RowMajor>());
730 
733 
736 
737  CALL_SUBTEST_5(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
738  CALL_SUBTEST_5(test_multithread_contraction_agrees_with_singlethread<RowMajor>());
739  CALL_SUBTEST_5(test_multithread_contraction_with_output_kernel<ColMajor>());
740  CALL_SUBTEST_5(test_multithread_contraction_with_output_kernel<RowMajor>());
741 
742  CALL_SUBTEST_6(test_async_multithread_contraction_agrees_with_singlethread<ColMajor>());
743  CALL_SUBTEST_6(test_async_multithread_contraction_agrees_with_singlethread<RowMajor>());
744 
745  // Test EvalShardedByInnerDimContext parallelization strategy.
746  CALL_SUBTEST_7(test_sharded_by_inner_dim_contraction<ColMajor>());
747  CALL_SUBTEST_7(test_sharded_by_inner_dim_contraction<RowMajor>());
748  CALL_SUBTEST_7(test_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
749  CALL_SUBTEST_7(test_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
750 
751  CALL_SUBTEST_8(test_async_sharded_by_inner_dim_contraction<ColMajor>());
752  CALL_SUBTEST_8(test_async_sharded_by_inner_dim_contraction<RowMajor>());
753  CALL_SUBTEST_8(test_async_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
754  CALL_SUBTEST_8(test_async_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
755 
756  // Exercise various cases that have been problematic in the past.
757  CALL_SUBTEST_9(test_contraction_corner_cases<ColMajor>());
758  CALL_SUBTEST_9(test_contraction_corner_cases<RowMajor>());
759 
760  CALL_SUBTEST_10(test_full_contraction<ColMajor>());
761  CALL_SUBTEST_10(test_full_contraction<RowMajor>());
762 
763  CALL_SUBTEST_11(test_multithreaded_reductions<ColMajor>());
764  CALL_SUBTEST_11(test_multithreaded_reductions<RowMajor>());
765 
768 
769  TestAllocator test_allocator;
770  CALL_SUBTEST_13(test_multithread_shuffle<ColMajor>(NULL));
771  CALL_SUBTEST_13(test_multithread_shuffle<RowMajor>(&test_allocator));
772  CALL_SUBTEST_13(test_threadpool_allocate(&test_allocator));
773 
774  // Force CMake to split this test.
775  // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11;12;13
776 }
Definition: cxx11_tensor_thread_pool.cpp:18
void test_multithread_chip()
Definition: cxx11_tensor_thread_pool.cpp:83
void test_async_multithread_elementwise()
Definition: cxx11_tensor_thread_pool.cpp:59
void test_multithread_random()
Definition: cxx11_tensor_thread_pool.cpp:677
void test_multithread_elementwise()
Definition: cxx11_tensor_thread_pool.cpp:38
void test_threadpool_allocate(TestAllocator *allocator)
Definition: cxx11_tensor_thread_pool.cpp:708
void test_async_multithread_volume_patch()
Definition: cxx11_tensor_thread_pool.cpp:145
void test_memcpy()
Definition: cxx11_tensor_thread_pool.cpp:660
void test_multithread_volume_patch()
Definition: cxx11_tensor_thread_pool.cpp:129
void test_async_multithread_chip()
Definition: cxx11_tensor_thread_pool.cpp:105
void test_multithread_compound_assignment()
Definition: cxx11_tensor_thread_pool.cpp:163
#define CALL_SUBTEST_6(FUNC)
Definition: split_test_helper.h:34
#define CALL_SUBTEST_3(FUNC)
Definition: split_test_helper.h:16
#define CALL_SUBTEST_1(FUNC)
Definition: split_test_helper.h:4
#define CALL_SUBTEST_13(FUNC)
Definition: split_test_helper.h:76
#define CALL_SUBTEST_8(FUNC)
Definition: split_test_helper.h:46
#define CALL_SUBTEST_5(FUNC)
Definition: split_test_helper.h:28
#define CALL_SUBTEST_11(FUNC)
Definition: split_test_helper.h:64
#define CALL_SUBTEST_12(FUNC)
Definition: split_test_helper.h:70
#define CALL_SUBTEST_2(FUNC)
Definition: split_test_helper.h:10
#define CALL_SUBTEST_7(FUNC)
Definition: split_test_helper.h:40
#define CALL_SUBTEST_4(FUNC)
Definition: split_test_helper.h:22
#define CALL_SUBTEST_9(FUNC)
Definition: split_test_helper.h:52
#define CALL_SUBTEST_10(FUNC)
Definition: split_test_helper.h:58

References CALL_SUBTEST_1, CALL_SUBTEST_10, CALL_SUBTEST_11, CALL_SUBTEST_12, CALL_SUBTEST_13, CALL_SUBTEST_2, CALL_SUBTEST_3, CALL_SUBTEST_4, CALL_SUBTEST_5, CALL_SUBTEST_6, CALL_SUBTEST_7, CALL_SUBTEST_8, CALL_SUBTEST_9, test_async_multithread_chip(), test_async_multithread_elementwise(), test_async_multithread_volume_patch(), test_memcpy(), test_multithread_chip(), test_multithread_compound_assignment(), test_multithread_elementwise(), test_multithread_random(), test_multithread_volume_patch(), and test_threadpool_allocate().

◆ test_async_multithread_chip()

void test_async_multithread_chip ( )
105  {
106  Tensor<float, 5> in(2, 3, 5, 7, 11);
107  Tensor<float, 4> out(3, 5, 7, 11);
108 
109  in.setRandom();
110 
111  Eigen::ThreadPool tp(internal::random<int>(3, 11));
112  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
113 
114  Eigen::Barrier b(1);
115  out.device(thread_pool_device, [&b]() { b.Notify(); }) = in.chip(1, 0);
116  b.Wait();
117 
118  for (int i = 0; i < 3; ++i) {
119  for (int j = 0; j < 5; ++j) {
120  for (int k = 0; k < 7; ++k) {
121  for (int l = 0; l < 11; ++l) {
122  VERIFY_IS_EQUAL(out(i, j, k, l), in(1, i, j, k, l));
123  }
124  }
125  }
126  }
127 }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
Scalar * b
Definition: benchVecAdd.cpp:17
Definition: Barrier.h:21
The tensor class.
Definition: Tensor.h:68
Definition: NonBlockingThreadPool.h:19
char char char int int * k
Definition: level2_impl.h:374
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:367
std::ofstream out("Result.txt")
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

References b, Eigen::TensorBase< Derived, AccessLevel >::chip(), i, j, k, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_async_multithread_contraction_agrees_with_singlethread()

template<int DataLayout>
void test_async_multithread_contraction_agrees_with_singlethread ( )
403  {
404  int contract_size = internal::random<int>(100, 500);
405 
406  Tensor<float, 3, DataLayout> left(internal::random<int>(10, 40), contract_size, internal::random<int>(10, 40));
407 
408  Tensor<float, 4, DataLayout> right(internal::random<int>(1, 20), internal::random<int>(1, 20), contract_size,
409  internal::random<int>(1, 20));
410 
411  left.setRandom();
412  right.setRandom();
413 
414  // add constants to shift values away from 0 for more precision
415  left += left.constant(1.5f);
416  right += right.constant(1.5f);
417 
419  Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});
420 
421  Eigen::ThreadPool tp(internal::random<int>(2, 11));
422  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(8, 32));
423 
425  st_result = left.contract(right, dims);
426 
427  Tensor<float, 5, DataLayout> tp_result(st_result.dimensions());
428 
429  Eigen::Barrier barrier(1);
430  tp_result.device(thread_pool_device, [&barrier]() { barrier.Notify(); }) = left.contract(right, dims);
431  barrier.Wait();
432 
433  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
434  for (ptrdiff_t i = 0; i < st_result.size(); i++) {
435  // if both of the values are very small, then do nothing (because the test
436  // will fail due to numerical precision issues when values are small)
437  if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
438  VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
439  }
440  }
441 }
AnnoyingScalar abs(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:135
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: Tensor.h:100
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
Definition: Tensor.h:101
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
Definition: Tensor.h:102
Tensor< float, 1 >::DimensionPair DimPair
Definition: cxx11_tensor_contraction.cpp:17
#define VERIFY_IS_APPROX(a, b)
Definition: integer_types.cpp:13
#define VERIFY(a)
Definition: main.h:362
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool dimensions_match(Dims1 dims1, Dims2 dims2)
Definition: TensorDimensions.h:322
std::array< T, N > array
Definition: EmulateArray.h:231

References abs(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), Eigen::dimensions_match(), i, Eigen::Barrier::Notify(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), VERIFY, VERIFY_IS_APPROX, and Eigen::Barrier::Wait().

◆ test_async_multithread_elementwise()

void test_async_multithread_elementwise ( )
59  {
60  Tensor<float, 3> in1(200, 30, 70);
61  Tensor<float, 3> in2(200, 30, 70);
62  Tensor<double, 3> out(200, 30, 70);
63 
64  in1.setRandom();
65  in2.setRandom();
66 
67  Eigen::ThreadPool tp(internal::random<int>(3, 11));
68  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
69 
70  Eigen::Barrier b(1);
71  out.device(thread_pool_device, [&b]() { b.Notify(); }) = (in1 + in2 * 3.14f).cast<double>();
72  b.Wait();
73 
74  for (int i = 0; i < 200; ++i) {
75  for (int j = 0; j < 30; ++j) {
76  for (int k = 0; k < 70; ++k) {
77  VERIFY_IS_APPROX(out(i, j, k), static_cast<double>(in1(i, j, k) + in2(i, j, k) * 3.14f));
78  }
79  }
80  }
81 }

References b, i, j, k, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_APPROX.

Referenced by EIGEN_DECLARE_TEST().

◆ test_async_multithread_volume_patch()

void test_async_multithread_volume_patch ( )
145  {
146  Tensor<float, 5> in(4, 2, 3, 5, 7);
147  Tensor<float, 6> out(4, 1, 1, 1, 2 * 3 * 5, 7);
148 
149  in.setRandom();
150 
151  Eigen::ThreadPool tp(internal::random<int>(3, 11));
152  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
153 
154  Eigen::Barrier b(1);
155  out.device(thread_pool_device, [&b]() { b.Notify(); }) = in.extract_volume_patches(1, 1, 1);
156  b.Wait();
157 
158  for (int i = 0; i < in.size(); ++i) {
159  VERIFY_IS_EQUAL(in.data()[i], out.data()[i]);
160  }
161 }

References b, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), i, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_async_sharded_by_inner_dim_contraction()

template<int DataLayout>
static void test_async_sharded_by_inner_dim_contraction ( )
static
523  {
525 
526  const int num_threads = internal::random<int>(4, 16);
527  ThreadPool threads(num_threads);
528  Eigen::ThreadPoolDevice device(&threads, num_threads);
529 
530  Tensor<float, 2, DataLayout> t_left(2, 10000);
531  Tensor<float, 2, DataLayout> t_right(10000, 10);
532  Tensor<float, 2, DataLayout> t_result(2, 10);
533 
534  t_left.setRandom();
535  t_right.setRandom();
536  // Put trash in t_result to verify contraction clears output memory.
537  t_result.setRandom();
538 
539  // Add a little offset so that the results won't be close to zero.
540  t_left += t_left.constant(1.0f);
541  t_right += t_right.constant(1.0f);
542 
544  MapXf m_left(t_left.data(), 2, 10000);
545  MapXf m_right(t_right.data(), 10000, 10);
547 
548  // this contraction should be equivalent to a single matrix multiplication
549  Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
550 
551  // compute results by separate methods
552  Eigen::Barrier barrier(1);
553  t_result.device(device, [&barrier]() { barrier.Notify(); }) = t_left.contract(t_right, dims);
554  barrier.Wait();
555 
556  m_result = m_left * m_right;
557 
558  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
559  VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
560  }
561 }
A matrix or vector expression mapping an existing array of data.
Definition: Map.h:96
The matrix class, also used for vectors and row-vectors.
Definition: Eigen/Eigen/src/Core/Matrix.h:186
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_APPROX.

◆ test_async_sharded_by_inner_dim_contraction_with_output_kernel()

template<int DataLayout>
static void test_async_sharded_by_inner_dim_contraction_with_output_kernel ( )
static
565  {
567 
568  const int num_threads = internal::random<int>(4, 16);
569  ThreadPool threads(num_threads);
570  Eigen::ThreadPoolDevice device(&threads, num_threads);
571 
572  Tensor<float, 2, DataLayout> t_left(2, 10000);
573  Tensor<float, 2, DataLayout> t_right(10000, 10);
574  Tensor<float, 2, DataLayout> t_result(2, 10);
575 
576  t_left.setRandom();
577  t_right.setRandom();
578  // Put trash in t_result to verify contraction clears output memory.
579  t_result.setRandom();
580 
581  // Add a little offset so that the results won't be close to zero.
582  t_left += t_left.constant(1.0f);
583  t_right += t_right.constant(1.0f);
584 
586  MapXf m_left(t_left.data(), 2, 10000);
587  MapXf m_right(t_right.data(), 10000, 10);
589 
590  // this contraction should be equivalent to a single matrix multiplication
591  Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
592 
593  // compute results by separate methods
594  Eigen::Barrier barrier(1);
595  t_result.device(device, [&barrier]() { barrier.Notify(); }) = t_left.contract(t_right, dims, SqrtOutputKernel());
596  barrier.Wait();
597  m_result = m_left * m_right;
598 
599  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
600  VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
601  }
602 }
AnnoyingScalar sqrt(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:134
Definition: cxx11_tensor_contraction.cpp:484

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), sqrt(), and VERIFY_IS_APPROX.

◆ test_contraction_corner_cases()

template<int DataLayout>
void test_contraction_corner_cases ( )
224  {
225  Tensor<float, 2, DataLayout> t_left(32, 500);
226  Tensor<float, 2, DataLayout> t_right(32, 28 * 28);
227  Tensor<float, 2, DataLayout> t_result(500, 28 * 28);
228 
229  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
230  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
231  t_result = t_result.constant(NAN);
232 
233  // this contraction should be equivalent to a single matrix multiplication
235  Eigen::array<DimPair, 1> dims{{DimPair(0, 0)}};
236 
238  MapXf m_left(t_left.data(), 32, 500);
239  MapXf m_right(t_right.data(), 32, 28 * 28);
240  Matrix<float, Dynamic, Dynamic, DataLayout> m_result(500, 28 * 28);
241 
242  Eigen::ThreadPool tp(12);
243  Eigen::ThreadPoolDevice thread_pool_device(&tp, 12);
244 
245  // compute results by separate methods
246  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
247  m_result = m_left.transpose() * m_right;
248 
249  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
250  assert(!(numext::isnan)(t_result.data()[i]));
251  if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
252  std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i]
253  << std::endl;
254  assert(false);
255  }
256  }
257 
258  t_left.resize(32, 1);
259  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
260  t_result.resize(1, 28 * 28);
261  t_result = t_result.constant(NAN);
262  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
263  new (&m_left) MapXf(t_left.data(), 32, 1);
264  m_result = m_left.transpose() * m_right;
265  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
266  assert(!(numext::isnan)(t_result.data()[i]));
267  if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
268  std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
269  assert(false);
270  }
271  }
272 
273  t_left.resize(32, 500);
274  t_right.resize(32, 4);
275  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
276  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
277  t_result.resize(500, 4);
278  t_result = t_result.constant(NAN);
279  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
280  new (&m_left) MapXf(t_left.data(), 32, 500);
281  new (&m_right) MapXf(t_right.data(), 32, 4);
282  m_result = m_left.transpose() * m_right;
283  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
284  assert(!(numext::isnan)(t_result.data()[i]));
285  if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
286  std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
287  assert(false);
288  }
289  }
290 
291  t_left.resize(32, 1);
292  t_right.resize(32, 4);
293  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
294  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
295  t_result.resize(1, 4);
296  t_result = t_result.constant(NAN);
297  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
298  new (&m_left) MapXf(t_left.data(), 32, 1);
299  new (&m_right) MapXf(t_right.data(), 32, 4);
300  m_result = m_left.transpose() * m_right;
301  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
302  assert(!(numext::isnan)(t_result.data()[i]));
303  if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
304  std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
305  assert(false);
306  }
307  }
308 }
#define assert(e,...)
Definition: Logger.h:744
#define isnan(X)
Definition: main.h:109

References assert, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), i, isnan, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::resize(), and Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size().

◆ test_full_contraction()

template<int DataLayout>
void test_full_contraction ( )
605  {
606  int contract_size1 = internal::random<int>(1, 500);
607  int contract_size2 = internal::random<int>(1, 500);
608 
609  Tensor<float, 2, DataLayout> left(contract_size1, contract_size2);
610  Tensor<float, 2, DataLayout> right(contract_size1, contract_size2);
611  left.setRandom();
612  right.setRandom();
613 
614  // add constants to shift values away from 0 for more precision
615  left += left.constant(1.5f);
616  right += right.constant(1.5f);
617 
619  Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});
620 
621  Eigen::ThreadPool tp(internal::random<int>(2, 11));
622  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
623 
625  st_result = left.contract(right, dims);
626 
628  tp_result.device(thread_pool_device) = left.contract(right, dims);
629 
630  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
631  // if both of the values are very small, then do nothing (because the test will fail
632  // due to numerical precision issues when values are small)
633  if (numext::abs(st_result() - tp_result()) >= 1e-4f) {
634  VERIFY_IS_APPROX(st_result(), tp_result());
635  }
636 }
TensorDevice< Derived, DeviceType > device(const DeviceType &dev)
Definition: TensorBase.h:1209

References abs(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), Eigen::dimensions_match(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), VERIFY, and VERIFY_IS_APPROX.

◆ test_memcpy()

void test_memcpy ( )
660  {
661  for (int i = 0; i < 5; ++i) {
662  const int num_threads = internal::random<int>(3, 11);
663  Eigen::ThreadPool tp(num_threads);
664  Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads);
665 
666  const int size = internal::random<int>(13, 7632);
668  t1.setRandom();
669  std::vector<float> result(size);
670  thread_pool_device.memcpy(&result[0], t1.data(), size * sizeof(float));
671  for (int j = 0; j < size; j++) {
672  VERIFY_IS_EQUAL(t1(j), result[j]);
673  }
674  }
675 }
Scalar Scalar int size
Definition: benchVecAdd.cpp:17

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), i, j, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), size, and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithread_chip()

void test_multithread_chip ( )
83  {
84  Tensor<float, 5> in(2, 3, 5, 7, 11);
85  Tensor<float, 4> out(3, 5, 7, 11);
86 
87  in.setRandom();
88 
89  Eigen::ThreadPool tp(internal::random<int>(3, 11));
90  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
91 
92  out.device(thread_pool_device) = in.chip(1, 0);
93 
94  for (int i = 0; i < 3; ++i) {
95  for (int j = 0; j < 5; ++j) {
96  for (int k = 0; k < 7; ++k) {
97  for (int l = 0; l < 11; ++l) {
98  VERIFY_IS_EQUAL(out(i, j, k, l), in(1, i, j, k, l));
99  }
100  }
101  }
102  }
103 }

References Eigen::TensorBase< Derived, AccessLevel >::chip(), i, j, k, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithread_compound_assignment()

void test_multithread_compound_assignment ( )
163  {
164  Tensor<float, 3> in1(2, 3, 7);
165  Tensor<float, 3> in2(2, 3, 7);
166  Tensor<float, 3> out(2, 3, 7);
167 
168  in1.setRandom();
169  in2.setRandom();
170 
171  Eigen::ThreadPool tp(internal::random<int>(3, 11));
172  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
173  out.device(thread_pool_device) = in1;
174  out.device(thread_pool_device) += in2 * 3.14f;
175 
176  for (int i = 0; i < 2; ++i) {
177  for (int j = 0; j < 3; ++j) {
178  for (int k = 0; k < 7; ++k) {
179  VERIFY_IS_APPROX(out(i, j, k), in1(i, j, k) + in2(i, j, k) * 3.14f);
180  }
181  }
182  }
183 }

References i, j, k, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_APPROX.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithread_contraction()

template<int DataLayout>
void test_multithread_contraction ( )
186  {
187  Tensor<float, 4, DataLayout> t_left(30, 50, 37, 31);
188  Tensor<float, 5, DataLayout> t_right(37, 31, 70, 2, 10);
189  Tensor<float, 5, DataLayout> t_result(30, 50, 70, 2, 10);
190 
191  t_left.setRandom();
192  t_right.setRandom();
193 
194  // this contraction should be equivalent to a single matrix multiplication
196  Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
197 
199  MapXf m_left(t_left.data(), 1500, 1147);
200  MapXf m_right(t_right.data(), 1147, 1400);
201  Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);
202 
203  Eigen::ThreadPool tp(4);
204  Eigen::ThreadPoolDevice thread_pool_device(&tp, 4);
205 
206  // compute results by separate methods
207  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
208  m_result = m_left * m_right;
209 
210  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
211  VERIFY(&t_result.data()[i] != &m_result.data()[i]);
212  if (fabsf(t_result(i) - m_result(i)) < 1e-4f) {
213  continue;
214  }
215  if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) {
216  continue;
217  }
218  std::cout << "mismatch detected at index " << i << ": " << t_result(i) << " vs " << m_result(i) << std::endl;
219  assert(false);
220  }
221 }
EIGEN_DEVICE_FUNC bool isApprox(const Scalar &x, const Scalar &y, const typename NumTraits< Scalar >::Real &precision=NumTraits< Scalar >::dummy_precision())
Definition: MathFunctions.h:1923

References assert, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), i, Eigen::internal::isApprox(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY.

◆ test_multithread_contraction_agrees_with_singlethread()

template<int DataLayout>
void test_multithread_contraction_agrees_with_singlethread ( )
311  {
312  int contract_size = internal::random<int>(1, 5000);
313 
314  Tensor<float, 3, DataLayout> left(internal::random<int>(1, 80), contract_size, internal::random<int>(1, 100));
315 
316  Tensor<float, 4, DataLayout> right(internal::random<int>(1, 25), internal::random<int>(1, 37), contract_size,
317  internal::random<int>(1, 51));
318 
319  left.setRandom();
320  right.setRandom();
321 
322  // add constants to shift values away from 0 for more precision
323  left += left.constant(1.5f);
324  right += right.constant(1.5f);
325 
327  Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});
328 
329  Eigen::ThreadPool tp(internal::random<int>(2, 11));
330  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
331 
333  st_result = left.contract(right, dims);
334 
335  Tensor<float, 5, DataLayout> tp_result(st_result.dimensions());
336  tp_result.device(thread_pool_device) = left.contract(right, dims);
337 
338  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
339  for (ptrdiff_t i = 0; i < st_result.size(); i++) {
340  // if both of the values are very small, then do nothing (because the test will fail
341  // due to numerical precision issues when values are small)
342  if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
343  VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
344  }
345  }
346 }

References abs(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), Eigen::dimensions_match(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), VERIFY, and VERIFY_IS_APPROX.

◆ test_multithread_contraction_with_output_kernel()

template<int DataLayout>
static void test_multithread_contraction_with_output_kernel ( )
static
363  {
365 
366  const int num_threads = internal::random<int>(2, 11);
367  ThreadPool threads(num_threads);
368  Eigen::ThreadPoolDevice device(&threads, num_threads);
369 
370  Tensor<float, 4, DataLayout> t_left(30, 50, 8, 31);
371  Tensor<float, 5, DataLayout> t_right(8, 31, 7, 20, 10);
372  Tensor<float, 5, DataLayout> t_result(30, 50, 7, 20, 10);
373 
374  t_left.setRandom();
375  t_right.setRandom();
376  // Put trash in t_result to verify contraction clears output memory.
377  t_result.setRandom();
378 
379  // Add a little offset so that the results won't be close to zero.
380  t_left += t_left.constant(1.0f);
381  t_right += t_right.constant(1.0f);
382 
384  MapXf m_left(t_left.data(), 1500, 248);
385  MapXf m_right(t_right.data(), 248, 1400);
387 
388  // this contraction should be equivalent to a single matrix multiplication
389  Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
390 
391  // compute results by separate methods
392  t_result.device(device) = t_left.contract(t_right, dims, SqrtOutputKernel());
393 
394  m_result = m_left * m_right;
395 
396  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
397  VERIFY(&t_result.data()[i] != &m_result.data()[i]);
398  VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
399  }
400 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), sqrt(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), VERIFY, and VERIFY_IS_APPROX.

◆ test_multithread_elementwise()

void test_multithread_elementwise ( )
38  {
39  Tensor<float, 3> in1(200, 30, 70);
40  Tensor<float, 3> in2(200, 30, 70);
41  Tensor<double, 3> out(200, 30, 70);
42 
43  in1.setRandom();
44  in2.setRandom();
45 
46  Eigen::ThreadPool tp(internal::random<int>(3, 11));
47  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
48  out.device(thread_pool_device) = (in1 + in2 * 3.14f).cast<double>();
49 
50  for (int i = 0; i < 200; ++i) {
51  for (int j = 0; j < 30; ++j) {
52  for (int k = 0; k < 70; ++k) {
53  VERIFY_IS_APPROX(out(i, j, k), static_cast<double>(in1(i, j, k) + in2(i, j, k) * 3.14f));
54  }
55  }
56  }
57 }

References i, j, k, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_APPROX.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithread_random()

void test_multithread_random ( )
677  {
678  Eigen::ThreadPool tp(2);
679  Eigen::ThreadPoolDevice device(&tp, 2);
680  Tensor<float, 1> t(1 << 20);
681  t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float>>();
682 }
Definition: TensorRandom.h:229
t
Definition: plotPSD.py:36

References plotPSD::t.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithread_shuffle()

template<int DataLayout>
void test_multithread_shuffle ( Allocator allocator)
685  {
686  Tensor<float, 4, DataLayout> tensor(17, 5, 7, 11);
687  tensor.setRandom();
688 
689  const int num_threads = internal::random<int>(2, 11);
690  ThreadPool threads(num_threads);
691  Eigen::ThreadPoolDevice device(&threads, num_threads, allocator);
692 
694  array<ptrdiff_t, 4> shuffles = {{2, 1, 3, 0}};
695  shuffle.device(device) = tensor.shuffle(shuffles);
696 
697  for (int i = 0; i < 17; ++i) {
698  for (int j = 0; j < 5; ++j) {
699  for (int k = 0; k < 7; ++k) {
700  for (int l = 0; l < 11; ++l) {
701  VERIFY_IS_EQUAL(tensor(i, j, k, l), shuffle(k, j, l, i));
702  }
703  }
704  }
705  }
706 }
EIGEN_STRONG_INLINE Packet2d shuffle(const Packet2d &m, const Packet2d &n, int mask)
Definition: LSX/PacketMath.h:150

References i, j, k, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::internal::shuffle(), Eigen::TensorBase< Derived, AccessLevel >::shuffle(), and VERIFY_IS_EQUAL.

◆ test_multithread_volume_patch()

void test_multithread_volume_patch ( )
129  {
130  Tensor<float, 5> in(4, 2, 3, 5, 7);
131  Tensor<float, 6> out(4, 1, 1, 1, 2 * 3 * 5, 7);
132 
133  in.setRandom();
134 
135  Eigen::ThreadPool tp(internal::random<int>(3, 11));
136  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
137 
138  out.device(thread_pool_device) = in.extract_volume_patches(1, 1, 1);
139 
140  for (int i = 0; i < in.size(); ++i) {
141  VERIFY_IS_EQUAL(in.data()[i], out.data()[i]);
142  }
143 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), i, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithreaded_reductions()

template<int DataLayout>
void test_multithreaded_reductions ( )
639  {
640  const int num_threads = internal::random<int>(3, 11);
641  ThreadPool thread_pool(num_threads);
642  Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);
643 
644  const int num_rows = internal::random<int>(13, 732);
645  const int num_cols = internal::random<int>(13, 732);
646  Tensor<float, 2, DataLayout> t1(num_rows, num_cols);
647  t1.setRandom();
648 
649  Tensor<float, 0, DataLayout> full_redux;
650  full_redux = t1.sum();
651 
652  Tensor<float, 0, DataLayout> full_redux_tp;
653  full_redux_tp.device(thread_pool_device) = t1.sum();
654 
655  // Check that the single threaded and the multi threaded reductions return
656  // the same result.
657  VERIFY_IS_APPROX(full_redux(), full_redux_tp());
658 }

References Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_APPROX.

◆ test_sharded_by_inner_dim_contraction()

template<int DataLayout>
static void test_sharded_by_inner_dim_contraction ( )
static
445  {
447 
448  const int num_threads = internal::random<int>(4, 16);
449  ThreadPool threads(num_threads);
450  Eigen::ThreadPoolDevice device(&threads, num_threads);
451 
452  Tensor<float, 2, DataLayout> t_left(2, 10000);
453  Tensor<float, 2, DataLayout> t_right(10000, 10);
454  Tensor<float, 2, DataLayout> t_result(2, 10);
455 
456  t_left.setRandom();
457  t_right.setRandom();
458  // Put trash in t_result to verify contraction clears output memory.
459  t_result.setRandom();
460 
461  // Add a little offset so that the results won't be close to zero.
462  t_left += t_left.constant(1.0f);
463  t_right += t_right.constant(1.0f);
464 
466  MapXf m_left(t_left.data(), 2, 10000);
467  MapXf m_right(t_right.data(), 10000, 10);
469 
470  // this contraction should be equivalent to a single matrix multiplication
471  Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
472 
473  // compute results by separate methods
474  t_result.device(device) = t_left.contract(t_right, dims);
475  m_result = m_left * m_right;
476 
477  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
478  VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
479  }
480 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), and VERIFY_IS_APPROX.

◆ test_sharded_by_inner_dim_contraction_with_output_kernel()

template<int DataLayout>
static void test_sharded_by_inner_dim_contraction_with_output_kernel ( )
static
484  {
486 
487  const int num_threads = internal::random<int>(4, 16);
488  ThreadPool threads(num_threads);
489  Eigen::ThreadPoolDevice device(&threads, num_threads);
490 
491  Tensor<float, 2, DataLayout> t_left(2, 10000);
492  Tensor<float, 2, DataLayout> t_right(10000, 10);
493  Tensor<float, 2, DataLayout> t_result(2, 10);
494 
495  t_left.setRandom();
496  t_right.setRandom();
497  // Put trash in t_result to verify contraction clears output memory.
498  t_result.setRandom();
499 
500  // Add a little offset so that the results won't be close to zero.
501  t_left += t_left.constant(1.0f);
502  t_right += t_right.constant(1.0f);
503 
505  MapXf m_left(t_left.data(), 2, 10000);
506  MapXf m_right(t_right.data(), 10000, 10);
508 
509  // this contraction should be equivalent to a single matrix multiplication
510  Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
511 
512  // compute results by separate methods
513  t_result.device(device) = t_left.contract(t_right, dims, SqrtOutputKernel());
514  m_result = m_left * m_right;
515 
516  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
517  VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
518  }
519 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), sqrt(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), and VERIFY_IS_APPROX.

◆ test_threadpool_allocate()

void test_threadpool_allocate ( TestAllocator allocator)
708  {
709  const int num_threads = internal::random<int>(2, 11);
710  const int num_allocs = internal::random<int>(2, 11);
711  ThreadPool threads(num_threads);
712  Eigen::ThreadPoolDevice device(&threads, num_threads, allocator);
713 
714  for (int a = 0; a < num_allocs; ++a) {
715  void* ptr = device.allocate(512);
716  device.deallocate(ptr);
717  }
718  VERIFY(allocator != NULL);
719  VERIFY_IS_EQUAL(allocator->alloc_count(), num_allocs);
720  VERIFY_IS_EQUAL(allocator->dealloc_count(), num_allocs);
721 }
int dealloc_count() const
Definition: cxx11_tensor_thread_pool.cpp:31
int alloc_count() const
Definition: cxx11_tensor_thread_pool.cpp:30
References TestAllocator::alloc_count(), TestAllocator::dealloc_count(), VERIFY, and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().