cxx11_tensor_thread_pool.cpp File Reference
#include "main.h"
#include <iostream>
#include <Eigen/CXX11/Tensor>

Classes

class  TestAllocator
 
struct  SqrtOutputKernel
 

Macros

#define EIGEN_USE_THREADS
 

Functions

void test_multithread_elementwise ()
 
void test_async_multithread_elementwise ()
 
void test_multithread_chip ()
 
void test_async_multithread_chip ()
 
void test_multithread_volume_patch ()
 
void test_async_multithread_volume_patch ()
 
void test_multithread_compound_assignment ()
 
template<int DataLayout>
void test_multithread_contraction ()
 
template<int DataLayout>
void test_contraction_corner_cases ()
 
template<int DataLayout>
void test_multithread_contraction_agrees_with_singlethread ()
 
template<int DataLayout>
static void test_multithread_contraction_with_output_kernel ()
 
template<int DataLayout>
void test_async_multithread_contraction_agrees_with_singlethread ()
 
template<int DataLayout>
static void test_sharded_by_inner_dim_contraction ()
 
template<int DataLayout>
static void test_sharded_by_inner_dim_contraction_with_output_kernel ()
 
template<int DataLayout>
static void test_async_sharded_by_inner_dim_contraction ()
 
template<int DataLayout>
static void test_async_sharded_by_inner_dim_contraction_with_output_kernel ()
 
template<int DataLayout>
void test_full_contraction ()
 
template<int DataLayout>
void test_multithreaded_reductions ()
 
void test_memcpy ()
 
void test_multithread_random ()
 
template<int DataLayout>
void test_multithread_shuffle (Allocator *allocator)
 
void test_threadpool_allocate (TestAllocator *allocator)
 
 EIGEN_DECLARE_TEST (cxx11_tensor_thread_pool)
 

Macro Definition Documentation

◆ EIGEN_USE_THREADS

#define EIGEN_USE_THREADS

Function Documentation

◆ EIGEN_DECLARE_TEST()

EIGEN_DECLARE_TEST ( cxx11_tensor_thread_pool  )
723  {
727 
728  CALL_SUBTEST_2(test_multithread_contraction<ColMajor>());
729  CALL_SUBTEST_2(test_multithread_contraction<RowMajor>());
730 
733 
736 
737  CALL_SUBTEST_5(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
738  CALL_SUBTEST_5(test_multithread_contraction_agrees_with_singlethread<RowMajor>());
739  CALL_SUBTEST_5(test_multithread_contraction_with_output_kernel<ColMajor>());
740  CALL_SUBTEST_5(test_multithread_contraction_with_output_kernel<RowMajor>());
741 
742  CALL_SUBTEST_6(test_async_multithread_contraction_agrees_with_singlethread<ColMajor>());
743  CALL_SUBTEST_6(test_async_multithread_contraction_agrees_with_singlethread<RowMajor>());
744 
745  // Test EvalShardedByInnerDimContext parallelization strategy.
746  CALL_SUBTEST_7(test_sharded_by_inner_dim_contraction<ColMajor>());
747  CALL_SUBTEST_7(test_sharded_by_inner_dim_contraction<RowMajor>());
748  CALL_SUBTEST_7(test_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
749  CALL_SUBTEST_7(test_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
750 
751  CALL_SUBTEST_8(test_async_sharded_by_inner_dim_contraction<ColMajor>());
752  CALL_SUBTEST_8(test_async_sharded_by_inner_dim_contraction<RowMajor>());
753  CALL_SUBTEST_8(test_async_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
754  CALL_SUBTEST_8(test_async_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
755 
756  // Exercise various cases that have been problematic in the past.
757  CALL_SUBTEST_9(test_contraction_corner_cases<ColMajor>());
758  CALL_SUBTEST_9(test_contraction_corner_cases<RowMajor>());
759 
760  CALL_SUBTEST_10(test_full_contraction<ColMajor>());
761  CALL_SUBTEST_10(test_full_contraction<RowMajor>());
762 
763  CALL_SUBTEST_11(test_multithreaded_reductions<ColMajor>());
764  CALL_SUBTEST_11(test_multithreaded_reductions<RowMajor>());
765 
768 
769  TestAllocator test_allocator;
770  CALL_SUBTEST_13(test_multithread_shuffle<ColMajor>(NULL));
771  CALL_SUBTEST_13(test_multithread_shuffle<RowMajor>(&test_allocator));
772  CALL_SUBTEST_13(test_threadpool_allocate(&test_allocator));
773 
774  // Force CMake to split this test.
775  // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11;12;13
776 }
Definition: cxx11_tensor_thread_pool.cpp:18
void test_multithread_chip()
Definition: cxx11_tensor_thread_pool.cpp:83
void test_async_multithread_elementwise()
Definition: cxx11_tensor_thread_pool.cpp:59
void test_multithread_random()
Definition: cxx11_tensor_thread_pool.cpp:677
void test_multithread_elementwise()
Definition: cxx11_tensor_thread_pool.cpp:38
void test_threadpool_allocate(TestAllocator *allocator)
Definition: cxx11_tensor_thread_pool.cpp:708
void test_async_multithread_volume_patch()
Definition: cxx11_tensor_thread_pool.cpp:145
void test_memcpy()
Definition: cxx11_tensor_thread_pool.cpp:660
void test_multithread_volume_patch()
Definition: cxx11_tensor_thread_pool.cpp:129
void test_async_multithread_chip()
Definition: cxx11_tensor_thread_pool.cpp:105
void test_multithread_compound_assignment()
Definition: cxx11_tensor_thread_pool.cpp:163
#define CALL_SUBTEST_6(FUNC)
Definition: split_test_helper.h:34
#define CALL_SUBTEST_3(FUNC)
Definition: split_test_helper.h:16
#define CALL_SUBTEST_1(FUNC)
Definition: split_test_helper.h:4
#define CALL_SUBTEST_13(FUNC)
Definition: split_test_helper.h:76
#define CALL_SUBTEST_8(FUNC)
Definition: split_test_helper.h:46
#define CALL_SUBTEST_5(FUNC)
Definition: split_test_helper.h:28
#define CALL_SUBTEST_11(FUNC)
Definition: split_test_helper.h:64
#define CALL_SUBTEST_12(FUNC)
Definition: split_test_helper.h:70
#define CALL_SUBTEST_2(FUNC)
Definition: split_test_helper.h:10
#define CALL_SUBTEST_7(FUNC)
Definition: split_test_helper.h:40
#define CALL_SUBTEST_4(FUNC)
Definition: split_test_helper.h:22
#define CALL_SUBTEST_9(FUNC)
Definition: split_test_helper.h:52
#define CALL_SUBTEST_10(FUNC)
Definition: split_test_helper.h:58

References CALL_SUBTEST_1, CALL_SUBTEST_10, CALL_SUBTEST_11, CALL_SUBTEST_12, CALL_SUBTEST_13, CALL_SUBTEST_2, CALL_SUBTEST_3, CALL_SUBTEST_4, CALL_SUBTEST_5, CALL_SUBTEST_6, CALL_SUBTEST_7, CALL_SUBTEST_8, CALL_SUBTEST_9, test_async_multithread_chip(), test_async_multithread_elementwise(), test_async_multithread_volume_patch(), test_memcpy(), test_multithread_chip(), test_multithread_compound_assignment(), test_multithread_elementwise(), test_multithread_random(), test_multithread_volume_patch(), and test_threadpool_allocate().

◆ test_async_multithread_chip()

void test_async_multithread_chip ( )
105  {
106  Tensor<float, 5> in(2, 3, 5, 7, 11);
107  Tensor<float, 4> out(3, 5, 7, 11);
108 
109  in.setRandom();
110 
111  Eigen::ThreadPool tp(internal::random<int>(3, 11));
112  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
113 
114  Eigen::Barrier b(1);
115  out.device(thread_pool_device, [&b]() { b.Notify(); }) = in.chip(1, 0);
116  b.Wait();
117 
118  for (int i = 0; i < 3; ++i) {
119  for (int j = 0; j < 5; ++j) {
120  for (int k = 0; k < 7; ++k) {
121  for (int l = 0; l < 11; ++l) {
122  VERIFY_IS_EQUAL(out(i, j, k, l), in(1, i, j, k, l));
123  }
124  }
125  }
126  }
127 }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
Scalar * b
Definition: benchVecAdd.cpp:17
Definition: Barrier.h:21
The tensor class.
Definition: Tensor.h:68
Definition: NonBlockingThreadPool.h:19
char char char int int * k
Definition: level2_impl.h:374
#define VERIFY_IS_EQUAL(a, b)
Definition: main.h:367
std::ofstream out("Result.txt")
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

References b, Eigen::TensorBase< Derived, AccessLevel >::chip(), i, j, k, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_async_multithread_contraction_agrees_with_singlethread()

template<int DataLayout>
void test_async_multithread_contraction_agrees_with_singlethread ( )
403  {
404  int contract_size = internal::random<int>(100, 500);
405 
406  Tensor<float, 3, DataLayout> left(internal::random<int>(10, 40), contract_size, internal::random<int>(10, 40));
407 
408  Tensor<float, 4, DataLayout> right(internal::random<int>(1, 20), internal::random<int>(1, 20), contract_size,
409  internal::random<int>(1, 20));
410 
411  left.setRandom();
412  right.setRandom();
413 
414  // add constants to shift values away from 0 for more precision
415  left += left.constant(1.5f);
416  right += right.constant(1.5f);
417 
419  Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});
420 
421  Eigen::ThreadPool tp(internal::random<int>(2, 11));
422  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(8, 32));
423 
425  st_result = left.contract(right, dims);
426 
427  Tensor<float, 5, DataLayout> tp_result(st_result.dimensions());
428 
429  Eigen::Barrier barrier(1);
430  tp_result.device(thread_pool_device, [&barrier]() { barrier.Notify(); }) = left.contract(right, dims);
431  barrier.Wait();
432 
433  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
434  for (ptrdiff_t i = 0; i < st_result.size(); i++) {
435  // if both of the values are very small, then do nothing (because the test
436  // will fail due to numerical precision issues when values are small)
437  if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
438  VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
439  }
440  }
441 }
AnnoyingScalar abs(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:135
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: Tensor.h:100
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const
Definition: Tensor.h:101
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar * data()
Definition: Tensor.h:102
Tensor< float, 1 >::DimensionPair DimPair
Definition: cxx11_tensor_contraction.cpp:17
#define VERIFY_IS_APPROX(a, b)
Definition: integer_types.cpp:13
#define VERIFY(a)
Definition: main.h:362
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool dimensions_match(Dims1 dims1, Dims2 dims2)
Definition: TensorDimensions.h:322
std::array< T, N > array
Definition: EmulateArray.h:231

References abs(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), Eigen::dimensions_match(), i, Eigen::Barrier::Notify(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), VERIFY, VERIFY_IS_APPROX, and Eigen::Barrier::Wait().

◆ test_async_multithread_elementwise()

void test_async_multithread_elementwise ( )
59  {
60  Tensor<float, 3> in1(200, 30, 70);
61  Tensor<float, 3> in2(200, 30, 70);
62  Tensor<double, 3> out(200, 30, 70);
63 
64  in1.setRandom();
65  in2.setRandom();
66 
67  Eigen::ThreadPool tp(internal::random<int>(3, 11));
68  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
69 
70  Eigen::Barrier b(1);
71  out.device(thread_pool_device, [&b]() { b.Notify(); }) = (in1 + in2 * 3.14f).cast<double>();
72  b.Wait();
73 
74  for (int i = 0; i < 200; ++i) {
75  for (int j = 0; j < 30; ++j) {
76  for (int k = 0; k < 70; ++k) {
77  VERIFY_IS_APPROX(out(i, j, k), static_cast<double>(in1(i, j, k) + in2(i, j, k) * 3.14f));
78  }
79  }
80  }
81 }

References b, i, j, k, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_APPROX.

Referenced by EIGEN_DECLARE_TEST().

◆ test_async_multithread_volume_patch()

void test_async_multithread_volume_patch ( )
145  {
146  Tensor<float, 5> in(4, 2, 3, 5, 7);
147  Tensor<float, 6> out(4, 1, 1, 1, 2 * 3 * 5, 7);
148 
149  in.setRandom();
150 
151  Eigen::ThreadPool tp(internal::random<int>(3, 11));
152  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
153 
154  Eigen::Barrier b(1);
155  out.device(thread_pool_device, [&b]() { b.Notify(); }) = in.extract_volume_patches(1, 1, 1);
156  b.Wait();
157 
158  for (int i = 0; i < in.size(); ++i) {
159  VERIFY_IS_EQUAL(in.data()[i], out.data()[i]);
160  }
161 }

References b, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), i, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_async_sharded_by_inner_dim_contraction()

template<int DataLayout>
static void test_async_sharded_by_inner_dim_contraction ( )
static
523  {
525 
526  const int num_threads = internal::random<int>(4, 16);
527  ThreadPool threads(num_threads);
528  Eigen::ThreadPoolDevice device(&threads, num_threads);
529 
530  Tensor<float, 2, DataLayout> t_left(2, 10000);
531  Tensor<float, 2, DataLayout> t_right(10000, 10);
532  Tensor<float, 2, DataLayout> t_result(2, 10);
533 
534  t_left.setRandom();
535  t_right.setRandom();
536  // Put trash in t_result to verify contraction clears output memory.
537  t_result.setRandom();
538 
539  // Add a little offset so that the results won't be close to zero.
540  t_left += t_left.constant(1.0f);
541  t_right += t_right.constant(1.0f);
542 
544  MapXf m_left(t_left.data(), 2, 10000);
545  MapXf m_right(t_right.data(), 10000, 10);
547 
548  // this contraction should be equivalent to a single matrix multiplication
549  Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
550 
551  // compute results by separate methods
552  Eigen::Barrier barrier(1);
553  t_result.device(device, [&barrier]() { barrier.Notify(); }) = t_left.contract(t_right, dims);
554  barrier.Wait();
555 
556  m_result = m_left * m_right;
557 
558  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
559  VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
560  }
561 }
A matrix or vector expression mapping an existing array of data.
Definition: Map.h:96
The matrix class, also used for vectors and row-vectors.
Definition: Eigen/Eigen/src/Core/Matrix.h:186
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_APPROX.

◆ test_async_sharded_by_inner_dim_contraction_with_output_kernel()

template<int DataLayout>
static void test_async_sharded_by_inner_dim_contraction_with_output_kernel ( )
static
565  {
567 
568  const int num_threads = internal::random<int>(4, 16);
569  ThreadPool threads(num_threads);
570  Eigen::ThreadPoolDevice device(&threads, num_threads);
571 
572  Tensor<float, 2, DataLayout> t_left(2, 10000);
573  Tensor<float, 2, DataLayout> t_right(10000, 10);
574  Tensor<float, 2, DataLayout> t_result(2, 10);
575 
576  t_left.setRandom();
577  t_right.setRandom();
578  // Put trash in t_result to verify contraction clears output memory.
579  t_result.setRandom();
580 
581  // Add a little offset so that the results won't be close to zero.
582  t_left += t_left.constant(1.0f);
583  t_right += t_right.constant(1.0f);
584 
586  MapXf m_left(t_left.data(), 2, 10000);
587  MapXf m_right(t_right.data(), 10000, 10);
589 
590  // this contraction should be equivalent to a single matrix multiplication
591  Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
592 
593  // compute results by separate methods
594  Eigen::Barrier barrier(1);
595  t_result.device(device, [&barrier]() { barrier.Notify(); }) = t_left.contract(t_right, dims, SqrtOutputKernel());
596  barrier.Wait();
597  m_result = m_left * m_right;
598 
599  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
600  VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
601  }
602 }
AnnoyingScalar sqrt(const AnnoyingScalar &x)
Definition: AnnoyingScalar.h:134
Definition: cxx11_tensor_contraction.cpp:484

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), sqrt(), and VERIFY_IS_APPROX.

◆ test_contraction_corner_cases()

template<int DataLayout>
void test_contraction_corner_cases ( )
224  {
225  Tensor<float, 2, DataLayout> t_left(32, 500);
226  Tensor<float, 2, DataLayout> t_right(32, 28 * 28);
227  Tensor<float, 2, DataLayout> t_result(500, 28 * 28);
228 
229  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
230  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
231  t_result = t_result.constant(NAN);
232 
233  // this contraction should be equivalent to a single matrix multiplication
235  Eigen::array<DimPair, 1> dims{{DimPair(0, 0)}};
236 
238  MapXf m_left(t_left.data(), 32, 500);
239  MapXf m_right(t_right.data(), 32, 28 * 28);
240  Matrix<float, Dynamic, Dynamic, DataLayout> m_result(500, 28 * 28);
241 
242  Eigen::ThreadPool tp(12);
243  Eigen::ThreadPoolDevice thread_pool_device(&tp, 12);
244 
245  // compute results by separate methods
246  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
247  m_result = m_left.transpose() * m_right;
248 
249  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
250  assert(!(numext::isnan)(t_result.data()[i]));
251  if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
252  std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i]
253  << std::endl;
254  assert(false);
255  }
256  }
257 
258  t_left.resize(32, 1);
259  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
260  t_result.resize(1, 28 * 28);
261  t_result = t_result.constant(NAN);
262  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
263  new (&m_left) MapXf(t_left.data(), 32, 1);
264  m_result = m_left.transpose() * m_right;
265  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
266  assert(!(numext::isnan)(t_result.data()[i]));
267  if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
268  std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
269  assert(false);
270  }
271  }
272 
273  t_left.resize(32, 500);
274  t_right.resize(32, 4);
275  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
276  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
277  t_result.resize(500, 4);
278  t_result = t_result.constant(NAN);
279  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
280  new (&m_left) MapXf(t_left.data(), 32, 500);
281  new (&m_right) MapXf(t_right.data(), 32, 4);
282  m_result = m_left.transpose() * m_right;
283  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
284  assert(!(numext::isnan)(t_result.data()[i]));
285  if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
286  std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
287  assert(false);
288  }
289  }
290 
291  t_left.resize(32, 1);
292  t_right.resize(32, 4);
293  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
294  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
295  t_result.resize(1, 4);
296  t_result = t_result.constant(NAN);
297  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
298  new (&m_left) MapXf(t_left.data(), 32, 1);
299  new (&m_right) MapXf(t_right.data(), 32, 4);
300  m_result = m_left.transpose() * m_right;
301  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
302  assert(!(numext::isnan)(t_result.data()[i]));
303  if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
304  std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
305  assert(false);
306  }
307  }
308 }
#define assert(e,...)
Definition: Logger.h:744
#define isnan(X)
Definition: main.h:109

References assert, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), i, isnan, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::resize(), and Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size().

◆ test_full_contraction()

template<int DataLayout>
void test_full_contraction ( )
605  {
606  int contract_size1 = internal::random<int>(1, 500);
607  int contract_size2 = internal::random<int>(1, 500);
608 
609  Tensor<float, 2, DataLayout> left(contract_size1, contract_size2);
610  Tensor<float, 2, DataLayout> right(contract_size1, contract_size2);
611  left.setRandom();
612  right.setRandom();
613 
614  // add constants to shift values away from 0 for more precision
615  left += left.constant(1.5f);
616  right += right.constant(1.5f);
617 
619  Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});
620 
621  Eigen::ThreadPool tp(internal::random<int>(2, 11));
622  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
623 
625  st_result = left.contract(right, dims);
626 
628  tp_result.device(thread_pool_device) = left.contract(right, dims);
629 
630  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
631  // if both of the values are very small, then do nothing (because the test will fail
632  // due to numerical precision issues when values are small)
633  if (numext::abs(st_result() - tp_result()) >= 1e-4f) {
634  VERIFY_IS_APPROX(st_result(), tp_result());
635  }
636 }
TensorDevice< Derived, DeviceType > device(const DeviceType &dev)
Definition: TensorBase.h:1209

References abs(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), Eigen::dimensions_match(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), VERIFY, and VERIFY_IS_APPROX.

◆ test_memcpy()

void test_memcpy ( )
660  {
661  for (int i = 0; i < 5; ++i) {
662  const int num_threads = internal::random<int>(3, 11);
663  Eigen::ThreadPool tp(num_threads);
664  Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads);
665 
666  const int size = internal::random<int>(13, 7632);
668  t1.setRandom();
669  std::vector<float> result(size);
670  thread_pool_device.memcpy(&result[0], t1.data(), size * sizeof(float));
671  for (int j = 0; j < size; j++) {
672  VERIFY_IS_EQUAL(t1(j), result[j]);
673  }
674  }
675 }
Scalar Scalar int size
Definition: benchVecAdd.cpp:17

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), i, j, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), size, and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithread_chip()

void test_multithread_chip ( )
83  {
84  Tensor<float, 5> in(2, 3, 5, 7, 11);
85  Tensor<float, 4> out(3, 5, 7, 11);
86 
87  in.setRandom();
88 
89  Eigen::ThreadPool tp(internal::random<int>(3, 11));
90  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
91 
92  out.device(thread_pool_device) = in.chip(1, 0);
93 
94  for (int i = 0; i < 3; ++i) {
95  for (int j = 0; j < 5; ++j) {
96  for (int k = 0; k < 7; ++k) {
97  for (int l = 0; l < 11; ++l) {
98  VERIFY_IS_EQUAL(out(i, j, k, l), in(1, i, j, k, l));
99  }
100  }
101  }
102  }
103 }

References Eigen::TensorBase< Derived, AccessLevel >::chip(), i, j, k, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithread_compound_assignment()

void test_multithread_compound_assignment ( )
163  {
164  Tensor<float, 3> in1(2, 3, 7);
165  Tensor<float, 3> in2(2, 3, 7);
166  Tensor<float, 3> out(2, 3, 7);
167 
168  in1.setRandom();
169  in2.setRandom();
170 
171  Eigen::ThreadPool tp(internal::random<int>(3, 11));
172  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
173  out.device(thread_pool_device) = in1;
174  out.device(thread_pool_device) += in2 * 3.14f;
175 
176  for (int i = 0; i < 2; ++i) {
177  for (int j = 0; j < 3; ++j) {
178  for (int k = 0; k < 7; ++k) {
179  VERIFY_IS_APPROX(out(i, j, k), in1(i, j, k) + in2(i, j, k) * 3.14f);
180  }
181  }
182  }
183 }

References i, j, k, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_APPROX.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithread_contraction()

template<int DataLayout>
void test_multithread_contraction ( )
186  {
187  Tensor<float, 4, DataLayout> t_left(30, 50, 37, 31);
188  Tensor<float, 5, DataLayout> t_right(37, 31, 70, 2, 10);
189  Tensor<float, 5, DataLayout> t_result(30, 50, 70, 2, 10);
190 
191  t_left.setRandom();
192  t_right.setRandom();
193 
194  // this contraction should be equivalent to a single matrix multiplication
196  Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
197 
199  MapXf m_left(t_left.data(), 1500, 1147);
200  MapXf m_right(t_right.data(), 1147, 1400);
201  Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);
202 
203  Eigen::ThreadPool tp(4);
204  Eigen::ThreadPoolDevice thread_pool_device(&tp, 4);
205 
206  // compute results by separate methods
207  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
208  m_result = m_left * m_right;
209 
210  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
211  VERIFY(&t_result.data()[i] != &m_result.data()[i]);
212  if (fabsf(t_result(i) - m_result(i)) < 1e-4f) {
213  continue;
214  }
215  if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) {
216  continue;
217  }
218  std::cout << "mismatch detected at index " << i << ": " << t_result(i) << " vs " << m_result(i) << std::endl;
219  assert(false);
220  }
221 }
EIGEN_DEVICE_FUNC bool isApprox(const Scalar &x, const Scalar &y, const typename NumTraits< Scalar >::Real &precision=NumTraits< Scalar >::dummy_precision())
Definition: MathFunctions.h:1923

References assert, Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), i, Eigen::internal::isApprox(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY.

◆ test_multithread_contraction_agrees_with_singlethread()

template<int DataLayout>
void test_multithread_contraction_agrees_with_singlethread ( )
311  {
312  int contract_size = internal::random<int>(1, 5000);
313 
314  Tensor<float, 3, DataLayout> left(internal::random<int>(1, 80), contract_size, internal::random<int>(1, 100));
315 
316  Tensor<float, 4, DataLayout> right(internal::random<int>(1, 25), internal::random<int>(1, 37), contract_size,
317  internal::random<int>(1, 51));
318 
319  left.setRandom();
320  right.setRandom();
321 
322  // add constants to shift values away from 0 for more precision
323  left += left.constant(1.5f);
324  right += right.constant(1.5f);
325 
327  Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});
328 
329  Eigen::ThreadPool tp(internal::random<int>(2, 11));
330  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
331 
333  st_result = left.contract(right, dims);
334 
335  Tensor<float, 5, DataLayout> tp_result(st_result.dimensions());
336  tp_result.device(thread_pool_device) = left.contract(right, dims);
337 
338  VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
339  for (ptrdiff_t i = 0; i < st_result.size(); i++) {
340  // if both of the values are very small, then do nothing (because the test will fail
341  // due to numerical precision issues when values are small)
342  if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
343  VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
344  }
345  }
346 }

References abs(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), Eigen::dimensions_match(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), VERIFY, and VERIFY_IS_APPROX.

◆ test_multithread_contraction_with_output_kernel()

template<int DataLayout>
static void test_multithread_contraction_with_output_kernel ( )
static
363  {
365 
366  const int num_threads = internal::random<int>(2, 11);
367  ThreadPool threads(num_threads);
368  Eigen::ThreadPoolDevice device(&threads, num_threads);
369 
370  Tensor<float, 4, DataLayout> t_left(30, 50, 8, 31);
371  Tensor<float, 5, DataLayout> t_right(8, 31, 7, 20, 10);
372  Tensor<float, 5, DataLayout> t_result(30, 50, 7, 20, 10);
373 
374  t_left.setRandom();
375  t_right.setRandom();
376  // Put trash in t_result to verify contraction clears output memory.
377  t_result.setRandom();
378 
379  // Add a little offset so that the results won't be close to zero.
380  t_left += t_left.constant(1.0f);
381  t_right += t_right.constant(1.0f);
382 
384  MapXf m_left(t_left.data(), 1500, 248);
385  MapXf m_right(t_right.data(), 248, 1400);
387 
388  // this contraction should be equivalent to a single matrix multiplication
389  Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
390 
391  // compute results by separate methods
392  t_result.device(device) = t_left.contract(t_right, dims, SqrtOutputKernel());
393 
394  m_result = m_left * m_right;
395 
396  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
397  VERIFY(&t_result.data()[i] != &m_result.data()[i]);
398  VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
399  }
400 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), sqrt(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), VERIFY, and VERIFY_IS_APPROX.

◆ test_multithread_elementwise()

void test_multithread_elementwise ( )
38  {
39  Tensor<float, 3> in1(200, 30, 70);
40  Tensor<float, 3> in2(200, 30, 70);
41  Tensor<double, 3> out(200, 30, 70);
42 
43  in1.setRandom();
44  in2.setRandom();
45 
46  Eigen::ThreadPool tp(internal::random<int>(3, 11));
47  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
48  out.device(thread_pool_device) = (in1 + in2 * 3.14f).cast<double>();
49 
50  for (int i = 0; i < 200; ++i) {
51  for (int j = 0; j < 30; ++j) {
52  for (int k = 0; k < 70; ++k) {
53  VERIFY_IS_APPROX(out(i, j, k), static_cast<double>(in1(i, j, k) + in2(i, j, k) * 3.14f));
54  }
55  }
56  }
57 }

References i, j, k, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_APPROX.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithread_random()

void test_multithread_random ( )
677  {
678  Eigen::ThreadPool tp(2);
679  Eigen::ThreadPoolDevice device(&tp, 2);
680  Tensor<float, 1> t(1 << 20);
681  t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float>>();
682 }
Definition: TensorRandom.h:229
t
Definition: plotPSD.py:36

References plotPSD::t.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithread_shuffle()

template<int DataLayout>
void test_multithread_shuffle ( Allocator allocator)
685  {
686  Tensor<float, 4, DataLayout> tensor(17, 5, 7, 11);
687  tensor.setRandom();
688 
689  const int num_threads = internal::random<int>(2, 11);
690  ThreadPool threads(num_threads);
691  Eigen::ThreadPoolDevice device(&threads, num_threads, allocator);
692 
694  array<ptrdiff_t, 4> shuffles = {{2, 1, 3, 0}};
695  shuffle.device(device) = tensor.shuffle(shuffles);
696 
697  for (int i = 0; i < 17; ++i) {
698  for (int j = 0; j < 5; ++j) {
699  for (int k = 0; k < 7; ++k) {
700  for (int l = 0; l < 11; ++l) {
701  VERIFY_IS_EQUAL(tensor(i, j, k, l), shuffle(k, j, l, i));
702  }
703  }
704  }
705  }
706 }
EIGEN_STRONG_INLINE Packet2d shuffle(const Packet2d &m, const Packet2d &n, int mask)
Definition: LSX/PacketMath.h:150

References i, j, k, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::internal::shuffle(), Eigen::TensorBase< Derived, AccessLevel >::shuffle(), and VERIFY_IS_EQUAL.

◆ test_multithread_volume_patch()

void test_multithread_volume_patch ( )
129  {
130  Tensor<float, 5> in(4, 2, 3, 5, 7);
131  Tensor<float, 6> out(4, 1, 1, 1, 2 * 3 * 5, 7);
132 
133  in.setRandom();
134 
135  Eigen::ThreadPool tp(internal::random<int>(3, 11));
136  Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
137 
138  out.device(thread_pool_device) = in.extract_volume_patches(1, 1, 1);
139 
140  for (int i = 0; i < in.size(); ++i) {
141  VERIFY_IS_EQUAL(in.data()[i], out.data()[i]);
142  }
143 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), i, out(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::size(), and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().

◆ test_multithreaded_reductions()

template<int DataLayout>
void test_multithreaded_reductions ( )
639  {
640  const int num_threads = internal::random<int>(3, 11);
641  ThreadPool thread_pool(num_threads);
642  Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);
643 
644  const int num_rows = internal::random<int>(13, 732);
645  const int num_cols = internal::random<int>(13, 732);
646  Tensor<float, 2, DataLayout> t1(num_rows, num_cols);
647  t1.setRandom();
648 
649  Tensor<float, 0, DataLayout> full_redux;
650  full_redux = t1.sum();
651 
652  Tensor<float, 0, DataLayout> full_redux_tp;
653  full_redux_tp.device(thread_pool_device) = t1.sum();
654 
655  // Check that the single threaded and the multi threaded reductions return
656  // the same result.
657  VERIFY_IS_APPROX(full_redux(), full_redux_tp());
658 }

References Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::TensorBase< Derived, AccessLevel >::setRandom(), and VERIFY_IS_APPROX.

◆ test_sharded_by_inner_dim_contraction()

template<int DataLayout>
static void test_sharded_by_inner_dim_contraction ( )
static
445  {
447 
448  const int num_threads = internal::random<int>(4, 16);
449  ThreadPool threads(num_threads);
450  Eigen::ThreadPoolDevice device(&threads, num_threads);
451 
452  Tensor<float, 2, DataLayout> t_left(2, 10000);
453  Tensor<float, 2, DataLayout> t_right(10000, 10);
454  Tensor<float, 2, DataLayout> t_result(2, 10);
455 
456  t_left.setRandom();
457  t_right.setRandom();
458  // Put trash in t_result to verify contraction clears output memory.
459  t_result.setRandom();
460 
461  // Add a little offset so that the results won't be close to zero.
462  t_left += t_left.constant(1.0f);
463  t_right += t_right.constant(1.0f);
464 
466  MapXf m_left(t_left.data(), 2, 10000);
467  MapXf m_right(t_right.data(), 10000, 10);
469 
470  // this contraction should be equivalent to a single matrix multiplication
471  Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
472 
473  // compute results by separate methods
474  t_result.device(device) = t_left.contract(t_right, dims);
475  m_result = m_left * m_right;
476 
477  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
478  VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
479  }
480 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), and VERIFY_IS_APPROX.

◆ test_sharded_by_inner_dim_contraction_with_output_kernel()

template<int DataLayout>
static void test_sharded_by_inner_dim_contraction_with_output_kernel ( )
static
484  {
486 
487  const int num_threads = internal::random<int>(4, 16);
488  ThreadPool threads(num_threads);
489  Eigen::ThreadPoolDevice device(&threads, num_threads);
490 
491  Tensor<float, 2, DataLayout> t_left(2, 10000);
492  Tensor<float, 2, DataLayout> t_right(10000, 10);
493  Tensor<float, 2, DataLayout> t_result(2, 10);
494 
495  t_left.setRandom();
496  t_right.setRandom();
497  // Put trash in t_result to verify contraction clears output memory.
498  t_result.setRandom();
499 
500  // Add a little offset so that the results won't be close to zero.
501  t_left += t_left.constant(1.0f);
502  t_right += t_right.constant(1.0f);
503 
505  MapXf m_left(t_left.data(), 2, 10000);
506  MapXf m_right(t_right.data(), 10000, 10);
508 
509  // this contraction should be equivalent to a single matrix multiplication
510  Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
511 
512  // compute results by separate methods
513  t_result.device(device) = t_left.contract(t_right, dims, SqrtOutputKernel());
514  m_result = m_left * m_right;
515 
516  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
517  VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
518  }
519 }

References Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::data(), Eigen::PlainObjectBase< Derived >::data(), Eigen::TensorBase< Derived, AccessLevel >::device(), Eigen::Tensor< Scalar_, NumIndices_, Options_, IndexType_ >::dimensions(), i, Eigen::TensorBase< Derived, AccessLevel >::setRandom(), sqrt(), Eigen::DSizes< DenseIndex, NumDims >::TotalSize(), and VERIFY_IS_APPROX.

◆ test_threadpool_allocate()

void test_threadpool_allocate ( TestAllocator allocator)
708  {
709  const int num_threads = internal::random<int>(2, 11);
710  const int num_allocs = internal::random<int>(2, 11);
711  ThreadPool threads(num_threads);
712  Eigen::ThreadPoolDevice device(&threads, num_threads, allocator);
713 
714  for (int a = 0; a < num_allocs; ++a) {
715  void* ptr = device.allocate(512);
716  device.deallocate(ptr);
717  }
718  VERIFY(allocator != NULL);
719  VERIFY_IS_EQUAL(allocator->alloc_count(), num_allocs);
720  VERIFY_IS_EQUAL(allocator->dealloc_count(), num_allocs);
721 }
int dealloc_count() const
Definition: cxx11_tensor_thread_pool.cpp:31
int alloc_count() const
Definition: cxx11_tensor_thread_pool.cpp:30
References TestAllocator::alloc_count(), TestAllocator::dealloc_count(), VERIFY, and VERIFY_IS_EQUAL.

Referenced by EIGEN_DECLARE_TEST().