TensorForcedEval.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
12 
13 // IWYU pragma: private
14 #include "./InternalHeaderCheck.h"
15 
16 #include <memory>
17 
18 namespace Eigen {
19 
27 namespace internal {
// Compile-time properties of a forced-eval expression, forwarded from the wrapped XprType.
// NOTE(review): the struct declaration (line 29) and the XprTraits / StorageKind typedefs
// (lines 32-33) are missing from this listing. The index at the end of the listing gives
// XprTraits as traits<XprType> and StorageKind as traits<XprType>::StorageKind.
28 template <typename XprType>
30  // The scalar type is taken directly from the wrapped expression; there is a single XprType here, so no lhs/rhs promotion is involved.
31  typedef typename XprType::Scalar Scalar;
34  typedef typename traits<XprType>::Index Index;
35  typedef typename XprType::Nested Nested;
36  typedef std::remove_reference_t<Nested> Nested_;
 // Rank, layout and pointer type are forwarded unchanged from the wrapped expression's traits.
37  static constexpr int NumDimensions = XprTraits::NumDimensions;
38  static constexpr int Layout = XprTraits::Layout;
39  typedef typename XprTraits::PointerType PointerType;
40 
 // No expression flags are set for this node.
41  enum { Flags = 0 };
42 };
43 
44 template <typename XprType>
47 };
48 
49 template <typename XprType>
52 };
53 
54 } // end namespace internal
55 
// Expression node that wraps another tensor expression, stored in m_xpr.
// It derives from TensorBase with ReadOnlyAccessors.
// NOTE(review): lines 59-60, 62-64, 66 and 68 are missing from this listing. Per the index at
// the end of the listing they hold the Scalar, RealScalar, Nested, StorageKind and Index
// typedefs, the constructor TensorForcedEvalOp(const XprType& expr), and the accessor
// expression(), which returns a const reference to the wrapped expression type.
56 template <typename XprType>
57 class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOnlyAccessors> {
58  public:
 // Coefficient type of the wrapped expression with any top-level const removed.
61  typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
65 
67 
69 
70  protected:
 // The wrapped expression, held as XprType::Nested.
71  typename XprType::Nested m_xpr;
72 };
73 
74 namespace internal {
// Functor whose operator() value-initializes CoeffReturnType objects in place, one per element
// of m_buffer[0 .. numValues), using placement new.
// NOTE(review): the struct declaration (line 76) and line 80 are missing from this listing.
// The extra closing brace on line 82 implies line 80 opens a scope around the loop,
// presumably a condition selecting which types need construction. Confirm against the header.
75 template <typename Device, typename CoeffReturnType>
77  template <typename StorageType>
 // numValues: number of elements to construct; m_buffer: pointer-like start of the storage.
78  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index numValues, StorageType m_buffer) {
79  // Initialize non-trivially constructible types.
81  for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType();
82  }
83  }
84 };
85 
86 // SYCL does not support non-integral types here: placement-constructing them with
87 // `new (m_buffer + i) CoeffReturnType()` fails to compile for SYCL devices with the error
88 // "no matching function for call to 'operator new'", hence the SyclDevice specialization below.
// Specialization of non_integral_type_placement_new for Eigen::SyclDevice.
// NOTE(review): line 92 (the operator() definition) is missing from this listing. The index at
// the end of the listing gives its signature as `void operator()(Index, StorageType)` with
// unnamed parameters, so it presumably does nothing. Confirm against the original header.
89 template <typename CoeffReturnType>
90 struct non_integral_type_placement_new<Eigen::SyclDevice, CoeffReturnType> {
91  template <typename StorageType>
93 };
94 } // end namespace internal
95 
// Owns a temporary buffer obtained from a device: the constructor calls
// device.allocate_temp(size), and the destructor body (lines 103-105) hands the pointer back
// via device_.deallocate_temp(ptr_).
// NOTE(review): the class declaration (line 97) and the destructor header (line 102) are
// missing from this listing; the index at the end lists ~DeviceTempPointerHolder() at line 102.
// NOTE(review): no copy/move control is visible here. If none is declared, copying a holder
// would make two destructors deallocate the same ptr_. The only visible user keeps it behind
// std::shared_ptr. Confirm against the original header.
96 template <typename Device>
98  public:
 // `size` is forwarded unchanged to allocate_temp; the visible caller passes a byte count
 // (numValues * sizeof(CoeffReturnType)).
99  DeviceTempPointerHolder(const Device& device, size_t size)
100  : device_(device), size_(size), ptr_(device.allocate_temp(size)) {}
101 
 // Destructor body: release the buffer, then reset the bookkeeping fields.
103  device_.deallocate_temp(ptr_);
104  size_ = 0;
105  ptr_ = nullptr;
106  }
107 
 // Raw, untyped pointer to the held buffer; ownership stays with this object.
108  void* ptr() { return ptr_; }
109 
110  private:
 // The device is stored by value (a copy of the constructor argument).
111  Device device_;
112  size_t size_;
113  void* ptr_;
114 };
115 
// Evaluator for TensorForcedEvalOp. It allocates a temporary device buffer sized for every
// coefficient of the wrapped expression, sets up an EvalTo expression targeting that buffer,
// and serves coeff/packet/block reads straight from the buffer.
// NOTE(review): many declaration lines are missing from this listing: the typedefs at lines
// 118-119, 121, 123-128 and 142-145, and the function headers at lines 159, 195, 203, 207,
// 211, 217 and 221. Their signatures are given in the index at the end of the listing.
116 template <typename ArgType_, typename Device>
117 struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device> {
120  typedef typename ArgType::Scalar Scalar;
122  typedef typename XprType::Index Index;
129 
130  enum {
131  IsAligned = true,
134  PreferBlockAccess = false,
135  RawAccess = true
136  };
137 
139  static constexpr int NumDims = internal::traits<ArgType>::NumDimensions;
140 
141  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
144 
146  //===--------------------------------------------------------------------===//
147 
 // Builds the evaluator of the wrapped expression and keeps a copy of that expression in m_op.
 // The temporary buffer starts out null; it is allocated in evalSubExprsIfNeeded[Async].
148  TensorEvaluator(const XprType& op, const Device& device)
149  : m_impl(op.expression(), device),
150  m_op(op.expression()),
151  m_device(device),
152  m_buffer_holder(nullptr),
153  m_buffer(nullptr) {}
154 
156 
 // The dimensions are those of the wrapped expression's evaluator.
157  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
158 
 // Body of evalSubExprsIfNeeded(EvaluatorPointerType); its header (line 159) is missing here.
 // Allocates numValues * sizeof(CoeffReturnType) bytes of temporary device storage and caches
 // the typed pointer in m_buffer.
160  const Index numValues = internal::array_prod(m_impl.dimensions());
161  m_buffer_holder = std::make_shared<DeviceTempPointerHolder<Device>>(m_device, numValues * sizeof(CoeffReturnType));
162  m_buffer = static_cast<EvaluatorPointerType>(m_buffer_holder->ptr());
163 
165 
 // Expression that writes m_op into the temporary buffer.
 // NOTE(review): the EvalTo typedef (line 166) and the statements that run evalToTmp
 // (lines 169-171) are missing from this listing.
167  EvalTo evalToTmp(m_device.get(m_buffer), m_op);
168 
172 
173  return true;
174  }
175 
176 #ifdef EIGEN_USE_THREADS
 // Asynchronous variant: the same allocation as above, then the evaluation into the buffer is
 // handed to TensorAsyncExecutor::runAsync, with on_done passed as its completion callback.
177  template <typename EvalSubExprsCallback>
178  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType, EvalSubExprsCallback done) {
179  const Index numValues = internal::array_prod(m_impl.dimensions());
180  m_buffer_holder = std::make_shared<DeviceTempPointerHolder<Device>>(m_device, numValues * sizeof(CoeffReturnType));
181  m_buffer = static_cast<EvaluatorPointerType>(m_buffer_holder->ptr());
182 
184  EvalTo evalToTmp(m_device.get(m_buffer), m_op);
185 
 // Wrap the caller's callback so that it is invoked with `true` when on_done is called.
186  auto on_done = std::bind([](EvalSubExprsCallback done_) { done_(true); }, std::move(done));
187  internal::TensorAsyncExecutor<
188  const EvalTo, std::remove_const_t<Device>, decltype(on_done),
190  /*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::runAsync(evalToTmp, m_device,
191  std::move(on_done));
192  }
193 #endif
194 
 // Body of cleanup(); its header (line 195) is missing here. Drops this evaluator's reference
 // to the temporary buffer and clears the cached pointer.
196  m_buffer_holder = nullptr;
197  m_buffer = nullptr;
198  }
199 
 // Reads one coefficient directly from the temporary buffer.
200  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_buffer[index]; }
201 
 // packet(Index index) const; its header (line 203) is missing here. Loads a packet starting at
 // m_buffer + index.
202  template <int LoadMode>
204  return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
205  }
206 
 // Tail of getResourceRequirements(); its header and body (lines 207-208) are missing here.
209  }
210 
 // block(TensorBlockDesc& desc, TensorBlockScratch& scratch, bool) const; the first header line
 // (211) is missing here. Materializes a block view over the already-evaluated buffer; the
 // buffer must have been allocated beforehand.
212  bool /*root_of_expr_ast*/ = false) const {
213  eigen_assert(m_buffer != nullptr);
214  return TensorBlock::materialize(m_buffer, m_impl.dimensions(), desc, scratch);
215  }
216 
 // costPerCoeff(bool vectorized) const; its header (line 217) is missing here. The first cost
 // term is sizeof(CoeffReturnType) and the next two are zero.
218  return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
219  }
220 
222 
223  private:
 // NOTE(review): m_impl (line 224) and m_device (line 226) are declared on lines missing from
 // this listing.
 // Copy of the wrapped expression; it is the source operand of the EvalTo expression.
225  const ArgType m_op;
 // Shared ownership of the temporary buffer.
227  std::shared_ptr<DeviceTempPointerHolder<Device>> m_buffer_holder;
228  EvaluatorPointerType m_buffer; // Cached copy of the value stored in m_buffer_holder.
229 };
230 
231 } // end namespace Eigen
232 
233 #endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:845
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define EIGEN_DEVICE_REF
Definition: TensorMacros.h:34
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
SCALAR Scalar
Definition: bench_gemm.cpp:45
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Definition: CwiseBinaryOp.h:79
Definition: TensorForcedEval.h:97
~DeviceTempPointerHolder()
Definition: TensorForcedEval.h:102
void * ptr_
Definition: TensorForcedEval.h:113
DeviceTempPointerHolder(const Device &device, size_t size)
Definition: TensorForcedEval.h:99
Device device_
Definition: TensorForcedEval.h:111
void * ptr()
Definition: TensorForcedEval.h:108
size_t size_
Definition: TensorForcedEval.h:112
The tensor base class.
Definition: TensorBase.h:1026
Definition: TensorEvalTo.h:61
Definition: TensorForcedEval.h:57
Eigen::internal::traits< TensorForcedEvalOp >::Scalar Scalar
Definition: TensorForcedEval.h:59
EIGEN_DEVICE_FUNC const internal::remove_all_t< typename XprType::Nested > & expression() const
Definition: TensorForcedEval.h:68
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType &expr)
Definition: TensorForcedEval.h:66
Eigen::internal::traits< TensorForcedEvalOp >::Index Index
Definition: TensorForcedEval.h:64
std::remove_const_t< typename XprType::CoeffReturnType > CoeffReturnType
Definition: TensorForcedEval.h:61
XprType::Nested m_xpr
Definition: TensorForcedEval.h:71
Eigen::internal::traits< TensorForcedEvalOp >::StorageKind StorageKind
Definition: TensorForcedEval.h:63
Eigen::internal::nested< TensorForcedEvalOp >::type Nested
Definition: TensorForcedEval.h:62
Eigen::NumTraits< Scalar >::Real RealScalar
Definition: TensorForcedEval.h:60
Definition: TensorCostModel.h:28
Definition: TensorExecutor.h:78
Definition: TensorBlock.h:604
static EIGEN_STRONG_INLINE TensorMaterializedBlock materialize(const Scalar *data, const DataDimensions &data_dims, TensorBlockDesc &desc, TensorBlockScratch &scratch)
Definition: TensorBlock.h:699
char char * op
Definition: level2_impl.h:374
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE auto array_prod(const array< T, N > &arr) -> decltype(array_reduce< product_op, T, N >(arr, static_cast< T >(1)))
Definition: MoreMeta.h:497
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
auto run(Kernel kernel, Args &&... args) -> decltype(kernel(args...))
Definition: gpu_test_helper.h:414
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
Extend namespace for flags.
Definition: fsi_chan_precond_driver.cc:56
Definition: Eigen_Colamd.h:49
Definition: Constants.h:519
T Real
Definition: NumTraits.h:183
Definition: TensorMeta.h:47
Definition: TensorForwardDeclarations.h:42
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
Definition: TensorForcedEval.h:143
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
Definition: TensorForcedEval.h:159
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
Definition: TensorForcedEval.h:203
TensorEvaluator< ArgType, Device >::Dimensions Dimensions
Definition: TensorForcedEval.h:121
XprType::Index Index
Definition: TensorForcedEval.h:122
TensorEvaluator(const XprType &op, const Device &device)
Definition: TensorForcedEval.h:148
std::shared_ptr< DeviceTempPointerHolder< Device > > m_buffer_holder
Definition: TensorForcedEval.h:227
ArgType::Scalar Scalar
Definition: TensorForcedEval.h:120
EIGEN_STRONG_INLINE void cleanup()
Definition: TensorForcedEval.h:195
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
Definition: TensorForcedEval.h:211
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorForcedEval.h:200
Eigen::internal::traits< XprType >::PointerType TensorPointerType
Definition: TensorForcedEval.h:126
Storage::Type EvaluatorPointerType
Definition: TensorForcedEval.h:128
EvaluatorPointerType m_buffer
Definition: TensorForcedEval.h:228
TensorEvaluator< ArgType, Device > m_impl
Definition: TensorForcedEval.h:224
TensorForcedEvalOp< ArgType > XprType
Definition: TensorForcedEval.h:119
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
Definition: TensorForcedEval.h:217
EIGEN_DEVICE_FUNC const Dimensions & dimensions() const
Definition: TensorForcedEval.h:157
internal::TensorMaterializedBlock< CoeffReturnType, NumDims, Layout, Index > TensorBlock
Definition: TensorForcedEval.h:145
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EvaluatorPointerType data() const
Definition: TensorForcedEval.h:221
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
Definition: TensorForcedEval.h:142
const ArgType m_op
Definition: TensorForcedEval.h:225
const internal::remove_all_t< ArgType_ > ArgType
Definition: TensorForcedEval.h:118
const Device EIGEN_DEVICE_REF m_device
Definition: TensorForcedEval.h:226
StorageMemory< CoeffReturnType, Device > Storage
Definition: TensorForcedEval.h:127
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const
Definition: TensorForcedEval.h:207
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition: TensorForcedEval.h:124
XprType::CoeffReturnType CoeffReturnType
Definition: TensorForcedEval.h:123
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition: TensorEvaluator.h:31
static constexpr int Layout
Definition: TensorEvaluator.h:46
const Device EIGEN_DEVICE_REF m_device
Definition: TensorEvaluator.h:170
Storage::Type EvaluatorPointerType
Definition: TensorEvaluator.h:41
@ PacketAccess
Definition: TensorEvaluator.h:50
@ IsAligned
Definition: TensorEvaluator.h:49
static constexpr int PacketSize
Definition: TensorEvaluator.h:38
Derived::Scalar CoeffReturnType
Definition: TensorEvaluator.h:34
EIGEN_STRONG_INLINE void cleanup()
Definition: TensorEvaluator.h:87
Definition: TensorForwardDeclarations.h:192
static const TiledEvaluation value
Definition: TensorForwardDeclarations.h:199
Definition: TensorForwardDeclarations.h:175
static const bool value
Definition: TensorForwardDeclarations.h:176
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockResourceRequirements any()
Definition: TensorBlock.h:143
const TensorForcedEvalOp< XprType > & type
Definition: TensorForcedEval.h:46
Definition: XprHelper.h:427
Definition: Meta.h:145
Definition: TensorTraits.h:152
ref_selector< T >::type type
Definition: TensorTraits.h:153
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index, StorageType)
Definition: TensorForcedEval.h:92
Definition: TensorForcedEval.h:76
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index numValues, StorageType m_buffer)
Definition: TensorForcedEval.h:78
XprType::Nested Nested
Definition: TensorForcedEval.h:35
traits< XprType > XprTraits
Definition: TensorForcedEval.h:32
traits< XprType >::Index Index
Definition: TensorForcedEval.h:34
XprType::Scalar Scalar
Definition: TensorForcedEval.h:31
std::remove_reference_t< Nested > Nested_
Definition: TensorForcedEval.h:36
XprTraits::PointerType PointerType
Definition: TensorForcedEval.h:39
traits< XprType >::StorageKind StorageKind
Definition: TensorForcedEval.h:33
Definition: ForwardDeclarations.h:21