TensorGenerator.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
12 
13 // IWYU pragma: private
14 #include "./InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
25 namespace internal {
// Traits of a TensorGeneratorOp expression. Derives from the traits of the
// wrapped XprType and re-exports its scalar type, nested type, storage kind,
// index type, number of dimensions, layout and pointer type.
// NOTE(review): the declaration of XprTraits (listing line 29) is elided in
// this listing; the symbol index at the end of this page gives it as
// traits<XprType>.
26 template <typename Generator, typename XprType>
27 struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType> {
28  typedef typename XprType::Scalar Scalar;
30  typedef typename XprTraits::StorageKind StorageKind;
31  typedef typename XprTraits::Index Index;
32  typedef typename XprType::Nested Nested;
     // Nested with any reference qualifier removed.
33  typedef std::remove_reference_t<Nested> Nested_;
34  static constexpr int NumDimensions = XprTraits::NumDimensions;
35  static constexpr int Layout = XprTraits::Layout;
36  typedef typename XprTraits::PointerType PointerType;
37 };
38 
// Specialization of eval<> for TensorGeneratorOp with Eigen::Dense storage.
// NOTE(review): the body (listing line 41) is elided in this listing; the
// symbol index at the end of this page gives it as a member `type` of type
// `const TensorGeneratorOp<Generator, XprType>&`.
39 template <typename Generator, typename XprType>
40 struct eval<TensorGeneratorOp<Generator, XprType>, Eigen::Dense> {
42 };
43 
// Specialization of nested<> for TensorGeneratorOp, keyed on the expression's
// eval<>::type.
// NOTE(review): the body (listing line 46) is elided in this listing, so the
// members it declares are not visible here.
44 template <typename Generator, typename XprType>
45 struct nested<TensorGeneratorOp<Generator, XprType>, 1, typename eval<TensorGeneratorOp<Generator, XprType> >::type> {
47 };
48 
49 } // end namespace internal
50 
// Tensor generator expression: a read-only (ReadOnlyAccessors) tensor
// expression that stores an input expression together with a generator
// functor.
// NOTE(review): several lines of this class are elided in this listing
// (typedefs on listing lines 54-59, the constructor signature on line 61 and
// expression() on line 66). Per the symbol index at the end of this page the
// constructor is TensorGeneratorOp(const XprType& expr, const Generator& generator).
51 template <typename Generator, typename XprType>
52 class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType>, ReadOnlyAccessors> {
53  public:
56  typedef typename XprType::CoeffReturnType CoeffReturnType;
60 
     // Constructor initializer list: stores the expression and a copy of the
     // generator.
62  : m_xpr(expr), m_generator(generator) {}
63 
     // Returns the stored generator functor by const reference.
64  EIGEN_DEVICE_FUNC const Generator& generator() const { return m_generator; }
65 
67 
68  protected:
     // The wrapped input expression, held as XprType::Nested.
69  typename XprType::Nested m_xpr;
     // The generator functor; const, so it is never modified after construction.
70  const Generator m_generator;
71 };
72 
73 // Eval as rvalue
74 template <typename Generator, typename ArgType, typename Device>
75 struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device> {
77  typedef typename XprType::Index Index;
79  static constexpr int NumDims = internal::array_size<Dimensions>::value;
80  typedef typename XprType::Scalar Scalar;
// Capability flags of this evaluator.
// NOTE(review): listing line 88 is elided here; the symbol index at the end
// of this page lists a PacketAccess enumerator, which is presumably declared
// on that line - confirm against the real header.
86  enum {
87  IsAligned = false,
     // Block evaluation is available (see block()) and is the preferred path.
89  BlockAccess = true,
90  PreferBlockAccess = true,
91  CoordAccess = false, // to be implemented
     // No raw buffer is exposed; data() returns NULL.
92  RawAccess = false
93  };
94 
96 
97  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
100 
102  //===--------------------------------------------------------------------===//
103 
// Constructs the evaluator: keeps the device, copies the generator out of the
// expression, takes the output dimensions from the evaluator of the wrapped
// argument expression, and precomputes the per-dimension strides (plus fast
// integer divisors for them) that extract_coordinates() uses.
// NOTE(review): listing lines 111 and 118 (just before each for loop) are
// elided in this listing.
104  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
105  : m_device(device), m_generator(op.generator()) {
      // The argument evaluator is a temporary used only to query dimensions.
106  TensorEvaluator<ArgType, Device> argImpl(op.expression(), device);
107  m_dimensions = argImpl.dimensions();
108 
109  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      // Column-major: dimension 0 has stride 1; each following stride is the
      // previous stride times the previous dimension's size.
110  m_strides[0] = 1;
112  for (int i = 1; i < NumDims; ++i) {
113  m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
      // A divisor is only built for non-zero strides.
114  if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
115  }
116  } else {
      // Otherwise: the last dimension has stride 1; strides grow towards
      // dimension 0.
117  m_strides[NumDims - 1] = 1;
119  for (int i = NumDims - 2; i >= 0; --i) {
120  m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
      // A divisor is only built for non-zero strides.
121  if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
122  }
123  }
124  }
125 
// Returns the output dimensions captured in the constructor (those of the
// wrapped argument expression).
126  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
127 
130 
// Body of coeff(): converts the linear index into per-dimension coordinates
// and returns the generator's value at those coordinates.
// NOTE(review): the signature (listing line 131) is elided in this listing;
// the symbol index at the end of this page gives it as
// `CoeffReturnType coeff(Index index) const`.
132  array<Index, NumDims> coords;
133  extract_coordinates(index, coords);
134  return m_generator(coords);
135  }
136 
// packet(): builds a packet by evaluating coeff() for packetSize consecutive
// linear indices starting at `index`, storing them in a temporary array and
// loading that array as a packet.
// NOTE(review): the signature (listing line 138) is elided in this listing;
// the symbol index at the end of this page gives it as
// `PacketReturnType packet(Index index) const`.
137  template <int LoadMode>
139  const int packetSize = PacketType<CoeffReturnType, Device>::size;
      // The whole packet must lie inside the tensor.
140  eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
141 
      // Scratch array declared with EIGEN_ALIGN_MAX so it can be read with pload.
142  EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[packetSize];
143  for (int i = 0; i < packetSize; ++i) {
144  values[i] = coeff(index + i);
145  }
146  PacketReturnType rslt = internal::pload<PacketReturnType>(values);
147  return rslt;
148  }
149 
// Body of getResourceRequirements(): requests skewed blocks sized from the
// device's first-level cache size.
// NOTE(review): the signature (listing line 150) is elided in this listing;
// the symbol index at the end of this page gives the return type as
// internal::TensorBlockResourceRequirements.
151  const size_t target_size = m_device.firstLevelCacheSize();
152  // TODO(ezhulenev): Generator should have a cost.
153  return internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size);
154  }
155 
// Per-dimension iteration state used by block().
// NOTE(review): the member declarations (listing lines 157-160) are elided in
// this listing; block() reads and writes members named size, stride, span and
// count on objects that are presumably of this type - confirm against the
// real header.
156  struct BlockIteratorState {
161  };
162 
// block(): materializes the generator's values for the block described by
// `desc` into storage obtained from TensorBlock::prepareStorage(desc, scratch)
// and returns that storage as a materialized tensor block.
// NOTE(review): the first line of the signature (listing line 163) and the
// declaration of `it` (listing line 177) are elided in this listing; the
// symbol index at the end of this page gives the signature as
// `TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch, bool = false) const`.
164  bool /*root_of_expr_ast*/ = false) const {
165  static const bool is_col_major = static_cast<int>(Layout) == static_cast<int>(ColMajor);
166 
167  // Compute spatial coordinates for the first block element.
168  array<Index, NumDims> coords;
169  extract_coordinates(desc.offset(), coords);
      // Kept so coordinates can be reset when a dimension wraps around.
170  array<Index, NumDims> initial_coords = coords;
171 
172  // Offset in the output block buffer.
173  Index offset = 0;
174 
175  // Initialize output block iterator state. Dimensions in this array are
176  // always in inner_most -> outer_most order (col major layout).
178  for (int i = 0; i < NumDims; ++i) {
      // Map iterator slot i to the tensor dimension for the current layout.
179  const int dim = is_col_major ? i : NumDims - 1 - i;
180  it[i].size = desc.dimension(dim);
      // Strides describe a dense output buffer: slot 0 has stride 1.
181  it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride);
      // span is the total offset advance after walking the whole dimension.
182  it[i].span = it[i].stride * (it[i].size - 1);
183  it[i].count = 0;
184  }
185  eigen_assert(it[0].stride == 1);
186 
187  // Prepare storage for the materialized generator result.
188  const typename TensorBlock::Storage block_storage = TensorBlock::prepareStorage(desc, scratch);
189 
190  CoeffReturnType* block_buffer = block_storage.data();
191 
192  static const int packet_size = PacketType<CoeffReturnType, Device>::size;
193 
      // Index of the inner-most tensor dimension for the current layout.
194  static const int inner_dim = is_col_major ? 0 : NumDims - 1;
195  const Index inner_dim_size = it[0].size;
      // Largest start index at which a full run of packet_size elements fits.
196  const Index inner_dim_vectorized = inner_dim_size - packet_size;
197 
      // One iteration fills one run along the inner-most dimension.
198  while (it[NumDims - 1].count < it[NumDims - 1].size) {
199  Index i = 0;
200  // Generate data for the vectorized part of the inner-most dimension.
201  for (; i <= inner_dim_vectorized; i += packet_size) {
202  for (Index j = 0; j < packet_size; ++j) {
203  array<Index, NumDims> j_coords = coords; // Break loop dependence.
204  j_coords[inner_dim] += j;
205  *(block_buffer + offset + i + j) = m_generator(j_coords);
206  }
207  coords[inner_dim] += packet_size;
208  }
209  // Finalize non-vectorized part of the inner-most dimension.
210  for (; i < inner_dim_size; ++i) {
211  *(block_buffer + offset + i) = m_generator(coords);
212  coords[inner_dim]++;
213  }
      // Rewind the inner coordinate for the next run.
214  coords[inner_dim] = initial_coords[inner_dim];
215 
216  // For the 1d tensor we need to generate only one inner-most dimension.
217  if (NumDims == 1) break;
218 
219  // Update offset.
      // Advance the first outer dimension that has not reached its size. Each
      // dimension that wrapped has its coordinate reset and the offset rewound
      // by its span. The outer-most counter is not reset, so it reaches its
      // size and ends the while loop.
220  for (i = 1; i < NumDims; ++i) {
221  if (++it[i].count < it[i].size) {
222  offset += it[i].stride;
223  coords[is_col_major ? i : NumDims - 1 - i]++;
224  break;
225  }
226  if (i != NumDims - 1) it[i].count = 0;
227  coords[is_col_major ? i : NumDims - 1 - i] = initial_coords[is_col_major ? i : NumDims - 1 - i];
228  offset -= it[i].span;
229  }
230  }
231 
232  return block_storage.AsTensorMaterializedBlock();
233  }
234 
// Body of costPerCoeff(): returns a fixed placeholder cost whose first two
// arguments are 0 and whose third is the cost of one scalar addition plus one
// scalar multiplication. The generator itself is not consulted.
// NOTE(review): the signature (listing line 235) is elided in this listing;
// the symbol index at the end of this page gives it as
// `TensorOpCost costPerCoeff(bool) const`.
236  // TODO(rmlarsen): This is just a placeholder. Define interface to make
237  // generators return their cost.
238  return TensorOpCost(0, 0, TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>());
239  }
240 
// Always returns NULL: this evaluator exposes no raw data pointer (RawAccess
// is false in the capability enum).
241  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
242 
243  protected:
// Body of extract_coordinates(): decomposes a linear index into
// per-dimension coordinates using the strides precomputed in the constructor.
// Dimensions are peeled off from the largest stride to the smallest; what
// remains of the index becomes the coordinate of the stride-1 dimension.
// NOTE(review): the signature (listing line 244) is elided in this listing;
// the symbol index at the end of this page gives it as
// `void extract_coordinates(Index index, array<Index, NumDims>& coords) const`,
// so `index` is a by-value copy that is modified locally.
// NOTE(review): the constructor assigns m_fast_strides[i] only when
// m_strides[i] != 0; what the division below does for a zero stride depends
// on IndexDivisor's default state - confirm.
245  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      // Column-major: the last dimension has the largest stride.
246  for (int i = NumDims - 1; i > 0; --i) {
247  const Index idx = index / m_fast_strides[i];
248  index -= idx * m_strides[i];
249  coords[i] = idx;
250  }
251  coords[0] = index;
252  } else {
      // Otherwise: dimension 0 has the largest stride.
253  for (int i = 0; i < NumDims - 1; ++i) {
254  const Index idx = index / m_fast_strides[i];
255  index -= idx * m_strides[i];
256  coords[i] = idx;
257  }
258  coords[NumDims - 1] = index;
259  }
260  }
261 
266  Generator m_generator;
267 };
268 
269 } // end namespace Eigen
270 
271 #endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_ALIGN_MAX
Definition: ConfigureVectorization.h:146
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define EIGEN_DEVICE_REF
Definition: TensorMacros.h:34
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
SCALAR Scalar
Definition: bench_gemm.cpp:45
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Definition: CwiseBinaryOp.h:79
The tensor base class.
Definition: TensorBase.h:1026
Tensor generator class.
Definition: TensorGenerator.h:52
EIGEN_DEVICE_FUNC const Generator & generator() const
Definition: TensorGenerator.h:64
Eigen::internal::traits< TensorGeneratorOp >::StorageKind StorageKind
Definition: TensorGenerator.h:58
Eigen::internal::traits< TensorGeneratorOp >::Scalar Scalar
Definition: TensorGenerator.h:54
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType &expr, const Generator &generator)
Definition: TensorGenerator.h:61
Eigen::internal::nested< TensorGeneratorOp >::type Nested
Definition: TensorGenerator.h:57
Eigen::NumTraits< Scalar >::Real RealScalar
Definition: TensorGenerator.h:55
XprType::CoeffReturnType CoeffReturnType
Definition: TensorGenerator.h:56
Eigen::internal::traits< TensorGeneratorOp >::Index Index
Definition: TensorGenerator.h:59
const Generator m_generator
Definition: TensorGenerator.h:70
EIGEN_DEVICE_FUNC const internal::remove_all_t< typename XprType::Nested > & expression() const
Definition: TensorGenerator.h:66
XprType::Nested m_xpr
Definition: TensorGenerator.h:69
Definition: TensorCostModel.h:28
IndexType offset() const
Definition: TensorBlock.h:270
IndexType dimension(int index) const
Definition: TensorBlock.h:272
TensorMaterializedBlock AsTensorMaterializedBlock() const
Definition: TensorBlock.h:644
Scalar * data() const
Definition: TensorBlock.h:640
Definition: TensorBlock.h:604
static EIGEN_STRONG_INLINE Storage prepareStorage(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool allow_strided_storage=false)
Definition: TensorBlock.h:671
@ ColMajor
Definition: Constants.h:318
char char * op
Definition: level2_impl.h:374
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
std::array< T, N > array
Definition: EmulateArray.h:231
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
Definition: Eigen_Colamd.h:49
Definition: Constants.h:519
T Real
Definition: NumTraits.h:183
Definition: TensorMeta.h:47
Definition: TensorForwardDeclarations.h:42
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition: TensorGenerator.h:104
XprType::CoeffReturnType CoeffReturnType
Definition: TensorGenerator.h:81
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const
Definition: TensorGenerator.h:241
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
Definition: TensorGenerator.h:128
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorGenerator.h:126
const Device EIGEN_DEVICE_REF m_device
Definition: TensorGenerator.h:262
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
Definition: TensorGenerator.h:99
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void extract_coordinates(Index index, array< Index, NumDims > &coords) const
Definition: TensorGenerator.h:244
array< Index, NumDims > m_strides
Definition: TensorGenerator.h:264
internal::TensorMaterializedBlock< CoeffReturnType, NumDims, Layout, Index > TensorBlock
Definition: TensorGenerator.h:101
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
Definition: TensorGenerator.h:138
TensorEvaluator< ArgType, Device >::Dimensions Dimensions
Definition: TensorGenerator.h:78
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const
Definition: TensorGenerator.h:150
TensorGeneratorOp< Generator, ArgType > XprType
Definition: TensorGenerator.h:76
StorageMemory< CoeffReturnType, Device > Storage
Definition: TensorGenerator.h:83
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const
Definition: TensorGenerator.h:235
internal::TensorIntDivisor< Index > IndexDivisor
Definition: TensorGenerator.h:95
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
Definition: TensorGenerator.h:163
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition: TensorGenerator.h:82
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
Definition: TensorGenerator.h:98
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorGenerator.h:131
EIGEN_STRONG_INLINE void cleanup()
Definition: TensorGenerator.h:129
array< IndexDivisor, NumDims > m_fast_strides
Definition: TensorGenerator.h:265
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition: TensorEvaluator.h:31
static constexpr int Layout
Definition: TensorEvaluator.h:46
const Device EIGEN_DEVICE_REF m_device
Definition: TensorEvaluator.h:170
@ PacketAccess
Definition: TensorEvaluator.h:50
@ IsAligned
Definition: TensorEvaluator.h:49
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorEvaluator.h:89
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorEvaluator.h:69
Definition: Meta.h:305
const TensorGeneratorOp< Generator, XprType > & type
Definition: TensorGenerator.h:41
Definition: XprHelper.h:427
Definition: TensorTraits.h:152
ref_selector< T >::type type
Definition: TensorTraits.h:153
XprType::Nested Nested
Definition: TensorGenerator.h:32
std::remove_reference_t< Nested > Nested_
Definition: TensorGenerator.h:33
XprTraits::StorageKind StorageKind
Definition: TensorGenerator.h:30
XprTraits::Index Index
Definition: TensorGenerator.h:31
XprType::Scalar Scalar
Definition: TensorGenerator.h:28
XprTraits::PointerType PointerType
Definition: TensorGenerator.h:36
traits< XprType > XprTraits
Definition: TensorGenerator.h:29
Definition: ForwardDeclarations.h:21
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2