TensorRoll.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2024 Tobias Wood tobias@spinicist.org.uk
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_ROLL_H
#define EIGEN_CXX11_TENSOR_TENSOR_ROLL_H
// IWYU pragma: private
#include "./InternalHeaderCheck.h"

namespace Eigen {

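// TensorRollOp circularly shifts ("rolls") a tensor along each of its
// dimensions: with shift r along a dimension of size n, output element i
// reads input element (i + r) mod n. Rolling {0, 1, 2, 3, 4} by 2, for
// example, yields {2, 3, 4, 0, 1}.
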
namespace internal {
template <typename RollDimensions, typename XprType>
struct traits<TensorRollOp<RollDimensions, XprType> > : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template <typename RollDimensions, typename XprType>
struct eval<TensorRollOp<RollDimensions, XprType>, Eigen::Dense> {
  typedef const TensorRollOp<RollDimensions, XprType>& type;
};

template <typename RollDimensions, typename XprType>
struct nested<TensorRollOp<RollDimensions, XprType>, 1, typename eval<TensorRollOp<RollDimensions, XprType> >::type> {
  typedef TensorRollOp<RollDimensions, XprType> type;
};

}  // end namespace internal

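// Standard expression-template plumbing: the traits<>, eval<>, and nested<>
// specializations above are what let TensorRollOp participate in Eigen
// expression trees like any other tensor operation.
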
template <typename RollDimensions, typename XprType>
class TensorRollOp : public TensorBase<TensorRollOp<RollDimensions, XprType>, WriteAccessors> {
 public:
  typedef TensorBase<TensorRollOp<RollDimensions, XprType>, WriteAccessors> Base;
  typedef typename Eigen::internal::traits<TensorRollOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorRollOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorRollOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorRollOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorRollOp(const XprType& expr, const RollDimensions& roll_dims)
      : m_xpr(expr), m_roll_dims(roll_dims) {}

  EIGEN_DEVICE_FUNC const RollDimensions& roll() const { return m_roll_dims; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorRollOp)

 protected:
  typename XprType::Nested m_xpr;
  const RollDimensions m_roll_dims;
};

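// The expression class itself only stores its argument and the per-dimension
// shift amounts; all of the index arithmetic lives in the evaluators below.
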
// Eval as rvalue
template <typename RollDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorRollOp<RollDimensions, ArgType>, Device> {
  typedef TensorRollOp<RollDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<RollDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = NumDims > 0,
    PreferBlockAccess = true,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  typedef internal::TensorIntDivisor<Index> IndexDivisor;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock ArgTensorBlock;
  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims, Layout, Index> TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_rolls(op.roll()), m_device(device) {
    EIGEN_STATIC_ASSERT((NumDims > 0), Must_Have_At_Least_One_Dimension_To_Roll);

    // Compute strides (e.g. ColMajor dims {3, 4, 5} give strides {1, 3, 12})
    // and cache an IndexDivisor per stride to speed up the repeated integer
    // divisions in rollIndex() and extract_coordinates().
    m_dimensions = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
        if (m_strides[i] > 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
      }
    } else {
      m_strides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
        if (m_strides[i] > 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(nullptr);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }

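  // Maps output coordinate i to input coordinate (i + r) mod n. In C++ the %
  // operator can return a negative value for negative operands (e.g.
  // (-3) % 5 == -3), so the + n branch folds such results back into [0, n);
  // this is what makes negative shifts work.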
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index roll(Index const i, Index const r, Index const n) const {
    auto const tmp = (i + r) % n;
    if (tmp < 0) {
      return tmp + n;
    } else {
      return tmp;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array<Index, NumDims> rollCoords(array<Index, NumDims> const& coords) const {
    array<Index, NumDims> rolledCoords;
    for (int id = 0; id < NumDims; id++) {
      eigen_assert(coords[id] < m_dimensions[id]);
      rolledCoords[id] = roll(coords[id], m_rolls[id], m_dimensions[id]);
    }
    return rolledCoords;
  }

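  // Maps a linear output index to the linear input index it reads from.
  // Worked example (ColMajor, dims {3, 4} so strides {1, 3}, rolls {1, 2}):
  // output index 7 splits into coords (1, 2); rolling gives
  // ((1 + 1) % 3, (2 + 2) % 4) = (2, 0), which re-linearizes to input index 2.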
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rollIndex(Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    Index rolledIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        rolledIndex += roll(idx, m_rolls[i], m_dimensions[i]) * m_strides[i];
      }
      rolledIndex += roll(index, m_rolls[0], m_dimensions[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        rolledIndex += roll(idx, m_rolls[i], m_dimensions[i]) * m_strides[i];
      }
      rolledIndex += roll(index, m_rolls[NumDims - 1], m_dimensions[NumDims - 1]);
    }
    return rolledIndex;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    return m_impl.coeff(rollIndex(index));
  }

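  // Rolled elements are generally not contiguous in the input, so a packet is
  // assembled by gathering PacketSize coefficients into an aligned scratch
  // buffer and issuing a single packet load.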
  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size).addCostPerCoeff({0, 0, 24});
  }

  struct BlockIteratorState {
    Index stride;
    Index span;
    Index size;
    Index count;
  };
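
  // block() materializes an entire output block in one call: it walks the
  // block's coordinates with the iterator state above, rolls each coordinate
  // tuple, and gathers the matching input coefficients into the block buffer.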
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
                                                          bool /*root_of_expr_ast*/ = false) const {
    static const bool is_col_major = static_cast<int>(Layout) == static_cast<int>(ColMajor);

    // Compute spatial coordinates for the first block element.
    array<Index, NumDims> coords;
    extract_coordinates(desc.offset(), coords);
    array<Index, NumDims> initial_coords = coords;
    Index offset = 0;  // Offset in the output block buffer.

    // Initialize the output block iterator state. Dimensions in this array
    // are always in inner_most -> outer_most order (col major layout).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = is_col_major ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride);
      it[i].span = it[i].stride * (it[i].size - 1);
      it[i].count = 0;
    }
    eigen_assert(it[0].stride == 1);

    // Prepare storage for the materialized block result.
    const typename TensorBlock::Storage block_storage = TensorBlock::prepareStorage(desc, scratch);
    CoeffReturnType* block_buffer = block_storage.data();

    static const int inner_dim = is_col_major ? 0 : NumDims - 1;
    const Index inner_dim_size = it[0].size;

    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      Index i = 0;
      // Gather one run of the inner-most dimension.
      for (; i < inner_dim_size; ++i) {
        auto const rolled = rollCoords(coords);
        auto const index = is_col_major ? m_dimensions.IndexOfColMajor(rolled) : m_dimensions.IndexOfRowMajor(rolled);
        *(block_buffer + offset + i) = m_impl.coeff(index);
        coords[inner_dim]++;
      }
      coords[inner_dim] = initial_coords[inner_dim];

      if (NumDims == 1) break;  // A 1d tensor has only the single inner-most run.

      // Advance the outer dimensions and update the buffer offset.
      for (i = 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          offset += it[i].stride;
          coords[is_col_major ? i : NumDims - 1 - i]++;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        coords[is_col_major ? i : NumDims - 1 - i] = initial_coords[is_col_major ? i : NumDims - 1 - i];
        offset -= it[i].span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
                                     TensorOpCost::DivCost<Index>());
    for (int i = 0; i < NumDims; ++i) {
      compute_cost += 2 * TensorOpCost::AddCost<Index>();
    }
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return nullptr; }

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  array<IndexDivisor, NumDims> m_fast_strides;
  TensorEvaluator<ArgType, Device> m_impl;
  RollDimensions m_rolls;
  const Device EIGEN_DEVICE_REF m_device;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void extract_coordinates(Index index, array<Index, NumDims>& coords) const {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[0] = index;
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[NumDims - 1] = index;
    }
  }

 private:
};
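
// The lvalue evaluator below additionally exposes coeffRef() and
// writePacket(), so a rolled expression can also appear on the left-hand
// side of an assignment; stores are scattered through the same rollIndex()
// mapping used for reads.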

// Eval as lvalue
template <typename RollDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorRollOp<RollDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorRollOp<RollDimensions, ArgType>, Device> {
  typedef TensorEvaluator<const TensorRollOp<RollDimensions, ArgType>, Device> Base;
  typedef TensorRollOp<RollDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<RollDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    CoordAccess = false,
    RawAccess = false
  };
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return this->m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) const {
    return this->m_impl.coeffRef(this->rollIndex(index));
  }

  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) const {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
    EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index + i) = values[i];
    }
  }
};

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_ROLL_H
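
A minimal usage sketch (not part of the header above). It assumes the
companion TensorBase::roll() API that constructs a TensorRollOp, taking one
shift per dimension; the variable names here are illustrative:

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<int, 1> a(5);
  a.setValues({0, 1, 2, 3, 4});

  // b(i) = a((i + 2) % 5)  ->  b = {2, 3, 4, 0, 1}
  Eigen::array<Eigen::Index, 1> shifts = {2};
  Eigen::Tensor<int, 1> b = a.roll(shifts);

  // Negative shifts rely on the normalization in roll():
  // c(i) = a((i - 1 + 5) % 5)  ->  c = {4, 0, 1, 2, 3}
  Eigen::array<Eigen::Index, 1> neg = {-1};
  Eigen::Tensor<int, 1> c = a.roll(neg);

  std::cout << b << "\n" << c << std::endl;
  return 0;
}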