TensorReverse.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com>
5 // Benoit Steiner <benoit.steiner.goog@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
12 #define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
13 // IWYU pragma: private
14 #include "./InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
24 namespace internal {
// Traits specialization for TensorReverseOp: Scalar comes from the wrapped
// expression, everything else is forwarded from the wrapped expression's traits.
// NOTE(review): this Doxygen listing omits original line 28; per the symbol
// index it declares `typedef traits<XprType> XprTraits;` — confirm against the
// real header before editing.
25 template <typename ReverseDimensions, typename XprType>
26 struct traits<TensorReverseOp<ReverseDimensions, XprType> > : public traits<XprType> {
27  typedef typename XprType::Scalar Scalar;
29  typedef typename XprTraits::StorageKind StorageKind;
30  typedef typename XprTraits::Index Index;
31  typedef typename XprType::Nested Nested;
32  typedef std::remove_reference_t<Nested> Nested_;
33  static constexpr int NumDimensions = XprTraits::NumDimensions;
34  static constexpr int Layout = XprTraits::Layout;
35  typedef typename XprTraits::PointerType PointerType;
36 };
37 
// eval<> specialization: a TensorReverseOp evaluates "as itself" (by reference).
// NOTE(review): the listing omits original line 40; per the symbol index it is
// `typedef const TensorReverseOp<ReverseDimensions, XprType>& type;` — verify
// against the real header.
38 template <typename ReverseDimensions, typename XprType>
39 struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense> {
41 };
42 
// nested<> specialization controlling how the expression is stored when nested
// inside a larger expression tree.
// NOTE(review): the listing omits original line 46 (presumably the member
// `typedef TensorReverseOp<ReverseDimensions, XprType> type;`) — verify against
// the real header.
43 template <typename ReverseDimensions, typename XprType>
44 struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1,
45  typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type> {
47 };
48 
49 } // end namespace internal
50 
// Expression node representing a tensor whose coefficients are read in reverse
// order along the dimensions flagged in `reverse_dims`. Writable (inherits
// TensorBase with WriteAccessors), so it can appear on the left of an assignment.
// NOTE(review): the listing omits original lines 54-60 (Base/Scalar/RealScalar/
// Nested/StorageKind/Index typedefs), 67 (expression() accessor) and 69
// (EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS), per the symbol index — verify
// against the real header.
51 template <typename ReverseDimensions, typename XprType>
52 class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions, XprType>, WriteAccessors> {
53  public:
57  typedef typename XprType::CoeffReturnType CoeffReturnType;
61 
// Stores the wrapped expression and the per-dimension reverse flags.
62  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(const XprType& expr, const ReverseDimensions& reverse_dims)
63  : m_xpr(expr), m_reverse_dims(reverse_dims) {}
64 
// Accessor for the per-dimension reverse flags.
65  EIGEN_DEVICE_FUNC const ReverseDimensions& reverse() const { return m_reverse_dims; }
66 
68 
70 
71  protected:
72  typename XprType::Nested m_xpr;
73  const ReverseDimensions m_reverse_dims;
74 };
75 
76 // Eval as rvalue
// Rvalue evaluator for TensorReverseOp: maps each requested output index to the
// corresponding (possibly reversed) index in the argument tensor and reads the
// coefficient there. Supports scalar, packet, and block evaluation.
// NOTE(review): this Doxygen listing omits many declaration lines of the
// original header (e.g. 79, 82, 84-88, 90, 93, 100, 103-108, 135, 149, 186,
// 191, 205, 212, 234, 321, 335-340); their contents are only partially
// recoverable from the symbol index — verify against the real header before
// editing this block.
77 template <typename ReverseDimensions, typename ArgType, typename Device>
78 struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device> {
80  typedef typename XprType::Index Index;
81  static constexpr int NumDims = internal::array_size<ReverseDimensions>::value;
83  typedef typename XprType::Scalar Scalar;
89 
91  enum {
92  IsAligned = false,
94  BlockAccess = NumDims > 0,
95  PreferBlockAccess = true,
96  CoordAccess = false, // to be implemented
97  RawAccess = false
98  };
99 
101 
102  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
105 
107 
109  //===--------------------------------------------------------------------===//
110 
// Precomputes linear strides of the argument tensor (layout dependent) and a
// fast-division helper per dimension, used by reverseIndex() below.
111  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
112  : m_impl(op.expression(), device), m_reverse(op.reverse()), m_device(device) {
113  // Reversing a scalar isn't supported yet. It would be a no-op anyway.
114  EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
115 
116  // Compute strides
117  m_dimensions = m_impl.dimensions();
118  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
119  m_strides[0] = 1;
120  for (int i = 1; i < NumDims; ++i) {
121  m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
// IndexDivisor requires a strictly positive divisor, hence the guard for
// degenerate (zero-sized) dimensions.
122  if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
123  }
124  } else {
125  m_strides[NumDims - 1] = 1;
126  for (int i = NumDims - 2; i >= 0; --i) {
127  m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
128  if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
129  }
130  }
131  }
132 
// Reversing does not change the dimensions, only the traversal order.
133  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
134 
136  m_impl.evalSubExprsIfNeeded(NULL);
137  return true;
138  }
139 
140 #ifdef EIGEN_USE_THREADS
141  template <typename EvalSubExprsCallback>
142  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType, EvalSubExprsCallback done) {
143  m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
144  }
145 #endif // EIGEN_USE_THREADS
146 
147  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }
148 
// Core index remapping: decomposes the linear output index into per-dimension
// coordinates (fast division by precomputed strides), flips each coordinate
// where m_reverse is set (idx -> dim - idx - 1), and reassembles the input
// linear index. The innermost dimension is handled outside the loop since its
// stride is 1.
150  eigen_assert(index < dimensions().TotalSize());
151  Index inputIndex = 0;
152  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
154  for (int i = NumDims - 1; i > 0; --i) {
155  Index idx = index / m_fastStrides[i];
156  index -= idx * m_strides[i];
157  if (m_reverse[i]) {
158  idx = m_dimensions[i] - idx - 1;
159  }
160  inputIndex += idx * m_strides[i];
161  }
162  if (m_reverse[0]) {
163  inputIndex += (m_dimensions[0] - index - 1);
164  } else {
165  inputIndex += index;
166  }
167  } else {
169  for (int i = 0; i < NumDims - 1; ++i) {
170  Index idx = index / m_fastStrides[i];
171  index -= idx * m_strides[i];
172  if (m_reverse[i]) {
173  idx = m_dimensions[i] - idx - 1;
174  }
175  inputIndex += idx * m_strides[i];
176  }
177  if (m_reverse[NumDims - 1]) {
178  inputIndex += (m_dimensions[NumDims - 1] - index - 1);
179  } else {
180  inputIndex += index;
181  }
182  }
183  return inputIndex;
184  }
185 
// Scalar access: read the argument at the remapped index.
187  return m_impl.coeff(reverseIndex(index));
188  }
189 
// Packet access: gathers PacketSize coefficients one by one (each needs its
// own index remapping) into an aligned buffer, then loads it as a packet.
190  template <int LoadMode>
192  eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
193 
194  // TODO(ndjaitly): write a better packing routine that uses
195  // local structure.
196  EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
198  for (int i = 0; i < PacketSize; ++i) {
199  values[i] = coeff(index + i);
200  }
201  PacketReturnType rslt = internal::pload<PacketReturnType>(values);
202  return rslt;
203  }
204 
// Block sizing hint: target the last-level cache, with an extra per-coefficient
// compute cost to account for the reverse-order reads.
206  const size_t target_size = m_device.lastLevelCacheSize();
207  // Block evaluation reads underlying memory in reverse order, and default
208  // cost model does not properly catch this in bytes stored/loaded.
209  return internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size).addCostPerCoeff({0, 0, 24});
210  }
211 
// Block evaluation: materializes the requested output block by walking it in
// inner-to-outer dimension order while stepping through the input with negated
// strides on reversed dimensions.
213  bool /*root_of_expr_ast*/ = false) const {
214  // TODO(ezhulenev): If underlying tensor expression supports and prefers
215  // block evaluation we must use it. Currently we use coeff and packet
216  // access into the underlying tensor expression.
217  // static const bool useBlockAccessForArgType =
218  // TensorEvaluator<ArgType, Device>::BlockAccess &&
219  // TensorEvaluator<ArgType, Device>::PreferBlockAccess;
220 
221  static const bool isColMajor = static_cast<int>(Layout) == static_cast<int>(ColMajor);
222 
223  static const Index inner_dim_idx = isColMajor ? 0 : NumDims - 1;
224  const bool inner_dim_reversed = m_reverse[inner_dim_idx];
225 
226  // Offset in the output block.
227  Index block_offset = 0;
228 
229  // Offset in the input Tensor.
230  Index input_offset = reverseIndex(desc.offset());
231 
232  // Initialize output block iterator state. Dimension in this array are
233  // always in inner_most -> outer_most order (col major layout).
235  for (int i = 0; i < NumDims; ++i) {
236  const int dim = isColMajor ? i : NumDims - 1 - i;
237  it[i].size = desc.dimension(dim);
238  it[i].count = 0;
239  it[i].reverse = m_reverse[dim];
240 
241  it[i].block_stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].block_stride);
242  it[i].block_span = it[i].block_stride * (it[i].size - 1);
243 
244  it[i].input_stride = m_strides[dim];
245  it[i].input_span = it[i].input_stride * (it[i].size - 1);
246 
// Reversed dimensions are traversed backwards through the input.
247  if (it[i].reverse) {
248  it[i].input_stride = -1 * it[i].input_stride;
249  it[i].input_span = -1 * it[i].input_span;
250  }
251  }
252 
253  // If multiple inner dimensions have the same reverse flag, check if we can
254  // merge them into a single virtual inner dimension.
255  int effective_inner_dim = 0;
256  for (int i = 1; i < NumDims; ++i) {
257  if (it[i].reverse != it[effective_inner_dim].reverse) break;
258  if (it[i].block_stride != it[effective_inner_dim].size) break;
259  if (it[i].block_stride != numext::abs(it[i].input_stride)) break;
260 
261  it[i].size = it[effective_inner_dim].size * it[i].size;
262 
263  it[i].block_stride = 1;
264  it[i].input_stride = (inner_dim_reversed ? -1 : 1);
265 
266  it[i].block_span = it[i].block_stride * (it[i].size - 1);
267  it[i].input_span = it[i].input_stride * (it[i].size - 1);
268 
269  effective_inner_dim = i;
270  }
271 
272  eigen_assert(it[effective_inner_dim].block_stride == 1);
273  eigen_assert(it[effective_inner_dim].input_stride == (inner_dim_reversed ? -1 : 1));
274 
275  const Index inner_dim_size = it[effective_inner_dim].size;
276 
277  // Prepare storage for the materialized reverse result.
278  const typename TensorBlock::Storage block_storage = TensorBlock::prepareStorage(desc, scratch);
279  CoeffReturnType* block_buffer = block_storage.data();
280 
// Odometer-style iteration over the outer dimensions; each pass copies one
// (merged) inner-most run of coefficients.
281  while (it[NumDims - 1].count < it[NumDims - 1].size) {
282  // Copy inner-most dimension data from reversed location in input.
283  Index dst = block_offset;
284  Index src = input_offset;
285 
286  // NOTE(ezhulenev): Adding vectorized path with internal::preverse showed
287  // worse results in benchmarks than a simple coefficient loop.
288  if (inner_dim_reversed) {
289  for (Index i = 0; i < inner_dim_size; ++i) {
290  block_buffer[dst] = m_impl.coeff(src);
291  ++dst;
292  --src;
293  }
294  } else {
295  for (Index i = 0; i < inner_dim_size; ++i) {
296  block_buffer[dst] = m_impl.coeff(src);
297  ++dst;
298  ++src;
299  }
300  }
301 
302  // For the 1d tensor we need to generate only one inner-most dimension.
303  if ((NumDims - effective_inner_dim) == 1) break;
304 
305  // Update offset.
306  for (Index i = effective_inner_dim + 1; i < NumDims; ++i) {
307  if (++it[i].count < it[i].size) {
308  block_offset += it[i].block_stride;
309  input_offset += it[i].input_stride;
310  break;
311  }
// Dimension i wrapped around: reset its counter and rewind both offsets by
// a full span before carrying into the next-outer dimension.
312  if (i != NumDims - 1) it[i].count = 0;
313  block_offset -= it[i].block_span;
314  input_offset -= it[i].input_span;
315  }
316  }
317 
318  return block_storage.AsTensorMaterializedBlock();
319  }
320 
// Cost model: per coefficient, the index remapping costs NumDims divisions,
// multiplications and additions, plus two extra additions per reversed dim.
322  double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
323  TensorOpCost::DivCost<Index>());
324  for (int i = 0; i < NumDims; ++i) {
325  if (m_reverse[i]) {
326  compute_cost += 2 * TensorOpCost::AddCost<Index>();
327  }
328  }
329  return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
330  }
331 
// No directly addressable buffer: coefficients are computed on demand.
332  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }
333 
334  protected:
339  ReverseDimensions m_reverse;
341 
342  private:
// Per-dimension iteration state for block(); strides/spans are signed so that
// reversed dimensions step backwards through the input.
343  struct BlockIteratorState {
345  : size(0), count(0), reverse(false), block_stride(0), block_span(0), input_stride(0), input_span(0) {}
346 
349  bool reverse;
354  };
355 };
356 
357 // Eval as lvalue
358 
// Lvalue evaluator for TensorReverseOp: reuses the const evaluator's index
// remapping and adds write access (coeffRef / writePacket). Block evaluation is
// disabled for the writable path.
// NOTE(review): the listing omits original lines 362-364, 380-382, 390, 395 and
// 399-401 (Base/XprType typedefs, coeffRef/writePacket signatures and the local
// `values` buffer declaration), per the symbol index — verify against the real
// header before editing.
359 template <typename ReverseDimensions, typename ArgType, typename Device>
360 struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
361  : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device> {
364  typedef typename XprType::Index Index;
365  static constexpr int NumDims = internal::array_size<ReverseDimensions>::value;
367 
369  enum {
370  IsAligned = false,
372  BlockAccess = false,
373  PreferBlockAccess = false,
374  CoordAccess = false, // to be implemented
375  RawAccess = false
376  };
377  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) {}
378 
379  typedef typename XprType::Scalar Scalar;
383 
384  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
386  //===--------------------------------------------------------------------===//
387 
388  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return this->m_dimensions; }
389 
// Writable access: remap the output index through the base evaluator's
// reverseIndex() and return a mutable reference into the argument.
391  return this->m_impl.coeffRef(this->reverseIndex(index));
392  }
393 
// Packet store: scatter the packet one coefficient at a time, since each lane
// maps to a different (reversed) location in the argument.
394  template <int StoreMode>
396  eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
397 
398  // This code is pilfered from TensorMorphing.h
400  internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
402  for (int i = 0; i < PacketSize; ++i) {
403  this->coeffRef(index + i) = values[i];
404  }
405  }
406 };
407 
408 } // end namespace Eigen
409 
410 #endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_ALIGN_MAX
Definition: ConfigureVectorization.h:146
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define EIGEN_STATIC_ASSERT(X, MSG)
Definition: StaticAssert.h:26
#define EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(Derived)
Macro to manually inherit assignment operators. This is necessary, because the implicitly defined ass...
Definition: TensorMacros.h:81
#define EIGEN_DEVICE_REF
Definition: TensorMacros.h:34
void reverse(const MatrixType &m)
Definition: array_reverse.cpp:17
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
SCALAR Scalar
Definition: bench_gemm.cpp:45
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Definition: CwiseBinaryOp.h:79
The tensor base class.
Definition: TensorBase.h:1026
Definition: TensorCostModel.h:28
Definition: TensorReverse.h:52
TensorBase< TensorReverseOp< ReverseDimensions, XprType >, WriteAccessors > Base
Definition: TensorReverse.h:54
Eigen::internal::nested< TensorReverseOp >::type Nested
Definition: TensorReverse.h:58
const ReverseDimensions m_reverse_dims
Definition: TensorReverse.h:73
Eigen::internal::traits< TensorReverseOp >::StorageKind StorageKind
Definition: TensorReverse.h:59
XprType::CoeffReturnType CoeffReturnType
Definition: TensorReverse.h:57
XprType::Nested m_xpr
Definition: TensorReverse.h:72
Eigen::internal::traits< TensorReverseOp >::Scalar Scalar
Definition: TensorReverse.h:55
EIGEN_DEVICE_FUNC const ReverseDimensions & reverse() const
Definition: TensorReverse.h:65
Eigen::NumTraits< Scalar >::Real RealScalar
Definition: TensorReverse.h:56
Eigen::internal::traits< TensorReverseOp >::Index Index
Definition: TensorReverse.h:60
EIGEN_DEVICE_FUNC const internal::remove_all_t< typename XprType::Nested > & expression() const
Definition: TensorReverse.h:67
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(const XprType &expr, const ReverseDimensions &reverse_dims)
Definition: TensorReverse.h:62
IndexType offset() const
Definition: TensorBlock.h:270
IndexType dimension(int index) const
Definition: TensorBlock.h:272
Definition: TensorBlock.h:566
TensorMaterializedBlock AsTensorMaterializedBlock() const
Definition: TensorBlock.h:644
Scalar * data() const
Definition: TensorBlock.h:640
Definition: TensorBlock.h:604
static EIGEN_STRONG_INLINE Storage prepareStorage(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool allow_strided_storage=false)
Definition: TensorBlock.h:671
@ WriteAccessors
Definition: Constants.h:374
@ ColMajor
Definition: Constants.h:318
char char * op
Definition: level2_impl.h:374
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t< NumTraits< T >::IsSigned||NumTraits< T >::IsComplex, typename NumTraits< T >::Real > abs(const T &x)
Definition: MathFunctions.h:1355
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
std::array< T, N > array
Definition: EmulateArray.h:231
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
Definition: Constants.h:519
T Real
Definition: NumTraits.h:183
Definition: TensorMeta.h:47
Definition: TensorForwardDeclarations.h:42
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType &x) const
Definition: TensorReverse.h:395
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition: TensorReverse.h:377
DSizes< Index, NumDims > Dimensions
Definition: TensorReverse.h:366
internal::TensorBlockNotImplemented TensorBlock
Definition: TensorReverse.h:385
TensorReverseOp< ReverseDimensions, ArgType > XprType
Definition: TensorReverse.h:363
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorReverse.h:388
XprType::CoeffReturnType CoeffReturnType
Definition: TensorReverse.h:380
TensorEvaluator< const TensorReverseOp< ReverseDimensions, ArgType >, Device > Base
Definition: TensorReverse.h:362
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition: TensorReverse.h:381
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar & coeffRef(Index index) const
Definition: TensorReverse.h:390
internal::TensorMaterializedBlock< CoeffReturnType, NumDims, Layout, Index > TensorBlock
Definition: TensorReverse.h:108
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
Definition: TensorReverse.h:191
EIGEN_STRONG_INLINE void cleanup()
Definition: TensorReverse.h:147
TensorEvaluator< ArgType, Device > m_impl
Definition: TensorReverse.h:338
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
Definition: TensorReverse.h:103
EIGEN_DEVICE_FUNC Storage::Type data() const
Definition: TensorReverse.h:332
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorReverse.h:186
XprType::CoeffReturnType CoeffReturnType
Definition: TensorReverse.h:84
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition: TensorReverse.h:111
internal::TensorIntDivisor< Index > IndexDivisor
Definition: TensorReverse.h:100
DSizes< Index, NumDims > Dimensions
Definition: TensorReverse.h:82
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorReverse.h:133
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition: TensorReverse.h:85
array< Index, NumDims > m_strides
Definition: TensorReverse.h:336
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
Definition: TensorReverse.h:212
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const
Definition: TensorReverse.h:205
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
Definition: TensorReverse.h:104
StorageMemory< CoeffReturnType, Device > Storage
Definition: TensorReverse.h:87
TensorReverseOp< ReverseDimensions, ArgType > XprType
Definition: TensorReverse.h:79
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex(Index index) const
Definition: TensorReverse.h:149
array< IndexDivisor, NumDims > m_fastStrides
Definition: TensorReverse.h:337
const Device EIGEN_DEVICE_REF m_device
Definition: TensorReverse.h:340
TensorEvaluator< const ArgType, Device >::TensorBlock ArgTensorBlock
Definition: TensorReverse.h:106
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
Definition: TensorReverse.h:135
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
Definition: TensorReverse.h:321
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition: TensorEvaluator.h:31
static constexpr int Layout
Definition: TensorEvaluator.h:46
const Device EIGEN_DEVICE_REF m_device
Definition: TensorEvaluator.h:170
Storage::Type EvaluatorPointerType
Definition: TensorEvaluator.h:41
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index) const
Definition: TensorEvaluator.h:94
@ PacketAccess
Definition: TensorEvaluator.h:50
@ IsAligned
Definition: TensorEvaluator.h:49
static constexpr int PacketSize
Definition: TensorEvaluator.h:38
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorEvaluator.h:89
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorEvaluator.h:69
Definition: Meta.h:305
const TensorReverseOp< ReverseDimensions, XprType > & type
Definition: TensorReverse.h:40
Definition: XprHelper.h:427
Definition: TensorTraits.h:152
ref_selector< T >::type type
Definition: TensorTraits.h:153
XprTraits::StorageKind StorageKind
Definition: TensorReverse.h:29
XprTraits::PointerType PointerType
Definition: TensorReverse.h:35
traits< XprType > XprTraits
Definition: TensorReverse.h:28
std::remove_reference_t< Nested > Nested_
Definition: TensorReverse.h:32
Definition: ForwardDeclarations.h:21