10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_ROLL_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_ROLL_H
24 template <
typename RollDimensions,
typename XprType>
30 typedef typename XprType::Nested
Nested;
31 typedef std::remove_reference_t<Nested>
Nested_;
32 static constexpr
int NumDimensions = XprTraits::NumDimensions;
33 static constexpr
int Layout = XprTraits::Layout;
37 template <
typename RollDimensions,
typename XprType>
42 template <
typename RollDimensions,
typename XprType>
49 template <
typename RollDimensions,
typename XprType>
75 template <
typename RollDimensions,
typename ArgType,
typename Device>
92 BlockAccess = NumDims > 0,
93 PreferBlockAccess =
true,
108 : m_impl(
op.expression(), device), m_rolls(
op.roll()),
m_device(device) {
112 m_dimensions = m_impl.dimensions();
115 for (
int i = 1;
i < NumDims; ++
i) {
116 m_strides[
i] = m_strides[
i - 1] * m_dimensions[
i - 1];
120 m_strides[NumDims - 1] = 1;
121 for (
int i = NumDims - 2;
i >= 0; --
i) {
122 m_strides[
i] = m_strides[
i + 1] * m_dimensions[
i + 1];
131 m_impl.evalSubExprsIfNeeded(
nullptr);
135 #ifdef EIGEN_USE_THREADS
136 template <
typename EvalSubExprsCallback>
138 m_impl.evalSubExprsIfNeededAsync(
nullptr, [done](
bool) { done(
true); });
145 auto const tmp = (
i +
r) %
n;
155 for (
int id = 0;
id < NumDims;
id++) {
157 rolledCoords[id] = roll(coords[
id], m_rolls[
id], m_dimensions[
id]);
164 Index rolledIndex = 0;
167 for (
int i = NumDims - 1;
i > 0; --
i) {
168 Index idx = index / m_fast_strides[
i];
169 index -= idx * m_strides[
i];
170 rolledIndex += roll(idx, m_rolls[
i], m_dimensions[
i]) * m_strides[
i];
172 rolledIndex += roll(index, m_rolls[0], m_dimensions[0]);
175 for (
int i = 0;
i < NumDims - 1; ++
i) {
176 Index idx = index / m_fast_strides[
i];
177 index -= idx * m_strides[
i];
178 rolledIndex += roll(idx, m_rolls[
i], m_dimensions[
i]) * m_strides[
i];
180 rolledIndex += roll(index, m_rolls[NumDims - 1], m_dimensions[NumDims - 1]);
186 return m_impl.coeff(rollIndex(index));
189 template <
int LoadMode>
202 const size_t target_size =
m_device.lastLevelCacheSize();
203 return internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size).addCostPerCoeff({0, 0, 24});
206 struct BlockIteratorState {
214 bool =
false)
const {
215 static const bool is_col_major =
static_cast<int>(
Layout) ==
static_cast<int>(
ColMajor);
219 extract_coordinates(desc.
offset(), coords);
226 for (
int i = 0;
i < NumDims; ++
i) {
227 const int dim = is_col_major ?
i : NumDims - 1 -
i;
229 it[
i].stride =
i == 0 ? 1 : (it[
i - 1].size * it[
i - 1].stride);
230 it[
i].span = it[
i].stride * (it[
i].size - 1);
239 static const int inner_dim = is_col_major ? 0 : NumDims - 1;
240 const Index inner_dim_size = it[0].size;
242 while (it[NumDims - 1].count < it[NumDims - 1].
size) {
244 for (;
i < inner_dim_size; ++
i) {
245 auto const rolled = rollCoords(coords);
246 auto const index = is_col_major ? m_dimensions.IndexOfColMajor(rolled) : m_dimensions.IndexOfRowMajor(rolled);
247 *(block_buffer + offset +
i) = m_impl.coeff(index);
250 coords[inner_dim] = initial_coords[inner_dim];
252 if (NumDims == 1)
break;
255 for (
i = 1;
i < NumDims; ++
i) {
256 if (++it[
i].count < it[
i].
size) {
257 offset += it[
i].stride;
258 coords[is_col_major ?
i : NumDims - 1 -
i]++;
261 if (
i != NumDims - 1) it[
i].count = 0;
262 coords[is_col_major ?
i : NumDims - 1 -
i] = initial_coords[is_col_major ?
i : NumDims - 1 -
i];
263 offset -= it[
i].span;
267 return block_storage.AsTensorMaterializedBlock();
271 double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
272 TensorOpCost::DivCost<Index>());
273 for (
int i = 0;
i < NumDims; ++
i) {
274 compute_cost += 2 * TensorOpCost::AddCost<Index>();
291 for (
int i = NumDims - 1;
i > 0; --
i) {
292 const Index idx = index / m_fast_strides[
i];
293 index -= idx * m_strides[
i];
298 for (
int i = 0;
i < NumDims - 1; ++
i) {
299 const Index idx = index / m_fast_strides[
i];
300 index -= idx * m_strides[
i];
303 coords[NumDims - 1] = index;
312 template <
typename RollDimensions,
typename ArgType,
typename Device>
314 :
public TensorEvaluator<const TensorRollOp<RollDimensions, ArgType>, Device> {
326 PreferBlockAccess =
false,
344 return this->m_impl.coeffRef(this->rollIndex(index));
347 template <
int StoreMode>
351 internal::pstore<CoeffReturnType, PacketReturnType>(values,
x);
int i
Definition: BiCGSTAB_step_by_step.cpp:9
const unsigned n
Definition: CG3DPackingUnitTest.cpp:11
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define EIGEN_STATIC_ASSERT(X, MSG)
Definition: StaticAssert.h:26
#define EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(Derived)
Macro to manually inherit assignment operators. This is necessary, because the implicitly defined ass...
Definition: TensorMacros.h:81
#define EIGEN_DEVICE_REF
Definition: TensorMacros.h:34
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
SCALAR Scalar
Definition: bench_gemm.cpp:45
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Definition: CwiseBinaryOp.h:79
The tensor base class.
Definition: TensorBase.h:1026
Definition: TensorCostModel.h:28
Definition: TensorRoll.h:50
TensorBase< TensorRollOp< RollDimensions, XprType >, WriteAccessors > Base
Definition: TensorRoll.h:52
Eigen::internal::nested< TensorRollOp >::type Nested
Definition: TensorRoll.h:56
const RollDimensions m_roll_dims
Definition: TensorRoll.h:71
Eigen::NumTraits< Scalar >::Real RealScalar
Definition: TensorRoll.h:54
Eigen::internal::traits< TensorRollOp >::Index Index
Definition: TensorRoll.h:58
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorRollOp(const XprType &expr, const RollDimensions &roll_dims)
Definition: TensorRoll.h:60
Eigen::internal::traits< TensorRollOp >::StorageKind StorageKind
Definition: TensorRoll.h:57
XprType::CoeffReturnType CoeffReturnType
Definition: TensorRoll.h:55
EIGEN_DEVICE_FUNC const internal::remove_all_t< typename XprType::Nested > & expression() const
Definition: TensorRoll.h:65
EIGEN_DEVICE_FUNC const RollDimensions & roll() const
Definition: TensorRoll.h:63
XprType::Nested m_xpr
Definition: TensorRoll.h:70
Eigen::internal::traits< TensorRollOp >::Scalar Scalar
Definition: TensorRoll.h:53
IndexType offset() const
Definition: TensorBlock.h:270
IndexType dimension(int index) const
Definition: TensorBlock.h:272
Definition: TensorBlock.h:566
Definition: TensorBlock.h:475
Definition: TensorBlock.h:604
static EIGEN_STRONG_INLINE Storage prepareStorage(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool allow_strided_storage=false)
Definition: TensorBlock.h:671
@ WriteAccessors
Definition: Constants.h:374
@ ColMajor
Definition: Constants.h:318
char char * op
Definition: level2_impl.h:374
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
std::array< T, N > array
Definition: EmulateArray.h:231
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
Definition: Constants.h:519
T Real
Definition: NumTraits.h:183
Definition: TensorMeta.h:47
Definition: TensorForwardDeclarations.h:42
DSizes< Index, NumDims > Dimensions
Definition: TensorRoll.h:319
XprType::CoeffReturnType CoeffReturnType
Definition: TensorRoll.h:333
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar & coeffRef(Index index) const
Definition: TensorRoll.h:343
TensorRollOp< RollDimensions, ArgType > XprType
Definition: TensorRoll.h:316
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition: TensorRoll.h:334
TensorEvaluator< const TensorRollOp< RollDimensions, ArgType >, Device > Base
Definition: TensorRoll.h:315
XprType::Index Index
Definition: TensorRoll.h:317
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorRoll.h:341
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType &x) const
Definition: TensorRoll.h:348
XprType::Scalar Scalar
Definition: TensorRoll.h:332
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition: TensorRoll.h:330
internal::TensorBlockNotImplemented TensorBlock
Definition: TensorRoll.h:338
Definition: TensorRoll.h:76
const Device EIGEN_DEVICE_REF m_device
Definition: TensorRoll.h:287
XprType::Scalar Scalar
Definition: TensorRoll.h:81
TensorEvaluator< ArgType, Device > m_impl
Definition: TensorRoll.h:285
Dimensions m_dimensions
Definition: TensorRoll.h:282
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
Definition: TensorRoll.h:270
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition: TensorRoll.h:107
array< Index, NumDims > m_strides
Definition: TensorRoll.h:283
TensorRollOp< RollDimensions, ArgType > XprType
Definition: TensorRoll.h:77
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index roll(Index const i, Index const r, Index const n) const
Definition: TensorRoll.h:144
DSizes< Index, NumDims > Dimensions
Definition: TensorRoll.h:80
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
Definition: TensorRoll.h:130
RollDimensions m_rolls
Definition: TensorRoll.h:286
EIGEN_DEVICE_FUNC Storage::Type data() const
Definition: TensorRoll.h:279
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const
Definition: TensorRoll.h:201
EIGEN_STRONG_INLINE void cleanup()
Definition: TensorRoll.h:142
StorageMemory< CoeffReturnType, Device > Storage
Definition: TensorRoll.h:85
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array< Index, NumDims > rollCoords(array< Index, NumDims > const &coords) const
Definition: TensorRoll.h:153
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition: TensorRoll.h:83
internal::TensorIntDivisor< Index > IndexDivisor
Definition: TensorRoll.h:98
Storage::Type EvaluatorPointerType
Definition: TensorRoll.h:86
array< IndexDivisor, NumDims > m_fast_strides
Definition: TensorRoll.h:284
XprType::Index Index
Definition: TensorRoll.h:78
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void extract_coordinates(Index index, array< Index, NumDims > &coords) const
Definition: TensorRoll.h:289
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorRoll.h:185
typename internal::TensorMaterializedBlock< CoeffReturnType, NumDims, Layout, Index > TensorBlock
Definition: TensorRoll.h:104
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
Definition: TensorRoll.h:213
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorRoll.h:128
XprType::CoeffReturnType CoeffReturnType
Definition: TensorRoll.h:82
typename TensorEvaluator< const ArgType, Device >::TensorBlock ArgTensorBlock
Definition: TensorRoll.h:103
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rollIndex(Index index) const
Definition: TensorRoll.h:162
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
Definition: TensorRoll.h:190
Index size
Definition: TensorRoll.h:209
Index stride
Definition: TensorRoll.h:207
Index count
Definition: TensorRoll.h:210
Index span
Definition: TensorRoll.h:208
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition: TensorEvaluator.h:31
static constexpr int Layout
Definition: TensorEvaluator.h:46
const Device EIGEN_DEVICE_REF m_device
Definition: TensorEvaluator.h:170
Storage::Type EvaluatorPointerType
Definition: TensorEvaluator.h:41
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index) const
Definition: TensorEvaluator.h:94
@ PacketAccess
Definition: TensorEvaluator.h:50
@ IsAligned
Definition: TensorEvaluator.h:49
static constexpr int PacketSize
Definition: TensorEvaluator.h:38
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorEvaluator.h:89
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorEvaluator.h:69
Definition: TensorBlock.h:75
const TensorRollOp< RollDimensions, XprType > & type
Definition: TensorRoll.h:39
Definition: XprHelper.h:427
TensorRollOp< RollDimensions, XprType > type
Definition: TensorRoll.h:44
Definition: TensorTraits.h:152
ref_selector< T >::type type
Definition: TensorTraits.h:153
XprTraits::PointerType PointerType
Definition: TensorRoll.h:34
XprType::Scalar Scalar
Definition: TensorRoll.h:26
XprType::Nested Nested
Definition: TensorRoll.h:30
std::remove_reference_t< Nested > Nested_
Definition: TensorRoll.h:31
traits< XprType > XprTraits
Definition: TensorRoll.h:27
XprTraits::Index Index
Definition: TensorRoll.h:29
XprTraits::StorageKind StorageKind
Definition: TensorRoll.h:28
Definition: ForwardDeclarations.h:21