10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
26 template <
typename Str
ides,
typename XprType>
32 typedef typename XprType::Nested
Nested;
33 typedef std::remove_reference_t<Nested>
Nested_;
34 static constexpr
int NumDimensions = XprTraits::NumDimensions;
35 static constexpr
int Layout = XprTraits::Layout;
39 template <
typename Str
ides,
typename XprType>
44 template <
typename Str
ides,
typename XprType>
51 template <
typename Str
ides,
typename XprType>
77 template <
typename Str
ides,
typename ArgType,
typename Device>
105 m_dimensions = m_impl.dimensions();
106 for (
int i = 0;
i < NumDims; ++
i) {
112 m_outputStrides[0] = 1;
113 m_inputStrides[0] = 1;
114 for (
int i = 1;
i < NumDims; ++
i) {
115 m_outputStrides[
i] = m_outputStrides[
i - 1] * m_dimensions[
i - 1];
116 m_inputStrides[
i] = m_inputStrides[
i - 1] * input_dims[
i - 1];
117 m_inputStrides[
i - 1] *=
op.strides()[
i - 1];
119 m_inputStrides[NumDims - 1] *=
op.strides()[NumDims - 1];
121 m_outputStrides[NumDims - 1] = 1;
122 m_inputStrides[NumDims - 1] = 1;
123 for (
int i = NumDims - 2;
i >= 0; --
i) {
124 m_outputStrides[
i] = m_outputStrides[
i + 1] * m_dimensions[
i + 1];
125 m_inputStrides[
i] = m_inputStrides[
i + 1] * input_dims[
i + 1];
126 m_inputStrides[
i + 1] *=
op.strides()[
i + 1];
128 m_inputStrides[0] *=
op.strides()[0];
135 m_impl.evalSubExprsIfNeeded(NULL);
141 return m_impl.coeff(srcCoeff(index));
144 template <
int LoadMode>
149 Index inputIndices[] = {0, 0};
153 for (
int i = NumDims - 1;
i > 0; --
i) {
154 const Index idx0 = indices[0] / m_outputStrides[
i];
155 const Index idx1 = indices[1] / m_outputStrides[
i];
156 inputIndices[0] += idx0 * m_inputStrides[
i];
157 inputIndices[1] += idx1 * m_inputStrides[
i];
158 indices[0] -= idx0 * m_outputStrides[
i];
159 indices[1] -= idx1 * m_outputStrides[
i];
161 inputIndices[0] += indices[0] * m_inputStrides[0];
162 inputIndices[1] += indices[1] * m_inputStrides[0];
165 for (
int i = 0;
i < NumDims - 1; ++
i) {
166 const Index idx0 = indices[0] / m_outputStrides[
i];
167 const Index idx1 = indices[1] / m_outputStrides[
i];
168 inputIndices[0] += idx0 * m_inputStrides[
i];
169 inputIndices[1] += idx1 * m_inputStrides[
i];
170 indices[0] -= idx0 * m_outputStrides[
i];
171 indices[1] -= idx1 * m_outputStrides[
i];
173 inputIndices[0] += indices[0] * m_inputStrides[NumDims - 1];
174 inputIndices[1] += indices[1] * m_inputStrides[NumDims - 1];
176 if (inputIndices[1] - inputIndices[0] ==
PacketSize - 1) {
181 values[0] = m_impl.coeff(inputIndices[0]);
182 values[
PacketSize - 1] = m_impl.coeff(inputIndices[1]);
193 double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() + TensorOpCost::MulCost<Index>() +
194 TensorOpCost::DivCost<Index>()) +
195 TensorOpCost::MulCost<Index>();
199 const int innerDim = (
static_cast<int>(
Layout) ==
static_cast<int>(
ColMajor)) ? 0 : (NumDims - 1);
200 return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
209 Index inputIndex = 0;
212 for (
int i = NumDims - 1;
i > 0; --
i) {
213 const Index idx = index / m_outputStrides[
i];
214 inputIndex += idx * m_inputStrides[
i];
215 index -= idx * m_outputStrides[
i];
217 inputIndex += index * m_inputStrides[0];
220 for (
int i = 0;
i < NumDims - 1; ++
i) {
221 const Index idx = index / m_outputStrides[
i];
222 inputIndex += idx * m_inputStrides[
i];
223 index -= idx * m_outputStrides[
i];
225 inputIndex += index * m_inputStrides[NumDims - 1];
237 template <
typename Str
ides,
typename ArgType,
typename Device>
239 :
public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> {
250 PreferBlockAccess =
false,
264 return this->m_impl.coeffRef(this->srcCoeff(index));
267 template <
int StoreMode>
272 Index inputIndices[] = {0, 0};
276 for (
int i = NumDims - 1;
i > 0; --
i) {
277 const Index idx0 = indices[0] / this->m_outputStrides[
i];
278 const Index idx1 = indices[1] / this->m_outputStrides[
i];
279 inputIndices[0] += idx0 * this->m_inputStrides[
i];
280 inputIndices[1] += idx1 * this->m_inputStrides[
i];
281 indices[0] -= idx0 * this->m_outputStrides[
i];
282 indices[1] -= idx1 * this->m_outputStrides[
i];
284 inputIndices[0] += indices[0] * this->m_inputStrides[0];
285 inputIndices[1] += indices[1] * this->m_inputStrides[0];
288 for (
int i = 0;
i < NumDims - 1; ++
i) {
289 const Index idx0 = indices[0] / this->m_outputStrides[
i];
290 const Index idx1 = indices[1] / this->m_outputStrides[
i];
291 inputIndices[0] += idx0 * this->m_inputStrides[
i];
292 inputIndices[1] += idx1 * this->m_inputStrides[
i];
293 indices[0] -= idx0 * this->m_outputStrides[
i];
294 indices[1] -= idx1 * this->m_outputStrides[
i];
296 inputIndices[0] += indices[0] * this->m_inputStrides[NumDims - 1];
297 inputIndices[1] += indices[1] * this->m_inputStrides[NumDims - 1];
299 if (inputIndices[1] - inputIndices[0] ==
PacketSize - 1) {
300 this->m_impl.template writePacket<Unaligned>(inputIndices[0],
x);
303 internal::pstore<Scalar, PacketReturnType>(values,
x);
304 this->m_impl.coeffRef(inputIndices[0]) = values[0];
305 this->m_impl.coeffRef(inputIndices[1]) = values[
PacketSize - 1];
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define EIGEN_STATIC_ASSERT(X, MSG)
Definition: StaticAssert.h:26
#define EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(Derived)
Macro to manually inherit assignment operators. This is necessary, because the implicitly defined ass...
Definition: TensorMacros.h:81
#define EIGEN_DEVICE_REF
Definition: TensorMacros.h:34
SCALAR Scalar
Definition: bench_gemm.cpp:45
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Definition: CwiseBinaryOp.h:79
The tensor base class.
Definition: TensorBase.h:1026
Definition: TensorCostModel.h:28
Definition: TensorStriding.h:52
XprType::CoeffReturnType CoeffReturnType
Definition: TensorStriding.h:57
TensorBase< TensorStridingOp< Strides, XprType > > Base
Definition: TensorStriding.h:54
Eigen::internal::traits< TensorStridingOp >::Index Index
Definition: TensorStriding.h:60
Eigen::internal::traits< TensorStridingOp >::StorageKind StorageKind
Definition: TensorStriding.h:59
XprType::Nested m_xpr
Definition: TensorStriding.h:72
EIGEN_DEVICE_FUNC const Strides & strides() const
Definition: TensorStriding.h:65
Eigen::internal::traits< TensorStridingOp >::Scalar Scalar
Definition: TensorStriding.h:55
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType &expr, const Strides &dims)
Definition: TensorStriding.h:62
const Strides m_dims
Definition: TensorStriding.h:73
Eigen::internal::nested< TensorStridingOp >::type Nested
Definition: TensorStriding.h:58
EIGEN_DEVICE_FUNC const internal::remove_all_t< typename XprType::Nested > & expression() const
Definition: TensorStriding.h:67
Eigen::NumTraits< Scalar >::Real RealScalar
Definition: TensorStriding.h:56
Definition: TensorBlock.h:566
@ ColMajor
Definition: Constants.h:318
char char * op
Definition: level2_impl.h:374
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar() ceil(const Scalar &x)
Definition: MathFunctions.h:1205
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
std::array< T, N > array
Definition: EmulateArray.h:231
squared absolute value
Definition: GlobalFunctions.h:87
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
Definition: Constants.h:519
T Real
Definition: NumTraits.h:183
Definition: TensorMeta.h:47
Definition: TensorForwardDeclarations.h:42
XprType::CoeffReturnType CoeffReturnType
Definition: TensorStriding.h:259
XprType::Index Index
Definition: TensorStriding.h:257
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar & coeffRef(Index index) const
Definition: TensorStriding.h:263
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition: TensorStriding.h:255
TensorStridingOp< Strides, ArgType > XprType
Definition: TensorStriding.h:240
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition: TensorStriding.h:260
XprType::Scalar Scalar
Definition: TensorStriding.h:258
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType &x) const
Definition: TensorStriding.h:268
TensorEvaluator< const XprType, Device > Base
Definition: TensorStriding.h:241
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition: TensorStriding.h:104
array< Index, NumDims > m_inputStrides
Definition: TensorStriding.h:232
StorageMemory< CoeffReturnType, Device > Storage
Definition: TensorStriding.h:87
EIGEN_STRONG_INLINE void cleanup()
Definition: TensorStriding.h:138
TensorEvaluator< ArgType, Device > m_impl
Definition: TensorStriding.h:233
TensorStridingOp< Strides, ArgType > XprType
Definition: TensorStriding.h:79
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
Definition: TensorStriding.h:145
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorStriding.h:140
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorStriding.h:132
XprType::Scalar Scalar
Definition: TensorStriding.h:83
XprType::CoeffReturnType CoeffReturnType
Definition: TensorStriding.h:84
Storage::Type EvaluatorPointerType
Definition: TensorStriding.h:88
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
Definition: TensorStriding.h:192
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
Definition: TensorStriding.h:208
Dimensions m_dimensions
Definition: TensorStriding.h:230
XprType::Index Index
Definition: TensorStriding.h:80
DSizes< Index, NumDims > Dimensions
Definition: TensorStriding.h:82
EIGEN_DEVICE_FUNC Storage::Type data() const
Definition: TensorStriding.h:205
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
Definition: TensorStriding.h:134
internal::TensorBlockNotImplemented TensorBlock
Definition: TensorStriding.h:101
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition: TensorStriding.h:85
array< Index, NumDims > m_outputStrides
Definition: TensorStriding.h:231
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition: TensorEvaluator.h:31
static constexpr int Layout
Definition: TensorEvaluator.h:46
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index) const
Definition: TensorEvaluator.h:94
@ PacketAccess
Definition: TensorEvaluator.h:50
@ IsAligned
Definition: TensorEvaluator.h:49
static constexpr int PacketSize
Definition: TensorEvaluator.h:38
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorEvaluator.h:89
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorEvaluator.h:69
const TensorStridingOp< Strides, XprType > EIGEN_DEVICE_REF type
Definition: TensorStriding.h:41
Definition: XprHelper.h:427
TensorStridingOp< Strides, XprType > type
Definition: TensorStriding.h:46
Definition: TensorTraits.h:152
ref_selector< T >::type type
Definition: TensorTraits.h:153
XprTraits::PointerType PointerType
Definition: TensorStriding.h:36
traits< XprType > XprTraits
Definition: TensorStriding.h:29
XprTraits::Index Index
Definition: TensorStriding.h:31
std::remove_reference_t< Nested > Nested_
Definition: TensorStriding.h:33
XprType::Scalar Scalar
Definition: TensorStriding.h:28
XprType::Nested Nested
Definition: TensorStriding.h:32
XprTraits::StorageKind StorageKind
Definition: TensorStriding.h:30
Definition: ForwardDeclarations.h:21