10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
27 template <DenseIndex DimId,
typename XprType>
33 typedef typename XprType::Nested
Nested;
34 typedef std::remove_reference_t<Nested>
Nested_;
35 static constexpr
int NumDimensions = XprTraits::NumDimensions - 1;
36 static constexpr
int Layout = XprTraits::Layout;
40 template <DenseIndex DimId,
typename XprType>
45 template <DenseIndex DimId,
typename XprType>
50 template <DenseIndex DimId>
69 template <DenseIndex DimId,
typename XprType>
82 eigen_assert(dim < XprType::NumDimensions && dim >= 0 &&
"Chip_Dim_out_of_range");
99 template <DenseIndex DimId,
typename ArgType,
typename Device>
102 static constexpr
int NumInputDims =
104 static constexpr
int NumDims = NumInputDims - 1;
146 : m_impl(
op.expression(), device), m_dim(
op.dim()),
m_device(device) {
154 for (
int i = 0;
i < NumInputDims; ++
i) {
155 if (
i != m_dim.actualDim()) {
156 m_dimensions[
j] = input_dims[
i];
164 for (
int i = 0;
i < m_dim.actualDim(); ++
i) {
165 m_stride *= input_dims[
i];
166 m_inputStride *= input_dims[
i];
169 for (
int i = NumInputDims - 1;
i > m_dim.actualDim(); --
i) {
170 m_stride *= input_dims[
i];
171 m_inputStride *= input_dims[
i];
174 m_inputStride *= input_dims[m_dim.actualDim()];
175 m_inputOffset = m_stride *
op.offset();
179 Index after_chipped_dim_product = 1;
180 for (
int i =
static_cast<int>(m_dim.actualDim()) + 1;
i < NumInputDims; ++
i) {
181 after_chipped_dim_product *= input_dims[
i];
184 Index before_chipped_dim_product = 1;
185 for (
int i = 0;
i < m_dim.actualDim(); ++
i) {
186 before_chipped_dim_product *= input_dims[
i];
190 m_isEffectivelyInnerChipping = before_chipped_dim_product == 1;
191 m_isEffectivelyOuterChipping = after_chipped_dim_product == 1;
193 m_isEffectivelyInnerChipping = after_chipped_dim_product == 1;
194 m_isEffectivelyOuterChipping = before_chipped_dim_product == 1;
201 m_impl.evalSubExprsIfNeeded(NULL);
205 #ifdef EIGEN_USE_THREADS
206 template <
typename EvalSubExprsCallback>
208 m_impl.evalSubExprsIfNeededAsync(
nullptr, [done](
bool) { done(
true); });
215 return m_impl.coeff(srcCoeff(index));
218 template <
int LoadMode>
222 if (isInnerChipping()) {
225 Index inputIndex = index * m_inputStride + m_inputOffset;
229 values[
i] = m_impl.coeff(inputIndex);
230 inputIndex += m_inputStride;
234 }
else if (isOuterChipping()) {
237 return m_impl.template packet<LoadMode>(index + m_inputOffset);
239 const Index idx = index / m_stride;
240 const Index rem = index - idx * m_stride;
242 Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
243 return m_impl.template packet<LoadMode>(inputIndex);
260 if ((
static_cast<int>(
Layout) ==
static_cast<int>(
ColMajor) && m_dim.actualDim() == 0) ||
261 (
static_cast<int>(
Layout) ==
static_cast<int>(
RowMajor) && m_dim.actualDim() == NumInputDims - 1)) {
262 cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
263 }
else if ((
static_cast<int>(
Layout) ==
static_cast<int>(
ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
264 (
static_cast<int>(
Layout) ==
static_cast<int>(
RowMajor) && m_dim.actualDim() == 0)) {
265 cost += TensorOpCost::AddCost<Index>();
267 cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() + 3 * TensorOpCost::AddCost<Index>();
274 const size_t target_size =
m_device.lastLevelCacheSize();
276 internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size), m_impl.getResourceRequirements());
280 bool root_of_expr_ast =
false)
const {
281 const Index chip_dim = m_dim.actualDim();
284 for (
int i = 0;
i < NumInputDims; ++
i) {
293 for (
int i = 0;
i < NumInputDims; ++
i) {
294 arg_destination_strides[
i] =
i < chip_dim ? desc.
destination().strides()[
i]
299 arg_desc.template AddDestinationBuffer<Layout>(desc.
destination().template data<ScalarNoConst>(),
300 arg_destination_strides);
303 ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
306 if (arg_block.
data() != NULL) {
317 TensorBlockAssignment;
319 TensorBlockAssignment::Run(
320 TensorBlockAssignment::target(arg_desc.
dimensions(), internal::strides<Layout>(arg_desc.
dimensions()),
321 block_storage.
data()),
330 if (isOuterChipping() && result) {
331 return result + m_inputOffset;
340 if (isInnerChipping()) {
343 inputIndex = index * m_inputStride + m_inputOffset;
344 }
else if (isOuterChipping()) {
348 inputIndex = index + m_inputOffset;
350 const Index idx = index / m_stride;
351 inputIndex = idx * m_inputStride + m_inputOffset;
352 index -= idx * m_stride;
359 return IsInnerChipping || m_isEffectivelyInnerChipping;
363 return IsOuterChipping || m_isEffectivelyOuterChipping;
381 template <DenseIndex DimId,
typename ArgType,
typename Device>
383 :
public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> {
386 static constexpr
int NumInputDims =
388 static constexpr
int NumDims = NumInputDims - 1;
411 return this->m_impl.coeffRef(this->srcCoeff(index));
414 template <
int StoreMode>
416 if (this->isInnerChipping()) {
420 internal::pstore<CoeffReturnType, PacketReturnType>(values,
x);
421 Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
424 this->m_impl.coeffRef(inputIndex) = values[
i];
425 inputIndex += this->m_inputStride;
427 }
else if (this->isOuterChipping()) {
430 this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset,
x);
432 const Index idx = index / this->m_stride;
433 const Index rem = index - idx * this->m_stride;
434 if (rem + PacketSize <= this->m_stride) {
435 const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
436 this->m_impl.template writePacket<StoreMode>(inputIndex,
x);
440 internal::pstore<CoeffReturnType, PacketReturnType>(values,
x);
450 template <
typename TensorBlock>
454 const Index chip_dim = this->m_dim.actualDim();
457 for (
int i = 0;
i < NumInputDims; ++
i) {
465 TensorBlockAssign::Run(
466 TensorBlockAssign::target(input_block_dims, internal::strides<Layout>(this->m_impl.dimensions()),
467 this->m_impl.data(), this->srcCoeff(desc.
offset())),
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:966
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define EIGEN_STATIC_ASSERT(X, MSG)
Definition: StaticAssert.h:26
#define EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(Derived)
Macro to manually inherit assignment operators. This is necessary, because the implicitly defined ass...
Definition: TensorMacros.h:81
#define EIGEN_DEVICE_REF
Definition: TensorMacros.h:34
SCALAR Scalar
Definition: bench_gemm.cpp:45
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Definition: CwiseBinaryOp.h:79
The tensor base class.
Definition: TensorBase.h:1026
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReshapingOp< const NewDimensions, const Derived > reshape(const NewDimensions &newDimensions) const
Definition: TensorBase.h:1106
Definition: TensorChipping.h:70
Eigen::internal::traits< TensorChippingOp >::Index Index
Definition: TensorChipping.h:78
TensorBase< TensorChippingOp< DimId, XprType > > Base
Definition: TensorChipping.h:72
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType &expr, const Index offset, const Index dim)
Definition: TensorChipping.h:80
Eigen::internal::traits< TensorChippingOp >::StorageKind StorageKind
Definition: TensorChipping.h:77
Eigen::internal::nested< TensorChippingOp >::type Nested
Definition: TensorChipping.h:76
XprType::Nested m_xpr
Definition: TensorChipping.h:93
Eigen::internal::traits< TensorChippingOp >::Scalar Scalar
Definition: TensorChipping.h:73
const internal::DimensionId< DimId > m_dim
Definition: TensorChipping.h:95
Eigen::NumTraits< Scalar >::Real RealScalar
Definition: TensorChipping.h:74
const Index m_offset
Definition: TensorChipping.h:94
EIGEN_DEVICE_FUNC const internal::remove_all_t< typename XprType::Nested > & expression() const
Definition: TensorChipping.h:88
EIGEN_DEVICE_FUNC const Index offset() const
Definition: TensorChipping.h:85
EIGEN_DEVICE_FUNC const Index dim() const
Definition: TensorChipping.h:86
XprType::CoeffReturnType CoeffReturnType
Definition: TensorChipping.h:75
Definition: TensorCostModel.h:28
Definition: TensorMorphing.h:53
Definition: TensorBlock.h:1314
const DestinationBuffer & destination() const
Definition: TensorBlock.h:275
IndexType offset() const
Definition: TensorBlock.h:270
bool HasDestinationBuffer() const
Definition: TensorBlock.h:295
TensorBlockDescriptor & DropDestinationBuffer()
Definition: TensorBlock.h:289
IndexType dimension(int index) const
Definition: TensorBlock.h:272
const Dimensions & dimensions() const
Definition: TensorBlock.h:271
Definition: TensorBlock.h:475
Definition: TensorBlock.h:638
TensorMaterializedBlock AsTensorMaterializedBlock() const
Definition: TensorBlock.h:644
Scalar * data() const
Definition: TensorBlock.h:640
Definition: TensorBlock.h:604
const Scalar * data() const
Definition: TensorBlock.h:625
const XprType & expr() const
Definition: TensorBlock.h:621
TensorBlockKind kind() const
Definition: TensorBlock.h:617
static EIGEN_STRONG_INLINE Storage prepareStorage(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool allow_strided_storage=false)
Definition: TensorBlock.h:671
@ ColMajor
Definition: Constants.h:318
@ RowMajor
Definition: Constants.h:320
char char * op
Definition: level2_impl.h:374
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
squared absolute value
Definition: GlobalFunctions.h:87
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T * constCast(const T *data)
Definition: TensorForwardDeclarations.h:31
EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex
Definition: Meta.h:75
const int Dynamic
Definition: Constants.h:25
Definition: Eigen_Colamd.h:49
list x
Definition: plotDoE.py:28
Definition: Constants.h:519
T Real
Definition: NumTraits.h:183
Definition: TensorMeta.h:47
Definition: TensorForwardDeclarations.h:42
DSizes< Index, NumDims > Dimensions
Definition: TensorChipping.h:390
TensorEvaluator< const TensorChippingOp< DimId, ArgType >, Device > Base
Definition: TensorChipping.h:384
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition: TensorChipping.h:393
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(const TensorBlockDesc &desc, const TensorBlock &block)
Definition: TensorChipping.h:451
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
Definition: TensorChipping.h:405
XprType::CoeffReturnType CoeffReturnType
Definition: TensorChipping.h:392
TensorChippingOp< DimId, ArgType > XprType
Definition: TensorChipping.h:385
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition: TensorChipping.h:408
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType &x) const
Definition: TensorChipping.h:415
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index) const
Definition: TensorChipping.h:410
XprType::Index Index
Definition: TensorChipping.h:389
XprType::Scalar Scalar
Definition: TensorChipping.h:391
Definition: TensorChipping.h:100
Storage::Type EvaluatorPointerType
Definition: TensorChipping.h:112
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
Definition: TensorChipping.h:219
const Device EIGEN_DEVICE_REF m_device
Definition: TensorChipping.h:372
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
Definition: TensorChipping.h:338
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorChipping.h:214
Index m_inputStride
Definition: TensorChipping.h:369
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isOuterChipping() const
Definition: TensorChipping.h:362
TensorEvaluator< ArgType, Device > m_impl
Definition: TensorChipping.h:370
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorChipping.h:198
XprType::Scalar Scalar
Definition: TensorChipping.h:107
TensorEvaluator< const ArgType, Device >::TensorBlock ArgTensorBlock
Definition: TensorChipping.h:140
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isInnerChipping() const
Definition: TensorChipping.h:358
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
Definition: TensorChipping.h:136
TensorChippingOp< DimId, ArgType > XprType
Definition: TensorChipping.h:101
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const
Definition: TensorChipping.h:273
StorageMemory< CoeffReturnType, Device > Storage
Definition: TensorChipping.h:111
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
Definition: TensorChipping.h:258
Dimensions m_dimensions
Definition: TensorChipping.h:366
XprType::Index Index
Definition: TensorChipping.h:105
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
Definition: TensorChipping.h:200
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
Definition: TensorChipping.h:137
internal::TensorMaterializedBlock< ScalarNoConst, NumDims, Layout, Index > TensorBlock
Definition: TensorChipping.h:142
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition: TensorChipping.h:145
internal::TensorBlockDescriptor< NumInputDims, Index > ArgTensorBlockDesc
Definition: TensorChipping.h:139
EIGEN_STRONG_INLINE void cleanup()
Definition: TensorChipping.h:212
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool root_of_expr_ast=false) const
Definition: TensorChipping.h:279
bool m_isEffectivelyOuterChipping
Definition: TensorChipping.h:377
XprType::CoeffReturnType CoeffReturnType
Definition: TensorChipping.h:108
DSizes< Index, NumDims > Dimensions
Definition: TensorChipping.h:106
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Storage::Type data() const
Definition: TensorChipping.h:328
bool m_isEffectivelyInnerChipping
Definition: TensorChipping.h:376
const internal::DimensionId< DimId > m_dim
Definition: TensorChipping.h:371
Index m_stride
Definition: TensorChipping.h:367
std::remove_const_t< Scalar > ScalarNoConst
Definition: TensorChipping.h:133
Index m_inputOffset
Definition: TensorChipping.h:368
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition: TensorChipping.h:109
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition: TensorEvaluator.h:31
static constexpr int Layout
Definition: TensorEvaluator.h:46
const Device EIGEN_DEVICE_REF m_device
Definition: TensorEvaluator.h:170
Storage::Type EvaluatorPointerType
Definition: TensorEvaluator.h:41
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index) const
Definition: TensorEvaluator.h:94
@ PacketAccess
Definition: TensorEvaluator.h:50
@ IsAligned
Definition: TensorEvaluator.h:49
static constexpr int PacketSize
Definition: TensorEvaluator.h:38
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition: TensorEvaluator.h:89
internal::TensorMaterializedBlock< ScalarNoConst, NumCoords, Layout, Index > TensorBlock
Definition: TensorEvaluator.h:63
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition: TensorEvaluator.h:69
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
Definition: TensorEvaluator.h:147
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const
Definition: TensorChipping.h:61
const DenseIndex actual_dim
Definition: TensorChipping.h:64
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim)
Definition: TensorChipping.h:60
Definition: TensorChipping.h:51
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim)
Definition: TensorChipping.h:52
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const
Definition: TensorChipping.h:56
Definition: TensorBlock.h:75
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockResourceRequirements merge(const TensorBlockResourceRequirements &lhs, const TensorBlockResourceRequirements &rhs)
Definition: TensorBlock.h:129
const TensorChippingOp< DimId, XprType > EIGEN_DEVICE_REF type
Definition: TensorChipping.h:42
Definition: XprHelper.h:427
TensorChippingOp< DimId, XprType > type
Definition: TensorChipping.h:47
Definition: TensorTraits.h:152
ref_selector< T >::type type
Definition: TensorTraits.h:153
XprTraits::StorageKind StorageKind
Definition: TensorChipping.h:31
XprType::Nested Nested
Definition: TensorChipping.h:33
XprTraits::PointerType PointerType
Definition: TensorChipping.h:37
traits< XprType > XprTraits
Definition: TensorChipping.h:30
XprTraits::Index Index
Definition: TensorChipping.h:32
XprType::Scalar Scalar
Definition: TensorChipping.h:29
std::remove_reference_t< Nested > Nested_
Definition: TensorChipping.h:34
Definition: ForwardDeclarations.h:21
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2