8 #ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
9 #define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
19 template <
typename Scalar,
typename IndexType,
int NumDims,
int Layout>
28 template <
int Layout,
typename IndexType,
int NumDims>
31 if (NumDims == 0)
return strides;
35 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
37 for (
int i = 1;
i < NumDims; ++
i) {
42 for (
int i = NumDims - 2;
i >= 0; --
i) {
50 template <
int Layout,
typename IndexType,
size_t NumDims>
55 template <
int Layout, std::ptrdiff_t... Indices>
57 return strides<Layout>(
DSizes<std::ptrdiff_t,
sizeof...(Indices)>(
sizes));
88 template <
typename Scalar>
95 template <
typename Scalar>
97 size_t size_in_bytes) {
112 return withShapeAndSize<Scalar>(
shape_type, size_in_bytes,
118 template <
typename Scalar>
123 template <
typename Scalar>
162 return lhs_cost + rhs_cost;
170 template <
int NumDims,
typename IndexType = Eigen::Index>
218 template <
typename Scalar>
232 template <
typename Scalar>
236 template <
int Layout,
typename Scalar>
241 template <
int Layout>
244 const Dimensions& desc_strides = internal::strides<Layout>(desc_dims);
245 for (
int i = 0;
i < NumDims; ++
i) {
246 if (desc_dims[
i] == 1)
continue;
277 template <
int Layout,
typename Scalar>
280 m_destination = DestinationBuffer::template make<Layout>(*
this, dst_base, dst_strides);
283 template <
int Layout,
typename Scalar,
typename DstStridesIndexType>
286 AddDestinationBuffer<Layout>(dst_base,
Dimensions(dst_strides));
313 template <
int NumDims,
int Layout,
typename IndexType = Eigen::Index>
336 static const bool isColMajor = Layout ==
static_cast<int>(
ColMajor);
338 IndexType offset = 0;
344 for (
int i = NumDims - 1;
i >= 0; --
i) {
345 const int dim = isColMajor ?
i : NumDims -
i - 1;
355 return {offset, dimensions};
362 IndexType target_block_size = numext::maxi<IndexType>(1,
static_cast<IndexType
>(
m_requirements.
size));
370 if (tensor_size == 0) {
371 for (
int i = 0;
i < NumDims; ++
i) {
379 if (tensor_size <= target_block_size) {
384 for (
int i = 0;
i < NumDims; ++
i) {
391 static const bool isColMajor = Layout ==
static_cast<int>(
ColMajor);
395 IndexType coeff_to_allocate = target_block_size;
397 for (
int i = 0;
i < NumDims; ++
i) {
398 const int dim = isColMajor ?
i : NumDims -
i - 1;
408 const IndexType dim_size_target = convert_index<IndexType>(
411 for (
int i = 0;
i < NumDims; ++
i) {
421 for (
int i = 0;
i < NumDims; ++
i) {
422 const int dim = isColMajor ?
i : NumDims -
i - 1;
426 const IndexType alloc_avail = numext::div_ceil<IndexType>(target_block_size, total_size_other_dims);
445 for (
int i = 0;
i < NumDims; ++
i) {
474 template <
typename Device>
490 const int num_allocations =
static_cast<int>(
m_allocations.size());
508 if (!has_allocation) {
576 template <
typename XprType>
603 template <
typename Scalar,
int NumDims,
int Layout,
typename IndexType = Eigen::Index>
610 bool valid_expr =
true)
654 bool strided_storage)
670 template <
typename TensorBlockScratch>
672 bool allow_strided_storage =
false) {
683 }
else if (desc.
destination().
kind() == DestinationBuffer::kStrided && allow_strided_storage) {
690 void* mem = scratch.allocate(desc.
size() *
sizeof(
Scalar));
698 template <
typename DataDimensions,
typename TensorBlockScratch>
713 static const bool is_col_major = Layout ==
ColMajor;
716 int num_matching_inner_dims = 0;
717 for (
int i = 0;
i < NumDims; ++
i) {
718 int dim = is_col_major ?
i : NumDims -
i - 1;
719 if (data_dims[dim] != desc.
dimensions()[dim])
break;
720 ++num_matching_inner_dims;
725 bool can_use_direct_access =
true;
726 for (
int i = num_matching_inner_dims + 1;
i < NumDims; ++
i) {
727 int dim = is_col_major ?
i : NumDims -
i - 1;
729 can_use_direct_access =
false;
734 if (can_use_direct_access) {
766 template <
typename UnaryOp,
typename ArgTensorBlock>
795 template <
typename BinaryOp,
typename LhsTensorBlock,
typename RhsTensorBlock>
801 typedef std::conditional_t<
833 template <
typename BlockFactory,
typename ArgTensorBlock>
860 template <
typename BlockFactory,
typename Arg1TensorBlock,
typename Arg2TensorBlock,
typename Arg3TensorBlock>
878 const Arg3TensorBlock& arg3_block,
const BlockFactory& factory)
901 template <
typename Scalar,
typename IndexType>
939 template <
typename StridedLinearBufferCopy::Kind kind>
945 template <
typename StridedLinearBufferCopy::Kind kind>
948 const IndexType src_offset,
const IndexType src_stride,
950 const Scalar* src = &src_data[src_offset];
951 Scalar* dst = &dst_data[dst_offset];
955 dst[
i * dst_stride] = src[
i * src_stride];
969 for (
int j = 0;
j < 4; ++
j) {
976 pstoreu<Scalar, Packet>(dst +
i,
p);
980 if (
i < vectorized_half_size) {
982 pstoreu<Scalar, HalfPacket>(dst +
i,
p);
986 for (;
i < count; ++
i) {
995 pscatter<Scalar, Packet>(dst +
i * dst_stride,
p, dst_stride);
999 if (
i < vectorized_half_size) {
1001 pscatter<Scalar, HalfPacket>(dst +
i * dst_stride,
p, dst_stride);
1005 for (;
i < count; ++
i) {
1006 dst[
i * dst_stride] = src[
i];
1017 for (
int j = 0;
j < 4; ++
j) {
1022 pstoreu<Scalar, Packet>(dst +
i,
p);
1024 if (HasHalfPacket) {
1026 if (
i < vectorized_half_size) {
1028 pstoreu<Scalar, HalfPacket>(dst +
i, hp);
1032 for (;
i < count; ++
i) {
1042 pscatter<Scalar, Packet>(dst +
i * dst_stride,
p, dst_stride);
1044 if (HasHalfPacket) {
1046 if (
i < vectorized_half_size) {
1048 pscatter<Scalar, HalfPacket>(dst +
i * dst_stride, hp, dst_stride);
1052 for (;
i < count; ++
i) {
1053 dst[
i * dst_stride] =
s;
1060 Packet p = pgather<Scalar, Packet>(src +
i * src_stride, src_stride);
1061 pstoreu<Scalar, Packet>(dst +
i,
p);
1063 if (HasHalfPacket) {
1065 if (
i < vectorized_half_size) {
1066 HalfPacket p = pgather<Scalar, HalfPacket>(src +
i * src_stride, src_stride);
1067 pstoreu<Scalar, HalfPacket>(dst +
i,
p);
1071 for (;
i < count; ++
i) {
1072 dst[
i] = src[
i * src_stride];
1077 for (;
i < count; ++
i) {
1078 dst[
i * dst_stride] = src[
i * src_stride];
1092 template <
typename Scalar,
typename IndexType,
int NumDims,
int Layout>
1138 int inner_dim =
IsColMajor ? 0 : NumDims - 1;
1160 int num_size_one_inner_dims = 0;
1161 for (
int i = 0;
i < num_squeezable_dims; ++
i) {
1163 if (dst.
dims[dst_dim] != 1)
break;
1164 num_size_one_inner_dims++;
1168 if (num_size_one_inner_dims == NumDims) {
1174 const int dst_stride1_dim =
IsColMajor ? num_size_one_inner_dims : NumDims - num_size_one_inner_dims - 1;
1177 const int src_dim_for_dst_stride1_dim = NumDims == 0 ? 1 : dim_map[dst_stride1_dim];
1180 IndexType dst_inner_dim_size = NumDims == 0 ? 1 : dst.
dims[dst_stride1_dim];
1184 for (
int i = num_size_one_inner_dims + 1;
i < num_squeezable_dims; ++
i) {
1186 const IndexType dst_stride = dst.
strides[dst_dim];
1187 const IndexType src_stride = src.
strides[dim_map[dst_dim]];
1188 if (dst_inner_dim_size == dst_stride && dst_stride == src_stride) {
1189 dst_inner_dim_size *= dst.
dims[dst_dim];
1190 ++num_size_one_inner_dims;
1197 IndexType input_offset = src.
offset;
1198 IndexType output_offset = dst.
offset;
1199 IndexType input_stride = NumDims == 0 ? 1 : src.
strides[src_dim_for_dst_stride1_dim];
1200 IndexType output_stride = NumDims == 0 ? 1 : dst.
strides[dst_stride1_dim];
1202 const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
1207 for (
int i = num_size_one_inner_dims;
i < NumDims - 1; ++
i) {
1208 const int dst_dim =
IsColMajor ?
i + 1 : NumDims -
i - 2;
1209 if (dst.
dims[dst_dim] == 1)
continue;
1211 it[idx].size = dst.
dims[dst_dim];
1212 it[idx].input_stride = src.
strides[dim_map[dst_dim]];
1213 it[idx].output_stride = dst.
strides[dst_dim];
1215 it[idx].input_span = it[idx].input_stride * (it[idx].size - 1);
1216 it[idx].output_span = it[idx].output_stride * (it[idx].size - 1);
1222 const IndexType block_total_size = NumDims == 0 ? 1 : dst.
dims.
TotalSize();
1224 #define COPY_INNER_DIM(KIND) \
1225 IndexType num_copied = 0; \
1226 for (num_copied = 0; num_copied < block_total_size; num_copied += dst_inner_dim_size) { \
1227 LinCopy::template Run<KIND>(typename LinCopy::Dst(output_offset, output_stride, dst.data), \
1228 typename LinCopy::Src(input_offset, input_stride, src.data), dst_inner_dim_size); \
1230 for (int j = 0; j < idx; ++j) { \
1231 if (++it[j].count < it[j].size) { \
1232 input_offset += it[j].input_stride; \
1233 output_offset += it[j].output_stride; \
1237 input_offset -= it[j].input_span; \
1238 output_offset -= it[j].output_span; \
1243 if (input_stride == 1 && output_stride == 1) {
1245 }
else if (input_stride == 1 && output_stride != 1) {
1247 }
else if (input_stride == 0 && output_stride == 1) {
1249 }
else if (input_stride == 0 && output_stride != 1) {
1251 }
else if (output_stride == 1) {
1257 #undef COPY_INNER_DIM
1264 for (
int i = 0;
i < NumDims; ++
i) dst_to_src_map[
i] =
i;
1265 return Copy(dst, src, dst_to_src_map);
1284 int num_squeezable_dims = 0;
1285 for (
int i = 0;
i < NumDims; ++
i) {
1287 if (dim_map[dim] != dim)
break;
1288 num_squeezable_dims++;
1290 return num_squeezable_dims;
1313 template <
typename Scalar,
int NumDims,
typename TensorBlockExpr,
typename IndexType = Eigen::Index>
1322 template <
bool Vectorizable,
typename Evaluator>
1325 for (IndexType
i = 0;
i < count; ++
i) {
1331 template <
typename Evaluator>
1341 for (
int j = 0;
j < 4; ++
j) {
1342 const IndexType idx = eval_offset +
i +
j *
PacketSize;
1349 Packet p =
eval.template packet<Unaligned>(eval_offset +
i);
1353 for (;
i < count; ++
i) {
1362 IndexType target_offset = 0)
1372 IndexType target_offset = 0) {
1373 return Target(target_dims, target_strides, target_data, target_offset);
1376 template <
typename TargetDimsIndexType,
typename TargetStridesIndexType>
1379 IndexType target_offset = 0) {
1393 static const bool is_col_major = Layout ==
ColMajor;
1397 const int inner_dim_idx = is_col_major ? 0 : NumDims - 1;
1398 IndexType output_inner_dim_size =
target.
dims[inner_dim_idx];
1404 IndexType num_squeezed_dims = 0;
1405 for (
Index i = 1;
i < NumDims; ++
i) {
1406 const Index dim = is_col_major ?
i : NumDims -
i - 1;
1409 if (output_inner_dim_size == target_stride) {
1411 num_squeezed_dims++;
1422 for (
Index i = num_squeezed_dims;
i < NumDims - 1; ++
i) {
1423 const Index dim = is_col_major ?
i + 1 : NumDims -
i - 2;
1428 it[idx].output_span = it[idx].output_stride * (it[idx].size - 1);
1434 IndexType input_offset = 0;
1438 for (IndexType
i = 0;
i < output_size;
i += output_inner_dim_size) {
1441 target.
data + output_offset, output_inner_dim_size,
eval, input_offset);
1444 input_offset += output_inner_dim_size;
1447 for (
int j = 0;
j < idx; ++
j) {
1448 if (++it[
j].count < it[
j].
size) {
1449 output_offset += it[
j].output_stride;
1453 output_offset -= it[
j].output_span;
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define EIGEN_RESTRICT
Definition: Macros.h:1067
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:845
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:966
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define COPY_INNER_DIM(KIND)
float * p
Definition: Tutorial_Map_using.cpp:9
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
SCALAR Scalar
Definition: bench_gemm.cpp:45
internal::packet_traits< Scalar >::type Packet
Definition: benchmark-blocking-sizes.cpp:54
Definition: TensorExpr.h:162
Definition: TensorExpr.h:97
A tensor expression mapping an existing array of data.
Definition: TensorMap.h:33
Definition: TensorCostModel.h:28
Definition: TensorBlock.h:902
packet_traits< Scalar >::type Packet
Definition: TensorBlock.h:903
Kind
Definition: TensorBlock.h:914
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(const Dst &dst, const Src &src, const size_t count)
Definition: TensorBlock.h:940
@ HalfPacketSize
Definition: TensorBlock.h:908
@ PacketSize
Definition: TensorBlock.h:907
@ Vectorizable
Definition: TensorBlock.h:906
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(const IndexType count, const IndexType dst_offset, const IndexType dst_stride, Scalar *EIGEN_RESTRICT dst_data, const IndexType src_offset, const IndexType src_stride, const Scalar *EIGEN_RESTRICT src_data)
Definition: TensorBlock.h:946
unpacket_traits< Packet >::half HalfPacket
Definition: TensorBlock.h:904
Definition: TensorBlock.h:1314
DSizes< IndexType, NumDims > Dimensions
Definition: TensorBlock.h:1318
@ PacketSize
Definition: TensorBlock.h:1320
@ Vectorizable
Definition: TensorBlock.h:1320
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(const Target &target, const TensorBlockExpr &expr)
Definition: TensorBlock.h:1384
static Target target(const Dimensions &target_dims, const Dimensions &target_strides, Scalar *target_data, IndexType target_offset=0)
Definition: TensorBlock.h:1371
static Target target(const DSizes< TargetDimsIndexType, NumDims > &target_dims, const DSizes< TargetStridesIndexType, NumDims > &target_strides, Scalar *target_data, IndexType target_offset=0)
Definition: TensorBlock.h:1377
TensorEvaluator< const TensorBlockExpr, DefaultDevice > TensorBlockEvaluator
Definition: TensorBlock.h:1316
Definition: TensorBlock.h:185
static DestinationBuffer make(const TensorBlockDescriptor &desc, Scalar *data, const Dimensions &strides)
Definition: TensorBlock.h:237
static DestinationBufferKind kind(const TensorBlockDescriptor &desc, const Dimensions &strides)
Definition: TensorBlock.h:242
DestinationBufferKind m_kind
Definition: TensorBlock.h:261
DestinationBufferKind
Definition: TensorBlock.h:187
@ kEmpty
Definition: TensorBlock.h:197
@ kStrided
Definition: TensorBlock.h:215
@ kContiguous
Definition: TensorBlock.h:203
void * m_data
Definition: TensorBlock.h:254
DestinationBuffer()
Definition: TensorBlock.h:230
Dimensions m_strides
Definition: TensorBlock.h:259
const DestinationBufferKind & kind() const
Definition: TensorBlock.h:225
const Dimensions & strides() const
Definition: TensorBlock.h:224
Scalar * data() const
Definition: TensorBlock.h:219
size_t m_data_type_size
Definition: TensorBlock.h:255
DestinationBuffer(Scalar *data, const Dimensions &strides, DestinationBufferKind kind)
Definition: TensorBlock.h:233
Definition: TensorBlock.h:171
TensorBlockDescriptor WithOffset(IndexType offset) const
Definition: TensorBlock.h:298
void AddDestinationBuffer(Scalar *dst_base, const DSizes< DstStridesIndexType, NumDims > &dst_strides)
Definition: TensorBlock.h:284
void AddDestinationBuffer(Scalar *dst_base, const Dimensions &dst_strides)
Definition: TensorBlock.h:278
DSizes< IndexType, NumDims > Dimensions
Definition: TensorBlock.h:173
IndexType size() const
Definition: TensorBlock.h:273
const DestinationBuffer & destination() const
Definition: TensorBlock.h:275
IndexType offset() const
Definition: TensorBlock.h:270
bool HasDestinationBuffer() const
Definition: TensorBlock.h:295
TensorBlockDescriptor & DropDestinationBuffer()
Definition: TensorBlock.h:289
IndexType dimension(int index) const
Definition: TensorBlock.h:272
const Dimensions & dimensions() const
Definition: TensorBlock.h:271
const IndexType m_offset
Definition: TensorBlock.h:305
DestinationBuffer m_destination
Definition: TensorBlock.h:307
const Dimensions m_dimensions
Definition: TensorBlock.h:306
TensorBlockDescriptor(const IndexType offset, const Dimensions &dimensions, const DestinationBuffer &destination)
Definition: TensorBlock.h:264
TensorBlockDescriptor(const IndexType offset, const Dimensions &dimensions)
Definition: TensorBlock.h:267
Definition: TensorBlock.h:1093
static int NumSqueezableInnerDims(const DimensionsMap &dim_map)
Definition: TensorBlock.h:1283
static constexpr bool IsColMajor
Definition: TensorBlock.h:1094
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType Copy(const Dst &dst, const Src &src, const DimensionsMap &dst_to_src_dim_map)
Definition: TensorBlock.h:1126
StridedLinearBufferCopy< Scalar, IndexType > LinCopy
Definition: TensorBlock.h:1096
DSizes< IndexType, NumDims > Dimensions
Definition: TensorBlock.h:1099
DSizes< int, NumDims > DimensionsMap
Definition: TensorBlock.h:1100
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexType Copy(const Dst &dst, const Src &src)
Definition: TensorBlock.h:1262
Definition: TensorBlock.h:314
IndexType m_total_block_count
Definition: TensorBlock.h:459
TensorBlockResourceRequirements m_requirements
Definition: TensorBlock.h:456
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DSizes< IndexType, NumDims > & blockDimensions() const
Definition: TensorBlock.h:331
TensorBlockMapper(const DSizes< IndexType, NumDims > &dimensions, const TensorBlockResourceRequirements &requirements)
Definition: TensorBlock.h:321
DSizes< IndexType, NumDims > m_block_dimensions
Definition: TensorBlock.h:458
TensorBlockDescriptor< NumDims, IndexType > BlockDescriptor
Definition: TensorBlock.h:315
DSizes< IndexType, NumDims > m_tensor_strides
Definition: TensorBlock.h:461
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockCount() const
Definition: TensorBlock.h:327
DSizes< IndexType, NumDims > m_tensor_dimensions
Definition: TensorBlock.h:455
DSizes< IndexType, NumDims > m_block_strides
Definition: TensorBlock.h:462
void InitializeBlockDimensions()
Definition: TensorBlock.h:359
DSizes< IndexType, NumDims > Dimensions
Definition: TensorBlock.h:318
TensorBlockMapper()=default
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockTotalSize() const
Definition: TensorBlock.h:329
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockDescriptor blockDescriptor(IndexType block_index) const
Definition: TensorBlock.h:335
Definition: TensorBlock.h:566
void XprType
Definition: TensorBlock.h:568
Definition: TensorBlock.h:475
void * allocate(size_t size)
Definition: TensorBlock.h:485
int m_allocation_index
Definition: TensorBlock.h:530
const Device & m_device
Definition: TensorBlock.h:529
~TensorBlockScratchAllocator()
Definition: TensorBlock.h:479
std::vector< Allocation > m_allocations
Definition: TensorBlock.h:532
TensorBlockScratchAllocator(const Device &device)
Definition: TensorBlock.h:477
void reset()
Definition: TensorBlock.h:521
Definition: TensorBlock.h:796
XprScalar< XprType >::type Scalar
Definition: TensorBlock.h:806
TensorCwiseBinaryBlock(const LhsTensorBlock &left_block, const RhsTensorBlock &right_block, const BinaryOp &functor)
Definition: TensorBlock.h:808
TensorBlockKind kind() const
Definition: TensorBlock.h:811
static constexpr bool NoArgBlockAccess
Definition: TensorBlock.h:797
XprType expr() const
Definition: TensorBlock.h:813
RhsTensorBlock m_right_block
Definition: TensorBlock.h:824
std::conditional_t< NoArgBlockAccess, void, TensorCwiseBinaryOp< BinaryOp, const typename LhsTensorBlock::XprType, const typename RhsTensorBlock::XprType > > XprType
Definition: TensorBlock.h:804
void cleanup()
Definition: TensorBlock.h:817
const Scalar * data() const
Definition: TensorBlock.h:815
LhsTensorBlock m_left_block
Definition: TensorBlock.h:823
BinaryOp m_functor
Definition: TensorBlock.h:825
Definition: TensorBlock.h:767
ArgTensorBlock m_arg_block
Definition: TensorBlock.h:787
static constexpr bool NoArgBlockAccess
Definition: TensorBlock.h:768
std::conditional_t< NoArgBlockAccess, void, TensorCwiseUnaryOp< UnaryOp, const typename ArgTensorBlock::XprType > > XprType
Definition: TensorBlock.h:773
XprScalar< XprType >::type Scalar
Definition: TensorBlock.h:775
TensorCwiseUnaryBlock(const ArgTensorBlock &arg_block, const UnaryOp &functor)
Definition: TensorBlock.h:777
UnaryOp m_functor
Definition: TensorBlock.h:788
void cleanup()
Definition: TensorBlock.h:784
const Scalar * data() const
Definition: TensorBlock.h:783
TensorBlockKind kind() const
Definition: TensorBlock.h:780
XprType expr() const
Definition: TensorBlock.h:782
Definition: TensorBlock.h:638
bool m_materialized_in_output
Definition: TensorBlock.h:664
TensorMaterializedBlock AsTensorMaterializedBlock() const
Definition: TensorBlock.h:644
bool m_strided_storage
Definition: TensorBlock.h:665
Dimensions m_strides
Definition: TensorBlock.h:663
Scalar * m_data
Definition: TensorBlock.h:661
Scalar * data() const
Definition: TensorBlock.h:640
Dimensions m_dimensions
Definition: TensorBlock.h:662
Storage(Scalar *data, const Dimensions &dimensions, const Dimensions &strides, bool materialized_in_output, bool strided_storage)
Definition: TensorBlock.h:653
const Dimensions & strides() const
Definition: TensorBlock.h:642
const Dimensions & dimensions() const
Definition: TensorBlock.h:641
Definition: TensorBlock.h:604
TensorMaterializedBlock(TensorBlockKind kind, const Scalar *data, const Dimensions &dimensions, bool valid_expr=true)
Definition: TensorBlock.h:609
static EIGEN_STRONG_INLINE TensorMaterializedBlock materialize(const Scalar *data, const DataDimensions &data_dims, TensorBlockDesc &desc, TensorBlockScratch &scratch)
Definition: TensorBlock.h:699
const Scalar * data() const
Definition: TensorBlock.h:625
const XprType & expr() const
Definition: TensorBlock.h:621
TensorBlockKind kind() const
Definition: TensorBlock.h:617
static EIGEN_STRONG_INLINE Storage prepareStorage(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool allow_strided_storage=false)
Definition: TensorBlock.h:671
bool m_valid_expr
Definition: TensorBlock.h:759
XprType m_expr
Definition: TensorBlock.h:758
TensorBlockKind m_kind
Definition: TensorBlock.h:755
const Scalar * m_data
Definition: TensorBlock.h:756
Dimensions m_dimensions
Definition: TensorBlock.h:757
internal::TensorBlockDescriptor< NumDims, IndexType > TensorBlockDesc
Definition: TensorBlock.h:628
DSizes< IndexType, NumDims > Dimensions
Definition: TensorBlock.h:606
TensorMap< const Tensor< Scalar, NumDims, Layout > > XprType
Definition: TensorBlock.h:607
void cleanup()
Definition: TensorBlock.h:626
Definition: TensorBlock.h:861
XprType expr() const
Definition: TensorBlock.h:882
TensorBlockKind kind() const
Definition: TensorBlock.h:881
Arg1TensorBlock::XprType Arg1XprType
Definition: TensorBlock.h:862
TensorTernaryExprBlock(const Arg1TensorBlock &arg1_block, const Arg2TensorBlock &arg2_block, const Arg3TensorBlock &arg3_block, const BlockFactory &factory)
Definition: TensorBlock.h:877
std::conditional_t< NoArgBlockAccess, void, typename BlockFactory::template XprType< Arg1XprType, Arg2XprType, Arg3XprType >::type > XprType
Definition: TensorBlock.h:873
const Scalar * data() const
Definition: TensorBlock.h:883
Arg3TensorBlock::XprType Arg3XprType
Definition: TensorBlock.h:864
BlockFactory m_factory
Definition: TensorBlock.h:894
Arg3TensorBlock m_arg3_block
Definition: TensorBlock.h:893
static constexpr bool NoArgBlockAccess
Definition: TensorBlock.h:866
Arg1TensorBlock m_arg1_block
Definition: TensorBlock.h:891
Arg2TensorBlock m_arg2_block
Definition: TensorBlock.h:892
XprScalar< XprType >::type Scalar
Definition: TensorBlock.h:875
void cleanup()
Definition: TensorBlock.h:884
Arg2TensorBlock::XprType Arg2XprType
Definition: TensorBlock.h:863
Definition: TensorBlock.h:834
TensorUnaryExprBlock(const ArgTensorBlock &arg_block, const BlockFactory &factory)
Definition: TensorBlock.h:843
XprType expr() const
Definition: TensorBlock.h:847
XprScalar< XprType >::type Scalar
Definition: TensorBlock.h:841
void cleanup()
Definition: TensorBlock.h:849
ArgTensorBlock::XprType ArgXprType
Definition: TensorBlock.h:835
static constexpr bool NoArgBlockAccess
Definition: TensorBlock.h:836
const Scalar * data() const
Definition: TensorBlock.h:848
std::conditional_t< NoArgBlockAccess, void, typename BlockFactory::template XprType< ArgXprType >::type > XprType
Definition: TensorBlock.h:839
BlockFactory m_factory
Definition: TensorBlock.h:853
ArgTensorBlock m_arg_block
Definition: TensorBlock.h:852
TensorBlockKind kind() const
Definition: TensorBlock.h:846
std::vector< Array2i > sizes
Definition: dense_solvers.cpp:12
@ ColMajor
Definition: Constants.h:318
RealScalar s
Definition: level1_cplx_impl.h:130
@ Target
Definition: Constants.h:495
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:625
TensorBlockKind
Definition: TensorBlock.h:538
@ kMaterializedInOutput
Definition: TensorBlock.h:559
@ kMaterializedInScratch
Definition: TensorBlock.h:550
@ kView
Definition: TensorBlock.h:545
@ kExpr
Definition: TensorBlock.h:541
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE auto array_prod(const array< T, N > &arr) -> decltype(array_reduce< product_op, T, N >(arr, static_cast< T >(1)))
Definition: MoreMeta.h:497
EIGEN_ALWAYS_INLINE DSizes< IndexType, NumDims > strides(const DSizes< IndexType, NumDims > &dimensions)
Definition: TensorBlock.h:29
TensorBlockShapeType
Definition: TensorBlock.h:73
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
Definition: MathFunctions.h:926
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_CONSTEXPR T div_ceil(T a, T b)
Definition: MathFunctions.h:1251
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
Definition: MathFunctions.h:920
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool dimensions_match(Dims1 dims1, Dims2 dims2)
Definition: TensorDimensions.h:322
std::array< T, N > array
Definition: EmulateArray.h:231
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
type
Definition: compute_granudrum_aor.py:141
Definition: Eigen_Colamd.h:49
CwiseBinaryOp< internal::scalar_sum_op< double, double >, const CpyMatrixXd, const CpyMatrixXd > XprType
Definition: nestbyvalue.cpp:15
internal::nested_eval< T, 1 >::type eval(const T &xpr)
Definition: sparse_permutations.cpp:47
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const
Definition: TensorDimensions.h:167
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const
Definition: TensorDimensions.h:165
Definition: TensorDeviceDefault.h:19
Definition: TensorDimensions.h:85
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition: TensorEvaluator.h:31
static constexpr int Layout
Definition: TensorEvaluator.h:46
Definition: TensorBlock.h:923
Dst(IndexType o, IndexType s, Scalar *d)
Definition: TensorBlock.h:924
IndexType offset
Definition: TensorBlock.h:926
Scalar * data
Definition: TensorBlock.h:928
IndexType stride
Definition: TensorBlock.h:927
Definition: TensorBlock.h:931
const Scalar * data
Definition: TensorBlock.h:936
Src(IndexType o, IndexType s, const Scalar *d)
Definition: TensorBlock.h:932
IndexType offset
Definition: TensorBlock.h:934
IndexType stride
Definition: TensorBlock.h:935
Definition: TensorBlock.h:1459
IndexType output_span
Definition: TensorBlock.h:1465
IndexType size
Definition: TensorBlock.h:1463
IndexType output_stride
Definition: TensorBlock.h:1464
IndexType count
Definition: TensorBlock.h:1462
BlockIteratorState()
Definition: TensorBlock.h:1460
static EIGEN_ALWAYS_INLINE void Run(Scalar *target, IndexType count, const Evaluator &eval, IndexType eval_offset)
Definition: TensorBlock.h:1333
Definition: TensorBlock.h:1323
static EIGEN_ALWAYS_INLINE void Run(Scalar *target, IndexType count, const Evaluator &eval, IndexType eval_offset)
Definition: TensorBlock.h:1324
Definition: TensorBlock.h:1360
Dimensions strides
Definition: TensorBlock.h:1366
Dimensions dims
Definition: TensorBlock.h:1365
Scalar * data
Definition: TensorBlock.h:1367
Target(const Dimensions &target_dims, const Dimensions &target_strides, Scalar *target_data, IndexType target_offset=0)
Definition: TensorBlock.h:1361
IndexType offset
Definition: TensorBlock.h:1368
Definition: TensorBlock.h:1269
IndexType input_stride
Definition: TensorBlock.h:1274
IndexType count
Definition: TensorBlock.h:1273
IndexType size
Definition: TensorBlock.h:1272
IndexType output_span
Definition: TensorBlock.h:1277
BlockIteratorState()
Definition: TensorBlock.h:1270
IndexType output_stride
Definition: TensorBlock.h:1275
IndexType input_span
Definition: TensorBlock.h:1276
Definition: TensorBlock.h:1102
IndexType offset
Definition: TensorBlock.h:1109
Dimensions strides
Definition: TensorBlock.h:1107
Dimensions dims
Definition: TensorBlock.h:1106
Scalar * data
Definition: TensorBlock.h:1108
Dst(const Dimensions &dst_dims, const Dimensions &dst_strides, Scalar *dst, IndexType dst_offset=0)
Definition: TensorBlock.h:1103
Definition: TensorBlock.h:1112
const Scalar * data
Definition: TensorBlock.h:1117
Dimensions strides
Definition: TensorBlock.h:1116
IndexType offset
Definition: TensorBlock.h:1118
Src(const Dimensions &src_strides, const Scalar *src, IndexType src_offset=0)
Definition: TensorBlock.h:1113
Definition: TensorBlock.h:75
EIGEN_DEVICE_FUNC TensorBlockResourceRequirements & addCostPerCoeff(TensorOpCost cost)
Definition: TensorBlock.h:135
static EIGEN_DEVICE_FUNC TensorBlockResourceRequirements withShapeAndSize(TensorBlockShapeType shape_type, size_t size_in_bytes)
Definition: TensorBlock.h:96
static EIGEN_DEVICE_FUNC TensorBlockResourceRequirements withShapeAndSize(TensorBlockShapeType shape_type, size_t size_in_bytes, TensorOpCost cost)
Definition: TensorBlock.h:89
static EIGEN_DEVICE_FUNC TensorBlockResourceRequirements skewed(size_t size_in_bytes)
Definition: TensorBlock.h:119
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockShapeType merge(TensorBlockShapeType lhs, TensorBlockShapeType rhs)
Definition: TensorBlock.h:154
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t merge(size_t lhs_size, size_t rhs_size)
Definition: TensorBlock.h:150
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockResourceRequirements any()
Definition: TensorBlock.h:143
size_t size
Definition: TensorBlock.h:77
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockResourceRequirements merge(const TensorBlockResourceRequirements &lhs, const TensorBlockResourceRequirements &rhs)
Definition: TensorBlock.h:129
static EIGEN_DEVICE_FUNC TensorBlockResourceRequirements uniform(size_t size_in_bytes)
Definition: TensorBlock.h:124
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost merge(TensorOpCost lhs_cost, TensorOpCost rhs_cost)
Definition: TensorBlock.h:161
TensorOpCost cost_per_coeff
Definition: TensorBlock.h:78
TensorBlockShapeType shape_type
Definition: TensorBlock.h:76
Definition: TensorBlock.h:524
size_t size
Definition: TensorBlock.h:526
void * ptr
Definition: TensorBlock.h:525
void type
Definition: TensorBlock.h:582
Definition: TensorBlock.h:577
XprType::Scalar type
Definition: TensorBlock.h:578
Definition: XprHelper.h:427
Definition: GenericPacketMath.h:108
Definition: GenericPacketMath.h:134
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2
Definition: ZVector/PacketMath.h:50