#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
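
// A minimal usage sketch (illustrative; the concrete shapes are assumptions,
// not taken from this file): TensorBroadcastingOp replicates a tensor along
// each dimension by the given factors, so output(i, j) maps back to
// input(i % in_rows, j % in_cols).
//
//   Eigen::Tensor<float, 2> input(2, 3);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> bcast = {2, 2};
//   Eigen::Tensor<float, 2> output = input.broadcast(bcast);  // 4 x 6
//   // output(i, j) == input(i % 2, j % 3)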

namespace Eigen {

namespace internal {

template <typename Broadcast, typename XprType>
struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template <typename Broadcast, typename XprType>
struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense> {
  typedef const TensorBroadcastingOp<Broadcast, XprType> EIGEN_DEVICE_REF type;
};

template <typename Broadcast, typename XprType>
struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1,
              typename eval<TensorBroadcastingOp<Broadcast, XprType> >::type> {
  typedef TensorBroadcastingOp<Broadcast, XprType> type;
};

// Detects whether the argument of the broadcast is effectively a scalar
// (a fixed-size tensor of total size 1), in which case every output
// coefficient is simply coeff(0) of the input.
template <typename Dims>
struct is_input_scalar {
  static const bool value = false;
};
template <>
struct is_input_scalar<Sizes<> > {
  static const bool value = true;
};
template <typename std::ptrdiff_t... Indices>
struct is_input_scalar<Sizes<Indices...> > {
  static constexpr bool value = (Sizes<Indices...>::total_size == 1);
};

}  // end namespace internal
template <typename Broadcast, typename XprType>
class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorBroadcastingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorBroadcastingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast)
      : m_xpr(expr), m_broadcast(broadcast) {}

  EIGEN_DEVICE_FUNC const Broadcast& broadcast() const { return m_broadcast; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

 protected:
  typename XprType::Nested m_xpr;
  const Broadcast m_broadcast;
};

// Eval as rvalue
template <typename Broadcast, typename ArgType, typename Device>
struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> {
  typedef TensorBroadcastingOp<Broadcast, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;

  bool isCopy, nByOne, oneByN;

  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = true,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = true,
    RawAccess = false
  };

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  // We do block based broadcasting using a trick with 2x tensor rank and 0
  // strides. See the block() method implementation for details.
  typedef DSizes<Index, 2 * NumDims> BroadcastDimensions;

  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock ArgTensorBlock;
  typedef internal::TensorMaterializedBlock<ScalarNoConst, NumDims, Layout, Index> TensorBlock;

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : isCopy(false),
        nByOne(false),
        oneByN(false),
        m_device(device),
        m_broadcast(op.broadcast()),
        m_impl(op.expression(), device) {
    // The broadcasting op doesn't change the rank of the tensor. One can't
    // broadcast a scalar and store the result in a scalar; instead, reshape
    // the scalar into a tensor of rank >= 1 with one coefficient and
    // broadcast that along the dimension of your choice.
    const InputDimensions& input_dims = m_impl.dimensions();
    isCopy = true;
    for (int i = 0; i < NumDims; ++i) {
      eigen_assert(input_dims[i] > 0);
      m_dimensions[i] = input_dims[i] * m_broadcast[i];
      if (m_broadcast[i] != 1) {
        isCopy = false;
      }
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1];
        m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
      }
    } else {
      m_inputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
        m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
      }
    }

    if (input_dims[0] == 1) {
      oneByN = true;
      for (int i = 1; i < NumDims; ++i) {
        if (m_broadcast[i] != 1) {
          oneByN = false;
          break;
        }
      }
    } else if (input_dims[NumDims - 1] == 1) {
      nByOne = true;
      for (int i = 0; i < NumDims - 1; ++i) {
        if (m_broadcast[i] != 1) {
          nByOne = false;
          break;
        }
      }
    }

    // Handle special format like NCHW: the input shape is '[1, N..., 1]' and
    // the broadcast shape is '[N, 1..., N]'.
    if (!oneByN && !nByOne) {
      if (input_dims[0] == 1 && input_dims[NumDims - 1] == 1 && NumDims > 2) {
        nByOne = true;
        oneByN = true;
        for (int i = 1; i < NumDims - 1; ++i) {
          if (m_broadcast[i] != 1) {
            nByOne = false;
            oneByN = false;
            break;
          }
        }
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
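
  // Worked example of the constructor above (col-major; the concrete shape is
  // an illustrative assumption, not from this file): for a 2x3 input
  // broadcast by {2, 2}, m_dimensions = {4, 6}, m_inputStrides = {1, 2}
  // (strides of the 2x3 input) and m_outputStrides = {1, 4} (strides of the
  // 4x6 output). isCopy, oneByN and nByOne all stay false, so evaluation goes
  // through the generic coeffColMajor/packetColMajor paths.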

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const {
    if (internal::is_input_scalar<internal::remove_all_t<InputDimensions>>::value) {
      return m_impl.coeff(0);
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      if (isCopy) {
        return m_impl.coeff(index);
      } else {
        return coeffColMajor(index);
      }
    } else {
      if (isCopy) {
        return m_impl.coeff(index);
      } else {
        return coeffRowMajor(index);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexColMajor(Index index) const {
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    if (internal::index_statically_eq<Broadcast>(0, 1)) {
      eigen_assert(index < m_impl.dimensions()[0]);
      inputIndex += index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(0, 1)) {
        eigen_assert(index % m_impl.dimensions()[0] == 0);
      } else {
        inputIndex += (index % m_impl.dimensions()[0]);
      }
    }
    return inputIndex;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const {
    return m_impl.coeff(indexColMajor(index));
  }
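
  // Hand-traced example for indexColMajor (same assumed 2x3 input broadcast
  // by {2, 2} as above): output index 11 in the 4x6 result decomposes as
  // idx = 11 / m_outputStrides[1] = 2 (column), remainder 3 (row). The column
  // maps to 2 % 3 = 2 and contributes 2 * m_inputStrides[1] = 4; the row maps
  // to 3 % 2 = 1. The input index is therefore 5, i.e.
  // output(3, 2) == input(1, 2).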

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexRowMajor(Index index) const {
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    if (internal::index_statically_eq<Broadcast>(NumDims - 1, 1)) {
      eigen_assert(index < m_impl.dimensions()[NumDims - 1]);
      inputIndex += index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(NumDims - 1, 1)) {
        eigen_assert(index % m_impl.dimensions()[NumDims - 1] == 0);
      } else {
        inputIndex += (index % m_impl.dimensions()[NumDims - 1]);
      }
    }
    return inputIndex;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const {
    return m_impl.coeff(indexRowMajor(index));
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const {
    if (internal::is_input_scalar<internal::remove_all_t<InputDimensions>>::value) {
      return internal::pset1<PacketReturnType>(m_impl.coeff(0));
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      if (isCopy) {
#ifdef EIGEN_GPU_COMPILE_PHASE
        // See PR 437: on NVIDIA P100 and K20m we observed a x3-4 speed up by enforcing
        // unaligned loads here. The reason is unclear though.
        return m_impl.template packet<Unaligned>(index);
#else
        return m_impl.template packet<LoadMode>(index);
#endif
      } else if (oneByN && !nByOne) {
        return packetNByOne<LoadMode>(index);
      } else if (!oneByN && nByOne) {
        return packetOneByN<LoadMode>(index);
      } else if (oneByN && nByOne) {
        return packetOneByNByOne<LoadMode>(index);
      } else {
        return packetColMajor<LoadMode>(index);
      }
    } else {
      if (isCopy) {
#ifdef EIGEN_GPU_COMPILE_PHASE
        // See above.
        return m_impl.template packet<Unaligned>(index);
#else
        return m_impl.template packet<LoadMode>(index);
#endif
      } else if (oneByN && !nByOne) {
        return packetOneByN<LoadMode>(index);
      } else if (!oneByN && nByOne) {
        return packetNByOne<LoadMode>(index);
      } else if (oneByN && nByOne) {
        return packetOneByNByOne<LoadMode>(index);
      } else {
        return packetRowMajor<LoadMode>(index);
      }
    }
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByNByOne(Index index) const {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
    Index startDim, endDim;
    Index inputIndex, outputOffset, batchedIndex;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      startDim = NumDims - 1;
      endDim = 1;
    } else {
      startDim = 0;
      endDim = NumDims - 2;
    }

    batchedIndex = index % m_outputStrides[startDim];
    inputIndex = batchedIndex / m_outputStrides[endDim];
    outputOffset = batchedIndex % m_outputStrides[endDim];

    if (outputOffset + PacketSize <= m_outputStrides[endDim]) {
      values[0] = m_impl.coeff(inputIndex);
      return internal::pload1<PacketReturnType>(values);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
        if (outputOffset + cur < m_outputStrides[endDim]) {
          values[i] = m_impl.coeff(inputIndex);
        } else {
          ++inputIndex;
          inputIndex = (inputIndex == m_inputStrides[startDim] ? 0 : inputIndex);
          values[i] = m_impl.coeff(inputIndex);
          outputOffset = 0;
          cur = 0;
        }
      }
      return internal::pload<PacketReturnType>(values);
    }
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const {
    // Consider the flattened tensor [v0, ..., vN]: this path concatenates
    // m_broadcast[dim] copies, [v0, ..., vN, v0, ..., vN, ...], with
    // dim == NumDims - 1 for col-major and dim == 0 for row-major.
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    // Size of the flattened input.
    const Index M =
        (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_inputStrides[NumDims - 1] : m_inputStrides[0];
    Index inputIndex = index % M;
    if (inputIndex + PacketSize <= M) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        if (inputIndex > M - 1) {
          inputIndex = 0;  // Wrap around to the start of the input.
        }
        values[i] = m_impl.coeff(inputIndex++);
      }
      return internal::pload<PacketReturnType>(values);
    }
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const {
    // Consider the flattened tensor [v0, ..., vN]: this path interleaves
    // m_broadcast[dim] copies, [v0, v0, ..., v1, v1, ..., vN, vN, ...], with
    // dim == 0 for col-major and dim == NumDims - 1 for row-major.
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    // Replication factor of each input coefficient.
    const Index M =
        (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_broadcast[0] : m_broadcast[NumDims - 1];

    Index inputIndex = index / M;
    Index outputOffset = index % M;
    if (outputOffset + PacketSize <= M) {
      return internal::pset1<PacketReturnType>(m_impl.coeff(inputIndex));
    } else {
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        if (outputOffset < M) {
          values[i] = m_impl.coeff(inputIndex);
          ++outputOffset;
        } else {
          values[i] = m_impl.coeff(++inputIndex);
          outputOffset = 1;  // Next offset.
        }
      }
      return internal::pload<PacketReturnType>(values);
    }
  }
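
  // Why the col-major dispatch in packet() above pairs oneByN with
  // packetNByOne (and vice versa), traced on small assumed shapes:
  // - input 1x3 broadcast by {4, 1} (oneByN): the flattened col-major output
  //   is [v0 v0 v0 v0 v1 v1 v1 v1 v2 v2 v2 v2], i.e. each input coefficient
  //   is *interleaved* M = m_broadcast[0] = 4 times, which is exactly the
  //   index / M, index % M pattern of packetNByOne.
  // - input 3x1 broadcast by {1, 4} (nByOne): the flattened output is
  //   [v0 v1 v2 v0 v1 v2 ...], i.e. *concatenated* copies of the whole input,
  //   which is the index % M pattern of packetOneByN with
  //   M = m_inputStrides[NumDims - 1] = 3.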

  // Ignore the LoadMode and always use unaligned loads since we can't
  // guarantee the alignment at compile time.
  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    const Index originalIndex = index;

    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    Index innermostLoc;
    if (internal::index_statically_eq<Broadcast>(0, 1)) {
      eigen_assert(index < m_impl.dimensions()[0]);
      innermostLoc = index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(0, 1)) {
        eigen_assert(index % m_impl.dimensions()[0] == 0);
        innermostLoc = 0;
      } else {
        innermostLoc = index % m_impl.dimensions()[0];
      }
    }
    inputIndex += innermostLoc;

    // Todo: this could be extended to the second dimension if we're not
    // broadcasting alongside the first dimension, and so on.
    if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize; ++i) {
        if (innermostLoc + i < m_impl.dimensions()[0]) {
          values[i] = m_impl.coeff(inputIndex + i);
        } else {
          values[i] = coeffColMajor(originalIndex + i);
        }
      }
      return internal::pload<PacketReturnType>(values);
    }
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    const Index originalIndex = index;

    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    Index innermostLoc;
    if (internal::index_statically_eq<Broadcast>(NumDims - 1, 1)) {
      eigen_assert(index < m_impl.dimensions()[NumDims - 1]);
      innermostLoc = index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(NumDims - 1, 1)) {
        eigen_assert(index % m_impl.dimensions()[NumDims - 1] == 0);
        innermostLoc = 0;
      } else {
        innermostLoc = index % m_impl.dimensions()[NumDims - 1];
      }
    }
    inputIndex += innermostLoc;

    // Todo: this could be extended to the second dimension if we're not
    // broadcasting alongside the first dimension, and so on.
    if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims - 1]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize; ++i) {
        if (innermostLoc + i < m_impl.dimensions()[NumDims - 1]) {
          values[i] = m_impl.coeff(inputIndex + i);
        } else {
          values[i] = coeffRowMajor(originalIndex + i);
        }
      }
      return internal::pload<PacketReturnType>(values);
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = TensorOpCost::AddCost<Index>();
    if (!isCopy && NumDims > 0) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        compute_cost += TensorOpCost::DivCost<Index>();
        if (internal::index_statically_eq<Broadcast>(i, 1)) {
          compute_cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
        } else {
          if (!internal::index_statically_eq<InputDimensions>(i, 1)) {
            compute_cost +=
                TensorOpCost::MulCost<Index>() + TensorOpCost::ModCost<Index>() + TensorOpCost::AddCost<Index>();
          }
        }
        compute_cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
      }
    }
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.firstLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        m_impl.getResourceRequirements(), internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
                                                          bool /*root_of_expr_ast*/ = false) const {
    BlockBroadcastingParams params = blockBroadcastingParams(desc);

    if (params.inner_dim_size == 0 || params.bcast_dim_size == 0) {
      return emptyBlock();
    }

    // Prepare storage for the materialized broadcasting result.
    const typename TensorBlock::Storage block_storage = TensorBlock::prepareStorage(desc, scratch);
    ScalarNoConst* materialized_output = block_storage.data();

    // We potentially will need to materialize input blocks.
    size_t materialized_input_size = 0;
    ScalarNoConst* materialized_input = NULL;

    // Initialize block broadcasting iterator state for outer dimensions
    // (outer with regard to the bcast dimension). Dimensions in this array
    // are always in inner_most -> outer_most order (col major layout).
    array<BlockBroadcastingIteratorState, NumDims> it;
    int idx = 0;

    for (int i = params.inner_dim_count + 1; i < NumDims; ++i) {
      const Index dim = IsColMajor ? i : NumDims - 1 - i;
      it[idx].size = params.output_dims[dim];
      it[idx].count = 0;
      it[idx].output_stride = m_outputStrides[dim];
      it[idx].output_span = it[idx].output_stride * (it[idx].size - 1);
      idx++;
    }

    // Write output into the beginning of `materialized_output`.
    Index output_offset = 0;

    // We will fill the output block by broadcasting along the bcast dim, and
    // iterating over the outer dimensions.
    const Index output_size = NumDims == 0 ? 1 : params.output_dims.TotalSize();

    for (Index num_output_coeffs = 0; num_output_coeffs < output_size;) {
      ScalarNoConst* bcast_output = materialized_output + num_output_coeffs;
      Index bcast_offset = desc.offset() + output_offset;

      // Broadcast along the bcast dimension.
      num_output_coeffs += BroadcastBlockAlongBcastDim(params, bcast_offset, scratch, bcast_output,
                                                       &materialized_input, &materialized_input_size);

      // Switch to the next outer dimension.
      for (int j = 0; j < idx; ++j) {
        if (++it[j].count < it[j].size) {
          output_offset += it[j].output_stride;
          break;
        }
        it[j].count = 0;
        output_offset -= it[j].output_span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

  Broadcast functor() const { return m_broadcast; }

 private:
  static constexpr bool IsColMajor = static_cast<int>(Layout) == static_cast<int>(ColMajor);

  // Parameters needed to broadcast a single block along the bcast dimension.
  struct BlockBroadcastingParams {
    Dimensions input_dims;      // input expression dimensions
    Dimensions output_dims;     // output block sizes
    Dimensions output_strides;  // output block strides

    int inner_dim_count;   // number of inner dimensions matching in size
    int bcast_dim;         // broadcast dimension index
    Index bcast_dim_size;  // broadcast dimension size
    Index inner_dim_size;  // inner dimensions size

    // Block sizes and strides for the input block, where all dimensions
    // before `bcast_dim` are equal to `input_dims`.
    Dimensions input_block_sizes;
    Dimensions input_block_strides;

    // Block sizes and strides for blocks with extra dimensions and strides 0.
    BroadcastDimensions bcast_block_sizes;
    BroadcastDimensions bcast_block_strides;
    BroadcastDimensions bcast_input_strides;
  };

  struct BlockBroadcastingIteratorState {
    Index size;
    Index count;
    Index output_stride;
    Index output_span;
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlockBroadcastingParams blockBroadcastingParams(TensorBlockDesc& desc) const {
    BlockBroadcastingParams params;

    params.input_dims = Dimensions(m_impl.dimensions());

    // Output block sizes and strides.
    params.output_dims = desc.dimensions();
    params.output_strides = internal::strides<Layout>(params.output_dims);

    // Find the broadcasting dimension (the first dimension with output size
    // smaller than the full output size).
    params.bcast_dim = 0;
    params.bcast_dim_size = 1;
    params.inner_dim_size = 1;

    // Count the number of inner dimensions that have the same size in the
    // block and in the tensor expression.
    params.inner_dim_count = 0;

    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;

      if (params.output_dims[dim] == m_dimensions[dim]) {
        params.inner_dim_size *= params.output_dims[dim];
        ++params.inner_dim_count;
        continue;
      }

      // The first non-matching dimension is the broadcasting dimension.
      eigen_assert(params.output_dims[dim] < m_dimensions[dim]);
      params.bcast_dim = dim;
      params.bcast_dim_size = params.output_dims[dim];
      break;
    }

    // Calculate the input block size for looking into the input.
    for (int i = 0; i < params.inner_dim_count; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      params.input_block_sizes[dim] = params.input_dims[dim];
    }
    for (int i = params.inner_dim_count; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      params.input_block_sizes[dim] = 1;
    }
    params.input_block_strides = internal::strides<Layout>(params.input_block_sizes);

    // Broadcast with the 0-stride trick: create one extra dimension for each
    // broadcast, and set its input stride to 0.
    //
    // When ColMajor:
    //
    // - bcast_block_sizes:
    //     [d_0, b_0, d_1, b_1, ...]
    //
    // - bcast_block_strides:
    //     [output_block_strides[0], output_block_strides[0] * d_0,
    //      output_block_strides[1], output_block_strides[1] * d_1, ...]
    //
    // - bcast_input_strides:
    //     [input_block_strides[0], 0, input_block_strides[1], 0, ...]
    //
    for (int i = 0; i < params.inner_dim_count; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;

      const int copy_dim = IsColMajor ? 2 * i : 2 * NumDims - 2 * i - 1;
      const int broadcast_dim = IsColMajor ? copy_dim + 1 : copy_dim - 1;

      params.bcast_block_sizes[copy_dim] = params.input_dims[dim];
      params.bcast_block_sizes[broadcast_dim] = m_broadcast[dim];
      params.bcast_block_strides[copy_dim] = params.output_strides[dim];
      params.bcast_block_strides[broadcast_dim] = params.output_strides[dim] * params.input_dims[dim];
      params.bcast_input_strides[copy_dim] = params.input_block_strides[dim];
      params.bcast_input_strides[broadcast_dim] = 0;
    }

    for (int i = 2 * params.inner_dim_count; i < 2 * NumDims; ++i) {
      const int dim = IsColMajor ? i : 2 * NumDims - i - 1;
      params.bcast_block_sizes[dim] = 1;
      params.bcast_block_strides[dim] = 0;
      params.bcast_input_strides[dim] = 0;
    }

    return params;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock emptyBlock() const {
    DSizes<Index, NumDims> dimensions;
    for (int i = 0; i < NumDims; ++i) dimensions[i] = 0;
    return TensorBlock(internal::TensorBlockKind::kView, NULL, dimensions);
  }
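
  // Worked example of the 2x-rank / 0-stride encoding built above (col-major;
  // the concrete shape is an illustrative assumption): for a 2x3 input
  // broadcast by {2, 2} with a block covering the whole 4x6 output,
  // inner_dim_count == 2 and the 4-D descriptors become
  //   bcast_block_sizes   = [2, 2, 3, 2]
  //   bcast_block_strides = [1, 2, 4, 12]
  //   bcast_input_strides = [1, 0, 2, 0]
  // so a single TensorBlockIO::Copy materializes the broadcast: dimensions
  // with input stride 0 simply re-read the same input coefficients.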

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlockAlongBcastDim(
      BlockBroadcastingParams params, Index bcast_offset, TensorBlockScratch& scratch,
      ScalarNoConst* materialized_output, ScalarNoConst** materialized_input,
      size_t* materialized_input_size) const {
    if (params.bcast_dim_size == 1) {
      // We just need one block read using the ready-set values above.
      return BroadcastBlock(params.input_block_sizes, params.input_block_strides, params.bcast_block_sizes,
                            params.bcast_block_strides, params.bcast_input_strides, bcast_offset, 0, scratch,
                            materialized_output, materialized_input, materialized_input_size);

    } else if (params.input_dims[params.bcast_dim] == 1) {
      // Broadcast the bcast dimension (< NumDims) by bcast_dim_size.
      const int broadcast_bcast_dim =
          IsColMajor ? 2 * params.inner_dim_count + 1 : 2 * NumDims - 2 * params.inner_dim_count - 2;

      params.bcast_block_sizes[broadcast_bcast_dim] = params.bcast_dim_size;
      params.bcast_input_strides[broadcast_bcast_dim] = 0;
      params.bcast_block_strides[broadcast_bcast_dim] = params.output_strides[params.bcast_dim];

      return BroadcastBlock(params.input_block_sizes, params.input_block_strides, params.bcast_block_sizes,
                            params.bcast_block_strides, params.bcast_input_strides, bcast_offset, 0, scratch,
                            materialized_output, materialized_input, materialized_input_size);

    } else {
      // Keep track of the total number of coefficients written to the output.
      Index num_output_coeffs = 0;

      // General case: the block is split along the bcast dimension into a
      // head (partial copy of the input), a middle section of whole input
      // copies, and a tail (another partial copy), delimited by the multiples
      // of the input size closest to the block bounds.

      // First index of the block along the bcast dimension.
      const Index bcast_dim_left_index = bcast_offset / m_outputStrides[params.bcast_dim];

      // Input size along the bcast dimension.
      const Index input_bcast_dim_size = params.input_dims[params.bcast_dim];

      // First multiple of input_bcast_dim_size at or after the block start.
      const Index first_multiple =
          numext::div_ceil<Index>(bcast_dim_left_index, input_bcast_dim_size) * input_bcast_dim_size;

      if (first_multiple <= bcast_dim_left_index + params.bcast_dim_size) {
        // Last multiple of input_bcast_dim_size inside the block.
        const Index last_multiple =
            (bcast_dim_left_index + params.bcast_dim_size) / input_bcast_dim_size * input_bcast_dim_size;
        const int copy_bcast_dim =
            IsColMajor ? 2 * params.inner_dim_count : 2 * NumDims - 2 * params.inner_dim_count - 1;
        const int broadcast_bcast_dim =
            IsColMajor ? 2 * params.inner_dim_count + 1 : 2 * NumDims - 2 * params.inner_dim_count - 2;

        if (first_multiple > bcast_dim_left_index) {
          // Head: partial copy before the first multiple.
          const Index head_size = first_multiple - bcast_dim_left_index;
          params.input_block_sizes[params.bcast_dim] = head_size;
          params.bcast_block_sizes[copy_bcast_dim] = head_size;
          params.bcast_input_strides[copy_bcast_dim] = params.input_block_strides[params.bcast_dim];
          params.bcast_block_strides[copy_bcast_dim] = params.output_strides[params.bcast_dim];
          params.bcast_block_sizes[broadcast_bcast_dim] = 1;
          params.bcast_input_strides[broadcast_bcast_dim] = 0;
          params.bcast_block_strides[broadcast_bcast_dim] =
              params.output_strides[params.bcast_dim] * params.input_dims[params.bcast_dim];

          num_output_coeffs +=
              BroadcastBlock(params.input_block_sizes, params.input_block_strides, params.bcast_block_sizes,
                             params.bcast_block_strides, params.bcast_input_strides, bcast_offset, 0, scratch,
                             materialized_output, materialized_input, materialized_input_size);
        }
        if (first_multiple < last_multiple) {
          // Middle: whole copies of the input.
          params.input_block_sizes[params.bcast_dim] = input_bcast_dim_size;
          params.bcast_block_sizes[copy_bcast_dim] = input_bcast_dim_size;
          params.bcast_input_strides[copy_bcast_dim] = params.input_block_strides[params.bcast_dim];
          params.bcast_block_strides[copy_bcast_dim] = params.output_strides[params.bcast_dim];
          params.bcast_block_sizes[broadcast_bcast_dim] = (last_multiple - first_multiple) / input_bcast_dim_size;
          params.bcast_input_strides[broadcast_bcast_dim] = 0;
          params.bcast_block_strides[broadcast_bcast_dim] =
              params.output_strides[params.bcast_dim] * params.input_dims[params.bcast_dim];
          const Index offset = (first_multiple - bcast_dim_left_index) * m_outputStrides[params.bcast_dim];

          num_output_coeffs +=
              BroadcastBlock(params.input_block_sizes, params.input_block_strides, params.bcast_block_sizes,
                             params.bcast_block_strides, params.bcast_input_strides, bcast_offset, offset, scratch,
                             materialized_output, materialized_input, materialized_input_size);
        }
        if (last_multiple < bcast_dim_left_index + params.bcast_dim_size) {
          // Tail: partial copy after the last multiple.
          const Index tail_size = bcast_dim_left_index + params.bcast_dim_size - last_multiple;
          params.input_block_sizes[params.bcast_dim] = tail_size;
          params.bcast_block_sizes[copy_bcast_dim] = tail_size;
          params.bcast_input_strides[copy_bcast_dim] = params.input_block_strides[params.bcast_dim];
          params.bcast_block_strides[copy_bcast_dim] = params.output_strides[params.bcast_dim];
          params.bcast_block_sizes[broadcast_bcast_dim] = 1;
          params.bcast_input_strides[broadcast_bcast_dim] = 0;
          params.bcast_block_strides[broadcast_bcast_dim] =
              params.output_strides[params.bcast_dim] * params.input_dims[params.bcast_dim];
          const Index offset = (last_multiple - bcast_dim_left_index) * m_outputStrides[params.bcast_dim];

          num_output_coeffs +=
              BroadcastBlock(params.input_block_sizes, params.input_block_strides, params.bcast_block_sizes,
                             params.bcast_block_strides, params.bcast_input_strides, bcast_offset, offset, scratch,
                             materialized_output, materialized_input, materialized_input_size);
        }
      } else {
        // The block does not span a whole multiple of the input size: a
        // single partial copy is enough.
        const int copy_bcast_dim =
            IsColMajor ? 2 * params.inner_dim_count : 2 * NumDims - 2 * params.inner_dim_count - 1;
        params.input_block_sizes[params.bcast_dim] = params.bcast_dim_size;
        params.bcast_block_sizes[copy_bcast_dim] = params.bcast_dim_size;
        params.bcast_input_strides[copy_bcast_dim] = params.input_block_strides[params.bcast_dim];
        params.bcast_block_strides[copy_bcast_dim] = params.output_strides[params.bcast_dim];

        num_output_coeffs +=
            BroadcastBlock(params.input_block_sizes, params.input_block_strides, params.bcast_block_sizes,
                           params.bcast_block_strides, params.bcast_input_strides, bcast_offset, 0, scratch,
                           materialized_output, materialized_input, materialized_input_size);
      }

      return num_output_coeffs;
    }
  }
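
  // Hand-traced example of the head/middle/tail split above (values are an
  // illustrative assumption): with input_bcast_dim_size == 3 and a block
  // covering coordinates [2, 9) of the bcast dimension (bcast_dim_left_index
  // == 2, bcast_dim_size == 7), first_multiple == 3 and last_multiple == 9,
  // so the block is materialized as a head copy of size 1 ([2, 3)), a middle
  // section of two full input periods ([3, 9)), and an empty tail.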

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlock(
      const Dimensions& input_block_sizes, const Dimensions& input_block_strides,
      const BroadcastDimensions& bcast_block_sizes, const BroadcastDimensions& bcast_block_strides,
      const BroadcastDimensions& bcast_input_strides, Index bcast_offset, Index offset,
      TensorBlockScratch& scratch, ScalarNoConst* materialized_output, ScalarNoConst** materialized_input,
      size_t* materialized_input_size) const {
    // Evaluate the input block at the corresponding input offset.
    const Index input_offset = bcast_offset + offset;
    TensorBlockDesc input_desc(IsColMajor ? indexColMajor(input_offset) : indexRowMajor(input_offset),
                               input_block_sizes);
    ArgTensorBlock input_block = m_impl.block(input_desc, scratch);

    // Materialize the input block into a temporary memory buffer only if it
    // is not already available as raw data.
    const ScalarNoConst* input_buffer = NULL;

    if (input_block.data() != NULL) {
      // Input block already has raw data, there is no need to materialize it.
      input_buffer = input_block.data();

    } else {
      // Otherwise we have to do block assignment into a temporary buffer.

      // Maybe reuse a previously allocated buffer, or allocate a new one with
      // the scratch allocator.
      const size_t input_total_size = input_block_sizes.TotalSize();
      if (*materialized_input == NULL || *materialized_input_size < input_total_size) {
        *materialized_input_size = input_total_size;
        void* mem = scratch.allocate(*materialized_input_size * sizeof(Scalar));
        *materialized_input = static_cast<ScalarNoConst*>(mem);
      }

      typedef internal::TensorBlockAssignment<ScalarNoConst, NumDims, typename ArgTensorBlock::XprType, Index>
          TensorBlockAssignment;

      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(input_block_sizes, input_block_strides, *materialized_input),
          input_block.expr());

      input_buffer = *materialized_input;
    }

    // Copy data from the materialized input block to the materialized output,
    // using the broadcast strides (strides with zeroes).
    typedef internal::TensorBlockIO<ScalarNoConst, Index, 2 * NumDims, Layout> TensorBlockIO;

    typename TensorBlockIO::Src src(bcast_input_strides, input_buffer);
    typename TensorBlockIO::Dst dst(bcast_block_sizes, bcast_block_strides, materialized_output + offset);

    return TensorBlockIO::Copy(dst, src);
  }

 protected:
  const Device EIGEN_DEVICE_REF m_device;
  const std::remove_reference_t<Broadcast> m_broadcast;
  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
};

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H