Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout > Class Template Reference

#include <TensorBlock.h>

Classes

struct  BlockIteratorState
 
struct  Dst
 
struct  Src
 

Public Types

typedef DSizes< IndexType, NumDims > Dimensions
 
typedef DSizes< int, NumDims > DimensionsMap
 

Static Public Member Functions

static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType Copy (const Dst &dst, const Src &src, const DimensionsMap &dst_to_src_dim_map)
 
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexType Copy (const Dst &dst, const Src &src)
 

Private Types

typedef StridedLinearBufferCopy< Scalar, IndexType > LinCopy
 

Static Private Member Functions

static int NumSqueezableInnerDims (const DimensionsMap &dim_map)
 

Static Private Attributes

static constexpr bool IsColMajor = (Layout == ColMajor)
 

Member Typedef Documentation

◆ Dimensions

template<typename Scalar , typename IndexType , int NumDims, int Layout>
typedef DSizes<IndexType, NumDims> Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Dimensions

◆ DimensionsMap

template<typename Scalar , typename IndexType , int NumDims, int Layout>
typedef DSizes<int, NumDims> Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::DimensionsMap

◆ LinCopy

template<typename Scalar , typename IndexType , int NumDims, int Layout>
typedef StridedLinearBufferCopy<Scalar, IndexType> Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::LinCopy
private

Member Function Documentation

◆ Copy() [1/2]

template<typename Scalar , typename IndexType , int NumDims, int Layout>
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexType Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Copy ( const Dst &dst,
const Src &src 
)
inline static
1262  {
1263  DimensionsMap dst_to_src_map;
1264  for (int i = 0; i < NumDims; ++i) dst_to_src_map[i] = i;
1265  return Copy(dst, src, dst_to_src_map);
1266  }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType Copy(const Dst &dst, const Src &src, const DimensionsMap &dst_to_src_dim_map)
Definition: TensorBlock.h:1126
DSizes< int, NumDims > DimensionsMap
Definition: TensorBlock.h:1100

References Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Copy(), and i.

◆ Copy() [2/2]

template<typename Scalar , typename IndexType , int NumDims, int Layout>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Copy ( const Dst &dst,
const Src &src,
const DimensionsMap &dst_to_src_dim_map 
)
inline static
1127  {
1128  // Copy single scalar value from `src` to `dst`.
1129  if (NumDims == 0) {
1130  *(dst.data + dst.offset) = *(src.data + src.offset);
1131  return 1;
1132  }
1133 
1134  // Both `dst` and `src` must have contiguous innermost dimension. We also
1135  // accept the special case with stride '0', because it's used as a trick to
1136  // implement broadcasting.
1137  {
1138  int inner_dim = IsColMajor ? 0 : NumDims - 1;
1139  EIGEN_UNUSED_VARIABLE(inner_dim);
1140  eigen_assert(dst.strides[inner_dim] == 1 || dst.strides[inner_dim] == 0);
1141  eigen_assert(src.strides[inner_dim] == 1 || src.strides[inner_dim] == 0);
1142  }
1143 
1144  // Give a shorter name to `dst_to_src_dim_map`.
1145  const DimensionsMap& dim_map = dst_to_src_dim_map;
1146 
1147  // Do not squeeze reordered inner dimensions.
1148  int num_squeezable_dims = NumSqueezableInnerDims(dim_map);
1149 
1150  // NOTE: We find the innermost dimension (contiguous in memory) in the dst
1151  // block, and we write data linearly into that dimension, reading it from
1152  // the src. If dimensions are reordered, we might end up reading data from
1153  // the src with `stride != 1`.
1154  //
1155  // NOTE: Random-Read/Linear-Write can be up to ~2X faster than
1156  // Linear-Read/Random-Write: https://stackoverflow.com/a/54935680
1157 
1158  // Find the innermost dimension in the dst whose size is not 1. This is the
1159  // effective inner dim.
1160  int num_size_one_inner_dims = 0;
1161  for (int i = 0; i < num_squeezable_dims; ++i) {
1162  const int dst_dim = IsColMajor ? i : NumDims - i - 1;
1163  if (dst.dims[dst_dim] != 1) break;
1164  num_size_one_inner_dims++;
1165  }
1166 
1167  // If all dimensions are of size 1, just copy a scalar from `src` to `dst`.
1168  if (num_size_one_inner_dims == NumDims) {
1169  *(dst.data + dst.offset) = *(src.data + src.offset);
1170  return 1;
1171  }
1172 
1173  // Outermost dimension in the dst with `stride == 1` (contiguous in memory).
1174  const int dst_stride1_dim = IsColMajor ? num_size_one_inner_dims : NumDims - num_size_one_inner_dims - 1;
1175 
1176  // Dimension in the src that corresponds to the dst innermost dimension.
1177  const int src_dim_for_dst_stride1_dim = NumDims == 0 ? 1 : dim_map[dst_stride1_dim];
1178 
1179  // Size of the innermost dimension (length of contiguous blocks of memory).
1180  IndexType dst_inner_dim_size = NumDims == 0 ? 1 : dst.dims[dst_stride1_dim];
1181 
1182  // Squeeze multiple inner dims into one if they are contiguous in `dst` and
1183  // `src` memory, so we can do less linear copy calls.
1184  for (int i = num_size_one_inner_dims + 1; i < num_squeezable_dims; ++i) {
1185  const int dst_dim = IsColMajor ? i : NumDims - i - 1;
1186  const IndexType dst_stride = dst.strides[dst_dim];
1187  const IndexType src_stride = src.strides[dim_map[dst_dim]];
1188  if (dst_inner_dim_size == dst_stride && dst_stride == src_stride) {
1189  dst_inner_dim_size *= dst.dims[dst_dim];
1190  ++num_size_one_inner_dims;
1191  } else {
1192  break;
1193  }
1194  }
1195 
1196  // Setup strides to read data from `src` and write to `dst`.
1197  IndexType input_offset = src.offset;
1198  IndexType output_offset = dst.offset;
1199  IndexType input_stride = NumDims == 0 ? 1 : src.strides[src_dim_for_dst_stride1_dim];
1200  IndexType output_stride = NumDims == 0 ? 1 : dst.strides[dst_stride1_dim];
1201 
1202  const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
1203  array<BlockIteratorState, at_least_1_dim> it;
1204 
1205  // Initialize block iterator state. Squeeze away any dimension of size 1.
1206  int idx = 0; // currently initialized iterator state index
1207  for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
1208  const int dst_dim = IsColMajor ? i + 1 : NumDims - i - 2;
1209  if (dst.dims[dst_dim] == 1) continue;
1210 
1211  it[idx].size = dst.dims[dst_dim];
1212  it[idx].input_stride = src.strides[dim_map[dst_dim]];
1213  it[idx].output_stride = dst.strides[dst_dim];
1214 
1215  it[idx].input_span = it[idx].input_stride * (it[idx].size - 1);
1216  it[idx].output_span = it[idx].output_stride * (it[idx].size - 1);
1217 
1218  idx++;
1219  }
1220 
1221  // Iterate copying data from src to dst.
1222  const IndexType block_total_size = NumDims == 0 ? 1 : dst.dims.TotalSize();
1223 
1224 #define COPY_INNER_DIM(KIND) \
1225  IndexType num_copied = 0; \
1226  for (num_copied = 0; num_copied < block_total_size; num_copied += dst_inner_dim_size) { \
1227  LinCopy::template Run<KIND>(typename LinCopy::Dst(output_offset, output_stride, dst.data), \
1228  typename LinCopy::Src(input_offset, input_stride, src.data), dst_inner_dim_size); \
1229  \
1230  for (int j = 0; j < idx; ++j) { \
1231  if (++it[j].count < it[j].size) { \
1232  input_offset += it[j].input_stride; \
1233  output_offset += it[j].output_stride; \
1234  break; \
1235  } \
1236  it[j].count = 0; \
1237  input_offset -= it[j].input_span; \
1238  output_offset -= it[j].output_span; \
1239  } \
1240  } \
1241  return num_copied;
1242 
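1243  if (input_stride == 1 && output_stride == 1) {
1244  COPY_INNER_DIM(LinCopy::Kind::Linear);
1245  } else if (input_stride == 1 && output_stride != 1) {
1246  COPY_INNER_DIM(LinCopy::Kind::Scatter);
1247  } else if (input_stride == 0 && output_stride == 1) {
1248  COPY_INNER_DIM(LinCopy::Kind::FillLinear);
1249  } else if (input_stride == 0 && output_stride != 1) {
1250  COPY_INNER_DIM(LinCopy::Kind::FillScatter);
1251  } else if (output_stride == 1) {
1252  COPY_INNER_DIM(LinCopy::Kind::Gather);
1253  } else {
1254  COPY_INNER_DIM(LinCopy::Kind::Random);
1255  }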
1256 
1257 #undef COPY_INNER_DIM
1258  }
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:966
#define eigen_assert(x)
Definition: Macros.h:910
#define COPY_INNER_DIM(KIND)
static int NumSqueezableInnerDims(const DimensionsMap &dim_map)
Definition: TensorBlock.h:1283
static constexpr bool IsColMajor
Definition: TensorBlock.h:1094

References COPY_INNER_DIM, Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Dst::data, Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Src::data, Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Dst::dims, eigen_assert, EIGEN_UNUSED_VARIABLE, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::FillLinear, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::FillScatter, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Gather, i, Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::IsColMajor, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Linear, Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::NumSqueezableInnerDims(), Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Dst::offset, Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Src::offset, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Random, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Scatter, Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Dst::strides, Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Src::strides, and Eigen::DSizes< DenseIndex, NumDims >::TotalSize().

Referenced by Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Copy(), and Eigen::internal::TensorMaterializedBlock< Scalar, NumDims, Layout, IndexType >::materialize().

◆ NumSqueezableInnerDims()

template<typename Scalar , typename IndexType , int NumDims, int Layout>
static int Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::NumSqueezableInnerDims ( const DimensionsMap &dim_map)
inline static private
1283  {
1284  int num_squeezable_dims = 0;
1285  for (int i = 0; i < NumDims; ++i) {
1286  const int dim = IsColMajor ? i : NumDims - i - 1;
1287  if (dim_map[dim] != dim) break;
1288  num_squeezable_dims++;
1289  }
1290  return num_squeezable_dims;
1291  }

References i, and Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::IsColMajor.

Referenced by Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::Copy().

Member Data Documentation

◆ IsColMajor

template<typename Scalar , typename IndexType , int NumDims, int Layout>
constexpr bool Eigen::internal::TensorBlockIO< Scalar, IndexType, NumDims, Layout >::IsColMajor = (Layout == ColMajor)
static constexpr private

The documentation for this class was generated from the following file: