Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType > Class Template Reference

#include <TensorBlock.h>

Classes

struct  Dst
 
struct  Src
 

Public Types

enum class  Kind {
  Linear = 0 , Scatter = 1 , FillLinear = 2 , FillScatter = 3 ,
  Gather = 4 , Random = 5
}
 

Static Public Member Functions

template<typename StridedLinearBufferCopy::Kind kind>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run (const Dst &dst, const Src &src, const size_t count)
 

Private Types

enum  { Vectorizable = packet_traits<Scalar>::Vectorizable , PacketSize = packet_traits<Scalar>::size , HalfPacketSize = unpacket_traits<HalfPacket>::size }
 
typedef packet_traits< Scalar >::type Packet
 
typedef unpacket_traits< Packet >::half HalfPacket
 

Static Private Member Functions

template<typename StridedLinearBufferCopy::Kind kind>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run (const IndexType count, const IndexType dst_offset, const IndexType dst_stride, Scalar *EIGEN_RESTRICT dst_data, const IndexType src_offset, const IndexType src_stride, const Scalar *EIGEN_RESTRICT src_data)
 

Member Typedef Documentation

◆ HalfPacket

template<typename Scalar , typename IndexType >
typedef unpacket_traits<Packet>::half Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::HalfPacket
private

◆ Packet

template<typename Scalar , typename IndexType >
typedef packet_traits<Scalar>::type Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Packet
private

Member Enumeration Documentation

◆ anonymous enum

template<typename Scalar , typename IndexType >
anonymous enum
private
Enumerator
Vectorizable 
PacketSize 
HalfPacketSize 
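905  {
906  Vectorizable = packet_traits<Scalar>::Vectorizable,
907  PacketSize = packet_traits<Scalar>::size,
908  HalfPacketSize = unpacket_traits<HalfPacket>::size,
909  HasHalfPacket = static_cast<int>(HalfPacketSize) < static_cast<int>(PacketSize)
910  };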
@ HalfPacketSize
Definition: TensorBlock.h:908
@ PacketSize
Definition: TensorBlock.h:907
@ Vectorizable
Definition: TensorBlock.h:906
@ size
Definition: GenericPacketMath.h:113
@ Vectorizable
Definition: GenericPacketMath.h:112
@ size
Definition: GenericPacketMath.h:139

◆ Kind

template<typename Scalar , typename IndexType >
enum Eigen::internal::StridedLinearBufferCopy::Kind
strong
Enumerator
Linear 
Scatter 
FillLinear 
FillScatter 
Gather 
Random 
914  {
915  Linear = 0, // src_stride == 1 && dst_stride == 1
916  Scatter = 1, // src_stride == 1 && dst_stride != 1
917  FillLinear = 2, // src_stride == 0 && dst_stride == 1
918  FillScatter = 3, // src_stride == 0 && dst_stride != 1
919  Gather = 4, // dst_stride == 1
920  Random = 5 // everything else
921  };

Member Function Documentation

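◆ Run() [1/2]

template<typename Scalar , typename IndexType >
template<typename StridedLinearBufferCopy::Kind kind>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Run ( const Dst &  dst,
const Src &  src,
const size_t  count 
)
inline static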

◆ Run() [2/2]

template<typename Scalar , typename IndexType >
template<typename StridedLinearBufferCopy::Kind kind>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Run ( const IndexType  count,
const IndexType  dst_offset,
const IndexType  dst_stride,
Scalar *EIGEN_RESTRICT  dst_data,
const IndexType  src_offset,
const IndexType  src_stride,
const Scalar *EIGEN_RESTRICT  src_data 
)
inline static private
949  {
950  const Scalar* src = &src_data[src_offset];
951  Scalar* dst = &dst_data[dst_offset];
952 
953  if (!Vectorizable) {
954  for (Index i = 0; i < count; ++i) {
955  dst[i * dst_stride] = src[i * src_stride];
956  }
957  return;
958  }
959 
960  const IndexType vectorized_size = PacketSize * (count / PacketSize);
961  IndexType i = 0;
962 
963  if (kind == StridedLinearBufferCopy::Kind::Linear) {
964  // ******************************************************************** //
965  // Linear copy from `src` to `dst`.
966  const IndexType unrolled_size = (4 * PacketSize) * (count / (4 * PacketSize));
967  eigen_assert(src_stride == 1 && dst_stride == 1);
968  for (; i < unrolled_size; i += 4 * PacketSize) {
969  for (int j = 0; j < 4; ++j) {
970  Packet p = ploadu<Packet>(src + i + j * PacketSize);
971  pstoreu<Scalar, Packet>(dst + i + j * PacketSize, p);
972  }
973  }
974  for (; i < vectorized_size; i += PacketSize) {
975  Packet p = ploadu<Packet>(src + i);
976  pstoreu<Scalar, Packet>(dst + i, p);
977  }
978  if (HasHalfPacket) {
979  const IndexType vectorized_half_size = HalfPacketSize * (count / HalfPacketSize);
980  if (i < vectorized_half_size) {
981  HalfPacket p = ploadu<HalfPacket>(src + i);
982  pstoreu<Scalar, HalfPacket>(dst + i, p);
983  i += HalfPacketSize;
984  }
985  }
986  for (; i < count; ++i) {
987  dst[i] = src[i];
988  }
989  // ******************************************************************** //
990  } else if (kind == StridedLinearBufferCopy::Kind::Scatter) {
991  // Scatter from `src` to `dst`.
992  eigen_assert(src_stride == 1 && dst_stride != 1);
993  for (; i < vectorized_size; i += PacketSize) {
994  Packet p = ploadu<Packet>(src + i);
995  pscatter<Scalar, Packet>(dst + i * dst_stride, p, dst_stride);
996  }
997  if (HasHalfPacket) {
998  const IndexType vectorized_half_size = HalfPacketSize * (count / HalfPacketSize);
999  if (i < vectorized_half_size) {
1000  HalfPacket p = ploadu<HalfPacket>(src + i);
1001  pscatter<Scalar, HalfPacket>(dst + i * dst_stride, p, dst_stride);
1002  i += HalfPacketSize;
1003  }
1004  }
1005  for (; i < count; ++i) {
1006  dst[i * dst_stride] = src[i];
1007  }
1008  // ******************************************************************** //
1009  } else if (kind == StridedLinearBufferCopy::Kind::FillLinear) {
1010  // Fill `dst` with value at `*src`.
1011  eigen_assert(src_stride == 0 && dst_stride == 1);
1012 
1013  const IndexType unrolled_size = (4 * PacketSize) * (count / (4 * PacketSize));
1014  Scalar s = *src;
1015  Packet p = pset1<Packet>(s);
1016  for (; i < unrolled_size; i += 4 * PacketSize) {
1017  for (int j = 0; j < 4; ++j) {
1018  pstoreu<Scalar, Packet>(dst + i + j * PacketSize, p);
1019  }
1020  }
1021  for (; i < vectorized_size; i += PacketSize) {
1022  pstoreu<Scalar, Packet>(dst + i, p);
1023  }
1024  if (HasHalfPacket) {
1025  const IndexType vectorized_half_size = HalfPacketSize * (count / HalfPacketSize);
1026  if (i < vectorized_half_size) {
1027  HalfPacket hp = pset1<HalfPacket>(s);
1028  pstoreu<Scalar, HalfPacket>(dst + i, hp);
1029  i += HalfPacketSize;
1030  }
1031  }
1032  for (; i < count; ++i) {
1033  dst[i] = s;
1034  }
1035  // ******************************************************************** //
1036  } else if (kind == StridedLinearBufferCopy::Kind::FillScatter) {
1037  // Scatter `*src` into `dst`.
1038  eigen_assert(src_stride == 0 && dst_stride != 1);
1039  Scalar s = *src;
1040  Packet p = pset1<Packet>(s);
1041  for (; i < vectorized_size; i += PacketSize) {
1042  pscatter<Scalar, Packet>(dst + i * dst_stride, p, dst_stride);
1043  }
1044  if (HasHalfPacket) {
1045  const IndexType vectorized_half_size = HalfPacketSize * (count / HalfPacketSize);
1046  if (i < vectorized_half_size) {
1047  HalfPacket hp = pset1<HalfPacket>(s);
1048  pscatter<Scalar, HalfPacket>(dst + i * dst_stride, hp, dst_stride);
1049  i += HalfPacketSize;
1050  }
1051  }
1052  for (; i < count; ++i) {
1053  dst[i * dst_stride] = s;
1054  }
1055  // ******************************************************************** //
1056  } else if (kind == StridedLinearBufferCopy::Kind::Gather) {
1057  // Gather from `src` into `dst`.
1058  eigen_assert(dst_stride == 1);
1059  for (; i < vectorized_size; i += PacketSize) {
1060  Packet p = pgather<Scalar, Packet>(src + i * src_stride, src_stride);
1061  pstoreu<Scalar, Packet>(dst + i, p);
1062  }
1063  if (HasHalfPacket) {
1064  const IndexType vectorized_half_size = HalfPacketSize * (count / HalfPacketSize);
1065  if (i < vectorized_half_size) {
1066  HalfPacket p = pgather<Scalar, HalfPacket>(src + i * src_stride, src_stride);
1067  pstoreu<Scalar, HalfPacket>(dst + i, p);
1068  i += HalfPacketSize;
1069  }
1070  }
1071  for (; i < count; ++i) {
1072  dst[i] = src[i * src_stride];
1073  }
1074  // ******************************************************************** //
1075  } else if (kind == StridedLinearBufferCopy::Kind::Random) {
1076  // Random.
1077  for (; i < count; ++i) {
1078  dst[i * dst_stride] = src[i * src_stride];
1079  }
1080  } else {
1081  eigen_assert(false);
1082  }
1083  }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
#define eigen_assert(x)
Definition: Macros.h:910
float * p
Definition: Tutorial_Map_using.cpp:9
SCALAR Scalar
Definition: bench_gemm.cpp:45
unpacket_traits< Packet >::half HalfPacket
Definition: TensorBlock.h:904
RealScalar s
Definition: level1_cplx_impl.h:130
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

References eigen_assert, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::FillLinear, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::FillScatter, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Gather, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::HalfPacketSize, i, j, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Linear, p, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::PacketSize, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Random, s, Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Scatter, and Eigen::internal::StridedLinearBufferCopy< Scalar, IndexType >::Vectorizable.


The documentation for this class was generated from the following file: