d1/dda/TensorRandom_8h_source.html

 // This file is part of Eigen, a lightweight C++ template library

 // for linear algebra.

 //

 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>

 // Copyright (C) 2018 Mehdi Goli <eigen@codeplay.com> Codeplay Software Ltd.

 //

 // This Source Code Form is subject to the terms of the Mozilla

 // Public License v. 2.0. If a copy of the MPL was not distributed

 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


 #ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H

 #define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H


 // IWYU pragma: private

 #include "./InternalHeaderCheck.h"


 namespace Eigen {

 namespace internal {


 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t get_random_seed() {

 #if defined(EIGEN_GPU_COMPILE_PHASE)

   // We don't support 3d kernels since we currently only use 1 and

   // 2d kernels.

   gpu_assert(threadIdx.z == 0);

   return blockIdx.x * blockDim.x + threadIdx.x + gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y);

 #else

   // Rely on Eigen's random implementation.

   return random<uint64_t>();

 #endif

 }


 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state, uint64_t stream) {

   // TODO: Unify with the implementation in the non blocking thread pool.

   uint64_t current = *state;

   // Update the internal state

   *state = current * 6364136223846793005ULL + (stream << 1 | 1);

   // Generate the random output (using the PCG-XSH-RS scheme)

   return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));

 }


 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) {

   seed = seed ? seed : get_random_seed();

   return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;

 }


 template <typename T>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T RandomToTypeUniform(uint64_t* state, uint64_t stream) {

   unsigned rnd = PCG_XSH_RS_generator(state, stream);

   return static_cast<T>(rnd);

 }


 template <>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state, uint64_t stream) {

   // Generate 10 random bits for the mantissa, merge with exponent.

   unsigned rnd = PCG_XSH_RS_generator(state, stream);

   const uint16_t half_bits = static_cast<uint16_t>(rnd & 0x3ffu) | (static_cast<uint16_t>(15) << 10);

   Eigen::half result = Eigen::numext::bit_cast<Eigen::half>(half_bits);

   // Return the final result

   return result - Eigen::half(1.0f);

 }


 template <>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::bfloat16 RandomToTypeUniform<Eigen::bfloat16>(uint64_t* state,

                                                                                            uint64_t stream) {

   // Generate 7 random bits for the mantissa, merge with exponent.

   unsigned rnd = PCG_XSH_RS_generator(state, stream);

   const uint16_t half_bits = static_cast<uint16_t>(rnd & 0x7fu) | (static_cast<uint16_t>(127) << 7);

   Eigen::bfloat16 result = Eigen::numext::bit_cast<Eigen::bfloat16>(half_bits);

   // Return the final result

   return result - Eigen::bfloat16(1.0f);

 }


 template <>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float RandomToTypeUniform<float>(uint64_t* state, uint64_t stream) {

   typedef union {

     uint32_t raw;

     float fp;

   } internal;

   internal result;

   // Generate 23 random bits for the mantissa mantissa

   const unsigned rnd = PCG_XSH_RS_generator(state, stream);

   result.raw = rnd & 0x7fffffu;

   // Set the exponent

   result.raw |= (static_cast<uint32_t>(127) << 23);

   // Return the final result

   return result.fp - 1.0f;

 }


 template <>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double RandomToTypeUniform<double>(uint64_t* state, uint64_t stream) {

   typedef union {

     uint64_t raw;

     double dp;

   } internal;

   internal result;

   result.raw = 0;

   // Generate 52 random bits for the mantissa

   // First generate the upper 20 bits

   unsigned rnd1 = PCG_XSH_RS_generator(state, stream) & 0xfffffu;

   // The generate the lower 32 bits

   unsigned rnd2 = PCG_XSH_RS_generator(state, stream);

   result.raw = (static_cast<uint64_t>(rnd1) << 32) | rnd2;

   // Set the exponent

   result.raw |= (static_cast<uint64_t>(1023) << 52);

   // Return the final result

   return result.dp - 1.0;

 }


 template <>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state,

                                                                                                     uint64_t stream) {

   return std::complex<float>(RandomToTypeUniform<float>(state, stream), RandomToTypeUniform<float>(state, stream));

 }

 template <>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state,

                                                                                                       uint64_t stream) {

   return std::complex<double>(RandomToTypeUniform<double>(state, stream), RandomToTypeUniform<double>(state, stream));

 }


 template <typename T>

 class UniformRandomGenerator {

  public:

   static constexpr bool PacketAccess = true;


   // Uses the given "seed" if non-zero, otherwise uses a random seed.

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(uint64_t seed = 0) {

     m_state = PCG_XSH_RS_state(seed);

 #ifdef EIGEN_USE_SYCL

     // In SYCL it is not possible to build PCG_XSH_RS_state in one step.

     // Therefore, we need two steps to initializate the m_state.

     // IN SYCL, the constructor of the functor is s called on the CPU

     // and we get the clock seed here from the CPU. However, This seed is

     // the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.

     // and only  available on the Operator() function (which is called on the GPU).

     // Thus for CUDA (((CLOCK  + global_thread_id)* 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each

     // thread but for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread and each

     // thread adds the  (global_thread_id* 6364136223846793005ULL) for itself only once, in order to complete the

     // construction similar to CUDA Therefore, the thread Id injection is not available at this stage.

     // However when the operator() is called the thread ID will be available. So inside the operator,

     // we add the thrreadID, BlockId,... (which is equivalent of i)

     // to the seed and construct the unique m_state per thead similar to cuda.

     m_exec_once = false;

 #endif

   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(const UniformRandomGenerator& other) {

     m_state = other.m_state;

 #ifdef EIGEN_USE_SYCL

     m_exec_once = other.m_exec_once;

 #endif

   }


   template <typename Index>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(Index i) const {

 #ifdef EIGEN_USE_SYCL

     if (!m_exec_once) {

       // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread

       // The (i * 6364136223846793005ULL) is the remaining part of the PCG_XSH_RS_state on the GPU side

       m_state += (i * 6364136223846793005ULL);

       m_exec_once = true;

     }

 #endif

     T result = RandomToTypeUniform<T>(&m_state, i);

     return result;

   }


   template <typename Packet, typename Index>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(Index i) const {

     const int packetSize = internal::unpacket_traits<Packet>::size;

     EIGEN_ALIGN_MAX T values[packetSize];

 #ifdef EIGEN_USE_SYCL

     if (!m_exec_once) {

       // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread

       m_state += (i * 6364136223846793005ULL);

       m_exec_once = true;

     }

 #endif

     EIGEN_UNROLL_LOOP

     for (int j = 0; j < packetSize; ++j) {

       values[j] = RandomToTypeUniform<T>(&m_state, i);

     }

     return internal::pload<Packet>(values);

   }


  private:

   mutable uint64_t m_state;

 #ifdef EIGEN_USE_SYCL

   mutable bool m_exec_once;

 #endif

 };


 template <typename Scalar>

 struct functor_traits<UniformRandomGenerator<Scalar> > {

   enum {

     // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)).

     Cost = 12 * NumTraits<Scalar>::AddCost * ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)),

     PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess

   };

 };


 template <typename T>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T RandomToTypeNormal(uint64_t* state, uint64_t stream) {

   // Use the ratio of uniform method to generate numbers following a normal

   // distribution. See for example Numerical Recipes chapter 7.3.9 for the

   // details.

   T u, v, q;

   do {

     u = RandomToTypeUniform<T>(state, stream);

     v = T(1.7156) * (RandomToTypeUniform<T>(state, stream) - T(0.5));

     const T x = u - T(0.449871);

     const T y = numext::abs(v) + T(0.386595);

     q = x * x + y * (T(0.196) * y - T(0.25472) * x);

   } while (q > T(0.27597) && (q > T(0.27846) || v * v > T(-4) * numext::log(u) * u * u));


   return v / u;

 }


 template <>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state,

                                                                                                    uint64_t stream) {

   return std::complex<float>(RandomToTypeNormal<float>(state, stream), RandomToTypeNormal<float>(state, stream));

 }

 template <>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state,

                                                                                                      uint64_t stream) {

   return std::complex<double>(RandomToTypeNormal<double>(state, stream), RandomToTypeNormal<double>(state, stream));

 }


 template <typename T>

 class NormalRandomGenerator {

  public:

   static constexpr bool PacketAccess = true;


   // Uses the given "seed" if non-zero, otherwise uses a random seed.

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {

     m_state = PCG_XSH_RS_state(seed);

 #ifdef EIGEN_USE_SYCL

     // In SYCL it is not possible to build PCG_XSH_RS_state in one step.

     // Therefore, we need two steps to initializate the m_state.

     // IN SYCL, the constructor of the functor is s called on the CPU

     // and we get the clock seed here from the CPU. However, This seed is

     // the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.

     // and only  available on the Operator() function (which is called on the GPU).

     // Therefore, the thread Id injection is not available at this stage. However when the operator()

     // is called the thread ID will be available. So inside the operator,

     // we add the thrreadID, BlockId,... (which is equivalent of i)

     // to the seed and construct the unique m_state per thead similar to cuda.

     m_exec_once = false;

 #endif

   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(const NormalRandomGenerator& other) {

     m_state = other.m_state;

 #ifdef EIGEN_USE_SYCL

     m_exec_once = other.m_exec_once;

 #endif

   }


   template <typename Index>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(Index i) const {

 #ifdef EIGEN_USE_SYCL

     if (!m_exec_once) {

       // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread

       m_state += (i * 6364136223846793005ULL);

       m_exec_once = true;

     }

 #endif

     T result = RandomToTypeNormal<T>(&m_state, i);

     return result;

   }


   template <typename Packet, typename Index>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(Index i) const {

     const int packetSize = internal::unpacket_traits<Packet>::size;

     EIGEN_ALIGN_MAX T values[packetSize];

 #ifdef EIGEN_USE_SYCL

     if (!m_exec_once) {

       // This is the second stage of adding thread Id to the CPU clock seed and build unique seed per thread

       m_state += (i * 6364136223846793005ULL);

       m_exec_once = true;

     }

 #endif

     EIGEN_UNROLL_LOOP

     for (int j = 0; j < packetSize; ++j) {

       values[j] = RandomToTypeNormal<T>(&m_state, i);

     }

     return internal::pload<Packet>(values);

   }


  private:

   mutable uint64_t m_state;

 #ifdef EIGEN_USE_SYCL

   mutable bool m_exec_once;

 #endif

 };


 template <typename Scalar>

 struct functor_traits<NormalRandomGenerator<Scalar> > {

   enum {

     // On average, we need to generate about 3 random numbers

     // 15 mul, 8 add, 1.5 logs

     Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost + 15 * NumTraits<Scalar>::AddCost +

            8 * NumTraits<Scalar>::AddCost + 3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2,

     PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess

   };

 };


 }  // end namespace internal

 }  // end namespace Eigen


 #endif  // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H

v
Array< int, Dynamic, 1 > v
Definition: Array_initializer_list_vector_cxx11.cpp:1

i
int i
Definition: BiCGSTAB_step_by_step.cpp:9

EIGEN_ALIGN_MAX
#define EIGEN_ALIGN_MAX
Definition: ConfigureVectorization.h:146

T
Eigen::Triplet< double > T
Definition: EigenUnitTest.cpp:11

EIGEN_UNROLL_LOOP
#define EIGEN_UNROLL_LOOP
Definition: Macros.h:1298

EIGEN_DEVICE_FUNC
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892

EIGEN_STRONG_INLINE
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834

Scalar
SCALAR Scalar
Definition: bench_gemm.cpp:45

Eigen::Triplet< double >

Eigen::internal::NormalRandomGenerator
Definition: TensorRandom.h:229

Eigen::internal::NormalRandomGenerator::m_state
uint64_t m_state
Definition: TensorRandom.h:289

Eigen::internal::NormalRandomGenerator::NormalRandomGenerator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed=0)
Definition: TensorRandom.h:234

Eigen::internal::NormalRandomGenerator::PacketAccess
static constexpr bool PacketAccess
Definition: TensorRandom.h:231

Eigen::internal::NormalRandomGenerator::packetOp
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(Index i) const
Definition: TensorRandom.h:271

Eigen::internal::NormalRandomGenerator::NormalRandomGenerator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(const NormalRandomGenerator &other)
Definition: TensorRandom.h:250

Eigen::internal::NormalRandomGenerator::operator()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(Index i) const
Definition: TensorRandom.h:258

Eigen::internal::UniformRandomGenerator
Definition: TensorRandom.h:121

Eigen::internal::UniformRandomGenerator::operator()
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(Index i) const
Definition: TensorRandom.h:153

Eigen::internal::UniformRandomGenerator::packetOp
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(Index i) const
Definition: TensorRandom.h:167

Eigen::internal::UniformRandomGenerator::UniformRandomGenerator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(const UniformRandomGenerator &other)
Definition: TensorRandom.h:145

Eigen::internal::UniformRandomGenerator::PacketAccess
static constexpr bool PacketAccess
Definition: TensorRandom.h:123

Eigen::internal::UniformRandomGenerator::UniformRandomGenerator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(uint64_t seed=0)
Definition: TensorRandom.h:126

Eigen::internal::UniformRandomGenerator::m_state
uint64_t m_state
Definition: TensorRandom.h:185

threadIdx
dim3 threadIdx
Definition: gpu_common.h:16

blockDim
dim3 blockDim
Definition: gpu_common.h:16

blockIdx
dim3 blockIdx
Definition: gpu_common.h:16

Eigen::internal::y
const Scalar & y
Definition: RandomImpl.h:36

Eigen::internal::get_random_seed
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t get_random_seed()
Definition: TensorRandom.h:20

Eigen::internal::RandomToTypeNormal
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T RandomToTypeNormal(uint64_t *state, uint64_t stream)
Definition: TensorRandom.h:201

Eigen::internal::RandomToTypeUniform< float >
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float RandomToTypeUniform< float >(uint64_t *state, uint64_t stream)
Definition: TensorRandom.h:74

Eigen::internal::RandomToTypeUniform
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T RandomToTypeUniform(uint64_t *state, uint64_t stream)
Definition: TensorRandom.h:47

Eigen::internal::RandomToTypeUniform< double >
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double RandomToTypeUniform< double >(uint64_t *state, uint64_t stream)
Definition: TensorRandom.h:90

Eigen::internal::PCG_XSH_RS_state
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed)
Definition: TensorRandom.h:41

Eigen::internal::PCG_XSH_RS_generator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t *state, uint64_t stream)
Definition: TensorRandom.h:32

Eigen::numext::log
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log(const T &x)
Definition: MathFunctions.h:1332

Eigen::numext::abs
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t< NumTraits< T >::IsSigned||NumTraits< T >::IsComplex, typename NumTraits< T >::Real > abs(const T &x)
Definition: MathFunctions.h:1355

Eigen::numext::q
EIGEN_DEVICE_FUNC const Scalar & q
Definition: SpecialFunctionsImpl.h:2019

Eigen::numext::uint16_t
std::uint16_t uint16_t
Definition: Meta.h:38

Eigen::numext::uint32_t
std::uint32_t uint32_t
Definition: Meta.h:40

Eigen::numext::uint64_t
std::uint64_t uint64_t
Definition: Meta.h:42

Eigen
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70

Eigen::Index
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83

internal
Definition: Eigen_Colamd.h:49

plotDoE.x
list x
Definition: plotDoE.py:28

Eigen::NumTraits
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:217

Eigen::bfloat16
Definition: BFloat16.h:101

Eigen::half
Definition: Half.h:139

Eigen::internal::functor_traits
Definition: XprHelper.h:205

Eigen::internal::functor_traits::PacketAccess
@ PacketAccess
Definition: XprHelper.h:206

Eigen::internal::functor_traits::Cost
@ Cost
Definition: XprHelper.h:206

Eigen::internal::unpacket_traits
Definition: GenericPacketMath.h:134

j
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

Eigen::internal::Packet
Definition: ZVector/PacketMath.h:50

InternalHeaderCheck.h