benchBlasGemm.cpp File Reference
#include <iostream>
#include <Eigen/Core>
#include "BenchTimer.h"
#include <cblas.h>
#include <string>

Macros

#define _FLOAT
 
#define CBLAS_GEMM   cblas_sgemm
 
#define MYVERIFY(A, M)
 

Typedefs

typedef float Scalar
 
typedef Eigen::Matrix< Scalar, Eigen::Dynamic, Eigen::DynamicMyMatrix
 

Functions

void bench_eigengemm (MyMatrix &mc, const MyMatrix &ma, const MyMatrix &mb, int nbloops)
 
void check_product (int M, int N, int K)
 
void check_product (void)
 
int main (int argc, char *argv[])
 

Macro Definition Documentation

◆ _FLOAT

#define _FLOAT

◆ CBLAS_GEMM

#define CBLAS_GEMM   cblas_sgemm

◆ MYVERIFY

#define MYVERIFY (   A,
  M 
)
Value:
if (!(A)) { \
std::cout << "FAIL: " << M << "\n"; \
}
The matrix class, also used for vectors and row-vectors.
Definition: Eigen/Eigen/src/Core/Matrix.h:186

Typedef Documentation

◆ MyMatrix

◆ Scalar

typedef float Scalar

Function Documentation

◆ bench_eigengemm()

void bench_eigengemm ( MyMatrix mc,
const MyMatrix ma,
const MyMatrix mb,
int  nbloops 
)
150  {
151  for (uint j = 0; j < nbloops; ++j) mc.noalias() += ma * mb;
152 }
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2

References j.

Referenced by main().

◆ check_product() [1/2]

void check_product ( int  M,
int  N,
int  K 
)
158  {
159  MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N), mref(M, N);
160  ma = MyMatrix::Random(M, K);
161  mb = MyMatrix::Random(K, N);
162  maT = ma.transpose();
163  mbT = mb.transpose();
164  mc = MyMatrix::Random(M, N);
165 
166  MyMatrix::Scalar eps = 1e-4;
167 
168  meigen = mref = mc;
169  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
170  meigen += ma * mb;
171  MYVERIFY(meigen.isApprox(mref, eps), ". * .");
172 
173  meigen = mref = mc;
174  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
175  meigen += maT.transpose() * mb;
176  MYVERIFY(meigen.isApprox(mref, eps), "T * .");
177 
178  meigen = mref = mc;
179  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
180  meigen += (maT.transpose()) * (mbT.transpose());
181  MYVERIFY(meigen.isApprox(mref, eps), "T * T");
182 
183  meigen = mref = mc;
184  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
185  meigen += ma * mbT.transpose();
186  MYVERIFY(meigen.isApprox(mref, eps), ". * T");
187 }
Array< double, 1, 3 > e(1./3., 0.5, 2.)
#define MYVERIFY(A, M)
Definition: benchBlasGemm.cpp:154
#define CBLAS_GEMM
Definition: benchBlasGemm.cpp:22
constexpr EIGEN_DEVICE_FUNC const Scalar * data() const
Definition: PlainObjectBase.h:273
internal::traits< Matrix< Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_ > >::Scalar Scalar
Definition: PlainObjectBase.h:127
@ N
Definition: constructor.cpp:22
double eps
Definition: crbond_bessel.cc:24
double K
Wave number.
Definition: sphere_scattering.cc:115

References CBLAS_GEMM, Eigen::PlainObjectBase< Derived >::data(), e(), CRBond_Bessel::eps, PlanarWave::K, MYVERIFY, and N.

Referenced by check_product(), and main().

◆ check_product() [2/2]

void check_product ( void  )
189  {
190  int M, N, K;
191  for (uint i = 0; i < 1000; ++i) {
192  M = internal::random<int>(1, 64);
193  N = internal::random<int>(1, 768);
194  K = internal::random<int>(1, 768);
195  M = (0 + M) * 1;
196  std::cout << M << " x " << N << " x " << K << "\n";
197  check_product(M, N, K);
198  }
199 }
int i
Definition: BiCGSTAB_step_by_step.cpp:9
void check_product(int M, int N, int K)
Definition: benchBlasGemm.cpp:158
Matrix< RealScalar, Dynamic, Dynamic > M
Definition: bench_gemm.cpp:50

References check_product(), i, PlanarWave::K, and N.

◆ main()

int main ( int argc  ,
char argv[] 
)
33  {
34 // disable SSE exceptions
35 #ifdef __GNUC__
36  {
37  int aux;
38  asm("stmxcsr %[aux] \n\t"
39  "orl $32832, %[aux] \n\t"
40  "ldmxcsr %[aux] \n\t"
41  :
42  : [aux] "m"(aux));
43  }
44 #endif
45 
46  int nbtries = 1, nbloops = 1, M, N, K;
47 
48  if (argc == 2) {
49  if (std::string(argv[1]) == "check")
50  check_product();
51  else
52  M = N = K = atoi(argv[1]);
53  } else if ((argc == 3) && (std::string(argv[1]) == "auto")) {
54  M = N = K = atoi(argv[2]);
55  nbloops = 1000000000 / (M * M * M);
56  if (nbloops < 1) nbloops = 1;
57  nbtries = 6;
58  } else if (argc == 4) {
59  M = N = K = atoi(argv[1]);
60  nbloops = atoi(argv[2]);
61  nbtries = atoi(argv[3]);
62  } else if (argc == 6) {
63  M = atoi(argv[1]);
64  N = atoi(argv[2]);
65  K = atoi(argv[3]);
66  nbloops = atoi(argv[4]);
67  nbtries = atoi(argv[5]);
68  } else {
69  std::cout << "Usage: " << argv[0] << " size \n";
70  std::cout << "Usage: " << argv[0] << " auto size\n";
71  std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
72  std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
73  std::cout << "Usage: " << argv[0] << " check\n";
74  std::cout << "Options:\n";
75  std::cout << " size unique size of the 2 matrices (integer)\n";
76  std::cout << " auto automatically set the number of repetitions and tries\n";
77  std::cout << " nbloops number of times the GEMM routines is executed\n";
78  std::cout << " nbtries number of times the loop is benched (return the best try)\n";
79  std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
80  std::cout << " check check eigen product using cblas as a reference\n";
81  exit(1);
82  }
83 
84  double nbmad = double(M) * double(N) * double(K) * double(nbloops);
85 
86  if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n";
87 
88  Scalar alpha, beta;
89  MyMatrix ma(M, K), mb(K, N), mc(M, N);
90  ma = MyMatrix::Random(M, K);
91  mb = MyMatrix::Random(K, N);
92  mc = MyMatrix::Random(M, N);
93 
95 
96  // we simply compute c += a*b, so:
97  alpha = 1;
98  beta = 1;
99 
100  // bench cblas
101  // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
102  if (!(std::string(argv[1]) == "auto")) {
103  timer.reset();
104  for (uint k = 0; k < nbtries; ++k) {
105  timer.start();
106  for (uint j = 0; j < nbloops; ++j)
107 #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
108  CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta,
109  mc.data(), N);
110 #else
111  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta,
112  mc.data(), M);
113 #endif
114  timer.stop();
115  }
116  if (!(std::string(argv[1]) == "auto"))
117  std::cout << "cblas: " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
118  else
119  std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
120  }
121 
122  // clear
123  ma = MyMatrix::Random(M, K);
124  mb = MyMatrix::Random(K, N);
125  mc = MyMatrix::Random(M, N);
126 
127  // eigen
128  // if (!(std::string(argv[1])=="auto"))
129  {
130  timer.reset();
131  for (uint k = 0; k < nbtries; ++k) {
132  timer.start();
133  bench_eigengemm(mc, ma, mb, nbloops);
134  timer.stop();
135  }
136  if (!(std::string(argv[1]) == "auto"))
137  std::cout << "eigen : " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
138  else
139  std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
140  }
141 
142  std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
143  std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
144 
145  return 0;
146 }
float Scalar
Definition: benchBlasGemm.cpp:21
void bench_eigengemm(MyMatrix &mc, const MyMatrix &ma, const MyMatrix &mb, int nbloops)
Definition: benchBlasGemm.cpp:150
Definition: BenchTimer.h:55
RealScalar alpha
Definition: level1_cplx_impl.h:151
Scalar beta
Definition: level2_cplx_impl.h:36
char char char int int * k
Definition: level2_impl.h:374
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16 &a)
Definition: BFloat16.h:643
std::ptrdiff_t l1CacheSize()
Definition: products/GeneralBlockPanelKernel.h:3119
std::ptrdiff_t l2CacheSize()
Definition: products/GeneralBlockPanelKernel.h:3127
std::string string(const unsigned &i)
Definition: oomph_definitions.cc:286
double timer
Definition: oomph_metis_from_parmetis_3.1.1/struct.h:210

References alpha, bench_eigengemm(), beta, CBLAS_GEMM, check_product(), Eigen::PlainObjectBase< Derived >::data(), e(), Eigen::bfloat16_impl::floor(), j, k, PlanarWave::K, Eigen::l1CacheSize(), Eigen::l2CacheSize(), N, and oomph::Global_string_for_annotation::string().