165 std::cout <<
"L1 cache size = " << (l1 > 0 ? l1 / 1024 : -1) <<
" KB\n";
166 std::cout <<
"L2/L3 cache size = " << (l2 > 0 ? l2 / 1024 : -1) <<
" KB\n";
167 typedef internal::gebp_traits<Scalar, Scalar> Traits;
168 std::cout <<
"Register blocking = " << Traits::mr <<
" x " << Traits::nr <<
"\n";
177 int cache_size1 = -1, cache_size2 = l2, cache_size3 = 0;
179 bool need_help =
false;
180 for (
int i = 1;
i < argc;) {
181 if (argv[
i][0] ==
'-') {
182 if (argv[
i][1] ==
's') {
186 if (argv[
i][0] !=
'-') {
190 }
else if (argv[
i][1] ==
'c') {
192 cache_size1 = atoi(argv[
i++]);
193 if (argv[
i][0] !=
'-') {
194 cache_size2 = atoi(argv[
i++]);
195 if (argv[
i][0] !=
'-') cache_size3 = atoi(argv[
i++]);
197 }
else if (argv[
i][1] ==
't') {
198 tries = atoi(argv[++
i]);
200 }
else if (argv[
i][1] ==
'p') {
202 rep = atoi(argv[
i++]);
211 std::cout << argv[0] <<
" -s <matrix sizes> -c <cache sizes> -t <nb tries> -p <nb repeats>\n";
212 std::cout <<
" <matrix sizes> : size\n";
213 std::cout <<
" <matrix sizes> : rows columns depth\n";
217 #if EIGEN_VERSION_AT_LEAST(3, 2, 90)
218 if (cache_size1 > 0)
setCpuCacheSizes(cache_size1, cache_size2, cache_size3);
229 std::cout <<
"Matrix sizes = " <<
m <<
"x" <<
p <<
" * " <<
p <<
"x" <<
n <<
"\n";
230 std::ptrdiff_t mc(
m), nc(
n), kc(
p);
231 internal::computeProductBlockingSizes<Scalar, Scalar>(kc, mc, nc);
232 std::cout <<
"blocking size (mc x kc) = " << mc <<
" x " << kc <<
" x " << nc <<
"\n";
237 #if defined EIGEN_HAS_OPENMP
239 int procs = omp_get_max_threads();
244 omp_set_num_threads(1);
245 r.noalias() +=
a *
b;
246 omp_set_num_threads(procs);
248 c.noalias() +=
a *
b;
249 if (!
r.isApprox(
c)) std::cerr <<
"Warning, your parallel product is crap!\n\n";
251 #elif defined HAVE_BLAS
253 c.noalias() +=
a *
b;
254 if (!
r.isApprox(
c)) {
255 std::cout << (
r -
c).norm() /
r.norm() <<
"\n";
256 std::cerr <<
"Warning, your product is crap!\n\n";
259 if (1. *
m *
n *
p < 2000. * 2000 * 2000) {
262 if (!
r.isApprox(
c)) {
263 std::cout << (
r -
c).norm() /
r.norm() <<
"\n";
264 std::cerr <<
"Warning, your product is crap!\n\n";
272 BENCH(tblas, tries, rep, blas_gemm(
a,
b,
c));
273 std::cout <<
"blas cpu " << tblas.
best(
CPU_TIMER) / rep <<
"s \t"
276 std::cout <<
"blas real " << tblas.
best(
REAL_TIMER) / rep <<
"s \t"
282 if (
b.norm() +
a.norm() == 123.554) std::cout <<
"\n";
287 std::cout <<
"eigen cpu " << tmt.
best(
CPU_TIMER) / rep <<
"s \t"
290 std::cout <<
"eigen real " << tmt.
best(
REAL_TIMER) / rep <<
"s \t"
294 #ifdef EIGEN_HAS_OPENMP
297 omp_set_num_threads(1);
301 std::cout <<
"eigen mono cpu " << tmono.
best(
CPU_TIMER) / rep <<
"s \t"
304 std::cout <<
"eigen mono real " << tmono.
best(
REAL_TIMER) / rep <<
"s \t"
312 if (1. *
m *
n *
p < 30 * 30 * 30) {
315 BENCH(tmt, tries, rep,
c.noalias() +=
a.lazyProduct(
b));
316 std::cout <<
"lazy cpu " << tmt.
best(
CPU_TIMER) / rep <<
"s \t"
341 std::cout <<
"\"matlab\" cpu " <<
t.best(
CPU_TIMER) / rep <<
"s \t"
344 std::cout <<
"\"matlab\" real " <<
t.best(
REAL_TIMER) / rep <<
"s \t"
362 std::cout <<
"\"matlab\" cpu " <<
t.best(
CPU_TIMER) / rep <<
"s \t"
365 std::cout <<
"\"matlab\" real " <<
t.best(
REAL_TIMER) / rep <<
"s \t"
383 std::cout <<
"\"matlab\" cpu " <<
t.best(
CPU_TIMER) / rep <<
"s \t"
386 std::cout <<
"\"matlab\" real " <<
t.best(
REAL_TIMER) / rep <<
"s \t"
#define BENCH(TIMER, TRIES, REP, CODE)
Definition: BenchTimer.h:150
int i
Definition: BiCGSTAB_step_by_step.cpp:9
const unsigned n
Definition: CG3DPackingUnitTest.cpp:11
Array< double, 1, 3 > e(1./3., 0.5, 2.)
float * p
Definition: Tutorial_Map_using.cpp:9
SCALAR Scalar
Definition: bench_gemm.cpp:45
void matlab_real_cplx(const M &a, const M &br, const M &bi, M &cr, M &ci)
Definition: bench_gemm.cpp:147
EIGEN_DONT_INLINE void gemm(const A &a, const B &b, C &c)
Definition: bench_gemm.cpp:158
void matlab_cplx_real(const M &ar, const M &ai, const M &b, M &cr, M &ci)
Definition: bench_gemm.cpp:152
void matlab_cplx_cplx(const M &ar, const M &ai, const M &br, const M &bi, M &cr, M &ci)
Definition: bench_gemm.cpp:139
Definition: BenchTimer.h:55
double best(int TIMER=CPU_TIMER) const
Definition: BenchTimer.h:98
double total(int TIMER=CPU_TIMER) const
Definition: BenchTimer.h:106
The matrix class, also used for vectors and row-vectors.
Definition: Eigen/Eigen/src/Core/Matrix.h:186
Definition: matrices.h:74
RealScalar s
Definition: level1_cplx_impl.h:130
int * m
Definition: level2_cplx_impl.h:294
int queryTopLevelCacheSize()
Definition: Memory.h:1307
int queryL1CacheSize()
Definition: Memory.h:1299
@ REAL_TIMER
Definition: BenchTimer.h:46
@ CPU_TIMER
Definition: BenchTimer.h:46
EIGEN_DEPRECATED void initParallel()
Definition: Parallelizer.h:50
void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2, std::ptrdiff_t l3)
Definition: products/GeneralBlockPanelKernel.h:3146
void setNbThreads(int v)
Definition: Parallelizer.h:62
list rc
Definition: plotDoE.py:16
t
Definition: plotPSD.py:36
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:217