591 for (;
j + vectorSize <=
rows;
j += vectorSize) {
592 const DataMapper lhs2 = UseLhs ? lhs.getSubMapper(
j, 0) : lhs.getSubMapper(0,
j);
595 if (PanelMode) ri += vectorSize * offset;
597 dhs_copy<4>(blockA, lhs2,
i, ri, depth, vectorSize);
598 dhs_copy<2>(blockA, lhs2,
i, ri, depth, vectorSize);
599 dhs_copy<1>(blockA, lhs2,
i, ri, depth, vectorSize);
601 for (;
i < depth;
i++) {
602 if (((StorageOrder ==
RowMajor) && UseLhs) || ((StorageOrder ==
ColMajor) && !UseLhs)) {
604 blockA[ri + 0] = lhs2(0,
i);
605 blockA[ri + 1] = lhs2(1,
i);
606 blockA[ri + 2] = lhs2(2,
i);
607 blockA[ri + 3] = lhs2(3,
i);
609 blockA[ri + 0] = lhs2(
i, 0);
610 blockA[ri + 1] = lhs2(
i, 1);
611 blockA[ri + 2] = lhs2(
i, 2);
612 blockA[ri + 3] = lhs2(
i, 3);
617 lhsV = lhs2.template loadPacket<Packet>(0,
i);
619 lhsV = lhs2.template loadPacket<Packet>(
i, 0);
621 pstore<Scalar>(blockA + ri, lhsV);
627 if (PanelMode) ri += vectorSize * (stride - offset - depth);
631 if (PanelMode) ri += offset;
634 const DataMapper lhs2 = lhs.getSubMapper(0,
j);
636 blockA[ri] = lhs2(
i, 0);
640 if (PanelMode) ri += stride - depth;
644 if (PanelMode) ri += offset * (
rows -
j);
649 blockA[ri] = lhs(
k,
i);
int rows
Definition: Tutorial_commainit_02.cpp:1
@ vectorsize
Definition: MatrixProduct.h:67
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2