2 #include "../../InternalHeaderCheck.h"
7 #if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
14 : gebp_traits<float, float, false, false, Architecture::Generic, GEBPPacketFull> {
// Inline-asm accumulate: emits a single `vmla.f32` (vector multiply-accumulate)
// on 128-bit q registers, i.e. r += c * alpha lane-wise.
// `r` is a read-write SIMD-register operand ("+w"); `c` and `alpha` are
// read-only register inputs ("w"). No clobbers are declared.
// `volatile` stops the compiler from deleting the statement.
// NOTE(review): the enclosing function signature is not visible in this excerpt.
18 asm volatile(
"vmla.f32 %q[r], %q[c], %q[alpha]" : [
r]
"+w"(
r) : [
c]
"w"(
c), [
alpha]
"w"(
alpha) :);
21 template <
typename LaneIdType>
26 template <
typename LaneIdType>
28 const LaneIdType& lane)
const {
// Default value for EIGEN_NEON_GEBP_NR, applied only when the macro has not
// been defined before this point. The gebp_traits specializations in this file
// use it as the value of their `nr` enum constant.
37 #ifndef EIGEN_NEON_GEBP_NR
38 #define EIGEN_NEON_GEBP_NR 8
43 : gebp_traits<float, float, false, false, Architecture::Generic, GEBPPacketFull> {
46 enum {
nr = EIGEN_NEON_GEBP_NR };
58 const FixedInt<0>&)
const {
59 c = vfmaq_n_f32(
c,
a,
b);
65 const FixedInt<0>&)
const {
66 madd_helper<0>(
a,
b,
c);
69 const FixedInt<1>&)
const {
70 madd_helper<1>(
a,
b,
c);
73 const FixedInt<2>&)
const {
74 madd_helper<2>(
a,
b,
c);
77 const FixedInt<3>&)
const {
78 madd_helper<3>(
a,
b,
c);
// Lane-indexed fused multiply-add for four-float vectors: c += a * b[lane].
// Taken when EIGEN_GNUC_STRICT_LESS_THAN(9, 0, 0) holds (by its name: GCC older
// than 9.0.0 -- confirm against Macros.h): the operation is spelled as inline
// asm, one `fmla ... .s[N]` statement per lane N = 0..3.
// NOTE(review): the conditions that select one statement per LaneID, and the
// #else/#endif of this block, are not visible in this excerpt.
84 #if EIGEN_GNUC_STRICT_LESS_THAN(9, 0, 0)
// c.4s += a.4s * b.s[0]; c is read-write ("+w"), a and b are inputs ("w").
89 asm(
"fmla %0.4s, %1.4s, %2.s[0]\n" :
"+w"(
c) :
"w"(
a),
"w"(
b) :);
// c.4s += a.4s * b.s[1]
91 asm(
"fmla %0.4s, %1.4s, %2.s[1]\n" :
"+w"(
c) :
"w"(
a),
"w"(
b) :);
// c.4s += a.4s * b.s[2]
93 asm(
"fmla %0.4s, %1.4s, %2.s[2]\n" :
"+w"(
c) :
"w"(
a),
"w"(
b) :);
// c.4s += a.4s * b.s[3]
95 asm(
"fmla %0.4s, %1.4s, %2.s[3]\n" :
"+w"(
c) :
"w"(
a),
"w"(
b) :);
// Intrinsic spelling of the same operation, with the lane given by LaneID;
// presumably the branch used when the asm path above is not selected.
97 c = vfmaq_laneq_f32(
c,
a,
b, LaneID);
104 : gebp_traits<double, double, false, false, Architecture::Generic> {
106 enum {
nr = EIGEN_NEON_GEBP_NR };
// Two 128-bit NEON registers of two doubles each (float64x2_t); together they
// hold four doubles.
108 float64x2_t
B_0, B_1;
// Fill both registers from four consecutive doubles at `b`:
// B_0 <- b[0..1], B_1 <- b[2..3].
// NOTE(review): the enclosing declarations are not visible in this excerpt.
114 dest.B_0 = vld1q_f64(
b);
115 dest.B_1 = vld1q_f64(
b + 2);
125 const FixedInt<0>&)
const {
126 c = vfmaq_n_f64(
c,
a,
b);
133 const FixedInt<0>&)
const {
134 madd_helper<0>(
a,
b,
c);
137 const FixedInt<1>&)
const {
138 madd_helper<1>(
a,
b,
c);
141 const FixedInt<2>&)
const {
142 madd_helper<2>(
a,
b,
c);
145 const FixedInt<3>&)
const {
146 madd_helper<3>(
a,
b,
c);
// Lane-indexed fused multiply-add for two-double vectors:
// c += a * (element LaneID of the four doubles held in b.B_0 / b.B_1).
// Each register holds only two doubles, so LaneID 0 and 1 read lanes 0 and 1
// of b.B_0, while LaneID 2 and 3 read lanes 0 and 1 of b.B_1.
// The first group below spells the operation as inline `fmla` asm (taken when
// EIGEN_GNUC_STRICT_LESS_THAN(9, 0, 0) holds); the second group uses the
// vfmaq_laneq_f64 intrinsic with the same lane mapping.
// NOTE(review): the function signature, the `LaneID == 0` conditions and the
// #else/#endif lines are not visible in this excerpt.
150 template <
int LaneID>
152 #if EIGEN_GNUC_STRICT_LESS_THAN(9, 0, 0)
157 asm(
"fmla %0.2d, %1.2d, %2.d[0]\n" :
"+w"(
c) :
"w"(
a),
"w"(
b.B_0) :);
158 else if (LaneID == 1)
159 asm(
"fmla %0.2d, %1.2d, %2.d[1]\n" :
"+w"(
c) :
"w"(
a),
"w"(
b.B_0) :);
160 else if (LaneID == 2)
161 asm(
"fmla %0.2d, %1.2d, %2.d[0]\n" :
"+w"(
c) :
"w"(
a),
"w"(
b.B_1) :);
162 else if (LaneID == 3)
163 asm(
"fmla %0.2d, %1.2d, %2.d[1]\n" :
"+w"(
c) :
"w"(
a),
"w"(
b.B_1) :);
// Intrinsic spelling of the same four cases.
166 c = vfmaq_laneq_f64(
c,
a,
b.B_0, 0);
167 else if (LaneID == 1)
168 c = vfmaq_laneq_f64(
c,
a,
b.B_0, 1);
169 else if (LaneID == 2)
170 c = vfmaq_laneq_f64(
c,
a,
b.B_1, 0);
171 else if (LaneID == 3)
172 c = vfmaq_laneq_f64(
c,
a,
b.B_1, 1);
181 #if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC && EIGEN_COMP_CLANG
184 struct gebp_traits<half, half, false, false, Architecture::
NEON>
185 : gebp_traits<half, half, false, false, Architecture::Generic> {
188 typedef float16x4_t PacketHalf;
189 enum {
nr = EIGEN_NEON_GEBP_NR };
202 eigen_assert(
false &&
"Cannot loadRhsQuad for a scalar RHS.");
206 const FixedInt<0>&)
const {
207 c = vfmaq_n_f16(
c,
a,
b);
210 const FixedInt<0>&)
const {
211 c = vfma_n_f16(
c,
a,
b);
217 const FixedInt<0>&)
const {
218 madd_helper<0>(
a,
b,
c);
221 const FixedInt<1>&)
const {
222 madd_helper<1>(
a,
b,
c);
225 const FixedInt<2>&)
const {
226 madd_helper<2>(
a,
b,
c);
229 const FixedInt<3>&)
const {
230 madd_helper<3>(
a,
b,
c);
// Half-precision lane-indexed fused multiply-add: c += a * b[LaneID], with the
// lane fixed at compile time by the LaneID template argument.
// vfmaq_lane_f16 reads the lane from a 64-bit (float16x4_t) vector.
// NOTE(review): the function signature line is not visible in this excerpt.
234 template <
int LaneID>
236 c = vfmaq_lane_f16(
c,
a,
b, LaneID);
#define eigen_assert(x)
Definition: Macros.h:910
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
Scalar * b
Definition: benchVecAdd.cpp:17
RhsScalar_ RhsScalar
Definition: products/GeneralBlockPanelKernel.h:400
EIGEN_STRONG_INLINE void acc(const AccPacket &c, const ResPacket &alpha, ResPacket &r) const
Definition: products/GeneralBlockPanelKernel.h:499
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar *b, RhsPacketType &dest) const
Definition: products/GeneralBlockPanelKernel.h:448
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar *b, RhsPacketType &dest) const
Definition: products/GeneralBlockPanelKernel.h:457
QuadPacket< RhsPacket > RhsPacketx4
Definition: products/GeneralBlockPanelKernel.h:442
ResPacket AccPacket
Definition: products/GeneralBlockPanelKernel.h:443
std::conditional_t< Vectorizable, ResPacket_, ResScalar > ResPacket
Definition: products/GeneralBlockPanelKernel.h:439
std::conditional_t< Vectorizable, RhsPacket_, RhsScalar > RhsPacket
Definition: products/GeneralBlockPanelKernel.h:438
std::conditional_t< Vectorizable, LhsPacket_, LhsScalar > LhsPacket
Definition: products/GeneralBlockPanelKernel.h:437
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar *b, RhsPacket &dest) const
Definition: products/GeneralBlockPanelKernel.h:463
@ nr
Definition: products/GeneralBlockPanelKernel.h:418
EIGEN_STRONG_INLINE void madd(const LhsPacketType &a, const RhsPacketType &b, AccPacketType &c, RhsPacketType &tmp, const LaneIdType &) const
Definition: products/GeneralBlockPanelKernel.h:476
RealScalar alpha
Definition: level1_cplx_impl.h:151
const Scalar * a
Definition: level2_cplx_impl.h:32
Eigen::Matrix< Scalar, Dynamic, Dynamic, ColMajor > tmp
Definition: level3_impl.h:365
@ NEON
Definition: Constants.h:473
@ GEBPPacketFull
Definition: products/GeneralBlockPanelKernel.h:20
__vector float Packet4f
Definition: AltiVec/PacketMath.h:33
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
int c
Definition: calibrate.py:100
Definition: Eigen_Colamd.h:49
Packet B_0
Definition: products/GeneralBlockPanelKernel.h:344