PartialReduxEvaluator.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2011-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_PARTIALREDUX_H
11 #define EIGEN_PARTIALREDUX_H
12 
13 // IWYU pragma: private
14 #include "./InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
20 /***************************************************************************
21  *
22  * This file provides evaluators for partial reductions.
23  * There are two modes:
24  *
25  * - scalar path: simply calls the respective function on the column or row.
26  * -> nothing special here, all the tricky part is handled by the return
27  * types of VectorwiseOp's members. They embed the functor calling the
28  * respective DenseBase's member function.
29  *
30  * - vectorized path: implements packet-wise reductions followed by
31  * some (optional) processing of the outcome, e.g., division by n for mean.
32  *
33  * For the vectorized path let's observe that the packet-size and outer-unrolling
34  * are both decided by the assignment logic. So all we have to do is to decide
35  * on the inner unrolling.
36  *
37  * For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h,
38  * but we need to be careful to specify the correct increment.
39  *
40  ***************************************************************************/
41 
42 /* logic deciding a strategy for unrolling of vectorized paths */
43 template <typename Func, typename Evaluator>
45  enum {
46  OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime,
48  : OuterSize * Evaluator::CoeffReadCost + (OuterSize - 1) * functor_traits<Func>::Cost,
50  };
51 };
52 
53 /* Value to be returned when size==0 , by default let's return 0 */
54 template <typename PacketType, typename Func>
56  const typename unpacket_traits<PacketType>::type zero(0);
57  return pset1<PacketType>(zero);
58 }
59 
60 /* For products the default is 1 */
61 template <typename PacketType, typename Scalar>
63  return pset1<PacketType>(Scalar(1));
64 }
65 
66 /* Perform the actual reduction */
67 template <typename Func, typename Evaluator, int Unrolling = packetwise_redux_traits<Func, Evaluator>::Unrolling>
69 
70 /* Perform the actual reduction with unrolling */
71 template <typename Func, typename Evaluator>
72 struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling> {
74  typedef typename Evaluator::Scalar Scalar;
75 
76  template <typename PacketType>
77  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func& func, Index /*size*/) {
78  return redux_vec_unroller<Func, Evaluator, 0,
80  func);
81  }
82 };
83 
84 /* Add a specialization of redux_vec_unroller for size==0 at compiletime.
85  * This specialization is not required for general reductions, which is
86  * why it is defined here.
87  */
88 template <typename Func, typename Evaluator, Index Start>
89 struct redux_vec_unroller<Func, Evaluator, Start, 0> {
90  template <typename PacketType>
91  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator&, const Func& f) {
92  return packetwise_redux_empty_value<PacketType>(f);
93  }
94 };
95 
96 /* Perform the actual reduction for dynamic sizes */
97 template <typename Func, typename Evaluator>
98 struct packetwise_redux_impl<Func, Evaluator, NoUnrolling> {
99  typedef typename Evaluator::Scalar Scalar;
101 
102  template <typename PacketType>
103  EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size) {
104  if (size == 0) return packetwise_redux_empty_value<PacketType>(func);
105 
106  const Index size4 = (size - 1) & (~3);
107  PacketType p = eval.template packetByOuterInner<Unaligned, PacketType>(0, 0);
108  Index i = 1;
109  // This loop is optimized for instruction pipelining:
110  // - each iteration generates two independent instructions
111  // - thanks to branch prediction and out-of-order execution we have independent instructions across loops
112  for (; i < size4; i += 4)
113  p = func.packetOp(
114  p, func.packetOp(func.packetOp(eval.template packetByOuterInner<Unaligned, PacketType>(i + 0, 0),
115  eval.template packetByOuterInner<Unaligned, PacketType>(i + 1, 0)),
116  func.packetOp(eval.template packetByOuterInner<Unaligned, PacketType>(i + 2, 0),
117  eval.template packetByOuterInner<Unaligned, PacketType>(i + 3, 0))));
118  for (; i < size; ++i) p = func.packetOp(p, eval.template packetByOuterInner<Unaligned, PacketType>(i, 0));
119  return p;
120  }
121 };
122 
123 template <typename ArgType, typename MemberOp, int Direction>
124 struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
125  : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> > {
130  typedef typename ArgType::Scalar InputScalar;
131  typedef typename XprType::Scalar Scalar;
132  enum {
133  TraversalSize = Direction == int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
134  };
135  typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType;
136  enum {
137  CoeffReadCost = TraversalSize == Dynamic ? HugeCost
138  : TraversalSize == 0
139  ? 1
141 
143 
144  Vectorizable_ = bool(int(ArgFlags_) & PacketAccessBit) && bool(MemberOp::Vectorizable) &&
145  (Direction == int(Vertical) ? bool(ArgFlags_ & RowMajorBit) : (ArgFlags_ & RowMajorBit) == 0) &&
146  (TraversalSize != 0),
147 
149  (Vectorizable_ ? PacketAccessBit : 0) | LinearAccessBit,
150 
151  Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
152  };
153 
154  EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) {
156  : (TraversalSize == 0 ? 1 : int(CostOpType::value)));
157  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
158  }
159 
160  typedef typename XprType::CoeffReturnType CoeffReturnType;
161 
163  return coeff(Direction == Vertical ? j : i);
164  }
165 
167  return m_functor(m_arg.template subVector<DirectionType(Direction)>(index));
168  }
169 
170  template <int LoadMode, typename PacketType>
172  return packet<LoadMode, PacketType>(Direction == Vertical ? j : i);
173  }
174 
175  template <int LoadMode, typename PacketType>
177  enum { PacketSize = internal::unpacket_traits<PacketType>::size };
178  typedef Block<const ArgTypeNestedCleaned, Direction == Vertical ? int(ArgType::RowsAtCompileTime) : int(PacketSize),
179  Direction == Vertical ? int(PacketSize) : int(ArgType::ColsAtCompileTime), true /* InnerPanel */>
180  PanelType;
181 
182  PanelType panel(m_arg, Direction == Vertical ? 0 : idx, Direction == Vertical ? idx : 0,
183  Direction == Vertical ? m_arg.rows() : Index(PacketSize),
184  Direction == Vertical ? Index(PacketSize) : m_arg.cols());
185 
186  // FIXME
187  // See bug 1612: currently, if PacketSize==1 (i.e. complex<double> with 128-bit registers), the storage order of
188  // panel gets reversed and methods like packetByOuterInner do not make sense anymore in this context. So let's just
189  // bypass "vectorization" in this case:
190  if (PacketSize == 1) return internal::pset1<PacketType>(coeff(idx));
191 
192  typedef typename internal::redux_evaluator<PanelType> PanelEvaluator;
193  PanelEvaluator panel_eval(panel);
194  typedef typename MemberOp::BinaryOp BinaryOp;
196  panel_eval, m_functor.binaryFunc(), m_arg.outerSize());
197  return p;
198  }
199 
200  protected:
202  const MemberOp m_functor;
203 };
204 
205 } // end namespace internal
206 
207 } // end namespace Eigen
208 
209 #endif // EIGEN_PARTIALREDUX_H
int i
Definition: BiCGSTAB_step_by_step.cpp:9
Direction
An enum that indicates the direction in Cartesian coordinates.
Definition: GeneralDefine.h:56
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:892
#define EIGEN_STRONG_INLINE
Definition: Macros.h:834
#define EIGEN_UNROLLING_LIMIT
Definition: Settings.h:23
#define EIGEN_INTERNAL_CHECK_COST_VALUE(C)
Definition: StaticAssert.h:101
float * p
Definition: Tutorial_Map_using.cpp:9
Scalar Scalar int size
Definition: benchVecAdd.cpp:17
SCALAR Scalar
Definition: bench_gemm.cpp:45
Expression of a fixed-size or dynamic-size block.
Definition: Block.h:110
Generic expression of a partially reduxed matrix.
Definition: VectorwiseOp.h:58
Definition: Redux.h:387
static int f(const TensorMap< Tensor< int, 3 > > &tensor)
Definition: cxx11_tensor_map.cpp:237
DirectionType
Definition: Constants.h:263
@ CompleteUnrolling
Definition: Constants.h:306
@ NoUnrolling
Definition: Constants.h:301
@ Vertical
Definition: Constants.h:266
const unsigned int PacketAccessBit
Definition: Constants.h:97
const unsigned int LinearAccessBit
Definition: Constants.h:133
const unsigned int RowMajorBit
Definition: Constants.h:70
return int(ret)+1
typename remove_all< T >::type remove_all_t
Definition: Meta.h:142
EIGEN_DEVICE_FUNC PacketType packetwise_redux_empty_value(const Func &)
Definition: PartialReduxEvaluator.h:55
typename add_const_on_value_type< T >::type add_const_on_value_type_t
Definition: Meta.h:274
Namespace containing all symbols from the Eigen library.
Definition: bench_norm.cpp:70
squared absolute value
Definition: GlobalFunctions.h:87
const unsigned int HereditaryBits
Definition: Constants.h:198
const int HugeCost
Definition: Constants.h:48
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:83
const int Dynamic
Definition: Constants.h:25
Extend namespace for flags.
Definition: fsi_chan_precond_driver.cc:56
Definition: Eigen_Colamd.h:49
Definition: TensorMeta.h:47
Definition: XprHelper.h:427
ArgType::Scalar InputScalar
Definition: PartialReduxEvaluator.h:130
add_const_on_value_type_t< ArgTypeNested > ConstArgTypeNested
Definition: PartialReduxEvaluator.h:128
PartialReduxExpr< ArgType, MemberOp, Direction > XprType
Definition: PartialReduxEvaluator.h:126
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC PacketType packet(Index idx) const
Definition: PartialReduxEvaluator.h:176
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
Definition: PartialReduxEvaluator.h:166
MemberOp::template Cost< int(TraversalSize)> CostOpType
Definition: PartialReduxEvaluator.h:135
internal::remove_all_t< ArgTypeNested > ArgTypeNestedCleaned
Definition: PartialReduxEvaluator.h:129
internal::nested_eval< ArgType, 1 >::type ArgTypeNested
Definition: PartialReduxEvaluator.h:127
ConstArgTypeNested m_arg
Definition: PartialReduxEvaluator.h:201
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index i, Index j) const
Definition: PartialReduxEvaluator.h:171
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
Definition: PartialReduxEvaluator.h:162
XprType::Scalar Scalar
Definition: PartialReduxEvaluator.h:131
const MemberOp m_functor
Definition: PartialReduxEvaluator.h:202
EIGEN_DEVICE_FUNC evaluator(const XprType xpr)
Definition: PartialReduxEvaluator.h:154
XprType::CoeffReturnType CoeffReturnType
Definition: PartialReduxEvaluator.h:160
Definition: CoreEvaluators.h:118
Definition: CoreEvaluators.h:104
Definition: XprHelper.h:205
std::conditional_t< Evaluate, PlainObject, typename ref_selector< T >::type > type
Definition: XprHelper.h:549
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func &func, Index)
Definition: PartialReduxEvaluator.h:77
redux_novec_unroller< Func, Evaluator, 0, Evaluator::SizeAtCompileTime > Base
Definition: PartialReduxEvaluator.h:73
Evaluator::Scalar Scalar
Definition: PartialReduxEvaluator.h:74
static EIGEN_DEVICE_FUNC PacketType run(const Evaluator &eval, const Func &func, Index size)
Definition: PartialReduxEvaluator.h:103
Evaluator::Scalar Scalar
Definition: PartialReduxEvaluator.h:99
redux_traits< Func, Evaluator >::PacketType PacketScalar
Definition: PartialReduxEvaluator.h:100
Definition: PartialReduxEvaluator.h:68
Definition: PartialReduxEvaluator.h:44
@ Cost
Definition: PartialReduxEvaluator.h:47
@ OuterSize
Definition: PartialReduxEvaluator.h:46
find_best_packet< typename Evaluator::Scalar, Evaluator::SizeAtCompileTime >::type PacketType
Definition: Redux.h:32
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType run(const Evaluator &, const Func &f)
Definition: PartialReduxEvaluator.h:91
Template functor to compute the product of two scalars.
Definition: BinaryFunctors.h:73
Definition: ForwardDeclarations.h:21
Definition: GenericPacketMath.h:134
Definition: benchGeometry.cpp:21
EIGEN_DONT_INLINE Scalar zero()
Definition: svd_common.h:232
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2
Definition: ZVector/PacketMath.h:50