Eigen  3.4.90 (git rev 9589cc4e7fd8e4538bedef80dd36c7738977a8be)
 
All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
Loading...
Searching...
No Matches
PartialReduxEvaluator.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2011-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_PARTIALREDUX_H
11#define EIGEN_PARTIALREDUX_H
12
13// IWYU pragma: private
14#include "./InternalHeaderCheck.h"
15
16namespace Eigen {
17
18namespace internal {
19
20/***************************************************************************
21 *
22 * This file provides evaluators for partial reductions.
23 * There are two modes:
24 *
25 * - scalar path: simply calls the respective function on the column or row.
26 * -> nothing special here, all the tricky part is handled by the return
27 * types of VectorwiseOp's members. They embed the functor calling the
28 * respective DenseBase's member function.
29 *
30 * - vectorized path: implements packet-wise reductions followed by
31 * some (optional) processing of the outcome, e.g., division by n for mean.
32 *
33 * For the vectorized path let's observe that the packet-size and outer-unrolling
34 * are both decided by the assignment logic. So all we have to do is to decide
35 * on the inner unrolling.
36 *
37 * For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h,
38 * but we need to be careful to specify the correct increment.
39 *
40 ***************************************************************************/
41
42/* logic deciding a strategy for unrolling of vectorized paths */
43template <typename Func, typename Evaluator>
44struct packetwise_redux_traits {
45 enum {
46 OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime,
47 Cost = OuterSize == Dynamic ? HugeCost
48 : OuterSize * Evaluator::CoeffReadCost + (OuterSize - 1) * functor_traits<Func>::Cost,
49 Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling
50 };
51};
52
53/* Value to be returned when size==0 , by default let's return 0 */
54template <typename PacketType, typename Func>
55EIGEN_DEVICE_FUNC PacketType packetwise_redux_empty_value(const Func&) {
56 const typename unpacket_traits<PacketType>::type zero(0);
57 return pset1<PacketType>(zero);
58}
59
60/* For products the default is 1 */
61template <typename PacketType, typename Scalar>
62EIGEN_DEVICE_FUNC PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar, Scalar>&) {
63 return pset1<PacketType>(Scalar(1));
64}
65
66/* Perform the actual reduction */
67template <typename Func, typename Evaluator, int Unrolling = packetwise_redux_traits<Func, Evaluator>::Unrolling>
68struct packetwise_redux_impl;
69
70/* Perform the actual reduction with unrolling */
71template <typename Func, typename Evaluator>
72struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling> {
73 typedef redux_novec_unroller<Func, Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
74 typedef typename Evaluator::Scalar Scalar;
75
76 template <typename PacketType>
77 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func& func, Index /*size*/) {
78 return redux_vec_unroller<Func, Evaluator, 0,
79 packetwise_redux_traits<Func, Evaluator>::OuterSize>::template run<PacketType>(eval,
80 func);
81 }
82};
83
84/* Add a specialization of redux_vec_unroller for size==0 at compiletime.
85 * This specialization is not required for general reductions, which is
86 * why it is defined here.
87 */
88template <typename Func, typename Evaluator, Index Start>
89struct redux_vec_unroller<Func, Evaluator, Start, 0> {
90 template <typename PacketType>
91 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator&, const Func& f) {
92 return packetwise_redux_empty_value<PacketType>(f);
93 }
94};
95
96/* Perform the actual reduction for dynamic sizes */
97template <typename Func, typename Evaluator>
98struct packetwise_redux_impl<Func, Evaluator, NoUnrolling> {
99 typedef typename Evaluator::Scalar Scalar;
100 typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
101
102 template <typename PacketType>
103 EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size) {
104 if (size == 0) return packetwise_redux_empty_value<PacketType>(func);
105
106 const Index size4 = (size - 1) & (~3);
107 PacketType p = eval.template packetByOuterInner<Unaligned, PacketType>(0, 0);
108 Index i = 1;
109 // This loop is optimized for instruction pipelining:
110 // - each iteration generates two independent instructions
111 // - thanks to branch prediction and out-of-order execution we have independent instructions across loops
112 for (; i < size4; i += 4)
113 p = func.packetOp(
114 p, func.packetOp(func.packetOp(eval.template packetByOuterInner<Unaligned, PacketType>(i + 0, 0),
115 eval.template packetByOuterInner<Unaligned, PacketType>(i + 1, 0)),
116 func.packetOp(eval.template packetByOuterInner<Unaligned, PacketType>(i + 2, 0),
117 eval.template packetByOuterInner<Unaligned, PacketType>(i + 3, 0))));
118 for (; i < size; ++i) p = func.packetOp(p, eval.template packetByOuterInner<Unaligned, PacketType>(i, 0));
119 return p;
120 }
121};
122
123template <typename ArgType, typename MemberOp, int Direction>
124struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
125 : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> > {
126 typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
127 typedef typename internal::nested_eval<ArgType, 1>::type ArgTypeNested;
128 typedef add_const_on_value_type_t<ArgTypeNested> ConstArgTypeNested;
129 typedef internal::remove_all_t<ArgTypeNested> ArgTypeNestedCleaned;
130 typedef typename ArgType::Scalar InputScalar;
131 typedef typename XprType::Scalar Scalar;
132 enum {
133 TraversalSize = Direction == int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
134 };
135 typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType;
136 enum {
137 CoeffReadCost = TraversalSize == Dynamic ? HugeCost
138 : TraversalSize == 0
139 ? 1
140 : int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),
141
142 ArgFlags_ = evaluator<ArgType>::Flags,
143
144 Vectorizable_ = bool(int(ArgFlags_) & PacketAccessBit) && bool(MemberOp::Vectorizable) &&
145 (Direction == int(Vertical) ? bool(ArgFlags_ & RowMajorBit) : (ArgFlags_ & RowMajorBit) == 0) &&
146 (TraversalSize != 0),
147
148 Flags = (traits<XprType>::Flags & RowMajorBit) | (evaluator<ArgType>::Flags & (HereditaryBits & (~RowMajorBit))) |
149 (Vectorizable_ ? PacketAccessBit : 0) | LinearAccessBit,
150
151 Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
152 };
153
154 EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) {
155 EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize == Dynamic ? HugeCost
156 : (TraversalSize == 0 ? 1 : int(CostOpType::value)));
157 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
158 }
159
160 typedef typename XprType::CoeffReturnType CoeffReturnType;
161
162 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const {
163 return coeff(Direction == Vertical ? j : i);
164 }
165
166 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const {
167 return m_functor(m_arg.template subVector<DirectionType(Direction)>(index));
168 }
169
170 template <int LoadMode, typename PacketType>
171 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index i, Index j) const {
172 return packet<LoadMode, PacketType>(Direction == Vertical ? j : i);
173 }
174
175 template <int LoadMode, typename PacketType>
176 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC PacketType packet(Index idx) const {
177 enum { PacketSize = internal::unpacket_traits<PacketType>::size };
178 typedef Block<const ArgTypeNestedCleaned, Direction == Vertical ? int(ArgType::RowsAtCompileTime) : int(PacketSize),
179 Direction == Vertical ? int(PacketSize) : int(ArgType::ColsAtCompileTime), true /* InnerPanel */>
180 PanelType;
181
182 PanelType panel(m_arg, Direction == Vertical ? 0 : idx, Direction == Vertical ? idx : 0,
183 Direction == Vertical ? m_arg.rows() : Index(PacketSize),
184 Direction == Vertical ? Index(PacketSize) : m_arg.cols());
185
186 // FIXME
187 // See bug 1612, currently if PacketSize==1 (i.e. complex<double> with 128bits registers) then the storage-order of
188 // panel get reversed and methods like packetByOuterInner do not make sense anymore in this context. So let's just
189 // by pass "vectorization" in this case:
190 if (PacketSize == 1) return internal::pset1<PacketType>(coeff(idx));
191
192 typedef typename internal::redux_evaluator<PanelType> PanelEvaluator;
193 PanelEvaluator panel_eval(panel);
194 typedef typename MemberOp::BinaryOp BinaryOp;
195 PacketType p = internal::packetwise_redux_impl<BinaryOp, PanelEvaluator>::template run<PacketType>(
196 panel_eval, m_functor.binaryFunc(), m_arg.outerSize());
197 return p;
198 }
199
200 protected:
201 ConstArgTypeNested m_arg;
202 const MemberOp m_functor;
203};
204
205} // end namespace internal
206
207} // end namespace Eigen
208
209#endif // EIGEN_PARTIALREDUX_H
DirectionType
Definition Constants.h:263
@ Vertical
Definition Constants.h:266
const unsigned int PacketAccessBit
Definition Constants.h:97
const unsigned int LinearAccessBit
Definition Constants.h:133
const unsigned int RowMajorBit
Definition Constants.h:70
Namespace containing all symbols from the Eigen library.
Definition B01_Experimental.dox:1
const int HugeCost
Definition Constants.h:48
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:82
const int Dynamic
Definition Constants.h:25