Redux.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_REDUX_H
12#define EIGEN_REDUX_H
13
14namespace Eigen {
15
16namespace internal {
17
18// TODO
19// * implement other kind of vectorization
20// * factorize code
21
22/***************************************************************************
23* Part 1 : the logic deciding a strategy for vectorization and unrolling
24***************************************************************************/
25
26template<typename Func, typename Derived>
27struct redux_traits
28{
29public:
30 enum {
31 PacketSize = packet_traits<typename Derived::Scalar>::size,
32 InnerMaxSize = int(Derived::IsRowMajor)
33 ? Derived::MaxColsAtCompileTime
34 : Derived::MaxRowsAtCompileTime
35 };
36
37 enum {
38 MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
39 && (functor_traits<Func>::PacketAccess),
40 MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
41 MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
42 };
43
44public:
45 enum {
46 Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
47 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
48 : int(DefaultTraversal)
49 };
50
51public:
52 enum {
53 Cost = ( Derived::SizeAtCompileTime == Dynamic
54 || Derived::CoeffReadCost == Dynamic
55 || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
56 ) ? Dynamic
57 : Derived::SizeAtCompileTime * Derived::CoeffReadCost
58 + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
59 UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
60 };
61
62public:
63 enum {
64 Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
65 ? CompleteUnrolling
66 : NoUnrolling
67 };
68};
69
70/***************************************************************************
71* Part 2 : unrollers
72***************************************************************************/
73
74/*** no vectorization ***/
75
76template<typename Func, typename Derived, int Start, int Length>
77struct redux_novec_unroller
78{
79 enum {
80 HalfLength = Length/2
81 };
82
83 typedef typename Derived::Scalar Scalar;
84
85 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
86 {
87 return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
88 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
89 }
90};
91
92template<typename Func, typename Derived, int Start>
93struct redux_novec_unroller<Func, Derived, Start, 1>
94{
95 enum {
96 outer = Start / Derived::InnerSizeAtCompileTime,
97 inner = Start % Derived::InnerSizeAtCompileTime
98 };
99
100 typedef typename Derived::Scalar Scalar;
101
102 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
103 {
104 return mat.coeffByOuterInner(outer, inner);
105 }
106};
107
108// This is actually dead code and will never be called. It is required
109// to prevent false warnings regarding failed inlining though
110// for 0 length run() will never be called at all.
111template<typename Func, typename Derived, int Start>
112struct redux_novec_unroller<Func, Derived, Start, 0>
113{
114 typedef typename Derived::Scalar Scalar;
115 static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
116};
117
118/*** vectorization ***/
119
120template<typename Func, typename Derived, int Start, int Length>
121struct redux_vec_unroller
122{
123 enum {
124 PacketSize = packet_traits<typename Derived::Scalar>::size,
125 HalfLength = Length/2
126 };
127
128 typedef typename Derived::Scalar Scalar;
129 typedef typename packet_traits<Scalar>::type PacketScalar;
130
131 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
132 {
133 return func.packetOp(
134 redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
135 redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
136 }
137};
138
139template<typename Func, typename Derived, int Start>
140struct redux_vec_unroller<Func, Derived, Start, 1>
141{
142 enum {
143 index = Start * packet_traits<typename Derived::Scalar>::size,
144 outer = index / int(Derived::InnerSizeAtCompileTime),
145 inner = index % int(Derived::InnerSizeAtCompileTime),
146 alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
147 };
148
149 typedef typename Derived::Scalar Scalar;
150 typedef typename packet_traits<Scalar>::type PacketScalar;
151
152 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
153 {
154 return mat.template packetByOuterInner<alignment>(outer, inner);
155 }
156};
157
158/***************************************************************************
159* Part 3 : implementation of all cases
160***************************************************************************/
161
162template<typename Func, typename Derived,
163 int Traversal = redux_traits<Func, Derived>::Traversal,
164 int Unrolling = redux_traits<Func, Derived>::Unrolling
165>
166struct redux_impl;
167
168template<typename Func, typename Derived>
169struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
170{
171 typedef typename Derived::Scalar Scalar;
172 typedef typename Derived::Index Index;
173 static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
174 {
175 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
176 Scalar res;
177 res = mat.coeffByOuterInner(0, 0);
178 for(Index i = 1; i < mat.innerSize(); ++i)
179 res = func(res, mat.coeffByOuterInner(0, i));
180 for(Index i = 1; i < mat.outerSize(); ++i)
181 for(Index j = 0; j < mat.innerSize(); ++j)
182 res = func(res, mat.coeffByOuterInner(i, j));
183 return res;
184 }
185};
186
187template<typename Func, typename Derived>
188struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
189 : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
190{};
191
192template<typename Func, typename Derived>
193struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
194{
195 typedef typename Derived::Scalar Scalar;
196 typedef typename packet_traits<Scalar>::type PacketScalar;
197 typedef typename Derived::Index Index;
198
199 static Scalar run(const Derived& mat, const Func& func)
200 {
201 const Index size = mat.size();
202 eigen_assert(size && "you are using an empty matrix");
203 const Index packetSize = packet_traits<Scalar>::size;
204 const Index alignedStart = internal::first_aligned(mat);
205 enum {
206 alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
208 };
209 const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
210 const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
211 const Index alignedEnd2 = alignedStart + alignedSize2;
212 const Index alignedEnd = alignedStart + alignedSize;
213 Scalar res;
214 if(alignedSize)
215 {
216 PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
217 if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
218 {
219 PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
220 for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
221 {
222 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
223 packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
224 }
225
226 packet_res0 = func.packetOp(packet_res0,packet_res1);
227 if(alignedEnd>alignedEnd2)
228 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
229 }
230 res = func.predux(packet_res0);
231
232 for(Index index = 0; index < alignedStart; ++index)
233 res = func(res,mat.coeff(index));
234
235 for(Index index = alignedEnd; index < size; ++index)
236 res = func(res,mat.coeff(index));
237 }
238 else // too small to vectorize anything.
239 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
240 {
241 res = mat.coeff(0);
242 for(Index index = 1; index < size; ++index)
243 res = func(res,mat.coeff(index));
244 }
245
246 return res;
247 }
248};
249
250template<typename Func, typename Derived>
251struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
252{
253 typedef typename Derived::Scalar Scalar;
254 typedef typename packet_traits<Scalar>::type PacketScalar;
255 typedef typename Derived::Index Index;
256
257 static Scalar run(const Derived& mat, const Func& func)
258 {
259 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
260 const Index innerSize = mat.innerSize();
261 const Index outerSize = mat.outerSize();
262 enum {
263 packetSize = packet_traits<Scalar>::size
264 };
265 const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
266 Scalar res;
267 if(packetedInnerSize)
268 {
269 PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
270 for(Index j=0; j<outerSize; ++j)
271 for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
272 packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
273
274 res = func.predux(packet_res);
275 for(Index j=0; j<outerSize; ++j)
276 for(Index i=packetedInnerSize; i<innerSize; ++i)
277 res = func(res, mat.coeffByOuterInner(j,i));
278 }
279 else // too small to vectorize anything.
280 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
281 {
282 res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
283 }
284
285 return res;
286 }
287};
288
289template<typename Func, typename Derived>
290struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
291{
292 typedef typename Derived::Scalar Scalar;
293 typedef typename packet_traits<Scalar>::type PacketScalar;
294 enum {
295 PacketSize = packet_traits<Scalar>::size,
296 Size = Derived::SizeAtCompileTime,
297 VectorizedSize = (Size / PacketSize) * PacketSize
298 };
299 static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
300 {
301 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
302 Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
303 if (VectorizedSize != Size)
304 res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
305 return res;
306 }
307};
308
309} // end namespace internal
310
311/***************************************************************************
312* Part 4 : public API
313***************************************************************************/
314
315
323template<typename Derived>
324template<typename Func>
325EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
326DenseBase<Derived>::redux(const Func& func) const
327{
328 typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
329 return internal::redux_impl<Func, ThisNested>
330 ::run(derived(), func);
331}
332
335template<typename Derived>
336EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
338{
339 return this->redux(Eigen::internal::scalar_min_op<Scalar>());
340}
341
344template<typename Derived>
345EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
347{
348 return this->redux(Eigen::internal::scalar_max_op<Scalar>());
349}
350
355template<typename Derived>
356EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
358{
359 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
360 return Scalar(0);
361 return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
362}
363
368template<typename Derived>
369EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
371{
372 return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
373}
374
382template<typename Derived>
383EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
385{
386 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
387 return Scalar(1);
388 return this->redux(Eigen::internal::scalar_product_op<Scalar>());
389}
390
397template<typename Derived>
398EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
400{
401 return derived().diagonal().sum();
402}
403
404} // end namespace Eigen
405
406#endif // EIGEN_REDUX_H
internal::traits< Derived >::Scalar maxCoeff() const
Definition Redux.h:346
Scalar prod() const
Definition Redux.h:384
internal::traits< Derived >::Scalar minCoeff() const
Definition Redux.h:337
Scalar sum() const
Definition Redux.h:357
@ SizeAtCompileTime
Definition DenseBase.h:105
Scalar mean() const
Definition Redux.h:370
Scalar trace() const
Definition Redux.h:399
@ Aligned
Definition Constants.h:189
@ Unaligned
Definition Constants.h:187
const unsigned int AlignedBit
Definition Constants.h:142
const unsigned int ActualPacketAccessBit
Definition Constants.h:87
const unsigned int LinearAccessBit
Definition Constants.h:112
Definition LDLT.h:18