CoeffBasedProduct.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_COEFFBASED_PRODUCT_H
12#define EIGEN_COEFFBASED_PRODUCT_H
13
14namespace Eigen {
15
16namespace internal {
17
18/*********************************************************************************
19* Coefficient based product implementation.
20* It is designed for the following use cases:
21* - small fixed sizes
22* - lazy products
23*********************************************************************************/
24
25/* Since the all the dimensions of the product are small, here we can rely
26 * on the generic Assign mechanism to evaluate the product per coeff (or packet).
27 *
28 * Note that here the inner-loops should always be unrolled.
29 */
30
31template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
32struct product_coeff_impl;
33
34template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
35struct product_packet_impl;
36
37template<typename LhsNested, typename RhsNested, int NestingFlags>
38struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
39{
40 typedef MatrixXpr XprKind;
41 typedef typename remove_all<LhsNested>::type _LhsNested;
42 typedef typename remove_all<RhsNested>::type _RhsNested;
43 typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
44 typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
45 typename traits<_RhsNested>::StorageKind>::ret StorageKind;
46 typedef typename promote_index_type<typename traits<_LhsNested>::Index,
47 typename traits<_RhsNested>::Index>::type Index;
48
49 enum {
50 LhsCoeffReadCost = _LhsNested::CoeffReadCost,
51 RhsCoeffReadCost = _RhsNested::CoeffReadCost,
52 LhsFlags = _LhsNested::Flags,
53 RhsFlags = _RhsNested::Flags,
54
55 RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
56 ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
57 InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
58
59 MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
60 MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
61
62 LhsRowMajor = LhsFlags & RowMajorBit,
63 RhsRowMajor = RhsFlags & RowMajorBit,
64
65 SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
66
67 CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
68 && (ColsAtCompileTime == Dynamic
69 || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
70 && (RhsFlags&AlignedBit)
71 )
72 ),
73
74 CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
75 && (RowsAtCompileTime == Dynamic
76 || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
77 && (LhsFlags&AlignedBit)
78 )
79 ),
80
81 EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
82 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
83 : (RhsRowMajor && !CanVectorizeLhs),
84
85 Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
86 | (EvalToRowMajor ? RowMajorBit : 0)
87 | NestingFlags
88 | (LhsFlags & RhsFlags & AlignedBit)
89 // TODO enable vectorization for mixed types
90 | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
91
92 CoeffReadCost = InnerSize == Dynamic ? Dynamic
93 : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
94 + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
95
96 /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
97 * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
98 * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
99 * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
100 */
101 CanVectorizeInner = SameType
102 && LhsRowMajor
103 && (!RhsRowMajor)
104 && (LhsFlags & RhsFlags & ActualPacketAccessBit)
105 && (LhsFlags & RhsFlags & AlignedBit)
106 && (InnerSize % packet_traits<Scalar>::size == 0)
107 };
108};
109
110} // end namespace internal
111
112template<typename LhsNested, typename RhsNested, int NestingFlags>
113class CoeffBasedProduct
114 : internal::no_assignment_operator,
115 public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
116{
117 public:
118
119 typedef MatrixBase<CoeffBasedProduct> Base;
120 EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
121 typedef typename Base::PlainObject PlainObject;
122
123 private:
124
125 typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
126 typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
127
128 enum {
129 PacketSize = internal::packet_traits<Scalar>::size,
130 InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
131 Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
132 CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
133 };
134
135 typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
136 Unroll ? InnerSize-1 : Dynamic,
137 _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
138
139 typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
140
141 public:
142
143 inline CoeffBasedProduct(const CoeffBasedProduct& other)
144 : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
145 {}
146
147 template<typename Lhs, typename Rhs>
148 inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
149 : m_lhs(lhs), m_rhs(rhs)
150 {
151 // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
152 // We still allow to mix T and complex<T>.
153 EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
154 YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
155 eigen_assert(lhs.cols() == rhs.rows()
156 && "invalid matrix product"
157 && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
158 }
159
160 EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
161 EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
162
163 EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
164 {
165 Scalar res;
166 ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
167 return res;
168 }
169
170 /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
171 * which is why we don't set the LinearAccessBit.
172 */
173 EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
174 {
175 Scalar res;
176 const Index row = RowsAtCompileTime == 1 ? 0 : index;
177 const Index col = RowsAtCompileTime == 1 ? index : 0;
178 ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
179 return res;
180 }
181
182 template<int LoadMode>
183 EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
184 {
185 PacketScalar res;
186 internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
187 Unroll ? InnerSize-1 : Dynamic,
188 _LhsNested, _RhsNested, PacketScalar, LoadMode>
189 ::run(row, col, m_lhs, m_rhs, res);
190 return res;
191 }
192
193 // Implicit conversion to the nested type (trigger the evaluation of the product)
194 EIGEN_STRONG_INLINE operator const PlainObject& () const
195 {
196 m_result.lazyAssign(*this);
197 return m_result;
198 }
199
200 const _LhsNested& lhs() const { return m_lhs; }
201 const _RhsNested& rhs() const { return m_rhs; }
202
203 const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
204 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
205
206 template<int DiagonalIndex>
207 const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
208 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
209
210 const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
211 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
212
213 protected:
214 typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
215 typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
216
217 mutable PlainObject m_result;
218};
219
220namespace internal {
221
222// here we need to overload the nested rule for products
223// such that the nested type is a const reference to a plain matrix
224template<typename Lhs, typename Rhs, int N, typename PlainObject>
225struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
226{
227 typedef PlainObject const& type;
228};
229
230/***************************************************************************
231* Normal product .coeff() implementation (with meta-unrolling)
232***************************************************************************/
233
234/**************************************
235*** Scalar path - no vectorization ***
236**************************************/
237
238template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
239struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
240{
241 typedef typename Lhs::Index Index;
242 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
243 {
244 product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
245 res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
246 }
247};
248
249template<typename Lhs, typename Rhs, typename RetScalar>
250struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
251{
252 typedef typename Lhs::Index Index;
253 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
254 {
255 res = lhs.coeff(row, 0) * rhs.coeff(0, col);
256 }
257};
258
259template<typename Lhs, typename Rhs, typename RetScalar>
260struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
261{
262 typedef typename Lhs::Index Index;
263 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
264 {
265 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
266 res = lhs.coeff(row, 0) * rhs.coeff(0, col);
267 for(Index i = 1; i < lhs.cols(); ++i)
268 res += lhs.coeff(row, i) * rhs.coeff(i, col);
269 }
270};
271
272/*******************************************
273*** Scalar path with inner vectorization ***
274*******************************************/
275
276template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
277struct product_coeff_vectorized_unroller
278{
279 typedef typename Lhs::Index Index;
280 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
281 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
282 {
283 product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
284 pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
285 }
286};
287
288template<typename Lhs, typename Rhs, typename Packet>
289struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
290{
291 typedef typename Lhs::Index Index;
292 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
293 {
294 pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
295 }
296};
297
298template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
299struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
300{
301 typedef typename Lhs::PacketScalar Packet;
302 typedef typename Lhs::Index Index;
303 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
304 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
305 {
306 Packet pres;
307 product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
308 product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
309 res = predux(pres);
310 }
311};
312
313template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
314struct product_coeff_vectorized_dyn_selector
315{
316 typedef typename Lhs::Index Index;
317 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
318 {
319 res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
320 }
321};
322
323// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
324// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
325template<typename Lhs, typename Rhs, int RhsCols>
326struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
327{
328 typedef typename Lhs::Index Index;
329 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
330 {
331 res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
332 }
333};
334
335template<typename Lhs, typename Rhs, int LhsRows>
336struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
337{
338 typedef typename Lhs::Index Index;
339 static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
340 {
341 res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
342 }
343};
344
345template<typename Lhs, typename Rhs>
346struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
347{
348 typedef typename Lhs::Index Index;
349 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
350 {
351 res = lhs.transpose().cwiseProduct(rhs).sum();
352 }
353};
354
355template<typename Lhs, typename Rhs, typename RetScalar>
356struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
357{
358 typedef typename Lhs::Index Index;
359 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
360 {
361 product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
362 }
363};
364
365/*******************
366*** Packet path ***
367*******************/
368
369template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
370struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
371{
372 typedef typename Lhs::Index Index;
373 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
374 {
375 product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
376 res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
377 }
378};
379
380template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
381struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
382{
383 typedef typename Lhs::Index Index;
384 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
385 {
386 product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
387 res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
388 }
389};
390
391template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
392struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
393{
394 typedef typename Lhs::Index Index;
395 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
396 {
397 res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
398 }
399};
400
401template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
402struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
403{
404 typedef typename Lhs::Index Index;
405 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
406 {
407 res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
408 }
409};
410
411template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
412struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
413{
414 typedef typename Lhs::Index Index;
415 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
416 {
417 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
418 res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
419 for(Index i = 1; i < lhs.cols(); ++i)
420 res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
421 }
422};
423
424template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
425struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
426{
427 typedef typename Lhs::Index Index;
428 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
429 {
430 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
431 res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
432 for(Index i = 1; i < lhs.cols(); ++i)
433 res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
434 }
435};
436
437} // end namespace internal
438
439} // end namespace Eigen
440
441#endif // EIGEN_COEFFBASED_PRODUCT_H
@ RowMajor
Definition Constants.h:259
@ ColMajor
Definition Constants.h:257
const unsigned int RowMajorBit
Definition Constants.h:48
const unsigned int AlignedBit
Definition Constants.h:142
const unsigned int PacketAccessBit
Definition Constants.h:76
const unsigned int ActualPacketAccessBit
Definition Constants.h:87
const unsigned int EvalBeforeAssigningBit
Definition Constants.h:58
const unsigned int EvalBeforeNestingBit
Definition Constants.h:53
Definition LDLT.h:18