ProductEvaluators.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
7//
8// This Source Code Form is subject to the terms of the Mozilla
9// Public License v. 2.0. If a copy of the MPL was not distributed
10// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11
12#ifndef EIGEN_PRODUCTEVALUATORS_H
13#define EIGEN_PRODUCTEVALUATORS_H
14
15// IWYU pragma: private
16#include "./InternalHeaderCheck.h"
17
18namespace Eigen {
19
20namespace internal {
21
30template <typename Lhs, typename Rhs, int Options>
31struct evaluator<Product<Lhs, Rhs, Options>> : public product_evaluator<Product<Lhs, Rhs, Options>> {
32 typedef Product<Lhs, Rhs, Options> XprType;
33 typedef product_evaluator<XprType> Base;
34
35 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
36};
37
38// Catch "scalar * ( A * B )" and transform it to "(scalar*A) * B"
39// TODO we should apply that rule only if that's really helpful
40template <typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
41struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
42 const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
43 const Product<Lhs, Rhs, DefaultProduct>>> {
44 static const bool value = true;
45};
46template <typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
47struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
48 const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
49 const Product<Lhs, Rhs, DefaultProduct>>>
50 : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>> {
51 typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1, Scalar2>,
52 const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
53 const Product<Lhs, Rhs, DefaultProduct>>
54 XprType;
55 typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1, Lhs, product), Rhs, DefaultProduct>> Base;
56
57 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
58 : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {}
59};
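// For instance, with the two specializations above an expression such as (illustrative sketch,
// assuming MatrixXd A, B and double s; names are arbitrary):
//   s * (A * B)
// is evaluated as if it had been written
//   (s * A) * B
// so the scalar factor is folded into the product evaluation instead of requiring an extra
// scaling pass over a temporary holding A*B.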
60
61template <typename Lhs, typename Rhs, int DiagIndex>
62struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex>>
63 : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>> {
64 typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
65 typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>> Base;
66
67 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
68 : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
69 Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index())) {}
70};
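// Thanks to this specialization, an expression such as (illustrative sketch, assuming MatrixXd A, B):
//   VectorXd d = (A * B).diagonal();
// is mapped onto the diagonal of the equivalent lazy product, so each d(i) is computed as a single
// row-times-column reduction and the full matrix A*B is never materialized.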
71
72// Helper class to perform a matrix product with the destination at hand.
73// Depending on the sizes of the factors, there are different evaluation strategies
74// as controlled by internal::product_type.
75template <typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape,
76 typename RhsShape = typename evaluator_traits<Rhs>::Shape,
77 int ProductType = internal::product_type<Lhs, Rhs>::value>
78struct generic_product_impl;
79
80template <typename Lhs, typename Rhs>
81struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct>> {
82 static const bool value = true;
83};
84
85// This is the default evaluator implementation for products:
86// It creates a temporary and calls generic_product_impl
87template <typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
88struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
89 : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject> {
90 typedef Product<Lhs, Rhs, Options> XprType;
91 typedef typename XprType::PlainObject PlainObject;
92 typedef evaluator<PlainObject> Base;
93 enum { Flags = Base::Flags | EvalBeforeNestingBit };
94
95 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
96 : m_result(xpr.rows(), xpr.cols()) {
97 internal::construct_at<Base>(this, m_result);
98
99 // FIXME shall we handle nested_eval here?
100 // If so, then we must take care to remove the call to nested_eval in the specializations (e.g., in
101 // permutation_matrix_product, transposition_matrix_product, etc.)
102 // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
103 // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
104 // typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
105 // typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
106 //
107 // const LhsNested lhs(xpr.lhs());
108 // const RhsNested rhs(xpr.rhs());
109 //
110 // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
111
112 generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
113 }
114
115 protected:
116 PlainObject m_result;
117};
118
119// The following three shortcuts are enabled only if the scalar types match exactly.
120// TODO: we could enable them for different scalar types when the product is not vectorized.
121
122// Dense = Product
123template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
124struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::assign_op<Scalar, Scalar>, Dense2Dense,
125 std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
126 typedef Product<Lhs, Rhs, Options> SrcXprType;
127 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
128 const internal::assign_op<Scalar, Scalar>&) {
129 Index dstRows = src.rows();
130 Index dstCols = src.cols();
131 if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
132 // FIXME shall we handle nested_eval here?
133 generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
134 }
135};
136
137// Dense += Product
138template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
139struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::add_assign_op<Scalar, Scalar>, Dense2Dense,
140 std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
141 typedef Product<Lhs, Rhs, Options> SrcXprType;
142 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
143 const internal::add_assign_op<Scalar, Scalar>&) {
144 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
145 // FIXME shall we handle nested_eval here?
146 generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
147 }
148};
149
150// Dense -= Product
151template <typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
152struct Assignment<DstXprType, Product<Lhs, Rhs, Options>, internal::sub_assign_op<Scalar, Scalar>, Dense2Dense,
153 std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> {
154 typedef Product<Lhs, Rhs, Options> SrcXprType;
155 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
156 const internal::sub_assign_op<Scalar, Scalar>&) {
157 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
158 // FIXME shall we handle nested_eval here?
159 generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
160 }
161};
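// In user terms, these three specializations are what the following statements map to
// (illustrative sketch, assuming MatrixXd A, B, C of compatible sizes):
//   C.noalias()  = A * B;   // generic_product_impl<...>::evalTo(C, A, B)
//   C.noalias() += A * B;   // generic_product_impl<...>::addTo(C, A, B)
//   C.noalias() -= A * B;   // generic_product_impl<...>::subTo(C, A, B)
// Without noalias(), the evaluator_assume_aliasing specialization above makes the product be
// evaluated into a temporary first.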
162
163// Dense ?= scalar * Product
164// TODO we should apply that rule only if that's really helpful;
165// for instance, it is not good for inner products
166template <typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis,
167 typename Plain>
168struct Assignment<DstXprType,
169 CwiseBinaryOp<internal::scalar_product_op<ScalarBis, Scalar>,
170 const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
171 const Product<Lhs, Rhs, DefaultProduct>>,
172 AssignFunc, Dense2Dense> {
173 typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis, Scalar>,
174 const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>, Plain>,
175 const Product<Lhs, Rhs, DefaultProduct>>
176 SrcXprType;
177 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
178 const AssignFunc& func) {
179 call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs()) * src.rhs().rhs(), func);
180 }
181};
182
183//----------------------------------------
184// Catch "Dense ?= xpr + Product<>" expression to save one temporary
185// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
186
187template <typename OtherXpr, typename Lhs, typename Rhs>
188struct evaluator_assume_aliasing<
189 CwiseBinaryOp<
190 internal::scalar_sum_op<typename OtherXpr::Scalar, typename Product<Lhs, Rhs, DefaultProduct>::Scalar>,
191 const OtherXpr, const Product<Lhs, Rhs, DefaultProduct>>,
192 DenseShape> {
193 static const bool value = true;
194};
195
196template <typename OtherXpr, typename Lhs, typename Rhs>
197struct evaluator_assume_aliasing<
198 CwiseBinaryOp<
199 internal::scalar_difference_op<typename OtherXpr::Scalar, typename Product<Lhs, Rhs, DefaultProduct>::Scalar>,
200 const OtherXpr, const Product<Lhs, Rhs, DefaultProduct>>,
201 DenseShape> {
202 static const bool value = true;
203};
204
205template <typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
206struct assignment_from_xpr_op_product {
207 template <typename SrcXprType, typename InitialFunc>
208 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
209 const InitialFunc& /*func*/) {
210 call_assignment_no_alias(dst, src.lhs(), Func1());
211 call_assignment_no_alias(dst, src.rhs(), Func2());
212 }
213};
214
215#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP, BINOP, ASSIGN_OP2) \
216 template <typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, \
217 typename SrcScalar, typename OtherScalar, typename ProdScalar> \
218 struct Assignment<DstXprType, \
219 CwiseBinaryOp<internal::BINOP<OtherScalar, ProdScalar>, const OtherXpr, \
220 const Product<Lhs, Rhs, DefaultProduct>>, \
221 internal::ASSIGN_OP<DstScalar, SrcScalar>, Dense2Dense> \
222 : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs, Rhs, DefaultProduct>, \
223 internal::ASSIGN_OP<DstScalar, OtherScalar>, \
224 internal::ASSIGN_OP2<DstScalar, ProdScalar>> {}
225
226EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op, add_assign_op);
227EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op, scalar_sum_op, add_assign_op);
228EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_sum_op, sub_assign_op);
229
230EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op, sub_assign_op);
231EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op, scalar_difference_op, sub_assign_op);
232EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_difference_op, add_assign_op);
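// As an illustration, the six instantiations above make an assignment such as (sketch, assuming
// MatrixXd A, B, C, D of compatible sizes):
//   C = D + A * B;
// expand into two alias-free assignments,
//   C  = D;       // assign_op applied to the non-product operand
//   C += A * B;   // add_assign_op applied to the product, going through addTo()
// which avoids evaluating D + A*B into an extra temporary.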
233
234//----------------------------------------
235
236template <typename Lhs, typename Rhs>
237struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, InnerProduct> {
238 using impl = default_inner_product_impl<Lhs, Rhs, false>;
239 template <typename Dst>
240 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
241 dst.coeffRef(0, 0) = impl::run(lhs, rhs);
242 }
243
244 template <typename Dst>
245 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
246 dst.coeffRef(0, 0) += impl::run(lhs, rhs);
247 }
248
249 template <typename Dst>
250 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
251 dst.coeffRef(0, 0) -= impl::run(lhs, rhs);
252 }
253};
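// This kernel handles products whose result is known to be 1x1 at compile time, for instance
// (illustrative sketch, assuming VectorXd u, v of equal size):
//   MatrixXd r = u.transpose() * v;   // 1x1 result; evalTo() writes the single coefficient
// The coefficient itself is computed by default_inner_product_impl, i.e. a dot-product style
// reduction over the common dimension.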
254
255/***********************************************************************
256 * Implementation of outer dense * dense vector product
257 ***********************************************************************/
258
259// Column major result
260template <typename Dst, typename Lhs, typename Rhs, typename Func>
261void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func,
262 const false_type&) {
263 evaluator<Rhs> rhsEval(rhs);
264 ei_declare_local_nested_eval(Lhs, lhs, Rhs::SizeAtCompileTime, actual_lhs);
265 // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
266 // FIXME not very good if rhs is real and lhs complex while alpha is real too
267 const Index cols = dst.cols();
268 for (Index j = 0; j < cols; ++j) func(dst.col(j), rhsEval.coeff(Index(0), j) * actual_lhs);
269}
270
271// Row major result
272template <typename Dst, typename Lhs, typename Rhs, typename Func>
273void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func,
274 const true_type&) {
275 evaluator<Lhs> lhsEval(lhs);
276 ei_declare_local_nested_eval(Rhs, rhs, Lhs::SizeAtCompileTime, actual_rhs);
277 // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
278 // FIXME not very good if lhs is real and rhs complex while alpha is real too
279 const Index rows = dst.rows();
280 for (Index i = 0; i < rows; ++i) func(dst.row(i), lhsEval.coeff(i, Index(0)) * actual_rhs);
281}
282
283template <typename Lhs, typename Rhs>
284struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, OuterProduct> {
285 template <typename T>
286 struct is_row_major : bool_constant<(int(T::Flags) & RowMajorBit)> {};
287 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
288
289 // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
290 struct set {
291 template <typename Dst, typename Src>
292 EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
293 dst.const_cast_derived() = src;
294 }
295 };
296 struct add {
298 template <typename Dst, typename Src>
299 EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
300 dst.const_cast_derived() += src;
301 }
302 };
303 struct sub {
304 template <typename Dst, typename Src>
305 EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const {
306 dst.const_cast_derived() -= src;
307 }
308 };
310 struct adds {
311 Scalar m_scale;
313 explicit adds(const Scalar& s) : m_scale(s) {}
315 template <typename Dst, typename Src>
316 void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
317 dst.const_cast_derived() += m_scale * src;
318 }
319 };
320
321 template <typename Dst>
322 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
323 internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
324 }
325
326 template <typename Dst>
327 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
328 internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
329 }
330
331 template <typename Dst>
332 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
333 internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
334 }
335
336 template <typename Dst>
337 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs,
338 const Scalar& alpha) {
339 internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
340 }
341};
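// For example (illustrative sketch, assuming VectorXd u(m), v(n) and a column-major MatrixXd C(m, n)):
//   C.noalias()  = u * v.transpose();   // evalTo : for each j, C.col(j)  = v(j) * u
//   C.noalias() += u * v.transpose();   // addTo  : for each j, C.col(j) += v(j) * u
// For a row-major destination the same update is performed row by row instead, as selected by
// is_row_major<Dst> above.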
342
343// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
344template <typename Lhs, typename Rhs, typename Derived>
345struct generic_product_impl_base {
346 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
347
348 template <typename Dst>
349 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
350 dst.setZero();
351 scaleAndAddTo(dst, lhs, rhs, Scalar(1));
352 }
353
354 template <typename Dst>
355 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
356 scaleAndAddTo(dst, lhs, rhs, Scalar(1));
357 }
358
359 template <typename Dst>
360 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
361 scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
362 }
363
364 template <typename Dst>
365 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs,
366 const Scalar& alpha) {
367 Derived::scaleAndAddTo(dst, lhs, rhs, alpha);
368 }
369};
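// A product kernel deriving from this base only has to provide scaleAndAddTo(); the other entry
// points then reduce to it as follows:
//   evalTo(dst, lhs, rhs)  ->  dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1));
//   addTo (dst, lhs, rhs)  ->  scaleAndAddTo(dst, lhs, rhs, Scalar(1));
//   subTo (dst, lhs, rhs)  ->  scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
// This is how the GEMV, triangular, and selfadjoint kernels below are wired up.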
370
371template <typename Lhs, typename Rhs>
372struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>
373 : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, GemvProduct>> {
374 typedef typename nested_eval<Lhs, 1>::type LhsNested;
375 typedef typename nested_eval<Rhs, 1>::type RhsNested;
376 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
377 enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
378 typedef internal::remove_all_t<std::conditional_t<int(Side) == OnTheRight, LhsNested, RhsNested>> MatrixType;
379
380 template <typename Dest>
381 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs,
382 const Scalar& alpha) {
383 // Fall back to an inner product if both the lhs and the rhs are runtime vectors.
384 if (lhs.rows() == 1 && rhs.cols() == 1) {
385 dst.coeffRef(0, 0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
386 return;
387 }
388 LhsNested actual_lhs(lhs);
389 RhsNested actual_rhs(rhs);
390 internal::gemv_dense_selector<Side, (int(MatrixType::Flags) & RowMajorBit) ? RowMajor : ColMajor,
391 bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(actual_lhs,
392 actual_rhs, dst,
393 alpha);
394 }
395};
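// In user terms this is the matrix * vector path, e.g. (illustrative sketch, assuming MatrixXd M
// and VectorXd v, w of compatible sizes):
//   w.noalias() += M * v;
// which ends up in gemv_dense_selector with alpha == Scalar(1), selecting the column-major or
// row-major GEMV kernel according to the storage order of M.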
396
397template <typename Lhs, typename Rhs>
398struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> {
399 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
400
401 template <typename Dst>
402 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
403 // Same as: dst.noalias() = lhs.lazyProduct(rhs);
404 // but easier on the compiler side
405 call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar, Scalar>());
406 }
407
408 template <typename Dst>
409 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
410 // dst.noalias() += lhs.lazyProduct(rhs);
411 call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar, Scalar>());
412 }
413
414 template <typename Dst>
415 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) {
416 // dst.noalias() -= lhs.lazyProduct(rhs);
417 call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar, Scalar>());
418 }
419
420 // This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
421 // This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. For instance:
422 // dst {,+,-}= (s1*A)*(B*s2)
423 // will be rewritten as:
424 // dst {,+,-}= (s1*s2) * (A.lazyProduct(B))
425 // There are at least four benefits of doing so:
426 // 1 - huge performance gain for heap-allocated matrix types as it saves costly allocations.
427 // 2 - it is faster than simply by-passing the heap allocation through stack allocation.
428 // 3 - it makes this fallback consistent with the heavy GEMM routine.
429 // 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices.
430 // (see https://stackoverflow.com/questions/54738495)
431 // For small fixed-size matrices, however, the gains are less obvious: it is sometimes 2x faster but sometimes 3x
432 // slower, and the behavior also depends a lot on the compiler... This is why this re-writing strategy is currently
433 // enabled only when falling back from the main GEMM.
434 template <typename Dst, typename Func>
435 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs,
436 const Func& func) {
437 enum {
438 HasScalarFactor = blas_traits<Lhs>::HasScalarFactor || blas_traits<Rhs>::HasScalarFactor,
439 ConjLhs = blas_traits<Lhs>::NeedToConjugate,
440 ConjRhs = blas_traits<Rhs>::NeedToConjugate
441 };
442 // FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto
443 // this is important for real*complex_mat
444 Scalar actualAlpha = combine_scalar_factors<Scalar>(lhs, rhs);
445
446 eval_dynamic_impl(dst, blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
447 blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(), func, actualAlpha,
448 bool_constant<HasScalarFactor>());
449 }
450
451 protected:
452 template <typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
453 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs,
454 const Func& func, const Scalar& s /* == 1 */,
455 false_type) {
456 EIGEN_UNUSED_VARIABLE(s);
457 eigen_internal_assert(numext::is_exactly_one(s));
458 call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
459 }
460
461 template <typename Dst, typename LhsT, typename RhsT, typename Func, typename Scalar>
462 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs,
463 const Func& func, const Scalar& s, true_type) {
464 call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func);
465 }
466};
467
468// This specialization enforces the use of a coefficient-based evaluation strategy
469template <typename Lhs, typename Rhs>
470struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, LazyCoeffBasedProductMode>
471 : generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductMode> {};
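// The coefficient-based mode is what small fixed-size products fall back to, e.g.
// (illustrative sketch):
//   Matrix3d A, B, C;
//   C.noalias() = A * B;   // expanded via lazyProduct(), no temporary, no GEMM call
// Larger products are routed to the blocked GEMM kernels instead, with eval_dynamic() above
// serving as the GEMM fallback that factors out runtime scalar multiples.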
472
473// Case 2: Evaluate coeff by coeff
474//
475// This is mostly taken from CoeffBasedProduct.h
476// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
477// for the inner dimension of the product, because evaluator objects do not know their size.
478
479template <int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
480struct etor_product_coeff_impl;
481
482template <int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
483struct etor_product_packet_impl;
484
485template <typename Lhs, typename Rhs, int ProductTag>
486struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
487 : evaluator_base<Product<Lhs, Rhs, LazyProduct>> {
488 typedef Product<Lhs, Rhs, LazyProduct> XprType;
489 typedef typename XprType::Scalar Scalar;
490 typedef typename XprType::CoeffReturnType CoeffReturnType;
491
492 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
493 : m_lhs(xpr.lhs()),
494 m_rhs(xpr.rhs()),
495 m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
496 m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable
497 // them when not needed, or perhaps declare them on the fly in the packet method... We
498 // have to experiment to check what's best.
499 m_innerDim(xpr.lhs().cols()) {
500 EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
501 EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
502 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
503#if 0
504 std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
505 std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
506 std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
507 std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
508 std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
509 std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
510 std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
511 std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
512 std::cerr << "Alignment= " << Alignment << "\n";
513 std::cerr << "Flags= " << Flags << "\n";
514#endif
515 }
516
517 // Everything below here is taken from CoeffBasedProduct.h
518
519 typedef typename internal::nested_eval<Lhs, Rhs::ColsAtCompileTime>::type LhsNested;
520 typedef typename internal::nested_eval<Rhs, Lhs::RowsAtCompileTime>::type RhsNested;
521
522 typedef internal::remove_all_t<LhsNested> LhsNestedCleaned;
523 typedef internal::remove_all_t<RhsNested> RhsNestedCleaned;
524
525 typedef evaluator<LhsNestedCleaned> LhsEtorType;
526 typedef evaluator<RhsNestedCleaned> RhsEtorType;
527
528 enum {
529 RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
530 ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
531 InnerSize = min_size_prefer_fixed(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
532 MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
533 MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
534 };
535
536 typedef typename find_best_packet<Scalar, RowsAtCompileTime>::type LhsVecPacketType;
537 typedef typename find_best_packet<Scalar, ColsAtCompileTime>::type RhsVecPacketType;
538
539 enum {
540
541 LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
542 RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
543 CoeffReadCost = InnerSize == 0 ? NumTraits<Scalar>::ReadCost
544 : InnerSize == Dynamic
545 ? HugeCost
546 : InnerSize * (NumTraits<Scalar>::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost)) +
547 (InnerSize - 1) * NumTraits<Scalar>::AddCost,
548
549 Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
550
551 LhsFlags = LhsEtorType::Flags,
552 RhsFlags = RhsEtorType::Flags,
553
554 LhsRowMajor = LhsFlags & RowMajorBit,
555 RhsRowMajor = RhsFlags & RowMajorBit,
556
557 LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
558 RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
559
560 // Here, we don't care about alignment larger than the usable packet size.
561 LhsAlignment =
562 plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize* int(sizeof(typename LhsNestedCleaned::Scalar))),
563 RhsAlignment =
564 plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize* int(sizeof(typename RhsNestedCleaned::Scalar))),
565
566 SameType = is_same<typename LhsNestedCleaned::Scalar, typename RhsNestedCleaned::Scalar>::value,
567
568 CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1),
569 CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1),
570
571 EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1
572 : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1)
573 ? 0
574 : (bool(RhsRowMajor) && !CanVectorizeLhs),
575
576 Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) |
577 (EvalToRowMajor ? RowMajorBit : 0)
578 // TODO enable vectorization for mixed types
579 | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) |
580 (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
581
582 LhsOuterStrideBytes =
583 int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
584 RhsOuterStrideBytes =
585 int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
586
587 Alignment = bool(CanVectorizeLhs)
588 ? (LhsOuterStrideBytes <= 0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment)) != 0
589 ? 0
590 : LhsAlignment)
591 : bool(CanVectorizeRhs)
592 ? (RhsOuterStrideBytes <= 0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment)) != 0
593 ? 0
594 : RhsAlignment)
595 : 0,
596
597 /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
598 * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
599 * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
600 * the Flags, it is safe to make this value depend on ActualPacketAccessBit, which doesn't affect the ABI.
601 */
602 CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) &&
603 (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) &&
604 (int(InnerSize) % packet_traits<Scalar>::size == 0)
605 };
606
607 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const {
608 return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum();
609 }
610
611 /* Allow index-based non-packet access. It is impossible though to allow index-based packet access,
612 * which is why we don't set the LinearAccessBit.
613 * TODO: this seems possible when the result is a vector
614 */
615 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const {
616 const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
617 const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
618 return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum();
619 }
620
621 template <int LoadMode, typename PacketType>
622 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index row, Index col) const {
623 PacketType res;
624 typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
625 Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
626 PacketImpl;
627 PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
628 return res;
629 }
630
631 template <int LoadMode, typename PacketType>
632 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index index) const {
633 const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
634 const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
635 return packet<LoadMode, PacketType>(row, col);
636 }
637
638 template <int LoadMode, typename PacketType>
639 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index row, Index col, Index begin,
640 Index count) const {
641 PacketType res;
642 typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
643 Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
644 PacketImpl;
645 PacketImpl::run_segment(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res, begin, count);
646 return res;
647 }
648
649 template <int LoadMode, typename PacketType>
650 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index index, Index begin, Index count) const {
651 const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
652 const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
653 return packetSegment<LoadMode, PacketType>(row, col, begin, count);
654 }
655
656 protected:
657 add_const_on_value_type_t<LhsNested> m_lhs;
658 add_const_on_value_type_t<RhsNested> m_rhs;
659
660 LhsEtorType m_lhsImpl;
661 RhsEtorType m_rhsImpl;
662
663 // TODO: Get rid of m_innerDim if known at compile time
664 Index m_innerDim;
665};
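// With this evaluator, reading a coefficient of a lazy product computes a single inner product on
// demand, e.g. (illustrative sketch, assuming MatrixXd A, B of compatible sizes):
//   double x = A.lazyProduct(B)(0, 2);   // one row(0)-times-col(2) reduction, nothing else
// This is only worthwhile when few coefficients of the result are needed, or when the operands are
// small enough for the unrolled packet path above to beat a full GEMM call.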
666
667template <typename Lhs, typename Rhs>
668struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
669 : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape> {
670 typedef Product<Lhs, Rhs, DefaultProduct> XprType;
671 typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
672 typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
673 enum { Flags = Base::Flags | EvalBeforeNestingBit };
674 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr)
675 : Base(BaseProduct(xpr.lhs(), xpr.rhs())) {}
676};
677
678/****************************************
679*** Coeff based product, Packet path ***
680****************************************/
681
682template <int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
683struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> {
684 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
685 Index innerDim, Packet& res) {
686 etor_product_packet_impl<RowMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs,
687 innerDim, res);
688 res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
689 rhs.template packet<LoadMode, Packet>(Index(UnrollingIndex - 1), col), res);
690 }
691 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
692 Index innerDim, Packet& res, Index begin, Index count) {
693 etor_product_packet_impl<RowMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run_segment(
694 row, col, lhs, rhs, innerDim, res, begin, count);
695 res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
696 rhs.template packetSegment<LoadMode, Packet>(Index(UnrollingIndex - 1), col, begin, count), res);
697 }
698};
699
700template <int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
701struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> {
702 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
703 Index innerDim, Packet& res) {
704 etor_product_packet_impl<ColMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs,
705 innerDim, res);
706 res = pmadd(lhs.template packet<LoadMode, Packet>(row, Index(UnrollingIndex - 1)),
707 pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
708 }
709 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
710 Index innerDim, Packet& res, Index begin, Index count) {
711 etor_product_packet_impl<ColMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run_segment(
712 row, col, lhs, rhs, innerDim, res, begin, count);
713 res = pmadd(lhs.template packetSegment<LoadMode, Packet>(row, Index(UnrollingIndex - 1), begin, count),
714 pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
715 }
716};
717
718template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
719struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> {
720 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
721 Index /*innerDim*/, Packet& res) {
722 res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))), rhs.template packet<LoadMode, Packet>(Index(0), col));
723 }
724 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
725 Index /*innerDim*/, Packet& res, Index begin,
726 Index count) {
727 res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),
728 rhs.template packetSegment<LoadMode, Packet>(Index(0), col, begin, count));
729 }
730};
731
732template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
733struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> {
734 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
735 Index /*innerDim*/, Packet& res) {
736 res = pmul(lhs.template packet<LoadMode, Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
737 }
738 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
739 Index /*innerDim*/, Packet& res, Index begin,
740 Index count) {
741 res = pmul(lhs.template packetSegment<LoadMode, Packet>(row, Index(0), begin, count),
742 pset1<Packet>(rhs.coeff(Index(0), col)));
743 }
744};
745
746template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
747struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> {
748 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
749 const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
750 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
751 }
752 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
753 const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res,
754 Index /*begin*/, Index /*count*/) {
755 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
756 }
757};
758
759template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
760struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> {
761 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
762 const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
763 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
764 }
765 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
766 const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res,
767 Index /*begin*/, Index /*count*/) {
768 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
769 }
770};
771
772template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
773struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> {
774 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
775 Index innerDim, Packet& res) {
776 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
777 for (Index i = 0; i < innerDim; ++i)
778 res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode, Packet>(i, col), res);
779 }
780 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
781 Index innerDim, Packet& res, Index begin, Index count) {
782 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
783 for (Index i = 0; i < innerDim; ++i)
784 res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packetSegment<LoadMode, Packet>(i, col, begin, count),
785 res);
786 }
787};
788
789template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
790struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> {
791 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
792 Index innerDim, Packet& res) {
793 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
794 for (Index i = 0; i < innerDim; ++i)
795 res = pmadd(lhs.template packet<LoadMode, Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
796 }
797 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
798 Index innerDim, Packet& res, Index begin, Index count) {
799 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
800 for (Index i = 0; i < innerDim; ++i)
801 res = pmadd(lhs.template packetSegment<LoadMode, Packet>(row, i, begin, count), pset1<Packet>(rhs.coeff(i, col)),
802 res);
803 }
804};
805
806/***************************************************************************
807 * Triangular products
808 ***************************************************************************/
809template <int Mode, bool LhsIsTriangular, typename Lhs, bool LhsIsVector, typename Rhs, bool RhsIsVector>
810struct triangular_product_impl;
811
812template <typename Lhs, typename Rhs, int ProductTag>
813struct generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>
814 : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, TriangularShape, DenseShape, ProductTag>> {
815 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
816
817 template <typename Dest>
818 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
819 triangular_product_impl<Lhs::Mode, true, typename Lhs::MatrixType, false, Rhs, Rhs::ColsAtCompileTime == 1>::run(
820 dst, lhs.nestedExpression(), rhs, alpha);
821 }
822};
823
824template <typename Lhs, typename Rhs, int ProductTag>
825struct generic_product_impl<Lhs, Rhs, DenseShape, TriangularShape, ProductTag>
826 : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, TriangularShape, ProductTag>> {
827 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
828
829 template <typename Dest>
830 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
831 triangular_product_impl<Rhs::Mode, false, Lhs, Lhs::RowsAtCompileTime == 1, typename Rhs::MatrixType, false>::run(
832 dst, lhs, rhs.nestedExpression(), alpha);
833 }
834};
835
836/***************************************************************************
837 * SelfAdjoint products
838 ***************************************************************************/
839template <typename Lhs, int LhsMode, bool LhsIsVector, typename Rhs, int RhsMode, bool RhsIsVector>
840struct selfadjoint_product_impl;
841
842template <typename Lhs, typename Rhs, int ProductTag>
843struct generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>
844 : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>> {
845 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
846
847 template <typename Dest>
848 static EIGEN_DEVICE_FUNC void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
849 selfadjoint_product_impl<typename Lhs::MatrixType, Lhs::Mode, false, Rhs, 0, Rhs::ColsAtCompileTime == 1>::run(
850 dst, lhs.nestedExpression(), rhs, alpha);
851 }
852};
853
854template <typename Lhs, typename Rhs, int ProductTag>
855struct generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>
856 : generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>> {
857 typedef typename Product<Lhs, Rhs>::Scalar Scalar;
858
859 template <typename Dest>
860 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
861 selfadjoint_product_impl<Lhs, 0, Lhs::RowsAtCompileTime == 1, typename Rhs::MatrixType, Rhs::Mode, false>::run(
862 dst, lhs, rhs.nestedExpression(), alpha);
863 }
864};
865
866/***************************************************************************
867 * Diagonal products
868 ***************************************************************************/
869
870template <typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
871struct diagonal_product_evaluator_base : evaluator_base<Derived> {
872 typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
873
874 public:
875 enum {
876 CoeffReadCost = int(NumTraits<Scalar>::MulCost) + int(evaluator<MatrixType>::CoeffReadCost) +
877 int(evaluator<DiagonalType>::CoeffReadCost),
878
879 MatrixFlags = evaluator<MatrixType>::Flags,
880 DiagFlags = evaluator<DiagonalType>::Flags,
881
882 StorageOrder_ = (Derived::MaxRowsAtCompileTime == 1 && Derived::MaxColsAtCompileTime != 1) ? RowMajor
883 : (Derived::MaxColsAtCompileTime == 1 && Derived::MaxRowsAtCompileTime != 1) ? ColMajor
884 : MatrixFlags & RowMajorBit ? RowMajor
885 : ColMajor,
886 SameStorageOrder_ = int(StorageOrder_) == ((MatrixFlags & RowMajorBit) ? RowMajor : ColMajor),
887
888 ScalarAccessOnDiag_ = !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft) ||
889 (int(StorageOrder_) == RowMajor && int(ProductOrder) == OnTheRight)),
890 SameTypes_ = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
891 // FIXME currently we need same types, but in the future the next rule should be the one
892 // Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (SameTypes_ &&
893 // bool(int(DiagFlags)&PacketAccessBit))),
894 Vectorizable_ = bool(int(MatrixFlags) & PacketAccessBit) && SameTypes_ &&
895 (SameStorageOrder_ || (MatrixFlags & LinearAccessBit) == LinearAccessBit) &&
896 (ScalarAccessOnDiag_ || (bool(int(DiagFlags) & PacketAccessBit))),
897 LinearAccessMask_ =
898 (MatrixType::RowsAtCompileTime == 1 || MatrixType::ColsAtCompileTime == 1) ? LinearAccessBit : 0,
899 Flags =
900 ((HereditaryBits | LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0),
901 Alignment = evaluator<MatrixType>::Alignment,
902
903 AsScalarProduct =
904 (DiagonalType::SizeAtCompileTime == 1) ||
905 (DiagonalType::SizeAtCompileTime == Dynamic && MatrixType::RowsAtCompileTime == 1 &&
906 ProductOrder == OnTheLeft) ||
907 (DiagonalType::SizeAtCompileTime == Dynamic && MatrixType::ColsAtCompileTime == 1 && ProductOrder == OnTheRight)
908 };
909
910 EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType& mat, const DiagonalType& diag)
911 : m_diagImpl(diag), m_matImpl(mat) {
912 EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
913 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
914 }
915
916 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const {
917 if (AsScalarProduct)
918 return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
919 else
920 return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
921 }
922
923 protected:
924 template <int LoadMode, typename PacketType>
925 EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const {
926 return internal::pmul(m_matImpl.template packet<LoadMode, PacketType>(row, col),
927 internal::pset1<PacketType>(m_diagImpl.coeff(id)));
928 }
929
930 template <int LoadMode, typename PacketType>
931 EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const {
932 enum {
933 InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
934 DiagonalPacketLoadMode = plain_enum_min(
935 LoadMode,
936 ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
937 };
938 return internal::pmul(m_matImpl.template packet<LoadMode, PacketType>(row, col),
939 m_diagImpl.template packet<DiagonalPacketLoadMode, PacketType>(id));
940 }
941
942 template <int LoadMode, typename PacketType>
943 EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count,
944 internal::true_type) const {
945 return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
946 internal::pset1<PacketType>(m_diagImpl.coeff(id)));
947 }
948
949 template <int LoadMode, typename PacketType>
950 EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count,
951 internal::false_type) const {
952 enum {
953 InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
954 DiagonalPacketLoadMode = plain_enum_min(
955 LoadMode,
956 ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
957 };
958 return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
959 m_diagImpl.template packetSegment<DiagonalPacketLoadMode, PacketType>(id, begin, count));
960 }
961
962 evaluator<DiagonalType> m_diagImpl;
963 evaluator<MatrixType> m_matImpl;
964};
965
966// diagonal * dense
967template <typename Lhs, typename Rhs, int ProductKind, int ProductTag>
968struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
969 : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
970 OnTheLeft> {
971 typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
972 OnTheLeft>
973 Base;
974 using Base::coeff;
975 using Base::m_diagImpl;
976 using Base::m_matImpl;
977 typedef typename Base::Scalar Scalar;
978
979 typedef Product<Lhs, Rhs, ProductKind> XprType;
980 typedef typename XprType::PlainObject PlainObject;
981 typedef typename Lhs::DiagonalVectorType DiagonalType;
982
983 static constexpr int StorageOrder = Base::StorageOrder_;
984 using IsRowMajor_t = bool_constant<StorageOrder == RowMajor>;
985
986 EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
987
988 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
989 return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
990 }
991
992#ifndef EIGEN_GPUCC
993 template <int LoadMode, typename PacketType>
994 EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
995 // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
996 // See also similar calls below.
997 return this->template packet_impl<LoadMode, PacketType>(row, col, row, IsRowMajor_t());
998 }
999
1000 template <int LoadMode, typename PacketType>
1001 EIGEN_STRONG_INLINE PacketType packet(Index idx) const {
1002 return packet<LoadMode, PacketType>(int(StorageOrder) == ColMajor ? idx : 0,
1003 int(StorageOrder) == ColMajor ? 0 : idx);
1004 }
1005
1006 template <int LoadMode, typename PacketType>
1007 EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
1008 // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
1009 // See also similar calls below.
1010 return this->template packet_segment_impl<LoadMode, PacketType>(row, col, row, begin, count, IsRowMajor_t());
1011 }
1012
1013 template <int LoadMode, typename PacketType>
1014 EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const {
1015 return packetSegment<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx,
1016 begin, count);
1017 }
1018#endif
1019};
1020
1021// dense * diagonal
1022template <typename Lhs, typename Rhs, int ProductKind, int ProductTag>
1023struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
1024 : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
1025 OnTheRight> {
1026 typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>,
1027 OnTheRight>
1028 Base;
1029 using Base::coeff;
1030 using Base::m_diagImpl;
1031 using Base::m_matImpl;
1032 typedef typename Base::Scalar Scalar;
1033
1034 typedef Product<Lhs, Rhs, ProductKind> XprType;
1035 typedef typename XprType::PlainObject PlainObject;
1036
1037 static constexpr int StorageOrder = Base::StorageOrder_;
1038 using IsColMajor_t = bool_constant<StorageOrder == ColMajor>;
1039
1040 EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {}
1041
1042 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const {
1043 return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
1044 }
1045
1046#ifndef EIGEN_GPUCC
1047 template <int LoadMode, typename PacketType>
1048 EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
1049 return this->template packet_impl<LoadMode, PacketType>(row, col, col, IsColMajor_t());
1050 }
1051
1052 template <int LoadMode, typename PacketType>
1053 EIGEN_STRONG_INLINE PacketType packet(Index idx) const {
1054 return packet<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx);
1055 }
1056
1057 template <int LoadMode, typename PacketType>
1058 EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
1059 return this->template packet_segment_impl<LoadMode, PacketType>(row, col, col, begin, count, IsColMajor_t());
1060 }
1061
1062 template <int LoadMode, typename PacketType>
1063 EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const {
1064 return packetSegment<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx,
1065 begin, count);
1066 }
1067#endif
1068};
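// Both specializations keep the product lazy: no temporary is created and each coefficient costs a
// single multiply, e.g. (illustrative sketch, assuming MatrixXd M and VectorXd d of matching size):
//   MatrixXd R1 = d.asDiagonal() * M;   // scales the rows:    R1(i,j) = d(i) * M(i,j)
//   MatrixXd R2 = M * d.asDiagonal();   // scales the columns: R2(i,j) = M(i,j) * d(j)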
1069
1070/***************************************************************************
1071 * Products with permutation matrices
1072 ***************************************************************************/
1073
1079template <typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
1080struct permutation_matrix_product;
1081
1082template <typename ExpressionType, int Side, bool Transposed>
1083struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape> {
1084 typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
1085 typedef remove_all_t<MatrixType> MatrixTypeCleaned;
1086
1087 template <typename Dest, typename PermutationType>
1088 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm,
1089 const ExpressionType& xpr) {
1090 MatrixType mat(xpr);
1091 const Index n = Side == OnTheLeft ? mat.rows() : mat.cols();
1092 // FIXME we need an is_same for expressions that is not sensitive to constness. For instance
1093 // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
1094 // if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
1095 if (is_same_dense(dst, mat)) {
1096 // apply the permutation inplace
1097 Matrix<bool, PermutationType::RowsAtCompileTime, 1, 0, PermutationType::MaxRowsAtCompileTime> mask(perm.size());
1098 mask.fill(false);
1099 Index r = 0;
1100 while (r < perm.size()) {
1101 // search for the next seed
1102 while (r < perm.size() && mask[r]) r++;
1103 if (r >= perm.size()) break;
1104 // we got one, let's follow it until we are back to the seed
1105 Index k0 = r++;
1106 Index kPrev = k0;
1107 mask.coeffRef(k0) = true;
1108 for (Index k = perm.indices().coeff(k0); k != k0; k = perm.indices().coeff(k)) {
1109 Block<Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime,
1110 Side == OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
1111 .swap(Block < Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime,
1112 Side == OnTheRight
1113 ? 1
1114 : Dest::ColsAtCompileTime > (dst, ((Side == OnTheLeft) ^ Transposed) ? k0 : kPrev));
1115
1116 mask.coeffRef(k) = true;
1117 kPrev = k;
1118 }
1119 }
1120 } else {
1121 for (Index i = 0; i < n; ++i) {
1122 Block<Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side == OnTheRight ? 1 : Dest::ColsAtCompileTime>(
1123 dst, ((Side == OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
1124
1125 =
1126
1127 Block < const MatrixTypeCleaned,
1128 Side == OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,
1129 Side == OnTheRight ? 1
1130 : MatrixTypeCleaned::ColsAtCompileTime >
1131 (mat, ((Side == OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
1132 }
1133 }
1134 }
1135};
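// For instance (illustrative sketch, assuming MatrixXd M and a PermutationMatrix<Dynamic> P of
// matching size):
//   MatrixXd R = P * M;    // permutes the rows of M
//   MatrixXd S = M * P;    // permutes the columns of M
//   M.noalias() = P * M;   // destination aliases the source: the cycle-following branch above
//                          // applies the permutation in place
// The Inverse<> overloads below route P.inverse() * M and M * P.inverse() to the same helper with
// Transposed = true.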
1136
1137template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1138struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag> {
1139 template <typename Dest>
1140 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1141 permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1142 }
1143};
1144
1145template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1146struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag> {
1147 template <typename Dest>
1148 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1149 permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1150 }
1151};
1152
1153template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1154struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag> {
1155 template <typename Dest>
1156 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs) {
1157 permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1158 }
1159};
1160
1161template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1162struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag> {
1163 template <typename Dest>
1164 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs) {
1165 permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1166 }
1167};
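// Dispatch summary for the four specializations above (illustrative only; P is a PermutationMatrix,
// A and B dense matrices):
//   B = P * A;            // PermutationShape * MatrixShape : permutes the rows of A
//   B = A * P;            // MatrixShape * PermutationShape : permutes the columns of A
//   B = P.inverse() * A;  // Inverse<...> * MatrixShape     : Transposed == true, undoes P on the rows
//   B = A * P.inverse();  // MatrixShape * Inverse<...>     : Transposed == true, undoes P on the columns
// A permutation matrix is orthogonal, so its inverse equals its transpose; that is why a single
// Transposed flag in permutation_matrix_product is enough to cover the Inverse cases.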
1168
1169/***************************************************************************
1170 * Products with transposition matrices (Transpositions)
1171 ***************************************************************************/
1172
1173// FIXME could we unify Transpositions and Permutation into a single "shape"??
1174
1179template <typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
1180struct transposition_matrix_product {
1181 typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
1182 typedef remove_all_t<MatrixType> MatrixTypeCleaned;
1183
1184 template <typename Dest, typename TranspositionType>
1185 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr,
1186 const ExpressionType& xpr) {
1187 MatrixType mat(xpr);
1188 typedef typename TranspositionType::StorageIndex StorageIndex;
1189 const Index size = tr.size();
1190 StorageIndex j = 0;
1191
1192 if (!is_same_dense(dst, mat)) dst = mat;
1193
1194 for (Index k = (Transposed ? size - 1 : 0); Transposed ? k >= 0 : k < size; Transposed ? --k : ++k)
1195 if (Index(j = tr.coeff(k)) != k) {
1196 if (Side == OnTheLeft)
1197 dst.row(k).swap(dst.row(j));
1198 else if (Side == OnTheRight)
1199 dst.col(k).swap(dst.col(j));
1200 }
1201 }
1202};
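// A Transpositions object stores a sequence of pairwise interchanges rather than a full index map:
// applying it on the left means "for k = 0, 1, ... swap row k with row tr.coeff(k)" (in reverse order
// when Transposed), which is exactly the loop above. Illustrative sketch, not part of the sources:
//
//   #include <Eigen/Dense>
//   using namespace Eigen;
//   Transpositions<Dynamic> T(3);
//   T.coeffRef(0) = 1;                  // swap rows 0 and 1
//   T.coeffRef(1) = 2;                  // then swap rows 1 and 2
//   T.coeffRef(2) = 2;                  // no-op
//   MatrixXd A = MatrixXd::Random(3, 3);
//   MatrixXd B = T * A;                 // copies A into B, then performs the swaps in order
//   MatrixXd C = T.transpose() * A;     // same swaps applied in reverse order (the Transposed == true path)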
1203
1204template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1205struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag> {
1206 template <typename Dest>
1207 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1208 transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1209 }
1210};
1211
1212template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1213struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag> {
1214 template <typename Dest>
1215 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1216 transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1217 }
1218};
1219
1220template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1221struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag> {
1222 template <typename Dest>
1223 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs) {
1224 transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1225 }
1226};
1227
1228template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1229struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag> {
1230 template <typename Dest>
1231 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs) {
1232 transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1233 }
1234};
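// These specializations are what gets hit when applying stored row interchanges, e.g. the transpositions
// kept by LDLT (LDLT and transpositionsP() are existing Eigen API; the snippet itself is only a sketch):
//
//   #include <Eigen/Dense>
//   using namespace Eigen;
//   MatrixXd S = MatrixXd::Random(4, 4);
//   S = S * S.transpose();                     // make it selfadjoint
//   LDLT<MatrixXd> ldlt(S);
//   VectorXd b = VectorXd::Random(4);
//   VectorXd pb = ldlt.transpositionsP() * b;  // Transpositions * dense vector -> TranspositionsShape path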
1235
1236/***************************************************************************
1237 * Skew-symmetric products
1238 * for now we just call the generic (dense) implementation
1239 ***************************************************************************/
1240template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1241struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, MatrixShape, ProductTag> {
1242 template <typename Dest>
1243 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1244 generic_product_impl<typename Lhs::DenseMatrixType, Rhs, DenseShape, MatrixShape, ProductTag>::evalTo(dst, lhs,
1245 rhs);
1246 }
1247};
1248
1249template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1250struct generic_product_impl<Lhs, Rhs, MatrixShape, SkewSymmetricShape, ProductTag> {
1251 template <typename Dest>
1252 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1253 generic_product_impl<Lhs, typename Rhs::DenseMatrixType, MatrixShape, DenseShape, ProductTag>::evalTo(dst, lhs,
1254 rhs);
1255 }
1256};
1257
1258template <typename Lhs, typename Rhs, int ProductTag>
1259struct generic_product_impl<Lhs, Rhs, SkewSymmetricShape, SkewSymmetricShape, ProductTag> {
1260 template <typename Dest>
1261 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) {
1262 generic_product_impl<typename Lhs::DenseMatrixType, typename Rhs::DenseMatrixType, DenseShape, DenseShape,
1263 ProductTag>::evalTo(dst, lhs, rhs);
1264 }
1265};
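// The three skew-symmetric specializations above do not exploit the structure yet: the skew-symmetric
// operand is converted to its dense representation (Lhs/Rhs::DenseMatrixType, a plain 3x3 matrix) and
// the product is handed to the dense kernels. Conceptually (sketch only; s denotes a skew-symmetric
// operand, M a dense matrix with 3 rows):
//   s * M   behaves like first forming the dense 3x3 matrix of s and then running a dense product.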
1266
1267template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1268struct generic_product_impl<Lhs, Rhs, MatrixShape, HomogeneousShape, ProductTag>
1269 : generic_product_impl<Lhs, typename Rhs::PlainObject, MatrixShape, DenseShape, ProductTag> {};
1270
1271template <typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1272struct generic_product_impl<Lhs, Rhs, HomogeneousShape, MatrixShape, ProductTag>
1273 : generic_product_impl<typename Lhs::PlainObject, Rhs, DenseShape, MatrixShape, ProductTag> {};
1274
1275template <typename Lhs, typename Rhs, int ProductTag>
1276struct generic_product_impl<Lhs, Rhs, PermutationShape, HomogeneousShape, ProductTag>
1277 : generic_product_impl<Lhs, Rhs, PermutationShape, DenseShape, ProductTag> {};
1278
1279template <typename Lhs, typename Rhs, int ProductTag>
1280struct generic_product_impl<Lhs, Rhs, HomogeneousShape, PermutationShape, ProductTag>
1281 : generic_product_impl<Lhs, Rhs, DenseShape, PermutationShape, ProductTag> {};
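// Homogeneous operands without a dedicated kernel are handled similarly: for MatrixShape * HomogeneousShape
// (and the reverse) the Homogeneous expression is evaluated into its PlainObject, i.e. the dense object with
// the extra unit coefficient(s) appended, and the dense path is used; the dense combinations have more
// specialized implementations in Homogeneous.h, while the permutation combinations reuse the dense-shape
// permutation path above. Illustrative sketch, not part of the sources:
//
//   #include <Eigen/Dense>
//   using namespace Eigen;
//   PermutationMatrix<Dynamic, Dynamic> P(4);
//   P.setIdentity();
//   Vector3d p(1.0, 2.0, 3.0);
//   Vector4d q = P * p.homogeneous();   // PermutationShape * HomogeneousShape -> dense rhs, then row permutation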
1282
1283} // end namespace internal
1284
1285} // end namespace Eigen
1286
1287#endif // EIGEN_PRODUCTEVALUATORS_H