// Eigen 5.0.1-dev+60122df6 — AssignEvaluator.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

12#ifndef EIGEN_ASSIGN_EVALUATOR_H
13#define EIGEN_ASSIGN_EVALUATOR_H
14
15// IWYU pragma: private
16#include "./InternalHeaderCheck.h"
17
18namespace Eigen {
19
20// This implementation is based on Assign.h
21
22namespace internal {
23
24/***************************************************************************
25 * Part 1 : the logic deciding a strategy for traversal and unrolling *
26 ***************************************************************************/
27
28// copy_using_evaluator_traits is based on assign_traits
29
30template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = Dynamic>
31struct copy_using_evaluator_traits {
32 using Src = typename SrcEvaluator::XprType;
33 using Dst = typename DstEvaluator::XprType;
34 using DstScalar = typename Dst::Scalar;
35
36 static constexpr int DstFlags = DstEvaluator::Flags;
37 static constexpr int SrcFlags = SrcEvaluator::Flags;
38
39 public:
40 static constexpr int DstAlignment = DstEvaluator::Alignment;
41 static constexpr int SrcAlignment = SrcEvaluator::Alignment;
42 static constexpr int JointAlignment = plain_enum_min(DstAlignment, SrcAlignment);
43 static constexpr bool DstHasDirectAccess = bool(DstFlags & DirectAccessBit);
44 static constexpr bool SrcIsRowMajor = bool(SrcFlags & RowMajorBit);
45 static constexpr bool DstIsRowMajor = bool(DstFlags & RowMajorBit);
46 static constexpr bool IsVectorAtCompileTime = Dst::IsVectorAtCompileTime;
47 static constexpr int RowsAtCompileTime = size_prefer_fixed(Src::RowsAtCompileTime, Dst::RowsAtCompileTime);
48 static constexpr int ColsAtCompileTime = size_prefer_fixed(Src::ColsAtCompileTime, Dst::ColsAtCompileTime);
49 static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime);
50 static constexpr int MaxRowsAtCompileTime =
51 min_size_prefer_fixed(Src::MaxRowsAtCompileTime, Dst::MaxRowsAtCompileTime);
52 static constexpr int MaxColsAtCompileTime =
53 min_size_prefer_fixed(Src::MaxColsAtCompileTime, Dst::MaxColsAtCompileTime);
54 static constexpr int MaxSizeAtCompileTime =
55 min_size_prefer_fixed(Src::MaxSizeAtCompileTime, Dst::MaxSizeAtCompileTime);
56 static constexpr int InnerSizeAtCompileTime = IsVectorAtCompileTime ? SizeAtCompileTime
57 : DstIsRowMajor ? ColsAtCompileTime
58 : RowsAtCompileTime;
59 static constexpr int MaxInnerSizeAtCompileTime = IsVectorAtCompileTime ? MaxSizeAtCompileTime
60 : DstIsRowMajor ? MaxColsAtCompileTime
61 : MaxRowsAtCompileTime;
62 static constexpr int RestrictedInnerSize = min_size_prefer_fixed(MaxInnerSizeAtCompileTime, MaxPacketSize);
63 static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize);
64 static constexpr int OuterStride = outer_stride_at_compile_time<Dst>::ret;
65
66 // TODO distinguish between linear traversal and inner-traversals
67 using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize>::type;
68 using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize>::type;
69
70 static constexpr int LinearPacketSize = unpacket_traits<LinearPacketType>::size;
71 static constexpr int InnerPacketSize = unpacket_traits<InnerPacketType>::size;
72
73 public:
74 static constexpr int LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment;
75 static constexpr int InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment;
76
77 private:
78 static constexpr bool StorageOrdersAgree = DstIsRowMajor == SrcIsRowMajor;
79 static constexpr bool MightVectorize = StorageOrdersAgree && bool(DstFlags & SrcFlags & ActualPacketAccessBit) &&
80 bool(functor_traits<AssignFunc>::PacketAccess);
81 static constexpr bool MayInnerVectorize = MightVectorize && (InnerSizeAtCompileTime != Dynamic) &&
82 (InnerSizeAtCompileTime % InnerPacketSize == 0) &&
83 (OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) &&
84 (EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment);
85 static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit);
86 static constexpr bool MayLinearVectorize =
87 MightVectorize && MayLinearize && DstHasDirectAccess &&
88 (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) &&
89 (MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearPacketSize);
90 /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
91 so it's only good for large enough sizes. */
92 static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 1 : 3) * InnerPacketSize;
93 static constexpr bool MaySliceVectorize =
94 MightVectorize && DstHasDirectAccess &&
95 (MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold);
96 /* slice vectorization can be slow, so we only want it if the slices are big, which is
97 indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
98 in a fixed-size matrix
99 However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
100
101 public:
102 static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal
103 : (MayLinearVectorize && (LinearPacketSize > InnerPacketSize))
104 ? LinearVectorizedTraversal
105 : MayInnerVectorize ? InnerVectorizedTraversal
106 : MayLinearVectorize ? LinearVectorizedTraversal
107 : MaySliceVectorize ? SliceVectorizedTraversal
108 : MayLinearize ? LinearTraversal
109 : DefaultTraversal;
110 static constexpr bool Vectorized = Traversal == InnerVectorizedTraversal || Traversal == LinearVectorizedTraversal ||
111 Traversal == SliceVectorizedTraversal;
112
113 using PacketType = std::conditional_t<Traversal == LinearVectorizedTraversal, LinearPacketType, InnerPacketType>;
114
115 private:
116 static constexpr int ActualPacketSize = Vectorized ? unpacket_traits<PacketType>::size : 1;
117 static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize;
118 static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost);
119 static constexpr bool MayUnrollCompletely =
120 (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
121 static constexpr bool MayUnrollInner =
122 (InnerSizeAtCompileTime != Dynamic) && (InnerSizeAtCompileTime * CoeffReadCost <= UnrollingLimit);
123
124 public:
125 static constexpr int Unrolling =
126 (Traversal == InnerVectorizedTraversal || Traversal == DefaultTraversal)
127 ? (MayUnrollCompletely ? CompleteUnrolling
128 : MayUnrollInner ? InnerUnrolling
129 : NoUnrolling)
130 : Traversal == LinearVectorizedTraversal
131 ? (MayUnrollCompletely && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment))
132 ? CompleteUnrolling
133 : NoUnrolling)
134 : Traversal == LinearTraversal ? (MayUnrollCompletely ? CompleteUnrolling : NoUnrolling)
135#if EIGEN_UNALIGNED_VECTORIZE
136 : Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling)
137#endif
138 : NoUnrolling;
139 static constexpr bool UsePacketSegment = has_packet_segment<PacketType>::value;
140
141#ifdef EIGEN_DEBUG_ASSIGN
142 static void debug() {
143 std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
144 std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
145 std::cerr.setf(std::ios::hex, std::ios::basefield);
146 std::cerr << "DstFlags"
147 << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
148 std::cerr << "SrcFlags"
149 << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
150 std::cerr.unsetf(std::ios::hex);
151 EIGEN_DEBUG_VAR(DstAlignment)
152 EIGEN_DEBUG_VAR(SrcAlignment)
153 EIGEN_DEBUG_VAR(LinearRequiredAlignment)
154 EIGEN_DEBUG_VAR(InnerRequiredAlignment)
155 EIGEN_DEBUG_VAR(JointAlignment)
156 EIGEN_DEBUG_VAR(InnerSizeAtCompileTime)
157 EIGEN_DEBUG_VAR(MaxInnerSizeAtCompileTime)
158 EIGEN_DEBUG_VAR(LinearPacketSize)
159 EIGEN_DEBUG_VAR(InnerPacketSize)
160 EIGEN_DEBUG_VAR(ActualPacketSize)
161 EIGEN_DEBUG_VAR(StorageOrdersAgree)
162 EIGEN_DEBUG_VAR(MightVectorize)
163 EIGEN_DEBUG_VAR(MayLinearize)
164 EIGEN_DEBUG_VAR(MayInnerVectorize)
165 EIGEN_DEBUG_VAR(MayLinearVectorize)
166 EIGEN_DEBUG_VAR(MaySliceVectorize)
167 std::cerr << "Traversal"
168 << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
169 EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
170 EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
171 EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
172 EIGEN_DEBUG_VAR(UnrollingLimit)
173 EIGEN_DEBUG_VAR(MayUnrollCompletely)
174 EIGEN_DEBUG_VAR(MayUnrollInner)
175 std::cerr << "Unrolling"
176 << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
177 std::cerr << std::endl;
178 }
179#endif
180};
181
182/***************************************************************************
183 * Part 2 : meta-unrollers
184 ***************************************************************************/
185
186/************************
187*** Default traversal ***
188************************/
189
// Completely unrolled coefficient-wise copy: assigns the coefficient at flat
// index Index_ and recurses until Index_ == Stop (see terminator below).
template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling {
  // Map the flat index onto (outer, inner) coordinates at compile time.
  static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
  static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    kernel.assignCoeffByOuterInner(Outer, Inner);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
  }
};
200
// Recursion terminator: Index_ == Stop, nothing left to copy.
template <typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
};
205
// Unrolled copy of one inner run: assigns (outer, Index_) and recurses on the
// inner index until Index_ == Stop.
template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer) {
    kernel.assignCoeffByOuterInner(outer, Index_);
    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_ + 1, Stop>::run(kernel, outer);
  }
};
213
// Recursion terminator for the inner-unrolled copy.
template <typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
};
218
219/***********************
220*** Linear traversal ***
221***********************/
222
// Completely unrolled linear (flat-index) coefficient copy.
template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    kernel.assignCoeff(Index_);
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_ + 1, Stop>::run(kernel);
  }
};
230
// Recursion terminator for the linear unrolled copy.
template <typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
};
235
236/**************************
237*** Inner vectorization ***
238**************************/
239
// Completely unrolled packet copy: writes one packet at flat index Index_ and
// recurses in steps of the packet size until Index_ == Stop.
template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling {
  using PacketType = typename Kernel::PacketType;
  // Compile-time (outer, inner) coordinates of this packet's first coefficient.
  static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime;
  static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime;
  static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;
  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(Outer, Inner);
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
  }
};
254
// Recursion terminator for the unrolled packet copy.
template <typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
};
259
// Unrolled packet copy of one inner run: writes the packet at (outer, Index_)
// and recurses in steps of the packet size until Index_ == Stop.
template <typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling {
  using PacketType = typename Kernel::PacketType;
  static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel,
                                                                                                           outer);
  }
};
271
// Recursion terminator for the inner-unrolled packet copy.
template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
};
276
// Tail handler for the unrolled inner-vectorized copy: writes the remaining
// [Start, Stop) coefficients of run `outer` as one partial packet (segment).
template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment, bool UsePacketSegment>
struct copy_using_evaluator_innervec_segment {
  using PacketType = typename Kernel::PacketType;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
    kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Start, 0,
                                                                                            Stop - Start);
  }
};
286
// No packet-segment support: fall back to an unrolled scalar copy of the tail.
template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_segment<Kernel, Start, Stop, SrcAlignment, DstAlignment,
                                             /*UsePacketSegment*/ false>
    : copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Start, Stop> {};
291
// Empty tail (Start == Stop), segment variant: nothing to do.
template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
                                             /*UsePacketSegment*/ true> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
};
297
// Empty tail (Start == Stop), scalar variant: nothing to do.
template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
                                             /*UsePacketSegment*/ false> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
};
303
304/***************************************************************************
305 * Part 3 : implementation of all cases
306 ***************************************************************************/
307
308// dense_assignment_loop is based on assign_impl
309
// Forward declaration; the specializations below provide one implementation
// per (Traversal, Unrolling) combination chosen by copy_using_evaluator_traits.
template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
          int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop_impl;
313
// Entry point of the dense assignment: dispatches to the appropriate
// dense_assignment_loop_impl specialization.
template <typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal,
          int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
#ifdef __cpp_lib_is_constant_evaluated
    // In a constant-evaluated context packet intrinsics are not usable, so
    // force the plain coefficient loop (preserving the zero-size no-op case).
    if (internal::is_constant_evaluated())
      dense_assignment_loop_impl<Kernel, Traversal == AllAtOnceTraversal ? AllAtOnceTraversal : DefaultTraversal,
                                 NoUnrolling>::run(kernel);
    else
#endif
      dense_assignment_loop_impl<Kernel, Traversal, Unrolling>::run(kernel);
  }
};
327
328/************************
329***** Special Cases *****
330************************/
331
332// Zero-sized assignment is a no-op.
333template <typename Kernel, int Unrolling>
334struct dense_assignment_loop_impl<Kernel, AllAtOnceTraversal, Unrolling> {
335 static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;
336
337 EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& /*kernel*/) {
338 EIGEN_STATIC_ASSERT(SizeAtCompileTime == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
339 }
340};
341
342/************************
343*** Default traversal ***
344************************/
345
346template <typename Kernel>
347struct dense_assignment_loop_impl<Kernel, DefaultTraversal, NoUnrolling> {
348 EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& kernel) {
349 for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
350 for (Index inner = 0; inner < kernel.innerSize(); ++inner) {
351 kernel.assignCoeffByOuterInner(outer, inner);
352 }
353 }
354 }
355};
356
// Default traversal, fully unrolled: the whole copy is expanded at compile time.
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, DefaultTraversal, CompleteUnrolling> {
  static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
  }
};
365
// Default traversal, inner unrolling: runtime loop over the outer dimension,
// each inner run expanded at compile time.
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, DefaultTraversal, InnerUnrolling> {
  static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    const Index outerSize = kernel.outerSize();
    for (Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, InnerSizeAtCompileTime>::run(kernel, outer);
  }
};
376
377/***************************
378*** Linear vectorization ***
379***************************/
380
381// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
382// of the non vectorizable beginning and ending parts
383
// Handles the unaligned head/tail of a vectorized copy. The primary template
// covers Skip == true (both overloads are no-ops), used to compile out the
// head loop when the destination is already known to be aligned.
template <typename PacketType, int DstAlignment, int SrcAlignment, bool UsePacketSegment, bool Skip>
struct unaligned_dense_assignment_loop {
  // if Skip == true, then do nothing
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*start*/, Index /*end*/) {}
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*outer*/,
                                                                  Index /*innerStart*/, Index /*innerEnd*/) {}
};
393
// Packet-segment variant: the whole [start, end) remainder (at most one packet
// wide) is written with a single partial packet load/store.
template <typename PacketType, int DstAlignment, int SrcAlignment>
struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ true,
                                       /*Skip*/ false> {
  // Linear (flat-index) flavor.
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) {
    Index count = end - start;
    eigen_assert(count <= unpacket_traits<PacketType>::size);
    if (count > 0) kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(start, 0, count);
  }
  // Outer/inner flavor: remainder of run `outer`.
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index start, Index end) {
    Index count = end - start;
    eigen_assert(count <= unpacket_traits<PacketType>::size);
    if (count > 0)
      kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, start, 0, count);
  }
};
411
412template <typename PacketType, int DstAlignment, int SrcAlignment>
413struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ false,
414 /*Skip*/ false> {
415 template <typename Kernel>
416 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) {
417 for (Index index = start; index < end; ++index) kernel.assignCoeff(index);
418 }
419 template <typename Kernel>
420 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index innerStart,
421 Index innerEnd) {
422 for (Index inner = innerStart; inner < innerEnd; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
423 }
424};
425
// Completely unrolled linear packet copy: writes the packet at flat index
// Index_ and recurses in steps of the packet size until Index_ == Stop.
template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_linearvec_CompleteUnrolling {
  using PacketType = typename Kernel::PacketType;
  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
  static constexpr int NextIndex = Index_ + unpacket_traits<PacketType>::size;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
    kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(Index_);
    copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
  }
};
438
// Recursion terminator for the unrolled linear packet copy.
template <typename Kernel, int Stop>
struct copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, Stop, Stop> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
};
443
// Tail of the completely unrolled linear-vectorized copy: writes the last
// [Index_, Stop) coefficients as one partial packet.
template <typename Kernel, int Index_, int Stop, bool UsePacketSegment>
struct copy_using_evaluator_linearvec_segment {
  using PacketType = typename Kernel::PacketType;
  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
    kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(Index_, 0, Stop - Index_);
  }
};
454
// No packet-segment support: fall back to an unrolled scalar copy of the tail.
template <typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_linearvec_segment<Kernel, Index_, Stop, /*UsePacketSegment*/ false>
    : copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_, Stop> {};
458
// Empty tail (Index_ == Stop), segment variant: nothing to do.
template <typename Kernel, int Stop>
struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ true> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
};
463
// Empty tail (Index_ == Stop), scalar variant: nothing to do.
template <typename Kernel, int Stop>
struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ false> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {}
};
468
// Linear vectorized traversal, no unrolling: unaligned head, aligned packet
// body, and leftover tail, all over the flattened 1D index range.
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, NoUnrolling> {
  using Scalar = typename Kernel::Scalar;
  using PacketType = typename Kernel::PacketType;
  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
  static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
  static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
  static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
  // Alignable: the destination can reach the packet alignment by skipping a
  // whole number of scalars (or is already sufficiently aligned).
  static constexpr bool Alignable =
      (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
  static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
  static constexpr bool DstIsAligned = DstAlignment >= Alignment;
  static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;

  // head_loop handles the unaligned prologue (compiled out when the
  // destination is statically aligned); tail_loop handles the epilogue.
  using head_loop =
      unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, UsePacketSegment, DstIsAligned>;
  using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, SrcAlignment, UsePacketSegment, false>;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    const Index size = kernel.size();
    // [0, alignedStart): head; [alignedStart, alignedEnd): whole packets;
    // [alignedEnd, size): tail.
    const Index alignedStart = DstIsAligned ? 0 : first_aligned<Alignment>(kernel.dstDataPtr(), size);
    const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);

    head_loop::run(kernel, 0, alignedStart);

    for (Index index = alignedStart; index < alignedEnd; index += PacketSize)
      kernel.template assignPacket<Alignment, SrcAlignment, PacketType>(index);

    tail_loop::run(kernel, alignedEnd, size);
  }
};
500
// Linear vectorized traversal, fully unrolled: whole packets expanded at
// compile time, the remainder handled by the segment helper.
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, CompleteUnrolling> {
  using PacketType = typename Kernel::PacketType;
  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
  static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime;
  // Largest multiple of the packet size that fits in Size.
  static constexpr int AlignedSize = numext::round_down(Size, PacketSize);
  static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, AlignedSize>::run(kernel);
    copy_using_evaluator_linearvec_segment<Kernel, AlignedSize, Size, UsePacketSegment>::run(kernel);
  }
};
514
515/**************************
516*** Inner vectorization ***
517**************************/
518
// Inner vectorized traversal, no unrolling: each inner run is a whole number
// of packets (guaranteed by MayInnerVectorize), so no head/tail handling.
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, NoUnrolling> {
  using PacketType = typename Kernel::PacketType;
  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
  static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    for (Index outer = 0; outer < outerSize; ++outer)
      for (Index inner = 0; inner < innerSize; inner += PacketSize)
        kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
  }
};
534
// Inner vectorized traversal, fully unrolled: the entire packet copy is
// expanded at compile time.
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, CompleteUnrolling> {
  static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, SizeAtCompileTime>::run(kernel);
  }
};
543
// Inner vectorized traversal, inner unrolling: runtime loop over the outer
// dimension, each inner run of packets expanded at compile time.
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, InnerVectorizedTraversal, InnerUnrolling> {
  static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
  static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
  static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
    const Index outerSize = kernel.outerSize();
    for (Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, InnerSize, SrcAlignment, DstAlignment>::run(kernel,
                                                                                                          outer);
  }
};
557
558/***********************
559*** Linear traversal ***
560***********************/
561
562template <typename Kernel>
563struct dense_assignment_loop_impl<Kernel, LinearTraversal, NoUnrolling> {
564 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
565 const Index size = kernel.size();
566 for (Index i = 0; i < size; ++i) kernel.assignCoeff(i);
567 }
568};
569
// Linear traversal, fully unrolled at compile time.
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, LinearTraversal, CompleteUnrolling> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, Kernel::AssignmentTraits::SizeAtCompileTime>::run(
        kernel);
  }
};
577
578/**************************
579*** Slice vectorization ***
580***************************/
581
// Slice vectorized traversal, no unrolling: inner runs are not a whole number
// of packets, so each run gets an unaligned head, an aligned packet body and a
// scalar/segment tail. The aligned offset is tracked across runs because the
// outer stride may shift the alignment from one run to the next.
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, NoUnrolling> {
  using Scalar = typename Kernel::Scalar;
  using PacketType = typename Kernel::PacketType;
  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
  static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
  static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
  static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
  // Alignable: the packet alignment is reachable by skipping whole scalars.
  static constexpr bool Alignable =
      (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
  static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
  static constexpr bool DstIsAligned = DstAlignment >= Alignment;
  static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;

  // The source is accessed Unaligned throughout: its alignment within a run is
  // unrelated to the destination's.
  using head_loop = unaligned_dense_assignment_loop<PacketType, DstAlignment, Unaligned, UsePacketSegment, !Alignable>;
  using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, Unaligned, UsePacketSegment, false>;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    const Scalar* dst_ptr = kernel.dstDataPtr();
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    // How much the aligned offset shifts from one outer run to the next.
    const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0;
    Index alignedStart = ((!Alignable) || DstIsAligned) ? 0 : internal::first_aligned<Alignment>(dst_ptr, innerSize);

    for (Index outer = 0; outer < outerSize; ++outer) {
      const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize);

      head_loop::run(kernel, outer, 0, alignedStart);

      // do the vectorizable part of the assignment
      for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize)
        kernel.template assignPacketByOuterInner<Alignment, Unaligned, PacketType>(outer, inner);

      tail_loop::run(kernel, outer, alignedEnd, innerSize);

      alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize);
    }
  }
};
621
622#if EIGEN_UNALIGNED_VECTORIZE
// Slice vectorized traversal with inner unrolling (only when unaligned packet
// access is allowed): each inner run is an unrolled sequence of unaligned
// packets followed by a compile-time tail segment.
template <typename Kernel>
struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, InnerUnrolling> {
  using PacketType = typename Kernel::PacketType;
  static constexpr int PacketSize = unpacket_traits<PacketType>::size;
  static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
  // Whole-packet part of the inner run; the rest goes through the segment loop.
  static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize);
  static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;

  using packet_loop = copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, VectorizableSize, Unaligned, Unaligned>;
  using packet_segment_loop = copy_using_evaluator_innervec_segment<Kernel, VectorizableSize, InnerSize, Unaligned,
                                                                    Unaligned, UsePacketSegment>;

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
    for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
      packet_loop::run(kernel, outer);
      packet_segment_loop::run(kernel, outer);
    }
  }
};
642#endif
643
644/***************************************************************************
645 * Part 4 : Generic dense assignment kernel
646 ***************************************************************************/
647
// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators and by the actual assignment functor.
// This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.
// One can customize the assignment using this generic dense_assignment_kernel with different
// functors, or by completely overloading it, by-passing a functor.
654template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
655class generic_dense_assignment_kernel {
656 protected:
657 typedef typename DstEvaluatorTypeT::XprType DstXprType;
658 typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
659
660 public:
661 typedef DstEvaluatorTypeT DstEvaluatorType;
662 typedef SrcEvaluatorTypeT SrcEvaluatorType;
663 typedef typename DstEvaluatorType::Scalar Scalar;
664 typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
665 typedef typename AssignmentTraits::PacketType PacketType;
666
667 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst,
668 const SrcEvaluatorType& src,
669 const Functor& func,
670 DstXprType& dstExpr)
671 : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) {
672#ifdef EIGEN_DEBUG_ASSIGN
673 AssignmentTraits::debug();
674#endif
675 }
676
677 EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return m_dstExpr.size(); }
678 EIGEN_DEVICE_FUNC constexpr Index innerSize() const noexcept { return m_dstExpr.innerSize(); }
679 EIGEN_DEVICE_FUNC constexpr Index outerSize() const noexcept { return m_dstExpr.outerSize(); }
680 EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_dstExpr.rows(); }
681 EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_dstExpr.cols(); }
682 EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_dstExpr.outerStride(); }
683
684 EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() noexcept { return m_dst; }
685 EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; }
686
688 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) {
689 m_functor.assignCoeff(m_dst.coeffRef(row, col), m_src.coeff(row, col));
690 }
691
693 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) {
694 m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
695 }
696
698 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeffByOuterInner(Index outer, Index inner) {
699 Index row = rowIndexByOuterInner(outer, inner);
700 Index col = colIndexByOuterInner(outer, inner);
701 assignCoeff(row, col);
702 }
703
704 template <int StoreMode, int LoadMode, typename Packet>
705 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) {
706 m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row, col),
707 m_src.template packet<LoadMode, Packet>(row, col));
708 }
709
710 template <int StoreMode, int LoadMode, typename Packet>
711 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) {
712 m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode, Packet>(index));
713 }
714
715 template <int StoreMode, int LoadMode, typename Packet>
716 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) {
717 Index row = rowIndexByOuterInner(outer, inner);
718 Index col = colIndexByOuterInner(outer, inner);
719 assignPacket<StoreMode, LoadMode, Packet>(row, col);
720 }
721
722 template <int StoreMode, int LoadMode, typename Packet>
723 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index row, Index col, Index begin, Index count) {
724 m_functor.template assignPacketSegment<StoreMode>(
725 &m_dst.coeffRef(row, col), m_src.template packetSegment<LoadMode, Packet>(row, col, begin, count), begin,
726 count);
727 }
728
729 template <int StoreMode, int LoadMode, typename Packet>
730 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index index, Index begin, Index count) {
731 m_functor.template assignPacketSegment<StoreMode>(
732 &m_dst.coeffRef(index), m_src.template packetSegment<LoadMode, Packet>(index, begin, count), begin, count);
733 }
734
735 template <int StoreMode, int LoadMode, typename Packet>
736 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegmentByOuterInner(Index outer, Index inner, Index begin,
737 Index count) {
738 Index row = rowIndexByOuterInner(outer, inner);
739 Index col = colIndexByOuterInner(outer, inner);
740 assignPacketSegment<StoreMode, LoadMode, Packet>(row, col, begin, count);
741 }
742
743 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) {
744 typedef typename DstEvaluatorType::ExpressionTraits Traits;
745 return int(Traits::RowsAtCompileTime) == 1 ? 0
746 : int(Traits::ColsAtCompileTime) == 1 ? inner
747 : int(DstEvaluatorType::Flags) & RowMajorBit ? outer
748 : inner;
749 }
750
751 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) {
752 typedef typename DstEvaluatorType::ExpressionTraits Traits;
753 return int(Traits::ColsAtCompileTime) == 1 ? 0
754 : int(Traits::RowsAtCompileTime) == 1 ? inner
755 : int(DstEvaluatorType::Flags) & RowMajorBit ? inner
756 : outer;
757 }
758
759 EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const { return m_dstExpr.data(); }
760
761 protected:
762 DstEvaluatorType& m_dst;
763 const SrcEvaluatorType& m_src;
764 const Functor& m_functor;
765 // TODO find a way to avoid the needs of the original expression
766 DstXprType& m_dstExpr;
767};
768
769// Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the
770// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
771// when computing the product.
772
773template <typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
774class restricted_packet_dense_assignment_kernel
775 : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> {
776 protected:
777 typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
778
779 public:
780 typedef typename Base::Scalar Scalar;
781 typedef typename Base::DstXprType DstXprType;
782 typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
783 typedef typename AssignmentTraits::PacketType PacketType;
784
785 EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT& dst, const SrcEvaluatorTypeT& src,
786 const Functor& func, DstXprType& dstExpr)
787 : Base(dst, src, func, dstExpr) {}
788};
789
790/***************************************************************************
791 * Part 5 : Entry point for dense rectangular assignment
792 ***************************************************************************/
793
// Generic fallback: with an arbitrary functor the destination is not resized here;
// we only assert (in debug builds) that both sides already have the same shape.
// The assign_op overload is the one that performs the actual resizing.
template <typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
                                                                       const Functor& /*func*/) {
  EIGEN_ONLY_USED_FOR_DEBUG(dst);
  EIGEN_ONLY_USED_FOR_DEBUG(src);
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
}
801
802template <typename DstXprType, typename SrcXprType, typename T1, typename T2>
803EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
804 const internal::assign_op<T1, T2>& /*func*/) {
805 Index dstRows = src.rows();
806 Index dstCols = src.cols();
807 if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols);
808 eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
809}
810
811template <typename DstXprType, typename SrcXprType, typename Functor>
812EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src,
813 const Functor& func) {
814 typedef evaluator<DstXprType> DstEvaluatorType;
815 typedef evaluator<SrcXprType> SrcEvaluatorType;
816
817 SrcEvaluatorType srcEvaluator(src);
818
819 // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
820 // we need to resize the destination after the source evaluator has been created.
821 resize_if_allowed(dst, src, func);
822
823 DstEvaluatorType dstEvaluator(dst);
824
825 typedef generic_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Functor> Kernel;
826 Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
827
828 dense_assignment_loop<Kernel>::run(kernel);
829}
830
831template <typename DstXprType, typename SrcXprType>
832EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) {
833 call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>());
834}
835
836/***************************************************************************
837 * Part 6 : Generic assignment
838 ***************************************************************************/
839
// Based on the respective shapes of the destination and source,
// the class AssignmentKind determines the kind of assignment mechanism.
// AssignmentKind must define a Kind typedef.
template <typename DstShape, typename SrcShape>
struct AssignmentKind;

// Assignment kinds defined in this file:
struct Dense2Dense {};
struct EigenBase2EigenBase {};

// Default: any shape pair falls back to the generic evalTo()-based mechanism.
template <typename, typename>
struct AssignmentKind {
  typedef EigenBase2EigenBase Kind;
};
// Dense-to-dense pairs use the dense assignment loops defined above.
template <>
struct AssignmentKind<DenseShape, DenseShape> {
  typedef Dense2Dense Kind;
};

// This is the main assignment class. Specializations are selected on the Kind
// computed from the two shapes; EnableIf is left free for SFINAE-based refinement.
template <typename DstXprType, typename SrcXprType, typename Functor,
          typename Kind = typename AssignmentKind<typename evaluator_traits<DstXprType>::Shape,
                                                  typename evaluator_traits<SrcXprType>::Shape>::Kind,
          typename EnableIf = void>
struct Assignment;
865
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic
// transposition. Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things
// quite complicated. So this intermediate function removes everything related to "assume-aliasing" so that Assignment
// does not have to bother about these annoying details.
870
871template <typename Dst, typename Src>
872EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(Dst& dst, const Src& src) {
873 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
874}
875template <typename Dst, typename Src>
876EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) {
877 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
878}
879
880// Deal with "assume-aliasing"
881template <typename Dst, typename Src, typename Func>
882EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
883 Dst& dst, const Src& src, const Func& func, std::enable_if_t<evaluator_assume_aliasing<Src>::value, void*> = 0) {
884 typename plain_matrix_type<Src>::type tmp(src);
885 call_assignment_no_alias(dst, tmp, func);
886}
887
// No "assume-aliasing": the source can safely be read in place, so forward directly.
template <typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
    Dst& dst, const Src& src, const Func& func, std::enable_if_t<!evaluator_assume_aliasing<Src>::value, void*> = 0) {
  call_assignment_no_alias(dst, src, func);
}
893
894// by-pass "assume-aliasing"
895// When there is no aliasing, we require that 'dst' has been properly resized
896template <typename Dst, template <typename> class StorageBase, typename Src, typename Func>
897EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(NoAlias<Dst, StorageBase>& dst, const Src& src,
898 const Func& func) {
899 call_assignment_no_alias(dst.expression(), src, func);
900}
901
902template <typename Dst, typename Src, typename Func>
903EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src,
904 const Func& func) {
905 enum {
906 NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) ||
907 (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) &&
908 int(Dst::SizeAtCompileTime) != 1
909 };
910
911 typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst> ActualDstTypeCleaned;
912 typedef std::conditional_t<NeedToTranspose, Transpose<Dst>, Dst&> ActualDstType;
913 ActualDstType actualDst(dst);
914
915 // TODO check whether this is the right place to perform these checks:
916 EIGEN_STATIC_ASSERT_LVALUE(Dst)
917 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src)
918 EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar);
919
920 Assignment<ActualDstTypeCleaned, Src, Func>::run(actualDst, src, func);
921}
922
923template <typename Dst, typename Src, typename Func>
924EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src,
925 const Func& func) {
926 typedef evaluator<Dst> DstEvaluatorType;
927 typedef evaluator<Src> SrcEvaluatorType;
928 typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType, SrcEvaluatorType, Func> Kernel;
929
930 EIGEN_STATIC_ASSERT_LVALUE(Dst)
931 EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
932
933 SrcEvaluatorType srcEvaluator(src);
934 resize_if_allowed(dst, src, func);
935
936 DstEvaluatorType dstEvaluator(dst);
937 Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
938
939 dense_assignment_loop<Kernel>::run(kernel);
940}
941
942template <typename Dst, typename Src>
943EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src) {
944 call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
945}
946
947template <typename Dst, typename Src, typename Func>
948EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src,
949 const Func& func) {
950 // TODO check whether this is the right place to perform these checks:
951 EIGEN_STATIC_ASSERT_LVALUE(Dst)
952 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst, Src)
953 EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar);
954
955 Assignment<Dst, Src, Func>::run(dst, src, func);
956}
957template <typename Dst, typename Src>
958EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) {
959 call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
960}
961
// forward declaration (defined elsewhere): runtime overlap check between dst and
// src, used by the Dense2Dense Assignment specialization in debug builds.
template <typename Dst, typename Src>
EIGEN_DEVICE_FUNC void check_for_aliasing(const Dst& dst, const Src& src);
965
966// Generic Dense to Dense assignment
967// Note that the last template argument "Weak" is needed to make it possible to perform
968// both partial specialization+SFINAE without ambiguous specialization
template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak> {
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(DstXprType& dst, const SrcXprType& src,
                                                                  const Functor& func) {
#ifndef EIGEN_NO_DEBUG
    // The aliasing check inspects runtime state, so it is skipped during
    // constant evaluation (constexpr contexts).
    if (!internal::is_constant_evaluated()) {
      internal::check_for_aliasing(dst, src);
    }
#endif

    call_dense_assignment_loop(dst, src, func);
  }
};
982
// Fast path: plain assignment from a constant nullary expression (e.g. setConstant)
// is routed to eigen_fill_impl instead of the generic coefficient loops.
template <typename DstXprType, typename SrcPlainObject, typename Weak>
struct Assignment<DstXprType, CwiseNullaryOp<scalar_constant_op<typename DstXprType::Scalar>, SrcPlainObject>,
                  assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>, Dense2Dense, Weak> {
  using Scalar = typename DstXprType::Scalar;
  using NullaryOp = scalar_constant_op<Scalar>;
  using SrcXprType = CwiseNullaryOp<NullaryOp, SrcPlainObject>;
  using Functor = assign_op<Scalar, Scalar>;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
                                                        const Functor& /*func*/) {
    eigen_fill_impl<DstXprType>::run(dst, src);
  }
};
995
// Fast path: plain assignment from a zero nullary expression (e.g. setZero)
// is routed to eigen_zero_impl instead of the generic coefficient loops.
template <typename DstXprType, typename SrcPlainObject, typename Weak>
struct Assignment<DstXprType, CwiseNullaryOp<scalar_zero_op<typename DstXprType::Scalar>, SrcPlainObject>,
                  assign_op<typename DstXprType::Scalar, typename DstXprType::Scalar>, Dense2Dense, Weak> {
  using Scalar = typename DstXprType::Scalar;
  using NullaryOp = scalar_zero_op<Scalar>;
  using SrcXprType = CwiseNullaryOp<NullaryOp, SrcPlainObject>;
  using Functor = assign_op<Scalar, Scalar>;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src,
                                                        const Functor& /*func*/) {
    eigen_zero_impl<DstXprType>::run(dst, src);
  }
};
1008
1009// Generic assignment through evalTo.
1010// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
1011// Note that the last template argument "Weak" is needed to make it possible to perform
1012// both partial specialization+SFINAE without ambiguous specialization
1013template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
1014struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak> {
1015 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1016 DstXprType& dst, const SrcXprType& src,
1017 const internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>& /*func*/) {
1018 Index dstRows = src.rows();
1019 Index dstCols = src.cols();
1020 if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
1021
1022 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
1023 src.evalTo(dst);
1024 }
1025
1026 // NOTE The following two functions are templated to avoid their instantiation if not needed
1027 // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
1028 template <typename SrcScalarType>
1029 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1030 DstXprType& dst, const SrcXprType& src,
1031 const internal::add_assign_op<typename DstXprType::Scalar, SrcScalarType>& /*func*/) {
1032 Index dstRows = src.rows();
1033 Index dstCols = src.cols();
1034 if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
1035
1036 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
1037 src.addTo(dst);
1038 }
1039
1040 template <typename SrcScalarType>
1041 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(
1042 DstXprType& dst, const SrcXprType& src,
1043 const internal::sub_assign_op<typename DstXprType::Scalar, SrcScalarType>& /*func*/) {
1044 Index dstRows = src.rows();
1045 Index dstCols = src.cols();
1046 if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols);
1047
1048 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
1049 src.subTo(dst);
1050 }
1051};
1052
1053} // namespace internal
1054
1055} // end namespace Eigen
1056
1057#endif // EIGEN_ASSIGN_EVALUATOR_H
@ Unaligned
Definition Constants.h:235
const unsigned int ActualPacketAccessBit
Definition Constants.h:108
const unsigned int LinearAccessBit
Definition Constants.h:133
const unsigned int DirectAccessBit
Definition Constants.h:159
const unsigned int RowMajorBit
Definition Constants.h:70
Namespace containing all symbols from the Eigen library.
Definition B01_Experimental.dox:1
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:82
const int Dynamic
Definition Constants.h:25