Eigen  5.0.1-dev+7c7d8473
 
Loading...
Searching...
No Matches
GenericPacketMath.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_GENERIC_PACKET_MATH_H
12#define EIGEN_GENERIC_PACKET_MATH_H
13
14// IWYU pragma: private
15#include "./InternalHeaderCheck.h"
16
17namespace Eigen {
18
19namespace internal {
20
28
// Instrumentation hooks for the four kinds of packet memory access.
// Each hook expands to nothing unless it was already defined before this point.
#if !defined(EIGEN_DEBUG_ALIGNED_LOAD)
#define EIGEN_DEBUG_ALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_LOAD)
#define EIGEN_DEBUG_UNALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_ALIGNED_STORE)
#define EIGEN_DEBUG_ALIGNED_STORE
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_STORE)
#define EIGEN_DEBUG_UNALIGNED_STORE
#endif
44
/** \internal
 * Default capability flags shared by packet_traits specializations.
 * A flag equal to 1 marks the operation as available, 0 as unavailable.
 */
struct default_packet_traits {
  enum {
    // --- Enabled by default: implemented for most types. ---
    HasAdd = 1,
    HasSub = 1,
    HasMul = 1,
    HasNegate = 1,
    HasShift = 1,
    HasAbs = 1,
    HasAbs2 = 1,
    HasMin = 1,
    HasMax = 1,
    HasConj = 1,
    HasSetLinear = 1,
    HasSign = 1,
    // The nearest integer functions (rint, round, floor, ceil, trunc) are
    // enabled by default for all scalar and packet types.
    HasRound = 1,

    // --- Disabled by default. ---
    HasArg = 0,
    HasAbsDiff = 0,
    // Indicates whether packet comparison is supported.
    // pcmp_eq and pcmp_lt should be defined for it to be true.
    HasCmp = 0,

    // Division, reciprocal and roots.
    HasDiv = 0,
    HasReciprocal = 0,
    HasSqrt = 0,
    HasRsqrt = 0,
    HasCbrt = 0,

    // Exponentials, logarithms and powers.
    HasExp = 0,
    HasExpm1 = 0,
    HasLog = 0,
    HasLog1p = 0,
    HasLog10 = 0,
    HasPow = 0,

    // Trigonometric and hyperbolic functions.
    HasSin = 0,
    HasCos = 0,
    HasTan = 0,
    HasASin = 0,
    HasACos = 0,
    HasATan = 0,
    HasATanh = 0,
    HasSinh = 0,
    HasCosh = 0,
    HasTanh = 0,

    // Special functions.
    HasLGamma = 0,
    HasDiGamma = 0,
    HasZeta = 0,
    HasPolygamma = 0,
    HasErf = 0,
    HasErfc = 0,
    HasNdtri = 0,
    HasBessel = 0,
    HasIGamma = 0,
    HasIGammaDerA = 0,
    HasGammaSampleDerAlpha = 0,
    HasIGammac = 0,
    HasBetaInc = 0
  };
};
106
107template <typename T>
108struct packet_traits : default_packet_traits {
109 typedef T type;
110 typedef T half;
111 enum {
112 Vectorizable = 0,
113 size = 1,
114 AlignedOnScalar = 0,
115 };
116 enum {
117 HasAdd = 0,
118 HasSub = 0,
119 HasMul = 0,
120 HasNegate = 0,
121 HasAbs = 0,
122 HasAbs2 = 0,
123 HasMin = 0,
124 HasMax = 0,
125 HasConj = 0,
126 HasSetLinear = 0
127 };
128};
129
130template <typename T>
131struct packet_traits<const T> : packet_traits<T> {};
132
133template <typename T>
134struct unpacket_traits {
135 typedef T type;
136 typedef T half;
137 typedef typename numext::get_integer_by_size<sizeof(T)>::signed_type integer_packet;
138 enum {
139 size = 1,
140 alignment = alignof(T),
141 vectorizable = false,
142 masked_load_available = false,
143 masked_store_available = false
144 };
145};
146
147template <typename T>
148struct unpacket_traits<const T> : unpacket_traits<T> {};
149
153template <typename Packet>
154struct is_scalar {
155 using Scalar = typename unpacket_traits<Packet>::type;
156 enum { value = internal::is_same<Packet, Scalar>::value };
157};
158
159// automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
160// 1) the packets are the same type, or
161// 2) the packets differ only in sign.
162// In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
163template <typename SrcPacket, typename TgtPacket,
164 bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
165struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};
166template <>
167struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};
168template <>
169struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
170template <>
171struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
172template <>
173struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
174
175template <typename SrcPacket, typename TgtPacket>
176struct is_degenerate_helper<SrcPacket, TgtPacket, false> {
177 using SrcScalar = typename unpacket_traits<SrcPacket>::type;
178 static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
179 using TgtScalar = typename unpacket_traits<TgtPacket>::type;
180 static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
181 static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
182};
183
184// is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
185template <typename SrcPacket, typename TgtPacket>
186struct is_degenerate {
187 static constexpr bool value =
188 is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;
189};
190
191template <typename Packet>
192struct is_half {
193 using Scalar = typename unpacket_traits<Packet>::type;
194 static constexpr int Size = unpacket_traits<Packet>::size;
195 using DefaultPacket = typename packet_traits<Scalar>::type;
196 static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
197 static constexpr bool value = Size != 1 && Size < DefaultSize;
198};
199
200template <typename Src, typename Tgt>
201struct type_casting_traits {
202 enum {
203 VectorizedCast =
204 is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
205 SrcCoeffRatio = 1,
206 TgtCoeffRatio = 1
207 };
208};
209
210// provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types
211template <typename Src, typename Tgt>
212struct vectorized_type_casting_traits {
213 enum : int {
214 DefaultSrcPacketSize = packet_traits<Src>::size,
215 DefaultTgtPacketSize = packet_traits<Tgt>::size,
216 VectorizedCast = 1,
217 SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1),
218 TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1)
219 };
220};
221
224template <typename T, int unique_id = 0>
225struct eigen_packet_wrapper {
226 EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
227 EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
228 EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
229 EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T& v) : m_val(v) {}
230 EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T& v) {
231 m_val = v;
232 return *this;
233 }
234
235 T m_val;
236};
237
238template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
239struct preinterpret_generic;
240
241template <typename Target, typename Packet>
242struct preinterpret_generic<Target, Packet, false> {
243 // the packets are not the same, attempt scalar bit_cast
244 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
245 return numext::bit_cast<Target, Packet>(a);
246 }
247};
248
249template <typename Packet>
250struct preinterpret_generic<Packet, Packet, true> {
251 // the packets are the same type: do nothing
252 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
253};
254
255template <typename ComplexPacket>
256struct preinterpret_generic<typename unpacket_traits<ComplexPacket>::as_real, ComplexPacket, false> {
257 using RealPacket = typename unpacket_traits<ComplexPacket>::as_real;
258 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealPacket run(const ComplexPacket& a) { return a.v; }
259};
260
262template <typename Target, typename Packet>
263EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
264 return preinterpret_generic<Target, Packet>::run(a);
265}
266
267template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value,
268 bool TgtIsHalf = is_half<TgtPacket>::value>
269struct pcast_generic;
270
271template <typename SrcPacket, typename TgtPacket>
272struct pcast_generic<SrcPacket, TgtPacket, false, false> {
273 // the packets are not degenerate: attempt scalar static_cast
274 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
275 return cast_impl<SrcPacket, TgtPacket>::run(a);
276 }
277};
278
279template <typename Packet>
280struct pcast_generic<Packet, Packet, true, false> {
281 // the packets are the same: do nothing
282 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
283};
284
285template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
286struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
287 // the packets are degenerate: preinterpret is equivalent to pcast
288 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
289};
290
292template <typename SrcPacket, typename TgtPacket>
293EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
294 return pcast_generic<SrcPacket, TgtPacket>::run(a);
295}
296template <typename SrcPacket, typename TgtPacket>
297EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
298 return pcast_generic<SrcPacket, TgtPacket>::run(a, b);
299}
300template <typename SrcPacket, typename TgtPacket>
301EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
302 const SrcPacket& d) {
303 return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d);
304}
305template <typename SrcPacket, typename TgtPacket>
306EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
307 const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
308 const SrcPacket& h) {
309 return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d, e, f, g, h);
310}
311
312template <typename SrcPacket, typename TgtPacket>
313struct pcast_generic<SrcPacket, TgtPacket, false, true> {
314 // TgtPacket is a half packet of some other type
315 // perform cast and truncate result
316 using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
317 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
318 return preinterpret<TgtPacket>(pcast<SrcPacket, DefaultTgtPacket>(a));
319 }
320};
321
323template <typename Packet>
324EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) {
325 return a + b;
326}
327// Avoid compiler warning for boolean algebra.
328template <>
329EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) {
330 return a || b;
331}
332
// Masked variant of padd taking an extra mask argument of type
// unpacket_traits<Packet>::mask_t. Declaration only: no definition is visible
// in this section. The overload participates in overload resolution only when
// unpacket_traits<Packet>::masked_fpops_available is true.
337template <typename Packet>
338EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet> padd(
339 const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);
340
342template <typename Packet>
343EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) {
344 return a - b;
345}
346
348template <typename Packet>
349EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) {
350 EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value),
351 NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
352 return numext::negate(a);
353}
354
356template <typename Packet>
357EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) {
358 return numext::conj(a);
359}
360
362template <typename Packet>
363EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) {
364 return a * b;
365}
366// Avoid compiler warning for boolean algebra.
367template <>
368EIGEN_DEVICE_FUNC inline bool pmul(const bool& a, const bool& b) {
369 return a && b;
370}
371
373template <typename Packet>
374EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) {
375 return a / b;
376}
377// Avoid compiler warning for boolean algebra.
378template <>
379EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) {
380 return a && b;
381}
382
383// In the generic packet case, memset to all one bits.
384template <typename Packet, typename EnableIf = void>
385struct ptrue_impl {
386 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
387 Packet b;
388 memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
389 return b;
390 }
391};
392
393// Use a value of one for scalars.
394template <typename Scalar>
395struct ptrue_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
396 static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&) { return Scalar(1); }
397};
398
399// For booleans, we can only directly set a valid `bool` value to avoid UB.
400template <>
401struct ptrue_impl<bool, void> {
402 static EIGEN_DEVICE_FUNC inline bool run(const bool&) { return true; }
403};
404
406template <typename Packet>
407EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) {
408 return ptrue_impl<Packet>::run(a);
409}
410
411// In the general packet case, memset to zero.
412template <typename Packet, typename EnableIf = void>
413struct pzero_impl {
414 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
415 Packet b;
416 memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
417 return b;
418 }
419};
420
421// For scalars, explicitly set to Scalar(0), since the underlying representation
422// for zero may not consist of all-zero bits.
423template <typename T>
424struct pzero_impl<T, std::enable_if_t<is_scalar<T>::value>> {
425 static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); }
426};
427
429template <typename Packet>
430EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
431 return pzero_impl<Packet>::run(a);
432}
433
434template <typename T>
435struct bit_and {
436 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
437};
438
439template <typename T>
440struct bit_or {
441 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; }
442};
443
444template <typename T>
445struct bit_xor {
446 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; }
447};
448
449template <typename T>
450struct bit_not {
451 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; }
452};
453
454template <>
455struct bit_and<bool> {
456 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a && b; }
457};
458
459template <>
460struct bit_or<bool> {
461 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a || b; }
462};
463
464template <>
465struct bit_xor<bool> {
466 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a != b; }
467};
468
469template <>
470struct bit_not<bool> {
471 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; }
472};
473
474// Use operators &, |, ^, ~.
475template <typename T>
476struct operator_bitwise_helper {
477 EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
478 EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
479 EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
480 EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
481};
482
483// Apply binary operations byte-by-byte
484template <typename T>
485struct bytewise_bitwise_helper {
486 EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
487 return binary(a, b, bit_and<unsigned char>());
488 }
489 EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return binary(a, b, bit_or<unsigned char>()); }
490 EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
491 return binary(a, b, bit_xor<unsigned char>());
492 }
493 EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return unary(a, bit_not<unsigned char>()); }
494
495 private:
496 template <typename Op>
497 EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
498 const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
499 T c;
500 unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
501 for (size_t i = 0; i < sizeof(T); ++i) {
502 *c_ptr++ = op(*a_ptr++);
503 }
504 return c;
505 }
506
507 template <typename Op>
508 EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
509 const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
510 const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
511 T c;
512 unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
513 for (size_t i = 0; i < sizeof(T); ++i) {
514 *c_ptr++ = op(*a_ptr++, *b_ptr++);
515 }
516 return c;
517 }
518};
519
520// In the general case, use byte-by-byte manipulation.
521template <typename T, typename EnableIf = void>
522struct bitwise_helper : public bytewise_bitwise_helper<T> {};
523
524// For integers or non-trivial scalars, use binary operators.
525template <typename T>
526struct bitwise_helper<T, typename std::enable_if_t<is_scalar<T>::value &&
527 (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>>
528 : public operator_bitwise_helper<T> {};
529
531template <typename Packet>
532EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) {
533 return bitwise_helper<Packet>::bitwise_and(a, b);
534}
535
537template <typename Packet>
538EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) {
539 return bitwise_helper<Packet>::bitwise_or(a, b);
540}
541
543template <typename Packet>
544EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) {
545 return bitwise_helper<Packet>::bitwise_xor(a, b);
546}
547
549template <typename Packet>
550EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) {
551 return bitwise_helper<Packet>::bitwise_not(a);
552}
553
555template <typename Packet>
556EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
557 return pand(a, pnot(b));
558}
559
561template <typename Packet>
562EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
563 return a < b ? ptrue(a) : pzero(a);
564}
565
567template <typename Packet>
568EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
569 return a == b ? ptrue(a) : pzero(a);
570}
571
573template <typename Packet>
574EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
575 return por(pcmp_eq(a, b), pcmp_lt(a, b));
576}
577
579template <typename Packet>
580EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
581 return a >= b ? pzero(a) : ptrue(a);
582}
583
584// In the general case, use bitwise select.
585template <typename Packet, bool is_scalar = is_scalar<Packet>::value>
586struct pselect_impl {
587 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
588 return por(pand(a, mask), pandnot(b, mask));
589 }
590};
591
592// For scalars, use ternary select.
593template <typename Packet>
594struct pselect_impl<Packet, true> {
595 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
596 return numext::select(mask, a, b);
597 }
598};
599
601template <typename Packet>
602EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) {
603 return pselect_impl<Packet>::run(mask, a, b);
604}
605
606template <>
607EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, const bool& b) {
608 return cond ? a : b;
609}
610
613template <int NaNPropagation, bool IsInteger>
614struct pminmax_impl {
615 template <typename Packet, typename Op>
616 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
617 return op(a, b);
618 }
619};
620
623template <>
624struct pminmax_impl<PropagateNaN, false> {
625 template <typename Packet, typename Op>
626 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
627 Packet not_nan_mask_a = pcmp_eq(a, a);
628 Packet not_nan_mask_b = pcmp_eq(b, b);
629 return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), b), a);
630 }
631};
632
636template <>
637struct pminmax_impl<PropagateNumbers, false> {
638 template <typename Packet, typename Op>
639 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
640 Packet not_nan_mask_a = pcmp_eq(a, a);
641 Packet not_nan_mask_b = pcmp_eq(b, b);
642 return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), a), b);
643 }
644};
645
// Wraps the binary function `Func` into a capture-less lambda taking two
// `const Type&` arguments, so it can be passed as the `op` of pminmax_impl.
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
  [](const Type& aa, const Type& bb) { return Func(aa, bb); }
647
650template <typename Packet>
651EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
652 return numext::mini(a, b);
653}
654
657template <int NaNPropagation, typename Packet>
658EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
659 constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
660 return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
661}
662
665template <typename Packet>
666EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
667 return numext::maxi(a, b);
668}
669
672template <int NaNPropagation, typename Packet>
673EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
674 constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
675 return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
676}
677
679template <typename Packet>
680EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) {
681 return numext::abs(a);
682}
683template <>
684EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) {
685 return a;
686}
687template <>
688EIGEN_DEVICE_FUNC inline unsigned long pabs(const unsigned long& a) {
689 return a;
690}
691template <>
692EIGEN_DEVICE_FUNC inline unsigned long long pabs(const unsigned long long& a) {
693 return a;
694}
695
697template <typename Packet>
698EIGEN_DEVICE_FUNC inline Packet paddsub(const Packet& a, const Packet& b) {
699 return pselect(peven_mask(a), padd(a, b), psub(a, b));
700}
701
703template <typename Packet>
704EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) {
705 using numext::arg;
706 return arg(a);
707}
708
710template <int N, typename T>
711EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) {
712 return numext::arithmetic_shift_right(a, N);
713}
714
716template <int N, typename T>
717EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) {
718 return numext::logical_shift_right(a, N);
719}
720
722template <int N, typename T>
723EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) {
724 return numext::logical_shift_left(a, N);
725}
726
730template <typename Packet>
731EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
732 int exp;
733 EIGEN_USING_STD(frexp);
734 Packet result = static_cast<Packet>(frexp(a, &exp));
735 exponent = static_cast<Packet>(exp);
736 return result;
737}
738
742template <typename Packet>
743EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) {
744 EIGEN_USING_STD(ldexp)
745 return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
746}
747
749template <typename Packet>
750EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) {
751 return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b));
752}
753
755template <typename Packet>
756EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits<Packet>::type* from) {
757 return *from;
758}
759
764template <typename Packet>
765EIGEN_DEVICE_FUNC inline Packet pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
766 const Index offset = 0) {
767 const Index packet_size = unpacket_traits<Packet>::size;
768 eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
769 typedef typename unpacket_traits<Packet>::type Scalar;
770 EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
771 for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
772 elements[i] = from[i - offset];
773 }
774 return pload<Packet>(elements);
775}
776
778template <typename Packet>
779EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) {
780 return *from;
781}
782
785template <typename Packet>
786EIGEN_DEVICE_FUNC inline Packet ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
787 const Index offset = 0) {
788 const Index packet_size = unpacket_traits<Packet>::size;
789 eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
790 typedef typename unpacket_traits<Packet>::type Scalar;
791 EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
792 for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
793 elements[i] = from[i - offset];
794 }
795 return pload<Packet>(elements);
796}
797
// Masked variant of ploadu taking an extra mask argument of type
// unpacket_traits<Packet>::mask_t. Declaration only: no definition is visible
// in this section. The overload participates in overload resolution only when
// unpacket_traits<Packet>::masked_load_available is true.
802template <typename Packet>
803EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet> ploadu(
804 const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
805
807template <typename Packet>
808EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::type& a) {
809 return a;
810}
811
// Builds a packet from a value of type BitsType. Declaration only: no generic
// definition is visible in this section (presumably the bits of `a` are
// replicated into every coefficient -- confirm against the specializations).
813template <typename Packet, typename BitsType>
814EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a);
815
817template <typename Packet>
818EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::type* a) {
819 return pset1<Packet>(*a);
820}
821
827template <typename Packet>
828EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) {
829 return *from;
830}
831
838template <typename Packet>
839EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits<Packet>::type* from) {
840 return pload1<Packet>(from);
841}
842
852template <typename Packet>
853EIGEN_DEVICE_FUNC inline void pbroadcast4(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1,
854 Packet& a2, Packet& a3) {
855 a0 = pload1<Packet>(a + 0);
856 a1 = pload1<Packet>(a + 1);
857 a2 = pload1<Packet>(a + 2);
858 a3 = pload1<Packet>(a + 3);
859}
860
868template <typename Packet>
869EIGEN_DEVICE_FUNC inline void pbroadcast2(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1) {
870 a0 = pload1<Packet>(a + 0);
871 a1 = pload1<Packet>(a + 1);
872}
873
875template <typename Packet>
876EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) {
877 return a;
878}
879
880template <typename Packet, typename EnableIf = void>
881struct peven_mask_impl {
882 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet&) {
883 typedef typename unpacket_traits<Packet>::type Scalar;
884 const size_t n = unpacket_traits<Packet>::size;
885 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
886 for (size_t i = 0; i < n; ++i) {
887 memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
888 }
889 return ploadu<Packet>(elements);
890 }
891};
892
893template <typename Scalar>
894struct peven_mask_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
895 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar&) { return Scalar(1); }
896};
897
900template <typename Packet>
901EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& a) {
902 return peven_mask_impl<Packet>::run(a);
903}
904
906template <typename Scalar, typename Packet>
907EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) {
908 (*to) = from;
909}
910
914template <typename Scalar, typename Packet>
915EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
916 const Index packet_size = unpacket_traits<Packet>::size;
917 eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
918 EIGEN_ALIGN_MAX Scalar elements[packet_size];
919 pstore<Scalar>(elements, from);
920 for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
921 to[i] = elements[i + offset];
922 }
923}
924
926template <typename Scalar, typename Packet>
927EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) {
928 (*to) = from;
929}
930
932template <typename Scalar, typename Packet>
933EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
934 const Index packet_size = unpacket_traits<Packet>::size;
935 eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
936 EIGEN_ALIGN_MAX Scalar elements[packet_size];
937 pstore<Scalar>(elements, from);
938 for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
939 to[i] = elements[i + offset];
940 }
941}
942
// Masked variant of pstoreu taking an extra mask argument of type
// unpacket_traits<Packet>::mask_t. Declaration only: no definition is visible
// in this section. The overload participates in overload resolution only when
// unpacket_traits<Packet>::masked_store_available is true.
947template <typename Scalar, typename Packet>
948EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void> pstoreu(
949 Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
950
951template <typename Scalar, typename Packet>
952EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) {
953 return ploadu<Packet>(from);
954}
955
956template <typename Scalar, typename Packet>
957EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) {
958 const Index packet_size = unpacket_traits<Packet>::size;
959 EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
960 for (Index i = 0; i < numext::mini(n, packet_size); i++) {
961 elements[i] = from[i * stride];
962 }
963 return pload<Packet>(elements);
964}
965
966template <typename Scalar, typename Packet>
967EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) {
968 pstore(to, from);
969}
970
971template <typename Scalar, typename Packet>
972EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) {
973 const Index packet_size = unpacket_traits<Packet>::size;
974 EIGEN_ALIGN_MAX Scalar elements[packet_size];
975 pstore<Scalar>(elements, from);
976 for (Index i = 0; i < numext::mini(n, packet_size); i++) {
977 to[i * stride] = elements[i];
978 }
979}
980
// Issues a prefetch hint for the memory at `addr`, using whatever mechanism
// the current toolchain offers:
//   - HIP device compilation: nothing is emitted;
//   - CUDA device compilation: an inline-asm `prefetch.L1` instruction;
//   - non-MSVC GNUC / Clang / ICC: __builtin_prefetch.
// When none of the branches applies, the function body is empty.
982template <typename Scalar>
983EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) {
984#if defined(EIGEN_HIP_DEVICE_COMPILE)
985 // do nothing
986#elif defined(EIGEN_CUDA_ARCH)
987#if defined(__LP64__) || EIGEN_OS_WIN64
988 // 64-bit pointer operand constraint ("l") for inlined asm
989 asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
990#else
991 // 32-bit pointer operand constraint ("r") for inlined asm
992 asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
993#endif
994#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
995 __builtin_prefetch(addr);
996#endif
997}
998
1000template <typename Packet>
1001EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
1002 return a;
1003}
1004
1006template <typename Packet>
1007EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
1008 return Packet(numext::imag(a), numext::real(a));
1009}
1010
1011/**************************
1012 * Special math functions
1013 ***************************/
1014
1016template <typename Packet>
1017EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
1018 return pandnot(ptrue(a), pcmp_eq(a, a));
1019}
1020
1022template <typename Packet>
1023EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) {
1024 using Scalar = typename unpacket_traits<Packet>::type;
1025 constexpr Scalar inf = NumTraits<Scalar>::infinity();
1026 return pcmp_eq(pabs(a), pset1<Packet>(inf));
1027}
1028
1030template <typename Packet>
1031EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) {
1032 EIGEN_USING_STD(sin);
1033 return sin(a);
1034}
1035
1037template <typename Packet>
1038EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) {
1039 EIGEN_USING_STD(cos);
1040 return cos(a);
1041}
1042
1044template <typename Packet>
1045EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) {
1046 EIGEN_USING_STD(tan);
1047 return tan(a);
1048}
1049
1051template <typename Packet>
1052EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) {
1053 EIGEN_USING_STD(asin);
1054 return asin(a);
1055}
1056
1058template <typename Packet>
1059EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) {
1060 EIGEN_USING_STD(acos);
1061 return acos(a);
1062}
1063
1065template <typename Packet>
1066EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) {
1067 EIGEN_USING_STD(sinh);
1068 return sinh(a);
1069}
1070
1072template <typename Packet>
1073EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) {
1074 EIGEN_USING_STD(cosh);
1075 return cosh(a);
1076}
1077
1079template <typename Packet>
1080EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) {
1081 EIGEN_USING_STD(atan);
1082 return atan(a);
1083}
1084
1086template <typename Packet>
1087EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) {
1088 EIGEN_USING_STD(tanh);
1089 return tanh(a);
1090}
1091
1093template <typename Packet>
1094EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& a) {
1095 EIGEN_USING_STD(atanh);
1096 return atanh(a);
1097}
1098
1100template <typename Packet>
1101EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
1102 return numext::exp(a);
1103}
1104
1106template <typename Packet>
1107EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) {
1108 return numext::exp2(a);
1109}
1110
1112template <typename Packet>
1113EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet& a) {
1114 return numext::expm1(a);
1115}
1116
1118template <typename Packet>
1119EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) {
1120 EIGEN_USING_STD(log);
1121 return log(a);
1122}
1123
1125template <typename Packet>
1126EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) {
1127 return numext::log1p(a);
1128}
1129
1131template <typename Packet>
1132EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) {
1133 EIGEN_USING_STD(log10);
1134 return log10(a);
1135}
1136
1138template <typename Packet>
1139EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
1140 using Scalar = typename internal::unpacket_traits<Packet>::type;
1141 using RealScalar = typename NumTraits<Scalar>::Real;
1142 return pmul(pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E))), plog(a));
1143}
1144
1146template <typename Packet>
1147EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) {
1148 return numext::sqrt(a);
1149}
1150
1152template <typename Packet>
1153EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& a) {
1154 return numext::cbrt(a);
1155}
1156
1157template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1158 bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1159struct nearest_integer_packetop_impl {
1160 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); }
1161 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); }
1162 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); }
1163 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); }
1164 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); }
1165};
1166
1168template <typename Packet>
1169EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) {
1170 return nearest_integer_packetop_impl<Packet>::run_round(a);
1171}
1172
1174template <typename Packet>
1175EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) {
1176 return nearest_integer_packetop_impl<Packet>::run_floor(a);
1177}
1178
1181template <typename Packet>
1182EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) {
1183 return nearest_integer_packetop_impl<Packet>::run_rint(a);
1184}
1185
1187template <typename Packet>
1188EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) {
1189 return nearest_integer_packetop_impl<Packet>::run_ceil(a);
1190}
1191
1193template <typename Packet>
1194EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) {
1195 return nearest_integer_packetop_impl<Packet>::run_trunc(a);
1196}
1197
1198template <typename Packet, typename EnableIf = void>
1199struct psign_impl {
1200 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { return numext::sign(a); }
1201};
1202
1204template <typename Packet>
1205EIGEN_DEVICE_FUNC inline Packet psign(const Packet& a) {
1206 return psign_impl<Packet>::run(a);
1207}
1208
1209template <>
1210EIGEN_DEVICE_FUNC inline bool psign(const bool& a) {
1211 return a;
1212}
1213
1215template <typename Packet>
1216EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) {
1217 return a;
1218}
1219
1224template <typename Packet>
1225EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size % 8) == 0,
1226 typename unpacket_traits<Packet>::half, Packet>
1227predux_half_dowto4(const Packet& a) {
1228 return a;
1229}
1230
1231// Slow generic implementation of Packet reduction.
1232template <typename Packet, typename Op>
1233EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_helper(const Packet& a, Op op) {
1234 typedef typename unpacket_traits<Packet>::type Scalar;
1235 const size_t n = unpacket_traits<Packet>::size;
1236 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
1237 pstoreu<Scalar>(elements, a);
1238 for (size_t k = n / 2; k > 0; k /= 2) {
1239 for (size_t i = 0; i < k; ++i) {
1240 elements[i] = op(elements[i], elements[i + k]);
1241 }
1242 }
1243 return elements[0];
1244}
1245
1247template <typename Packet>
1248EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) {
1249 return a;
1250}
1251
1253template <typename Packet>
1254EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) {
1255 typedef typename unpacket_traits<Packet>::type Scalar;
1256 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
1257}
1258
1260template <typename Packet>
1261EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1262 typedef typename unpacket_traits<Packet>::type Scalar;
1263 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<Scalar>)));
1264}
1265
1267template <typename Packet>
1268EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1269 typedef typename unpacket_traits<Packet>::type Scalar;
1270 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<Scalar>)));
1271}
1272
1273template <int NaNPropagation, typename Packet>
1274struct predux_min_max_helper_impl {
1275 using Scalar = typename unpacket_traits<Packet>::type;
1276 static constexpr bool UsePredux_ = NaNPropagation == PropagateFast || NumTraits<Scalar>::IsInteger;
1277 template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
1278 static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
1279 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
1280 }
1281 template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
1282 static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
1283 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
1284 }
1285 template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
1286 static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
1287 return predux_min(a);
1288 }
1289 template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
1290 static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
1291 return predux_max(a);
1292 }
1293};
1294
1295template <int NaNPropagation, typename Packet>
1296EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1297 return predux_min_max_helper_impl<NaNPropagation, Packet>::run_min(a);
1298}
1299
1300template <int NaNPropagation, typename Packet>
1301EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1302 return predux_min_max_helper_impl<NaNPropagation, Packet>::run_max(a);
1303}
1304
1305#undef EIGEN_BINARY_OP_NAN_PROPAGATION
1306
1310// not needed yet
1311// template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
1312// { return bool(a); }
1313
1317template <typename Packet>
1318EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) {
1319 // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames.
1320 // It is expected that "true" is either:
1321 // - Scalar(1)
1322 // - bits full of ones (NaN for floats),
1323 // - or first bit equals to 1 (1 for ints, smallest denormal for floats).
1324 // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
1325 typedef typename unpacket_traits<Packet>::type Scalar;
1326 return numext::not_equal_strict(predux(a), Scalar(0));
1327}
1328
1329/***************************************************************************
1330 * The following functions might not have to be overwritten for vectorized types
1331 ***************************************************************************/
1332
1333template <typename Packet, typename EnableIf = void>
1334struct pmadd_impl {
1335 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1336 return padd(pmul(a, b), c);
1337 }
1338 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1339 return psub(pmul(a, b), c);
1340 }
1341 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1342 return psub(c, pmul(a, b));
1343 }
1344 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1345 return pnegate(pmadd(a, b, c));
1346 }
1347};
1348
1349template <typename Scalar>
1350struct pmadd_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value && NumTraits<Scalar>::IsSigned>> {
1351 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
1352 return numext::madd<Scalar>(a, b, c);
1353 }
1354 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
1355 return numext::madd<Scalar>(a, b, Scalar(-c));
1356 }
1357 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
1358 return numext::madd<Scalar>(Scalar(-a), b, c);
1359 }
1360 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
1361 return -Scalar(numext::madd<Scalar>(a, b, c));
1362 }
1363};
1364
1365// Multiply-add instructions.
1367template <typename Packet>
1368EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1369 return pmadd_impl<Packet>::pmadd(a, b, c);
1370}
1371
1373template <typename Packet>
1374EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1375 return pmadd_impl<Packet>::pmsub(a, b, c);
1376}
1377
1379template <typename Packet>
1380EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1381 return pmadd_impl<Packet>::pnmadd(a, b, c);
1382}
1383
1385template <typename Packet>
1386EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1387 return pmadd_impl<Packet>::pnmsub(a, b, c);
1388}
1389
1392// NOTE: this function must really be templated on the packet type (think about different packet types for the same
1393// scalar type)
1394template <typename Packet>
1395inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) {
1396 pstore(to, pset1<Packet>(a));
1397}
1398
1401template <typename Packet, int Alignment>
1402EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) {
1403 if (Alignment >= unpacket_traits<Packet>::alignment)
1404 return pload<Packet>(from);
1405 else
1406 return ploadu<Packet>(from);
1407}
1408
1411template <typename Packet, int Alignment>
1412EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from,
1413 const Index n, const Index offset = 0) {
1414 if (Alignment >= unpacket_traits<Packet>::alignment)
1415 return pload_partial<Packet>(from, n, offset);
1416 else
1417 return ploadu_partial<Packet>(from, n, offset);
1418}
1419
1422template <typename Scalar, typename Packet, int Alignment>
1423EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) {
1424 if (Alignment >= unpacket_traits<Packet>::alignment)
1425 pstore(to, from);
1426 else
1427 pstoreu(to, from);
1428}
1429
1432template <typename Scalar, typename Packet, int Alignment>
1433EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n,
1434 const Index offset = 0) {
1435 if (Alignment >= unpacket_traits<Packet>::alignment)
1436 pstore_partial(to, from, n, offset);
1437 else
1438 pstoreu_partial(to, from, n, offset);
1439}
1440
1446template <typename Packet, int LoadMode>
1447EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) {
1448 return ploadt<Packet, LoadMode>(from);
1449}
1450
1451/***************************************************************************
1452 * Fast complex products (GCC generates a function call which is very slow)
1453 ***************************************************************************/
1454
1455// Eigen+CUDA does not support complexes.
1456#if !defined(EIGEN_GPUCC)
1457
1458template <>
1459inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) {
1460 return std::complex<float>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1461}
1462
1463template <>
1464inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) {
1465 return std::complex<double>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1466}
1467
1468#endif
1469
1470/***************************************************************************
1471 * PacketBlock, that is a collection of N packets where the number of words
1472 * in the packet is a multiple of N.
1473 ***************************************************************************/
1474template <typename Packet, int N = unpacket_traits<Packet>::size>
1475struct PacketBlock {
1476 Packet packet[N];
1477};
1478
1479template <typename Packet>
1480EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, 1>& /*kernel*/) {
1481 // Nothing to do in the scalar case, i.e. a 1x1 matrix.
1482}
1483
1485template <typename Packet>
1486EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
1487 using Scalar = typename unpacket_traits<Packet>::type;
1488 return pdiv(pset1<Packet>(Scalar(1)), a);
1489}
1490
1492template <typename Packet>
1493EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) {
1494 return preciprocal<Packet>(psqrt(a));
1495}
1496
1497template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1498 bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1499struct psignbit_impl;
1500template <typename Packet, bool IsInteger>
1501struct psignbit_impl<Packet, true, IsInteger> {
1502 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
1503};
1504template <typename Packet>
1505struct psignbit_impl<Packet, false, false> {
1506 // generic implementation if not specialized in PacketMath.h
1507 // slower than arithmetic shift
1508 typedef typename unpacket_traits<Packet>::type Scalar;
1509 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
1510 const Packet cst_pos_one = pset1<Packet>(Scalar(1));
1511 const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
1512 return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
1513 }
1514};
1515template <typename Packet>
1516struct psignbit_impl<Packet, false, true> {
1517 // generic implementation for integer packets
1518 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
1519};
1521template <typename Packet>
1522EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet psignbit(const Packet& a) {
1523 return psignbit_impl<Packet>::run(a);
1524}
1525
1527template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1528EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1529 return numext::atan2(y, x);
1530}
1531
1533template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1534EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1535 typedef typename internal::unpacket_traits<Packet>::type Scalar;
1536
1537 // See https://en.cppreference.com/w/cpp/numeric/math/atan2
1538 // for how corner cases are supposed to be handled according to the
1539 // IEEE floating-point standard (IEC 60559).
1540 const Packet kSignMask = pset1<Packet>(-Scalar(0));
1541 const Packet kZero = pzero(x);
1542 const Packet kOne = pset1<Packet>(Scalar(1));
1543 const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
1544
1545 const Packet x_has_signbit = psignbit(x);
1546 const Packet y_signmask = pand(y, kSignMask);
1547 const Packet x_signmask = pand(x, kSignMask);
1548 const Packet result_signmask = pxor(y_signmask, x_signmask);
1549 const Packet shift = por(pand(x_has_signbit, kPi), y_signmask);
1550
1551 const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y));
1552 const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero);
1553
1554 Packet arg = pdiv(y, x);
1555 arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg);
1556 arg = pselect(x_and_y_are_zero, result_signmask, arg);
1557
1558 Packet result = patan(arg);
1559 result = padd(result, shift);
1560 return result;
1561}
1562
1564template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1565EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1566 return Packet(numext::arg(a));
1567}
1568
1570template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1571EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1572 EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex,
1573 THIS METHOD IS FOR COMPLEX TYPES ONLY)
1574 using RealPacket = typename unpacket_traits<Packet>::as_real;
1575 // a // r i r i ...
1576 RealPacket aflip = pcplxflip(a).v; // i r i r ...
1577 RealPacket result = patan2(aflip, a.v); // atan2 crap atan2 crap ...
1578 return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
1579}
1580
1583template <typename Packet>
1584EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1585 Index count) {
1586 using Scalar = typename unpacket_traits<Packet>::type;
1587 constexpr Index PacketSize = unpacket_traits<Packet>::size;
1588 eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
1589 Scalar aux[PacketSize] = {};
1590 for (Index k = begin; k < begin + count; k++) {
1591 aux[k] = from[k];
1592 }
1593 return ploadu<Packet>(aux);
1594}
1595
1598template <typename Packet>
1599EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1600 Index count) {
1601 return ploaduSegment<Packet>(from, begin, count);
1602}
1603
1607template <typename Scalar, typename Packet>
1608EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1609 constexpr Index PacketSize = unpacket_traits<Packet>::size;
1610 eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
1611 Scalar aux[PacketSize];
1612 pstoreu<Scalar, Packet>(aux, from);
1613 for (Index k = begin; k < begin + count; k++) {
1614 to[k] = aux[k];
1615 }
1616}
1617
1621template <typename Scalar, typename Packet>
1622EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1623 return pstoreuSegment(to, from, begin, count);
1624}
1625
1628template <typename Packet, int Alignment>
1629EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1630 Index count) {
1631 constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
1632 if (Alignment >= RequiredAlignment) {
1633 return ploadSegment<Packet>(from, begin, count);
1634 } else {
1635 return ploaduSegment<Packet>(from, begin, count);
1636 }
1637}
1638
1641template <typename Scalar, typename Packet, int Alignment>
1642EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1643 constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
1644 if (Alignment >= RequiredAlignment) {
1645 pstoreSegment<Scalar, Packet>(to, from, begin, count);
1646 } else {
1647 pstoreuSegment<Scalar, Packet>(to, from, begin, count);
1648 }
1649}
1650
1651#ifndef EIGEN_NO_IO
1652
1653template <typename Packet>
1654class StreamablePacket {
1655 public:
1656 using Scalar = typename unpacket_traits<Packet>::type;
1657 StreamablePacket(const Packet& packet) { pstoreu(v_, packet); }
1658
1659 friend std::ostream& operator<<(std::ostream& os, const StreamablePacket& packet) {
1660 os << "{" << packet.v_[0];
1661 for (int i = 1; i < unpacket_traits<Packet>::size; ++i) {
1662 os << "," << packet.v_[i];
1663 }
1664 os << "}";
1665 return os;
1666 }
1667
1668 private:
1669 Scalar v_[unpacket_traits<Packet>::size];
1670};
1671
1675template <typename Packet>
1676StreamablePacket<Packet> postream(const Packet& packet) {
1677 return StreamablePacket<Packet>(packet);
1678}
1679
1680#endif // EIGEN_NO_IO
1681
1682} // end namespace internal
1683
1684} // end namespace Eigen
1685
1686#endif // EIGEN_GENERIC_PACKET_MATH_H
@ PropagateNaN
Definition Constants.h:342
@ PropagateNumbers
Definition Constants.h:344
@ PropagateFast
Definition Constants.h:340
Namespace containing all symbols from the Eigen library.
Definition B01_Experimental.dox:1
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cosh_op< typename Derived::Scalar >, const Derived > cosh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cos_op< typename Derived::Scalar >, const Derived > cos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atanh_op< typename Derived::Scalar >, const Derived > atanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_exp_op< typename Derived::Scalar >, const Derived > exp(const Eigen::ArrayBase< Derived > &x)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:82
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tan_op< typename Derived::Scalar >, const Derived > tan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atan_op< typename Derived::Scalar >, const Derived > atan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log_op< typename Derived::Scalar >, const Derived > log(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sin_op< typename Derived::Scalar >, const Derived > sin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_asin_op< typename Derived::Scalar >, const Derived > asin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tanh_op< typename Derived::Scalar >, const Derived > tanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_acos_op< typename Derived::Scalar >, const Derived > acos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sinh_op< typename Derived::Scalar >, const Derived > sinh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log10_op< typename Derived::Scalar >, const Derived > log10(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)