Eigen  5.0.1-dev+60122df6
 
Loading...
Searching...
No Matches
GenericPacketMath.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_GENERIC_PACKET_MATH_H
12#define EIGEN_GENERIC_PACKET_MATH_H
13
14// IWYU pragma: private
15#include "./InternalHeaderCheck.h"
16
17namespace Eigen {
18
19namespace internal {
20
28
29#ifndef EIGEN_DEBUG_ALIGNED_LOAD
30#define EIGEN_DEBUG_ALIGNED_LOAD
31#endif
32
33#ifndef EIGEN_DEBUG_UNALIGNED_LOAD
34#define EIGEN_DEBUG_UNALIGNED_LOAD
35#endif
36
37#ifndef EIGEN_DEBUG_ALIGNED_STORE
38#define EIGEN_DEBUG_ALIGNED_STORE
39#endif
40
41#ifndef EIGEN_DEBUG_UNALIGNED_STORE
42#define EIGEN_DEBUG_UNALIGNED_STORE
43#endif
44
45struct default_packet_traits {
46 enum {
47 // Ops that are implemented for most types.
48 HasAdd = 1,
49 HasSub = 1,
50 HasShift = 1,
51 HasMul = 1,
52 HasNegate = 1,
53 HasAbs = 1,
54 HasAbs2 = 1,
55 HasMin = 1,
56 HasMax = 1,
57 HasConj = 1,
58 HasSetLinear = 1,
59 HasSign = 1,
60 // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet
61 // types
62 HasRound = 1,
63
64 HasArg = 0,
65 HasAbsDiff = 0,
66 HasBlend = 0,
67 // This flag is used to indicate whether packet comparison is supported.
68 // pcmp_eq and pcmp_lt should be defined for it to be true.
69 HasCmp = 0,
70
71 HasDiv = 0,
72 HasReciprocal = 0,
73 HasSqrt = 0,
74 HasRsqrt = 0,
75 HasCbrt = 0,
76 HasExp = 0,
77 HasExpm1 = 0,
78 HasLog = 0,
79 HasLog1p = 0,
80 HasLog10 = 0,
81 HasPow = 0,
82 HasSin = 0,
83 HasCos = 0,
84 HasTan = 0,
85 HasASin = 0,
86 HasACos = 0,
87 HasATan = 0,
88 HasATanh = 0,
89 HasSinh = 0,
90 HasCosh = 0,
91 HasTanh = 0,
92 HasLGamma = 0,
93 HasDiGamma = 0,
94 HasZeta = 0,
95 HasPolygamma = 0,
96 HasErf = 0,
97 HasErfc = 0,
98 HasNdtri = 0,
99 HasBessel = 0,
100 HasIGamma = 0,
101 HasIGammaDerA = 0,
102 HasGammaSampleDerAlpha = 0,
103 HasIGammac = 0,
104 HasBetaInc = 0
105 };
106};
107
108template <typename T>
109struct packet_traits : default_packet_traits {
110 typedef T type;
111 typedef T half;
112 enum {
113 Vectorizable = 0,
114 size = 1,
115 AlignedOnScalar = 0,
116 };
117 enum {
118 HasAdd = 0,
119 HasSub = 0,
120 HasMul = 0,
121 HasNegate = 0,
122 HasAbs = 0,
123 HasAbs2 = 0,
124 HasMin = 0,
125 HasMax = 0,
126 HasConj = 0,
127 HasSetLinear = 0
128 };
129};
130
131template <typename T>
132struct packet_traits<const T> : packet_traits<T> {};
133
134template <typename T>
135struct unpacket_traits {
136 typedef T type;
137 typedef T half;
138 typedef typename numext::get_integer_by_size<sizeof(T)>::signed_type integer_packet;
139 enum {
140 size = 1,
141 alignment = alignof(T),
142 vectorizable = false,
143 masked_load_available = false,
144 masked_store_available = false
145 };
146};
147
148template <typename T>
149struct unpacket_traits<const T> : unpacket_traits<T> {};
150
154template <typename Packet>
155struct is_scalar {
156 using Scalar = typename unpacket_traits<Packet>::type;
157 enum { value = internal::is_same<Packet, Scalar>::value };
158};
159
160// automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
161// 1) the packets are the same type, or
162// 2) the packets differ only in sign.
163// In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
164template <typename SrcPacket, typename TgtPacket,
165 bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
166struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};
167template <>
168struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};
169template <>
170struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
171template <>
172struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
173template <>
174struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
175
176template <typename SrcPacket, typename TgtPacket>
177struct is_degenerate_helper<SrcPacket, TgtPacket, false> {
178 using SrcScalar = typename unpacket_traits<SrcPacket>::type;
179 static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
180 using TgtScalar = typename unpacket_traits<TgtPacket>::type;
181 static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
182 static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
183};
184
185// is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
186template <typename SrcPacket, typename TgtPacket>
187struct is_degenerate {
188 static constexpr bool value =
189 is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;
190};
191
192template <typename Packet>
193struct is_half {
194 using Scalar = typename unpacket_traits<Packet>::type;
195 static constexpr int Size = unpacket_traits<Packet>::size;
196 using DefaultPacket = typename packet_traits<Scalar>::type;
197 static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
198 static constexpr bool value = Size != 1 && Size < DefaultSize;
199};
200
201template <typename Src, typename Tgt>
202struct type_casting_traits {
203 enum {
204 VectorizedCast =
205 is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
206 SrcCoeffRatio = 1,
207 TgtCoeffRatio = 1
208 };
209};
210
211// provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types
212template <typename Src, typename Tgt>
213struct vectorized_type_casting_traits {
214 enum : int {
215 DefaultSrcPacketSize = packet_traits<Src>::size,
216 DefaultTgtPacketSize = packet_traits<Tgt>::size,
217 VectorizedCast = 1,
218 SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1),
219 TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1)
220 };
221};
222
225template <typename T, int unique_id = 0>
226struct eigen_packet_wrapper {
227 EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
228 EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
229 EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
230 EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T& v) : m_val(v) {}
231 EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T& v) {
232 m_val = v;
233 return *this;
234 }
235
236 T m_val;
237};
238
239template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
240struct preinterpret_generic;
241
242template <typename Target, typename Packet>
243struct preinterpret_generic<Target, Packet, false> {
244 // the packets are not the same, attempt scalar bit_cast
245 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
246 return numext::bit_cast<Target, Packet>(a);
247 }
248};
249
250template <typename Packet>
251struct preinterpret_generic<Packet, Packet, true> {
252 // the packets are the same type: do nothing
253 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
254};
255
256template <typename ComplexPacket>
257struct preinterpret_generic<typename unpacket_traits<ComplexPacket>::as_real, ComplexPacket, false> {
258 using RealPacket = typename unpacket_traits<ComplexPacket>::as_real;
259 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealPacket run(const ComplexPacket& a) { return a.v; }
260};
261
263template <typename Target, typename Packet>
264EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
265 return preinterpret_generic<Target, Packet>::run(a);
266}
267
268template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value,
269 bool TgtIsHalf = is_half<TgtPacket>::value>
270struct pcast_generic;
271
272template <typename SrcPacket, typename TgtPacket>
273struct pcast_generic<SrcPacket, TgtPacket, false, false> {
274 // the packets are not degenerate: attempt scalar static_cast
275 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
276 return cast_impl<SrcPacket, TgtPacket>::run(a);
277 }
278};
279
280template <typename Packet>
281struct pcast_generic<Packet, Packet, true, false> {
282 // the packets are the same: do nothing
283 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
284};
285
286template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
287struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
288 // the packets are degenerate: preinterpret is equivalent to pcast
289 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
290};
291
293template <typename SrcPacket, typename TgtPacket>
294EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
295 return pcast_generic<SrcPacket, TgtPacket>::run(a);
296}
297template <typename SrcPacket, typename TgtPacket>
298EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
299 return pcast_generic<SrcPacket, TgtPacket>::run(a, b);
300}
301template <typename SrcPacket, typename TgtPacket>
302EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
303 const SrcPacket& d) {
304 return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d);
305}
306template <typename SrcPacket, typename TgtPacket>
307EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
308 const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
309 const SrcPacket& h) {
310 return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d, e, f, g, h);
311}
312
313template <typename SrcPacket, typename TgtPacket>
314struct pcast_generic<SrcPacket, TgtPacket, false, true> {
315 // TgtPacket is a half packet of some other type
316 // perform cast and truncate result
317 using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
318 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
319 return preinterpret<TgtPacket>(pcast<SrcPacket, DefaultTgtPacket>(a));
320 }
321};
322
324template <typename Packet>
325EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) {
326 return a + b;
327}
328// Avoid compiler warning for boolean algebra.
329template <>
330EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) {
331 return a || b;
332}
333
338template <typename Packet>
339EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet> padd(
340 const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);
341
343template <typename Packet>
344EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) {
345 return a - b;
346}
347
349template <typename Packet>
350EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) {
351 EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value),
352 NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
353 return numext::negate(a);
354}
355
357template <typename Packet>
358EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) {
359 return numext::conj(a);
360}
361
363template <typename Packet>
364EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) {
365 return a * b;
366}
367// Avoid compiler warning for boolean algebra.
368template <>
369EIGEN_DEVICE_FUNC inline bool pmul(const bool& a, const bool& b) {
370 return a && b;
371}
372
374template <typename Packet>
375EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) {
376 return a / b;
377}
378// Avoid compiler warning for boolean algebra.
379template <>
380EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) {
381 return a && b;
382}
383
384// In the generic packet case, memset to all one bits.
385template <typename Packet, typename EnableIf = void>
386struct ptrue_impl {
387 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
388 Packet b;
389 memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
390 return b;
391 }
392};
393
394// Use a value of one for scalars.
395template <typename Scalar>
396struct ptrue_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
397 static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&) { return Scalar(1); }
398};
399
400// For booleans, we can only directly set a valid `bool` value to avoid UB.
401template <>
402struct ptrue_impl<bool, void> {
403 static EIGEN_DEVICE_FUNC inline bool run(const bool&) { return true; }
404};
405
407template <typename Packet>
408EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) {
409 return ptrue_impl<Packet>::run(a);
410}
411
412// In the general packet case, memset to zero.
413template <typename Packet, typename EnableIf = void>
414struct pzero_impl {
415 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
416 Packet b;
417 memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
418 return b;
419 }
420};
421
422// For scalars, explicitly set to Scalar(0), since the underlying representation
423// for zero may not consist of all-zero bits.
424template <typename T>
425struct pzero_impl<T, std::enable_if_t<is_scalar<T>::value>> {
426 static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); }
427};
428
430template <typename Packet>
431EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
432 return pzero_impl<Packet>::run(a);
433}
434
435template <typename T>
436struct bit_and {
437 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
438};
439
440template <typename T>
441struct bit_or {
442 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; }
443};
444
445template <typename T>
446struct bit_xor {
447 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; }
448};
449
450template <typename T>
451struct bit_not {
452 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; }
453};
454
455template <>
456struct bit_and<bool> {
457 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a && b; }
458};
459
460template <>
461struct bit_or<bool> {
462 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a || b; }
463};
464
465template <>
466struct bit_xor<bool> {
467 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a != b; }
468};
469
470template <>
471struct bit_not<bool> {
472 EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; }
473};
474
475// Use operators &, |, ^, ~.
476template <typename T>
477struct operator_bitwise_helper {
478 EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
479 EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
480 EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
481 EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
482};
483
484// Apply binary operations byte-by-byte
485template <typename T>
486struct bytewise_bitwise_helper {
487 EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
488 return binary(a, b, bit_and<unsigned char>());
489 }
490 EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return binary(a, b, bit_or<unsigned char>()); }
491 EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
492 return binary(a, b, bit_xor<unsigned char>());
493 }
494 EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return unary(a, bit_not<unsigned char>()); }
495
496 private:
497 template <typename Op>
498 EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
499 const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
500 T c;
501 unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
502 for (size_t i = 0; i < sizeof(T); ++i) {
503 *c_ptr++ = op(*a_ptr++);
504 }
505 return c;
506 }
507
508 template <typename Op>
509 EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
510 const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
511 const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
512 T c;
513 unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
514 for (size_t i = 0; i < sizeof(T); ++i) {
515 *c_ptr++ = op(*a_ptr++, *b_ptr++);
516 }
517 return c;
518 }
519};
520
521// In the general case, use byte-by-byte manipulation.
522template <typename T, typename EnableIf = void>
523struct bitwise_helper : public bytewise_bitwise_helper<T> {};
524
525// For integers or non-trivial scalars, use binary operators.
526template <typename T>
527struct bitwise_helper<T, typename std::enable_if_t<is_scalar<T>::value &&
528 (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>>
529 : public operator_bitwise_helper<T> {};
530
532template <typename Packet>
533EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) {
534 return bitwise_helper<Packet>::bitwise_and(a, b);
535}
536
538template <typename Packet>
539EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) {
540 return bitwise_helper<Packet>::bitwise_or(a, b);
541}
542
544template <typename Packet>
545EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) {
546 return bitwise_helper<Packet>::bitwise_xor(a, b);
547}
548
550template <typename Packet>
551EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) {
552 return bitwise_helper<Packet>::bitwise_not(a);
553}
554
556template <typename Packet>
557EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
558 return pand(a, pnot(b));
559}
560
562template <typename Packet>
563EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
564 return a < b ? ptrue(a) : pzero(a);
565}
566
568template <typename Packet>
569EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
570 return a == b ? ptrue(a) : pzero(a);
571}
572
574template <typename Packet>
575EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
576 return por(pcmp_eq(a, b), pcmp_lt(a, b));
577}
578
580template <typename Packet>
581EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
582 return a >= b ? pzero(a) : ptrue(a);
583}
584
585// In the general case, use bitwise select.
586template <typename Packet, bool is_scalar = is_scalar<Packet>::value>
587struct pselect_impl {
588 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
589 return por(pand(a, mask), pandnot(b, mask));
590 }
591};
592
593// For scalars, use ternary select.
594template <typename Packet>
595struct pselect_impl<Packet, true> {
596 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
597 return numext::select(mask, a, b);
598 }
599};
600
602template <typename Packet>
603EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) {
604 return pselect_impl<Packet>::run(mask, a, b);
605}
606
607template <>
608EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, const bool& b) {
609 return cond ? a : b;
610}
611
614template <int NaNPropagation, bool IsInteger>
615struct pminmax_impl {
616 template <typename Packet, typename Op>
617 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
618 return op(a, b);
619 }
620};
621
624template <>
625struct pminmax_impl<PropagateNaN, false> {
626 template <typename Packet, typename Op>
627 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
628 Packet not_nan_mask_a = pcmp_eq(a, a);
629 Packet not_nan_mask_b = pcmp_eq(b, b);
630 return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), b), a);
631 }
632};
633
637template <>
638struct pminmax_impl<PropagateNumbers, false> {
639 template <typename Packet, typename Op>
640 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
641 Packet not_nan_mask_a = pcmp_eq(a, a);
642 Packet not_nan_mask_b = pcmp_eq(b, b);
643 return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), a), b);
644 }
645};
646
647#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& aa, const Type& bb) { return Func(aa, bb); }
648
651template <typename Packet>
652EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
653 return numext::mini(a, b);
654}
655
658template <int NaNPropagation, typename Packet>
659EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
660 constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
661 return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
662}
663
666template <typename Packet>
667EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
668 return numext::maxi(a, b);
669}
670
673template <int NaNPropagation, typename Packet>
674EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
675 constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
676 return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
677}
678
680template <typename Packet>
681EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) {
682 return numext::abs(a);
683}
684template <>
685EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) {
686 return a;
687}
688template <>
689EIGEN_DEVICE_FUNC inline unsigned long pabs(const unsigned long& a) {
690 return a;
691}
692template <>
693EIGEN_DEVICE_FUNC inline unsigned long long pabs(const unsigned long long& a) {
694 return a;
695}
696
698template <typename Packet>
699EIGEN_DEVICE_FUNC inline Packet paddsub(const Packet& a, const Packet& b) {
700 return pselect(peven_mask(a), padd(a, b), psub(a, b));
701}
702
704template <typename Packet>
705EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) {
706 using numext::arg;
707 return arg(a);
708}
709
711template <int N, typename T>
712EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) {
713 return numext::arithmetic_shift_right(a, N);
714}
715
717template <int N, typename T>
718EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) {
719 return numext::logical_shift_right(a, N);
720}
721
723template <int N, typename T>
724EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) {
725 return numext::logical_shift_left(a, N);
726}
727
731template <typename Packet>
732EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
733 int exp;
734 EIGEN_USING_STD(frexp);
735 Packet result = static_cast<Packet>(frexp(a, &exp));
736 exponent = static_cast<Packet>(exp);
737 return result;
738}
739
743template <typename Packet>
744EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) {
745 EIGEN_USING_STD(ldexp)
746 return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
747}
748
750template <typename Packet>
751EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) {
752 return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b));
753}
754
756template <typename Packet>
757EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits<Packet>::type* from) {
758 return *from;
759}
760
765template <typename Packet>
766EIGEN_DEVICE_FUNC inline Packet pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
767 const Index offset = 0) {
768 const Index packet_size = unpacket_traits<Packet>::size;
769 eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
770 typedef typename unpacket_traits<Packet>::type Scalar;
771 EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
772 for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
773 elements[i] = from[i - offset];
774 }
775 return pload<Packet>(elements);
776}
777
779template <typename Packet>
780EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) {
781 return *from;
782}
783
786template <typename Packet>
787EIGEN_DEVICE_FUNC inline Packet ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
788 const Index offset = 0) {
789 const Index packet_size = unpacket_traits<Packet>::size;
790 eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
791 typedef typename unpacket_traits<Packet>::type Scalar;
792 EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
793 for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) {
794 elements[i] = from[i - offset];
795 }
796 return pload<Packet>(elements);
797}
798
803template <typename Packet>
804EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet> ploadu(
805 const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
806
808template <typename Packet>
809EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::type& a) {
810 return a;
811}
812
814template <typename Packet, typename BitsType>
815EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a);
816
818template <typename Packet>
819EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::type* a) {
820 return pset1<Packet>(*a);
821}
822
828template <typename Packet>
829EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) {
830 return *from;
831}
832
839template <typename Packet>
840EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits<Packet>::type* from) {
841 return pload1<Packet>(from);
842}
843
853template <typename Packet>
854EIGEN_DEVICE_FUNC inline void pbroadcast4(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1,
855 Packet& a2, Packet& a3) {
856 a0 = pload1<Packet>(a + 0);
857 a1 = pload1<Packet>(a + 1);
858 a2 = pload1<Packet>(a + 2);
859 a3 = pload1<Packet>(a + 3);
860}
861
869template <typename Packet>
870EIGEN_DEVICE_FUNC inline void pbroadcast2(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1) {
871 a0 = pload1<Packet>(a + 0);
872 a1 = pload1<Packet>(a + 1);
873}
874
876template <typename Packet>
877EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) {
878 return a;
879}
880
881template <typename Packet, typename EnableIf = void>
882struct peven_mask_impl {
883 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet&) {
884 typedef typename unpacket_traits<Packet>::type Scalar;
885 const size_t n = unpacket_traits<Packet>::size;
886 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
887 for (size_t i = 0; i < n; ++i) {
888 memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
889 }
890 return ploadu<Packet>(elements);
891 }
892};
893
894template <typename Scalar>
895struct peven_mask_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
896 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar&) { return Scalar(1); }
897};
898
901template <typename Packet>
902EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& a) {
903 return peven_mask_impl<Packet>::run(a);
904}
905
907template <typename Scalar, typename Packet>
908EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) {
909 (*to) = from;
910}
911
915template <typename Scalar, typename Packet>
916EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
917 const Index packet_size = unpacket_traits<Packet>::size;
918 eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
919 EIGEN_ALIGN_MAX Scalar elements[packet_size];
920 pstore<Scalar>(elements, from);
921 for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
922 to[i] = elements[i + offset];
923 }
924}
925
927template <typename Scalar, typename Packet>
928EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) {
929 (*to) = from;
930}
931
933template <typename Scalar, typename Packet>
934EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
935 const Index packet_size = unpacket_traits<Packet>::size;
936 eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
937 EIGEN_ALIGN_MAX Scalar elements[packet_size];
938 pstore<Scalar>(elements, from);
939 for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) {
940 to[i] = elements[i + offset];
941 }
942}
943
948template <typename Scalar, typename Packet>
949EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void> pstoreu(
950 Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
951
952template <typename Scalar, typename Packet>
953EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) {
954 return ploadu<Packet>(from);
955}
956
957template <typename Scalar, typename Packet>
958EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) {
959 const Index packet_size = unpacket_traits<Packet>::size;
960 EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)};
961 for (Index i = 0; i < numext::mini(n, packet_size); i++) {
962 elements[i] = from[i * stride];
963 }
964 return pload<Packet>(elements);
965}
966
967template <typename Scalar, typename Packet>
968EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) {
969 pstore(to, from);
970}
971
972template <typename Scalar, typename Packet>
973EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) {
974 const Index packet_size = unpacket_traits<Packet>::size;
975 EIGEN_ALIGN_MAX Scalar elements[packet_size];
976 pstore<Scalar>(elements, from);
977 for (Index i = 0; i < numext::mini(n, packet_size); i++) {
978 to[i * stride] = elements[i];
979 }
980}
981
983template <typename Scalar>
984EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) {
985#if defined(EIGEN_HIP_DEVICE_COMPILE)
986 // do nothing
987#elif defined(EIGEN_CUDA_ARCH)
988#if defined(__LP64__) || EIGEN_OS_WIN64
989 // 64-bit pointer operand constraint for inlined asm
990 asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
991#else
992 // 32-bit pointer operand constraint for inlined asm
993 asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
994#endif
995#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
996 __builtin_prefetch(addr);
997#endif
998}
999
1001template <typename Packet>
1002EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
1003 return a;
1004}
1005
1007template <typename Packet>
1008EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
1009 return Packet(numext::imag(a), numext::real(a));
1010}
1011
1012/**************************
1013 * Special math functions
1014 ***************************/
1015
1017template <typename Packet>
1018EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
1019 return pandnot(ptrue(a), pcmp_eq(a, a));
1020}
1021
1023template <typename Packet>
1024EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) {
1025 using Scalar = typename unpacket_traits<Packet>::type;
1026 constexpr Scalar inf = NumTraits<Scalar>::infinity();
1027 return pcmp_eq(pabs(a), pset1<Packet>(inf));
1028}
1029
1031template <typename Packet>
1032EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) {
1033 EIGEN_USING_STD(sin);
1034 return sin(a);
1035}
1036
1038template <typename Packet>
1039EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) {
1040 EIGEN_USING_STD(cos);
1041 return cos(a);
1042}
1043
1045template <typename Packet>
1046EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) {
1047 EIGEN_USING_STD(tan);
1048 return tan(a);
1049}
1050
1052template <typename Packet>
1053EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) {
1054 EIGEN_USING_STD(asin);
1055 return asin(a);
1056}
1057
1059template <typename Packet>
1060EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) {
1061 EIGEN_USING_STD(acos);
1062 return acos(a);
1063}
1064
1066template <typename Packet>
1067EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) {
1068 EIGEN_USING_STD(sinh);
1069 return sinh(a);
1070}
1071
1073template <typename Packet>
1074EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) {
1075 EIGEN_USING_STD(cosh);
1076 return cosh(a);
1077}
1078
1080template <typename Packet>
1081EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) {
1082 EIGEN_USING_STD(atan);
1083 return atan(a);
1084}
1085
1087template <typename Packet>
1088EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) {
1089 EIGEN_USING_STD(tanh);
1090 return tanh(a);
1091}
1092
1094template <typename Packet>
1095EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& a) {
1096 EIGEN_USING_STD(atanh);
1097 return atanh(a);
1098}
1099
1101template <typename Packet>
1102EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
1103 return numext::exp(a);
1104}
1105
1107template <typename Packet>
1108EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) {
1109 return numext::exp2(a);
1110}
1111
1113template <typename Packet>
1114EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet& a) {
1115 return numext::expm1(a);
1116}
1117
1119template <typename Packet>
1120EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) {
1121 EIGEN_USING_STD(log);
1122 return log(a);
1123}
1124
1126template <typename Packet>
1127EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) {
1128 return numext::log1p(a);
1129}
1130
1132template <typename Packet>
1133EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) {
1134 EIGEN_USING_STD(log10);
1135 return log10(a);
1136}
1137
1139template <typename Packet>
1140EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
1141 using Scalar = typename internal::unpacket_traits<Packet>::type;
1142 using RealScalar = typename NumTraits<Scalar>::Real;
1143 return pmul(pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E))), plog(a));
1144}
1145
1147template <typename Packet>
1148EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) {
1149 return numext::sqrt(a);
1150}
1151
1153template <typename Packet>
1154EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& a) {
1155 return numext::cbrt(a);
1156}
1157
1158template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1159 bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1160struct nearest_integer_packetop_impl {
1161 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); }
1162 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); }
1163 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); }
1164 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); }
1165 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); }
1166};
1167
1169template <typename Packet>
1170EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) {
1171 return nearest_integer_packetop_impl<Packet>::run_round(a);
1172}
1173
1175template <typename Packet>
1176EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) {
1177 return nearest_integer_packetop_impl<Packet>::run_floor(a);
1178}
1179
1182template <typename Packet>
1183EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) {
1184 return nearest_integer_packetop_impl<Packet>::run_rint(a);
1185}
1186
1188template <typename Packet>
1189EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) {
1190 return nearest_integer_packetop_impl<Packet>::run_ceil(a);
1191}
1192
1194template <typename Packet>
1195EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) {
1196 return nearest_integer_packetop_impl<Packet>::run_trunc(a);
1197}
1198
1199template <typename Packet, typename EnableIf = void>
1200struct psign_impl {
1201 static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { return numext::sign(a); }
1202};
1203
1205template <typename Packet>
1206EIGEN_DEVICE_FUNC inline Packet psign(const Packet& a) {
1207 return psign_impl<Packet>::run(a);
1208}
1209
1210template <>
1211EIGEN_DEVICE_FUNC inline bool psign(const bool& a) {
1212 return a;
1213}
1214
1216template <typename Packet>
1217EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) {
1218 return a;
1219}
1220
1225template <typename Packet>
1226EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size % 8) == 0,
1227 typename unpacket_traits<Packet>::half, Packet>
1228predux_half_dowto4(const Packet& a) {
1229 return a;
1230}
1231
1232// Slow generic implementation of Packet reduction.
1233template <typename Packet, typename Op>
1234EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_helper(const Packet& a, Op op) {
1235 typedef typename unpacket_traits<Packet>::type Scalar;
1236 const size_t n = unpacket_traits<Packet>::size;
1237 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
1238 pstoreu<Scalar>(elements, a);
1239 for (size_t k = n / 2; k > 0; k /= 2) {
1240 for (size_t i = 0; i < k; ++i) {
1241 elements[i] = op(elements[i], elements[i + k]);
1242 }
1243 }
1244 return elements[0];
1245}
1246
1248template <typename Packet>
1249EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) {
1250 return a;
1251}
1252
1254template <typename Packet>
1255EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) {
1256 typedef typename unpacket_traits<Packet>::type Scalar;
1257 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
1258}
1259
1261template <typename Packet>
1262EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1263 typedef typename unpacket_traits<Packet>::type Scalar;
1264 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<Scalar>)));
1265}
1266
1268template <typename Packet>
1269EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1270 typedef typename unpacket_traits<Packet>::type Scalar;
1271 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<Scalar>)));
1272}
1273
1274template <int NaNPropagation, typename Packet>
1275struct predux_min_max_helper_impl {
1276 using Scalar = typename unpacket_traits<Packet>::type;
1277 static constexpr bool UsePredux_ = NaNPropagation == PropagateFast || NumTraits<Scalar>::IsInteger;
1278 template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
1279 static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
1280 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
1281 }
1282 template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
1283 static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
1284 return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
1285 }
1286 template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
1287 static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
1288 return predux_min(a);
1289 }
1290 template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
1291 static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
1292 return predux_max(a);
1293 }
1294};
1295
1296template <int NaNPropagation, typename Packet>
1297EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
1298 return predux_min_max_helper_impl<NaNPropagation, Packet>::run_min(a);
1299}
1300
1301template <int NaNPropagation, typename Packet>
1302EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
1303 return predux_min_max_helper_impl<NaNPropagation, Packet>::run_max(a);
1304}
1305
1306#undef EIGEN_BINARY_OP_NAN_PROPAGATION
1307
1311// not needed yet
1312// template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
1313// { return bool(a); }
1314
1318template <typename Packet>
1319EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) {
1320 // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames.
1321 // It is expected that "true" is either:
1322 // - Scalar(1)
1323 // - bits full of ones (NaN for floats),
1324 // - or first bit equals to 1 (1 for ints, smallest denormal for floats).
1325 // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
1326 typedef typename unpacket_traits<Packet>::type Scalar;
1327 return numext::not_equal_strict(predux(a), Scalar(0));
1328}
1329
1330/***************************************************************************
1331 * The following functions might not have to be overwritten for vectorized types
1332 ***************************************************************************/
1333
1334template <typename Packet, typename EnableIf = void>
1335struct pmadd_impl {
1336 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1337 return padd(pmul(a, b), c);
1338 }
1339 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1340 return psub(pmul(a, b), c);
1341 }
1342 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1343 return psub(c, pmul(a, b));
1344 }
1345 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1346 return pnegate(pmadd(a, b, c));
1347 }
1348};
1349
1350template <typename Scalar>
1351struct pmadd_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value && NumTraits<Scalar>::IsSigned>> {
1352 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
1353 return numext::madd<Scalar>(a, b, c);
1354 }
1355 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
1356 return numext::madd<Scalar>(a, b, Scalar(-c));
1357 }
1358 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
1359 return numext::madd<Scalar>(Scalar(-a), b, c);
1360 }
1361 static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
1362 return -Scalar(numext::madd<Scalar>(a, b, c));
1363 }
1364};
1365
1366// Multiply-add instructions.
1368template <typename Packet>
1369EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
1370 return pmadd_impl<Packet>::pmadd(a, b, c);
1371}
1372
1374template <typename Packet>
1375EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
1376 return pmadd_impl<Packet>::pmsub(a, b, c);
1377}
1378
1380template <typename Packet>
1381EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
1382 return pmadd_impl<Packet>::pnmadd(a, b, c);
1383}
1384
1386template <typename Packet>
1387EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
1388 return pmadd_impl<Packet>::pnmsub(a, b, c);
1389}
1390
1393// NOTE: this function must really be templated on the packet type (think about different packet types for the same
1394// scalar type)
1395template <typename Packet>
1396inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) {
1397 pstore(to, pset1<Packet>(a));
1398}
1399
1402template <typename Packet, int Alignment>
1403EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) {
1404 if (Alignment >= unpacket_traits<Packet>::alignment)
1405 return pload<Packet>(from);
1406 else
1407 return ploadu<Packet>(from);
1408}
1409
1412template <typename Packet, int Alignment>
1413EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from,
1414 const Index n, const Index offset = 0) {
1415 if (Alignment >= unpacket_traits<Packet>::alignment)
1416 return pload_partial<Packet>(from, n, offset);
1417 else
1418 return ploadu_partial<Packet>(from, n, offset);
1419}
1420
1423template <typename Scalar, typename Packet, int Alignment>
1424EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) {
1425 if (Alignment >= unpacket_traits<Packet>::alignment)
1426 pstore(to, from);
1427 else
1428 pstoreu(to, from);
1429}
1430
1433template <typename Scalar, typename Packet, int Alignment>
1434EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n,
1435 const Index offset = 0) {
1436 if (Alignment >= unpacket_traits<Packet>::alignment)
1437 pstore_partial(to, from, n, offset);
1438 else
1439 pstoreu_partial(to, from, n, offset);
1440}
1441
1447template <typename Packet, int LoadMode>
1448EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) {
1449 return ploadt<Packet, LoadMode>(from);
1450}
1451
1452/***************************************************************************
1453 * Fast complex products (GCC generates a function call which is very slow)
1454 ***************************************************************************/
1455
1456// Eigen+CUDA does not support complexes.
1457#if !defined(EIGEN_GPUCC)
1458
1459template <>
1460inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) {
1461 return std::complex<float>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1462}
1463
1464template <>
1465inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) {
1466 return std::complex<double>(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag());
1467}
1468
1469#endif
1470
1471/***************************************************************************
1472 * PacketBlock, that is a collection of N packets where the number of words
1473 * in the packet is a multiple of N.
1474 ***************************************************************************/
1475template <typename Packet, int N = unpacket_traits<Packet>::size>
1476struct PacketBlock {
1477 Packet packet[N];
1478};
1479
1480template <typename Packet>
1481EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, 1>& /*kernel*/) {
1482 // Nothing to do in the scalar case, i.e. a 1x1 matrix.
1483}
1484
1485/***************************************************************************
1486 * Selector, i.e. vector of N boolean values used to select (i.e. blend)
1487 * words from 2 packets.
1488 ***************************************************************************/
1489template <size_t N>
1490struct Selector {
1491 bool select[N];
1492};
1493
1494template <typename Packet>
1495EIGEN_DEVICE_FUNC inline Packet pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
1496 const Packet& thenPacket, const Packet& elsePacket) {
1497 return ifPacket.select[0] ? thenPacket : elsePacket;
1498}
1499
1501template <typename Packet>
1502EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
1503 using Scalar = typename unpacket_traits<Packet>::type;
1504 return pdiv(pset1<Packet>(Scalar(1)), a);
1505}
1506
1508template <typename Packet>
1509EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) {
1510 return preciprocal<Packet>(psqrt(a));
1511}
1512
1513template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1514 bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1515struct psignbit_impl;
1516template <typename Packet, bool IsInteger>
1517struct psignbit_impl<Packet, true, IsInteger> {
1518 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
1519};
1520template <typename Packet>
1521struct psignbit_impl<Packet, false, false> {
1522 // generic implementation if not specialized in PacketMath.h
1523 // slower than arithmetic shift
1524 typedef typename unpacket_traits<Packet>::type Scalar;
1525 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
1526 const Packet cst_pos_one = pset1<Packet>(Scalar(1));
1527 const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
1528 return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
1529 }
1530};
1531template <typename Packet>
1532struct psignbit_impl<Packet, false, true> {
1533 // generic implementation for integer packets
1534 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
1535};
1537template <typename Packet>
1538EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet psignbit(const Packet& a) {
1539 return psignbit_impl<Packet>::run(a);
1540}
1541
1543template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1544EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1545 return numext::atan2(y, x);
1546}
1547
1549template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1550EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1551 typedef typename internal::unpacket_traits<Packet>::type Scalar;
1552
1553 // See https://en.cppreference.com/w/cpp/numeric/math/atan2
1554 // for how corner cases are supposed to be handled according to the
1555 // IEEE floating-point standard (IEC 60559).
1556 const Packet kSignMask = pset1<Packet>(-Scalar(0));
1557 const Packet kZero = pzero(x);
1558 const Packet kOne = pset1<Packet>(Scalar(1));
1559 const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
1560
1561 const Packet x_has_signbit = psignbit(x);
1562 const Packet y_signmask = pand(y, kSignMask);
1563 const Packet x_signmask = pand(x, kSignMask);
1564 const Packet result_signmask = pxor(y_signmask, x_signmask);
1565 const Packet shift = por(pand(x_has_signbit, kPi), y_signmask);
1566
1567 const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y));
1568 const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero);
1569
1570 Packet arg = pdiv(y, x);
1571 arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg);
1572 arg = pselect(x_and_y_are_zero, result_signmask, arg);
1573
1574 Packet result = patan(arg);
1575 result = padd(result, shift);
1576 return result;
1577}
1578
1580template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1581EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1582 return Packet(numext::arg(a));
1583}
1584
1586template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1587EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1588 EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex,
1589 THIS METHOD IS FOR COMPLEX TYPES ONLY)
1590 using RealPacket = typename unpacket_traits<Packet>::as_real;
1591 // a // r i r i ...
1592 RealPacket aflip = pcplxflip(a).v; // i r i r ...
1593 RealPacket result = patan2(aflip, a.v); // atan2 crap atan2 crap ...
1594 return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
1595}
1596
1599template <typename Packet>
1600EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1601 Index count) {
1602 using Scalar = typename unpacket_traits<Packet>::type;
1603 constexpr Index PacketSize = unpacket_traits<Packet>::size;
1604 eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
1605 Scalar aux[PacketSize] = {};
1606 for (Index k = begin; k < begin + count; k++) {
1607 aux[k] = from[k];
1608 }
1609 return ploadu<Packet>(aux);
1610}
1611
1614template <typename Packet>
1615EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1616 Index count) {
1617 return ploaduSegment<Packet>(from, begin, count);
1618}
1619
1623template <typename Scalar, typename Packet>
1624EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1625 constexpr Index PacketSize = unpacket_traits<Packet>::size;
1626 eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
1627 Scalar aux[PacketSize];
1628 pstoreu<Scalar, Packet>(aux, from);
1629 for (Index k = begin; k < begin + count; k++) {
1630 to[k] = aux[k];
1631 }
1632}
1633
1637template <typename Scalar, typename Packet>
1638EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1639 return pstoreuSegment(to, from, begin, count);
1640}
1641
1644template <typename Packet, int Alignment>
1645EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
1646 Index count) {
1647 constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
1648 if (Alignment >= RequiredAlignment) {
1649 return ploadSegment<Packet>(from, begin, count);
1650 } else {
1651 return ploaduSegment<Packet>(from, begin, count);
1652 }
1653}
1654
1657template <typename Scalar, typename Packet, int Alignment>
1658EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) {
1659 constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
1660 if (Alignment >= RequiredAlignment) {
1661 pstoreSegment<Scalar, Packet>(to, from, begin, count);
1662 } else {
1663 pstoreuSegment<Scalar, Packet>(to, from, begin, count);
1664 }
1665}
1666
1667#ifndef EIGEN_NO_IO
1668
1669template <typename Packet>
1670class StreamablePacket {
1671 public:
1672 using Scalar = typename unpacket_traits<Packet>::type;
1673 StreamablePacket(const Packet& packet) { pstoreu(v_, packet); }
1674
1675 friend std::ostream& operator<<(std::ostream& os, const StreamablePacket& packet) {
1676 os << "{" << packet.v_[0];
1677 for (int i = 1; i < unpacket_traits<Packet>::size; ++i) {
1678 os << "," << packet.v_[i];
1679 }
1680 os << "}";
1681 return os;
1682 }
1683
1684 private:
1685 Scalar v_[unpacket_traits<Packet>::size];
1686};
1687
1691template <typename Packet>
1692StreamablePacket<Packet> postream(const Packet& packet) {
1693 return StreamablePacket<Packet>(packet);
1694}
1695
1696#endif // EIGEN_NO_IO
1697
1698} // end namespace internal
1699
1700} // end namespace Eigen
1701
1702#endif // EIGEN_GENERIC_PACKET_MATH_H
@ PropagateNaN
Definition Constants.h:342
@ PropagateNumbers
Definition Constants.h:344
@ PropagateFast
Definition Constants.h:340
Namespace containing all symbols from the Eigen library.
Definition B01_Experimental.dox:1
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cosh_op< typename Derived::Scalar >, const Derived > cosh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cos_op< typename Derived::Scalar >, const Derived > cos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atanh_op< typename Derived::Scalar >, const Derived > atanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_exp_op< typename Derived::Scalar >, const Derived > exp(const Eigen::ArrayBase< Derived > &x)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:82
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tan_op< typename Derived::Scalar >, const Derived > tan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atan_op< typename Derived::Scalar >, const Derived > atan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log_op< typename Derived::Scalar >, const Derived > log(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sin_op< typename Derived::Scalar >, const Derived > sin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_asin_op< typename Derived::Scalar >, const Derived > asin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tanh_op< typename Derived::Scalar >, const Derived > tanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_acos_op< typename Derived::Scalar >, const Derived > acos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sinh_op< typename Derived::Scalar >, const Derived > sinh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log10_op< typename Derived::Scalar >, const Derived > log10(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)