16#ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
17#define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
// make_integer<T>::type names a signed integer type from numext associated with the
// floating-point scalar T: float -> int32_t, double -> int64_t, half and bfloat16 -> int16_t.
// The primary template is only declared (never defined), so using it with any other
// scalar type is a compile-time error.
23template<
typename T>
struct make_integer;
// float maps to a 32-bit signed integer.
24template<>
struct make_integer<float> {
typedef numext::int32_t type; };
// double maps to a 64-bit signed integer.
25template<>
struct make_integer<double> {
typedef numext::int64_t type; };
// half maps to a 16-bit signed integer.
26template<>
struct make_integer<half> {
typedef numext::int16_t type; };
// bfloat16 maps to a 16-bit signed integer.
27template<>
struct make_integer<bfloat16> {
typedef numext::int16_t type; };
// Returns, as a floating-point packet, the raw (still biased) exponent field of every lane of `a`.
// The sign is removed with pabs, the bits are reinterpreted as an integer packet, a logical
// right shift by the stored mantissa width discards the fraction bits, and the remaining
// integer is converted back to Packet with pcast.
29template<
typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
30Packet pfrexp_generic_get_biased_exponent(
const Packet& a) {
31 typedef typename unpacket_traits<Packet>::type Scalar;
32 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
// numeric_limits::digits includes the implicit leading bit, hence the -1.
33 enum { mantissa_bits = numext::numeric_limits<Scalar>::digits - 1};
34 return pcast<PacketI, Packet>(plogical_shift_right<mantissa_bits>(preinterpret<PacketI>(pabs(a))));
// Generic packet frexp: decomposes every lane of `a` into a mantissa `m` and an exponent.
//   a        : input packet.
//   exponent : output, receives the exponent as a floating-point packet.
// For finite non-zero lanes the mantissa keeps the sign and fraction bits of `a` and takes
// the bit pattern of 0.5 for its exponent field, so |m| lies in [0.5, 1). Zero and
// non-finite lanes produce m = a and exponent = 0.
// NOTE(review): the statement returning `m` is not visible in this excerpt.
39template<
typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
40Packet pfrexp_generic(
const Packet& a, Packet& exponent) {
41 typedef typename unpacket_traits<Packet>::type Scalar;
42 typedef typename make_unsigned<typename make_integer<Scalar>::type>::type ScalarUI;
// Bit layout of Scalar: 1 sign bit, ExponentBits exponent bits, MantissaBits stored fraction bits.
44 TotalBits =
sizeof(Scalar) * CHAR_BIT,
45 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
46 ExponentBits =
int(TotalBits) - int(MantissaBits) - 1
// Mask with every bit set except the exponent field, i.e. sign bit plus fraction bits.
49 EIGEN_CONSTEXPR ScalarUI scalar_sign_mantissa_mask =
50 ~(((ScalarUI(1) << int(ExponentBits)) - ScalarUI(1)) << int(MantissaBits));
51 const Packet sign_mantissa_mask = pset1frombits<Packet>(
static_cast<ScalarUI
>(scalar_sign_mantissa_mask));
52 const Packet half = pset1<Packet>(Scalar(0.5));
53 const Packet zero = pzero(a);
54 const Packet normal_min = pset1<Packet>((numext::numeric_limits<Scalar>::min)());
// Lanes whose magnitude is below the smallest normal value (denormals, and zero).
57 const Packet is_denormal = pcmp_lt(pabs(a), normal_min);
// Denormal lanes are multiplied by 2^(MantissaBits+1) so that their exponent field becomes usable.
58 EIGEN_CONSTEXPR ScalarUI scalar_normalization_offset = ScalarUI(
int(MantissaBits) + 1);
60 const Scalar scalar_normalization_factor = Scalar(ScalarUI(1) <<
int(scalar_normalization_offset));
61 const Packet normalization_factor = pset1<Packet>(scalar_normalization_factor);
62 const Packet normalized_a = pselect(is_denormal, pmul(a, normalization_factor), a);
// Offset added to the biased exponent to obtain the result: -(2^(ExponentBits-1) - 2).
65 const Scalar scalar_exponent_offset = -Scalar((ScalarUI(1)<<(
int(ExponentBits)-1)) - ScalarUI(2));
66 Packet exponent_offset = pset1<Packet>(scalar_exponent_offset);
// Denormal lanes additionally subtract the normalization shift applied above.
67 const Packet normalization_offset = pset1<Packet>(-Scalar(scalar_normalization_offset));
68 exponent_offset = pselect(is_denormal, padd(exponent_offset, normalization_offset), exponent_offset);
// Raw biased exponent of the (possibly rescaled) input.
71 exponent = pfrexp_generic_get_biased_exponent(normalized_a);
// An exponent field with all bits set identifies inf/NaN lanes.
74 const Scalar scalar_non_finite_exponent = Scalar((ScalarUI(1) <<
int(ExponentBits)) - ScalarUI(1));
75 const Packet non_finite_exponent = pset1<Packet>(scalar_non_finite_exponent);
76 const Packet is_zero_or_not_finite = por(pcmp_eq(a, zero), pcmp_eq(exponent, non_finite_exponent));
// Mantissa: sign + fraction bits of the input OR-ed with the bit pattern of 0.5;
// zero and non-finite lanes pass `a` through unchanged.
77 const Packet m = pselect(is_zero_or_not_finite, a, por(pand(normalized_a, sign_mantissa_mask), half));
// Exponent: unbiased value for regular lanes, 0 for zero / non-finite lanes.
78 exponent = pselect(is_zero_or_not_finite, zero, padd(exponent, exponent_offset));
// Generic packet ldexp: scales `a` by 2^exponent, lane by lane.
//   a        : values to scale.
//   exponent : exponents, given as a floating-point packet.
// The clamped integer exponent e is split into b = e >> 2 (arithmetic shift) used three
// times, plus the remainder e - 3*b. Each power of two is built directly by adding the
// bias and shifting the sum into the exponent field.
// NOTE(review): the visible lines stop after building the last factor `c`; the final
// multiplication and the return statement are not shown in this excerpt.
84template<
typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
85Packet pldexp_generic(
const Packet& a,
const Packet& exponent) {
108 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
109 typedef typename unpacket_traits<Packet>::type Scalar;
110 typedef typename unpacket_traits<PacketI>::type ScalarI;
// Bit layout of Scalar: 1 sign bit, ExponentBits exponent bits, MantissaBits stored fraction bits.
112 TotalBits =
sizeof(Scalar) * CHAR_BIT,
113 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
114 ExponentBits =
int(TotalBits) - int(MantissaBits) - 1
// Clamp bound for |exponent|: 2^ExponentBits + MantissaBits - 1.
117 const Packet max_exponent = pset1<Packet>(Scalar((ScalarI(1)<<
int(ExponentBits)) + ScalarI(
int(MantissaBits) - 1)));
// Exponent bias: 2^(ExponentBits-1) - 1.
118 const PacketI bias = pset1<PacketI>((ScalarI(1)<<(
int(ExponentBits)-1)) - ScalarI(1));
// e = exponent clamped to [-max_exponent, max_exponent], converted to integers.
119 const PacketI e = pcast<Packet, PacketI>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
// b = floor(e / 4).
120 PacketI b = parithmetic_shift_right<2>(e);
// c = 2^b, assembled from its bit pattern.
121 Packet c = preinterpret<Packet>(plogical_shift_left<
int(MantissaBits)>(padd(b, bias)));
// out = a * 2^(3b), applied as three separate multiplications.
122 Packet out = pmul(pmul(pmul(a, c), c), c);
// Remaining exponent: e - 3b.
123 b = psub(psub(psub(e, b), b), b);
// c = 2^(e - 3b).
124 c = preinterpret<Packet>(plogical_shift_left<
int(MantissaBits)>(padd(b, bias)));
// Single-multiplication ldexp helper.
// run(a, exponent) builds 2^exponent directly from its bit pattern and multiplies `a` by it.
// The biased exponent is clamped to [0, limit], where limit = 2^ExponentBits - 1, so
// exponents outside the range representable in the exponent field are saturated, not wrapped.
138template<
typename Packet>
139struct pldexp_fast_impl {
140 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
141 typedef typename unpacket_traits<Packet>::type Scalar;
142 typedef typename unpacket_traits<PacketI>::type ScalarI;
// Bit layout of Scalar: 1 sign bit, ExponentBits exponent bits, MantissaBits stored fraction bits.
144 TotalBits =
sizeof(Scalar) * CHAR_BIT,
145 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
146 ExponentBits =
int(TotalBits) - int(MantissaBits) - 1
149 static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
150 Packet run(
const Packet& a,
const Packet& exponent) {
// bias = 2^(ExponentBits-1) - 1, held as a floating-point packet.
151 const Packet bias = pset1<Packet>(Scalar((ScalarI(1)<<(
int(ExponentBits)-1)) - ScalarI(1)));
// limit = 2^ExponentBits - 1, the largest value of the exponent field.
152 const Packet limit = pset1<Packet>(Scalar((ScalarI(1)<<
int(ExponentBits)) - ScalarI(1)));
// e = clamp(exponent + bias, 0, limit), converted to integers.
154 const PacketI e = pcast<Packet, PacketI>(pmin(pmax(padd(exponent, bias), pzero(limit)), limit));
// Shift e into the exponent field, reinterpret as floating point, and scale `a`.
156 return pmul(a, preinterpret<Packet>(plogical_shift_left<
int(MantissaBits)>(e)));
// Logarithm of a single-precision packet, shared by plog_float (base2 = false) and
// plog2_float (base2 = true). Uses the polynomial coefficients named cst_cephes_log_p0..p8.
// Special cases handled by the final selection: zero -> -inf, +inf -> +inf, and lanes that
// are negative or NaN have the comparison mask OR-ed into the result.
// NOTE(review): several statements (declarations of x, e, y, y1, y2, the exponent
// extraction and the base2 branching) are not visible in this excerpt.
166template <
typename Packet,
bool base2>
167EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
169Packet plog_impl_float(
const Packet _x)
173 const Packet cst_1 = pset1<Packet>(1.0f);
174 const Packet cst_neg_half = pset1<Packet>(-0.5f);
// Bit patterns: smallest positive normal float, -inf and +inf.
176 const Packet cst_min_norm_pos = pset1frombits<Packet>( 0x00800000u);
177 const Packet cst_minus_inf = pset1frombits<Packet>( 0xff800000u);
178 const Packet cst_pos_inf = pset1frombits<Packet>( 0x7f800000u);
// sqrt(1/2) and the polynomial coefficients.
181 const Packet cst_cephes_SQRTHF = pset1<Packet>(0.707106781186547524f);
182 const Packet cst_cephes_log_p0 = pset1<Packet>(7.0376836292E-2f);
183 const Packet cst_cephes_log_p1 = pset1<Packet>(-1.1514610310E-1f);
184 const Packet cst_cephes_log_p2 = pset1<Packet>(1.1676998740E-1f);
185 const Packet cst_cephes_log_p3 = pset1<Packet>(-1.2420140846E-1f);
186 const Packet cst_cephes_log_p4 = pset1<Packet>(+1.4249322787E-1f);
187 const Packet cst_cephes_log_p5 = pset1<Packet>(-1.6668057665E-1f);
188 const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f);
189 const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f);
190 const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f);
// Clamp the input from below to the smallest positive normal value.
193 x = pmax(x, cst_min_norm_pos);
// Lanes with x < sqrt(1/2): keep x in tmp (zero elsewhere) and decrement the exponent by one.
206 Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);
207 Packet tmp = pand(x, mask);
209 e = psub(e, pand(cst_1, mask));
212 Packet x2 = pmul(x, x);
213 Packet x3 = pmul(x2, x);
// Degree-8 polynomial evaluated as three interleaved Horner chains (y, y1, y2)
// that are recombined with x^3.
218 y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
219 y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
220 y2 = pmadd(cst_cephes_log_p6, x, cst_cephes_log_p7);
221 y = pmadd(y, x, cst_cephes_log_p2);
222 y1 = pmadd(y1, x, cst_cephes_log_p5);
223 y2 = pmadd(y2, x, cst_cephes_log_p8);
224 y = pmadd(y, x3, y1);
225 y = pmadd(y, x3, y2);
// y += -0.5 * x^2.
228 y = pmadd(cst_neg_half, x2, y);
// Base-2 combination: x * log2(e) + e.
233 const Packet cst_log2e = pset1<Packet>(
static_cast<float>(EIGEN_LOG2E));
234 x = pmadd(x, cst_log2e, e);
// Natural-log combination: e * ln(2) + x.
236 const Packet cst_ln2 = pset1<Packet>(
static_cast<float>(EIGEN_LN2));
237 x = pmadd(e, cst_ln2, x);
// Masks for the special inputs, all computed from the original argument _x.
240 Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
241 Packet iszero_mask = pcmp_eq(_x,pzero(_x));
242 Packet pos_inf_mask = pcmp_eq(_x,cst_pos_inf);
// Selection priority: zero -> -inf; otherwise +inf -> +inf; the invalid mask is OR-ed
// on top (presumably an all-ones pattern, i.e. NaN, for negative/NaN lanes).
247 return pselect(iszero_mask, cst_minus_inf,
248 por(pselect(pos_inf_mask,cst_pos_inf,x), invalid_mask));
// Natural logarithm of a float packet: forwards to plog_impl_float with base2 = false.
251template <
typename Packet>
252EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
254Packet plog_float(
const Packet _x)
256 return plog_impl_float<Packet,
false>(_x);
// Base-2 logarithm of a float packet: forwards to plog_impl_float with base2 = true.
259template <
typename Packet>
260EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
262Packet plog2_float(
const Packet _x)
264 return plog_impl_float<Packet,
true>(_x);
// Logarithm of a double-precision packet, shared by plog_double (base2 = false) and
// plog2_double (base2 = true). Two degree-5 polynomials are evaluated, one with the
// cst_cephes_log_p* coefficients and one with the cst_cephes_log_q* coefficients.
// Special cases handled by the final selection: zero -> -inf, +inf -> +inf, and lanes that
// are negative or NaN have the comparison mask OR-ed into the result.
// NOTE(review): several statements (declarations of x, e, y, y1, y_, the exponent
// extraction, the combination of the two polynomials and the base2 branching) are not
// visible in this excerpt.
276template <
typename Packet,
bool base2>
277EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
279Packet plog_impl_double(
const Packet _x)
283 const Packet cst_1 = pset1<Packet>(1.0);
284 const Packet cst_neg_half = pset1<Packet>(-0.5);
// Bit patterns: smallest positive normal double, -inf and +inf.
286 const Packet cst_min_norm_pos = pset1frombits<Packet>(
static_cast<uint64_t
>(0x0010000000000000ull));
287 const Packet cst_minus_inf = pset1frombits<Packet>(
static_cast<uint64_t
>(0xfff0000000000000ull));
288 const Packet cst_pos_inf = pset1frombits<Packet>(
static_cast<uint64_t
>(0x7ff0000000000000ull));
// sqrt(1/2) followed by the coefficients of the first polynomial (p0..p5).
293 const Packet cst_cephes_SQRTHF = pset1<Packet>(0.70710678118654752440E0);
294 const Packet cst_cephes_log_p0 = pset1<Packet>(1.01875663804580931796E-4);
295 const Packet cst_cephes_log_p1 = pset1<Packet>(4.97494994976747001425E-1);
296 const Packet cst_cephes_log_p2 = pset1<Packet>(4.70579119878881725854E0);
297 const Packet cst_cephes_log_p3 = pset1<Packet>(1.44989225341610930846E1);
298 const Packet cst_cephes_log_p4 = pset1<Packet>(1.79368678507819816313E1);
299 const Packet cst_cephes_log_p5 = pset1<Packet>(7.70838733755885391666E0);
// Coefficients of the second polynomial (q0..q5); its leading coefficient is 1.
301 const Packet cst_cephes_log_q0 = pset1<Packet>(1.0);
302 const Packet cst_cephes_log_q1 = pset1<Packet>(1.12873587189167450590E1);
303 const Packet cst_cephes_log_q2 = pset1<Packet>(4.52279145837532221105E1);
304 const Packet cst_cephes_log_q3 = pset1<Packet>(8.29875266912776603211E1);
305 const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1);
306 const Packet cst_cephes_log_q5 = pset1<Packet>(2.31251620126765340583E1);
// Clamp the input from below to the smallest positive normal value.
309 x = pmax(x, cst_min_norm_pos);
// Lanes with x < sqrt(1/2): keep x in tmp (zero elsewhere) and decrement the exponent by one.
322 Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);
323 Packet tmp = pand(x, mask);
325 e = psub(e, pand(cst_1, mask));
328 Packet x2 = pmul(x, x);
329 Packet x3 = pmul(x2, x);
// First polynomial (p coefficients): two Horner chains merged with x^3, result in y_.
334 y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
335 y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
336 y = pmadd(y, x, cst_cephes_log_p2);
337 y1 = pmadd(y1, x, cst_cephes_log_p5);
338 y_ = pmadd(y, x3, y1);
// Second polynomial (q coefficients), evaluated the same way, result in y.
340 y = pmadd(cst_cephes_log_q0, x, cst_cephes_log_q1);
341 y1 = pmadd(cst_cephes_log_q3, x, cst_cephes_log_q4);
342 y = pmadd(y, x, cst_cephes_log_q2);
343 y1 = pmadd(y1, x, cst_cephes_log_q5);
344 y = pmadd(y, x3, y1);
// y += -0.5 * x^2.
349 y = pmadd(cst_neg_half, x2, y);
// Base-2 combination: x * log2(e) + e.
354 const Packet cst_log2e = pset1<Packet>(
static_cast<double>(EIGEN_LOG2E));
355 x = pmadd(x, cst_log2e, e);
// Natural-log combination: e * ln(2) + x.
357 const Packet cst_ln2 = pset1<Packet>(
static_cast<double>(EIGEN_LN2));
358 x = pmadd(e, cst_ln2, x);
// Masks for the special inputs, all computed from the original argument _x.
361 Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
362 Packet iszero_mask = pcmp_eq(_x,pzero(_x));
363 Packet pos_inf_mask = pcmp_eq(_x,cst_pos_inf);
// Selection priority: zero -> -inf; otherwise +inf -> +inf; the invalid mask is OR-ed
// on top (presumably an all-ones pattern, i.e. NaN, for negative/NaN lanes).
368 return pselect(iszero_mask, cst_minus_inf,
369 por(pselect(pos_inf_mask,cst_pos_inf,x), invalid_mask));
// Natural logarithm of a double packet: forwards to plog_impl_double with base2 = false.
372template <
typename Packet>
373EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
375Packet plog_double(
const Packet _x)
377 return plog_impl_double<Packet,
false>(_x);
// Base-2 logarithm of a double packet: forwards to plog_impl_double with base2 = true.
380template <
typename Packet>
381EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
383Packet plog2_double(
const Packet _x)
385 return plog_impl_double<Packet,
true>(_x);
// Generic log(1 + x) for packets.
// Computes u = 1 + x and log(u), then returns x * log(u) / (u - 1). Dividing by (u - 1)
// instead of x compensates for the rounding error made when forming u.
// Lanes where u == 1 (x too small to change 1) or where u == log(u) (only possible for
// an infinite u) return x itself.
391template<
typename Packet>
392Packet generic_plog1p(
const Packet& x)
394 typedef typename unpacket_traits<Packet>::type ScalarType;
395 const Packet one = pset1<Packet>(ScalarType(1));
396 Packet xp1 = padd(x, one);
// x is negligible relative to 1.
397 Packet small_mask = pcmp_eq(xp1, one);
398 Packet log1 = plog(xp1);
// log(u) equals u itself.
399 Packet inf_mask = pcmp_eq(xp1, log1);
400 Packet log_large = pmul(x, pdiv(log1, psub(xp1, one)));
401 return pselect(por(small_mask, inf_mask), x, log_large);
// Generic exp(x) - 1 for packets.
// The visible lines compute (u - 1) * x / log(u) for a packet `u`, and prepare masks for
// the lanes where u == 1, where u - 1 == -1, and where log(u) == u.
// NOTE(review): the definition of `u` (presumably pexp(x) -- confirm) and the operands of
// the final nested pselect are not visible in this excerpt.
407template<
typename Packet>
408Packet generic_expm1(
const Packet& x)
410 typedef typename unpacket_traits<Packet>::type ScalarType;
411 const Packet one = pset1<Packet>(ScalarType(1));
412 const Packet neg_one = pset1<Packet>(ScalarType(-1));
// u is exactly 1.
414 Packet one_mask = pcmp_eq(u, one);
415 Packet u_minus_one = psub(u, one);
// u - 1 is exactly -1.
416 Packet neg_one_mask = pcmp_eq(u_minus_one, neg_one);
417 Packet logu = plog(u);
// log(u) equals u itself; those lanes return u unchanged.
422 Packet pos_inf_mask = pcmp_eq(logu, u);
423 Packet expm1 = pmul(u_minus_one, pdiv(x, logu));
424 expm1 = pselect(pos_inf_mask, u, expm1);
425 return pselect(one_mask,
427 pselect(neg_one_mask,
// exp(x) for single-precision packets.
// Steps visible here: clamp x to [-88.723, 88.723]; m = floor(x * log2(e) + 0.5);
// r = x - m * ln(2), with ln(2) applied as two constants (C1 + C2) to limit rounding error;
// evaluate a polynomial in r; return max(pldexp(y, m), _x).
// NOTE(review): the declarations of y, y1, y2 and the assignment of y2 are not visible in
// this excerpt.
436template <
typename Packet>
437EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
439Packet pexp_float(
const Packet _x)
441 const Packet cst_1 = pset1<Packet>(1.0f);
442 const Packet cst_half = pset1<Packet>(0.5f);
// Clamping bounds for the argument.
443 const Packet cst_exp_hi = pset1<Packet>( 88.723f);
444 const Packet cst_exp_lo = pset1<Packet>(-88.723f);
// log2(e) and the polynomial coefficients.
446 const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
447 const Packet cst_cephes_exp_p0 = pset1<Packet>(1.9875691500E-4f);
448 const Packet cst_cephes_exp_p1 = pset1<Packet>(1.3981999507E-3f);
449 const Packet cst_cephes_exp_p2 = pset1<Packet>(8.3334519073E-3f);
450 const Packet cst_cephes_exp_p3 = pset1<Packet>(4.1665795894E-2f);
451 const Packet cst_cephes_exp_p4 = pset1<Packet>(1.6666665459E-1f);
452 const Packet cst_cephes_exp_p5 = pset1<Packet>(5.0000001201E-1f);
// Clamp x.
455 Packet x = pmax(pmin(_x, cst_exp_hi), cst_exp_lo);
// m = floor(x * log2(e) + 0.5), i.e. x / ln(2) rounded to the nearest integer.
459 Packet m = pfloor(pmadd(x, cst_cephes_LOG2EF, cst_half));
// r = x - m * ln(2), with -ln(2) split into the two constants C1 and C2.
464 const Packet cst_cephes_exp_C1 = pset1<Packet>(-0.693359375f);
465 const Packet cst_cephes_exp_C2 = pset1<Packet>(2.12194440e-4f);
466 Packet r = pmadd(m, cst_cephes_exp_C1, x);
467 r = pmadd(m, cst_cephes_exp_C2, r);
469 Packet r2 = pmul(r, r);
470 Packet r3 = pmul(r2, r);
// Polynomial in r, evaluated as two Horner chains recombined with r^3 and r^2.
474 y = pmadd(cst_cephes_exp_p0, r, cst_cephes_exp_p1);
475 y1 = pmadd(cst_cephes_exp_p3, r, cst_cephes_exp_p4);
477 y = pmadd(y, r, cst_cephes_exp_p2);
478 y1 = pmadd(y1, r, cst_cephes_exp_p5);
479 y = pmadd(y, r3, y1);
480 y = pmadd(y, r2, y2);
// Scale by 2^m, then take the maximum with the unclamped input
// (NOTE(review): presumably so NaN / very large inputs propagate -- confirm).
484 return pmax(pldexp(y,m), _x);
// exp(x) for double-precision packets.
// Steps visible here: clamp x to [-709.784, 709.784]; fx = x * log2(e) + 0.5; products of
// fx with the two constants C1 and C2 (whose sum is ln(2)); polynomials px and qx in x^2;
// combine them as 1 + 2 * px / (qx - px); return max(pldexp(x, fx), _x).
// NOTE(review): the declarations of x, fx, tmp, the rounding of fx, the subtraction of
// tmp and z from x, and the multiplication of px by x are not visible in this excerpt.
487template <
typename Packet>
488EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
490Packet pexp_double(
const Packet _x)
494 const Packet cst_1 = pset1<Packet>(1.0);
495 const Packet cst_2 = pset1<Packet>(2.0);
496 const Packet cst_half = pset1<Packet>(0.5);
// Clamping bounds for the argument.
498 const Packet cst_exp_hi = pset1<Packet>(709.784);
499 const Packet cst_exp_lo = pset1<Packet>(-709.784);
// log2(e), numerator coefficients (p*), denominator coefficients (q*), and ln(2) split as C1 + C2.
501 const Packet cst_cephes_LOG2EF = pset1<Packet>(1.4426950408889634073599);
502 const Packet cst_cephes_exp_p0 = pset1<Packet>(1.26177193074810590878e-4);
503 const Packet cst_cephes_exp_p1 = pset1<Packet>(3.02994407707441961300e-2);
504 const Packet cst_cephes_exp_p2 = pset1<Packet>(9.99999999999999999910e-1);
505 const Packet cst_cephes_exp_q0 = pset1<Packet>(3.00198505138664455042e-6);
506 const Packet cst_cephes_exp_q1 = pset1<Packet>(2.52448340349684104192e-3);
507 const Packet cst_cephes_exp_q2 = pset1<Packet>(2.27265548208155028766e-1);
508 const Packet cst_cephes_exp_q3 = pset1<Packet>(2.00000000000000000009e0);
509 const Packet cst_cephes_exp_C1 = pset1<Packet>(0.693145751953125);
510 const Packet cst_cephes_exp_C2 = pset1<Packet>(1.42860682030941723212e-6);
// Clamp x.
515 x = pmax(pmin(x, cst_exp_hi), cst_exp_lo);
// fx = x * log2(e) + 0.5.
517 fx = pmadd(cst_cephes_LOG2EF, x, cst_half);
// fx * C1 and fx * C2: the two parts of fx * ln(2).
525 tmp = pmul(fx, cst_cephes_exp_C1);
526 Packet z = pmul(fx, cst_cephes_exp_C2);
530 Packet x2 = pmul(x, x);
// px: Horner evaluation in x^2 with the p coefficients.
533 Packet px = cst_cephes_exp_p0;
534 px = pmadd(px, x2, cst_cephes_exp_p1);
535 px = pmadd(px, x2, cst_cephes_exp_p2);
// qx: Horner evaluation in x^2 with the q coefficients.
539 Packet qx = cst_cephes_exp_q0;
540 qx = pmadd(qx, x2, cst_cephes_exp_q1);
541 qx = pmadd(qx, x2, cst_cephes_exp_q2);
542 qx = pmadd(qx, x2, cst_cephes_exp_q3);
// x = 1 + 2 * px / (qx - px).
547 x = pdiv(px, psub(qx, px));
548 x = pmadd(cst_2, x, cst_1);
// Scale by 2^fx, then take the maximum with the unclamped input
// (NOTE(review): presumably so NaN / very large inputs propagate -- confirm).
553 return pmax(pldexp(x,fx), _x);
// Scalar argument reduction for very large float inputs to sine/cosine.
//   xf       : input value (callers in this file pass a non-negative finite value).
//   quadrant : output pointer.
// Works in integer arithmetic: the mantissa of xf is multiplied by three 32-bit words
// taken from a table of bits of 2/pi, giving a 64-bit product p that is interpreted as a
// fixed-point number with 62 fractional bits. The return value is p (as a signed integer)
// times pio2_62, whose value is approximately (pi/2) / 2^62.
// NOTE(review): the definition of the table index `i`, the declaration of `p`, the
// subtraction of q from p and the store to *quadrant are not visible in this excerpt.
565inline float trig_reduce_huge (
float xf,
int *quadrant)
567 using Eigen::numext::int32_t;
568 using Eigen::numext::uint32_t;
569 using Eigen::numext::int64_t;
570 using Eigen::numext::uint64_t;
// Scale factor applied to the final fixed-point value.
572 const double pio2_62 = 3.4061215800865545e-19;
// 2^61, i.e. 0.5 when 62 fractional bits are used.
573 const uint64_t zero_dot_five = uint64_t(1) << 61;
// Overlapping 32-bit windows of one bit string: each entry is the previous one shifted
// left by 8 bits with 8 new bits appended.
577 static const uint32_t two_over_pi [] =
579 0x00000028, 0x000028be, 0x0028be60, 0x28be60db,
580 0xbe60db93, 0x60db9391, 0xdb939105, 0x9391054a,
581 0x91054a7f, 0x054a7f09, 0x4a7f09d5, 0x7f09d5f4,
582 0x09d5f47d, 0xd5f47d4d, 0xf47d4d37, 0x7d4d3770,
583 0x4d377036, 0x377036d8, 0x7036d8a5, 0x36d8a566,
584 0xd8a5664f, 0xa5664f10, 0x664f10e4, 0x4f10e410,
585 0x10e41000, 0xe4100000
// Raw bits of the input.
588 uint32_t xi = numext::bit_cast<uint32_t>(xf);
// Biased exponent field minus 118.
593 uint32_t e = (xi >> 23) - 118;
// 24-bit mantissa with the implicit leading one restored, shifted left by (e mod 8).
595 xi = ((xi & 0x007fffffu)| 0x00800000u) << (e & 0x7);
// Three table words, four entries (32 bits) apart.
598 uint32_t twoopi_1 = two_over_pi[i-1];
599 uint32_t twoopi_2 = two_over_pi[i+3];
600 uint32_t twoopi_3 = two_over_pi[i+7];
// Multi-word product; the product with twoopi_1 is deliberately truncated to 32 bits
// before it is moved into the upper half.
604 p = uint64_t(xi) * twoopi_3;
605 p = uint64_t(xi) * twoopi_2 + (p >> 32);
606 p = (uint64_t(xi * twoopi_1) << 32) + p;
// q = p rounded to the nearest integer in the 2.62 fixed-point format (the top two bits).
609 uint64_t q = (p + zero_dot_five) >> 62;
// Convert the signed fixed-point value to floating point and scale it.
616 return float(
double(int64_t(p)) * pio2_62);
// Shared implementation of sine (ComputeSine = true) and cosine (ComputeSine = false) for
// float packets.
// Outline of the visible steps:
//   1. y = x * (2/pi), rounded to the nearest integer by adding and subtracting a large
//      constant; the integer is also kept as y_int.
//   2. x -= y * (pi/2), with pi/2 split into several constants; two alternative sequences
//      are present, the first one under EIGEN_VECTORIZE_FMA.
//   3. If any lane satisfies |_x| >= huge_th, those lanes are reduced one by one with
//      trig_reduce_huge.
//   4. Two polynomials in x are evaluated, y1 (constant term 1) and y2 (finished as
//      y2 * x + x); the parity of y_int selects between them per lane.
//   5. The sign is applied by XOR-ing with sign_bit.
// NOTE(review): the definition of `x` from `_x`, the `#else`/`#endif` lines, parts of the
// scalar loop body and one step of the y2 evaluation are not visible in this excerpt.
619template<
bool ComputeSine,
typename Packet>
620EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
// For strict GCC >= 4.4, unsafe math optimizations are disabled for this function.
622#if EIGEN_GNUC_AT_LEAST(4,4) && EIGEN_COMP_GNUC_STRICT
623__attribute__((optimize(
"-fno-unsafe-math-optimizations")))
625Packet psincos_float(
const Packet& _x)
627 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
// 2/pi.
629 const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f);
// 12582912 = 1.5 * 2^23: adding it to a small float leaves the rounded integer in the low mantissa bits.
630 const Packet cst_rounding_magic = pset1<Packet>(12582912);
631 const PacketI csti_1 = pset1<PacketI>(1);
632 const Packet cst_sign_mask = pset1frombits<Packet>(0x80000000u);
// Scale by 2/pi.
637 Packet y = pmul(x, cst_2oPI);
// Round to nearest integer: y_int holds the bits of the shifted value, y the rounded float.
// The barrier keeps the compiler from simplifying the add/subtract pair away.
640 Packet y_round = padd(y, cst_rounding_magic);
641 EIGEN_OPTIMIZATION_BARRIER(y_round)
642 PacketI y_int = preinterpret<PacketI>(y_round);
643 y = psub(y_round, cst_rounding_magic);
// Variant 1 (FMA available): subtract y * pi/2 using three constants.
647 #if defined(EIGEN_VECTORIZE_FMA)
650 const float huge_th = ComputeSine ? 117435.992f : 71476.0625f;
651 x = pmadd(y, pset1<Packet>(-1.57079601287841796875f), x);
652 x = pmadd(y, pset1<Packet>(-3.1391647326017846353352069854736328125e-07f), x);
653 x = pmadd(y, pset1<Packet>(-5.390302529957764765544681040410068817436695098876953125e-15f), x);
// Variant 2: subtract y * pi/2 using four constants, with barriers between the first steps.
661 const float huge_th = ComputeSine ? 25966.f : 18838.f;
662 x = pmadd(y, pset1<Packet>(-1.5703125), x);
663 EIGEN_OPTIMIZATION_BARRIER(x)
664 x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x);
665 EIGEN_OPTIMIZATION_BARRIER(x)
666 x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x);
667 x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x);
// Slow path: taken when at least one lane has |_x| >= huge_th.
683 if(predux_any(pcmp_le(pset1<Packet>(huge_th),pabs(_x))))
685 const int PacketSize = unpacket_traits<Packet>::size;
// Scratch arrays used to process the lanes one at a time.
686 EIGEN_ALIGN_TO_BOUNDARY(
sizeof(Packet))
float vals[PacketSize];
687 EIGEN_ALIGN_TO_BOUNDARY(
sizeof(Packet))
float x_cpy[PacketSize];
688 EIGEN_ALIGN_TO_BOUNDARY(
sizeof(Packet))
int y_int2[PacketSize];
689 pstoreu(vals, pabs(_x));
691 pstoreu(y_int2, y_int);
692 for(
int k=0; k<PacketSize;++k)
// Only finite lanes at or above the threshold are re-reduced; the call also receives &y_int2[k].
695 if(val>=huge_th && (numext::isfinite)(val))
696 x_cpy[k] = trig_reduce_huge(val,&y_int2[k]);
// Reload the patched reduced argument and integer packet.
698 x = ploadu<Packet>(x_cpy);
699 y_int = ploadu<PacketI>(y_int2);
// Sign: shifting left by 30 moves bit 1 of the integer into the sign position.
// Sine: sign of _x XOR bit 1 of y_int. Cosine: bit 1 of (y_int + 1).
705 Packet sign_bit = ComputeSine ? pxor(_x, preinterpret<Packet>(plogical_shift_left<30>(y_int)))
706 : preinterpret<Packet>(plogical_shift_left<30>(padd(y_int,csti_1)));
707 sign_bit = pand(sign_bit, cst_sign_mask);
// poly_mask is set in lanes where y_int is even.
711 Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_int, csti_1), pzero(y_int)));
713 Packet x2 = pmul(x,x);
// y1: polynomial in x^2 ending with the terms -0.5 * x^2 + 1.
716 Packet y1 = pset1<Packet>(2.4372266125283204019069671630859375e-05f);
717 y1 = pmadd(y1, x2, pset1<Packet>(-0.00138865201734006404876708984375f ));
718 y1 = pmadd(y1, x2, pset1<Packet>(0.041666619479656219482421875f ));
719 y1 = pmadd(y1, x2, pset1<Packet>(-0.5f));
720 y1 = pmadd(y1, x2, pset1<Packet>(1.f));
// y2: polynomial in x^2, finished as y2 * x + x.
730 Packet y2 = pset1<Packet>(-0.0001959234114083702898469196984621021329076029360294342041015625f);
731 y2 = pmadd(y2, x2, pset1<Packet>( 0.0083326873655616851693794799871284340042620897293090820312500000f));
732 y2 = pmadd(y2, x2, pset1<Packet>(-0.1666666203982298255503735617821803316473960876464843750000000000f));
734 y2 = pmadd(y2, x, x);
// Sine uses y2 where y_int is even and y1 otherwise; cosine uses the opposite choice.
737 y = ComputeSine ? pselect(poly_mask,y2,y1)
738 : pselect(poly_mask,y1,y2);
// Apply the sign.
741 return pxor(y, sign_bit);
// Sine of a float packet: forwards to psincos_float with ComputeSine = true.
744template<
typename Packet>
745EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
747Packet psin_float(
const Packet& x)
749 return psincos_float<true>(x);
// Cosine of a float packet: forwards to psincos_float with ComputeSine = false.
752template<
typename Packet>
753EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
755Packet pcos_float(
const Packet& x)
757 return psincos_float<false>(x);
// Division of complex packets, x / y, with scaling of the divisor.
// y is first divided by y_max = max(|component|, |flipped component|), so that the squared
// components of the scaled divisor cannot exceed 1. The quotient is then
//   x * conj(y_scaled) / (sum of squared components of y_scaled) / y_max.
// NOTE(review): pcplxflip is assumed to swap the real and imaginary parts of each complex
// lane -- confirm against its definition.
760template<
typename Packet>
761EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
762EIGEN_UNUSED Packet pdiv_complex(
const Packet& x,
const Packet& y) {
763 typedef typename unpacket_traits<Packet>::as_real RealPacket;
// Per-component maximum of |y| and its flipped counterpart.
767 const RealPacket y_abs = pabs(y.v);
768 const RealPacket y_abs_flip = pcplxflip(Packet(y_abs)).v;
769 const RealPacket y_max = pmax(y_abs, y_abs_flip);
770 const RealPacket y_scaled = pdiv(y.v, y_max);
// denom = y_scaled^2 + flipped(y_scaled^2), the same value in both components.
772 const RealPacket y_scaled_sq = pmul(y_scaled, y_scaled);
773 const RealPacket denom = padd(y_scaled_sq, pcplxflip(Packet(y_scaled_sq)).v);
// Complex product with the conjugate of the scaled divisor.
774 Packet result_scaled = pmul(x, pconj(Packet(y_scaled)));
776 result_scaled = Packet(pdiv(result_scaled.v, denom));
// Undo the scaling of the divisor.
778 return Packet(pdiv(result_scaled.v, y_max));
// Square root of a complex packet.
// Visible steps:
//   - l = a_max * sqrt(1 + (a_min / a_max)^2), computed from the component magnitudes
//     (replaced by a_max where a_min is zero);
//   - rho = sqrt(0.5 * (|component| + l));
//   - eta = 0.5 * component / flipped(rho), forced to zero where a_max is zero;
//   - the result for a non-negative real part takes rho in the real-mask positions and
//     eta elsewhere; the result for a negative real part is the flipped magnitude of
//     that, with the sign bits selected by cst_imag_sign_mask copied from the input;
//   - infinite components are handled by two dedicated results that override the rest.
// NOTE(review): the declarations of rho, is_inf, is_real_inf and is_imag_inf are not
// visible in this excerpt; pcplxflip is assumed to swap real and imaginary parts, and
// peven_mask to select the real-part positions -- confirm against their definitions.
781template<
typename Packet>
782EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
784Packet psqrt_complex(
const Packet& a) {
785 typedef typename unpacket_traits<Packet>::type Scalar;
786 typedef typename Scalar::value_type RealScalar;
787 typedef typename unpacket_traits<Packet>::as_real RealPacket;
// Component magnitudes, their flipped counterparts, and per-lane max / min of the two.
825 RealPacket a_abs = pabs(a.v);
826 RealPacket a_abs_flip = pcplxflip(Packet(a_abs)).v;
827 RealPacket a_max = pmax(a_abs, a_abs_flip);
828 RealPacket a_min = pmin(a_abs, a_abs_flip);
829 RealPacket a_min_zero_mask = pcmp_eq(a_min, pzero(a_min));
830 RealPacket a_max_zero_mask = pcmp_eq(a_max, pzero(a_max));
// l = a_max * sqrt(1 + r^2) with r = a_min / a_max.
831 RealPacket r = pdiv(a_min, a_max);
832 const RealPacket cst_one = pset1<RealPacket>(RealScalar(1));
833 RealPacket l = pmul(a_max, psqrt(padd(cst_one, pmul(r, r))));
// Where a_min is zero, use a_max directly.
835 l = pselect(a_min_zero_mask, a_max, l);
840 const RealPacket cst_half = pset1<RealPacket>(RealScalar(0.5));
// rho = sqrt(0.5 * (|component| + l)).
842 rho.v = psqrt(pmul(cst_half, padd(a_abs, l)));
// eta = 0.5 * component / flipped(rho); pandnot clears lanes where a_max is zero.
847 RealPacket eta = pandnot(pmul(cst_half, pdiv(a.v, pcplxflip(rho).v)), a_max_zero_mask);
848 RealPacket real_mask = peven_mask(a.v);
849 Packet positive_real_result;
// rho in the real_mask positions, eta in the others.
851 positive_real_result.v = pselect(real_mask, rho.v, eta);
// Mask holding only the sign bit of the imaginary component (built from +0.0 / -0.0).
855 const RealScalar neg_zero = RealScalar(numext::bit_cast<float>(0x80000000u));
856 const RealPacket cst_imag_sign_mask = pset1<Packet>(Scalar(RealScalar(0.0), neg_zero)).v;
857 RealPacket imag_signs = pand(a.v, cst_imag_sign_mask);
858 Packet negative_real_result;
// Flipped magnitudes of the positive-real result with the input's imaginary sign applied.
860 negative_real_result.v = por(pabs(pcplxflip(positive_real_result).v), imag_signs);
// Mask of lanes whose real part is negative, replicated onto both components.
863 Packet negative_real_mask;
864 negative_real_mask.v = pcmp_lt(pand(real_mask, a.v), pzero(a.v));
865 negative_real_mask.v = por(negative_real_mask.v, pcplxflip(negative_real_mask).v);
866 Packet result = pselect(negative_real_mask, negative_real_result, positive_real_result);
// Infinite components.
873 const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
875 is_inf.v = pcmp_eq(a_abs, cst_pos_inf);
// Infinite real part, replicated onto both components.
877 is_real_inf.v = pand(is_inf.v, real_mask);
878 is_real_inf = por(is_real_inf, pcplxflip(is_real_inf));
// Result for an infinite real part: |a| * (1, 0), flipped when the real part is negative.
880 Packet real_inf_result;
881 real_inf_result.v = pmul(a_abs, pset1<Packet>(Scalar(RealScalar(1.0), RealScalar(0.0))).v);
882 real_inf_result.v = pselect(negative_real_mask.v, pcplxflip(real_inf_result).v, real_inf_result.v);
// Infinite imaginary part, replicated onto both components.
885 is_imag_inf.v = pandnot(is_inf.v, real_mask);
886 is_imag_inf = por(is_imag_inf, pcplxflip(is_imag_inf));
// Result for an infinite imaginary part: +inf in the real position, the input elsewhere.
887 Packet imag_inf_result;
888 imag_inf_result.v = por(pand(cst_pos_inf, real_mask), pandnot(a.v, real_mask));
// Priority: infinite imaginary part, then infinite real part, then the regular result.
890 return pselect(is_imag_inf, imag_inf_result,
891 pselect(is_real_inf, real_inf_result,result));
// Takes an input packet `x` and writes two output packets, `n` and `r`.
// NOTE(review): the body is not visible in this excerpt. From the name, this presumably
// splits |x| into an integral part n and a remainder r -- confirm against the full source.
901template<
typename Packet>
903void absolute_split(
const Packet& x, Packet& n, Packet& r) {
// Sum of two packets returned as a pair (s_hi, s_lo).
// The only visible statement forms t = s_hi - x, the part of s_hi that is attributable to y.
// NOTE(review): the assignments of s_hi and s_lo are not visible in this excerpt; this
// presumably is the Fast2Sum error-free addition, which requires |x| >= |y| -- confirm.
910template<
typename Packet>
912void fast_twosum(
const Packet& x,
const Packet& y, Packet& s_hi, Packet& s_lo) {
914 const Packet t = psub(s_hi, x);
918#ifdef EIGEN_VECTORIZE_FMA
// Product of two packets returned as a pair (p_hi, p_lo); this overload follows the
// `#ifdef EIGEN_VECTORIZE_FMA` line.
// p_lo = x * y - p_hi is obtained with a single pmadd, so when pmadd is a fused operation
// it is the rounding error of p_hi.
// NOTE(review): the assignment of p_hi (presumably pmul(x, y)) is not visible in this excerpt.
923template<
typename Packet>
925void twoprod(
const Packet& x,
const Packet& y,
926 Packet& p_hi, Packet& p_lo) {
928 p_lo = pmadd(x, y, pnegate(p_hi));
// Splits x into a high part x_hi and a low part x_lo with x_lo = x - x_hi.
// With shift = (digits + 1) / 2, the computation is
//   gamma = (2^shift + 1) * x,  rho = x - gamma,  x_hi = rho + gamma,  x_lo = x - x_hi.
// The intermediate roundings make x_hi keep only the upper part of the significand of x.
938template<
typename Packet>
940void veltkamp_splitting(
const Packet& x, Packet& x_hi, Packet& x_lo) {
941 typedef typename unpacket_traits<Packet>::type Scalar;
// About half of the significand width.
942 EIGEN_CONSTEXPR
int shift = (NumTraits<Scalar>::digits() + 1) / 2;
943 const Scalar shift_scale = Scalar(uint64_t(1) << shift);
944 const Packet gamma = pmul(pset1<Packet>(shift_scale + Scalar(1)), x);
945 Packet rho = psub(x, gamma);
946 x_hi = padd(rho, gamma);
947 x_lo = psub(x, x_hi);
// Product of two packets returned as a pair (p_hi, p_lo), built on veltkamp_splitting.
// Both operands are split into high and low halves; p_lo accumulates
//   x_hi*y_hi - p_hi + x_hi*y_lo + x_lo*y_hi + x_lo*y_lo,
// i.e. the difference between the product of the split parts and p_hi.
// NOTE(review): the assignment of p_hi (presumably pmul(x, y)) is not visible in this excerpt.
954template<
typename Packet>
956void twoprod(
const Packet& x,
const Packet& y,
957 Packet& p_hi, Packet& p_lo) {
958 Packet x_hi, x_lo, y_hi, y_lo;
959 veltkamp_splitting(x, x_hi, x_lo);
960 veltkamp_splitting(y, y_hi, y_lo);
// Accumulate the partial products, largest term first.
963 p_lo = pmadd(x_hi, y_hi, pnegate(p_hi));
964 p_lo = pmadd(x_hi, y_lo, p_lo);
965 p_lo = pmadd(x_lo, y_hi, p_lo);
966 p_lo = pmadd(x_lo, y_lo, p_lo);
// Adds two double-word numbers (x_hi, x_lo) and (y_hi, y_lo), result in (s_hi, s_lo).
// No ordering of the operands is assumed: fast_twosum is evaluated in both argument orders
// and, per lane, the variant whose first argument has the larger magnitude is selected.
// The low words are then added to the matching low-order result and the pair is
// renormalized with a final fast_twosum.
978template<
typename Packet>
980 void twosum(
const Packet& x_hi,
const Packet& x_lo,
981 const Packet& y_hi,
const Packet& y_lo,
982 Packet& s_hi, Packet& s_lo) {
// Set in lanes where |x_hi| > |y_hi|.
983 const Packet x_greater_mask = pcmp_lt(pabs(y_hi), pabs(x_hi));
// Variant 1: x_hi first.
984 Packet r_hi_1, r_lo_1;
985 fast_twosum(x_hi, y_hi,r_hi_1, r_lo_1);
// Variant 2: y_hi first.
986 Packet r_hi_2, r_lo_2;
987 fast_twosum(y_hi, x_hi,r_hi_2, r_lo_2);
988 const Packet r_hi = pselect(x_greater_mask, r_hi_1, r_hi_2);
// Low-order sums for each variant.
990 const Packet s1 = padd(padd(y_lo, r_lo_1), x_lo);
991 const Packet s2 = padd(padd(x_lo, r_lo_2), y_lo);
992 const Packet s = pselect(x_greater_mask, s1, s2);
// Renormalize.
994 fast_twosum(r_hi, s, s_hi, s_lo);
// Adds two double-word numbers (x_hi, x_lo) and (y_hi, y_lo), result in (s_hi, s_lo),
// using a single fast_twosum on the high words (no magnitude comparison is performed
// here, unlike twosum). The low words are added to the low-order result, and the pair is
// renormalized with a second fast_twosum.
// NOTE(review): the declaration of r_hi and r_lo is not visible in this excerpt.
999template<
typename Packet>
1001 void fast_twosum(
const Packet& x_hi,
const Packet& x_lo,
1002 const Packet& y_hi,
const Packet& y_lo,
1003 Packet& s_hi, Packet& s_lo) {
1005 fast_twosum(x_hi, y_hi, r_hi, r_lo);
1006 const Packet s = padd(padd(y_lo, r_lo), x_lo);
1007 fast_twosum(r_hi, s, s_hi, s_lo);
// Adds a single-word number x to a double-word number (y_hi, y_lo), result in (s_hi, s_lo).
// x and y_hi are added with fast_twosum, y_lo is added to the low-order result, and the
// pair is renormalized with a second fast_twosum.
// NOTE(review): the declaration of r_hi and r_lo is not visible in this excerpt.
1013template<
typename Packet>
1015void fast_twosum(
const Packet& x,
1016 const Packet& y_hi,
const Packet& y_lo,
1017 Packet& s_hi, Packet& s_lo) {
1019 fast_twosum(x, y_hi, r_hi, r_lo);
1020 const Packet s = padd(y_lo, r_lo);
1021 fast_twosum(r_hi, s, s_hi, s_lo);
// Multiplies a double-word number (x_hi, x_lo) by a single-word number y, result in
// (p_hi, p_lo).
// x_hi * y is formed as a (hi, lo) pair with twoprod, x_lo * y as a plain product; the
// pieces are combined with two fast_twosum steps.
// NOTE(review): the declarations of c_hi, c_lo1, t_hi and t_lo1 are not visible in this
// excerpt.
1032template<
typename Packet>
1034void twoprod(
const Packet& x_hi,
const Packet& x_lo,
const Packet& y,
1035 Packet& p_hi, Packet& p_lo) {
// (c_hi, c_lo1) = x_hi * y.
1037 twoprod(x_hi, y, c_hi, c_lo1);
// c_lo2 = x_lo * y (rounded).
1038 const Packet c_lo2 = pmul(x_lo, y);
// Add the two leading terms, then fold in the remaining low-order term.
1040 fast_twosum(c_hi, c_lo2, t_hi, t_lo1);
1041 const Packet t_lo2 = padd(t_lo1, c_lo1);
1042 fast_twosum(t_hi, t_lo2, p_hi, p_lo);
// Multiplies two double-word numbers (x_hi, x_lo) and (y_hi, y_lo), result in (p_hi, p_lo).
// The product is computed as x * y_hi + x * y_lo, each term with the double-word-by-scalar
// twoprod, and the two partial products are added with the double-word fast_twosum.
1051template<
typename Packet>
1053void twoprod(
const Packet& x_hi,
const Packet& x_lo,
1054 const Packet& y_hi,
const Packet& y_lo,
1055 Packet& p_hi, Packet& p_lo) {
// x * y_hi.
1056 Packet p_hi_hi, p_hi_lo;
1057 twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);
// x * y_lo.
1058 Packet p_lo_hi, p_lo_lo;
1059 twoprod(x_hi, x_lo, y_lo, p_lo_hi, p_lo_lo);
// Sum of the partial products.
1060 fast_twosum(p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo, p_hi, p_lo);
// Computes 1/x as a double-word number (recip_hi, recip_lo).
// An initial approximation r = prsqrt(x)^2 is refined with one Newton-Raphson step
// evaluated in double-word arithmetic: recip = r * (2 - x * r).
// NOTE(review): the initial approximation goes through a reciprocal square root, so this
// presumably expects x > 0 -- confirm with callers.
1065template <
typename Packet>
1066void doubleword_reciprocal(
const Packet& x, Packet& recip_hi, Packet& recip_lo) {
1067 typedef typename unpacket_traits<Packet>::type Scalar;
// r = (1/sqrt(x))^2, an approximation of 1/x.
1069 Packet approx_recip = prsqrt(x);
1070 approx_recip = pmul(approx_recip, approx_recip);
// t1 = -x * r as a (hi, lo) pair.
1077 Packet t1_hi, t1_lo;
1078 twoprod(pnegate(x), approx_recip, t1_hi, t1_lo);
// t2 = 2 + t1_hi.
1080 Packet t2_hi, t2_lo;
1081 fast_twosum(pset1<Packet>(Scalar(2)), t1_hi, t2_hi, t2_lo);
// t3 = t2 with the low-order terms folded in and renormalized.
1082 Packet t3_hi, t3_lo;
1083 fast_twosum(t2_hi, padd(t2_lo, t1_lo), t3_hi, t3_lo);
// recip = t3 * r.
1085 twoprod(t3_hi, t3_lo, approx_recip, recip_hi, recip_lo);
// Functor computing log2(x) as a (hi, lo) pair.
// This generic version provides no extra precision: the high word is plog2(x) and the low
// word is zero.
1090template <
typename Scalar>
1091struct accurate_log2 {
1092 template <
typename Packet>
1094 void operator()(
const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
1095 log2_x_hi = plog2(x);
1096 log2_x_lo = pzero(x);
// Single-precision specialization: log2(z) as a (hi, lo) pair.
// With x = z - 1, a polynomial in x is evaluated in two stages:
//   - the seven coefficients p6..p0 in plain arithmetic, split into even and odd parts;
//   - the four remaining coefficients C3..C0, each stored as a (hi, lo) pair, with
//     Horner steps carried out in double-word arithmetic (twoprod + fast_twosum).
// A final multiplication by x produces the result.
// NOTE(review): the `template <>` line and the declarations of t_hi, t_lo, q_hi, q_lo are
// not visible in this excerpt; the valid input range of z is not stated in the visible
// lines.
1107struct accurate_log2<float> {
1108 template <
typename Packet>
1110 void operator()(
const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) {
// Coefficients used in plain floating-point arithmetic.
1126 const Packet p6 = pset1<Packet>( 9.703654795885e-2f);
1127 const Packet p5 = pset1<Packet>(-0.1690667718648f);
1128 const Packet p4 = pset1<Packet>( 0.1720575392246f);
1129 const Packet p3 = pset1<Packet>(-0.1789081543684f);
1130 const Packet p2 = pset1<Packet>( 0.2050433009862f);
1131 const Packet p1 = pset1<Packet>(-0.2404672354459f);
1132 const Packet p0 = pset1<Packet>( 0.2885761857032f);
// Coefficients used in double-word arithmetic, each as a (hi, lo) pair.
1134 const Packet C3_hi = pset1<Packet>(-0.360674142838f);
1135 const Packet C3_lo = pset1<Packet>(-6.13283912543e-09f);
1136 const Packet C2_hi = pset1<Packet>(0.480897903442f);
1137 const Packet C2_lo = pset1<Packet>(-1.44861207474e-08f);
1138 const Packet C1_hi = pset1<Packet>(-0.721347510815f);
1139 const Packet C1_lo = pset1<Packet>(-4.84483164698e-09f);
1140 const Packet C0_hi = pset1<Packet>(1.44269502163f);
1141 const Packet C0_lo = pset1<Packet>(2.01711713999e-08f);
1142 const Packet one = pset1<Packet>(1.0f);
// The polynomial variable.
1144 const Packet x = psub(z, one);
// p(x) = p_even(x^2) + x * p_odd(x^2).
1148 Packet x2 = pmul(x,x);
1149 Packet p_even = pmadd(p6, x2, p4);
1150 p_even = pmadd(p_even, x2, p2);
1151 p_even = pmadd(p_even, x2, p0);
1152 Packet p_odd = pmadd(p5, x2, p3);
1153 p_odd = pmadd(p_odd, x2, p1);
1154 Packet p = pmadd(p_odd, x, p_even);
// q = C3 + p * x.
1163 twoprod(p, x, t_hi, t_lo);
1164 fast_twosum(C3_hi, C3_lo, t_hi, t_lo, q_hi, q_lo);
// q = C2 + q * x.
1166 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1167 fast_twosum(C2_hi, C2_lo, t_hi, t_lo, q_hi, q_lo);
// q = C1 + q * x.
1169 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1170 fast_twosum(C1_hi, C1_lo, t_hi, t_lo, q_hi, q_lo);
// q = C0 + q * x.
1172 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1173 fast_twosum(C0_hi, C0_lo, t_hi, t_lo, q_hi, q_lo);
// Result = q * x.
1176 twoprod(q_hi, q_lo, x, log2_x_hi, log2_x_lo);
// Double-precision specialization: log2(x) as a (hi, lo) pair.
// With r = 2*log2(e) * (x - 1) / (x + 1), computed in double-word arithmetic, the result is
//   r * (1 + r^2 * (C + r^2 * q(r^2))),
// where q is a polynomial with coefficients q0..q12 evaluated in plain arithmetic and the
// outer steps use double-word arithmetic.
// NOTE(review): the `template <>` line and the declarations of r_hi, r_lo, p_hi, p_lo are
// not visible in this excerpt; the valid input range of x is not stated in the visible
// lines.
1188struct accurate_log2<double> {
1189 template <
typename Packet>
1191 void operator()(
const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
// Coefficients of q, used in plain floating-point arithmetic.
1213 const Packet q12 = pset1<Packet>(2.87074255468000586e-9);
1214 const Packet q10 = pset1<Packet>(2.38957980901884082e-8);
1215 const Packet q8 = pset1<Packet>(2.31032094540014656e-7);
1216 const Packet q6 = pset1<Packet>(2.27279857398537278e-6);
1217 const Packet q4 = pset1<Packet>(2.31271023278625638e-5);
1218 const Packet q2 = pset1<Packet>(2.47556738444535513e-4);
1219 const Packet q0 = pset1<Packet>(2.88543873228900172e-3);
// Coefficient C as a (hi, lo) pair.
1220 const Packet C_hi = pset1<Packet>(0.0400377511598501157);
1221 const Packet C_lo = pset1<Packet>(-4.77726582251425391e-19);
1222 const Packet one = pset1<Packet>(1.0);
// 2 * log2(e) as a (hi, lo) pair.
1224 const Packet cst_2_log2e_hi = pset1<Packet>(2.88539008177792677);
1225 const Packet cst_2_log2e_lo = pset1<Packet>(4.07660016854549667e-17);
// num = 2*log2(e) * (x - 1).
1227 Packet num_hi, num_lo;
1228 twoprod(cst_2_log2e_hi, cst_2_log2e_lo, psub(x, one), num_hi, num_lo);
// denom = 1 / (x + 1).
1232 Packet denom_hi, denom_lo;
1233 doubleword_reciprocal(padd(x, one), denom_hi, denom_lo);
// r = num * denom.
1236 twoprod(num_hi, num_lo, denom_hi, denom_lo, r_hi, r_lo);
// r^2 and r^4.
1238 Packet r2_hi, r2_lo;
1239 twoprod(r_hi, r_lo, r_hi, r_lo, r2_hi, r2_lo);
1241 Packet r4_hi, r4_lo;
1242 twoprod(r2_hi, r2_lo, r2_hi, r2_lo, r4_hi, r4_lo);
// q(r^2) = q_even(r^4) + r^2 * q_odd(r^4), using only the high words.
1246 Packet q_even = pmadd(q12, r4_hi, q8);
1247 Packet q_odd = pmadd(q10, r4_hi, q6);
1248 q_even = pmadd(q_even, r4_hi, q4);
1249 q_odd = pmadd(q_odd, r4_hi, q2);
1250 q_even = pmadd(q_even, r4_hi, q0);
1251 Packet q = pmadd(q_odd, r2_hi, q_even);
// p = r^2 * q.
1259 twoprod(r2_hi, r2_lo, q, p_hi, p_lo);
// p1 = C + p.
1261 Packet p1_hi, p1_lo;
1262 fast_twosum(C_hi, C_lo, p_hi, p_lo, p1_hi, p1_lo);
// p2 = r^2 * p1.
1264 Packet p2_hi, p2_lo;
1265 twoprod(r2_hi, r2_lo, p1_hi, p1_lo, p2_hi, p2_lo);
// p3 = 1 + p2.
1267 Packet p3_hi, p3_lo;
1268 fast_twosum(one, p2_hi, p2_lo, p3_hi, p3_lo);
// Result = p3 * r.
1271 twoprod(p3_hi, p3_lo, r_hi, r_lo, log2_x_hi, log2_x_lo);
// Functor computing 2^x.
// This generic version evaluates it as pexp(ln(2) * x).
1276template <
typename Scalar>
1277struct fast_accurate_exp2 {
1278 template <
typename Packet>
1280 Packet operator()(
const Packet& x) {
1282 return pexp(pmul(pset1<Packet>(Scalar(EIGEN_LN2)), x));
// Single-precision specialization of 2^x.
// A polynomial p(x) with coefficients p0..p4 is evaluated in plain arithmetic; the result
// is then 1 + x * (C + x * p(x)), where C is held as a (hi, lo) pair (its value is close
// to ln(2)) and the outer steps use double-word arithmetic.
// NOTE(review): the `template <>` line and the declaration of p_hi, p_lo are not visible
// in this excerpt; the valid input range of x is not stated in the visible lines.
1291struct fast_accurate_exp2<float> {
1292 template <
typename Packet>
1294 Packet operator()(
const Packet& x) {
// Coefficients used in plain floating-point arithmetic.
1306 const Packet p4 = pset1<Packet>(1.539513905e-4f);
1307 const Packet p3 = pset1<Packet>(1.340007293e-3f);
1308 const Packet p2 = pset1<Packet>(9.618283249e-3f);
1309 const Packet p1 = pset1<Packet>(5.550328270e-2f);
1310 const Packet p0 = pset1<Packet>(0.2402264923f);
// Coefficient C as a (hi, lo) pair.
1312 const Packet C_hi = pset1<Packet>(0.6931471825f);
1313 const Packet C_lo = pset1<Packet>(2.36836577e-08f);
1314 const Packet one = pset1<Packet>(1.0f);
// p(x) = p_even(x^2) + x * p_odd(x^2).
1319 Packet x2 = pmul(x,x);
1320 Packet p_even = pmadd(p4, x2, p2);
1321 Packet p_odd = pmadd(p3, x2, p1);
1322 p_even = pmadd(p_even, x2, p0);
1323 Packet p = pmadd(p_odd, x, p_even);
// (p_hi, p_lo) = p * x.
1329 twoprod(p, x, p_hi, p_lo);
// q1 = p * x + C.
1331 Packet q1_hi, q1_lo;
1332 twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
// q2 = q1 * x.
1334 Packet q2_hi, q2_lo;
1335 twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
// q3 = 1 + q2_hi; the low-order terms are added in at the end.
1337 Packet q3_hi, q3_lo;
1340 fast_twosum(one, q2_hi, q3_hi, q3_lo);
1341 return padd(q3_hi, padd(q2_lo, q3_lo));
// Double-precision specialization of 2^x.
// A polynomial p(x) with coefficients p0..p9 is evaluated in plain arithmetic; the result
// is then 1 + x * (C + x * p(x)), where C is held as a (hi, lo) pair (its value is close
// to ln(2)) and the outer steps use double-word arithmetic.
// NOTE(review): the `template <>` line and the declaration of p_hi, p_lo are not visible
// in this excerpt; the valid input range of x is not stated in the visible lines.
1349struct fast_accurate_exp2<double> {
1350 template <
typename Packet>
1352 Packet operator()(
const Packet& x) {
// Coefficients used in plain floating-point arithmetic.
1364 const Packet p9 = pset1<Packet>(4.431642109085495276e-10);
1365 const Packet p8 = pset1<Packet>(7.073829923303358410e-9);
1366 const Packet p7 = pset1<Packet>(1.017822306737031311e-7);
1367 const Packet p6 = pset1<Packet>(1.321543498017646657e-6);
1368 const Packet p5 = pset1<Packet>(1.525273342728892877e-5);
1369 const Packet p4 = pset1<Packet>(1.540353045780084423e-4);
1370 const Packet p3 = pset1<Packet>(1.333355814685869807e-3);
1371 const Packet p2 = pset1<Packet>(9.618129107593478832e-3);
1372 const Packet p1 = pset1<Packet>(5.550410866481961247e-2);
1373 const Packet p0 = pset1<Packet>(0.240226506959101332);
// Coefficient C as a (hi, lo) pair.
1374 const Packet C_hi = pset1<Packet>(0.693147180559945286);
1375 const Packet C_lo = pset1<Packet>(4.81927865669806721e-17);
1376 const Packet one = pset1<Packet>(1.0);
// p(x) = p_even(x^2) + x * p_odd(x^2), with the two Horner chains interleaved.
1381 Packet x2 = pmul(x,x);
1382 Packet p_even = pmadd(p8, x2, p6);
1383 Packet p_odd = pmadd(p9, x2, p7);
1384 p_even = pmadd(p_even, x2, p4);
1385 p_odd = pmadd(p_odd, x2, p5);
1386 p_even = pmadd(p_even, x2, p2);
1387 p_odd = pmadd(p_odd, x2, p3);
1388 p_even = pmadd(p_even, x2, p0);
1389 p_odd = pmadd(p_odd, x2, p1);
1390 Packet p = pmadd(p_odd, x, p_even);
// (p_hi, p_lo) = p * x.
1396 twoprod(p, x, p_hi, p_lo);
// q1 = p * x + C.
1398 Packet q1_hi, q1_lo;
1399 twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
// q2 = q1 * x.
1401 Packet q2_hi, q2_lo;
1402 twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
// q3 = 1 + q2_hi; the low-order terms are added in at the end.
1404 Packet q3_hi, q3_lo;
1407 fast_twosum(one, q2_hi, q3_hi, q3_lo);
1408 return padd(q3_hi, padd(q2_lo, q3_lo));
// Core of the power computation: evaluates x^y as 2^(y * log2(x)).
// Steps: split x into mantissa and exponent with pfrexp, obtain log2 of the
// mantissa as a hi/lo pair from accurate_log2, multiply both log2 terms by y,
// sum them, separate the result into n_z and a remainder r_z via
// absolute_split (integer/fractional parts, going by its use with pldexp),
// then return fast_accurate_exp2(r_z) scaled by 2^n_z through pldexp.
// No special-value handling (zero, inf, NaN, negative x) is done here; the
// only visible caller, generic_pow, passes |x| and overrides those cases.
// NOTE(review): e_x, f_hi/f_lo, n_z/r_z and n_r are used without a visible
// declaration; this listing seems to omit some lines.
1417template <
typename Packet>
1418EIGEN_STRONG_INLINE Packet generic_pow_impl(
const Packet& x,
const Packet& y) {
1419 typedef typename unpacket_traits<Packet>::type Scalar;
// m_x receives the mantissa, e_x the exponent (as a floating-point packet).
1422 Packet m_x = pfrexp(x, e_x);
// Where the mantissa is below sqrt(1/2), double it and decrement the exponent.
// The represented value m_x * 2^e_x is unchanged; only the range of m_x that
// is fed to the logarithm shifts.
1425 EIGEN_CONSTEXPR Scalar sqrt_half = Scalar(0.70710678118654752440);
1426 const Packet m_x_scale_mask = pcmp_lt(m_x, pset1<Packet>(sqrt_half));
1427 m_x = pselect(m_x_scale_mask, pmul(pset1<Packet>(Scalar(2)), m_x), m_x);
1428 e_x = pselect(m_x_scale_mask, psub(e_x, pset1<Packet>(Scalar(1))), e_x);
// log2(m_x) returned as the pair (rx_hi, rx_lo).
1431 Packet rx_hi, rx_lo;
1432 accurate_log2<Scalar>()(m_x, rx_hi, rx_lo);
// f1 = e_x * y and f2 = log2(m_x) * y, each kept as a hi/lo pair.
1436 Packet f1_hi, f1_lo, f2_hi, f2_lo;
1437 twoprod(e_x, y, f1_hi, f1_lo);
1438 twoprod(rx_hi, rx_lo, y, f2_hi, f2_lo);
// f = f1 + f2 = y * log2(x).
// NOTE(review): fast_twosum presumably requires |f1| >= |f2| (or f1 == 0) --
// confirm against its definition.
1446 fast_twosum(f1_hi, f1_lo, f2_hi, f2_lo, f_hi, f_lo);
// Split f_hi into n_z and a remainder r_z, then add the low part to r_z.
1450 absolute_split(f_hi, n_z, r_z);
1451 r_z = padd(r_z, f_lo);
// Adding f_lo may have pushed r_z out of range: split once more and move the
// extra part n_r into n_z.
1453 absolute_split(r_z, n_r, r_z);
1454 n_z = padd(n_z, n_r);
// x^y = 2^(n_z + r_z) = 2^r_z * 2^n_z.
1461 const Packet e_r = fast_accurate_exp2<Scalar>()(r_z);
1462 return pldexp(e_r, n_z);
// Packet implementation of pow(x, y) with explicit special-case handling.
// It builds a set of lane masks classifying x and y (zero, negative, one,
// infinite, NaN, integer, even, huge), derives from them the lanes whose
// result is fixed (1, NaN, +/-inf, 0), evaluates generic_pow_impl(|x|, y) for
// the general case, and picks the result per lane with a pselect chain.
// NOTE(review): the head of the final return expression is not visible in this
// listing: the last line closes six parentheses while only five pselect calls
// are opened in the visible lines, and y_is_one is otherwise unused, so an
// outermost selection on y_is_one is presumably omitted -- confirm.
1466template <
typename Packet>
1467EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_pow(
const Packet& x,
const Packet& y) {
1468 typedef typename unpacket_traits<Packet>::type Scalar;
// Broadcast constants used by the classification below.
1470 const Packet cst_pos_inf = pset1<Packet>(NumTraits<Scalar>::infinity());
1471 const Packet cst_neg_inf = pset1<Packet>(-NumTraits<Scalar>::infinity());
1472 const Packet cst_zero = pset1<Packet>(Scalar(0));
1473 const Packet cst_one = pset1<Packet>(Scalar(1));
1474 const Packet cst_nan = pset1<Packet>(NumTraits<Scalar>::quiet_NaN());
// ---- Classification of x ----
1476 const Packet abs_x = pabs(x);
1478 const Packet abs_x_is_zero = pcmp_eq(abs_x, cst_zero);
// Sign-bit test that also detects -0: mask x with the bits of -inf, OR in the
// bits of +inf, and compare with -inf. The comparison can only succeed when
// the sign bit of x is set.
1479 const Packet x_has_signbit = pcmp_eq(por(pand(x, cst_neg_inf), cst_pos_inf), cst_neg_inf);
// "Negative" excludes -0, which is tracked separately.
1480 const Packet x_is_neg = pandnot(x_has_signbit, abs_x_is_zero);
1481 const Packet x_is_neg_zero = pand(x_has_signbit, abs_x_is_zero);
1482 const Packet abs_x_is_inf = pcmp_eq(abs_x, cst_pos_inf);
1483 const Packet abs_x_is_one = pcmp_eq(abs_x, cst_one);
1484 const Packet abs_x_is_gt_one = pcmp_lt(cst_one, abs_x);
1485 const Packet abs_x_is_lt_one = pcmp_lt(abs_x, cst_one);
1486 const Packet x_is_one = pandnot(abs_x_is_one, x_is_neg);
1487 const Packet x_is_neg_one = pand(abs_x_is_one, x_is_neg);
// NaN is the only value that does not compare equal to itself.
1488 const Packet x_is_nan = pandnot(ptrue(x), pcmp_eq(x, x));
// ---- Classification of y ----
1491 const Packet abs_y = pabs(y);
1492 const Packet y_is_one = pcmp_eq(y, cst_one);
1493 const Packet abs_y_is_zero = pcmp_eq(abs_y, cst_zero);
1494 const Packet y_is_neg = pcmp_lt(y, cst_zero);
// Positive means neither zero nor negative (as written this mask is also set
// for NaN lanes of y).
1495 const Packet y_is_pos = pandnot(ptrue(y), por(abs_y_is_zero, y_is_neg));
1496 const Packet y_is_nan = pandnot(ptrue(y), pcmp_eq(y, y));
1497 const Packet abs_y_is_inf = pcmp_eq(abs_y, cst_pos_inf);
// Threshold above which |y| is treated as "huge": the result is then forced to
// 0 or inf depending on whether |x| is below or above 1 (see pow_is_zero /
// pow_is_inf below).
1498 EIGEN_CONSTEXPR Scalar huge_exponent =
1499 (NumTraits<Scalar>::max_exponent() * Scalar(EIGEN_LN2)) / NumTraits<Scalar>::epsilon();
1500 const Packet abs_y_is_huge = pcmp_le(pset1<Packet>(huge_exponent), pabs(y));
// Integer test: floor(y) == y. Even test: y/2 is unchanged by rounding.
1503 const Packet y_is_int = pcmp_eq(pfloor(y), y);
1504 const Packet y_div_2 = pmul(y, pset1<Packet>(Scalar(0.5)));
1505 const Packet y_is_even = pcmp_eq(pround(y_div_2), y_div_2);
// ---- Lanes with a predetermined result ----
// Negative finite x with a finite non-integer y has no real result.
1508 const Packet invalid_negative_x = pandnot(pandnot(pandnot(x_is_neg, abs_x_is_inf), y_is_int), abs_y_is_inf);
1509 const Packet pow_is_nan = por(invalid_negative_x, por(x_is_nan, y_is_nan));
// Result is 1 when x == 1, when y == 0, or when x == -1 and y is infinite or a
// valid even value.
1510 const Packet pow_is_one =
1511 por(por(x_is_one, abs_y_is_zero), pand(x_is_neg_one, por(abs_y_is_inf, pandnot(y_is_even, invalid_negative_x))));
// Result is 0 for: 0^(y>0), inf^(y<0), |x|<1 with huge positive y,
// |x|>1 with huge negative y.
1512 const Packet pow_is_zero = por(por(por(pand(abs_x_is_zero, y_is_pos), pand(abs_x_is_inf, y_is_neg)),
1513 pand(pand(abs_x_is_lt_one, abs_y_is_huge), y_is_pos)),
1514 pand(pand(abs_x_is_gt_one, abs_y_is_huge), y_is_neg));
// Result is infinite for the mirrored cases: 0^(y<0), inf^(y>0), |x|<1 with
// huge negative y, |x|>1 with huge positive y.
1515 const Packet pow_is_inf = por(por(por(pand(abs_x_is_zero, y_is_neg), pand(abs_x_is_inf, y_is_pos)),
1516 pand(pand(abs_x_is_lt_one, abs_y_is_huge), y_is_neg)),
1517 pand(pand(abs_x_is_gt_one, abs_y_is_huge), y_is_pos));
// Sign of an infinite result: -inf only when x is -inf, or x is -0 with
// negative y, and y is an odd integer; +inf otherwise.
1518 const Packet inf_val =
1519 pselect(pandnot(pand(por(pand(abs_x_is_inf, x_is_neg), pand(x_is_neg_zero, y_is_neg)), y_is_int), y_is_even),
1520 cst_neg_inf, cst_pos_inf);
// ---- General case ----
// Compute |x|^y and flip the sign when x is negative and y is not even.
1523 const Packet negate_pow_abs = pandnot(x_is_neg, y_is_even);
1524 const Packet pow_abs = generic_pow_impl(abs_x, y);
// Selection priority in the visible part of the chain: one, NaN, inf, zero,
// then the (possibly negated) general result.
1527 pselect(pow_is_one, cst_one,
1528 pselect(pow_is_nan, cst_nan,
1529 pselect(pow_is_inf, inf_val,
1530 pselect(pow_is_zero, cst_zero, pselect(negate_pow_abs, pnegate(pow_abs), pow_abs))))));
// Recursive step of a compile-time-unrolled Horner evaluation.
// run(x, coeff) evaluates the degree-(N-1) prefix recursively, multiplies it by
// x and adds coeff[N], so with the N == 0 case returning coeff[0] the result is
//   coeff[0]*x^N + coeff[1]*x^(N-1) + ... + coeff[N].
// coeff must therefore hold N + 1 entries, highest-order term first.
// NOTE(review): the enclosing struct header is not visible in this listing; the
// recursive call below indicates it is the primary template of ppolevl.
1572template <
typename Packet,
int N>
1574 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet& x,
const typename unpacket_traits<Packet>::type coeff[]) {
// N == 0 is handled by a separate specialization; reject negative N here.
1575 EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
1576 return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
// Terminating case of the ppolevl recursion (degree 0): the polynomial is the
// constant coeff[0], broadcast to every lane. x does not influence the result.
1580template <
typename Packet>
1581struct ppolevl<Packet, 0> {
1582 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet& x,
const typename unpacket_traits<Packet>::type coeff[]) {
// x is part of the common signature only; silence unused-parameter warnings.
1583 EIGEN_UNUSED_VARIABLE(x);
1584 return pset1<Packet>(coeff[0]);
// Evaluates a series in x from the N coefficients coef[0..N-1] with a
// three-term recurrence: each iteration forms b0 = x * b1 + coef[i] - b2, and
// the result is 0.5 * (b0 - b2).
// NOTE(review): this has the shape of a Clenshaw-style recurrence for a
// Chebyshev series -- confirm. The enclosing struct header, the declaration of
// b2 and the statements that shift b0/b1/b2 between iterations are not visible
// in this listing, which seems to omit some lines.
1640template <
typename Packet,
int N>
1643 static EIGEN_STRONG_INLINE Packet run(Packet x,
const typename unpacket_traits<Packet>::type coef[]) {
1644 typedef typename unpacket_traits<Packet>::type Scalar;
// Seed the recurrence with the first coefficient and a zero predecessor.
1645 Packet b0 = pset1<Packet>(coef[0]);
1646 Packet b1 = pset1<Packet>(
static_cast<Scalar
>(0.f));
// Consume the remaining coefficients coef[1..N-1] in order.
1649 for (
int i = 1; i < N; i++) {
1652 b0 = psub(pmadd(x, b1, pset1<Packet>(coef[i])), b2);
// Final combination: half the difference of the last and second-to-last terms.
1655 return pmul(pset1<Packet>(
static_cast<Scalar
>(0.5f)), psub(b0, b2));
Namespace containing all symbols from the Eigen library.
Definition A05_PortingFrom2To3.dox:1