Eigen  3.4.90 (git rev 9589cc4e7fd8e4538bedef80dd36c7738977a8be)
 
All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
Loading...
Searching...
No Matches
Complex.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_COMPLEX_AVX_H
11#define EIGEN_COMPLEX_AVX_H
12
13// IWYU pragma: private
14#include "../../InternalHeaderCheck.h"
15
16namespace Eigen {
17
18namespace internal {
19
20//---------- float ----------
21struct Packet4cf {
22 EIGEN_STRONG_INLINE Packet4cf() {}
23 EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
24 __m256 v;
25};
26
27#ifndef EIGEN_VECTORIZE_AVX512
28template <>
29struct packet_traits<std::complex<float> > : default_packet_traits {
30 typedef Packet4cf type;
31 typedef Packet2cf half;
32 enum {
33 Vectorizable = 1,
34 AlignedOnScalar = 1,
35 size = 4,
36
37 HasAdd = 1,
38 HasSub = 1,
39 HasMul = 1,
40 HasDiv = 1,
41 HasNegate = 1,
42 HasSqrt = 1,
43 HasLog = 1,
44 HasExp = 1,
45 HasAbs = 0,
46 HasAbs2 = 0,
47 HasMin = 0,
48 HasMax = 0,
49 HasSetLinear = 0
50 };
51};
52#endif
53
54template <>
55struct unpacket_traits<Packet4cf> {
56 typedef std::complex<float> type;
57 typedef Packet2cf half;
58 typedef Packet8f as_real;
59 enum {
60 size = 4,
61 alignment = Aligned32,
62 vectorizable = true,
63 masked_load_available = false,
64 masked_store_available = false
65 };
66};
67
68template <>
69EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
70 return Packet4cf(_mm256_add_ps(a.v, b.v));
71}
72template <>
73EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
74 return Packet4cf(_mm256_sub_ps(a.v, b.v));
75}
76template <>
77EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a) {
78 return Packet4cf(pnegate(a.v));
79}
80template <>
81EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a) {
82 const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000, 0x80000000, 0x00000000, 0x80000000, 0x00000000,
83 0x80000000, 0x00000000, 0x80000000));
84 return Packet4cf(_mm256_xor_ps(a.v, mask));
85}
86
87template <>
88EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) {
89 __m256 tmp1 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)));
90 __m256 tmp2 = _mm256_moveldup_ps(a.v);
91#ifdef EIGEN_VECTORIZE_FMA
92 __m256 result = _mm256_fmaddsub_ps(tmp2, b.v, tmp1);
93#else
94 __m256 result = _mm256_addsub_ps(_mm256_mul_ps(tmp2, b.v), tmp1);
95#endif
96 return Packet4cf(result);
97}
98
99template <>
100EIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) {
101 __m256 eq = _mm256_cmp_ps(a.v, b.v, _CMP_EQ_OQ);
102 return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)));
103}
104
105template <>
106EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(const Packet4cf& a) {
107 return Packet4cf(ptrue(Packet8f(a.v)));
108}
109template <>
110EIGEN_STRONG_INLINE Packet4cf pand<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
111 return Packet4cf(_mm256_and_ps(a.v, b.v));
112}
113template <>
114EIGEN_STRONG_INLINE Packet4cf por<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
115 return Packet4cf(_mm256_or_ps(a.v, b.v));
116}
117template <>
118EIGEN_STRONG_INLINE Packet4cf pxor<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
119 return Packet4cf(_mm256_xor_ps(a.v, b.v));
120}
121template <>
122EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
123 return Packet4cf(_mm256_andnot_ps(b.v, a.v));
124}
125
126template <>
127EIGEN_STRONG_INLINE Packet4cf pload<Packet4cf>(const std::complex<float>* from) {
128 EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(_mm256_load_ps(&numext::real_ref(*from)));
129}
130template <>
131EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) {
132 EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(_mm256_loadu_ps(&numext::real_ref(*from)));
133}
134
135template <>
136EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from) {
137 const float re = std::real(from);
138 const float im = std::imag(from);
139 return Packet4cf(_mm256_set_ps(im, re, im, re, im, re, im, re));
140}
141
142template <>
143EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from) {
144 // FIXME The following might be optimized using _mm256_movedup_pd
145 Packet2cf a = ploaddup<Packet2cf>(from);
146 Packet2cf b = ploaddup<Packet2cf>(from + 1);
147 return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
148}
149
150template <>
151EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) {
152 EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(&numext::real_ref(*to), from.v);
153}
154template <>
155EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) {
156 EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(&numext::real_ref(*to), from.v);
157}
158
159template <>
160EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from,
161 Index stride) {
162 return Packet4cf(_mm256_set_ps(std::imag(from[3 * stride]), std::real(from[3 * stride]), std::imag(from[2 * stride]),
163 std::real(from[2 * stride]), std::imag(from[1 * stride]), std::real(from[1 * stride]),
164 std::imag(from[0 * stride]), std::real(from[0 * stride])));
165}
166
167template <>
168EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from,
169 Index stride) {
170 __m128 low = _mm256_extractf128_ps(from.v, 0);
171 to[stride * 0] =
172 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
173 to[stride * 1] =
174 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)), _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
175
176 __m128 high = _mm256_extractf128_ps(from.v, 1);
177 to[stride * 2] =
178 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
179 to[stride * 3] =
180 std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)), _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
181}
182
183template <>
184EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a) {
185 return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
186}
187
188template <>
189EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
190 __m128 low = _mm256_extractf128_ps(a.v, 0);
191 __m128 high = _mm256_extractf128_ps(a.v, 1);
192 __m128d lowd = _mm_castps_pd(low);
193 __m128d highd = _mm_castps_pd(high);
194 low = _mm_castpd_ps(_mm_shuffle_pd(lowd, lowd, 0x1));
195 high = _mm_castpd_ps(_mm_shuffle_pd(highd, highd, 0x1));
196 __m256 result = _mm256_setzero_ps();
197 result = _mm256_insertf128_ps(result, low, 1);
198 result = _mm256_insertf128_ps(result, high, 0);
199 return Packet4cf(result);
200}
201
202template <>
203EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a) {
204 return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v, 0)), Packet2cf(_mm256_extractf128_ps(a.v, 1))));
205}
206
207template <>
208EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a) {
209 return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)), Packet2cf(_mm256_extractf128_ps(a.v, 1))));
210}
211
// NOTE(review): presumably generates the conj_helper specializations for
// products mixing Packet4cf with its real packet Packet8f; the macro is
// defined outside this file, so confirm against its definition.
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf, Packet8f)
213
214template <>
215EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b) {
216 return pdiv_complex(a, b);
217}
218
219template <>
220EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x) {
221 return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0, 1)));
222}
223
224//---------- double ----------
225struct Packet2cd {
226 EIGEN_STRONG_INLINE Packet2cd() {}
227 EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
228 __m256d v;
229};
230
231#ifndef EIGEN_VECTORIZE_AVX512
232template <>
233struct packet_traits<std::complex<double> > : default_packet_traits {
234 typedef Packet2cd type;
235 typedef Packet1cd half;
236 enum {
237 Vectorizable = 1,
238 AlignedOnScalar = 0,
239 size = 2,
240
241 HasAdd = 1,
242 HasSub = 1,
243 HasMul = 1,
244 HasDiv = 1,
245 HasNegate = 1,
246 HasSqrt = 1,
247 HasLog = 1,
248 HasAbs = 0,
249 HasAbs2 = 0,
250 HasMin = 0,
251 HasMax = 0,
252 HasSetLinear = 0
253 };
254};
255#endif
256
257template <>
258struct unpacket_traits<Packet2cd> {
259 typedef std::complex<double> type;
260 typedef Packet1cd half;
261 typedef Packet4d as_real;
262 enum {
263 size = 2,
264 alignment = Aligned32,
265 vectorizable = true,
266 masked_load_available = false,
267 masked_store_available = false
268 };
269};
270
271template <>
272EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
273 return Packet2cd(_mm256_add_pd(a.v, b.v));
274}
275template <>
276EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
277 return Packet2cd(_mm256_sub_pd(a.v, b.v));
278}
279template <>
280EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) {
281 return Packet2cd(pnegate(a.v));
282}
283template <>
284EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a) {
285 const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000, 0x0, 0x0, 0x0, 0x80000000, 0x0, 0x0, 0x0));
286 return Packet2cd(_mm256_xor_pd(a.v, mask));
287}
288
289template <>
290EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) {
291 __m256d tmp1 = _mm256_mul_pd(_mm256_permute_pd(a.v, 0xF), _mm256_permute_pd(b.v, 0x5));
292 __m256d tmp2 = _mm256_movedup_pd(a.v);
293#ifdef EIGEN_VECTORIZE_FMA
294 __m256d result = _mm256_fmaddsub_pd(tmp2, b.v, tmp1);
295#else
296 __m256d result = _mm256_addsub_pd(_mm256_mul_pd(tmp2, b.v), tmp1);
297#endif
298 return Packet2cd(result);
299}
300
301template <>
302EIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) {
303 __m256d eq = _mm256_cmp_pd(a.v, b.v, _CMP_EQ_OQ);
304 return Packet2cd(pand(eq, _mm256_permute_pd(eq, 0x5)));
305}
306
307template <>
308EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(const Packet2cd& a) {
309 return Packet2cd(ptrue(Packet4d(a.v)));
310}
311template <>
312EIGEN_STRONG_INLINE Packet2cd pand<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
313 return Packet2cd(_mm256_and_pd(a.v, b.v));
314}
315template <>
316EIGEN_STRONG_INLINE Packet2cd por<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
317 return Packet2cd(_mm256_or_pd(a.v, b.v));
318}
319template <>
320EIGEN_STRONG_INLINE Packet2cd pxor<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
321 return Packet2cd(_mm256_xor_pd(a.v, b.v));
322}
323template <>
324EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
325 return Packet2cd(_mm256_andnot_pd(b.v, a.v));
326}
327
328template <>
329EIGEN_STRONG_INLINE Packet2cd pload<Packet2cd>(const std::complex<double>* from) {
330 EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(_mm256_load_pd((const double*)from));
331}
332template <>
333EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from) {
334 EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(_mm256_loadu_pd((const double*)from));
335}
336
337template <>
338EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from) {
339 // in case casting to a __m128d* is really not safe, then we can still fallback to this version: (much slower though)
340 // return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
341 return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
342}
343
344template <>
345EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) {
346 return pset1<Packet2cd>(*from);
347}
348
349template <>
350EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet2cd& from) {
351 EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd((double*)to, from.v);
352}
353template <>
354EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet2cd& from) {
355 EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd((double*)to, from.v);
356}
357
358template <>
359EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from,
360 Index stride) {
361 return Packet2cd(_mm256_set_pd(std::imag(from[1 * stride]), std::real(from[1 * stride]), std::imag(from[0 * stride]),
362 std::real(from[0 * stride])));
363}
364
365template <>
366EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from,
367 Index stride) {
368 __m128d low = _mm256_extractf128_pd(from.v, 0);
369 to[stride * 0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
370 __m128d high = _mm256_extractf128_pd(from.v, 1);
371 to[stride * 1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
372}
373
374template <>
375EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a) {
376 __m128d low = _mm256_extractf128_pd(a.v, 0);
377 EIGEN_ALIGN16 double res[2];
378 _mm_store_pd(res, low);
379 return std::complex<double>(res[0], res[1]);
380}
381
382template <>
383EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
384 __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
385 return Packet2cd(result);
386}
387
388template <>
389EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a) {
390 return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v, 0)), Packet1cd(_mm256_extractf128_pd(a.v, 1))));
391}
392
393template <>
394EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a) {
395 return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v, 0)), Packet1cd(_mm256_extractf128_pd(a.v, 1))));
396}
397
// NOTE(review): presumably generates the conj_helper specializations for
// products mixing Packet2cd with its real packet Packet4d; the macro is
// defined outside this file, so confirm against its definition.
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd, Packet4d)
399
400template <>
401EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b) {
402 return pdiv_complex(a, b);
403}
404
405template <>
406EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x) {
407 return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
408}
409
410EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4cf, 4>& kernel) {
411 __m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
412 __m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
413 __m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
414 __m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
415
416 __m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
417 __m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
418 __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
419 __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
420
421 kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
422 kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
423 kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
424 kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
425}
426
427EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cd, 2>& kernel) {
428 __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0 + (2 << 4));
429 kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1 + (3 << 4));
430 kernel.packet[0].v = tmp;
431}
432
433template <>
434EIGEN_STRONG_INLINE Packet2cd psqrt<Packet2cd>(const Packet2cd& a) {
435 return psqrt_complex<Packet2cd>(a);
436}
437
438template <>
439EIGEN_STRONG_INLINE Packet4cf psqrt<Packet4cf>(const Packet4cf& a) {
440 return psqrt_complex<Packet4cf>(a);
441}
442
443template <>
444EIGEN_STRONG_INLINE Packet2cd plog<Packet2cd>(const Packet2cd& a) {
445 return plog_complex<Packet2cd>(a);
446}
447
448template <>
449EIGEN_STRONG_INLINE Packet4cf plog<Packet4cf>(const Packet4cf& a) {
450 return plog_complex<Packet4cf>(a);
451}
452
453template <>
454EIGEN_STRONG_INLINE Packet4cf pexp<Packet4cf>(const Packet4cf& a) {
455 return pexp_complex<Packet4cf>(a);
456}
457
458#ifdef EIGEN_VECTORIZE_FMA
459// std::complex<float>
460template <>
461EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
462 __m256 a_odd = _mm256_movehdup_ps(a.v);
463 __m256 a_even = _mm256_moveldup_ps(a.v);
464 __m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
465 __m256 result = _mm256_fmaddsub_ps(a_even, b.v, _mm256_fmaddsub_ps(a_odd, b_swap, c.v));
466 return Packet4cf(result);
467}
468template <>
469EIGEN_STRONG_INLINE Packet4cf pmsub(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
470 __m256 a_odd = _mm256_movehdup_ps(a.v);
471 __m256 a_even = _mm256_moveldup_ps(a.v);
472 __m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
473 __m256 result = _mm256_fmaddsub_ps(a_even, b.v, _mm256_fmsubadd_ps(a_odd, b_swap, c.v));
474 return Packet4cf(result);
475}
476template <>
477EIGEN_STRONG_INLINE Packet4cf pnmadd(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
478 __m256 a_odd = _mm256_movehdup_ps(a.v);
479 __m256 a_even = _mm256_moveldup_ps(a.v);
480 __m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
481 __m256 result = _mm256_fmaddsub_ps(a_odd, b_swap, _mm256_fmaddsub_ps(a_even, b.v, c.v));
482 return Packet4cf(result);
483}
484template <>
485EIGEN_STRONG_INLINE Packet4cf pnmsub(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) {
486 __m256 a_odd = _mm256_movehdup_ps(a.v);
487 __m256 a_even = _mm256_moveldup_ps(a.v);
488 __m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1));
489 __m256 result = _mm256_fmaddsub_ps(a_odd, b_swap, _mm256_fmsubadd_ps(a_even, b.v, c.v));
490 return Packet4cf(result);
491}
492// std::complex<double>
493template <>
494EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
495 __m256d a_odd = _mm256_permute_pd(a.v, 0xF);
496 __m256d a_even = _mm256_movedup_pd(a.v);
497 __m256d b_swap = _mm256_permute_pd(b.v, 0x5);
498 __m256d result = _mm256_fmaddsub_pd(a_even, b.v, _mm256_fmaddsub_pd(a_odd, b_swap, c.v));
499 return Packet2cd(result);
500}
501template <>
502EIGEN_STRONG_INLINE Packet2cd pmsub(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
503 __m256d a_odd = _mm256_permute_pd(a.v, 0xF);
504 __m256d a_even = _mm256_movedup_pd(a.v);
505 __m256d b_swap = _mm256_permute_pd(b.v, 0x5);
506 __m256d result = _mm256_fmaddsub_pd(a_even, b.v, _mm256_fmsubadd_pd(a_odd, b_swap, c.v));
507 return Packet2cd(result);
508}
509template <>
510EIGEN_STRONG_INLINE Packet2cd pnmadd(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
511 __m256d a_odd = _mm256_permute_pd(a.v, 0xF);
512 __m256d a_even = _mm256_movedup_pd(a.v);
513 __m256d b_swap = _mm256_permute_pd(b.v, 0x5);
514 __m256d result = _mm256_fmaddsub_pd(a_odd, b_swap, _mm256_fmaddsub_pd(a_even, b.v, c.v));
515 return Packet2cd(result);
516}
517template <>
518EIGEN_STRONG_INLINE Packet2cd pnmsub(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) {
519 __m256d a_odd = _mm256_permute_pd(a.v, 0xF);
520 __m256d a_even = _mm256_movedup_pd(a.v);
521 __m256d b_swap = _mm256_permute_pd(b.v, 0x5);
522 __m256d result = _mm256_fmaddsub_pd(a_odd, b_swap, _mm256_fmsubadd_pd(a_even, b.v, c.v));
523 return Packet2cd(result);
524}
525#endif
526} // end namespace internal
527
528} // end namespace Eigen
529
530#endif // EIGEN_COMPLEX_AVX_H
@ Aligned32
Definition Constants.h:238
Namespace containing all symbols from the Eigen library.
Definition B01_Experimental.dox:1
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:82