10#ifndef EIGEN_REDUCTIONS_AVX_H
11#define EIGEN_REDUCTIONS_AVX_H
14#include "../../InternalHeaderCheck.h"
23EIGEN_STRONG_INLINE
int predux(
const Packet8i& a) {
24 Packet4i lo = _mm256_castsi256_si128(a);
25 Packet4i hi = _mm256_extractf128_si256(a, 1);
26 return predux(padd(lo, hi));
30EIGEN_STRONG_INLINE
int predux_mul(
const Packet8i& a) {
31 Packet4i lo = _mm256_castsi256_si128(a);
32 Packet4i hi = _mm256_extractf128_si256(a, 1);
33 return predux_mul(pmul(lo, hi));
37EIGEN_STRONG_INLINE
int predux_min(
const Packet8i& a) {
38 Packet4i lo = _mm256_castsi256_si128(a);
39 Packet4i hi = _mm256_extractf128_si256(a, 1);
40 return predux_min(pmin(lo, hi));
44EIGEN_STRONG_INLINE
int predux_max(
const Packet8i& a) {
45 Packet4i lo = _mm256_castsi256_si128(a);
46 Packet4i hi = _mm256_extractf128_si256(a, 1);
47 return predux_max(pmax(lo, hi));
51EIGEN_STRONG_INLINE
bool predux_any(
const Packet8i& a) {
52#ifdef EIGEN_VECTORIZE_AVX2
53 return _mm256_movemask_epi8(a) != 0x0;
55 return _mm256_movemask_ps(_mm256_castsi256_ps(a)) != 0x0;
62EIGEN_STRONG_INLINE uint32_t predux(
const Packet8ui& a) {
63 Packet4ui lo = _mm256_castsi256_si128(a);
64 Packet4ui hi = _mm256_extractf128_si256(a, 1);
65 return predux(padd(lo, hi));
69EIGEN_STRONG_INLINE uint32_t predux_mul(
const Packet8ui& a) {
70 Packet4ui lo = _mm256_castsi256_si128(a);
71 Packet4ui hi = _mm256_extractf128_si256(a, 1);
72 return predux_mul(pmul(lo, hi));
76EIGEN_STRONG_INLINE uint32_t predux_min(
const Packet8ui& a) {
77 Packet4ui lo = _mm256_castsi256_si128(a);
78 Packet4ui hi = _mm256_extractf128_si256(a, 1);
79 return predux_min(pmin(lo, hi));
83EIGEN_STRONG_INLINE uint32_t predux_max(
const Packet8ui& a) {
84 Packet4ui lo = _mm256_castsi256_si128(a);
85 Packet4ui hi = _mm256_extractf128_si256(a, 1);
86 return predux_max(pmax(lo, hi));
90EIGEN_STRONG_INLINE
bool predux_any(
const Packet8ui& a) {
91#ifdef EIGEN_VECTORIZE_AVX2
92 return _mm256_movemask_epi8(a) != 0x0;
94 return _mm256_movemask_ps(_mm256_castsi256_ps(a)) != 0x0;
// 64-bit integer packet reductions; compiled only when
// EIGEN_VECTORIZE_AVX2 is defined.
#ifdef EIGEN_VECTORIZE_AVX2

// Reduces a Packet4l to a single int64_t.
// The lower and upper 128-bit halves are combined lane-wise with padd and the
// resulting Packet2l is passed to predux for Packet2l.
template <>
EIGEN_STRONG_INLINE int64_t predux(const Packet4l& a) {
  Packet2l lo = _mm256_castsi256_si128(a);       // lower 128 bits
  Packet2l hi = _mm256_extractf128_si256(a, 1);  // upper 128 bits
  return predux(padd(lo, hi));
}

// Returns true when the most significant bit of at least one 64-bit lane is
// set: the register is reinterpreted as four doubles so that
// _mm256_movemask_pd can gather one bit per lane.
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4l& a) {
  return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0x0;
}

// Reduces a Packet4ul to a single uint64_t by converting the packet to
// Packet4l, reusing the Packet4l reduction, and casting the scalar result
// back to uint64_t.
template <>
EIGEN_STRONG_INLINE uint64_t predux(const Packet4ul& a) {
  return static_cast<uint64_t>(predux(Packet4l(a)));
}

// Returns true when the most significant bit of at least one 64-bit lane is
// set: the register is reinterpreted as four doubles so that
// _mm256_movemask_pd can gather one bit per lane.
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4ul& a) {
  return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0x0;
}

#endif  // EIGEN_VECTORIZE_AVX2
131EIGEN_STRONG_INLINE
float predux(
const Packet8f& a) {
132 Packet4f lo = _mm256_castps256_ps128(a);
133 Packet4f hi = _mm256_extractf128_ps(a, 1);
134 return predux(padd(lo, hi));
138EIGEN_STRONG_INLINE
float predux_mul(
const Packet8f& a) {
139 Packet4f lo = _mm256_castps256_ps128(a);
140 Packet4f hi = _mm256_extractf128_ps(a, 1);
141 return predux_mul(pmul(lo, hi));
145EIGEN_STRONG_INLINE
float predux_min(
const Packet8f& a) {
146 Packet4f lo = _mm256_castps256_ps128(a);
147 Packet4f hi = _mm256_extractf128_ps(a, 1);
148 return predux_min(pmin(lo, hi));
152EIGEN_STRONG_INLINE
float predux_min<PropagateNumbers>(
const Packet8f& a) {
153 Packet4f lo = _mm256_castps256_ps128(a);
154 Packet4f hi = _mm256_extractf128_ps(a, 1);
155 return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lo, hi));
159EIGEN_STRONG_INLINE
float predux_min<PropagateNaN>(
const Packet8f& a) {
160 Packet4f lo = _mm256_castps256_ps128(a);
161 Packet4f hi = _mm256_extractf128_ps(a, 1);
162 return predux_min<PropagateNaN>(pmin<PropagateNaN>(lo, hi));
166EIGEN_STRONG_INLINE
float predux_max(
const Packet8f& a) {
167 Packet4f lo = _mm256_castps256_ps128(a);
168 Packet4f hi = _mm256_extractf128_ps(a, 1);
169 return predux_max(pmax(lo, hi));
173EIGEN_STRONG_INLINE
float predux_max<PropagateNumbers>(
const Packet8f& a) {
174 Packet4f lo = _mm256_castps256_ps128(a);
175 Packet4f hi = _mm256_extractf128_ps(a, 1);
176 return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lo, hi));
180EIGEN_STRONG_INLINE
float predux_max<PropagateNaN>(
const Packet8f& a) {
181 Packet4f lo = _mm256_castps256_ps128(a);
182 Packet4f hi = _mm256_extractf128_ps(a, 1);
183 return predux_max<PropagateNaN>(pmax<PropagateNaN>(lo, hi));
187EIGEN_STRONG_INLINE
bool predux_any(
const Packet8f& a) {
188 return _mm256_movemask_ps(a) != 0x0;
194EIGEN_STRONG_INLINE
double predux(
const Packet4d& a) {
195 Packet2d lo = _mm256_castpd256_pd128(a);
196 Packet2d hi = _mm256_extractf128_pd(a, 1);
197 return predux(padd(lo, hi));
201EIGEN_STRONG_INLINE
double predux_mul(
const Packet4d& a) {
202 Packet2d lo = _mm256_castpd256_pd128(a);
203 Packet2d hi = _mm256_extractf128_pd(a, 1);
204 return predux_mul(pmul(lo, hi));
208EIGEN_STRONG_INLINE
double predux_min(
const Packet4d& a) {
209 Packet2d lo = _mm256_castpd256_pd128(a);
210 Packet2d hi = _mm256_extractf128_pd(a, 1);
211 return predux_min(pmin(lo, hi));
215EIGEN_STRONG_INLINE
double predux_min<PropagateNumbers>(
const Packet4d& a) {
216 Packet2d lo = _mm256_castpd256_pd128(a);
217 Packet2d hi = _mm256_extractf128_pd(a, 1);
218 return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lo, hi));
222EIGEN_STRONG_INLINE
double predux_min<PropagateNaN>(
const Packet4d& a) {
223 Packet2d lo = _mm256_castpd256_pd128(a);
224 Packet2d hi = _mm256_extractf128_pd(a, 1);
225 return predux_min<PropagateNaN>(pmin<PropagateNaN>(lo, hi));
229EIGEN_STRONG_INLINE
double predux_max(
const Packet4d& a) {
230 Packet2d lo = _mm256_castpd256_pd128(a);
231 Packet2d hi = _mm256_extractf128_pd(a, 1);
232 return predux_max(pmax(lo, hi));
236EIGEN_STRONG_INLINE
double predux_max<PropagateNumbers>(
const Packet4d& a) {
237 Packet2d lo = _mm256_castpd256_pd128(a);
238 Packet2d hi = _mm256_extractf128_pd(a, 1);
239 return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lo, hi));
243EIGEN_STRONG_INLINE
double predux_max<PropagateNaN>(
const Packet4d& a) {
244 Packet2d lo = _mm256_castpd256_pd128(a);
245 Packet2d hi = _mm256_extractf128_pd(a, 1);
246 return predux_max<PropagateNaN>(pmax<PropagateNaN>(lo, hi));
250EIGEN_STRONG_INLINE
bool predux_any(
const Packet4d& a) {
251 return _mm256_movemask_pd(a) != 0x0;
255#ifndef EIGEN_VECTORIZE_AVX512FP16
258EIGEN_STRONG_INLINE half predux(
const Packet8h& a) {
259 return static_cast<half
>(predux(half2float(a)));
263EIGEN_STRONG_INLINE half predux_mul(
const Packet8h& a) {
264 return static_cast<half
>(predux_mul(half2float(a)));
268EIGEN_STRONG_INLINE half predux_min(
const Packet8h& a) {
269 return static_cast<half
>(predux_min(half2float(a)));
273EIGEN_STRONG_INLINE half predux_min<PropagateNumbers>(
const Packet8h& a) {
274 return static_cast<half
>(predux_min<PropagateNumbers>(half2float(a)));
278EIGEN_STRONG_INLINE half predux_min<PropagateNaN>(
const Packet8h& a) {
279 return static_cast<half
>(predux_min<PropagateNaN>(half2float(a)));
283EIGEN_STRONG_INLINE half predux_max(
const Packet8h& a) {
284 return static_cast<half
>(predux_max(half2float(a)));
288EIGEN_STRONG_INLINE half predux_max<PropagateNumbers>(
const Packet8h& a) {
289 return static_cast<half
>(predux_max<PropagateNumbers>(half2float(a)));
293EIGEN_STRONG_INLINE half predux_max<PropagateNaN>(
const Packet8h& a) {
294 return static_cast<half
>(predux_max<PropagateNaN>(half2float(a)));
298EIGEN_STRONG_INLINE
bool predux_any(
const Packet8h& a) {
299 return _mm_movemask_epi8(a) != 0;
306EIGEN_STRONG_INLINE bfloat16 predux(
const Packet8bf& a) {
307 return static_cast<bfloat16
>(predux<Packet8f>(Bf16ToF32(a)));
311EIGEN_STRONG_INLINE bfloat16 predux_mul(
const Packet8bf& a) {
312 return static_cast<bfloat16
>(predux_mul<Packet8f>(Bf16ToF32(a)));
316EIGEN_STRONG_INLINE bfloat16 predux_min(
const Packet8bf& a) {
317 return static_cast<bfloat16
>(predux_min(Bf16ToF32(a)));
321EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNumbers>(
const Packet8bf& a) {
322 return static_cast<bfloat16
>(predux_min<PropagateNumbers>(Bf16ToF32(a)));
326EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNaN>(
const Packet8bf& a) {
327 return static_cast<bfloat16
>(predux_min<PropagateNaN>(Bf16ToF32(a)));
331EIGEN_STRONG_INLINE bfloat16 predux_max(
const Packet8bf& a) {
332 return static_cast<bfloat16
>(predux_max<Packet8f>(Bf16ToF32(a)));
336EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNumbers>(
const Packet8bf& a) {
337 return static_cast<bfloat16
>(predux_max<PropagateNumbers>(Bf16ToF32(a)));
341EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNaN>(
const Packet8bf& a) {
342 return static_cast<bfloat16
>(predux_max<PropagateNaN>(Bf16ToF32(a)));
346EIGEN_STRONG_INLINE
bool predux_any(
const Packet8bf& a) {
347 return _mm_movemask_epi8(a) != 0;
Namespace containing all symbols from the Eigen library.
Definition B01_Experimental.dox:1