10#ifndef EIGEN_REDUCTIONS_SSE_H
11#define EIGEN_REDUCTIONS_SSE_H
14#include "../../InternalHeaderCheck.h"
20template <
typename Packet>
21struct sse_add_wrapper {
22 static EIGEN_STRONG_INLINE Packet packetOp(
const Packet& a,
const Packet& b) {
return padd<Packet>(a, b); }
25template <
typename Packet>
26struct sse_mul_wrapper {
27 static EIGEN_STRONG_INLINE Packet packetOp(
const Packet& a,
const Packet& b) {
return pmul<Packet>(a, b); }
30template <
typename Packet>
31struct sse_min_wrapper {
32 static EIGEN_STRONG_INLINE Packet packetOp(
const Packet& a,
const Packet& b) {
return pmin<Packet>(a, b); }
35template <
int NaNPropagation,
typename Packet>
36struct sse_min_prop_wrapper {
37 static EIGEN_STRONG_INLINE Packet packetOp(
const Packet& a,
const Packet& b) {
38 return pmin<NaNPropagation, Packet>(a, b);
42template <
typename Packet>
43struct sse_max_wrapper {
44 static EIGEN_STRONG_INLINE Packet packetOp(
const Packet& a,
const Packet& b) {
return pmax<Packet>(a, b); }
47template <
int NaNPropagation,
typename Packet>
48struct sse_max_prop_wrapper {
49 static EIGEN_STRONG_INLINE Packet packetOp(
const Packet& a,
const Packet& b) {
50 return pmax<NaNPropagation, Packet>(a, b);
// Primary template of the horizontal-reduction helper. It is only declared
// here; the usable definitions are the per-packet partial specializations,
// each of which provides a static run(const Packet&) built on Op::packetOp.
template <typename Packet, typename Op>
struct sse_predux_common;
57template <
typename Packet>
58struct sse_predux_impl : sse_predux_common<Packet, sse_add_wrapper<Packet>> {};
60template <
typename Packet>
61struct sse_predux_mul_impl : sse_predux_common<Packet, sse_mul_wrapper<Packet>> {};
63template <
typename Packet>
64struct sse_predux_min_impl : sse_predux_common<Packet, sse_min_wrapper<Packet>> {};
66template <
int NaNPropagation,
typename Packet>
67struct sse_predux_min_prop_impl : sse_predux_common<Packet, sse_min_prop_wrapper<NaNPropagation, Packet>> {};
69template <
typename Packet>
70struct sse_predux_max_impl : sse_predux_common<Packet, sse_max_wrapper<Packet>> {};
72template <
int NaNPropagation,
typename Packet>
73struct sse_predux_max_prop_impl : sse_predux_common<Packet, sse_max_prop_wrapper<NaNPropagation, Packet>> {};
78EIGEN_STRONG_INLINE
bool predux(
const Packet16b& a) {
79 Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a, a));
80 return (pfirst(tmp) != 0) || (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) != 0);
84EIGEN_STRONG_INLINE
bool predux_mul(
const Packet16b& a) {
85 Packet4i tmp = _mm_and_si128(a, _mm_unpackhi_epi64(a, a));
86 return ((pfirst<Packet4i>(tmp) == 0x01010101) && (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) == 0x01010101));
90EIGEN_STRONG_INLINE
bool predux_min(
const Packet16b& a) {
95EIGEN_STRONG_INLINE
bool predux_max(
const Packet16b& a) {
100EIGEN_STRONG_INLINE
bool predux_any(
const Packet16b& a) {
106template <
typename Op>
107struct sse_predux_common<Packet4i, Op> {
108 static EIGEN_STRONG_INLINE
int run(
const Packet4i& a) {
110 tmp = Op::packetOp(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)));
111 tmp = Op::packetOp(tmp, _mm_unpackhi_epi32(tmp, tmp));
112 return _mm_cvtsi128_si32(tmp);
117EIGEN_STRONG_INLINE
int predux(
const Packet4i& a) {
118 return sse_predux_impl<Packet4i>::run(a);
122EIGEN_STRONG_INLINE
int predux_mul(
const Packet4i& a) {
123 return sse_predux_mul_impl<Packet4i>::run(a);
// The min/max reductions for Packet4i are only provided when
// EIGEN_VECTORIZE_SSE4_1 is defined.
#ifdef EIGEN_VECTORIZE_SSE4_1
// Reduces the lanes of a Packet4i with pmin (see sse_predux_min_impl).
template <>
EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) {
  return sse_predux_min_impl<Packet4i>::run(a);
}

// Reduces the lanes of a Packet4i with pmax (see sse_predux_max_impl).
template <>
EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) {
  return sse_predux_max_impl<Packet4i>::run(a);
}
#endif
139EIGEN_STRONG_INLINE
bool predux_any(
const Packet4i& a) {
140 return _mm_movemask_ps(_mm_castsi128_ps(a)) != 0x0;
145template <
typename Op>
146struct sse_predux_common<Packet4ui, Op> {
147 static EIGEN_STRONG_INLINE uint32_t run(
const Packet4ui& a) {
149 tmp = Op::packetOp(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)));
150 tmp = Op::packetOp(tmp, _mm_unpackhi_epi32(tmp, tmp));
151 return static_cast<uint32_t
>(_mm_cvtsi128_si32(tmp));
156EIGEN_STRONG_INLINE uint32_t predux(
const Packet4ui& a) {
157 return sse_predux_impl<Packet4ui>::run(a);
161EIGEN_STRONG_INLINE uint32_t predux_mul(
const Packet4ui& a) {
162 return sse_predux_mul_impl<Packet4ui>::run(a);
// The min/max reductions for Packet4ui are only provided when
// EIGEN_VECTORIZE_SSE4_1 is defined.
#ifdef EIGEN_VECTORIZE_SSE4_1
// Reduces the lanes of a Packet4ui with pmin (see sse_predux_min_impl).
template <>
EIGEN_STRONG_INLINE uint32_t predux_min(const Packet4ui& a) {
  return sse_predux_min_impl<Packet4ui>::run(a);
}

// Reduces the lanes of a Packet4ui with pmax (see sse_predux_max_impl).
template <>
EIGEN_STRONG_INLINE uint32_t predux_max(const Packet4ui& a) {
  return sse_predux_max_impl<Packet4ui>::run(a);
}
#endif
178EIGEN_STRONG_INLINE
bool predux_any(
const Packet4ui& a) {
179 return _mm_movemask_ps(_mm_castsi128_ps(a)) != 0x0;
184template <
typename Op>
185struct sse_predux_common<Packet2l, Op> {
186 static EIGEN_STRONG_INLINE int64_t run(
const Packet2l& a) {
188 tmp = Op::packetOp(a, _mm_unpackhi_epi64(a, a));
194EIGEN_STRONG_INLINE int64_t predux(
const Packet2l& a) {
195 return sse_predux_impl<Packet2l>::run(a);
199EIGEN_STRONG_INLINE
bool predux_any(
const Packet2l& a) {
200 return _mm_movemask_pd(_mm_castsi128_pd(a)) != 0x0;
205template <
typename Op>
206struct sse_predux_common<Packet4f, Op> {
207 static EIGEN_STRONG_INLINE
float run(
const Packet4f& a) {
209 tmp = Op::packetOp(a, _mm_movehl_ps(a, a));
210#ifdef EIGEN_VECTORIZE_SSE3
211 tmp = Op::packetOp(tmp, _mm_movehdup_ps(tmp));
213 tmp = Op::packetOp(tmp, _mm_shuffle_ps(tmp, tmp, 1));
215 return _mm_cvtss_f32(tmp);
220EIGEN_STRONG_INLINE
float predux(
const Packet4f& a) {
221 return sse_predux_impl<Packet4f>::run(a);
225EIGEN_STRONG_INLINE
float predux_mul(
const Packet4f& a) {
226 return sse_predux_mul_impl<Packet4f>::run(a);
230EIGEN_STRONG_INLINE
float predux_min(
const Packet4f& a) {
231 return sse_predux_min_impl<Packet4f>::run(a);
235EIGEN_STRONG_INLINE
float predux_min<PropagateNumbers>(
const Packet4f& a) {
236 return sse_predux_min_prop_impl<PropagateNumbers, Packet4f>::run(a);
240EIGEN_STRONG_INLINE
float predux_min<PropagateNaN>(
const Packet4f& a) {
241 return sse_predux_min_prop_impl<PropagateNaN, Packet4f>::run(a);
245EIGEN_STRONG_INLINE
float predux_max(
const Packet4f& a) {
246 return sse_predux_max_impl<Packet4f>::run(a);
250EIGEN_STRONG_INLINE
float predux_max<PropagateNumbers>(
const Packet4f& a) {
251 return sse_predux_max_prop_impl<PropagateNumbers, Packet4f>::run(a);
255EIGEN_STRONG_INLINE
float predux_max<PropagateNaN>(
const Packet4f& a) {
256 return sse_predux_max_prop_impl<PropagateNaN, Packet4f>::run(a);
260EIGEN_STRONG_INLINE
bool predux_any(
const Packet4f& a) {
261 return _mm_movemask_ps(a) != 0x0;
266template <
typename Op>
267struct sse_predux_common<Packet2d, Op> {
268 static EIGEN_STRONG_INLINE
double run(
const Packet2d& a) {
270 tmp = Op::packetOp(a, _mm_unpackhi_pd(a, a));
271 return _mm_cvtsd_f64(tmp);
276EIGEN_STRONG_INLINE
double predux(
const Packet2d& a) {
277 return sse_predux_impl<Packet2d>::run(a);
281EIGEN_STRONG_INLINE
double predux_mul(
const Packet2d& a) {
282 return sse_predux_mul_impl<Packet2d>::run(a);
286EIGEN_STRONG_INLINE
double predux_min(
const Packet2d& a) {
287 return sse_predux_min_impl<Packet2d>::run(a);
291EIGEN_STRONG_INLINE
double predux_min<PropagateNumbers>(
const Packet2d& a) {
292 return sse_predux_min_prop_impl<PropagateNumbers, Packet2d>::run(a);
296EIGEN_STRONG_INLINE
double predux_min<PropagateNaN>(
const Packet2d& a) {
297 return sse_predux_min_prop_impl<PropagateNaN, Packet2d>::run(a);
301EIGEN_STRONG_INLINE
double predux_max(
const Packet2d& a) {
302 return sse_predux_max_impl<Packet2d>::run(a);
306EIGEN_STRONG_INLINE
double predux_max<PropagateNumbers>(
const Packet2d& a) {
307 return sse_predux_max_prop_impl<PropagateNumbers, Packet2d>::run(a);
311EIGEN_STRONG_INLINE
double predux_max<PropagateNaN>(
const Packet2d& a) {
312 return sse_predux_max_prop_impl<PropagateNaN, Packet2d>::run(a);
316EIGEN_STRONG_INLINE
bool predux_any(
const Packet2d& a) {
317 return _mm_movemask_pd(a) != 0x0;
// Eigen: namespace containing all symbols from the Eigen library
// (definition: B01_Experimental.dox:1).