Eigen  5.0.1-dev+284dcc12
 
Loading...
Searching...
No Matches
Reductions.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2025 Charlie Schlosser <cs.schlosser@gmail.com>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_REDUCTIONS_SSE_H
11#define EIGEN_REDUCTIONS_SSE_H
12
13// IWYU pragma: private
14#include "../../InternalHeaderCheck.h"
15
16namespace Eigen {
17
18namespace internal {
19
20template <typename Packet>
21struct sse_add_wrapper {
22 static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return padd<Packet>(a, b); }
23};
24
25template <typename Packet>
26struct sse_mul_wrapper {
27 static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return pmul<Packet>(a, b); }
28};
29
30template <typename Packet>
31struct sse_min_wrapper {
32 static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return pmin<Packet>(a, b); }
33};
34
35template <int NaNPropagation, typename Packet>
36struct sse_min_prop_wrapper {
37 static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) {
38 return pmin<NaNPropagation, Packet>(a, b);
39 }
40};
41
42template <typename Packet>
43struct sse_max_wrapper {
44 static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return pmax<Packet>(a, b); }
45};
46
47template <int NaNPropagation, typename Packet>
48struct sse_max_prop_wrapper {
49 static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) {
50 return pmax<NaNPropagation, Packet>(a, b);
51 }
52};
53
// Primary template for the shared horizontal-reduction kernel. Only declared
// here; each supported packet type supplies a partial specialization whose
// static run() combines the lanes using ReduceOp::packetOp.
template <typename PacketType, typename ReduceOp>
struct sse_predux_common;
56
57template <typename Packet>
58struct sse_predux_impl : sse_predux_common<Packet, sse_add_wrapper<Packet>> {};
59
60template <typename Packet>
61struct sse_predux_mul_impl : sse_predux_common<Packet, sse_mul_wrapper<Packet>> {};
62
63template <typename Packet>
64struct sse_predux_min_impl : sse_predux_common<Packet, sse_min_wrapper<Packet>> {};
65
66template <int NaNPropagation, typename Packet>
67struct sse_predux_min_prop_impl : sse_predux_common<Packet, sse_min_prop_wrapper<NaNPropagation, Packet>> {};
68
69template <typename Packet>
70struct sse_predux_max_impl : sse_predux_common<Packet, sse_max_wrapper<Packet>> {};
71
72template <int NaNPropagation, typename Packet>
73struct sse_predux_max_prop_impl : sse_predux_common<Packet, sse_max_prop_wrapper<NaNPropagation, Packet>> {};
74
75/* -- -- -- -- -- -- -- -- -- -- -- -- Packet16b -- -- -- -- -- -- -- -- -- -- -- -- */
76
77template <>
78EIGEN_STRONG_INLINE bool predux(const Packet16b& a) {
79 Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a, a));
80 return (pfirst(tmp) != 0) || (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) != 0);
81}
82
83template <>
84EIGEN_STRONG_INLINE bool predux_mul(const Packet16b& a) {
85 Packet4i tmp = _mm_and_si128(a, _mm_unpackhi_epi64(a, a));
86 return ((pfirst<Packet4i>(tmp) == 0x01010101) && (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) == 0x01010101));
87}
88
89template <>
90EIGEN_STRONG_INLINE bool predux_min(const Packet16b& a) {
91 return predux_mul(a);
92}
93
94template <>
95EIGEN_STRONG_INLINE bool predux_max(const Packet16b& a) {
96 return predux(a);
97}
98
99template <>
100EIGEN_STRONG_INLINE bool predux_any(const Packet16b& a) {
101 return predux(a);
102}
103
104/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4i -- -- -- -- -- -- -- -- -- -- -- -- */
105
106template <typename Op>
107struct sse_predux_common<Packet4i, Op> {
108 static EIGEN_STRONG_INLINE int run(const Packet4i& a) {
109 Packet4i tmp;
110 tmp = Op::packetOp(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)));
111 tmp = Op::packetOp(tmp, _mm_unpackhi_epi32(tmp, tmp));
112 return _mm_cvtsi128_si32(tmp);
113 }
114};
115
116template <>
117EIGEN_STRONG_INLINE int predux(const Packet4i& a) {
118 return sse_predux_impl<Packet4i>::run(a);
119}
120
121template <>
122EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) {
123 return sse_predux_mul_impl<Packet4i>::run(a);
124}
125
126#ifdef EIGEN_VECTORIZE_SSE4_1
127template <>
128EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) {
129 return sse_predux_min_impl<Packet4i>::run(a);
130}
131
132template <>
133EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) {
134 return sse_predux_max_impl<Packet4i>::run(a);
135}
136#endif
137
138template <>
139EIGEN_STRONG_INLINE bool predux_any(const Packet4i& a) {
140 return _mm_movemask_ps(_mm_castsi128_ps(a)) != 0x0;
141}
142
143/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4ui -- -- -- -- -- -- -- -- -- -- -- -- */
144
145template <typename Op>
146struct sse_predux_common<Packet4ui, Op> {
147 static EIGEN_STRONG_INLINE uint32_t run(const Packet4ui& a) {
148 Packet4ui tmp;
149 tmp = Op::packetOp(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)));
150 tmp = Op::packetOp(tmp, _mm_unpackhi_epi32(tmp, tmp));
151 return static_cast<uint32_t>(_mm_cvtsi128_si32(tmp));
152 }
153};
154
155template <>
156EIGEN_STRONG_INLINE uint32_t predux(const Packet4ui& a) {
157 return sse_predux_impl<Packet4ui>::run(a);
158}
159
160template <>
161EIGEN_STRONG_INLINE uint32_t predux_mul(const Packet4ui& a) {
162 return sse_predux_mul_impl<Packet4ui>::run(a);
163}
164
165#ifdef EIGEN_VECTORIZE_SSE4_1
166template <>
167EIGEN_STRONG_INLINE uint32_t predux_min(const Packet4ui& a) {
168 return sse_predux_min_impl<Packet4ui>::run(a);
169}
170
171template <>
172EIGEN_STRONG_INLINE uint32_t predux_max(const Packet4ui& a) {
173 return sse_predux_max_impl<Packet4ui>::run(a);
174}
175#endif
176
177template <>
178EIGEN_STRONG_INLINE bool predux_any(const Packet4ui& a) {
179 return _mm_movemask_ps(_mm_castsi128_ps(a)) != 0x0;
180}
181
182/* -- -- -- -- -- -- -- -- -- -- -- -- Packet2l -- -- -- -- -- -- -- -- -- -- -- -- */
183
184template <typename Op>
185struct sse_predux_common<Packet2l, Op> {
186 static EIGEN_STRONG_INLINE int64_t run(const Packet2l& a) {
187 Packet2l tmp;
188 tmp = Op::packetOp(a, _mm_unpackhi_epi64(a, a));
189 return pfirst(tmp);
190 }
191};
192
193template <>
194EIGEN_STRONG_INLINE int64_t predux(const Packet2l& a) {
195 return sse_predux_impl<Packet2l>::run(a);
196}
197
198template <>
199EIGEN_STRONG_INLINE bool predux_any(const Packet2l& a) {
200 return _mm_movemask_pd(_mm_castsi128_pd(a)) != 0x0;
201}
202
203/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4f -- -- -- -- -- -- -- -- -- -- -- -- */
204
205template <typename Op>
206struct sse_predux_common<Packet4f, Op> {
207 static EIGEN_STRONG_INLINE float run(const Packet4f& a) {
208 Packet4f tmp;
209 tmp = Op::packetOp(a, _mm_movehl_ps(a, a));
210#ifdef EIGEN_VECTORIZE_SSE3
211 tmp = Op::packetOp(tmp, _mm_movehdup_ps(tmp));
212#else
213 tmp = Op::packetOp(tmp, _mm_shuffle_ps(tmp, tmp, 1));
214#endif
215 return _mm_cvtss_f32(tmp);
216 }
217};
218
219template <>
220EIGEN_STRONG_INLINE float predux(const Packet4f& a) {
221 return sse_predux_impl<Packet4f>::run(a);
222}
223
224template <>
225EIGEN_STRONG_INLINE float predux_mul(const Packet4f& a) {
226 return sse_predux_mul_impl<Packet4f>::run(a);
227}
228
229template <>
230EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) {
231 return sse_predux_min_impl<Packet4f>::run(a);
232}
233
234template <>
235EIGEN_STRONG_INLINE float predux_min<PropagateNumbers>(const Packet4f& a) {
236 return sse_predux_min_prop_impl<PropagateNumbers, Packet4f>::run(a);
237}
238
239template <>
240EIGEN_STRONG_INLINE float predux_min<PropagateNaN>(const Packet4f& a) {
241 return sse_predux_min_prop_impl<PropagateNaN, Packet4f>::run(a);
242}
243
244template <>
245EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) {
246 return sse_predux_max_impl<Packet4f>::run(a);
247}
248
249template <>
250EIGEN_STRONG_INLINE float predux_max<PropagateNumbers>(const Packet4f& a) {
251 return sse_predux_max_prop_impl<PropagateNumbers, Packet4f>::run(a);
252}
253
254template <>
255EIGEN_STRONG_INLINE float predux_max<PropagateNaN>(const Packet4f& a) {
256 return sse_predux_max_prop_impl<PropagateNaN, Packet4f>::run(a);
257}
258
259template <>
260EIGEN_STRONG_INLINE bool predux_any(const Packet4f& a) {
261 return _mm_movemask_ps(a) != 0x0;
262}
263
264/* -- -- -- -- -- -- -- -- -- -- -- -- Packet2d -- -- -- -- -- -- -- -- -- -- -- -- */
265
266template <typename Op>
267struct sse_predux_common<Packet2d, Op> {
268 static EIGEN_STRONG_INLINE double run(const Packet2d& a) {
269 Packet2d tmp;
270 tmp = Op::packetOp(a, _mm_unpackhi_pd(a, a));
271 return _mm_cvtsd_f64(tmp);
272 }
273};
274
275template <>
276EIGEN_STRONG_INLINE double predux(const Packet2d& a) {
277 return sse_predux_impl<Packet2d>::run(a);
278}
279
280template <>
281EIGEN_STRONG_INLINE double predux_mul(const Packet2d& a) {
282 return sse_predux_mul_impl<Packet2d>::run(a);
283}
284
285template <>
286EIGEN_STRONG_INLINE double predux_min(const Packet2d& a) {
287 return sse_predux_min_impl<Packet2d>::run(a);
288}
289
290template <>
291EIGEN_STRONG_INLINE double predux_min<PropagateNumbers>(const Packet2d& a) {
292 return sse_predux_min_prop_impl<PropagateNumbers, Packet2d>::run(a);
293}
294
295template <>
296EIGEN_STRONG_INLINE double predux_min<PropagateNaN>(const Packet2d& a) {
297 return sse_predux_min_prop_impl<PropagateNaN, Packet2d>::run(a);
298}
299
300template <>
301EIGEN_STRONG_INLINE double predux_max(const Packet2d& a) {
302 return sse_predux_max_impl<Packet2d>::run(a);
303}
304
305template <>
306EIGEN_STRONG_INLINE double predux_max<PropagateNumbers>(const Packet2d& a) {
307 return sse_predux_max_prop_impl<PropagateNumbers, Packet2d>::run(a);
308}
309
310template <>
311EIGEN_STRONG_INLINE double predux_max<PropagateNaN>(const Packet2d& a) {
312 return sse_predux_max_prop_impl<PropagateNaN, Packet2d>::run(a);
313}
314
315template <>
316EIGEN_STRONG_INLINE bool predux_any(const Packet2d& a) {
317 return _mm_movemask_pd(a) != 0x0;
318}
319
320} // end namespace internal
321
322} // end namespace Eigen
323
324#endif // EIGEN_REDUCTIONS_SSE_H
Namespace containing all symbols from the Eigen library.
Definition B01_Experimental.dox:1