Eigen  5.0.1-dev+60122df6
 
Loading...
Searching...
No Matches
ConfigureVectorization.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
5// Copyright (C) 2020, Arm Limited and Contributors
6//
7// This Source Code Form is subject to the terms of the Mozilla
8// Public License v. 2.0. If a copy of the MPL was not distributed
9// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11#ifndef EIGEN_CONFIGURE_VECTORIZATION_H
12#define EIGEN_CONFIGURE_VECTORIZATION_H
13
14//------------------------------------------------------------------------------------------
15// Static and dynamic alignment control
16//
17// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
18// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively.
19// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
20// a default value is automatically computed based on architecture, compiler, and OS.
21//
22// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
23// to be used to declare statically aligned buffers.
24//------------------------------------------------------------------------------------------
25
26/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
27 * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
28 * so that vectorization doesn't affect binary compatibility.
29 *
30 * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
31 * vectorized and non-vectorized code.
32 */
33#if (defined EIGEN_CUDACC)
34#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
35#define EIGEN_ALIGNOF(x) __alignof(x)
36#else
37#define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
38#define EIGEN_ALIGNOF(x) alignof(x)
39#endif
40
// Align to the boundary that avoids false sharing.
// https://en.cppreference.com/w/cpp/thread/hardware_destructive_interference_size
// There is a bug in Android NDK < r26 where the feature-test macro is defined but
// std::hardware_destructive_interference_size still does not exist.
#if defined(__cpp_lib_hardware_interference_size) && __cpp_lib_hardware_interference_size >= 201603 && \
    (!EIGEN_OS_ANDROID || __NDK_MAJOR__ + 0 >= 26)
#include <new>
#define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(std::hardware_destructive_interference_size)
#else
// Overalign for the cache line size of 128 bytes (Apple M1)
#define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(128)
#endif
53
// EIGEN_IDEAL_MAX_ALIGN_BYTES is the alignment (in bytes) that the enabled instruction set would ideally like.
// If the user explicitly disables vectorization, then we also disable alignment
#if defined(EIGEN_DONT_VECTORIZE)
#if defined(EIGEN_GPUCC)
// GPU code is always vectorized and requires memory alignment for
// statically allocated buffers.
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
#endif
#elif defined(__AVX512F__)
// 64 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
#elif defined(__AVX__)
// 32 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
#elif defined __HVX__ && (__HVX_LENGTH__ == 128)
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 128
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#endif

// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
#define EIGEN_MIN_ALIGN_BYTES 16
77
// Defines the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.

// Reject contradictory settings: a deprecated "don't align" switch together with a non-zero
// user-provided EIGEN_MAX_STATIC_ALIGN_BYTES.
#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && \
    EIGEN_MAX_STATIC_ALIGN_BYTES > 0
#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
#endif

// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
#ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#endif
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#endif
95
#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES

// Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES

// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
// certain common platform (compiler+architecture combinations) to avoid these problems.
// Only static alignment is really problematic (relies on nonstandard compiler extensions),
// try to keep heap alignment even when we have to disable static alignment.
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || \
                         EIGEN_ARCH_MIPS || EIGEN_ARCH_LOONGARCH64)
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
#endif

// Static alignment is completely disabled with Sun Studio and QCC/QNX, and with GCC-compatible
// compilers on architectures outside the list above.
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT && !EIGEN_COMP_SUNCC && !EIGEN_OS_QNX
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
#endif

#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#endif

#endif  // !defined(EIGEN_MAX_STATIC_ALIGN_BYTES)
127
// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES < EIGEN_MAX_STATIC_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif

// Without static alignment there is nothing for the unaligned-array assertion to check, so turn it off.
#if EIGEN_MAX_STATIC_ALIGN_BYTES == 0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#endif

// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
// It takes into account both the user choice to explicitly enable/disable alignment (by setting
// EIGEN_MAX_STATIC_ALIGN_BYTES) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only
// EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
142
// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
// EIGEN_ALIGN_MAX expands to nothing when static alignment is disabled.
#if EIGEN_MAX_STATIC_ALIGN_BYTES > 0
#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
#else
#define EIGEN_ALIGN_MAX
#endif
153
// Dynamic alignment control

#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES > 0
#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
#endif

// EIGEN_DONT_ALIGN forces EIGEN_MAX_ALIGN_BYTES to 0; otherwise a user-provided value is kept,
// and the ideal value is used as the default.
#ifdef EIGEN_DONT_ALIGN
#ifdef EIGEN_MAX_ALIGN_BYTES
#undef EIGEN_MAX_ALIGN_BYTES
#endif
#define EIGEN_MAX_ALIGN_BYTES 0
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
#define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif

// EIGEN_DEFAULT_ALIGN_BYTES is the larger of EIGEN_IDEAL_MAX_ALIGN_BYTES and EIGEN_MAX_ALIGN_BYTES.
#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif

// Unaligned vectorization is enabled unless the user set EIGEN_UNALIGNED_VECTORIZE beforehand.
#ifndef EIGEN_UNALIGNED_VECTORIZE
#define EIGEN_UNALIGNED_VECTORIZE 1
#endif
178
//----------------------------------------------------------------------

// If alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
// account both the user's will (EIGEN_MAX_ALIGN_BYTES, EIGEN_DONT_ALIGN) and our own platform checks
#if EIGEN_MAX_ALIGN_BYTES == 0
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
#endif
#endif
188
// Detect SSE2 availability: MSVC and the other compilers advertise it through different macros.
// The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
// removed as gcc 4.1 and msvc 2008 are not supported anyways.
#if EIGEN_COMP_MSVC
#include <malloc.h>  // for _aligned_malloc -- need it regardless of whether vectorization is enabled
// a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
#endif
#else
#if defined(__SSE2__)
#define EIGEN_SSE2_ON_NON_MSVC
#endif
#endif
202
// Select the SIMD backend. Nothing is selected when vectorization was disabled above or when
// compiling for a GPU. At most one branch of the #if/#elif chain below is taken; each branch defines
// EIGEN_VECTORIZE plus its backend-specific EIGEN_VECTORIZE_* macros and includes the intrinsics header(s).
#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))

#if defined(EIGEN_SSE2_ON_NON_MSVC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)

// Defines symbols for compile-time detection of which instructions are
// used.
// EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SSE
#define EIGEN_VECTORIZE_SSE2

// Detect sse3/ssse3/sse4:
// gcc and icc defines __SSE3__, ...
// there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
// want to force the use of those instructions with msvc.
#ifdef __SSE3__
#define EIGEN_VECTORIZE_SSE3
#endif
#ifdef __SSSE3__
#define EIGEN_VECTORIZE_SSSE3
#endif
#ifdef __SSE4_1__
#define EIGEN_VECTORIZE_SSE4_1
#endif
#ifdef __SSE4_2__
#define EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef __AVX__
#if !defined(EIGEN_USE_SYCL) && !EIGEN_COMP_EMSCRIPTEN
#define EIGEN_VECTORIZE_AVX
#endif
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSSE3
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef __AVX2__
#ifndef EIGEN_USE_SYCL
#define EIGEN_VECTORIZE_AVX2
#define EIGEN_VECTORIZE_AVX
#endif
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSSE3
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#endif
#if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
// MSVC does not expose a switch dedicated for FMA
// For MSVC, AVX2 => FMA
#define EIGEN_VECTORIZE_FMA
#endif
#if defined(__AVX512F__)
#ifndef EIGEN_VECTORIZE_FMA
#if EIGEN_COMP_GNUC
#error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
#else
#error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
#endif
#endif
#ifndef EIGEN_USE_SYCL
#define EIGEN_VECTORIZE_AVX512
#define EIGEN_VECTORIZE_AVX2
#define EIGEN_VECTORIZE_AVX
#endif
#define EIGEN_VECTORIZE_FMA
#define EIGEN_VECTORIZE_SSE3
#define EIGEN_VECTORIZE_SSSE3
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#ifndef EIGEN_USE_SYCL
#ifdef __AVX512DQ__
#define EIGEN_VECTORIZE_AVX512DQ
#endif
#ifdef __AVX512ER__
#define EIGEN_VECTORIZE_AVX512ER
#endif
#ifdef __AVX512BF16__
#define EIGEN_VECTORIZE_AVX512BF16
#endif
#ifdef __AVX512VL__
#define EIGEN_VECTORIZE_AVX512VL
#endif
#ifdef __AVX512FP16__
#ifdef __AVX512VL__
#define EIGEN_VECTORIZE_AVX512FP16
// Built-in _Float16.
#define EIGEN_HAS_BUILTIN_FLOAT16 1
#else
#if EIGEN_COMP_GNUC
#error Please add -mavx512vl to your compiler flags: compiling with -mavx512fp16 alone without AVX512-VL is not supported.
#else
#error Please enable AVX512-VL in your compiler flags (e.g. -mavx512vl): compiling with AVX512-FP16 alone without AVX512-VL is not supported.
#endif
#endif
#endif
#endif
#endif

// Disable AVX support on broken xcode versions
#if (EIGEN_COMP_CLANGAPPLE == 11000033) && (__MAC_OS_X_VERSION_MIN_REQUIRED == 101500)
// A nasty bug in the clang compiler shipped with xcode in a common compilation situation
// when XCode 11.0 and Mac deployment target macOS 10.15 is https://trac.macports.org/ticket/58776#no1
#ifdef EIGEN_VECTORIZE_AVX
#undef EIGEN_VECTORIZE_AVX
#warning \
    "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
#ifdef EIGEN_VECTORIZE_AVX2
#undef EIGEN_VECTORIZE_AVX2
#endif
#ifdef EIGEN_VECTORIZE_FMA
#undef EIGEN_VECTORIZE_FMA
#endif
#ifdef EIGEN_VECTORIZE_AVX512
#undef EIGEN_VECTORIZE_AVX512
#endif
#ifdef EIGEN_VECTORIZE_AVX512DQ
#undef EIGEN_VECTORIZE_AVX512DQ
#endif
#ifdef EIGEN_VECTORIZE_AVX512ER
#undef EIGEN_VECTORIZE_AVX512ER
#endif
#endif
// NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with -macosx-version-min=10.15 and AVX.
// NOTE: Using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests;
//       11.2 produces core dumps in 3 tests.
// NOTE: Using -macosx-version-min=10.14 produces functioning and passing tests in all cases.
// NOTE: __clang_version__ "11.0.0 (clang-1100.0.33.8)" XCode 11.0 <- Produces many segfault and core dumping
//       tests with -macosx-version-min=10.15 and AVX.
// NOTE: __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with
//       -macosx-version-min=10.15 and AVX.
#endif

// include files

// This extern "C" works around a MINGW-w64 compilation issue
// https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
// In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
// However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
// with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
// so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
// Notice that since these are C headers, the extern "C" is theoretically needed anyways.
extern "C" {
// In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
// Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
#if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN
#include <immintrin.h>
#else
#include <mmintrin.h>
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef EIGEN_VECTORIZE_SSE3
#include <pmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSSE3
#include <tmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSE4_1
#include <smmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h>
#endif
#if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
#include <immintrin.h>
#endif
#endif
}  // end extern "C"

#elif defined(__VSX__) && !defined(__APPLE__)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_VSX 1
#define EIGEN_VECTORIZE_FMA
#include <altivec.h>
// We need to #undef all these ugly tokens defined in <altivec.h>
// => use __vector instead of vector
#undef bool
#undef vector
#undef pixel

#elif defined __ALTIVEC__

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ALTIVEC
#define EIGEN_VECTORIZE_FMA
#include <altivec.h>
// We need to #undef all these ugly tokens defined in <altivec.h>
// => use __vector instead of vector
#undef bool
#undef vector
#undef pixel

#elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_NEON
#include <arm_neon.h>

// We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
// will not select the backend automatically
#elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SVE
#include <arm_sve.h>

// Since we depend on knowing SVE vector lengths at compile-time, we need
// to ensure a fixed length is set. EIGEN_ARM64_SVE_VL mirrors the compiler-provided
// __ARM_FEATURE_SVE_BITS.
#if defined __ARM_FEATURE_SVE_BITS
#define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
#else
#error "Eigen requires a fixed SVE vector length but __ARM_FEATURE_SVE_BITS is not set (e.g. compile with -msve-vector-bits=<N>)."
#endif

#elif (defined __s390x__ && defined __VEC__)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ZVECTOR
#include <vecintrin.h>

#elif defined __mips_msa

// Limit MSA optimizations to little-endian CPUs for now.
// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#if defined(__LP64__)
#define EIGEN_MIPS_64
#else
#define EIGEN_MIPS_32
#endif
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_MSA
#include <msa.h>
#endif

#elif (defined __loongarch64 && defined __loongarch_sx)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_LSX
#include <lsxintrin.h>

#elif defined __HVX__ && (__HVX_LENGTH__ == 128)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_HVX
#include <hexagon_types.h>

#endif
#endif
452
// Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all
// compilers seem to follow this. We therefore include it explicitly.
// See also: https://bugs.llvm.org/show_bug.cgi?id=47955
#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
#include <arm_fp16.h>
#endif

// Enable FMA for ARM.
#if defined(__ARM_FEATURE_FMA)
#define EIGEN_VECTORIZE_FMA
#endif
464
// F16C support: enabled for non-GPU builds, and with strict clang only from version 3.8.0 on.
// The compiler-version test is nested so that it is only evaluated when F16C is actually available.
#if defined(__F16C__) && !defined(EIGEN_GPUCC)
#if !EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3, 8, 0)
// We can use the optimized fp16 to float and float to fp16 conversion routines
#define EIGEN_HAS_FP16_C

#if EIGEN_COMP_GNUC
// Make sure immintrin.h is included, even if e.g. vectorization is
// explicitly disabled (see also issue #2395).
// Note that FP16C intrinsics for gcc and clang are included by immintrin.h,
// as opposed to emmintrin.h as suggested by Intel:
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
#include <immintrin.h>
#endif
#endif
#endif
478
// CUDA: enable the GPU packet types; half-precision support needs CUDA SDK >= 7.5.
#if defined EIGEN_CUDACC
#define EIGEN_VECTORIZE_GPU
#include <vector_types.h>
#if EIGEN_CUDA_SDK_VER >= 70500
#define EIGEN_HAS_CUDA_FP16
#endif
#endif

#if defined(EIGEN_HAS_CUDA_FP16)
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#endif

// HIP: enable the GPU packet types together with the fp16 and bfloat16 headers.
#if defined(EIGEN_HIPCC)
#define EIGEN_VECTORIZE_GPU
#include <hip/hip_vector_types.h>
#define EIGEN_HAS_HIP_FP16
#include <hip/hip_fp16.h>
#define EIGEN_HAS_HIP_BF16
#include <hip/hip_bfloat16.h>
#endif

500
502// IWYU pragma: private
503#include "../InternalHeaderCheck.h"
504
505namespace Eigen {
506
// Returns a static, human-readable string naming the SIMD instruction set(s) selected at
// compile time through the EIGEN_VECTORIZE_* macros, or "None" when no backend is enabled.
// On x86 the string is a comma-separated list of the enabled extension and those it implies.
// The returned pointer refers to a string literal and must not be freed or modified.
inline static const char *SimdInstructionSetsInUse(void) {
#if defined(EIGEN_VECTORIZE_AVX512)
  return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_AVX)
  return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_2)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_1)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
#elif defined(EIGEN_VECTORIZE_SSSE3)
  return "SSE, SSE2, SSE3, SSSE3";
#elif defined(EIGEN_VECTORIZE_SSE3)
  return "SSE, SSE2, SSE3";
#elif defined(EIGEN_VECTORIZE_SSE2)
  return "SSE, SSE2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
  return "AltiVec";
#elif defined(EIGEN_VECTORIZE_VSX)
  return "VSX";
#elif defined(EIGEN_VECTORIZE_NEON)
  return "ARM NEON";
#elif defined(EIGEN_VECTORIZE_SVE)
  return "ARM SVE";
#elif defined(EIGEN_VECTORIZE_ZVECTOR)
  return "S390X ZVECTOR";
#elif defined(EIGEN_VECTORIZE_MSA)
  return "MIPS MSA";
#elif defined(EIGEN_VECTORIZE_LSX)
  return "LOONGARCH64 LSX";
#elif defined(EIGEN_VECTORIZE_HVX)
  // The HVX backend is a vectorized build too; do not report it as "None".
  return "HEXAGON HVX";
#else
  return "None";
#endif
}
540
541} // end namespace Eigen
542
543#endif // EIGEN_CONFIGURE_VECTORIZATION_H
Namespace containing all symbols from the Eigen library.
Definition B01_Experimental.dox:1