Eigen
5.0.1-dev+60122df6
Loading...
Searching...
No Matches
Macros.h
1
// This file is part of Eigen, a lightweight C++ template library
2
// for linear algebra.
3
//
4
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
5
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6
//
7
// This Source Code Form is subject to the terms of the Mozilla
8
// Public License v. 2.0. If a copy of the MPL was not distributed
9
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10
11
#ifndef EIGEN_MACROS_H
12
#define EIGEN_MACROS_H
13
// IWYU pragma: private
14
#include "../InternalHeaderCheck.h"
15
16
//------------------------------------------------------------------------------------------
17
// Eigen version and basic defaults
18
//------------------------------------------------------------------------------------------
19
20
// Evaluates to nonzero when the Eigen version given by EIGEN_MAJOR_VERSION, EIGEN_MINOR_VERSION and
// EIGEN_PATCH_VERSION is lexicographically greater than or equal to (x, y, z).
// Arguments are parenthesized so that compound expressions (e.g. a conditional) compare as a unit.
// Usable both in #if directives and in ordinary expressions.
#define EIGEN_VERSION_AT_LEAST(x, y, z) \
  (EIGEN_MAJOR_VERSION > (x) ||         \
   (EIGEN_MAJOR_VERSION >= (x) &&       \
    (EIGEN_MINOR_VERSION > (y) || (EIGEN_MINOR_VERSION >= (y) && EIGEN_PATCH_VERSION >= (z)))))
23
24
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
25
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
26
#else
27
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
28
#endif
29
30
#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
31
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
32
#endif
33
34
// Upper bound on the C++ version to use.
35
// Expected values are 03, 11, 14, 17, etc.
36
// By default, let's use an arbitrarily large C++ version.
37
#ifndef EIGEN_MAX_CPP_VER
38
#define EIGEN_MAX_CPP_VER 99
39
#endif
40
46
#ifndef EIGEN_FAST_MATH
47
#define EIGEN_FAST_MATH 1
48
#endif
49
50
#ifndef EIGEN_STACK_ALLOCATION_LIMIT
51
// 131072 == 128 KB
52
#define EIGEN_STACK_ALLOCATION_LIMIT 131072
53
#endif
54
55
/* Specify whether to use std::fma for scalar multiply-add instructions.
56
*
57
* On machines that have FMA as a single instruction, this will generally
58
* improve precision without significant performance implications.
59
*
60
* Without a single instruction, performance has been found to be reduced 2-3x
61
* on Intel CPUs, and up to 30x for WASM.
62
*
63
* If unspecified, defaults to using FMA if hardware support is available.
64
* The default should be used in most cases to ensure consistency between
65
* vectorized and non-vectorized paths.
66
*/
67
#ifndef EIGEN_SCALAR_MADD_USE_FMA
68
#ifdef EIGEN_VECTORIZE_FMA
69
#define EIGEN_SCALAR_MADD_USE_FMA 1
70
#else
71
#define EIGEN_SCALAR_MADD_USE_FMA 0
72
#endif
73
#endif
74
75
//------------------------------------------------------------------------------------------
76
// Compiler identification, EIGEN_COMP_*
77
//------------------------------------------------------------------------------------------
78
80
// EIGEN_COMP_GNUC is nonzero for every compiler that defines __GNUC__ (the test is on __GNUC__
// alone). The value packs the reported version as major * 100 + minor * 10 + patchlevel.
// It is 0 when __GNUC__ is not defined.
#ifdef __GNUC__
81
#define EIGEN_COMP_GNUC (__GNUC__ * 100 + __GNUC_MINOR__ * 10 + __GNUC_PATCHLEVEL__)
82
#else
83
#define EIGEN_COMP_GNUC 0
84
#endif
85
87
#if defined(__clang__)
88
#define EIGEN_COMP_CLANG (__clang_major__ * 100 + __clang_minor__ * 10 + __clang_patchlevel__)
89
#else
90
#define EIGEN_COMP_CLANG 0
91
#endif
92
95
#if defined(__clang__) && defined(__apple_build_version__)
96
#define EIGEN_COMP_CLANGAPPLE __apple_build_version__
97
#else
98
#define EIGEN_COMP_CLANGAPPLE 0
99
#endif
100
102
#if defined(__castxml__)
103
#define EIGEN_COMP_CASTXML 1
104
#else
105
#define EIGEN_COMP_CASTXML 0
106
#endif
107
109
#if defined(__llvm__)
110
#define EIGEN_COMP_LLVM 1
111
#else
112
#define EIGEN_COMP_LLVM 0
113
#endif
114
116
#if defined(__INTEL_COMPILER)
117
#define EIGEN_COMP_ICC __INTEL_COMPILER
118
#else
119
#define EIGEN_COMP_ICC 0
120
#endif
121
123
#if defined(__INTEL_CLANG_COMPILER)
124
#define EIGEN_COMP_CLANGICC __INTEL_CLANG_COMPILER
125
#else
126
#define EIGEN_COMP_CLANGICC 0
127
#endif
128
130
#if defined(__MINGW32__)
131
#define EIGEN_COMP_MINGW 1
132
#else
133
#define EIGEN_COMP_MINGW 0
134
#endif
135
137
#if defined(__SUNPRO_CC)
138
#define EIGEN_COMP_SUNCC 1
139
#else
140
#define EIGEN_COMP_SUNCC 0
141
#endif
142
144
#if defined(_MSC_VER)
145
#define EIGEN_COMP_MSVC _MSC_VER
146
#else
147
#define EIGEN_COMP_MSVC 0
148
#endif
149
150
// EIGEN_COMP_NVCC: NVCC version number, or 0 when __NVCC__ is not defined.
//  - If __CUDACC_VER_MAJOR__ is defined and >= 9, the value is major * 10000 + minor * 100.
//  - Otherwise, if __CUDACC_VER__ is defined, that value is used as-is.
//  - If NVCC exposes neither, compilation stops with an #error.
#if defined(__NVCC__)
151
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
152
#define EIGEN_COMP_NVCC ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
153
#elif defined(__CUDACC_VER__)
154
#define EIGEN_COMP_NVCC __CUDACC_VER__
155
#else
156
#error "NVCC did not define compiler version."
157
#endif
158
#else
159
#define EIGEN_COMP_NVCC 0
160
#endif
161
162
// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC:
163
// name ver MSC_VER
164
// 2015 14 1900
165
// "15" 15 1900
166
// 2017-14.1 15.0 1910
167
// 2017-14.11 15.3 1911
168
// 2017-14.12 15.5 1912
169
// 2017-14.13 15.6 1913
170
// 2017-14.14 15.7 1914
171
// 2017 15.8 1915
172
// 2017 15.9 1916
173
// 2019 RTW 16.0 1920
174
176
#if defined(_MSVC_LANG)
177
#define EIGEN_COMP_MSVC_LANG _MSVC_LANG
178
#else
179
#define EIGEN_COMP_MSVC_LANG 0
180
#endif
181
182
// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC_LANG:
183
// MSVC option Standard MSVC_LANG
184
// /std:c++14 (default as of VS 2019) C++14 201402L
185
// /std:c++17 C++17 201703L
186
// /std:c++latest >C++17 >201703L
187
190
#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG)
191
#define EIGEN_COMP_MSVC_STRICT _MSC_VER
192
#else
193
#define EIGEN_COMP_MSVC_STRICT 0
194
#endif
195
197
// XLC version
198
// 3.1 0x0301
199
// 4.5 0x0405
200
// 5.0 0x0500
201
// 12.1 0x0C01
202
#if defined(__IBMCPP__) || defined(__xlc__) || defined(__ibmxl__)
203
#define EIGEN_COMP_IBM __xlC__
204
#else
205
#define EIGEN_COMP_IBM 0
206
#endif
207
209
#if defined(__PGI)
210
#define EIGEN_COMP_PGI (__PGIC__ * 100 + __PGIC_MINOR__)
211
#else
212
#define EIGEN_COMP_PGI 0
213
#endif
214
216
#if defined(__NVCOMPILER)
217
#define EIGEN_COMP_NVHPC (__NVCOMPILER_MAJOR__ * 100 + __NVCOMPILER_MINOR__)
218
#else
219
#define EIGEN_COMP_NVHPC 0
220
#endif
221
223
#if defined(__CC_ARM) || defined(__ARMCC_VERSION)
224
#define EIGEN_COMP_ARM 1
225
#else
226
#define EIGEN_COMP_ARM 0
227
#endif
228
230
#if defined(__EMSCRIPTEN__)
231
#define EIGEN_COMP_EMSCRIPTEN 1
232
#else
233
#define EIGEN_COMP_EMSCRIPTEN 0
234
#endif
235
239
#if defined(__FUJITSU)
240
#define EIGEN_COMP_FCC (__FCC_major__ * 100 + __FCC_minor__ * 10 + __FCC_patchlevel__)
241
#else
242
#define EIGEN_COMP_FCC 0
243
#endif
244
248
#if defined(__CLANG_FUJITSU)
249
#define EIGEN_COMP_CLANGFCC (__FCC_major__ * 100 + __FCC_minor__ * 10 + __FCC_patchlevel__)
250
#else
251
#define EIGEN_COMP_CLANGFCC 0
252
#endif
253
257
#if defined(_CRAYC) && !defined(__clang__)
258
#define EIGEN_COMP_CPE (_RELEASE_MAJOR * 100 + _RELEASE_MINOR * 10 + _RELEASE_PATCHLEVEL)
259
#else
260
#define EIGEN_COMP_CPE 0
261
#endif
262
266
#if defined(_CRAYC) && defined(__clang__)
267
#define EIGEN_COMP_CLANGCPE (_RELEASE_MAJOR * 100 + _RELEASE_MINOR * 10 + _RELEASE_PATCHLEVEL)
268
#else
269
#define EIGEN_COMP_CLANGCPE 0
270
#endif
271
273
#if defined(__LCC__) && defined(__MCST__)
274
#define EIGEN_COMP_LCC (__LCC__ * 100 + __LCC_MINOR__)
275
#else
276
#define EIGEN_COMP_LCC 0
277
#endif
278
281
#if EIGEN_COMP_GNUC && \
282
!(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_CLANGICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || \
283
EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN || EIGEN_COMP_FCC || EIGEN_COMP_CLANGFCC || \
284
EIGEN_COMP_CPE || EIGEN_COMP_CLANGCPE || EIGEN_COMP_LCC)
285
#define EIGEN_COMP_GNUC_STRICT 1
286
#else
287
#define EIGEN_COMP_GNUC_STRICT 0
288
#endif
289
290
// GCC, and compilers that pretend to be it, have different version schemes, so this only makes sense to use with the
291
// real GCC.
292
#if EIGEN_COMP_GNUC_STRICT
// Nonzero when GCC's (major, minor, patchlevel) is lexicographically >= (x, y, z).
// Arguments are parenthesized so that compound expressions compare as a unit.
#define EIGEN_GNUC_STRICT_AT_LEAST(x, y, z)                         \
  ((__GNUC__ > (x)) || (__GNUC__ == (x) && __GNUC_MINOR__ > (y)) || \
   (__GNUC__ == (x) && __GNUC_MINOR__ == (y) && __GNUC_PATCHLEVEL__ >= (z)))
// Nonzero when GCC's (major, minor, patchlevel) is lexicographically < (x, y, z).
#define EIGEN_GNUC_STRICT_LESS_THAN(x, y, z)                        \
  ((__GNUC__ < (x)) || (__GNUC__ == (x) && __GNUC_MINOR__ < (y)) || \
   (__GNUC__ == (x) && __GNUC_MINOR__ == (y) && __GNUC_PATCHLEVEL__ < (z)))
#else
// EIGEN_COMP_GNUC_STRICT is 0: both comparisons evaluate to 0 whatever the arguments are.
#define EIGEN_GNUC_STRICT_AT_LEAST(x, y, z) 0
#define EIGEN_GNUC_STRICT_LESS_THAN(x, y, z) 0
#endif
303
306
#if EIGEN_COMP_CLANG && !(EIGEN_COMP_CLANGAPPLE || EIGEN_COMP_CLANGICC || EIGEN_COMP_CLANGFCC || EIGEN_COMP_CLANGCPE)
307
#define EIGEN_COMP_CLANG_STRICT 1
308
#else
309
#define EIGEN_COMP_CLANG_STRICT 0
310
#endif
311
312
// Clang, and compilers forked from it, have different version schemes, so this only makes sense to use with the real
313
// Clang.
314
#if EIGEN_COMP_CLANG_STRICT
// Nonzero when Clang's (major, minor, patchlevel) is lexicographically >= (x, y, z).
// Arguments are parenthesized so that compound expressions compare as a unit.
#define EIGEN_CLANG_STRICT_AT_LEAST(x, y, z)                                      \
  ((__clang_major__ > (x)) || (__clang_major__ == (x) && __clang_minor__ > (y)) || \
   (__clang_major__ == (x) && __clang_minor__ == (y) && __clang_patchlevel__ >= (z)))
// Nonzero when Clang's (major, minor, patchlevel) is lexicographically < (x, y, z).
#define EIGEN_CLANG_STRICT_LESS_THAN(x, y, z)                                     \
  ((__clang_major__ < (x)) || (__clang_major__ == (x) && __clang_minor__ < (y)) || \
   (__clang_major__ == (x) && __clang_minor__ == (y) && __clang_patchlevel__ < (z)))
#else
// EIGEN_COMP_CLANG_STRICT is 0: both comparisons evaluate to 0 whatever the arguments are.
#define EIGEN_CLANG_STRICT_AT_LEAST(x, y, z) 0
#define EIGEN_CLANG_STRICT_LESS_THAN(x, y, z) 0
#endif
325
326
//------------------------------------------------------------------------------------------
327
// Architecture identification, EIGEN_ARCH_*
328
//------------------------------------------------------------------------------------------
329
330
#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC)) || defined(__amd64)
331
#define EIGEN_ARCH_x86_64 1
332
#else
333
#define EIGEN_ARCH_x86_64 0
334
#endif
335
336
#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
337
#define EIGEN_ARCH_i386 1
338
#else
339
#define EIGEN_ARCH_i386 0
340
#endif
341
342
#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386
343
#define EIGEN_ARCH_i386_OR_x86_64 1
344
#else
345
#define EIGEN_ARCH_i386_OR_x86_64 0
346
#endif
347
349
#if defined(__arm__)
350
#define EIGEN_ARCH_ARM 1
351
#else
352
#define EIGEN_ARCH_ARM 0
353
#endif
354
356
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
357
#define EIGEN_ARCH_ARM64 1
358
#else
359
#define EIGEN_ARCH_ARM64 0
360
#endif
361
363
#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64
364
#define EIGEN_ARCH_ARM_OR_ARM64 1
365
#else
366
#define EIGEN_ARCH_ARM_OR_ARM64 0
367
#endif
368
370
#if EIGEN_ARCH_ARM_OR_ARM64 && defined(__ARM_ARCH) && __ARM_ARCH >= 8
371
#define EIGEN_ARCH_ARMV8 1
372
#else
373
#define EIGEN_ARCH_ARMV8 0
374
#endif
375
378
#if EIGEN_ARCH_ARM_OR_ARM64
379
#ifndef EIGEN_HAS_ARM64_FP16
380
#if defined(__ARM_FP16_FORMAT_IEEE)
381
#define EIGEN_HAS_ARM64_FP16 1
382
#else
383
#define EIGEN_HAS_ARM64_FP16 0
384
#endif
385
#endif
386
#endif
387
389
#if defined(__mips__) || defined(__mips)
390
#define EIGEN_ARCH_MIPS 1
391
#else
392
#define EIGEN_ARCH_MIPS 0
393
#endif
394
396
#if defined(__loongarch64)
397
#define EIGEN_ARCH_LOONGARCH64 1
398
#else
399
#define EIGEN_ARCH_LOONGARCH64 0
400
#endif
401
403
#if defined(__sparc__) || defined(__sparc)
404
#define EIGEN_ARCH_SPARC 1
405
#else
406
#define EIGEN_ARCH_SPARC 0
407
#endif
408
410
#if defined(__ia64__)
411
#define EIGEN_ARCH_IA64 1
412
#else
413
#define EIGEN_ARCH_IA64 0
414
#endif
415
417
#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC) || defined(__POWERPC__)
418
#define EIGEN_ARCH_PPC 1
419
#else
420
#define EIGEN_ARCH_PPC 0
421
#endif
422
423
//------------------------------------------------------------------------------------------
424
// Operating system identification, EIGEN_OS_*
425
//------------------------------------------------------------------------------------------
426
428
#if defined(__unix__) || defined(__unix)
429
#define EIGEN_OS_UNIX 1
430
#else
431
#define EIGEN_OS_UNIX 0
432
#endif
433
435
#if defined(__linux__)
436
#define EIGEN_OS_LINUX 1
437
#else
438
#define EIGEN_OS_LINUX 0
439
#endif
440
442
// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain.
443
#if defined(__ANDROID__) || defined(ANDROID)
444
#define EIGEN_OS_ANDROID 1
445
446
// Since NDK r16, `__NDK_MAJOR__` and `__NDK_MINOR__` are defined in
447
// <android/ndk-version.h>. For NDK < r16, users should define these macros,
448
// e.g. `-D__NDK_MAJOR__=11 -D__NDK_MINOR__=0` for NDK r11.
449
#if defined __has_include
450
#if __has_include(<android/ndk-version.h>)
451
#include <android/ndk-version.h>
452
#endif
453
#endif
454
455
#else
456
#define EIGEN_OS_ANDROID 0
457
#endif
458
460
#if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID)
461
#define EIGEN_OS_GNULINUX 1
462
#else
463
#define EIGEN_OS_GNULINUX 0
464
#endif
465
467
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__)
468
#define EIGEN_OS_BSD 1
469
#else
470
#define EIGEN_OS_BSD 0
471
#endif
472
474
#if defined(__APPLE__)
475
#define EIGEN_OS_MAC 1
476
#else
477
#define EIGEN_OS_MAC 0
478
#endif
479
481
#if defined(__QNX__)
482
#define EIGEN_OS_QNX 1
483
#else
484
#define EIGEN_OS_QNX 0
485
#endif
486
488
#if defined(_WIN32)
489
#define EIGEN_OS_WIN 1
490
#else
491
#define EIGEN_OS_WIN 0
492
#endif
493
495
#if defined(_WIN64)
496
#define EIGEN_OS_WIN64 1
497
#else
498
#define EIGEN_OS_WIN64 0
499
#endif
500
502
#if defined(_WIN32_WCE)
503
#define EIGEN_OS_WINCE 1
504
#else
505
#define EIGEN_OS_WINCE 0
506
#endif
507
509
#if defined(__CYGWIN__)
510
#define EIGEN_OS_CYGWIN 1
511
#else
512
#define EIGEN_OS_CYGWIN 0
513
#endif
514
516
#if EIGEN_OS_WIN && !(EIGEN_OS_WINCE || EIGEN_OS_CYGWIN)
517
#define EIGEN_OS_WIN_STRICT 1
518
#else
519
#define EIGEN_OS_WIN_STRICT 0
520
#endif
521
523
// compiler solaris __SUNPRO_C
524
// version studio
525
// 5.7 10 0x570
526
// 5.8 11 0x580
527
// 5.9 12 0x590
528
// 5.10 12.1 0x5100
529
// 5.11 12.2 0x5110
530
// 5.12 12.3 0x5120
531
#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
532
#define EIGEN_OS_SUN __SUNPRO_C
533
#else
534
#define EIGEN_OS_SUN 0
535
#endif
536
538
#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
539
#define EIGEN_OS_SOLARIS 1
540
#else
541
#define EIGEN_OS_SOLARIS 0
542
#endif
543
544
//------------------------------------------------------------------------------------------
545
// Detect GPU compilers and architectures
546
//------------------------------------------------------------------------------------------
547
548
// NVCC is not supported as the target platform for HIPCC
549
// Note that this also makes EIGEN_CUDACC and EIGEN_HIPCC mutually exclusive
550
#if defined(__NVCC__) && defined(__HIPCC__)
551
#error "NVCC as the target platform for HIPCC is currently not supported."
552
#endif
553
554
#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA) && !defined(__SYCL_DEVICE_ONLY__)
555
// Means the compiler is either nvcc or clang with CUDA enabled
556
#define EIGEN_CUDACC __CUDACC__
557
#endif
558
559
#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA) && !defined(__SYCL_DEVICE_ONLY__)
560
// Means we are generating code for the device
561
#define EIGEN_CUDA_ARCH __CUDA_ARCH__
562
#endif
563
564
#if defined(EIGEN_CUDACC)
565
#include <cuda.h>
566
#define EIGEN_CUDA_SDK_VER (CUDA_VERSION * 10)
567
#else
568
#define EIGEN_CUDA_SDK_VER 0
569
#endif
570
571
#if defined(__HIPCC__) && !defined(EIGEN_NO_HIP) && !defined(__SYCL_DEVICE_ONLY__)
572
// Means the compiler is HIPCC (analogous to EIGEN_CUDACC, but for HIP)
573
#define EIGEN_HIPCC __HIPCC__
574
575
// We need to include hip_runtime.h here because it pulls in
576
// ++ hip_common.h which contains the define for __HIP_DEVICE_COMPILE__
577
// ++ host_defines.h which contains the defines for the __host__ and __device__ macros
578
#include <hip/hip_runtime.h>
579
580
#if defined(__HIP_DEVICE_COMPILE__) && !defined(__SYCL_DEVICE_ONLY__)
581
// analogous to EIGEN_CUDA_ARCH, but for HIP
582
#define EIGEN_HIP_DEVICE_COMPILE __HIP_DEVICE_COMPILE__
583
#endif
584
585
// For HIP (ROCm 3.5 and higher), we need to explicitly set the launch_bounds attribute
586
// value to 1024. The compiler assigns a default value of 256 when the attribute is not
587
// specified. This results in failures on the HIP platform, for cases when a GPU kernel
588
// without an explicit launch_bounds attribute is called with a threads_per_block value
589
// greater than 256.
590
//
591
// This is a regression in functionality and is expected to be fixed within the next
592
// couple of ROCm releases (compiler will go back to using 1024 value as the default)
593
//
594
// In the meantime, we will use a "only enabled for HIP" macro to set the launch_bounds
595
// attribute.
596
597
#define EIGEN_HIP_LAUNCH_BOUNDS_1024 __launch_bounds__(1024)
598
599
#endif
600
601
#if !defined(EIGEN_HIP_LAUNCH_BOUNDS_1024)
602
#define EIGEN_HIP_LAUNCH_BOUNDS_1024
603
#endif
// !defined(EIGEN_HIP_LAUNCH_BOUNDS_1024)
604
605
// Unify CUDA/HIPCC
606
607
#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)
608
//
609
// If either EIGEN_CUDACC or EIGEN_HIPCC is defined, then define EIGEN_GPUCC
610
//
611
#define EIGEN_GPUCC
612
//
613
// EIGEN_HIPCC implies the HIP compiler and is used to tweak Eigen code for use in HIP kernels
614
// EIGEN_CUDACC implies the CUDA compiler and is used to tweak Eigen code for use in CUDA kernels
615
//
616
// In most cases the same tweaks are required to the Eigen code to enable in both the HIP and CUDA kernels.
617
// For those cases, the corresponding code should be guarded with
618
// #if defined(EIGEN_GPUCC)
619
// instead of
620
// #if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)
621
//
622
// For cases where the tweak is specific to HIP, the code should be guarded with
623
// #if defined(EIGEN_HIPCC)
624
//
625
// For cases where the tweak is specific to CUDA, the code should be guarded with
626
// #if defined(EIGEN_CUDACC)
627
//
628
#endif
629
630
#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
631
//
632
// If either EIGEN_CUDA_ARCH or EIGEN_HIP_DEVICE_COMPILE is defined, then define EIGEN_GPU_COMPILE_PHASE
633
//
634
#define EIGEN_GPU_COMPILE_PHASE
635
//
636
// GPU compilers (HIPCC, NVCC) typically do two passes over the source code,
637
// + one to compile the source for the "host" (ie CPU)
638
// + another to compile the source for the "device" (ie. GPU)
639
//
640
// Code that needs to be enabled only during either the "host" or "device" compilation phase
641
// needs to be guarded with a macro that indicates the current compilation phase
642
//
643
// EIGEN_HIP_DEVICE_COMPILE implies the device compilation phase in HIP
644
// EIGEN_CUDA_ARCH implies the device compilation phase in CUDA
645
//
646
// In most cases, the "host" / "device" specific code is the same for both HIP and CUDA
647
// For those cases, the code should be guarded with
648
// #if defined(EIGEN_GPU_COMPILE_PHASE)
649
// instead of
650
// #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
651
//
652
// For cases where the tweak is specific to HIP, the code should be guarded with
653
// #if defined(EIGEN_HIP_DEVICE_COMPILE)
654
//
655
// For cases where the tweak is specific to CUDA, the code should be guarded with
656
// #if defined(EIGEN_CUDA_ARCH)
657
//
658
#endif
659
662
#if EIGEN_ARCH_ARM_OR_ARM64
663
#ifndef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
664
// Clang only supports FP16 on aarch64, and not all intrinsics are available
665
// on A32 anyways even in GCC (e.g. vdiv_f16, vsqrt_f16).
666
#if EIGEN_ARCH_ARM64 && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE)
667
#define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 1
668
#else
669
#define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 0
670
#endif
671
#endif
672
#endif
673
676
#if EIGEN_ARCH_ARM_OR_ARM64
677
#ifndef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC
678
// Clang only supports FP16 on aarch64, and not all intrinsics are available
679
// on A32 anyways, even in GCC (e.g. vceqh_f16).
680
#if EIGEN_ARCH_ARM64 && defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && !defined(EIGEN_GPU_COMPILE_PHASE)
681
#define EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC 1
682
#endif
683
#endif
684
#endif
685
686
#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
687
// EIGEN_USE_SYCL is a user-defined macro while __SYCL_DEVICE_ONLY__ is a compiler-defined macro.
688
// In most cases we want to check if both macros are defined which can be done using the define below.
689
#define SYCL_DEVICE_ONLY
690
#endif
691
692
//------------------------------------------------------------------------------------------
693
// Detect Compiler/Architecture/OS specific features
694
//------------------------------------------------------------------------------------------
695
696
// Cross compiler wrapper around LLVM's __has_builtin
697
#ifdef __has_builtin
698
#define EIGEN_HAS_BUILTIN(x) __has_builtin(x)
699
#else
700
#define EIGEN_HAS_BUILTIN(x) 0
701
#endif
702
703
// A Clang feature extension to determine compiler features.
704
// We use it to determine 'cxx_rvalue_references'
705
#ifndef __has_feature
706
#define __has_feature(x) 0
707
#endif
708
709
// The macro EIGEN_CPLUSPLUS is a replacement for __cplusplus/_MSVC_LANG that
710
// works for both platforms, indicating the C++ standard version number.
711
//
712
// With MSVC, without defining /Zc:__cplusplus, the __cplusplus macro will
713
// report 199711L regardless of the language standard specified via /std.
714
// We need to rely on _MSVC_LANG instead, which is only available after
715
// VS2015.3.
716
#if EIGEN_COMP_MSVC_LANG > 0
717
#define EIGEN_CPLUSPLUS EIGEN_COMP_MSVC_LANG
718
#elif EIGEN_COMP_MSVC >= 1900
719
#define EIGEN_CPLUSPLUS 201103L
720
#elif defined(__cplusplus)
721
#define EIGEN_CPLUSPLUS __cplusplus
722
#else
723
#define EIGEN_CPLUSPLUS 0
724
#endif
725
726
// The macro EIGEN_COMP_CXXVER defines the c++ version expected by the compiler.
727
// For instance, if compiling with gcc and -std=c++17, then EIGEN_COMP_CXXVER
728
// is defined to 17.
729
#if EIGEN_CPLUSPLUS >= 202002L
730
#define EIGEN_COMP_CXXVER 20
731
#elif EIGEN_CPLUSPLUS >= 201703L
732
#define EIGEN_COMP_CXXVER 17
733
#elif EIGEN_CPLUSPLUS >= 201402L
734
#define EIGEN_COMP_CXXVER 14
735
#elif EIGEN_CPLUSPLUS >= 201103L
736
#define EIGEN_COMP_CXXVER 11
737
#else
738
#define EIGEN_COMP_CXXVER 03
739
#endif
740
741
// The macros EIGEN_HAS_CXX?? define a rough estimate of available c++ features
742
// but in practice we should not rely on them but rather on the availability of
743
// individual features as defined later.
744
// This is why there is no EIGEN_HAS_CXX17.
745
#if EIGEN_MAX_CPP_VER < 14 || EIGEN_COMP_CXXVER < 14 || (EIGEN_COMP_MSVC && EIGEN_COMP_MSVC < 1900) || \
746
(EIGEN_COMP_ICC && EIGEN_COMP_ICC < 1500) || (EIGEN_COMP_NVCC && EIGEN_COMP_NVCC < 80000) || \
747
(EIGEN_COMP_CLANG_STRICT && EIGEN_COMP_CLANG < 390) || \
748
(EIGEN_COMP_CLANGAPPLE && EIGEN_COMP_CLANGAPPLE < 9000000) || (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC < 510)
749
#error Eigen requires at least c++14 support.
750
#endif
751
752
// Does the compiler support C99?
753
// Need to include <cmath> to make sure _GLIBCXX_USE_C99 gets defined
754
#include <cmath>
755
#ifndef EIGEN_HAS_C99_MATH
756
#if ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) || \
757
(defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) || \
758
(EIGEN_COMP_MSVC) || defined(SYCL_DEVICE_ONLY))
759
#define EIGEN_HAS_C99_MATH 1
760
#else
761
#define EIGEN_HAS_C99_MATH 0
762
#endif
763
#endif
764
765
// Does the compiler support std::hash?
766
#ifndef EIGEN_HAS_STD_HASH
767
// The std::hash struct is defined in C++11 but is not labelled as a __device__
768
// function and is not constexpr, so cannot be used on device.
769
#if !defined(EIGEN_GPU_COMPILE_PHASE)
770
#define EIGEN_HAS_STD_HASH 1
771
#else
772
#define EIGEN_HAS_STD_HASH 0
773
#endif
774
#endif
// EIGEN_HAS_STD_HASH
775
776
#ifndef EIGEN_HAS_STD_INVOKE_RESULT
777
#if EIGEN_MAX_CPP_VER >= 17 && EIGEN_COMP_CXXVER >= 17
778
#define EIGEN_HAS_STD_INVOKE_RESULT 1
779
#else
780
#define EIGEN_HAS_STD_INVOKE_RESULT 0
781
#endif
782
#endif
783
784
#define EIGEN_CONSTEXPR constexpr
785
786
// NOTE: the required Apple's clang version is very conservative
787
// and it could be that XCode 9 works just fine.
788
// NOTE: the MSVC version is based on https://en.cppreference.com/w/cpp/compiler_support
789
// and not tested.
790
// NOTE: Intel C++ Compiler Classic (icc) Version 19.0 and later supports dynamic allocation
791
// for over-aligned data, but not in a manner that is compatible with Eigen.
792
// See https://gitlab.com/libeigen/eigen/-/issues/2575
793
#ifndef EIGEN_HAS_CXX17_OVERALIGN
794
#if EIGEN_MAX_CPP_VER >= 17 && EIGEN_COMP_CXXVER >= 17 && \
795
((EIGEN_COMP_MSVC >= 1912) || (EIGEN_GNUC_STRICT_AT_LEAST(7, 0, 0)) || (EIGEN_CLANG_STRICT_AT_LEAST(5, 0, 0)) || \
796
(EIGEN_COMP_CLANGAPPLE && EIGEN_COMP_CLANGAPPLE >= 10000000)) && \
797
!EIGEN_COMP_ICC
798
#define EIGEN_HAS_CXX17_OVERALIGN 1
799
#else
800
#define EIGEN_HAS_CXX17_OVERALIGN 0
801
#endif
802
#endif
803
804
#if defined(EIGEN_CUDACC)
805
// While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules
806
#if defined(__NVCC__)
807
// nvcc considers constexpr functions as __host__ __device__ with the option --expt-relaxed-constexpr
808
#ifdef __CUDACC_RELAXED_CONSTEXPR__
809
#define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
810
#endif
811
#elif defined(__clang__) && defined(__CUDA__) && __has_feature(cxx_relaxed_constexpr)
812
// clang++ always considers constexpr functions as implicitly __host__ __device__
813
#define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
814
#endif
815
#endif
816
817
// Does the compiler support the __int128 and __uint128_t extensions for 128-bit
818
// integer arithmetic?
819
//
820
// Clang and GCC define __SIZEOF_INT128__ when these extensions are supported,
821
// but we avoid using them in certain cases:
822
//
823
// * Building using Clang for Windows, where the Clang runtime library has
824
// 128-bit support only on LP64 architectures, but Windows is LLP64.
825
#ifndef EIGEN_HAS_BUILTIN_INT128
826
#if defined(__SIZEOF_INT128__) && !(EIGEN_OS_WIN && EIGEN_COMP_CLANG)
827
#define EIGEN_HAS_BUILTIN_INT128 1
828
#else
829
#define EIGEN_HAS_BUILTIN_INT128 0
830
#endif
831
#endif
832
833
//------------------------------------------------------------------------------------------
834
// Preprocessor programming helpers
835
//------------------------------------------------------------------------------------------
836
837
// This macro can be used to prevent macro expansion, e.g.:
838
// std::max EIGEN_NOT_A_MACRO(a,b)
839
#define EIGEN_NOT_A_MACRO
840
841
// Debug helper: prints the spelling of `x` followed by " = " and its value to std::cerr.
// The streamed expression is parenthesized so that arguments containing operators with lower
// precedence than << (e.g. `a & b`, `a < b`) are printed as a single value.
// The expansion already ends with a semicolon.
#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << (x) << std::endl;
842
843
// concatenate two tokens
844
#define EIGEN_CAT2(a, b) a##b
845
#define EIGEN_CAT(a, b) EIGEN_CAT2(a, b)
846
847
#define EIGEN_COMMA ,
848
849
// convert a token to a string
850
#define EIGEN_MAKESTRING2(a) #a
851
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
852
853
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
854
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
855
// but GCC is still doing fine with just inline.
856
#ifndef EIGEN_STRONG_INLINE
857
#if (EIGEN_COMP_MSVC || EIGEN_COMP_ICC) && !defined(EIGEN_GPUCC)
858
#define EIGEN_STRONG_INLINE __forceinline
859
#else
860
#define EIGEN_STRONG_INLINE inline
861
#endif
862
#endif
863
864
// EIGEN_ALWAYS_INLINE is the strongest, it has the effect of making the function inline and adding every possible
865
// attribute to maximize inlining. This should only be used when really necessary: in particular,
866
// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
867
// FIXME with the always_inline attribute,
868
#if EIGEN_COMP_GNUC && !defined(SYCL_DEVICE_ONLY)
869
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
870
#else
871
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
872
#endif
873
874
#if EIGEN_COMP_GNUC
875
#define EIGEN_DONT_INLINE __attribute__((noinline))
876
#elif EIGEN_COMP_MSVC
877
#define EIGEN_DONT_INLINE __declspec(noinline)
878
#else
879
#define EIGEN_DONT_INLINE
880
#endif
881
882
#if EIGEN_COMP_GNUC
883
#define EIGEN_PERMISSIVE_EXPR __extension__
884
#else
885
#define EIGEN_PERMISSIVE_EXPR
886
#endif
887
888
// GPU stuff
889
890
// Disable some features when compiling with GPU compilers (SYCL/HIPCC)
891
#if defined(SYCL_DEVICE_ONLY) || defined(EIGEN_HIP_DEVICE_COMPILE)
892
// Do not try asserts on device code
893
#ifndef EIGEN_NO_DEBUG
894
#define EIGEN_NO_DEBUG
895
#endif
896
897
#ifdef EIGEN_INTERNAL_DEBUGGING
898
#undef EIGEN_INTERNAL_DEBUGGING
899
#endif
900
#endif
901
902
// No exceptions on device.
903
#if defined(SYCL_DEVICE_ONLY) || defined(EIGEN_GPU_COMPILE_PHASE)
904
#ifdef EIGEN_EXCEPTIONS
905
#undef EIGEN_EXCEPTIONS
906
#endif
907
#endif
908
909
// EIGEN_DEVICE_FUNC: qualifier placed on functions that must be usable from device code.
//  - SYCL_DEVICE_ONLY: also defines EIGEN_DONT_VECTORIZE (if not already defined) and expands
//    EIGEN_DEVICE_FUNC to the flatten + always_inline attributes.
//  - EIGEN_GPUCC (CUDA/HIP compilers): expands to __host__ __device__.
//  - otherwise: expands to nothing.
#if defined(SYCL_DEVICE_ONLY)
910
#ifndef EIGEN_DONT_VECTORIZE
911
#define EIGEN_DONT_VECTORIZE
912
#endif
913
#define EIGEN_DEVICE_FUNC __attribute__((flatten)) __attribute__((always_inline))
914
// All functions callable from CUDA/HIP code must be qualified with __device__
915
#elif defined(EIGEN_GPUCC)
916
#define EIGEN_DEVICE_FUNC __host__ __device__
917
#else
918
#define EIGEN_DEVICE_FUNC
919
#endif
920
921
// These macros avoid linking errors about multiply defined functions.
922
// - static is not very good because it prevents definitions from different object files to be merged.
923
// So static causes the resulting linked executable to be bloated with multiple copies of the same function.
924
// - inline is not perfect either as it unwantedly hints the compiler toward inlining the function.
925
#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC
926
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline
927
928
#ifdef NDEBUG
929
#ifndef EIGEN_NO_DEBUG
930
#define EIGEN_NO_DEBUG
931
#endif
932
#endif
933
934
// eigen_assert can be overridden
935
#ifndef eigen_assert
936
#define eigen_assert(x) eigen_plain_assert(x)
937
#endif
938
939
#ifdef EIGEN_INTERNAL_DEBUGGING
940
#define eigen_internal_assert(x) eigen_assert(x)
941
#else
942
#define eigen_internal_assert(x) ((void)0)
943
#endif
944
945
#if defined(EIGEN_NO_DEBUG) || (defined(EIGEN_GPU_COMPILE_PHASE) && defined(EIGEN_NO_DEBUG_GPU))
946
#define EIGEN_ONLY_USED_FOR_DEBUG(x) EIGEN_UNUSED_VARIABLE(x)
947
#else
948
#define EIGEN_ONLY_USED_FOR_DEBUG(x)
949
#endif
950
951
#ifndef EIGEN_NO_DEPRECATED_WARNING
952
#if EIGEN_COMP_GNUC
953
#define EIGEN_DEPRECATED __attribute__((deprecated))
954
#elif EIGEN_COMP_MSVC
955
#define EIGEN_DEPRECATED __declspec(deprecated)
956
#else
957
#define EIGEN_DEPRECATED
958
#endif
959
#else
960
#define EIGEN_DEPRECATED
961
#endif
962
963
#ifndef EIGEN_NO_DEPRECATED_WARNING
964
#if EIGEN_COMP_GNUC
965
#define EIGEN_DEPRECATED_WITH_REASON(message) __attribute__((deprecated(message)))
966
#elif EIGEN_COMP_MSVC
967
#define EIGEN_DEPRECATED_WITH_REASON(message) __declspec(deprecated(message))
968
#else
969
#define EIGEN_DEPRECATED_WITH_REASON(message)
970
#endif
971
#else
972
#define EIGEN_DEPRECATED_WITH_REASON(message)
973
#endif
974
975
#if EIGEN_COMP_GNUC
976
#define EIGEN_UNUSED __attribute__((unused))
977
#else
978
#define EIGEN_UNUSED
979
#endif
980
981
// Portable wrappers around compiler diagnostic pragmas:
//  - EIGEN_PRAGMA(tokens): emits a pragma (_Pragma with the stringified tokens on GNU-compatible
//    compilers, __pragma on MSVC).
//  - EIGEN_DIAGNOSTICS(tokens): prefixes the tokens with `GCC diagnostic` (GNU-compatible) or wraps
//    them in `warning(...)` (MSVC).
//  - EIGEN_DIAGNOSTICS_OFF(msc, gcc): forwards whichever argument matches the active compiler.
// All three expand to nothing on any other compiler.
#if EIGEN_COMP_GNUC
982
#define EIGEN_PRAGMA(tokens) _Pragma(#tokens)
983
#define EIGEN_DIAGNOSTICS(tokens) EIGEN_PRAGMA(GCC diagnostic tokens)
984
#define EIGEN_DIAGNOSTICS_OFF(msc, gcc) EIGEN_DIAGNOSTICS(gcc)
985
#elif EIGEN_COMP_MSVC
986
#define EIGEN_PRAGMA(tokens) __pragma(tokens)
987
#define EIGEN_DIAGNOSTICS(tokens) EIGEN_PRAGMA(warning(tokens))
988
#define EIGEN_DIAGNOSTICS_OFF(msc, gcc) EIGEN_DIAGNOSTICS(msc)
989
#else
990
#define EIGEN_PRAGMA(tokens)
991
#define EIGEN_DIAGNOSTICS(tokens)
992
#define EIGEN_DIAGNOSTICS_OFF(msc, gcc)
993
#endif
994
995
#define EIGEN_DISABLE_DEPRECATED_WARNING EIGEN_DIAGNOSTICS_OFF(disable : 4996, ignored "-Wdeprecated-declarations")
996
997
// Suppresses 'unused variable' warnings.
998
namespace
Eigen
{
999
namespace
internal {
1000
template
<
typename
T>
1001
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
constexpr
void
ignore_unused_variable(
const
T&) {}
1002
}
// namespace internal
1003
}
// namespace Eigen
1004
// NOTE: the expansion already ends with ';', so `EIGEN_UNUSED_VARIABLE(v);` yields an extra empty statement.
#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);
1005
1006
#if !defined(EIGEN_ASM_COMMENT)
1007
#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
1008
#define EIGEN_ASM_COMMENT(X) __asm__("#" X)
1009
#else
1010
#define EIGEN_ASM_COMMENT(X)
1011
#endif
1012
#endif
1013
1014
// Acts as a barrier preventing operations involving `X` from crossing. This
1015
// occurs, for example, in the fast rounding trick where a magic constant is
1016
// added then subtracted, which is otherwise compiled away with -ffast-math.
1017
//
1018
// See bug 1674
1019
#if defined(EIGEN_GPU_COMPILE_PHASE)
1020
#define EIGEN_OPTIMIZATION_BARRIER(X)
1021
#endif
1022
1023
#if !defined(EIGEN_OPTIMIZATION_BARRIER)
1024
// Implement the barrier on GNUC compilers or clang-cl.
1025
#if EIGEN_COMP_GNUC || (defined(__clang__) && defined(_MSC_VER))
1026
// According to https://gcc.gnu.org/onlinedocs/gcc/Constraints.html:
1027
// X: Any operand whatsoever.
1028
// r: A register operand is allowed provided that it is in a general
1029
// register.
1030
// g: Any register, memory or immediate integer operand is allowed, except
1031
// for registers that are not general registers.
1032
// w: (AArch32/AArch64) Floating point register, Advanced SIMD vector
1033
// register or SVE vector register.
1034
// x: (SSE) Any SSE register.
1035
// (AArch64) Like w, but restricted to registers 0 to 15 inclusive.
1036
// v: (PowerPC) An Altivec vector register.
1037
// wa:(PowerPC) A VSX register.
1038
//
1039
// "X" (uppercase) should work for all cases, though this seems to fail for
1040
// some versions of GCC for arm/aarch64 with
1041
// "error: inconsistent operand constraints in an 'asm'"
1042
// Clang x86_64/arm/aarch64 seems to require "g" to support both scalars and
1043
// vectors, otherwise
1044
// "error: non-trivial scalar-to-vector conversion, possible invalid
1045
// constraint for vector type"
1046
//
1047
// GCC for ppc64le generates an internal compiler error with x/X/g.
1048
// GCC for AVX generates an internal compiler error with X.
1049
//
1050
// Tested on icc/gcc/clang for sse, avx, avx2, avx512dq
1051
// gcc for arm, aarch64,
1052
// gcc for ppc64le,
1053
// both vectors and scalars.
1054
//
1055
// Note that this is restricted to plain types - this will not work
1056
// directly for std::complex<T>, Eigen::half, Eigen::bfloat16. For these,
1057
// you will need to apply to the underlying POD type.
1058
#if EIGEN_ARCH_PPC && EIGEN_COMP_GNUC_STRICT
1059
// This seems to be broken on clang. Packet4f is loaded into a single
1060
// register rather than a vector, zeroing out some entries. Integer
1061
// types also generate a compile error.
1062
#if EIGEN_OS_MAC
1063
// General, Altivec for Apple (VSX were added in ISA v2.06):
1064
#define EIGEN_OPTIMIZATION_BARRIER(X) __asm__("" : "+r,v"(X));
1065
#else
1066
// General, Altivec, VSX otherwise:
1067
#define EIGEN_OPTIMIZATION_BARRIER(X) __asm__("" : "+r,v,wa"(X));
1068
#endif
1069
#elif EIGEN_ARCH_ARM_OR_ARM64
1070
#ifdef __ARM_FP
1071
// General, VFP or NEON.
1072
// Clang doesn't like "r",
1073
// error: non-trivial scalar-to-vector conversion, possible invalid
1074
// constraint for vector type
1075
#define EIGEN_OPTIMIZATION_BARRIER(X) __asm__("" : "+g,w"(X));
1076
#else
1077
// Arm without VFP or NEON.
1078
// "w" constraint will not compile.
1079
#define EIGEN_OPTIMIZATION_BARRIER(X) __asm__("" : "+g"(X));
1080
#endif
1081
#elif EIGEN_ARCH_i386_OR_x86_64
1082
// General, SSE.
1083
#define EIGEN_OPTIMIZATION_BARRIER(X) __asm__("" : "+g,x"(X));
1084
#else
1085
// Not implemented for other architectures.
1086
#define EIGEN_OPTIMIZATION_BARRIER(X)
1087
#endif
1088
#else
1089
// Not implemented for other compilers.
1090
#define EIGEN_OPTIMIZATION_BARRIER(X)
1091
#endif
1092
#endif
1093
1094
// EIGEN_CONST_CONDITIONAL(cond): wraps a compile-time-constant condition used in a runtime `if`.
// The expansion and its argument are fully parenthesized so the macro composes safely with
// neighbouring operators, e.g. `!EIGEN_CONST_CONDITIONAL(a || b)` negates the whole condition.
#if EIGEN_COMP_MSVC
// NOTE MSVC often gives C4127 warnings with compile-time if statements. See bug 1362.
// Prefixing the condition with a discarded `(void)0` through the comma operator is ugly,
// but it does the job.
#define EIGEN_CONST_CONDITIONAL(cond) ((void)0, (cond))
#else
#define EIGEN_CONST_CONDITIONAL(cond) (cond)
#endif
1101
1102
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
1103
#define EIGEN_RESTRICT
1104
#endif
1105
#ifndef EIGEN_RESTRICT
1106
#define EIGEN_RESTRICT __restrict
1107
#endif
1108
1109
#ifndef EIGEN_DEFAULT_IO_FORMAT
1110
#ifdef EIGEN_MAKING_DOCS
1111
// format used in Eigen's documentation
1112
// needed to define it here as escaping characters in CMake add_definition's argument seems very problematic.
1113
#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat(3, 0, " ", "\n", "", "")
1114
#else
1115
#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat()
1116
#endif
1117
#endif
1118
1119
// just an empty macro !
1120
#define EIGEN_EMPTY
1121
1122
// When compiling CUDA/HIP device code with NVCC or HIPCC
1123
// pull in math functions from the global namespace.
1124
// In host mode, and when device code is compiled with clang,
1125
// use the std versions.
1126
#if (defined(EIGEN_CUDA_ARCH) && defined(__NVCC__)) || defined(EIGEN_HIP_DEVICE_COMPILE)
1127
#define EIGEN_USING_STD(FUNC) using ::FUNC;
1128
#else
1129
#define EIGEN_USING_STD(FUNC) using std::FUNC;
1130
#endif
1131
1132
#if EIGEN_COMP_CLANG
// workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
1133
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
1134
using Base::operator=; \
1135
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { \
1136
Base::operator=(other); \
1137
return *this; \
1138
} \
1139
template <typename OtherDerived> \
1140
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { \
1141
Base::operator=(other.derived()); \
1142
return *this; \
1143
}
1144
#else
1145
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
1146
using Base::operator=; \
1147
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { \
1148
Base::operator=(other); \
1149
return *this; \
1150
}
1151
#endif
1152
1158
// Declares an explicitly defaulted copy constructor for CLASS (prefixed with EIGEN_DEVICE_FUNC).
// The expansion includes its own trailing ';'.
#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) EIGEN_DEVICE_FUNC CLASS(const CLASS&) = default;
1159
1165
// Convenience macro combining EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived), which re-exposes
// Base::operator= and defines Derived's copy assignment, with EIGEN_DEFAULT_COPY_CONSTRUCTOR(Derived).
// The expansion refers to `Base`, so that name must be visible where the macro is used.
#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) \
1166
EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
1167
EIGEN_DEFAULT_COPY_CONSTRUCTOR(Derived)
1168
1176
// Declares an explicitly defaulted default constructor and destructor for Derived, both prefixed
// with EIGEN_DEVICE_FUNC. The access level is whatever is in effect where the macro is expanded.
#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \
1177
EIGEN_DEVICE_FUNC Derived() = default; \
1178
EIGEN_DEVICE_FUNC ~Derived() = default;
1179
1187
1188
#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
1189
typedef typename Eigen::internal::traits<Derived>::Scalar \
1190
Scalar;
\
1191
typedef typename Eigen::NumTraits<Scalar>::Real \
1192
RealScalar;
\
1194
typedef typename Base::CoeffReturnType \
1195
CoeffReturnType;
\
1198
typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \
1199
typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
1200
typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \
1201
enum CompileTimeTraits { \
1202
RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
1203
ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
1204
Flags = Eigen::internal::traits<Derived>::Flags, \
1205
SizeAtCompileTime = Base::SizeAtCompileTime, \
1206
MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
1207
IsVectorAtCompileTime = Base::IsVectorAtCompileTime \
1208
}; \
1209
using Base::derived; \
1210
using Base::const_cast_derived;
1211
1212
// FIXME Maybe the EIGEN_DENSE_PUBLIC_INTERFACE could be removed as importing PacketScalar is rarely needed
1213
#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
1214
EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
1215
typedef typename Base::PacketScalar PacketScalar;
1216
1217
// Branch-prediction hints: EIGEN_PREDICT_FALSE(x) / EIGEN_PREDICT_TRUE(x) evaluate to the truth
// value of `x` while telling the compiler which outcome is expected.
//
// Both variants first normalize `x` to bool with `false || (x)`. __builtin_expect takes `long`
// operands, so forwarding `x` unchanged would truncate wider integer values on targets where
// `long` is narrower than the operand, and would reject operands that are only contextually
// convertible to bool (pointers, types with an explicit operator bool).
#if EIGEN_HAS_BUILTIN(__builtin_expect) || EIGEN_COMP_GNUC
#define EIGEN_PREDICT_FALSE(x) (__builtin_expect(false || (x), false))
#define EIGEN_PREDICT_TRUE(x) (__builtin_expect(false || (x), true))
#else
// No builtin available: the hints degrade to the plain condition.
#define EIGEN_PREDICT_FALSE(x) (x)
#define EIGEN_PREDICT_TRUE(x) (x)
#endif
1224
1225
// the expression type of a standard coefficient wise binary operation
1226
#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS, RHS, OPNAME) \
1227
CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_, OPNAME), _op) < typename internal::traits<LHS>::Scalar, \
1228
typename internal::traits<RHS>::Scalar>, \
1229
const LHS, const RHS >
1230
1231
#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD, OPNAME) \
1232
template <typename OtherDerived> \
1233
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE( \
1234
Derived, OtherDerived, OPNAME)(METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived>& other) const { \
1235
return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived, OtherDerived, OPNAME)(derived(), other.derived()); \
1236
}
1237
1238
#define EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME, TYPEA, TYPEB) \
1239
(Eigen::internal::has_ReturnType<Eigen::ScalarBinaryOpTraits< \
1240
TYPEA, TYPEB, EIGEN_CAT(EIGEN_CAT(Eigen::internal::scalar_, OPNAME), _op) < TYPEA, TYPEB> > > ::value)
1241
1242
#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR, SCALAR, OPNAME) \
1243
CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_, OPNAME), _op) < typename internal::traits<EXPR>::Scalar, \
1244
SCALAR>, \
1245
const EXPR, const typename internal::plain_constant_type<EXPR, SCALAR>::type >
1246
1247
#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR, EXPR, OPNAME) \
1248
CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_, OPNAME), _op) < SCALAR, \
1249
typename internal::traits<EXPR>::Scalar>, \
1250
const typename internal::plain_constant_type<EXPR, SCALAR>::type, const EXPR >
1251
1252
#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD, OPNAME) \
1253
template <typename T> \
1254
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE( \
1255
Derived, \
1256
typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED( \
1257
OPNAME, Scalar, T)>::type, \
1258
OPNAME)(METHOD)(const T& scalar) const { \
1259
typedef typename internal::promote_scalar_arg<Scalar, T, EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME, Scalar, T)>::type \
1260
PromotedT; \
1261
return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived, PromotedT, OPNAME)( \
1262
derived(), typename internal::plain_constant_type<Derived, PromotedT>::type( \
1263
derived().rows(), derived().cols(), internal::scalar_constant_op<PromotedT>(scalar))); \
1264
}
1265
1266
#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD, OPNAME) \
1267
template <typename T> \
1268
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE( \
1269
typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED( \
1270
OPNAME, T, Scalar)>::type, \
1271
Derived, OPNAME)(METHOD)(const T& scalar, const StorageBaseType& matrix) { \
1272
typedef typename internal::promote_scalar_arg<Scalar, T, EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME, T, Scalar)>::type \
1273
PromotedT; \
1274
return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT, Derived, OPNAME)( \
1275
typename internal::plain_constant_type<Derived, PromotedT>::type( \
1276
matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op<PromotedT>(scalar)), \
1277
matrix.derived()); \
1278
}
1279
1280
#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD, OPNAME) \
1281
EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD, OPNAME) \
1282
EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD, OPNAME)
1283
1284
// Auto-enable EIGEN_EXCEPTIONS when the compiler advertises exception support (_CPPUNWIND or
// __EXCEPTIONS), EIGEN_EXCEPTIONS is not already defined, and none of EIGEN_CUDA_ARCH,
// EIGEN_USE_SYCL or EIGEN_HIP_DEVICE_COMPILE is defined.
#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_EXCEPTIONS) && \
1285
!defined(EIGEN_USE_SYCL) && !defined(EIGEN_HIP_DEVICE_COMPILE)
1286
#define EIGEN_EXCEPTIONS
1287
#endif
1288
1289
// Exception-handling shims. With EIGEN_EXCEPTIONS defined they map directly onto the C++ keywords
// throw / try / catch. Without it, "throwing" terminates instead (the argument X is discarded),
// and EIGEN_TRY / EIGEN_CATCH degrade to an if/else pair.
#ifdef EIGEN_EXCEPTIONS
1290
#define EIGEN_THROW_X(X) throw X
1291
#define EIGEN_THROW throw
1292
#define EIGEN_TRY try
1293
#define EIGEN_CATCH(X) catch (X)
1294
#else
1295
// Pick the termination primitive: inline-asm trap statements when EIGEN_CUDA_ARCH or
// EIGEN_HIP_DEVICE_COMPILE is defined, std::abort() otherwise.
#if defined(EIGEN_CUDA_ARCH)
1296
#define EIGEN_THROW_X(X) asm("trap;")
1297
#define EIGEN_THROW asm("trap;")
1298
#elif defined(EIGEN_HIP_DEVICE_COMPILE)
1299
#define EIGEN_THROW_X(X) asm("s_trap 0")
1300
#define EIGEN_THROW asm("s_trap 0")
1301
#else
1302
#define EIGEN_THROW_X(X) std::abort()
1303
#define EIGEN_THROW std::abort()
1304
#endif
1305
// `if (true) {...} else {...}`: the "try" body always runs and the "catch" body is never entered,
// but both bodies must still compile.
#define EIGEN_TRY if (true)
1306
#define EIGEN_CATCH(X) else
1307
#endif
1308
1309
// The all function is used to enable a variadic version of eigen_assert which can take a parameter pack as its input.
1310
namespace
Eigen
{
1311
namespace
internal {
1312
1313
EIGEN_DEVICE_FUNC
inline
bool
all() {
return
true
; }
1314
1315
template
<
typename
T,
typename
... Ts>
1316
EIGEN_DEVICE_FUNC
bool
all(T t, Ts... ts) {
1317
return
t && all(ts...);
1318
}
1319
1320
}
// namespace internal
1321
}
// namespace Eigen
1322
1323
// Aliases for the override and final specifiers (defined unconditionally):
1324
#define EIGEN_OVERRIDE override
1325
#define EIGEN_FINAL final
1326
1327
// Wrapping #pragma unroll in a macro since it is required for SYCL
1328
#if defined(SYCL_DEVICE_ONLY)
1329
#if defined(_MSC_VER)
1330
#define EIGEN_UNROLL_LOOP __pragma(unroll)
1331
#else
1332
#define EIGEN_UNROLL_LOOP _Pragma("unroll")
1333
#endif
1334
#else
1335
#define EIGEN_UNROLL_LOOP
1336
#endif
1337
1338
// Notice: Use this macro with caution. The code in the if body should still
1339
// compile with C++14.
1340
#if defined(EIGEN_HAS_CXX17_IFCONSTEXPR)
1341
#define EIGEN_IF_CONSTEXPR(X) if constexpr (X)
1342
#else
1343
#define EIGEN_IF_CONSTEXPR(X) if (X)
1344
#endif
1345
1346
#endif
// EIGEN_MACROS_H
Eigen
Namespace containing all symbols from the Eigen library.
Definition
B01_Experimental.dox:1
Eigen
src
Core
util
Macros.h
Generated by
1.13.2