34#ifndef EIGEN_ASSIGN_VML_H
35#define EIGEN_ASSIGN_VML_H
38#include "./InternalHeaderCheck.h"
44template <
typename Dst,
typename Src>
45class vml_assign_traits {
50 StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
51 InnerSize =
int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
52 : int(Dst::Flags) &
RowMajorBit ? int(Dst::ColsAtCompileTime)
53 : int(Dst::RowsAtCompileTime),
54 InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
55 : int(Dst::Flags) &
RowMajorBit ? int(Dst::MaxColsAtCompileTime)
56 : int(Dst::MaxRowsAtCompileTime),
57 MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
59 MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess &&
60 Src::InnerStrideAtCompileTime == 1 && Dst::InnerStrideAtCompileTime == 1,
61 MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) &
LinearAccessBit),
62 VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
63 LargeEnough = VmlSize ==
Dynamic || VmlSize >= EIGEN_MKL_VML_THRESHOLD
67 enum { EnableVml = MightEnableVml && LargeEnough, Traversal = MightLinearize ? LinearTraversal : DefaultTraversal };
// Identity macro: expands to its argument unchanged. The VML call sites wrap the
// pasted EIGEN_VMLMODE_EXPAND_x<MODE> name in it.
#define EIGEN_PP_EXPAND(ARG) ARG

// Trailing "mode" argument appended to a VML call for mode LA:
// VML_HA unless EIGEN_FAST_MATH is defined to 1, in which case VML_LA.
#if !defined(EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define EIGEN_VMLMODE_EXPAND_xLA , VML_HA
#else
#define EIGEN_VMLMODE_EXPAND_xLA , VML_LA
#endif

// Mode "_" appends no extra argument.
#define EIGEN_VMLMODE_EXPAND_x_

// Routine-name prefix per mode: "vm" for mode LA, "v" for mode "_".
#define EIGEN_VMLMODE_PREFIX_xLA vm
#define EIGEN_VMLMODE_PREFIX_x_ v
// Selects the prefix for VMLMODE by pasting it onto EIGEN_VMLMODE_PREFIX_x
// (EIGEN_CAT is defined elsewhere).
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_x, VMLMODE)
// Declares the Assignment specialization that evaluates
//   dst = CwiseUnaryOp<scalar_<EIGENOP>_op<EIGENTYPE>, SrcXprNested>
// by calling the routine VMLOP instead of the generic evaluator. The
// specialization is only enabled when vml_assign_traits<...>::EnableVml holds.
//
//   EIGENOP   - operation name, pasted into scalar_<EIGENOP>_op
//   VMLOP     - routine to call as VMLOP(n, src, dst [, mode])
//   EIGENTYPE - scalar type on the Eigen side
//   VMLTYPE   - type the data pointers are cast to for the call
//   VMLMODE   - LA or _ ; selects the optional trailing mode argument
//
// With linear traversal the whole array goes through one call; otherwise one
// call is issued per outer index on the corresponding inner vector.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
  template <typename DstXprType, typename SrcXprNested> \
  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, \
                    assign_op<EIGENTYPE, EIGENTYPE>, Dense2Dense, \
                    std::enable_if_t<vml_assign_traits<DstXprType, SrcXprNested>::EnableVml>> { \
    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE, EIGENTYPE> &func) { \
      resize_if_allowed(dst, src, func); \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
      if (vml_assign_traits<DstXprType, SrcXprNested>::Traversal == (int)LinearTraversal) { \
        /* Single call over all dst.size() coefficients. */ \
        VMLOP(dst.size(), (const VMLTYPE *)src.nestedExpression().data(), \
              (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
      } else { \
        /* One call per outer index, starting at the first coefficient of that inner vector. */ \
        const Index outerSize = dst.outerSize(); \
        for (Index outer = 0; outer < outerSize; ++outer) { \
          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer, 0)) \
                                                    : &(src.nestedExpression().coeffRef(0, outer)); \
          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer, 0)) : &(dst.coeffRef(0, outer)); \
          VMLOP(dst.innerSize(), (const VMLTYPE *)src_ptr, \
                (VMLTYPE *)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
        } \
      } \
    } \
  };
// Declares the unary VML specializations for float and double.
// The routine name is built as <mode prefix> + 's'/'d' + VMLOP
// (assuming EIGEN_CAT, defined elsewhere, concatenates its two arguments).
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), s##VMLOP), float, float, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), d##VMLOP), double, double, VMLMODE)
// Declares the unary VML specializations for the complex scalar types:
// scomplex data is passed as MKL_Complex8, dcomplex data as MKL_Complex16.
// The routine name is built as <mode prefix> + 'c'/'z' + VMLOP
// (assuming EIGEN_CAT, defined elsewhere, concatenates its two arguments).
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), c##VMLOP), scomplex, \
                                   MKL_Complex8, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), z##VMLOP), dcomplex, \
                                   MKL_Complex16, VMLMODE)
// Declares the unary VML specializations for all four scalar types
// (float, double, scomplex, dcomplex) by combining the REAL and CPLX variants.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
122EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
sin, Sin, LA)
123EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
asin, Asin, LA)
124EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
sinh, Sinh, LA)
125EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
cos, Cos, LA)
126EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
acos, Acos, LA)
127EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
cosh, Cosh, LA)
128EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
tan, Tan, LA)
129EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
atan, Atan, LA)
130EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
tanh, Tanh, LA)
132EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
exp, Exp, LA)
133EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
log, Ln, LA)
134EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
log10, Log10, LA)
135EIGEN_MKL_VML_DECLARE_UNARY_CALLS(
sqrt, Sqrt, _)
137EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(
square, Sqr, _)
138EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(
arg, Arg, _)
139EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(
round, Round, _)
140EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(
floor, Floor, _)
141EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(
ceil, Ceil, _)
142EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(
cbrt, Cbrt, _)
// Declares the Assignment specialization that evaluates
//   dst = CwiseBinaryOp<scalar_<EIGENOP>_op<EIGENTYPE, EIGENTYPE>, SrcXprNested, constant>
// (an array combined with a scalar constant on the right-hand side) by calling
// VMLOP(n, src, exponent, dst [, mode]). The specialization is only enabled
// when vml_assign_traits<...>::EnableVml holds.
//
//   EIGENOP   - operation name, pasted into scalar_<EIGENOP>_op
//   VMLOP     - routine to call
//   EIGENTYPE - scalar type on the Eigen side
//   VMLTYPE   - type the data pointers and the exponent are reinterpreted as
//   VMLMODE   - LA or _ ; selects the optional trailing mode argument
//
// The Traversal comparison casts LinearTraversal to int, as the unary macro
// does, so that two unrelated enumeration types are not compared directly.
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
  template <typename DstXprType, typename SrcXprNested, typename Plain> \
  struct Assignment<DstXprType, \
                    CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE, EIGENTYPE>, SrcXprNested, \
                                  const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>, Plain>>, \
                    assign_op<EIGENTYPE, EIGENTYPE>, Dense2Dense, \
                    std::enable_if_t<vml_assign_traits<DstXprType, SrcXprNested>::EnableVml>> { \
    typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE, EIGENTYPE>, SrcXprNested, \
                          const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>, Plain>> \
        SrcXprType; \
    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE, EIGENTYPE> &func) { \
      resize_if_allowed(dst, src, func); \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
      /* The scalar exponent is the value stored in the right-hand constant functor. */ \
      VMLTYPE exponent = reinterpret_cast<const VMLTYPE &>(src.rhs().functor().m_other); \
      if (vml_assign_traits<DstXprType, SrcXprNested>::Traversal == (int)LinearTraversal) { \
        /* Single call over all dst.size() coefficients. */ \
        VMLOP(dst.size(), (const VMLTYPE *)src.lhs().data(), exponent, \
              (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
      } else { \
        /* One call per outer index, starting at the first coefficient of that inner vector. */ \
        const Index outerSize = dst.outerSize(); \
        for (Index outer = 0; outer < outerSize; ++outer) { \
          const EIGENTYPE *src_ptr = \
              src.IsRowMajor ? &(src.lhs().coeffRef(outer, 0)) : &(src.lhs().coeffRef(0, outer)); \
          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer, 0)) : &(dst.coeffRef(0, outer)); \
          VMLOP(dst.innerSize(), (const VMLTYPE *)src_ptr, exponent, \
                (VMLTYPE *)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
        } \
      } \
    } \
  };
174EIGEN_MKL_VML_DECLARE_POW_CALL(
pow, vmsPowx,
float,
float, LA)
175EIGEN_MKL_VML_DECLARE_POW_CALL(
pow, vmdPowx,
double,
double, LA)
176EIGEN_MKL_VML_DECLARE_POW_CALL(
pow, vmcPowx, scomplex, MKL_Complex8, LA)
177EIGEN_MKL_VML_DECLARE_POW_CALL(
pow, vmzPowx, dcomplex, MKL_Complex16, LA)
const GlobalUnaryPowReturnType< Derived, ScalarExponent > pow(const Eigen::ArrayBase< Derived > &x, const ScalarExponent &exponent)
const unsigned int LinearAccessBit
Definition Constants.h:133
const unsigned int DirectAccessBit
Definition Constants.h:159
const unsigned int RowMajorBit
Definition Constants.h:70
Namespace containing all symbols from the Eigen library.
Definition B01_Experimental.dox:1
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cosh_op< typename Derived::Scalar >, const Derived > cosh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cos_op< typename Derived::Scalar >, const Derived > cos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sqrt_op< typename Derived::Scalar >, const Derived > sqrt(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_exp_op< typename Derived::Scalar >, const Derived > exp(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_square_op< typename Derived::Scalar >, const Derived > square(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tan_op< typename Derived::Scalar >, const Derived > tan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atan_op< typename Derived::Scalar >, const Derived > atan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_round_op< typename Derived::Scalar >, const Derived > round(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log_op< typename Derived::Scalar >, const Derived > log(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sin_op< typename Derived::Scalar >, const Derived > sin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_asin_op< typename Derived::Scalar >, const Derived > asin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tanh_op< typename Derived::Scalar >, const Derived > tanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_acos_op< typename Derived::Scalar >, const Derived > acos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sinh_op< typename Derived::Scalar >, const Derived > sinh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log10_op< typename Derived::Scalar >, const Derived > log10(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)
const int Dynamic
Definition Constants.h:25
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_floor_op< typename Derived::Scalar >, const Derived > floor(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cbrt_op< typename Derived::Scalar >, const Derived > cbrt(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_ceil_op< typename Derived::Scalar >, const Derived > ceil(const Eigen::ArrayBase< Derived > &x)