#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H

namespace Eigen {
/** \class TensorEvaluator
  * \ingroup CXX11_Tensor_Module
  *
  * \brief The tensor evaluator class.
  */
template <typename Derived, typename Device>
struct TensorEvaluator {
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef Derived XprType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
  typedef StorageMemory<Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  // NumDimensions is -1 for variable-dimension tensors.
  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;

  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
    PreferBlockAccess = false,
    Layout            = Derived::Layout,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -----------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get((const_cast<TensorPointerType>(m.data())))),
        m_dims(m.dimensions()),
        m_device(device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
      m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data),
                      m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    // A plain tensor is a leaf of the expression tree: evaluation is synchronous.
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }

  // Masked partial load, only available for packet types that support it.
  template <typename PacketReturnTypeT>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }

  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x) {
    return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
  coeffRef(const array<DenseIndex, NumCoords>& coords) {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }
  template<typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(m_data != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
                                            Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(desc.dimensions(),
                                  internal::strides<Layout>(m_dims), m_data,
                                  desc.offset()),
        block.expr());
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

#ifdef EIGEN_USE_SYCL
  // Binds placeholder accessors to a SYCL command group handler.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif

 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
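
// Illustrative sketch (not part of the library): driving this evaluator by
// hand on the default device. The names `t`, `device`, and `eval` are
// hypothetical; only members defined above are used.
//
//   Eigen::Tensor<float, 2> t(4, 8);
//   t.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<Eigen::Tensor<float, 2>, Eigen::DefaultDevice>
//       eval(t, device);
//   eval.evalSubExprsIfNeeded(NULL);              // leaf tensor: nothing to do
//   float v0 = eval.coeff(0);                     // scalar access
//   auto  p0 = eval.packet<Eigen::Unaligned>(0);  // vectorized access
//   eval.cleanup();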
namespace internal {

template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
  return *address;
}

// Use the texture cache on CUDA devices whenever possible.
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double loadConstant(const double* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
Eigen::half loadConstant(const Eigen::half* address) {
  return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
}
#endif

#ifdef EIGEN_USE_SYCL
// SYCL overload of loadConstant, based on range accessors.
template <cl::sycl::access::mode AcMd, typename T>
T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
  return *address;
}
#endif

}  // namespace internal
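
// Minimal host-side sketch (illustration, not library code): on the CPU,
// internal::loadConstant is a plain dereference, so the two reads below yield
// the same value; on CUDA devices of compute capability >= 3.5 the
// specializations above route the load through the read-only cache via __ldg.
//
//   const float buf[4] = {1.f, 2.f, 3.f, 4.f};
//   float a = Eigen::internal::loadConstant(buf + 2);  // 3.f
//   float b = buf[2];                                  // 3.f, same result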
// Default evaluator for rvalues.
template<typename Derived, typename Device>
struct TensorEvaluator<const Derived, Device>
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef const Derived XprType;
  typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
  typedef StorageMemory<const Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  // NumDimensions is -1 for variable-dimension tensors.
  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<ScalarNoConst>::value,
    PreferBlockAccess = false,
    Layout            = Derived::Layout,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -----------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
      m_device.memcpy((void*)(m_device.get(data)), m_device.get(m_data),
                      m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    // A plain tensor is a leaf of the expression tree: evaluation is synchronous.
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return internal::loadConstant(m_data + index);
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const {
    return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
  }

  // Masked partial load, only available for packet types that support it.
  template <typename PacketReturnTypeT>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor))
                            ? m_dims.IndexOfColMajor(coords)
                            : m_dims.IndexOfRowMajor(coords);
    return internal::loadConstant(m_data + index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

#ifdef EIGEN_USE_SYCL
  // Binds placeholder accessors to a SYCL command group handler.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif

 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
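
// Illustrative sketch (not part of the library): this const specialization is
// the one expression evaluation uses for read-only leaves. Names below are
// hypothetical.
//
//   Eigen::Tensor<float, 1> t(16);
//   t.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<const Eigen::Tensor<float, 1>, Eigen::DefaultDevice>
//       eval(t, device);
//   float v = eval.coeff(3);  // read-only path, goes through loadConstant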
// -------------------- CwiseNullaryOp --------------------

template<typename NullaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{
  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;

  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = true,
    PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
#ifdef EIGEN_USE_SYCL
                   && (PacketType<CoeffReturnType, Device>::size > 1)
#endif
    ,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -----------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    done(true);
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const {
    return m_wrapper(m_functor, index);
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const NullaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
  const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
};
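
// Illustrative sketch (not part of the library): expressions such as
// constant() and random() produce a TensorCwiseNullaryOp, which this
// evaluator computes on the fly; no buffer is ever materialized for it
// (data() returns NULL).
//
//   Eigen::Tensor<float, 2> a(2, 3);
//   a = a.constant(1.5f);   // nullary expression, one functor call per coeff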
// -------------------- CwiseUnaryOp --------------------

template<typename UnaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<UnaryOp>::PacketAccess),
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_argImpl(op.nestedExpression(), device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static const int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -----------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
      TensorBlock;
  //===------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_argImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_argImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const {
    return m_functor(m_argImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.getResourceRequirements().addCostPerCoeff(
        {0, 0, functor_cost / PacketSize});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const UnaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
};
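
// Illustrative sketch (not part of the library): any coefficient-wise unary
// expression, e.g. sqrt(), is evaluated by this struct; coeff() applies the
// functor to one input coefficient, packet() to PacketSize of them at once.
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3);
//   a.setRandom();
//   b = a.abs().sqrt();   // nested TensorCwiseUnaryOp expressions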
// -------------------- CwiseBinaryOp --------------------

template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
{
  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

  enum {
    IsAligned         = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
                        int(TensorEvaluator<RightArgType, Device>::IsAligned),
    PacketAccess      = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
                        int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<BinaryOp>::PacketAccess),
    BlockAccess       = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
                        int(TensorEvaluator<RightArgType, Device>::BlockAccess),
    PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
                        int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
    Layout            = TensorEvaluator<LeftArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_leftImpl(op.lhsExpression(), device),
      m_rightImpl(op.rhsExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) ||
                         internal::traits<XprType>::NumDimensions <= 1),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static const int NumDims = internal::array_size<
      typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -----------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
      LeftTensorBlock;
  typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
      RightTensorBlock;

  typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
                                           RightTensorBlock>
      TensorBlock;
  //===------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const {
    return m_leftImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
                                            [done](bool) { done(true); });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const {
    return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index),
                              m_rightImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return m_leftImpl.costPerCoeff(vectorized) +
           m_rightImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return internal::TensorBlockResourceRequirements::merge(
               m_leftImpl.getResourceRequirements(),
               m_rightImpl.getResourceRequirements())
        .addCostPerCoeff({0, 0, functor_cost / PacketSize});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    desc.DropDestinationBuffer();
    return TensorBlock(m_leftImpl.block(desc, scratch),
                       m_rightImpl.block(desc, scratch), m_functor);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // Binds placeholder accessors to a SYCL command group handler.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_leftImpl.bind(cgh);
    m_rightImpl.bind(cgh);
  }
#endif

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const BinaryOp m_functor;
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;
};
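
// Illustrative sketch (not part of the library): a + b builds a
// TensorCwiseBinaryOp whose evaluator pulls one coefficient (or packet) from
// each side and combines them with the functor; the static assert above
// rejects mixing ColMajor and RowMajor operands.
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3), c(2, 3);
//   a.setRandom();
//   b.setRandom();
//   c = a * b + a;   // tree of binary expressions, evaluated coefficient-wise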
// -------------------- CwiseTernaryOp --------------------

template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
{
  typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;

  enum {
    IsAligned         = TensorEvaluator<Arg1Type, Device>::IsAligned &
                        TensorEvaluator<Arg2Type, Device>::IsAligned &
                        TensorEvaluator<Arg3Type, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
                        TensorEvaluator<Arg2Type, Device>::PacketAccess &&
                        TensorEvaluator<Arg3Type, Device>::PacketAccess &&
                        internal::functor_traits<TernaryOp>::PacketAccess,
    BlockAccess       = false,
    PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<Arg1Type, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_functor(op.functor()),
      m_arg1Impl(op.arg1Expression(), device),
      m_arg2Impl(op.arg2Expression(), device),
      m_arg3Impl(op.arg3Expression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) ||
                         internal::traits<XprType>::NumDimensions <= 1),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg2Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg3Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg2Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg3Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)

    eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) &&
                 dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -----------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const {
    return m_arg1Impl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_arg1Impl.evalSubExprsIfNeeded(NULL);
    m_arg2Impl.evalSubExprsIfNeeded(NULL);
    m_arg3Impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_arg1Impl.cleanup();
    m_arg2Impl.cleanup();
    m_arg3Impl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const {
    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
                              m_arg2Impl.template packet<LoadMode>(index),
                              m_arg3Impl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
    return m_arg1Impl.costPerCoeff(vectorized) +
           m_arg2Impl.costPerCoeff(vectorized) +
           m_arg3Impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // Binds placeholder accessors to a SYCL command group handler.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_arg1Impl.bind(cgh);
    m_arg2Impl.bind(cgh);
    m_arg3Impl.bind(cgh);
  }
#endif

 private:
  const TernaryOp m_functor;
  TensorEvaluator<Arg1Type, Device> m_arg1Impl;
  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
  TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
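
// Illustrative sketch (assumption: Eigen::betainc is available for tensors in
// this module; it is the usual example of a coefficient-wise ternary op):
//
//   Eigen::Tensor<float, 1> a(8), b(8), x(8), r(8);
//   a.setRandom(); b.setRandom(); x.setRandom();
//   r = Eigen::betainc(a, b, x);   // TensorCwiseTernaryOp, evaluated here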
// -------------------- SelectOp --------------------

template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
  typedef typename XprType::Scalar Scalar;

  enum {
    IsAligned         = TensorEvaluator<ThenArgType, Device>::IsAligned &
                        TensorEvaluator<ElseArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ThenArgType, Device>::PacketAccess &
                        TensorEvaluator<ElseArgType, Device>::PacketAccess &
                        PacketType<Scalar, Device>::HasBlend,
    BlockAccess       = TensorEvaluator<IfArgType, Device>::BlockAccess &&
                        TensorEvaluator<ThenArgType, Device>::BlockAccess &&
                        TensorEvaluator<ElseArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
    Layout            = TensorEvaluator<IfArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_condImpl(op.ifExpression(), device),
      m_thenImpl(op.thenExpression(), device),
      m_elseImpl(op.elseExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
    eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static const int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -----------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
      IfArgTensorBlock;
  typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
      ThenArgTensorBlock;
  typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
      ElseArgTensorBlock;

  struct TensorSelectOpBlockFactory {
    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    struct XprType {
      typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
    };

    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
        const IfArgXprType& if_expr, const ThenArgXprType& then_expr,
        const ElseArgXprType& else_expr) const {
      return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(
          if_expr, then_expr, else_expr);
    }
  };

  typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
                                           IfArgTensorBlock, ThenArgTensorBlock,
                                           ElseArgTensorBlock>
      TensorBlock;
  //===------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const {
    return m_condImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_condImpl.evalSubExprsIfNeeded(NULL);
    m_thenImpl.evalSubExprsIfNeeded(NULL);
    m_elseImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
        m_elseImpl.evalSubExprsIfNeededAsync(nullptr,
                                             [done](bool) { done(true); });
      });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_condImpl.cleanup();
    m_thenImpl.cleanup();
    m_elseImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const {
    return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
    internal::Selector<PacketSize> select;
    EIGEN_UNROLL_LOOP
    for (Index i = 0; i < PacketSize; ++i) {
      select.select[i] = m_condImpl.coeff(index+i);
    }
    return internal::pblend(select,
                            m_thenImpl.template packet<LoadMode>(index),
                            m_elseImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return m_condImpl.costPerCoeff(vectorized) +
           m_thenImpl.costPerCoeff(vectorized)
               .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    auto then_req = m_thenImpl.getResourceRequirements();
    auto else_req = m_elseImpl.getResourceRequirements();

    auto merged_req =
        internal::TensorBlockResourceRequirements::merge(then_req, else_req);
    merged_req.cost_per_coeff =
        then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);

    return internal::TensorBlockResourceRequirements::merge(
        m_condImpl.getResourceRequirements(), merged_req);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // It's unsafe to pass a destination buffer to the underlying expressions,
    // because the output might be aliased with one of the inputs.
    desc.DropDestinationBuffer();

    return TensorBlock(
        m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
        m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // Binds placeholder accessors to a SYCL command group handler.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_condImpl.bind(cgh);
    m_thenImpl.bind(cgh);
    m_elseImpl.bind(cgh);
  }
#endif

 private:
  TensorEvaluator<IfArgType, Device> m_condImpl;
  TensorEvaluator<ThenArgType, Device> m_thenImpl;
  TensorEvaluator<ElseArgType, Device> m_elseImpl;
};
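
// Illustrative sketch (not part of the library): select() on a boolean
// condition expression builds a TensorSelectOp; the scalar path evaluates only
// the chosen branch per coefficient, while the vectorized path evaluates both
// branches and blends them with internal::pblend.
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3), r(2, 3);
//   a.setRandom();
//   b.setRandom();
//   r = (a > 0.0f).select(a, b);   // r(i,j) = a(i,j) > 0 ? a(i,j) : b(i,j)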
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H