Eigen-unsupported 3.4.1 (git rev 28ded8800c26864e537852658428ab44c8399e87)
 
TensorEvaluator.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
11#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
12
13namespace Eigen {
14
15// Generic evaluator
26template <typename Derived, typename Device>
27struct TensorEvaluator {
28 typedef typename Derived::Index Index;
29 typedef typename Derived::Scalar Scalar;
30 typedef typename Derived::Scalar CoeffReturnType;
31 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
32 typedef typename Derived::Dimensions Dimensions;
33 typedef Derived XprType;
34 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
35 typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
36 typedef StorageMemory<Scalar, Device> Storage;
37 typedef typename Storage::Type EvaluatorPointerType;
38
39 // NumDimensions is -1 for variable dim tensors
40 static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
41 internal::traits<Derived>::NumDimensions : 0;
42
43 enum {
44 IsAligned = Derived::IsAligned,
45 PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
46 BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
47 PreferBlockAccess = false,
48 Layout = Derived::Layout,
49 CoordAccess = NumCoords > 0,
50 RawAccess = true
51 };
52
53 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
54
55 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
56 typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
57 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
58
59 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
60 Layout, Index>
61 TensorBlock;
62 //===--------------------------------------------------------------------===//
63
64 EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
65 : m_data(device.get((const_cast<TensorPointerType>(m.data())))),
66 m_dims(m.dimensions()),
67 m_device(device)
68 { }
69
70
71 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
72
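// Note: evalSubExprsIfNeeded() returns true when the caller must read the result through
// coeff()/packet() on this evaluator; it returns false when the raw data could instead be
// copied straight into `dest` (only done here for scalars that need no initialization).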
73 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
74 if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
75 m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
76 return false;
77 }
78 return true;
79 }
80
81#ifdef EIGEN_USE_THREADS
82 template <typename EvalSubExprsCallback>
83 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
84 EvaluatorPointerType dest, EvalSubExprsCallback done) {
85 // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
86 done(evalSubExprsIfNeeded(dest));
87 }
88#endif // EIGEN_USE_THREADS
89
90 EIGEN_STRONG_INLINE void cleanup() {}
91
92 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
93 eigen_assert(m_data != NULL);
94 return m_data[index];
95 }
96
97 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
98 eigen_assert(m_data != NULL);
99 return m_data[index];
100 }
101
102 template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
103 PacketReturnType packet(Index index) const
104 {
105 return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
106 }
107
108 // Return a packet starting at `index` where `umask` specifies which elements
109 // have to be loaded. The type and size of the mask depend on PacketReturnType; e.g.
110 // for Packet16f, `umask` is of type uint16_t and if a bit is 1, the corresponding
111 // float element is loaded, otherwise 0 is loaded.
112 // The function is templated to enable SFINAE.
113 template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
114 typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
115 partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
116 {
117 return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
118 }
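// Example (a minimal sketch, not part of the library; assumes an AVX512 build where
// Packet16f reports masked_load_available, and an `eval` object of this evaluator type):
//
//   std::uint16_t umask = 0x0007;  // bits 0..2 set -> elements 0, 1, 2 loaded, rest are 0
//   Eigen::internal::Packet16f p =
//       eval.template partialPacket<Eigen::internal::Packet16f>(/*index=*/0, umask);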
119
120 template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
121 void writePacket(Index index, const PacketReturnType& x)
122 {
123 return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
124 }
125
126 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
127 eigen_assert(m_data != NULL);
128 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
129 return m_data[m_dims.IndexOfColMajor(coords)];
130 } else {
131 return m_data[m_dims.IndexOfRowMajor(coords)];
132 }
133 }
134
135 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
136 coeffRef(const array<DenseIndex, NumCoords>& coords) {
137 eigen_assert(m_data != NULL);
138 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
139 return m_data[m_dims.IndexOfColMajor(coords)];
140 } else {
141 return m_data[m_dims.IndexOfRowMajor(coords)];
142 }
143 }
144
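// Note (editorial gloss on TensorOpCost's (bytes_loaded, bytes_stored, compute_cycles,
// vectorized, packet_size) constructor): reading a raw coefficient is counted as one
// scalar load with no stores and no arithmetic.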
145 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
146 return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
147 PacketType<CoeffReturnType, Device>::size);
148 }
149
150 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
151 internal::TensorBlockResourceRequirements getResourceRequirements() const {
152 return internal::TensorBlockResourceRequirements::any();
153 }
154
155 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
156 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
157 bool /*root_of_expr_ast*/ = false) const {
158 assert(m_data != NULL);
159 return TensorBlock::materialize(m_data, m_dims, desc, scratch);
160 }
161
162 template<typename TensorBlock>
163 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
164 const TensorBlockDesc& desc, const TensorBlock& block) {
165 assert(m_data != NULL);
166
167 typedef typename TensorBlock::XprType TensorBlockExpr;
168 typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
169 Index>
170 TensorBlockAssign;
171
172 TensorBlockAssign::Run(
173 TensorBlockAssign::target(desc.dimensions(),
174 internal::strides<Layout>(m_dims), m_data,
175 desc.offset()),
176 block.expr());
177 }
178
179 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
180
181#ifdef EIGEN_USE_SYCL
182 // binding placeholder accessors to a command group handler for SYCL
183 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
184 m_data.bind(cgh);
185 }
186#endif
187 protected:
188 EvaluatorPointerType m_data;
189 Dimensions m_dims;
190 const Device EIGEN_DEVICE_REF m_device;
191};
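// Example (a minimal usage sketch, not part of the library; evaluators are normally
// driven by TensorAssign/TensorExecutor rather than called directly):
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 2> t(2, 3);
//   t.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<Eigen::Tensor<float, 2>, Eigen::DefaultDevice> eval(t, device);
//   eval.evalSubExprsIfNeeded(nullptr);   // a leaf: the data is already materialized
//   float v = eval.coeff(0);              // reads t.data()[0]
//   eval.cleanup();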
192
193namespace internal {
194template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
195T loadConstant(const T* address) {
196 return *address;
197}
198// Use the texture cache on CUDA devices whenever possible
199#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
200template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
201float loadConstant(const float* address) {
202 return __ldg(address);
203}
204template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
205double loadConstant(const double* address) {
206 return __ldg(address);
207}
208template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
209Eigen::half loadConstant(const Eigen::half* address) {
210 return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
211}
212#endif
213#ifdef EIGEN_USE_SYCL
214// Overload of loadConstant for SYCL range accessors.
215template <cl::sycl::access::mode AcMd, typename T>
216T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
217 return *address;
218}
219#endif
220} // namespace internal
221
222// Default evaluator for rvalues
223template<typename Derived, typename Device>
224struct TensorEvaluator<const Derived, Device>
225{
226 typedef typename Derived::Index Index;
227 typedef typename Derived::Scalar Scalar;
228 typedef typename Derived::Scalar CoeffReturnType;
229 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
230 typedef typename Derived::Dimensions Dimensions;
231 typedef const Derived XprType;
232 typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
233 typedef StorageMemory<const Scalar, Device> Storage;
234 typedef typename Storage::Type EvaluatorPointerType;
235
236 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
237
238 // NumDimensions is -1 for variable dim tensors
239 static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
240 internal::traits<Derived>::NumDimensions : 0;
241 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
242
243 enum {
244 IsAligned = Derived::IsAligned,
245 PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
246 BlockAccess = internal::is_arithmetic<ScalarNoConst>::value,
247 PreferBlockAccess = false,
248 Layout = Derived::Layout,
249 CoordAccess = NumCoords > 0,
250 RawAccess = true
251 };
252
253 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
254 typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
255 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
256
257 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
258 Layout, Index>
259 TensorBlock;
260 //===--------------------------------------------------------------------===//
261
262 EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
263 : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
264 { }
265
266 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
267
268 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
269 if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
270 m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
271 return false;
272 }
273 return true;
274 }
275
276#ifdef EIGEN_USE_THREADS
277 template <typename EvalSubExprsCallback>
278 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
279 EvaluatorPointerType dest, EvalSubExprsCallback done) {
280 // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
281 done(evalSubExprsIfNeeded(dest));
282 }
283#endif // EIGEN_USE_THREADS
284
285 EIGEN_STRONG_INLINE void cleanup() { }
286
287 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
288 eigen_assert(m_data != NULL);
289 return internal::loadConstant(m_data+index);
290 }
291
292 template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
293 PacketReturnType packet(Index index) const
294 {
295 return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
296 }
297
298 // Return a packet starting at `index` where `umask` specifies which elements
299 // have to be loaded. The type and size of the mask depend on PacketReturnType; e.g.
300 // for Packet16f, `umask` is of type uint16_t and if a bit is 1, the corresponding
301 // float element is loaded, otherwise 0 is loaded.
302 // The function is templated to enable SFINAE.
303 template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
304 typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
305 partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
306 {
307 return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
308 }
309
310 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
311 eigen_assert(m_data != NULL);
312 const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
313 : m_dims.IndexOfRowMajor(coords);
314 return internal::loadConstant(m_data+index);
315 }
316
317 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
318 return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
319 PacketType<CoeffReturnType, Device>::size);
320 }
321
322 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
323 internal::TensorBlockResourceRequirements getResourceRequirements() const {
324 return internal::TensorBlockResourceRequirements::any();
325 }
326
327 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
328 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
329 bool /*root_of_expr_ast*/ = false) const {
330 assert(m_data != NULL);
331 return TensorBlock::materialize(m_data, m_dims, desc, scratch);
332 }
333
334 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
335#ifdef EIGEN_USE_SYCL
336 // binding placeholder accessors to a command group handler for SYCL
337 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
338 m_data.bind(cgh);
339 }
340#endif
341 protected:
342 EvaluatorPointerType m_data;
343 Dimensions m_dims;
344 const Device EIGEN_DEVICE_REF m_device;
345};
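// Example (sketch): this const specialization is what expression trees instantiate for
// their read-only leaves, so coefficient reads go through internal::loadConstant (and the
// __ldg read-only cache path on CUDA compute capability >= 3.5):
//
//   Eigen::Tensor<float, 1> data(4);
//   data.setConstant(1.f);
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<const Eigen::Tensor<float, 1>, Eigen::DefaultDevice> eval(data, device);
//   float v = eval.coeff(2);   // loadConstant(m_data + 2)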
346
347
348
349
350// -------------------- CwiseNullaryOp --------------------
351
352template<typename NullaryOp, typename ArgType, typename Device>
353struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
354{
355 typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
356
357 TensorEvaluator(const XprType& op, const Device& device)
358 : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
359 { }
360
361 typedef typename XprType::Index Index;
362 typedef typename XprType::Scalar Scalar;
363 typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
364 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
365 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
366 typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
367 typedef StorageMemory<CoeffReturnType, Device> Storage;
368 typedef typename Storage::Type EvaluatorPointerType;
369
370 enum {
371 IsAligned = true,
372 PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
373 #ifdef EIGEN_USE_SYCL
374 && (PacketType<CoeffReturnType, Device>::size >1)
375 #endif
376 ,
377 BlockAccess = false,
378 PreferBlockAccess = false,
379 Layout = TensorEvaluator<ArgType, Device>::Layout,
380 CoordAccess = false, // to be implemented
381 RawAccess = false
382 };
383
384 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
385 typedef internal::TensorBlockNotImplemented TensorBlock;
386 //===--------------------------------------------------------------------===//
387
388 EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
389
390 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
391
392#ifdef EIGEN_USE_THREADS
393 template <typename EvalSubExprsCallback>
394 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
395 EvaluatorPointerType, EvalSubExprsCallback done) {
396 done(true);
397 }
398#endif // EIGEN_USE_THREADS
399
400 EIGEN_STRONG_INLINE void cleanup() { }
401
402 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
403 {
404 return m_wrapper(m_functor, index);
405 }
406
407 template<int LoadMode>
408 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
409 {
410 return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
411 }
412
413 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
414 costPerCoeff(bool vectorized) const {
415 return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
416 PacketType<CoeffReturnType, Device>::size);
417 }
418
419 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
420
421#ifdef EIGEN_USE_SYCL
422 // binding placeholder accessors to a command group handler for SYCL
423 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
424 m_argImpl.bind(cgh);
425 }
426#endif
427
428 private:
429 const NullaryOp m_functor;
430 TensorEvaluator<ArgType, Device> m_argImpl;
431 const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
432};
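// Example (sketch): a nullary expression produces values from the functor alone, e.g.
//
//   Eigen::Tensor<float, 2> a(2, 3);
//   auto expr = a.constant(1.5f);       // TensorCwiseNullaryOp (scalar_constant_op)
//   Eigen::Tensor<float, 2> b = expr;   // every coefficient of b is 1.5f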
433
434
435
436// -------------------- CwiseUnaryOp --------------------
437
438template<typename UnaryOp, typename ArgType, typename Device>
439struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
440{
441 typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
442
443 enum {
444 IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
445 PacketAccess = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
446 int(internal::functor_traits<UnaryOp>::PacketAccess),
447 BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
448 PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
449 Layout = TensorEvaluator<ArgType, Device>::Layout,
450 CoordAccess = false, // to be implemented
451 RawAccess = false
452 };
453
454 TensorEvaluator(const XprType& op, const Device& device)
455 : m_device(device),
456 m_functor(op.functor()),
457 m_argImpl(op.nestedExpression(), device)
458 { }
459
460 typedef typename XprType::Index Index;
461 typedef typename XprType::Scalar Scalar;
462 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
463 typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
464 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
465 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
466 typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
467 typedef StorageMemory<CoeffReturnType, Device> Storage;
468 typedef typename Storage::Type EvaluatorPointerType;
469 static const int NumDims = internal::array_size<Dimensions>::value;
470
471 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
472 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
473 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
474
475 typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
476 ArgTensorBlock;
477
478 typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
479 TensorBlock;
480 //===--------------------------------------------------------------------===//
481
482 EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
483
484 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
485 m_argImpl.evalSubExprsIfNeeded(NULL);
486 return true;
487 }
488
489#ifdef EIGEN_USE_THREADS
490 template <typename EvalSubExprsCallback>
491 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
492 EvaluatorPointerType, EvalSubExprsCallback done) {
493 m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
494 }
495#endif // EIGEN_USE_THREADS
496
497 EIGEN_STRONG_INLINE void cleanup() {
498 m_argImpl.cleanup();
499 }
500
501 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
502 {
503 return m_functor(m_argImpl.coeff(index));
504 }
505
506 template<int LoadMode>
507 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
508 {
509 return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
510 }
511
512 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
513 const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
514 return m_argImpl.costPerCoeff(vectorized) +
515 TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
516 }
517
518 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
519 internal::TensorBlockResourceRequirements getResourceRequirements() const {
520 static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
521 return m_argImpl.getResourceRequirements().addCostPerCoeff(
522 {0, 0, functor_cost / PacketSize});
523 }
524
525 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
526 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
527 bool /*root_of_expr_ast*/ = false) const {
528 return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
529 }
530
531 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
532
533#ifdef EIGEN_USE_SYCL
534 // binding placeholder accessors to a command group handler for SYCL
535 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const{
536 m_argImpl.bind(cgh);
537 }
538#endif
539
540
541 private:
542 const Device EIGEN_DEVICE_REF m_device;
543 const UnaryOp m_functor;
544 TensorEvaluator<ArgType, Device> m_argImpl;
545};
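// Example (sketch): a unary expression applies the functor to each coefficient (or packet)
// of its argument, e.g.
//
//   Eigen::Tensor<float, 2> a(2, 3);
//   a.setRandom();
//   Eigen::Tensor<float, 2> b = a.square();   // TensorCwiseUnaryOp; packet() forwards to
//                                             // m_functor.packetOp(argument packet)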
546
547
548// -------------------- CwiseBinaryOp --------------------
549
550template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
551struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
552{
553 typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
554
555 enum {
556 IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
557 int(TensorEvaluator<RightArgType, Device>::IsAligned),
558 PacketAccess = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
559 int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
560 int(internal::functor_traits<BinaryOp>::PacketAccess),
561 BlockAccess = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
562 int(TensorEvaluator<RightArgType, Device>::BlockAccess),
563 PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
564 int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
565 Layout = TensorEvaluator<LeftArgType, Device>::Layout,
566 CoordAccess = false, // to be implemented
567 RawAccess = false
568 };
569
570 TensorEvaluator(const XprType& op, const Device& device)
571 : m_device(device),
572 m_functor(op.functor()),
573 m_leftImpl(op.lhsExpression(), device),
574 m_rightImpl(op.rhsExpression(), device)
575 {
576 EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
577 eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
578 }
579
580 typedef typename XprType::Index Index;
581 typedef typename XprType::Scalar Scalar;
582 typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
583 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
584 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
585 typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
586 typedef StorageMemory<CoeffReturnType, Device> Storage;
587 typedef typename Storage::Type EvaluatorPointerType;
588
589 static const int NumDims = internal::array_size<
590 typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
591
592 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
593 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
594 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
595
596 typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
597 LeftTensorBlock;
598 typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
599 RightTensorBlock;
600
601 typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
602 RightTensorBlock>
603 TensorBlock;
604 //===--------------------------------------------------------------------===//
605
606 EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
607 {
608 // TODO: use the right impl instead if its dimensions are known at compile time.
609 return m_leftImpl.dimensions();
610 }
611
612 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
613 m_leftImpl.evalSubExprsIfNeeded(NULL);
614 m_rightImpl.evalSubExprsIfNeeded(NULL);
615 return true;
616 }
617
618#ifdef EIGEN_USE_THREADS
619 template <typename EvalSubExprsCallback>
620 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
621 EvaluatorPointerType, EvalSubExprsCallback done) {
622 // TODO(ezhulenev): Evaluate the two expressions in parallel?
623 m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
624 m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
625 [done](bool) { done(true); });
626 });
627 }
628#endif // EIGEN_USE_THREADS
629
630 EIGEN_STRONG_INLINE void cleanup() {
631 m_leftImpl.cleanup();
632 m_rightImpl.cleanup();
633 }
634
635 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
636 {
637 return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
638 }
639 template<int LoadMode>
640 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
641 {
642 return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
643 }
644
645 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
646 costPerCoeff(bool vectorized) const {
647 const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
648 return m_leftImpl.costPerCoeff(vectorized) +
649 m_rightImpl.costPerCoeff(vectorized) +
650 TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
651 }
652
653 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
654 internal::TensorBlockResourceRequirements getResourceRequirements() const {
655 static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
656 return internal::TensorBlockResourceRequirements::merge(
657 m_leftImpl.getResourceRequirements(),
658 m_rightImpl.getResourceRequirements())
659 .addCostPerCoeff({0, 0, functor_cost / PacketSize});
660 }
661
662 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
663 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
664 bool /*root_of_expr_ast*/ = false) const {
665 desc.DropDestinationBuffer();
666 return TensorBlock(m_leftImpl.block(desc, scratch),
667 m_rightImpl.block(desc, scratch), m_functor);
668 }
669
670 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
671
672 #ifdef EIGEN_USE_SYCL
673 // binding placeholder accessors to a command group handler for SYCL
674 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
675 m_leftImpl.bind(cgh);
676 m_rightImpl.bind(cgh);
677 }
678 #endif
679 private:
680 const Device EIGEN_DEVICE_REF m_device;
681 const BinaryOp m_functor;
682 TensorEvaluator<LeftArgType, Device> m_leftImpl;
683 TensorEvaluator<RightArgType, Device> m_rightImpl;
684};
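// Example (sketch): a binary expression combines both operands coefficient- or packet-wise
// with the functor, e.g.
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3);
//   a.setRandom();
//   b.setRandom();
//   Eigen::Tensor<float, 2> c = a + b;   // TensorCwiseBinaryOp with scalar_sum_op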
685
686// -------------------- CwiseTernaryOp --------------------
687
688template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
689struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
690{
691 typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;
692
693 enum {
694 IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned,
695 PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
696 TensorEvaluator<Arg2Type, Device>::PacketAccess &&
697 TensorEvaluator<Arg3Type, Device>::PacketAccess &&
698 internal::functor_traits<TernaryOp>::PacketAccess,
699 BlockAccess = false,
700 PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
701 TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
702 TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
703 Layout = TensorEvaluator<Arg1Type, Device>::Layout,
704 CoordAccess = false, // to be implemented
705 RawAccess = false
706 };
707
708 TensorEvaluator(const XprType& op, const Device& device)
709 : m_functor(op.functor()),
710 m_arg1Impl(op.arg1Expression(), device),
711 m_arg2Impl(op.arg2Expression(), device),
712 m_arg3Impl(op.arg3Expression(), device)
713 {
714 EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
715
716 EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
717 typename internal::traits<Arg2Type>::StorageKind>::value),
718 STORAGE_KIND_MUST_MATCH)
719 EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
720 typename internal::traits<Arg3Type>::StorageKind>::value),
721 STORAGE_KIND_MUST_MATCH)
722 EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
723 typename internal::traits<Arg2Type>::Index>::value),
724 STORAGE_INDEX_MUST_MATCH)
725 EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
726 typename internal::traits<Arg3Type>::Index>::value),
727 STORAGE_INDEX_MUST_MATCH)
728
729 eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
730 }
731
732 typedef typename XprType::Index Index;
733 typedef typename XprType::Scalar Scalar;
734 typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
735 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
736 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
737 typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
738 typedef StorageMemory<CoeffReturnType, Device> Storage;
739 typedef typename Storage::Type EvaluatorPointerType;
740
741 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
742 typedef internal::TensorBlockNotImplemented TensorBlock;
743 //===--------------------------------------------------------------------===//
744
745 EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
746 {
747 // TODO: use arg2 or arg3 dimensions if they are known at compile time.
748 return m_arg1Impl.dimensions();
749 }
750
751 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
752 m_arg1Impl.evalSubExprsIfNeeded(NULL);
753 m_arg2Impl.evalSubExprsIfNeeded(NULL);
754 m_arg3Impl.evalSubExprsIfNeeded(NULL);
755 return true;
756 }
757 EIGEN_STRONG_INLINE void cleanup() {
758 m_arg1Impl.cleanup();
759 m_arg2Impl.cleanup();
760 m_arg3Impl.cleanup();
761 }
762
763 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
764 {
765 return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
766 }
767 template<int LoadMode>
768 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
769 {
770 return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
771 m_arg2Impl.template packet<LoadMode>(index),
772 m_arg3Impl.template packet<LoadMode>(index));
773 }
774
775 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
776 costPerCoeff(bool vectorized) const {
777 const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
778 return m_arg1Impl.costPerCoeff(vectorized) +
779 m_arg2Impl.costPerCoeff(vectorized) +
780 m_arg3Impl.costPerCoeff(vectorized) +
781 TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
782 }
783
784 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
785
786#ifdef EIGEN_USE_SYCL
787 // binding placeholder accessors to a command group handler for SYCL
788 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
789 m_arg1Impl.bind(cgh);
790 m_arg2Impl.bind(cgh);
791 m_arg3Impl.bind(cgh);
792 }
793#endif
794
795 private:
796 const TernaryOp m_functor;
797 TensorEvaluator<Arg1Type, Device> m_arg1Impl;
798 TensorEvaluator<Arg2Type, Device> m_arg2Impl;
799 TensorEvaluator<Arg3Type, Device> m_arg3Impl;
800};
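// Example (sketch, assuming the Eigen::betainc free function for tensors is available;
// it is one of the few expressions that builds a TensorCwiseTernaryOp):
//
//   Eigen::Tensor<float, 1> a(8), b(8), x(8);
//   a.setConstant(2.f);
//   b.setConstant(3.f);
//   x.setConstant(0.5f);
//   Eigen::Tensor<float, 1> r = Eigen::betainc(a, b, x);   // regularized incomplete beta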
801
802
803// -------------------- SelectOp --------------------
804
805template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
806struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
807{
808 typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
809 typedef typename XprType::Scalar Scalar;
810
811 enum {
812 IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned &
813 TensorEvaluator<ElseArgType, Device>::IsAligned,
814 PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess &
815 TensorEvaluator<ElseArgType, Device>::PacketAccess &
816 PacketType<Scalar, Device>::HasBlend,
817 BlockAccess = TensorEvaluator<IfArgType, Device>::BlockAccess &&
818 TensorEvaluator<ThenArgType, Device>::BlockAccess &&
819 TensorEvaluator<ElseArgType, Device>::BlockAccess,
820 PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
821 TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
822 TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
823 Layout = TensorEvaluator<IfArgType, Device>::Layout,
824 CoordAccess = false, // to be implemented
825 RawAccess = false
826 };
827
828 TensorEvaluator(const XprType& op, const Device& device)
829 : m_condImpl(op.ifExpression(), device),
830 m_thenImpl(op.thenExpression(), device),
831 m_elseImpl(op.elseExpression(), device)
832 {
833 EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
834 EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
835 eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
836 eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
837 }
838
839 typedef typename XprType::Index Index;
840 typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
841 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
842 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
843 typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
844 typedef StorageMemory<CoeffReturnType, Device> Storage;
845 typedef typename Storage::Type EvaluatorPointerType;
846
847 static const int NumDims = internal::array_size<Dimensions>::value;
848
849 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
850 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
851 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
852
853 typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
854 IfArgTensorBlock;
855 typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
856 ThenArgTensorBlock;
857 typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
858 ElseArgTensorBlock;
859
860 struct TensorSelectOpBlockFactory {
861 template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
862 struct XprType {
863 typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
864 };
865
866 template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
867 typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
868 const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const {
869 return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr);
870 }
871 };
872
873 typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
874 IfArgTensorBlock, ThenArgTensorBlock,
875 ElseArgTensorBlock>
876 TensorBlock;
877 //===--------------------------------------------------------------------===//
878
879 EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
880 {
881 // TODO: use then or else impl instead if they happen to be known at compile time.
882 return m_condImpl.dimensions();
883 }
884
885 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
886 m_condImpl.evalSubExprsIfNeeded(NULL);
887 m_thenImpl.evalSubExprsIfNeeded(NULL);
888 m_elseImpl.evalSubExprsIfNeeded(NULL);
889 return true;
890 }
891
892#ifdef EIGEN_USE_THREADS
893 template <typename EvalSubExprsCallback>
894 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
895 EvaluatorPointerType, EvalSubExprsCallback done) {
896 m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
897 m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
898 m_elseImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
899 });
900 });
901 }
902#endif // EIGEN_USE_THREADS
903
904 EIGEN_STRONG_INLINE void cleanup() {
905 m_condImpl.cleanup();
906 m_thenImpl.cleanup();
907 m_elseImpl.cleanup();
908 }
909
910 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
911 {
912 return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
913 }
914 template<int LoadMode>
915 EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
916 {
917 internal::Selector<PacketSize> select;
918 EIGEN_UNROLL_LOOP
919 for (Index i = 0; i < PacketSize; ++i) {
920 select.select[i] = m_condImpl.coeff(index+i);
921 }
922 return internal::pblend(select,
923 m_thenImpl.template packet<LoadMode>(index),
924 m_elseImpl.template packet<LoadMode>(index));
925
926 }
927
928 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
929 costPerCoeff(bool vectorized) const {
930 return m_condImpl.costPerCoeff(vectorized) +
931 m_thenImpl.costPerCoeff(vectorized)
932 .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
933 }
934
935 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
936 internal::TensorBlockResourceRequirements getResourceRequirements() const {
937 auto then_req = m_thenImpl.getResourceRequirements();
938 auto else_req = m_elseImpl.getResourceRequirements();
939
940 auto merged_req =
941 internal::TensorBlockResourceRequirements::merge(then_req, else_req);
942 merged_req.cost_per_coeff =
943 then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);
944
945 return internal::TensorBlockResourceRequirements::merge(
946 m_condImpl.getResourceRequirements(), merged_req);
947 }
948
949 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
950 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
951 bool /*root_of_expr_ast*/ = false) const {
952 // It's unsafe to pass the destination buffer to the underlying expressions, because
953 // the output might be aliased with one of the inputs.
954 desc.DropDestinationBuffer();
955
956 return TensorBlock(
957 m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
958 m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
959 }
960
961 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
962
963#ifdef EIGEN_USE_SYCL
964 // binding placeholder accessors to a command group handler for SYCL
965 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
966 m_condImpl.bind(cgh);
967 m_thenImpl.bind(cgh);
968 m_elseImpl.bind(cgh);
969 }
970#endif
971 private:
972 TensorEvaluator<IfArgType, Device> m_condImpl;
973 TensorEvaluator<ThenArgType, Device> m_thenImpl;
974 TensorEvaluator<ElseArgType, Device> m_elseImpl;
975};
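// Example (sketch): select picks between two branches based on a boolean condition tensor;
// the packet path gathers the condition coefficients into a Selector and uses pblend, e.g.
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3);
//   a.setRandom();
//   b.setZero();
//   Eigen::Tensor<float, 2> c = (a > a.constant(0.f)).select(a, b);   // TensorSelectOp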
976
977
978} // end namespace Eigen
979
980#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H