Eigen-unsupported  5.0.1-dev+284dcc12
 
Loading...
Searching...
No Matches
TensorConversion.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
11#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
12
13// IWYU pragma: private
14#include "./InternalHeaderCheck.h"
15
16namespace Eigen {
17
18namespace internal {
19template <typename TargetType, typename XprType>
20struct traits<TensorConversionOp<TargetType, XprType> > {
21 // Type promotion to handle the case where the types of the lhs and the rhs are different.
22 typedef TargetType Scalar;
23 typedef typename traits<XprType>::StorageKind StorageKind;
24 typedef typename traits<XprType>::Index Index;
25 typedef typename XprType::Nested Nested;
26 typedef std::remove_reference_t<Nested> Nested_;
27 static constexpr int NumDimensions = traits<XprType>::NumDimensions;
28 static constexpr int Layout = traits<XprType>::Layout;
29 enum { Flags = 0 };
30 typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType;
31};
32
33template <typename TargetType, typename XprType>
34struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense> {
35 typedef const TensorConversionOp<TargetType, XprType>& type;
36};
37
38template <typename TargetType, typename XprType>
39struct nested<TensorConversionOp<TargetType, XprType>, 1,
40 typename eval<TensorConversionOp<TargetType, XprType> >::type> {
41 typedef TensorConversionOp<TargetType, XprType> type;
42};
43
44} // end namespace internal
45
46template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
47struct PacketConverter;
48
49template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
50struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 1> {
51 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {}
52
53 template <int LoadMode, typename Index>
54 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
55 return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
56 }
57
58 private:
59 const TensorEvaluator& m_impl;
60};
61
62template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
63struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
64 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {}
65
66 template <int LoadMode, typename Index>
67 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
68 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
69
70 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
71 SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
72 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
73 return result;
74 }
75
76 private:
77 const TensorEvaluator& m_impl;
78};
79
80template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
81struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
82 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {}
83
84 template <int LoadMode, typename Index>
85 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
86 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
87
88 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
89 SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
90 SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
91 SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
92 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
93 return result;
94 }
95
96 private:
97 const TensorEvaluator& m_impl;
98};
99
100template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
101struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 8, 1> {
102 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {}
103
104 template <int LoadMode, typename Index>
105 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
106 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
107
108 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
109 SrcPacket src2 = m_impl.template packet<LoadMode>(index + 1 * SrcPacketSize);
110 SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
111 SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
112 SrcPacket src5 = m_impl.template packet<LoadMode>(index + 4 * SrcPacketSize);
113 SrcPacket src6 = m_impl.template packet<LoadMode>(index + 5 * SrcPacketSize);
114 SrcPacket src7 = m_impl.template packet<LoadMode>(index + 6 * SrcPacketSize);
115 SrcPacket src8 = m_impl.template packet<LoadMode>(index + 7 * SrcPacketSize);
116 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4, src5, src6, src7, src8);
117 return result;
118 }
119
120 private:
121 const TensorEvaluator& m_impl;
122};
123
124template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int TgtCoeffRatio>
125struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, TgtCoeffRatio> {
126 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl)
127 : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}
128
129 template <int LoadMode, typename Index>
130 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
131 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
132 // Only call m_impl.packet() when we have direct access to the underlying data. This
133 // ensures that we don't compute the subexpression twice. We may however load some
134 // coefficients twice, but in practice this doesn't negatively impact performance.
135 if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
136 // Force unaligned memory loads since we can't ensure alignment anymore
137 return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
138 } else {
139 const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
140 typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
141 typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
142 internal::scalar_cast_op<SrcType, TgtType> converter;
143 EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
144 EIGEN_UNROLL_LOOP
145 for (int i = 0; i < TgtPacketSize; ++i) {
146 values[i] = converter(m_impl.coeff(index + i));
147 }
148 TgtPacket rslt = internal::pload<TgtPacket>(values);
149 return rslt;
150 }
151 }
152
153 private:
154 const TensorEvaluator& m_impl;
155 const typename TensorEvaluator::Index m_maxIndex;
156};
157
165template <typename TargetType, typename XprType>
166class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors> {
167 public:
168 typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
169 typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
170 typedef typename internal::traits<TensorConversionOp>::Index Index;
171 typedef typename internal::nested<TensorConversionOp>::type Nested;
172 typedef Scalar CoeffReturnType;
173 typedef typename NumTraits<Scalar>::Real RealScalar;
174
175 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) : m_xpr(xpr) {}
176
177 EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }
178
179 protected:
180 typename XprType::Nested m_xpr;
181};
182
183template <bool SameType, typename Eval, typename EvalPointerType>
184struct ConversionSubExprEval {
185 static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) {
186 impl.evalSubExprsIfNeeded(NULL);
187 return true;
188 }
189};
190
191template <typename Eval, typename EvalPointerType>
192struct ConversionSubExprEval<true, Eval, EvalPointerType> {
193 static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) { return impl.evalSubExprsIfNeeded(data); }
194};
195
196#ifdef EIGEN_USE_THREADS
197template <bool SameType, typename Eval, typename EvalPointerType, typename EvalSubExprsCallback>
198struct ConversionSubExprEvalAsync {
199 static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType, EvalSubExprsCallback done) {
200 impl.evalSubExprsIfNeededAsync(nullptr, std::move(done));
201 }
202};
203
204template <typename Eval, typename EvalPointerType, typename EvalSubExprsCallback>
205struct ConversionSubExprEvalAsync<true, Eval, EvalPointerType, EvalSubExprsCallback> {
206 static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType data, EvalSubExprsCallback done) {
207 impl.evalSubExprsIfNeededAsync(data, std::move(done));
208 }
209};
210#endif
211
212namespace internal {
213
214template <typename SrcType, typename TargetType, bool IsSameT>
215struct CoeffConv {
216 template <typename ArgType, typename Device>
217 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl,
218 Index index) {
219 internal::scalar_cast_op<SrcType, TargetType> converter;
220 return converter(impl.coeff(index));
221 }
222};
223
224template <typename SrcType, typename TargetType>
225struct CoeffConv<SrcType, TargetType, true> {
226 template <typename ArgType, typename Device>
227 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl,
228 Index index) {
229 return impl.coeff(index);
230 }
231};
232
233template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize, bool IsSameT>
234struct PacketConv {
235 typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
236 typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
237
238 static constexpr int PacketSize = internal::unpacket_traits<TargetPacket>::size;
239
240 template <typename ArgType, typename Device>
241 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
242 Index index) {
243 internal::scalar_cast_op<SrcType, TargetType> converter;
244 EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
245 EIGEN_UNROLL_LOOP
246 for (int i = 0; i < PacketSize; ++i) {
247 values[i] = converter(impl.coeff(index + i));
248 }
249 TargetPacket rslt = internal::pload<TargetPacket>(values);
250 return rslt;
251 }
252};
253
254template <typename SrcPacket, typename TargetPacket, int LoadMode, bool IsSameT>
255struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
256 typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
257 typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
258
259 template <typename ArgType, typename Device>
260 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
261 Index index) {
262 const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
263 const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
264 PacketConverter<TensorEvaluator<ArgType, Device>, SrcPacket, TargetPacket, SrcCoeffRatio, TgtCoeffRatio> converter(
265 impl);
266 return converter.template packet<LoadMode>(index);
267 }
268};
269
270template <typename SrcPacket, typename TargetPacket, int LoadMode>
271struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> {
272 typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
273 static constexpr int PacketSize = internal::unpacket_traits<TargetPacket>::size;
274
275 template <typename ArgType, typename Device>
276 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
277 Index index) {
278 EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
279 for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index + i);
280 return internal::pload<TargetPacket>(values);
281 }
282};
283
284template <typename SrcPacket, typename TargetPacket, int LoadMode>
285struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/true, /*IsSameT=*/true> {
286 template <typename ArgType, typename Device>
287 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl,
288 Index index) {
289 return impl.template packet<LoadMode>(index);
290 }
291};
292
293} // namespace internal
294
295// Eval as rvalue
296template <typename TargetType, typename ArgType, typename Device>
297struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> {
298 typedef TensorConversionOp<TargetType, ArgType> XprType;
299 typedef typename XprType::Index Index;
300 typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
301 typedef TargetType Scalar;
302 typedef TargetType CoeffReturnType;
303 typedef internal::remove_all_t<typename internal::traits<ArgType>::Scalar> SrcType;
304 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
305 typedef typename PacketType<SrcType, Device>::type PacketSourceType;
306 static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
307 static constexpr bool IsSameType = internal::is_same<TargetType, SrcType>::value;
308 typedef StorageMemory<CoeffReturnType, Device> Storage;
309 typedef typename Storage::Type EvaluatorPointerType;
310
311 enum {
312 IsAligned = false,
313 PacketAccess =
314#ifndef EIGEN_USE_SYCL
315 true,
316#else
317 TensorEvaluator<ArgType, Device>::PacketAccess &
318 internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
319#endif
320 BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
321 PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
322 RawAccess = false
323 };
324
325 static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
326 static constexpr int NumDims = internal::array_size<Dimensions>::value;
327
328 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
329 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
330 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
331
332 typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock ArgTensorBlock;
333
334 struct TensorConversionOpBlockFactory {
335 template <typename ArgXprType>
336 struct XprType {
337 typedef TensorConversionOp<TargetType, const ArgXprType> type;
338 };
339
340 template <typename ArgXprType>
341 typename XprType<ArgXprType>::type expr(const ArgXprType& expr) const {
342 return typename XprType<ArgXprType>::type(expr);
343 }
344 };
345
346 typedef internal::TensorUnaryExprBlock<TensorConversionOpBlockFactory, ArgTensorBlock> TensorBlock;
347 //===--------------------------------------------------------------------===//
348
349 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device) {}
350
351 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
352
353 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
354 return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType>::run(m_impl, data);
355 }
356
357#ifdef EIGEN_USE_THREADS
358 template <typename EvalSubExprsCallback>
359 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType data, EvalSubExprsCallback done) {
360 ConversionSubExprEvalAsync<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType,
361 EvalSubExprsCallback>::run(m_impl, data, std::move(done));
362 }
363#endif
364
365 EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }
366
367 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
368 return internal::CoeffConv<SrcType, TargetType, IsSameType>::run(m_impl, index);
369 }
370
371 template <int LoadMode>
372 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
373 // If we are not going to do the cast, we just need to check that base
374 // TensorEvaluator has packet access. Otherwise we also need to make sure,
375 // that we have an implementation of vectorized cast.
376 const bool Vectorizable = IsSameType ? TensorEvaluator<ArgType, Device>::PacketAccess
377 : int(TensorEvaluator<ArgType, Device>::PacketAccess) &
378 int(internal::type_casting_traits<SrcType, TargetType>::VectorizedCast);
379
380 return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode, Vectorizable, IsSameType>::run(m_impl,
381 index);
382 }
383
384 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
385 const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
386 if (vectorized) {
387 const double SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
388 const double TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
389 return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
390 TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
391 } else {
392 return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
393 }
394 }
395
396 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
397 return m_impl.getResourceRequirements();
398 }
399
400 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
401 bool /*root_of_expr_ast*/ = false) const {
402 return TensorBlock(m_impl.block(desc, scratch), TensorConversionOpBlockFactory());
403 }
404
405 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
406
408 const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
409
410 protected:
411 TensorEvaluator<ArgType, Device> m_impl;
412};
413
414} // end namespace Eigen
415
416#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
The tensor base class.
Definition TensorForwardDeclarations.h:68
Tensor conversion class. This class makes it possible to vectorize type casting operations when the number of scalars per packet in the source and the destination type differ.
Definition TensorConversion.h:166
Namespace containing all symbols from the Eigen library.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The tensor evaluator class.
Definition TensorEvaluator.h:30