TensorConversion.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H

namespace Eigen {

namespace internal {
template<typename TargetType, typename XprType>
struct traits<TensorConversionOp<TargetType, XprType> >
{
  // The converted expression exposes the requested target type as its scalar type.
  typedef TargetType Scalar;
  typedef typename traits<XprType>::StorageKind StorageKind;
  typedef typename traits<XprType>::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = traits<XprType>::NumDimensions;
  static const int Layout = traits<XprType>::Layout;
  enum { Flags = 0 };
};

template<typename TargetType, typename XprType>
struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense>
{
  typedef const TensorConversionOp<TargetType, XprType>& type;
};

template<typename TargetType, typename XprType>
struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
{
  typedef TensorConversionOp<TargetType, XprType> type;
};

} // end namespace internal

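// PacketConverter wraps a tensor evaluator and converts the packets it produces
// from the source scalar type to the target scalar type. SrcCoeffRatio and
// TgtCoeffRatio come from internal::type_casting_traits and describe how many
// source packets are consumed for how many target packets produced. As an
// illustration (assuming 4-wide float and 2-wide double packets, as on SSE):
// casting double to float is a 2:1 conversion (two double packets are cast into
// one float packet), while casting float to double is a 1:2 conversion. The
// generic template below handles the 1:1 case; the specializations that follow
// handle the other ratios.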
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
struct PacketConverter {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
  }

 private:
  const TensorEvaluator& m_impl;
};


template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;

    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
    return result;
  }

 private:
  const TensorEvaluator& m_impl;
};

template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;

    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
    SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
    SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
    return result;
  }

 private:
  const TensorEvaluator& m_impl;
};

template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
    // Only call m_impl.packet() when we have direct access to the underlying data. This
    // ensures that we don't compute the subexpression twice. We may however load some
    // coefficients twice, but in practice this doesn't negatively impact performance.
    if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
      // Force unaligned memory loads since we can't ensure alignment anymore
      return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
    } else {
      const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
      typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
      typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
      internal::scalar_cast_op<SrcType, TgtType> converter;
      EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
      for (int i = 0; i < TgtPacketSize; ++i) {
        values[i] = converter(m_impl.coeff(index+i));
      }
      TgtPacket rslt = internal::pload<TgtPacket>(values);
      return rslt;
    }
  }

 private:
  const TensorEvaluator& m_impl;
  const typename TensorEvaluator::Index m_maxIndex;
};

/** \class TensorConversionOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor conversion class. This class makes it possible to vectorize
  * type casting operations when the number of scalars per packet in the source
  * and the destination type differ.
  */
template<typename TargetType, typename XprType>
class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
    typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorConversionOp>::Index Index;
    typedef typename internal::nested<TensorConversionOp>::type Nested;
    typedef Scalar CoeffReturnType;
    typedef typename NumTraits<Scalar>::Real RealScalar;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr)
        : m_xpr(xpr) {}

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
};
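
// A conversion expression is normally created through TensorBase::cast<NewType>()
// rather than by constructing the op directly. A minimal usage sketch (tensor
// names are illustrative; requires #include <unsupported/Eigen/CXX11/Tensor>):
//
//   Eigen::Tensor<int, 2> counts(3, 4);
//   counts.setConstant(7);
//   Eigen::Tensor<float, 2> ratios = counts.cast<float>();  // evaluates a TensorConversionOp<float, ...>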

template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) {
    impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
};

template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) {
    return impl.evalSubExprsIfNeeded(data);
  }
};
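
// ConversionSubExprEval decides whether the nested expression may evaluate
// directly into the destination buffer: when the source and target scalar types
// are identical (SameType == true) there is nothing to cast, so the buffer is
// forwarded to the sub-expression; otherwise the sub-expression evaluates into
// its own storage and each coefficient is converted on access.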

// Eval as rvalue
template<typename TargetType, typename ArgType, typename Device>
struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
{
  typedef TensorConversionOp<TargetType, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef TargetType Scalar;
  typedef TargetType CoeffReturnType;
  typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename PacketType<SrcType, Device>::type PacketSourceType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  enum {
    IsAligned = false,
    PacketAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };
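  // Note: the evaluator never exposes a raw buffer of converted values
  // (data() below returns NULL), hence RawAccess is false, and it makes no
  // alignment guarantee for its packet loads, hence IsAligned is false.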

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : m_impl(op.expression(), device)
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
  {
    return ConversionSubExprEval<internal::is_same<TargetType, SrcType>::value, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
  {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    internal::scalar_cast_op<SrcType, TargetType> converter;
    return converter(m_impl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
                              internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
    return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
  }
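
  // The packet() path above dispatches through PacketConv (defined below): a
  // truly vectorized cast is used when the argument supports packet access and
  // a vectorized pcast exists for this type pair; otherwise a scalar fallback
  // converts PacketSize coefficients one by one and packs the results.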

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
    if (vectorized) {
      const double SrcCoeffRatio =
          internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
      const double TgtCoeffRatio =
          internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
      return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
             TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
    } else {
      return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
    }
  }
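
  // In the vectorized branch above, the argument's per-coefficient cost is
  // scaled by SrcCoeffRatio / PacketSize and a compute cost of
  // TgtCoeffRatio * cast_cost / PacketSize is charged for the cast itself; the
  // scalar branch simply adds one cast per coefficient.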

  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }

 protected:
  template <int LoadMode, bool ActuallyVectorize>
  struct PacketConv {
    static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
      internal::scalar_cast_op<SrcType, TargetType> converter;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = converter(impl.coeff(index+i));
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  };

  template <int LoadMode>
  struct PacketConv<LoadMode, true> {
    static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
      const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
      const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
      PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
                      SrcCoeffRatio, TgtCoeffRatio> converter(impl);
      return converter.template packet<LoadMode>(index);
    }
  };

  TensorEvaluator<ArgType, Device> m_impl;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H