Eigen-unsupported  3.4.1 (git rev 28ded8800c26864e537852658428ab44c8399e87)
 
Loading...
Searching...
No Matches
TensorShuffling.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
11#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
12
13namespace Eigen {
14
15namespace internal {
16template<typename Shuffle, typename XprType>
17struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType>
18{
19 typedef typename XprType::Scalar Scalar;
20 typedef traits<XprType> XprTraits;
21 typedef typename XprTraits::StorageKind StorageKind;
22 typedef typename XprTraits::Index Index;
23 typedef typename XprType::Nested Nested;
24 typedef typename remove_reference<Nested>::type _Nested;
25 static const int NumDimensions = XprTraits::NumDimensions;
26 static const int Layout = XprTraits::Layout;
27 typedef typename XprTraits::PointerType PointerType;
28};
29
30template<typename Shuffle, typename XprType>
31struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense>
32{
33 typedef const TensorShufflingOp<Shuffle, XprType>& type;
34};
35
36template<typename Shuffle, typename XprType>
37struct nested<TensorShufflingOp<Shuffle, XprType>, 1, typename eval<TensorShufflingOp<Shuffle, XprType> >::type>
38{
39 typedef TensorShufflingOp<Shuffle, XprType> type;
40};
41
42} // end namespace internal
43
49template <typename Shuffle, typename XprType>
50class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> > {
51 public:
53 typedef typename Eigen::internal::traits<TensorShufflingOp>::Scalar Scalar;
54 typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
55 typedef typename XprType::CoeffReturnType CoeffReturnType;
56 typedef typename Eigen::internal::nested<TensorShufflingOp>::type Nested;
57 typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind;
58 typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index;
59
60 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl)
61 : m_xpr(expr), m_shuffle(shfl) {}
62
63 EIGEN_DEVICE_FUNC
64 const Shuffle& shufflePermutation() const { return m_shuffle; }
65
66 EIGEN_DEVICE_FUNC
67 const typename internal::remove_all<typename XprType::Nested>::type&
68 expression() const { return m_xpr; }
69
70 EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorShufflingOp)
71
72
73 protected:
74 typename XprType::Nested m_xpr;
75 const Shuffle m_shuffle;
76};
77
78
79// Eval as rvalue
80template<typename Shuffle, typename ArgType, typename Device>
81struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
82{
85 typedef typename XprType::Index Index;
86 static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
87 typedef DSizes<Index, NumDims> Dimensions;
88 typedef typename XprType::Scalar Scalar;
89 typedef typename XprType::CoeffReturnType CoeffReturnType;
90 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
91 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
92 typedef StorageMemory<CoeffReturnType, Device> Storage;
93 typedef typename Storage::Type EvaluatorPointerType;
94
95 enum {
96 IsAligned = false,
97 PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
99 PreferBlockAccess = true,
100 Layout = TensorEvaluator<ArgType, Device>::Layout,
101 CoordAccess = false, // to be implemented
102 RawAccess = false
103 };
104
105 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
106
107 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
108 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
109 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
110
111 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
112 Layout, Index>
113 TensorBlock;
114 //===--------------------------------------------------------------------===//
115
116 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
117 : m_device(device),
118 m_impl(op.expression(), device)
119 {
120 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
121 const Shuffle& shuffle = op.shufflePermutation();
122 m_is_identity = true;
123 for (int i = 0; i < NumDims; ++i) {
124 m_shuffle[i] = static_cast<int>(shuffle[i]);
125 m_dimensions[i] = input_dims[shuffle[i]];
126 m_inverseShuffle[shuffle[i]] = i;
127 if (m_is_identity && shuffle[i] != i) {
128 m_is_identity = false;
129 }
130 }
131
132 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
133 m_unshuffledInputStrides[0] = 1;
134 m_outputStrides[0] = 1;
135
136 for (int i = 1; i < NumDims; ++i) {
137 m_unshuffledInputStrides[i] =
138 m_unshuffledInputStrides[i - 1] * input_dims[i - 1];
139 m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
140 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
141 m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
142 }
143 } else {
144 m_unshuffledInputStrides[NumDims - 1] = 1;
145 m_outputStrides[NumDims - 1] = 1;
146 for (int i = NumDims - 2; i >= 0; --i) {
147 m_unshuffledInputStrides[i] =
148 m_unshuffledInputStrides[i + 1] * input_dims[i + 1];
149 m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
150 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
151 m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
152 }
153 }
154
155 for (int i = 0; i < NumDims; ++i) {
156 m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]];
157 }
158 }
159
160 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
161
162 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
163 m_impl.evalSubExprsIfNeeded(NULL);
164 return true;
165 }
166
167#ifdef EIGEN_USE_THREADS
168 template <typename EvalSubExprsCallback>
169 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
170 EvaluatorPointerType, EvalSubExprsCallback done) {
171 m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
172 }
173#endif // EIGEN_USE_THREADS
174
175 EIGEN_STRONG_INLINE void cleanup() {
176 m_impl.cleanup();
177 }
178
179 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
180 {
181 if (m_is_identity) {
182 return m_impl.coeff(index);
183 } else {
184 return m_impl.coeff(srcCoeff(index));
185 }
186 }
187
188 template <int LoadMode, typename Self, bool ImplPacketAccess>
189 struct PacketLoader {
190 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
191 static PacketReturnType Run(const Self& self, Index index) {
192 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
193 EIGEN_UNROLL_LOOP
194 for (int i = 0; i < PacketSize; ++i) {
195 values[i] = self.coeff(index + i);
196 }
197 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
198 return rslt;
199 }
200 };
201
202 template<int LoadMode, typename Self>
203 struct PacketLoader<LoadMode, Self, true> {
204 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
205 static PacketReturnType Run(const Self& self, Index index) {
206 if (self.m_is_identity) {
207 return self.m_impl.template packet<LoadMode>(index);
208 } else {
209 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
210 EIGEN_UNROLL_LOOP
211 for (int i = 0; i < PacketSize; ++i) {
212 values[i] = self.coeff(index + i);
213 }
214 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
215 return rslt;
216 }
217 }
218 };
219
220 template<int LoadMode>
221 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
222 {
223 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
224 eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
225 return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*this, index);
226 }
227
228 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
229 internal::TensorBlockResourceRequirements getResourceRequirements() const {
230 static const int inner_dim =
231 Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
232
233 const size_t target_size = m_device.firstLevelCacheSize();
234 const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
235
236 // Shuffled inner dimensions leads to a random memory access, which is not
237 // captured by default cost model bytes loaded/stored. We add this cost
238 // explicitly. The number of cycles picked based on the benchmarks.
239 // TODO(ezhulenev): This number was picked based on a very questionable
240 // benchmarks, add benchmarks that are representative of real workloads.
241 using BlockRequirements = internal::TensorBlockResourceRequirements;
242 if (inner_dim_shuffled) {
243 return BlockRequirements::uniform<Scalar>(target_size)
244 .addCostPerCoeff({0, 0, NumDims * 28});
245 } else {
246 return BlockRequirements::skewed<Scalar>(target_size);
247 }
248 }
249
250 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
251 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
252 bool root_of_expr_ast = false) const {
253 assert(m_impl.data() != NULL);
254
255 typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
256 TensorBlockIO;
257 typedef typename TensorBlockIO::Dst TensorBlockIODst;
258 typedef typename TensorBlockIO::Src TensorBlockIOSrc;
259
260 const typename TensorBlock::Storage block_storage =
261 TensorBlock::prepareStorage(
262 desc, scratch, /*allow_strided_storage=*/root_of_expr_ast);
263
264 typename TensorBlockIO::Dimensions input_strides(m_unshuffledInputStrides);
265 TensorBlockIOSrc src(input_strides, m_impl.data(), srcCoeff(desc.offset()));
266
267 TensorBlockIODst dst(block_storage.dimensions(), block_storage.strides(),
268 block_storage.data());
269
270 typename TensorBlockIO::DimensionsMap dst_to_src_dim_map(m_shuffle);
271 TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);
272
273 return block_storage.AsTensorMaterializedBlock();
274 }
275
276 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
277 const double compute_cost = m_is_identity ? TensorOpCost::AddCost<Index>() :
278 NumDims * (2 * TensorOpCost::AddCost<Index>() +
279 2 * TensorOpCost::MulCost<Index>() +
280 TensorOpCost::DivCost<Index>());
281 return m_impl.costPerCoeff(vectorized) +
282 TensorOpCost(0, 0, compute_cost, m_is_identity /* vectorized */, PacketSize);
283 }
284
285 EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }
286
287#ifdef EIGEN_USE_SYCL
288 // binding placeholder accessors to a command group handler for SYCL
289 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
290 m_impl.bind(cgh);
291 }
292#endif
293 protected:
294 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(
295 Index input_index,
296 const DSizes<Index, NumDims>& input_block_strides,
297 const DSizes<Index, NumDims>& output_block_strides,
298 const DSizes<internal::TensorIntDivisor<Index>, NumDims>& fast_input_block_strides) const {
299 Index output_index = 0;
300 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
301 for (int i = NumDims - 1; i > 0; --i) {
302 const Index idx = input_index / fast_input_block_strides[i];
303 output_index += idx * output_block_strides[m_inverseShuffle[i]];
304 input_index -= idx * input_block_strides[i];
305 }
306 return output_index + input_index *
307 output_block_strides[m_inverseShuffle[0]];
308 } else {
309 for (int i = 0; i < NumDims - 1; ++i) {
310 const Index idx = input_index / fast_input_block_strides[i];
311 output_index += idx * output_block_strides[m_inverseShuffle[i]];
312 input_index -= idx * input_block_strides[i];
313 }
314 return output_index + input_index *
315 output_block_strides[m_inverseShuffle[NumDims - 1]];
316 }
317 }
318
319 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
320 Index inputIndex = 0;
321 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
322 for (int i = NumDims - 1; i > 0; --i) {
323 const Index idx = index / m_fastOutputStrides[i];
324 inputIndex += idx * m_inputStrides[i];
325 index -= idx * m_outputStrides[i];
326 }
327 return inputIndex + index * m_inputStrides[0];
328 } else {
329 for (int i = 0; i < NumDims - 1; ++i) {
330 const Index idx = index / m_fastOutputStrides[i];
331 inputIndex += idx * m_inputStrides[i];
332 index -= idx * m_outputStrides[i];
333 }
334 return inputIndex + index * m_inputStrides[NumDims - 1];
335 }
336 }
337
338 Dimensions m_dimensions;
339 bool m_is_identity;
340 array<int, NumDims> m_shuffle;
341 array<Index, NumDims> m_inverseShuffle; // TODO(ezhulenev): Make it int type.
342 array<Index, NumDims> m_outputStrides;
343 array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
344 array<Index, NumDims> m_inputStrides;
345 array<Index, NumDims> m_unshuffledInputStrides;
346
347 const Device EIGEN_DEVICE_REF m_device;
348 TensorEvaluator<ArgType, Device> m_impl;
349};
350
351
352// Eval as lvalue
353template<typename Shuffle, typename ArgType, typename Device>
354struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
355 : public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
356{
357 typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Base;
358
359 typedef TensorShufflingOp<Shuffle, ArgType> XprType;
360 typedef typename XprType::Index Index;
361 static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
362 typedef DSizes<Index, NumDims> Dimensions;
363 typedef typename XprType::Scalar Scalar;
364 typedef typename XprType::CoeffReturnType CoeffReturnType;
365 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
366 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
367
368 enum {
369 IsAligned = false,
370 PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
371 BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
372 PreferBlockAccess = true,
373 Layout = TensorEvaluator<ArgType, Device>::Layout,
374 RawAccess = false
375 };
376
377 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
378
379 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
380 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
381 //===--------------------------------------------------------------------===//
382
383 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
384 : Base(op, device)
385 { }
386
387 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
388 {
389 return this->m_impl.coeffRef(this->srcCoeff(index));
390 }
391
392 template <int StoreMode> EIGEN_STRONG_INLINE
393 void writePacket(Index index, const PacketReturnType& x)
394 {
395 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
396
397 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
398 internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
399 EIGEN_UNROLL_LOOP
400 for (int i = 0; i < PacketSize; ++i) {
401 this->coeffRef(index+i) = values[i];
402 }
403 }
404
405 template <typename TensorBlock>
406 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
407 const TensorBlockDesc& desc, const TensorBlock& block) {
408 eigen_assert(this->m_impl.data() != NULL);
409
410 typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
411 TensorBlockIO;
412 typedef typename TensorBlockIO::Dst TensorBlockIODst;
413 typedef typename TensorBlockIO::Src TensorBlockIOSrc;
414
415 const Scalar* block_buffer = block.data();
416
417 // TODO(ezhulenev): TensorBlockIO should be able to read from any Eigen
418 // expression with coefficient and packet access as `src`.
419 void* mem = NULL;
420 if (block_buffer == NULL) {
421 mem = this->m_device.allocate(desc.size() * sizeof(Scalar));
422 ScalarNoConst* buf = static_cast<ScalarNoConst*>(mem);
423
424 typedef internal::TensorBlockAssignment<
425 ScalarNoConst, NumDims, typename TensorBlock::XprType, Index>
426 TensorBlockAssignment;
427
428 TensorBlockAssignment::Run(
429 TensorBlockAssignment::target(
430 desc.dimensions(), internal::strides<Layout>(desc.dimensions()),
431 buf),
432 block.expr());
433
434 block_buffer = buf;
435 }
436
437 // Read from block.
438 TensorBlockIOSrc src(internal::strides<Layout>(desc.dimensions()),
439 block_buffer);
440
441 // Write to the output buffer.
442 typename TensorBlockIO::Dimensions output_strides(
443 this->m_unshuffledInputStrides);
444 typename TensorBlockIO::Dimensions output_dimensions;
445 for (int i = 0; i < NumDims; ++i) {
446 output_dimensions[this->m_shuffle[i]] = desc.dimension(i);
447 }
448 TensorBlockIODst dst(output_dimensions, output_strides, this->m_impl.data(),
449 this->srcCoeff(desc.offset()));
450
451 // Reorder dimensions according to the shuffle.
452 typename TensorBlockIO::DimensionsMap dst_to_src_dim_map;
453 for (int i = 0; i < NumDims; ++i) {
454 dst_to_src_dim_map[i] = static_cast<int>(this->m_inverseShuffle[i]);
455 }
456 TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);
457
458 // Deallocate temporary buffer used for the block materialization.
459 if (mem != NULL) this->m_device.deallocate(mem);
460 }
461};
462
463
464} // end namespace Eigen
465
466#endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
The tensor base class.
Definition TensorForwardDeclarations.h:56
Tensor shuffling class.
Definition TensorShuffling.h:50
Namespace containing all symbols from the Eigen library.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The tensor evaluator class.
Definition TensorEvaluator.h:27