TensorMorphing.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

namespace Eigen {

namespace internal {
template<typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<NewDimensions>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReshapingOp<NewDimensions, XprType>& type;
};

template<typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
{
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

} // end namespace internal

/** \class TensorReshapingOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor reshaping class.
  */
template <typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
    typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const NewDimensions& dimensions() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const NewDimensions m_dims;
};
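
// Usage sketch (illustrative, not part of this header): TensorReshapingOp is
// normally created through TensorBase::reshape(). The new shape must contain
// exactly as many coefficients as the input expression.
//
//   Eigen::Tensor<float, 2> input(2, 6);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 3> new_shape{{2, 2, 3}};
//   Eigen::Tensor<float, 3> reshaped = input.reshape(new_shape);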


// Eval as rvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
  {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};


// Eval as lvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(index);
  }
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }
};


/** \class TensorSlicingOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor slicing class.
  */
namespace internal {
template<typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
{
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

} // end namespace internal



template<typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
{
  public:
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
        : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_indices; }
    EIGEN_DEVICE_FUNC
    const Sizes& sizes() const { return m_sizes; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_indices;
    const Sizes m_sizes;
};
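
// Usage sketch (illustrative, not part of this header): TensorSlicingOp is
// normally created through TensorBase::slice(offsets, extents), which keeps
// extents[i] coefficients of dimension i starting at offsets[i].
//
//   Eigen::Tensor<float, 2> input(4, 3);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> offsets{{1, 0}};
//   Eigen::array<Eigen::Index, 2> extents{{2, 2}};
//   Eigen::Tensor<float, 2> patch = input.slice(offsets, extents);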


// Fixme: figure out the exact threshold
namespace {
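// Helper functor: decides whether a run of val contiguous coefficients is
// large enough that copying it with the device's memcpy is likely to beat
// evaluating the slice coefficient by coefficient.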
template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }

 private:
  Index threshold_;
};

// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
};
#endif
}

// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
  {
    for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }

      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    }
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


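  // If the underlying buffer is directly accessible and the scalar type needs
  // no construction, copy the slice into `data` with one memcpy per contiguous
  // run instead of evaluating it coefficient by coefficient; returning false
  // tells the caller the result is already stored in `data`. Otherwise fall
  // back to the regular per-coefficient path by returning true.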
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims-1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
      // Use memcpy if it's going to be faster than using the regular evaluation.
      const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
      if (trigger(contiguous_values)) {
        Scalar* src = (Scalar*)m_impl.data();
        for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

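  // Vectorized read: map the first and last coefficient of the packet to input
  // indices. If they turn out to be contiguous in the input, do a single
  // (unaligned) packet load; otherwise gather the coefficients one by one into
  // an aligned buffer and load the packet from there.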
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < packetSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }


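  // Expose a raw pointer into the input only when the slice is contiguous in
  // memory: scanning from the innermost dimension outwards, once a dimension's
  // extent has been reduced by the slice, every remaining dimension must have
  // an extent of at most 1. Otherwise return NULL so callers fall back to
  // coeff()/packet().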
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    Scalar* result = m_impl.data();
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i+1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i-1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }

 protected:
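  // Maps a linear index within the slice to the corresponding linear index in
  // the input tensor: peel off one output dimension at a time using the
  // precomputed (fast) output strides, add the slice offset for that
  // dimension, and accumulate with the matching input stride.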
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims-1]);
    }
    return inputIndex;
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  Dimensions m_dimensions;
  const StartIndices m_offsets;
};


// Eval as lvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

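  // Vectorized write, mirroring the rvalue packet() path: if the packet maps
  // to a contiguous run in the input, store it in one go; otherwise spill the
  // packet to a temporary buffer and scatter the coefficients individually.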
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    }
    else {
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
      for (int i = 1; i < packetSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};



namespace internal {
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};

} // end namespace internal


template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
{
  public:
    typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
    typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
        const XprType& expr, const StartIndices& startIndices,
        const StopIndices& stopIndices, const Strides& strides)
        : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
          m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_startIndices; }
    EIGEN_DEVICE_FUNC
    const StopIndices& stopIndices() const { return m_stopIndices; }
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_startIndices;
    const StopIndices m_stopIndices;
    const Strides m_strides;
};
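
// Usage sketch (illustrative, not part of this header): TensorStridingSlicingOp
// is normally created through TensorBase::stridedSlice(start, stop, strides),
// which behaves like Python's start:stop:step slicing (stop is exclusive, and
// negative strides walk the dimension backwards).
//
//   Eigen::Tensor<float, 1> v(10);
//   v.setRandom();
//   Eigen::array<Eigen::Index, 1> start{{8}}, stop{{0}}, strides{{-2}};
//   Eigen::Tensor<float, 1> picked = v.stridedSlice(start, stop, strides);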

// Eval as rvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_strides(op.strides())
  {
    // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
    DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
    for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        /* implies m_strides[i] < 0 by assert */
        startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();

    // Check for degenerate intervals and compute the output tensor shape.
    bool degenerate = false;
    for (int i = 0; i < NumDims; i++) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
        degenerate = true;
      } else {
        m_dimensions[i] = interval / m_strides[i]
                          + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
    }
    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i-1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = m_strides[NumDims-1];
      m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i+1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    }
    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
                                          device.lastLevelCacheSize() /
                                          sizeof(Scalar));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    return NULL;
  }

 protected:
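  // Maps a linear index within the strided slice to the corresponding linear
  // index in the input tensor. m_inputStrides already folds in the slice
  // stride for each dimension, and m_offsets holds the clamped start index
  // pre-multiplied by the input stride of that dimension.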
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }

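  // Clamp a user-supplied index into [min, max] so that out-of-range start and
  // stop indices degrade into empty or truncated slices instead of reading
  // outside the input tensor.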
  static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
    return numext::maxi(min, numext::mini(max, value));
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  DSizes<Index, NumDims> m_startIndices; // clamped startIndices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
  const Strides m_strides;
  std::size_t m_block_total_size_max;
};

// Eval as lvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
  : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H