Eigen-unsupported  3.4.1 (git rev 28ded8800c26864e537852658428ab44c8399e87)
 
Loading...
Searching...
No Matches
TensorPadding.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
11#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
12
13namespace Eigen {
14
15namespace internal {
16template<typename PaddingDimensions, typename XprType>
17struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
18{
19 typedef typename XprType::Scalar Scalar;
20 typedef traits<XprType> XprTraits;
21 typedef typename XprTraits::StorageKind StorageKind;
22 typedef typename XprTraits::Index Index;
23 typedef typename XprType::Nested Nested;
24 typedef typename remove_reference<Nested>::type _Nested;
25 static const int NumDimensions = XprTraits::NumDimensions;
26 static const int Layout = XprTraits::Layout;
27 typedef typename XprTraits::PointerType PointerType;
28};
29
30template<typename PaddingDimensions, typename XprType>
31struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
32{
33 typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
34};
35
36template<typename PaddingDimensions, typename XprType>
37struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
38{
39 typedef TensorPaddingOp<PaddingDimensions, XprType> type;
40};
41
42} // end namespace internal
43
51template <typename PaddingDimensions, typename XprType>
52class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors> {
53 public:
54 typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
55 typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
56 typedef typename XprType::CoeffReturnType CoeffReturnType;
57 typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
58 typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
59 typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;
60
61 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value)
62 : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}
63
64 EIGEN_DEVICE_FUNC
65 const PaddingDimensions& padding() const { return m_padding_dims; }
66 EIGEN_DEVICE_FUNC
67 Scalar padding_value() const { return m_padding_value; }
68
69 EIGEN_DEVICE_FUNC
70 const typename internal::remove_all<typename XprType::Nested>::type&
71 expression() const { return m_xpr; }
72
73 protected:
74 typename XprType::Nested m_xpr;
75 const PaddingDimensions m_padding_dims;
76 const Scalar m_padding_value;
77};
78
79
80// Eval as rvalue
81template<typename PaddingDimensions, typename ArgType, typename Device>
82struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
83{
85 typedef typename XprType::Index Index;
86 static const int NumDims = internal::array_size<PaddingDimensions>::value;
87 typedef DSizes<Index, NumDims> Dimensions;
88 typedef typename XprType::Scalar Scalar;
89 typedef typename XprType::CoeffReturnType CoeffReturnType;
90 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
91 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
92 typedef StorageMemory<CoeffReturnType, Device> Storage;
93 typedef typename Storage::Type EvaluatorPointerType;
94
95 enum {
96 IsAligned = true,
97 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
99 PreferBlockAccess = true,
100 Layout = TensorEvaluator<ArgType, Device>::Layout,
101 CoordAccess = true,
102 RawAccess = false
103 };
104
105 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
106
107 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
108 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
109 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
110
111 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
112 Layout, Index>
113 TensorBlock;
114 //===--------------------------------------------------------------------===//
115
116 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
117 : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device)
118 {
119 // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead
120 // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector
121 // of 1 element first and then pad.
122 EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
123
124 // Compute dimensions
125 m_dimensions = m_impl.dimensions();
126 for (int i = 0; i < NumDims; ++i) {
127 m_dimensions[i] += m_padding[i].first + m_padding[i].second;
128 }
129 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
130 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
131 m_inputStrides[0] = 1;
132 m_outputStrides[0] = 1;
133 for (int i = 1; i < NumDims; ++i) {
134 m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
135 m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
136 }
137 m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
138 } else {
139 m_inputStrides[NumDims - 1] = 1;
140 m_outputStrides[NumDims] = 1;
141 for (int i = NumDims - 2; i >= 0; --i) {
142 m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
143 m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
144 }
145 m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
146 }
147 }
148
149 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
150
151 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
152 m_impl.evalSubExprsIfNeeded(NULL);
153 return true;
154 }
155
#ifdef EIGEN_USE_THREADS
// Asynchronous variant of evalSubExprsIfNeeded(): forwards to the argument
// evaluator and, once it reports completion, invokes `done(true)` regardless
// of the flag the argument passed to the intermediate callback.
template <typename EvalSubExprsCallback>
EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType,
                                                   EvalSubExprsCallback done)
{
  m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
}
#endif  // EIGEN_USE_THREADS
163
164 EIGEN_STRONG_INLINE void cleanup() {
165 m_impl.cleanup();
166 }
167
168 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
169 {
170 eigen_assert(index < dimensions().TotalSize());
171 Index inputIndex = 0;
172 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
173 EIGEN_UNROLL_LOOP
174 for (int i = NumDims - 1; i > 0; --i) {
175 const Index idx = index / m_outputStrides[i];
176 if (isPaddingAtIndexForDim(idx, i)) {
177 return m_paddingValue;
178 }
179 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
180 index -= idx * m_outputStrides[i];
181 }
182 if (isPaddingAtIndexForDim(index, 0)) {
183 return m_paddingValue;
184 }
185 inputIndex += (index - m_padding[0].first);
186 } else {
187 EIGEN_UNROLL_LOOP
188 for (int i = 0; i < NumDims - 1; ++i) {
189 const Index idx = index / m_outputStrides[i+1];
190 if (isPaddingAtIndexForDim(idx, i)) {
191 return m_paddingValue;
192 }
193 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
194 index -= idx * m_outputStrides[i+1];
195 }
196 if (isPaddingAtIndexForDim(index, NumDims-1)) {
197 return m_paddingValue;
198 }
199 inputIndex += (index - m_padding[NumDims-1].first);
200 }
201 return m_impl.coeff(inputIndex);
202 }
203
204 template<int LoadMode>
205 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
206 {
207 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
208 return packetColMajor(index);
209 }
210 return packetRowMajor(index);
211 }
212
213 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
214 TensorOpCost cost = m_impl.costPerCoeff(vectorized);
215 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
216 EIGEN_UNROLL_LOOP
217 for (int i = 0; i < NumDims; ++i)
218 updateCostPerDimension(cost, i, i == 0);
219 } else {
220 EIGEN_UNROLL_LOOP
221 for (int i = NumDims - 1; i >= 0; --i)
222 updateCostPerDimension(cost, i, i == NumDims - 1);
223 }
224 return cost;
225 }
226
227 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
228 internal::TensorBlockResourceRequirements getResourceRequirements() const {
229 const size_t target_size = m_device.lastLevelCacheSize();
230 return internal::TensorBlockResourceRequirements::merge(
231 internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
232 m_impl.getResourceRequirements());
233 }
234
// Materializes the output block described by `desc` and returns it.
//
// An empty descriptor yields an empty view block. Otherwise storage for the
// block is obtained through TensorBlock::prepareStorage(desc, scratch) and
// filled one innermost-dimension run at a time:
//   * if any outer coordinate lies in a padded range, the whole run is filled
//     with m_paddingValue;
//   * else, if the innermost dimension is unpadded and fully covered by the
//     block (`squeeze_writes`), several consecutive runs are copied from the
//     argument with a single linear copy;
//   * else the run is written as padding-before / copy / padding-after.
// Input values are read directly through m_impl.data(). The third parameter
// is unused.
235 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
236 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
237 bool /*root_of_expr_ast*/ = false) const {
238 // If one of the dimensions is zero, return empty block view.
239 if (desc.size() == 0) {
240 return TensorBlock(internal::TensorBlockKind::kView, NULL,
241 desc.dimensions());
242 }
243
244 static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
245 const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;
246
247 Index offset = desc.offset();
248
249 // Compute offsets in the output tensor corresponding to the desc.offset().
250 DSizes<Index, NumDims> output_offsets;
251 for (int i = NumDims - 1; i > 0; --i) {
252 const int dim = IsColMajor ? i : NumDims - i - 1;
253 const int stride_dim = IsColMajor ? dim : dim + 1;
254 output_offsets[dim] = offset / m_outputStrides[stride_dim];
255 offset -= output_offsets[dim] * m_outputStrides[stride_dim];
256 }
257 output_offsets[inner_dim_idx] = offset;
258
259 // Offsets in the input corresponding to output offsets.
260 DSizes<Index, NumDims> input_offsets = output_offsets;
261 for (int i = 0; i < NumDims; ++i) {
262 const int dim = IsColMajor ? i : NumDims - i - 1;
263 input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
264 }
265
266 // Compute offset in the input buffer (at this point it might be illegal and
267 // point outside of the input buffer, because we don't check for negative
268 // offsets, it will be autocorrected in the block iteration loop below).
269 Index input_offset = 0;
270 for (int i = 0; i < NumDims; ++i) {
271 const int dim = IsColMajor ? i : NumDims - i - 1;
272 input_offset += input_offsets[dim] * m_inputStrides[dim];
273 }
274
275 // Destination buffer and scratch buffer both indexed from 0 and have the
276 // same dimensions as the requested block (for destination buffer this
277 // property is guaranteed by `desc.destination()`).
278 Index output_offset = 0;
279 const DSizes<Index, NumDims> output_strides =
280 internal::strides<Layout>(desc.dimensions());
281
282 // NOTE(ezhulenev): We initialize block iteration state for `NumDims - 1`
283 // dimensions, skipping innermost dimension. In theory it should be possible
284 // to squeeze matching innermost dimensions, however in practice that did
285 // not show any improvements in benchmarks. Also in practice first outer
286 // dimension usually has padding, and will prevent squeezing.
287
288 // Initialize output block iterator state. Dimension in this array are
289 // always in inner_most -> outer_most order (col major layout).
290 array<BlockIteratorState, NumDims - 1> it;
291 for (int i = 0; i < NumDims - 1; ++i) {
292 const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
293 it[i].count = 0;
294 it[i].size = desc.dimension(dim);
295
296 it[i].input_stride = m_inputStrides[dim];
297 it[i].input_span = it[i].input_stride * (it[i].size - 1);
298
299 it[i].output_stride = output_strides[dim];
300 it[i].output_span = it[i].output_stride * (it[i].size - 1);
301 }
302
303 const Index input_inner_dim_size =
304 static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);
305
306 // Total output size.
307 const Index output_size = desc.size();
308
309 // We will fill inner dimension of this size in the output. It might be
310 // larger than the inner dimension in the input, so we might have to pad
311 // before/after we copy values from the input inner dimension.
312 const Index output_inner_dim_size = desc.dimension(inner_dim_idx);
313
314 // How many values to fill with padding BEFORE reading from the input inner
315 // dimension.
316 const Index output_inner_pad_before_size =
317 input_offsets[inner_dim_idx] < 0
318 ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
319 output_inner_dim_size)
320 : 0;
321
322 // How many values we can actually copy from the input inner dimension.
323 const Index output_inner_copy_size = numext::mini(
324 // Want to copy from input.
325 (output_inner_dim_size - output_inner_pad_before_size),
326 // Can copy from input.
327 numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] +
328 output_inner_pad_before_size),
329 Index(0)));
330
331 eigen_assert(output_inner_copy_size >= 0);
332
333 // How many values to fill with padding AFTER reading from the input inner
334 // dimension.
335 const Index output_inner_pad_after_size =
336 (output_inner_dim_size - output_inner_copy_size -
337 output_inner_pad_before_size);
338
339 // Sanity check, sum of all sizes must be equal to the output size.
340 eigen_assert(output_inner_dim_size ==
341 (output_inner_pad_before_size + output_inner_copy_size +
342 output_inner_pad_after_size));
343
344 // Keep track of current coordinates and padding in the output.
345 DSizes<Index, NumDims> output_coord = output_offsets;
346 DSizes<Index, NumDims> output_padded;
347 for (int i = 0; i < NumDims; ++i) {
348 const int dim = IsColMajor ? i : NumDims - i - 1;
349 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
350 }
351
352 typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;
353
354 // Prepare storage for the materialized padding result.
355 const typename TensorBlock::Storage block_storage =
356 TensorBlock::prepareStorage(desc, scratch);
357
358 // TODO(ezhulenev): Squeeze multiple non-padded inner dimensions into a
359 // single logical inner dimension.
360
361 // When possible we squeeze writes for the innermost (only if non-padded)
362 // dimension with the first padded dimension. This allows to reduce the
363 // number of calls to LinCopy and better utilize vector instructions.
364 const bool squeeze_writes =
365 NumDims > 1 &&
366 // inner dimension is not padded
367 (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
368 // and equal to the block inner dimension
369 (input_inner_dim_size == output_inner_dim_size);
370
371 const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;
372
373 // Maximum coordinate on a squeeze dimension that we can write to.
374 const Index squeeze_max_coord =
375 squeeze_writes ? numext::mini(
376 // max non-padded element in the input
377 static_cast<Index>(m_dimensions[squeeze_dim] -
378 m_padding[squeeze_dim].second),
379 // max element in the output buffer
380 static_cast<Index>(output_offsets[squeeze_dim] +
381 desc.dimension(squeeze_dim)))
382 : static_cast<Index>(0);
383
384 // Iterate copying data from `m_impl.data()` to the output buffer.
385 for (Index size = 0; size < output_size;) {
386 // Detect if we are in the padded region (exclude innermost dimension).
387 bool is_padded = false;
388 for (int j = 1; j < NumDims; ++j) {
389 const int dim = IsColMajor ? j : NumDims - j - 1;
390 is_padded = output_padded[dim];
391 if (is_padded) break;
392 }
393
394 if (is_padded) {
395 // Fill single innermost dimension with padding value.
396 size += output_inner_dim_size;
397
398 LinCopy::template Run<LinCopy::Kind::FillLinear>(
399 typename LinCopy::Dst(output_offset, 1, block_storage.data()),
400 typename LinCopy::Src(0, 0, &m_paddingValue),
401 output_inner_dim_size);
402
403
404 } else if (squeeze_writes) {
405 // Squeeze multiple reads from innermost dimensions.
406 const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
407 size += output_inner_dim_size * squeeze_num;
408
409 // Copy `squeeze_num` inner dimensions from input to output.
410 LinCopy::template Run<LinCopy::Kind::Linear>(
411 typename LinCopy::Dst(output_offset, 1, block_storage.data()),
412 typename LinCopy::Src(input_offset, 1, m_impl.data()),
413 output_inner_dim_size * squeeze_num);
414
415 // Update iteration state for only `squeeze_num - 1` processed inner
416 // dimensions, because we have another iteration state update at the end
417 // of the loop that will update iteration state for the last inner
418 // processed dimension.
419 it[0].count += (squeeze_num - 1);
420 input_offset += it[0].input_stride * (squeeze_num - 1);
421 output_offset += it[0].output_stride * (squeeze_num - 1);
422 output_coord[squeeze_dim] += (squeeze_num - 1);
423
424 } else {
425 // Single read from innermost dimension.
426 size += output_inner_dim_size;
427
428 { // Fill with padding before copying from input inner dimension.
429 const Index out = output_offset;
430
431 LinCopy::template Run<LinCopy::Kind::FillLinear>(
432 typename LinCopy::Dst(out, 1, block_storage.data()),
433 typename LinCopy::Src(0, 0, &m_paddingValue),
434 output_inner_pad_before_size);
435 }
436
437 { // Copy data from input inner dimension.
438 const Index out = output_offset + output_inner_pad_before_size;
439 const Index in = input_offset + output_inner_pad_before_size;
440
441 eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);
442
443 LinCopy::template Run<LinCopy::Kind::Linear>(
444 typename LinCopy::Dst(out, 1, block_storage.data()),
445 typename LinCopy::Src(in, 1, m_impl.data()),
446 output_inner_copy_size);
447 }
448
449 { // Fill with padding after copying from input inner dimension.
450 const Index out = output_offset + output_inner_pad_before_size +
451 output_inner_copy_size;
452
453 LinCopy::template Run<LinCopy::Kind::FillLinear>(
454 typename LinCopy::Dst(out, 1, block_storage.data()),
455 typename LinCopy::Src(0, 0, &m_paddingValue),
456 output_inner_pad_after_size);
457 }
458 }
459
// Advance the outer-dimension iterator: bump the first dimension that still
// has room, resetting every dimension that has just wrapped around.
460 for (int j = 0; j < NumDims - 1; ++j) {
461 const int dim = IsColMajor ? j + 1 : NumDims - j - 2;
462
463 if (++it[j].count < it[j].size) {
464 input_offset += it[j].input_stride;
465 output_offset += it[j].output_stride;
466 output_coord[dim] += 1;
467 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
468 break;
469 }
470 it[j].count = 0;
471 input_offset -= it[j].input_span;
472 output_offset -= it[j].output_span;
473 output_coord[dim] -= it[j].size - 1;
474 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
475 }
476 }
477
478 return block_storage.AsTensorMaterializedBlock();
479 }
480
481 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
482
#ifdef EIGEN_USE_SYCL
// Binds placeholder accessors to a SYCL command group handler by forwarding
// the handler to the argument evaluator.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const
{
  m_impl.bind(cgh);
}
#endif
489
490 private:
491 struct BlockIteratorState {
492 BlockIteratorState()
493 : count(0),
494 size(0),
495 input_stride(0),
496 input_span(0),
497 output_stride(0),
498 output_span(0) {}
499
500 Index count;
501 Index size;
502 Index input_stride;
503 Index input_span;
504 Index output_stride;
505 Index output_span;
506 };
507
508 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
509 Index index, int dim_index) const {
510#if defined(EIGEN_HAS_INDEX_LIST)
511 return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
512 index < m_padding[dim_index].first) ||
513 (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
514 index >= m_dimensions[dim_index] - m_padding[dim_index].second);
515#else
516 return (index < m_padding[dim_index].first) ||
517 (index >= m_dimensions[dim_index] - m_padding[dim_index].second);
518#endif
519 }
520
521 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
522 int dim_index) const {
523#if defined(EIGEN_HAS_INDEX_LIST)
524 return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
525#else
526 EIGEN_UNUSED_VARIABLE(dim_index);
527 return false;
528#endif
529 }
530
531 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
532 int dim_index) const {
533#if defined(EIGEN_HAS_INDEX_LIST)
534 return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
535#else
536 EIGEN_UNUSED_VARIABLE(dim_index);
537 return false;
538#endif
539 }
540
541
542 void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
543 const double in = static_cast<double>(m_impl.dimensions()[i]);
544 const double out = in + m_padding[i].first + m_padding[i].second;
545 if (out == 0)
546 return;
547 const double reduction = in / out;
548 cost *= reduction;
549 if (first) {
550 cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
551 reduction * (1 * TensorOpCost::AddCost<Index>()));
552 } else {
553 cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
554 2 * TensorOpCost::MulCost<Index>() +
555 reduction * (2 * TensorOpCost::MulCost<Index>() +
556 1 * TensorOpCost::DivCost<Index>()));
557 }
558 }
559
560 protected:
561
562 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
563 {
564 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
565 eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
566
567 const Index initialIndex = index;
568 Index inputIndex = 0;
569 EIGEN_UNROLL_LOOP
570 for (int i = NumDims - 1; i > 0; --i) {
571 const Index firstIdx = index;
572 const Index lastIdx = index + PacketSize - 1;
573 const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
574 const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
575 const Index lastPaddedRight = m_outputStrides[i+1];
576
577 if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
578 // all the coefficient are in the padding zone.
579 return internal::pset1<PacketReturnType>(m_paddingValue);
580 }
581 else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
582 // all the coefficient are in the padding zone.
583 return internal::pset1<PacketReturnType>(m_paddingValue);
584 }
585 else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
586 // all the coefficient are between the 2 padding zones.
587 const Index idx = index / m_outputStrides[i];
588 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
589 index -= idx * m_outputStrides[i];
590 }
591 else {
592 // Every other case
593 return packetWithPossibleZero(initialIndex);
594 }
595 }
596
597 const Index lastIdx = index + PacketSize - 1;
598 const Index firstIdx = index;
599 const Index lastPaddedLeft = m_padding[0].first;
600 const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
601 const Index lastPaddedRight = m_outputStrides[1];
602
603 if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
604 // all the coefficient are in the padding zone.
605 return internal::pset1<PacketReturnType>(m_paddingValue);
606 }
607 else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
608 // all the coefficient are in the padding zone.
609 return internal::pset1<PacketReturnType>(m_paddingValue);
610 }
611 else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
612 // all the coefficient are between the 2 padding zones.
613 inputIndex += (index - m_padding[0].first);
614 return m_impl.template packet<Unaligned>(inputIndex);
615 }
616 // Every other case
617 return packetWithPossibleZero(initialIndex);
618 }
619
620 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
621 {
622 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
623 eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
624
625 const Index initialIndex = index;
626 Index inputIndex = 0;
627 EIGEN_UNROLL_LOOP
628 for (int i = 0; i < NumDims - 1; ++i) {
629 const Index firstIdx = index;
630 const Index lastIdx = index + PacketSize - 1;
631 const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
632 const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
633 const Index lastPaddedRight = m_outputStrides[i];
634
635 if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
636 // all the coefficient are in the padding zone.
637 return internal::pset1<PacketReturnType>(m_paddingValue);
638 }
639 else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
640 // all the coefficient are in the padding zone.
641 return internal::pset1<PacketReturnType>(m_paddingValue);
642 }
643 else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
644 // all the coefficient are between the 2 padding zones.
645 const Index idx = index / m_outputStrides[i+1];
646 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
647 index -= idx * m_outputStrides[i+1];
648 }
649 else {
650 // Every other case
651 return packetWithPossibleZero(initialIndex);
652 }
653 }
654
655 const Index lastIdx = index + PacketSize - 1;
656 const Index firstIdx = index;
657 const Index lastPaddedLeft = m_padding[NumDims-1].first;
658 const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
659 const Index lastPaddedRight = m_outputStrides[NumDims-1];
660
661 if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) {
662 // all the coefficient are in the padding zone.
663 return internal::pset1<PacketReturnType>(m_paddingValue);
664 }
665 else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
666 // all the coefficient are in the padding zone.
667 return internal::pset1<PacketReturnType>(m_paddingValue);
668 }
669 else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
670 // all the coefficient are between the 2 padding zones.
671 inputIndex += (index - m_padding[NumDims-1].first);
672 return m_impl.template packet<Unaligned>(inputIndex);
673 }
674 // Every other case
675 return packetWithPossibleZero(initialIndex);
676 }
677
678 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
679 {
680 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
681 EIGEN_UNROLL_LOOP
682 for (int i = 0; i < PacketSize; ++i) {
683 values[i] = coeff(index+i);
684 }
685 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
686 return rslt;
687 }
688
// Dimensions of the padded output (input extent plus both paddings), set in
// the constructor.
689 Dimensions m_dimensions;
// Linear strides of the output; holds NumDims + 1 entries because the total
// output size is stored alongside the per-dimension strides.
690 array<Index, NumDims+1> m_outputStrides;
// Linear strides of the (unpadded) input.
691 array<Index, NumDims> m_inputStrides;
// Evaluator of the expression being padded.
692 TensorEvaluator<ArgType, Device> m_impl;
// Per-dimension padding; `.first` is the leading and `.second` the trailing
// amount, as used throughout this evaluator.
693 PaddingDimensions m_padding;
694
// Value returned for every coefficient that lies in a padded range.
695 Scalar m_paddingValue;
696
// Device the evaluator was built for; queried for its last-level cache size
// in getResourceRequirements().
697 const Device EIGEN_DEVICE_REF m_device;
698};
699
700
701
702
703} // end namespace Eigen
704
705#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
The tensor base class.
Definition TensorForwardDeclarations.h:56
Tensor padding class. At the moment only padding with a constant value is supported.
Definition TensorPadding.h:52
Namespace containing all symbols from the Eigen library.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The tensor evaluator class.
Definition TensorEvaluator.h:27