Eigen-unsupported  5.0.1-dev+284dcc12
 
Loading...
Searching...
No Matches
TensorPadding.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
11#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
12
13// IWYU pragma: private
14#include "./InternalHeaderCheck.h"
15
16namespace Eigen {
17
18namespace internal {
19template <typename PaddingDimensions, typename XprType>
20struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType> {
21 typedef typename XprType::Scalar Scalar;
22 typedef traits<XprType> XprTraits;
23 typedef typename XprTraits::StorageKind StorageKind;
24 typedef typename XprTraits::Index Index;
25 typedef typename XprType::Nested Nested;
26 typedef std::remove_reference_t<Nested> Nested_;
27 static constexpr int NumDimensions = XprTraits::NumDimensions;
28 static constexpr int Layout = XprTraits::Layout;
29 typedef typename XprTraits::PointerType PointerType;
30};
31
32template <typename PaddingDimensions, typename XprType>
33struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense> {
34 typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
35};
36
37template <typename PaddingDimensions, typename XprType>
38struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1,
39 typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type> {
40 typedef TensorPaddingOp<PaddingDimensions, XprType> type;
41};
42
43} // end namespace internal
44
52template <typename PaddingDimensions, typename XprType>
53class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors> {
54 public:
55 typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
56 typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
57 typedef typename XprType::CoeffReturnType CoeffReturnType;
58 typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
59 typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
60 typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;
61
62 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims,
63 const Scalar padding_value)
64 : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}
65
66 EIGEN_DEVICE_FUNC const PaddingDimensions& padding() const { return m_padding_dims; }
67 EIGEN_DEVICE_FUNC Scalar padding_value() const { return m_padding_value; }
68
69 EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }
70
71 protected:
72 typename XprType::Nested m_xpr;
73 const PaddingDimensions m_padding_dims;
74 const Scalar m_padding_value;
75};
76
77// Eval as rvalue
78template <typename PaddingDimensions, typename ArgType, typename Device>
79struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device> {
81 typedef typename XprType::Index Index;
82 static constexpr int NumDims = internal::array_size<PaddingDimensions>::value;
83 typedef DSizes<Index, NumDims> Dimensions;
84 typedef typename XprType::Scalar Scalar;
85 typedef typename XprType::CoeffReturnType CoeffReturnType;
86 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
87 static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
88 typedef StorageMemory<CoeffReturnType, Device> Storage;
89 typedef typename Storage::Type EvaluatorPointerType;
90
91 static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
92 enum {
93 IsAligned = true,
94 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
96 PreferBlockAccess = true,
97 CoordAccess = true,
98 RawAccess = false
99 };
100
101 typedef std::remove_const_t<Scalar> ScalarNoConst;
102
103 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
104 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
105 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
106
107 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims, Layout, Index> TensorBlock;
108 //===--------------------------------------------------------------------===//
109
110 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
111 : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device) {
112 // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead
113 // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector
114 // of 1 element first and then pad.
115 EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
116
117 // Compute dimensions
118 m_dimensions = m_impl.dimensions();
119 for (int i = 0; i < NumDims; ++i) {
120 m_dimensions[i] += m_padding[i].first + m_padding[i].second;
121 }
122 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
123 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
124 m_inputStrides[0] = 1;
125 m_outputStrides[0] = 1;
126 for (int i = 1; i < NumDims; ++i) {
127 m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1];
128 m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
129 }
130 m_outputStrides[NumDims] = m_outputStrides[NumDims - 1] * m_dimensions[NumDims - 1];
131 } else {
132 m_inputStrides[NumDims - 1] = 1;
133 m_outputStrides[NumDims] = 1;
134 for (int i = NumDims - 2; i >= 0; --i) {
135 m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
136 m_outputStrides[i + 1] = m_outputStrides[i + 2] * m_dimensions[i + 1];
137 }
138 m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
139 }
140 }
141
142 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
143
144 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
145 m_impl.evalSubExprsIfNeeded(NULL);
146 return true;
147 }
148
#ifdef EIGEN_USE_THREADS
// Asynchronous counterpart of evalSubExprsIfNeeded(): the argument evaluator
// gets no destination buffer, and once it reports completion `done` is invoked
// with `true` regardless of the value the argument reported.
template <typename EvalSubExprsCallback>
EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType, EvalSubExprsCallback done) {
  m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool /*arg_result*/) {
    done(true);
  });
}
#endif  // EIGEN_USE_THREADS
155
156 EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }
157
158 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
159 eigen_assert(index < dimensions().TotalSize());
160 Index inputIndex = 0;
161 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
162 EIGEN_UNROLL_LOOP
163 for (int i = NumDims - 1; i > 0; --i) {
164 const Index idx = index / m_outputStrides[i];
165 if (isPaddingAtIndexForDim(idx, i)) {
166 return m_paddingValue;
167 }
168 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
169 index -= idx * m_outputStrides[i];
170 }
171 if (isPaddingAtIndexForDim(index, 0)) {
172 return m_paddingValue;
173 }
174 inputIndex += (index - m_padding[0].first);
175 } else {
176 EIGEN_UNROLL_LOOP
177 for (int i = 0; i < NumDims - 1; ++i) {
178 const Index idx = index / m_outputStrides[i + 1];
179 if (isPaddingAtIndexForDim(idx, i)) {
180 return m_paddingValue;
181 }
182 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
183 index -= idx * m_outputStrides[i + 1];
184 }
185 if (isPaddingAtIndexForDim(index, NumDims - 1)) {
186 return m_paddingValue;
187 }
188 inputIndex += (index - m_padding[NumDims - 1].first);
189 }
190 return m_impl.coeff(inputIndex);
191 }
192
193 template <int LoadMode>
194 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
195 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
196 return packetColMajor(index);
197 }
198 return packetRowMajor(index);
199 }
200
201 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
202 TensorOpCost cost = m_impl.costPerCoeff(vectorized);
203 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
204 EIGEN_UNROLL_LOOP
205 for (int i = 0; i < NumDims; ++i) updateCostPerDimension(cost, i, i == 0);
206 } else {
207 EIGEN_UNROLL_LOOP
208 for (int i = NumDims - 1; i >= 0; --i) updateCostPerDimension(cost, i, i == NumDims - 1);
209 }
210 return cost;
211 }
212
213 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
214 const size_t target_size = m_device.lastLevelCacheSize();
215 return internal::TensorBlockResourceRequirements::merge(
216 internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size), m_impl.getResourceRequirements());
217 }
218
// Materializes the block of the padded output described by `desc`.
//
// Storage for the result comes from TensorBlock::prepareStorage(desc, scratch).
// The block is filled one innermost-dimension row at a time:
//   * if the row's coordinate in any outer dimension lies in padding, the
//     whole row is filled with m_paddingValue;
//   * otherwise the row is assembled from padding, a linear copy out of
//     m_impl.data(), and padding again;
//   * when the innermost dimension has no padding and the block spans it
//     entirely, several consecutive rows are copied with one linear copy.
// A descriptor of size zero yields an empty view without touching storage.
// The third parameter (root_of_expr_ast) is unused.
// NOTE(review): input values are read through m_impl.data(); presumably the
// caller only uses block evaluation when the argument evaluator exposes a
// raw buffer -- confirm against the evaluator's BlockAccess flag.
219 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
220 bool /*root_of_expr_ast*/ = false) const {
221 // If one of the dimensions is zero, return empty block view.
222 if (desc.size() == 0) {
223 return TensorBlock(internal::TensorBlockKind::kView, NULL, desc.dimensions());
224 }
225
226 static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
227 const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;
228
229 Index offset = desc.offset();
230
231 // Compute offsets in the output tensor corresponding to the desc.offset().
232 DSizes<Index, NumDims> output_offsets;
233 for (int i = NumDims - 1; i > 0; --i) {
234 const int dim = IsColMajor ? i : NumDims - i - 1;
235 const int stride_dim = IsColMajor ? dim : dim + 1;
236 output_offsets[dim] = offset / m_outputStrides[stride_dim];
237 offset -= output_offsets[dim] * m_outputStrides[stride_dim];
238 }
239 output_offsets[inner_dim_idx] = offset;
240
241 // Offsets in the input corresponding to output offsets.
242 DSizes<Index, NumDims> input_offsets = output_offsets;
243 for (int i = 0; i < NumDims; ++i) {
244 const int dim = IsColMajor ? i : NumDims - i - 1;
245 input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
246 }
247
248 // Compute offset in the input buffer (at this point it might be illegal and
249 // point outside of the input buffer, because we don't check for negative
250 // offsets, it will be autocorrected in the block iteration loop below).
251 Index input_offset = 0;
252 for (int i = 0; i < NumDims; ++i) {
253 const int dim = IsColMajor ? i : NumDims - i - 1;
254 input_offset += input_offsets[dim] * m_inputStrides[dim];
255 }
256
257 // Destination buffer and scratch buffer both indexed from 0 and have the
258 // same dimensions as the requested block (for destination buffer this
259 // property is guaranteed by `desc.destination()`).
260 Index output_offset = 0;
261 const DSizes<Index, NumDims> output_strides = internal::strides<Layout>(desc.dimensions());
262
263 // NOTE(ezhulenev): We initialize block iteration state for `NumDims - 1`
264 // dimensions, skipping innermost dimension. In theory it should be possible
265 // to squeeze matching innermost dimensions, however in practice that did
266 // not show any improvements in benchmarks. Also in practice first outer
267 // dimension usually has padding, and will prevent squeezing.
268
269 // Initialize output block iterator state. Dimension in this array are
270 // always in inner_most -> outer_most order (col major layout).
271 array<BlockIteratorState, NumDims - 1> it;
272 for (int i = 0; i < NumDims - 1; ++i) {
273 const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
274 it[i].count = 0;
275 it[i].size = desc.dimension(dim);
276
277 it[i].input_stride = m_inputStrides[dim];
278 it[i].input_span = it[i].input_stride * (it[i].size - 1);
279
280 it[i].output_stride = output_strides[dim];
281 it[i].output_span = it[i].output_stride * (it[i].size - 1);
282 }
283
284 const Index input_inner_dim_size = static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);
285
286 // Total output size.
287 const Index output_size = desc.size();
288
289 // We will fill inner dimension of this size in the output. It might be
290 // larger than the inner dimension in the input, so we might have to pad
291 // before/after we copy values from the input inner dimension.
292 const Index output_inner_dim_size = desc.dimension(inner_dim_idx);
293
294 // How many values to fill with padding BEFORE reading from the input inner
295 // dimension.
296 const Index output_inner_pad_before_size =
297 input_offsets[inner_dim_idx] < 0
298 ? numext::mini(numext::abs(input_offsets[inner_dim_idx]), output_inner_dim_size)
299 : 0;
300
301 // How many values we can actually copy from the input inner dimension.
302 const Index output_inner_copy_size = numext::mini(
303 // Want to copy from input.
304 (output_inner_dim_size - output_inner_pad_before_size),
305 // Can copy from input.
306 numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] + output_inner_pad_before_size), Index(0)));
307
308 eigen_assert(output_inner_copy_size >= 0);
309
310 // How many values to fill with padding AFTER reading from the input inner
311 // dimension.
312 const Index output_inner_pad_after_size =
313 (output_inner_dim_size - output_inner_copy_size - output_inner_pad_before_size);
314
315 // Sanity check, sum of all sizes must be equal to the output size.
316 eigen_assert(output_inner_dim_size ==
317 (output_inner_pad_before_size + output_inner_copy_size + output_inner_pad_after_size));
318
319 // Keep track of current coordinates and padding in the output.
320 DSizes<Index, NumDims> output_coord = output_offsets;
321 DSizes<Index, NumDims> output_padded;
322 for (int i = 0; i < NumDims; ++i) {
323 const int dim = IsColMajor ? i : NumDims - i - 1;
324 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
325 }
326
327 typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;
328
329 // Prepare storage for the materialized padding result.
330 const typename TensorBlock::Storage block_storage = TensorBlock::prepareStorage(desc, scratch);
331
332 // TODO(ezhulenev): Squeeze multiple non-padded inner dimensions into a
333 // single logical inner dimension.
334
335 // When possible we squeeze writes for the innermost (only if non-padded)
336 // dimension with the first padded dimension. This allows to reduce the
337 // number of calls to LinCopy and better utilize vector instructions.
338 const bool squeeze_writes = NumDims > 1 &&
339 // inner dimension is not padded
340 (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
341 // and equal to the block inner dimension
342 (input_inner_dim_size == output_inner_dim_size);
343
344 const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;
345
346 // Maximum coordinate on a squeeze dimension that we can write to.
347 const Index squeeze_max_coord =
348 squeeze_writes ? numext::mini(
349 // max non-padded element in the input
350 static_cast<Index>(m_dimensions[squeeze_dim] - m_padding[squeeze_dim].second),
351 // max element in the output buffer
352 static_cast<Index>(output_offsets[squeeze_dim] + desc.dimension(squeeze_dim)))
353 : static_cast<Index>(0);
354
355 // Iterate copying data from `m_impl.data()` to the output buffer.
// `size` counts the output elements written so far; each branch below adds
// the number of elements it has just produced.
356 for (Index size = 0; size < output_size;) {
357 // Detect if we are in the padded region (exclude innermost dimension).
358 bool is_padded = false;
359 for (int j = 1; j < NumDims; ++j) {
360 const int dim = IsColMajor ? j : NumDims - j - 1;
361 is_padded = output_padded[dim];
362 if (is_padded) break;
363 }
364
365 if (is_padded) {
366 // Fill single innermost dimension with padding value.
367 size += output_inner_dim_size;
368
369 LinCopy::template Run<LinCopy::Kind::FillLinear>(typename LinCopy::Dst(output_offset, 1, block_storage.data()),
370 typename LinCopy::Src(0, 0, &m_paddingValue),
371 output_inner_dim_size);
372
373 } else if (squeeze_writes) {
374 // Squeeze multiple reads from innermost dimensions.
375 const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
376 size += output_inner_dim_size * squeeze_num;
377
378 // Copy `squeeze_num` inner dimensions from input to output.
379 LinCopy::template Run<LinCopy::Kind::Linear>(typename LinCopy::Dst(output_offset, 1, block_storage.data()),
380 typename LinCopy::Src(input_offset, 1, m_impl.data()),
381 output_inner_dim_size * squeeze_num);
382
383 // Update iteration state for only `squeeze_num - 1` processed inner
384 // dimensions, because we have another iteration state update at the end
385 // of the loop that will update iteration state for the last inner
386 // processed dimension.
387 it[0].count += (squeeze_num - 1);
388 input_offset += it[0].input_stride * (squeeze_num - 1);
389 output_offset += it[0].output_stride * (squeeze_num - 1);
390 output_coord[squeeze_dim] += (squeeze_num - 1);
391
392 } else {
393 // Single read from innermost dimension.
394 size += output_inner_dim_size;
395
396 { // Fill with padding before copying from input inner dimension.
397 const Index out = output_offset;
398
399 LinCopy::template Run<LinCopy::Kind::FillLinear>(typename LinCopy::Dst(out, 1, block_storage.data()),
400 typename LinCopy::Src(0, 0, &m_paddingValue),
401 output_inner_pad_before_size);
402 }
403
404 { // Copy data from input inner dimension.
405 const Index out = output_offset + output_inner_pad_before_size;
406 const Index in = input_offset + output_inner_pad_before_size;
407
408 eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);
409
410 LinCopy::template Run<LinCopy::Kind::Linear>(typename LinCopy::Dst(out, 1, block_storage.data()),
411 typename LinCopy::Src(in, 1, m_impl.data()),
412 output_inner_copy_size);
413 }
414
415 { // Fill with padding after copying from input inner dimension.
416 const Index out = output_offset + output_inner_pad_before_size + output_inner_copy_size;
417
418 LinCopy::template Run<LinCopy::Kind::FillLinear>(typename LinCopy::Dst(out, 1, block_storage.data()),
419 typename LinCopy::Src(0, 0, &m_paddingValue),
420 output_inner_pad_after_size);
421 }
422 }
423
// Advance the outer-dimension iterators: step the first dimension that
// still has rows left, resetting every dimension that has wrapped around.
424 for (int j = 0; j < NumDims - 1; ++j) {
425 const int dim = IsColMajor ? j + 1 : NumDims - j - 2;
426
427 if (++it[j].count < it[j].size) {
428 input_offset += it[j].input_stride;
429 output_offset += it[j].output_stride;
430 output_coord[dim] += 1;
431 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
432 break;
433 }
434 it[j].count = 0;
435 input_offset -= it[j].input_span;
436 output_offset -= it[j].output_span;
437 output_coord[dim] -= it[j].size - 1;
438 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
439 }
440 }
441
442 return block_storage.AsTensorMaterializedBlock();
443 }
444
445 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
446
447 private:
448 struct BlockIteratorState {
449 BlockIteratorState() : count(0), size(0), input_stride(0), input_span(0), output_stride(0), output_span(0) {}
450
451 Index count;
452 Index size;
453 Index input_stride;
454 Index input_span;
455 Index output_stride;
456 Index output_span;
457 };
458
459 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(Index index, int dim_index) const {
460 return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
461 index < m_padding[dim_index].first) ||
462 (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
463 index >= m_dimensions[dim_index] - m_padding[dim_index].second);
464 }
465
466 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(int dim_index) const {
467 return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
468 }
469
470 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(int dim_index) const {
471 return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
472 }
473
474 void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
475 const double in = static_cast<double>(m_impl.dimensions()[i]);
476 const double out = in + m_padding[i].first + m_padding[i].second;
477 if (out == 0) return;
478 const double reduction = in / out;
479 cost *= reduction;
480 if (first) {
481 cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() + reduction * (1 * TensorOpCost::AddCost<Index>()));
482 } else {
483 cost += TensorOpCost(0, 0,
484 2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
485 reduction * (2 * TensorOpCost::MulCost<Index>() + 1 * TensorOpCost::DivCost<Index>()));
486 }
487 }
488
489 protected:
490 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const {
491 eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
492
493 const Index initialIndex = index;
494 Index inputIndex = 0;
495 EIGEN_UNROLL_LOOP
496 for (int i = NumDims - 1; i > 0; --i) {
497 const Index firstIdx = index;
498 const Index lastIdx = index + PacketSize - 1;
499 const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
500 const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
501 const Index lastPaddedRight = m_outputStrides[i + 1];
502
503 if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
504 // all the coefficient are in the padding zone.
505 return internal::pset1<PacketReturnType>(m_paddingValue);
506 } else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
507 // all the coefficient are in the padding zone.
508 return internal::pset1<PacketReturnType>(m_paddingValue);
509 } else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) ||
510 (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
511 // all the coefficient are between the 2 padding zones.
512 const Index idx = index / m_outputStrides[i];
513 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
514 index -= idx * m_outputStrides[i];
515 } else {
516 // Every other case
517 return packetWithPossibleZero(initialIndex);
518 }
519 }
520
521 const Index lastIdx = index + PacketSize - 1;
522 const Index firstIdx = index;
523 const Index lastPaddedLeft = m_padding[0].first;
524 const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
525 const Index lastPaddedRight = m_outputStrides[1];
526
527 if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
528 // all the coefficient are in the padding zone.
529 return internal::pset1<PacketReturnType>(m_paddingValue);
530 } else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
531 // all the coefficient are in the padding zone.
532 return internal::pset1<PacketReturnType>(m_paddingValue);
533 } else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) ||
534 (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
535 // all the coefficient are between the 2 padding zones.
536 inputIndex += (index - m_padding[0].first);
537 return m_impl.template packet<Unaligned>(inputIndex);
538 }
539 // Every other case
540 return packetWithPossibleZero(initialIndex);
541 }
542
543 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const {
544 eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
545
546 const Index initialIndex = index;
547 Index inputIndex = 0;
548 EIGEN_UNROLL_LOOP
549 for (int i = 0; i < NumDims - 1; ++i) {
550 const Index firstIdx = index;
551 const Index lastIdx = index + PacketSize - 1;
552 const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i + 1];
553 const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i + 1];
554 const Index lastPaddedRight = m_outputStrides[i];
555
556 if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
557 // all the coefficient are in the padding zone.
558 return internal::pset1<PacketReturnType>(m_paddingValue);
559 } else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
560 // all the coefficient are in the padding zone.
561 return internal::pset1<PacketReturnType>(m_paddingValue);
562 } else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) ||
563 (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
564 // all the coefficient are between the 2 padding zones.
565 const Index idx = index / m_outputStrides[i + 1];
566 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
567 index -= idx * m_outputStrides[i + 1];
568 } else {
569 // Every other case
570 return packetWithPossibleZero(initialIndex);
571 }
572 }
573
574 const Index lastIdx = index + PacketSize - 1;
575 const Index firstIdx = index;
576 const Index lastPaddedLeft = m_padding[NumDims - 1].first;
577 const Index firstPaddedRight = (m_dimensions[NumDims - 1] - m_padding[NumDims - 1].second);
578 const Index lastPaddedRight = m_outputStrides[NumDims - 1];
579
580 if (!isLeftPaddingCompileTimeZero(NumDims - 1) && lastIdx < lastPaddedLeft) {
581 // all the coefficient are in the padding zone.
582 return internal::pset1<PacketReturnType>(m_paddingValue);
583 } else if (!isRightPaddingCompileTimeZero(NumDims - 1) && firstIdx >= firstPaddedRight &&
584 lastIdx < lastPaddedRight) {
585 // all the coefficient are in the padding zone.
586 return internal::pset1<PacketReturnType>(m_paddingValue);
587 } else if ((isLeftPaddingCompileTimeZero(NumDims - 1) && isRightPaddingCompileTimeZero(NumDims - 1)) ||
588 (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
589 // all the coefficient are between the 2 padding zones.
590 inputIndex += (index - m_padding[NumDims - 1].first);
591 return m_impl.template packet<Unaligned>(inputIndex);
592 }
593 // Every other case
594 return packetWithPossibleZero(initialIndex);
595 }
596
597 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const {
598 EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
599 EIGEN_UNROLL_LOOP
600 for (int i = 0; i < PacketSize; ++i) {
601 values[i] = coeff(index + i);
602 }
603 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
604 return rslt;
605 }
606
// Dimensions of the padded output (input size plus both paddings per dimension).
607 Dimensions m_dimensions;
// Output strides, filled in by the constructor. There is one more entry than
// dimensions: the extra slot holds the total output size (last slot for
// column-major, slot 0 for row-major, where strides are shifted by one).
608 array<Index, NumDims + 1> m_outputStrides;
// Strides of the argument tensor, one per dimension.
609 array<Index, NumDims> m_inputStrides;
// Evaluator of the expression being padded.
610 TensorEvaluator<ArgType, Device> m_impl;
// Per-dimension padding; entry i provides `.first` (leading) and `.second`
// (trailing) amounts.
611 PaddingDimensions m_padding;
612
// Value returned for every padded coefficient.
613 Scalar m_paddingValue;
614
// Device the evaluator was constructed with; queried for its cache size in
// getResourceRequirements().
615 const Device EIGEN_DEVICE_REF m_device;
616};
617
618} // end namespace Eigen
619
620#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
The tensor base class.
Definition TensorForwardDeclarations.h:68
Tensor padding class. At the moment only padding with a constant value is supported.
Definition TensorPadding.h:53
Namespace containing all symbols from the Eigen library.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The tensor evaluator class.
Definition TensorEvaluator.h:30