10#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
11#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
14#include "./InternalHeaderCheck.h"
// Traits specialization for TensorPaddingOp. It inherits from traits<XprType>
// and re-declares the scalar, storage kind, index, nested-expression, rank,
// layout and pointer typedefs in terms of the wrapped expression XprType.
// NOTE(review): the closing brace of this struct is not visible in this listing.
19template <
typename PaddingDimensions,
typename XprType>
20struct traits<TensorPaddingOp<PaddingDimensions, XprType> > :
public traits<XprType> {
21 typedef typename XprType::Scalar Scalar;
22 typedef traits<XprType> XprTraits;
23 typedef typename XprTraits::StorageKind StorageKind;
24 typedef typename XprTraits::Index
Index;
25 typedef typename XprType::Nested Nested;
// Nested with any reference qualifier stripped.
26 typedef std::remove_reference_t<Nested> Nested_;
// Rank and layout are taken unchanged from the wrapped expression's traits.
27 static constexpr int NumDimensions = XprTraits::NumDimensions;
28 static constexpr int Layout = XprTraits::Layout;
29 typedef typename XprTraits::PointerType PointerType;
// eval<> specialization for dense storage: the evaluated type of a
// TensorPaddingOp is a const reference to the op itself.
32template <
typename PaddingDimensions,
typename XprType>
33struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense> {
34 typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
// nested<> specialization: when a TensorPaddingOp is nested inside another
// expression, the nested type is the op itself (held by value, not by reference).
37template <
typename PaddingDimensions,
typename XprType>
38struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1,
39 typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type> {
40 typedef TensorPaddingOp<PaddingDimensions, XprType> type;
// Expression node describing the padding of a tensor expression with a constant
// value. It derives from TensorBase with ReadOnlyAccessors and stores the wrapped
// expression, the per-dimension padding description and the padding value.
// NOTE(review): access specifiers and the closing brace of the class are not
// visible in this listing — confirm against the real header.
52template <
typename PaddingDimensions,
typename XprType>
53class TensorPaddingOp :
public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors> {
// Type aliases pulled from the traits / nested specializations of this op.
55 typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
57 typedef typename XprType::CoeffReturnType CoeffReturnType;
58 typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
59 typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
60 typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;
// Builds the op from the expression to pad, the padding description and the
// value used to fill padded positions. padding_dims and padding_value are
// copied into the op.
62 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(
const XprType& expr,
const PaddingDimensions& padding_dims,
63 const Scalar padding_value)
64 : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}
// Returns the stored padding description.
66 EIGEN_DEVICE_FUNC
const PaddingDimensions& padding()
const {
return m_padding_dims; }
// Returns the value used to fill padded positions (by value).
67 EIGEN_DEVICE_FUNC Scalar padding_value()
const {
return m_padding_value; }
// Returns the wrapped (unpadded) expression.
69 EIGEN_DEVICE_FUNC
const internal::remove_all_t<typename XprType::Nested>& expression()
const {
return m_xpr; }
// Stored state: wrapped expression, padding description, padding value.
72 typename XprType::Nested m_xpr;
73 const PaddingDimensions m_padding_dims;
74 const Scalar m_padding_value;
// Template header and type/flag declarations of the evaluator for a padding op.
// NOTE(review): the `struct TensorEvaluator<...>` header line, the enum that
// presumably wraps the PacketAccess / PreferBlockAccess flags, and some typedefs
// used below (e.g. XprType, CoeffReturnType, Dimensions) are not visible in this
// listing — confirm against the real header.
78template <
typename PaddingDimensions,
typename ArgType,
typename Device>
81 typedef typename XprType::Index
Index;
// Rank is derived from the number of entries in the padding description.
82 static constexpr int NumDims = internal::array_size<PaddingDimensions>::value;
84 typedef typename XprType::Scalar
Scalar;
// Packet type and width for this coefficient type on this device.
86 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
87 static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
88 typedef StorageMemory<CoeffReturnType, Device> Storage;
89 typedef typename Storage::Type EvaluatorPointerType;
// Layout is inherited from the argument evaluator.
91 static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
// Packet access is available exactly when the argument evaluator provides it.
94 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
96 PreferBlockAccess =
true,
101 typedef std::remove_const_t<Scalar> ScalarNoConst;
// Types used by the block-evaluation entry point block().
104 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
105 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
107 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims, Layout, Index> TensorBlock;
// Constructs the evaluator: builds the argument evaluator from op.expression(),
// copies the padding description and padding value, keeps the device, and then
// precomputes the padded output dimensions plus input/output strides.
// NOTE(review): several closing braces and the `else` between the two stride
// computations are not visible in this listing.
110 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
111 : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device) {
// Rank-0 padding is rejected at compile time.
115 EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
// Output extent of each dimension = input extent + left padding + right padding.
118 m_dimensions = m_impl.dimensions();
119 for (
int i = 0; i < NumDims; ++i) {
120 m_dimensions[i] += m_padding[i].first + m_padding[i].second;
122 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
123 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
// ColMajor: dimension 0 is innermost (stride 1). m_outputStrides has NumDims + 1
// entries; entry i is the output stride of dimension i and the extra last entry
// holds the product of all output dimensions.
124 m_inputStrides[0] = 1;
125 m_outputStrides[0] = 1;
126 for (
int i = 1; i < NumDims; ++i) {
127 m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1];
128 m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
130 m_outputStrides[NumDims] = m_outputStrides[NumDims - 1] * m_dimensions[NumDims - 1];
// The lines below treat the last dimension as innermost (presumably the RowMajor
// branch). Here the output stride of dimension i is stored at index i + 1, so
// entry NumDims is 1 and entry 0 holds the product of all output dimensions.
132 m_inputStrides[NumDims - 1] = 1;
133 m_outputStrides[NumDims] = 1;
134 for (
int i = NumDims - 2; i >= 0; --i) {
135 m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
136 m_outputStrides[i + 1] = m_outputStrides[i + 2] * m_dimensions[i + 1];
138 m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
// Returns the stored output dimensions (m_dimensions) of this evaluator.
142 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dimensions; }
// Asks the argument evaluator to evaluate its sub-expressions. The destination
// pointer passed to this evaluator is unnamed and ignored; NULL is forwarded.
// NOTE(review): the return statement and closing brace are not visible in this
// listing.
144 EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(EvaluatorPointerType) {
145 m_impl.evalSubExprsIfNeeded(NULL);
149#ifdef EIGEN_USE_THREADS
// Asynchronous counterpart of evalSubExprsIfNeeded. Forwards to the argument
// evaluator with a null destination; the wrapper callback discards the bool
// reported by the argument evaluator and always invokes done(true).
150 template <
typename EvalSubExprsCallback>
151 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(EvaluatorPointerType, EvalSubExprsCallback done) {
152 m_impl.evalSubExprsIfNeededAsync(
nullptr, [done](
bool) { done(
true); });
// Forwards cleanup to the argument evaluator; this evaluator does nothing else here.
156 EIGEN_STRONG_INLINE
void cleanup() { m_impl.cleanup(); }
// Returns the coefficient at linear output position `index`.
// The linear index is decomposed into per-dimension coordinates from the
// outermost dimension to the innermost one. As soon as a coordinate falls into
// the padding of its dimension the padding value is returned; otherwise the
// coordinate, shifted by the left padding, is accumulated into the linear input
// index, which is finally read from the argument evaluator.
// NOTE(review): closing braces and the `else` between the two layout branches
// are not visible in this listing.
158 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const {
159 eigen_assert(index < dimensions().TotalSize());
160 Index inputIndex = 0;
161 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
// ColMajor: peel coordinates from dimension NumDims - 1 down to 1; the stride of
// dimension i is m_outputStrides[i].
163 for (
int i = NumDims - 1; i > 0; --i) {
164 const Index idx = index / m_outputStrides[i];
165 if (isPaddingAtIndexForDim(idx, i)) {
166 return m_paddingValue;
168 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
169 index -= idx * m_outputStrides[i];
// What remains of `index` is the coordinate along the innermost dimension 0.
171 if (isPaddingAtIndexForDim(index, 0)) {
172 return m_paddingValue;
174 inputIndex += (index - m_padding[0].first);
// Other layout: peel coordinates from dimension 0 up to NumDims - 2; the stride
// of dimension i is stored at m_outputStrides[i + 1].
177 for (
int i = 0; i < NumDims - 1; ++i) {
178 const Index idx = index / m_outputStrides[i + 1];
179 if (isPaddingAtIndexForDim(idx, i)) {
180 return m_paddingValue;
182 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
183 index -= idx * m_outputStrides[i + 1];
// What remains of `index` is the coordinate along the innermost dimension NumDims - 1.
185 if (isPaddingAtIndexForDim(index, NumDims - 1)) {
186 return m_paddingValue;
188 inputIndex += (index - m_padding[NumDims - 1].first);
190 return m_impl.coeff(inputIndex);
// Returns the packet starting at linear output position `index` by dispatching
// to the layout-specific implementation. LoadMode is not used in the visible body.
193 template <
int LoadMode>
194 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const {
195 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
196 return packetColMajor(index);
198 return packetRowMajor(index);
// Estimates the cost of producing one coefficient: starts from the argument
// evaluator's cost and adds a per-dimension contribution via
// updateCostPerDimension. Dimensions are visited starting from the innermost one
// of the layout, and only that innermost dimension is flagged as `first`.
// NOTE(review): the `else` and the final return are not visible in this listing.
201 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(
bool vectorized)
const {
202 TensorOpCost cost = m_impl.costPerCoeff(vectorized);
203 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
205 for (
int i = 0; i < NumDims; ++i) updateCostPerDimension(cost, i, i == 0);
208 for (
int i = NumDims - 1; i >= 0; --i) updateCostPerDimension(cost, i, i == NumDims - 1);
// Block-evaluation resource requirements: a skewed block shape sized from the
// device's last-level cache size, merged with the argument evaluator's own
// requirements.
213 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements()
const {
214 const size_t target_size = m_device.lastLevelCacheSize();
215 return internal::TensorBlockResourceRequirements::merge(
216 internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size), m_impl.getResourceRequirements());
// Materializes the output block described by `desc` into storage obtained from
// TensorBlock::prepareStorage(desc, scratch).
// The block is produced one inner-dimension row at a time: a row is filled with
// the padding value when any outer coordinate lies in padding, otherwise it is
// split into [pad before | copy from input | pad after]. When the inner
// dimension has no padding and the block spans it fully, several rows are copied
// with a single linear copy ("squeeze").
// Input data is read through m_impl.data().
// NOTE(review): many closing braces, several `else`/scope lines and the `: 0`
// arm of one conditional are not visible in this listing — confirm against the
// real header before relying on the exact structure.
219 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
220 bool =
false)
const {
// Empty block: return a view block with no data.
222 if (desc.size() == 0) {
223 return TensorBlock(internal::TensorBlockKind::kView, NULL, desc.dimensions());
226 static const bool IsColMajor = Layout ==
static_cast<int>(
ColMajor);
227 const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;
// Decompose the block's linear offset into per-dimension output coordinates,
// outermost dimension first; the remainder is the inner-dimension coordinate.
229 Index offset = desc.offset();
232 DSizes<Index, NumDims> output_offsets;
233 for (
int i = NumDims - 1; i > 0; --i) {
234 const int dim = IsColMajor ? i : NumDims - i - 1;
235 const int stride_dim = IsColMajor ? dim : dim + 1;
236 output_offsets[dim] = offset / m_outputStrides[stride_dim];
237 offset -= output_offsets[dim] * m_outputStrides[stride_dim];
239 output_offsets[inner_dim_idx] = offset;
// Input coordinates = output coordinates shifted by the left padding. These can
// be negative when the block starts inside the left padding.
242 DSizes<Index, NumDims> input_offsets = output_offsets;
243 for (
int i = 0; i < NumDims; ++i) {
244 const int dim = IsColMajor ? i : NumDims - i - 1;
245 input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
// Linear offset into the input corresponding to the (possibly out-of-range)
// input coordinates.
251 Index input_offset = 0;
252 for (
int i = 0; i < NumDims; ++i) {
253 const int dim = IsColMajor ? i : NumDims - i - 1;
254 input_offset += input_offsets[dim] * m_inputStrides[dim];
// Writing starts at offset 0 of the block buffer; output strides are those of a
// dense buffer with the block's dimensions.
260 Index output_offset = 0;
261 const DSizes<Index, NumDims> output_strides = internal::strides<Layout>(desc.dimensions());
// Iteration state for every dimension except the inner one, ordered from the
// dimension next to the inner one outward.
271 array<BlockIteratorState, NumDims - 1> it;
272 for (
int i = 0; i < NumDims - 1; ++i) {
273 const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
275 it[i].size = desc.dimension(dim);
277 it[i].input_stride = m_inputStrides[dim];
278 it[i].input_span = it[i].input_stride * (it[i].size - 1);
280 it[i].output_stride = output_strides[dim];
281 it[i].output_span = it[i].output_stride * (it[i].size - 1);
284 const Index input_inner_dim_size =
static_cast<Index
>(m_impl.dimensions()[inner_dim_idx]);
287 const Index output_size = desc.size();
292 const Index output_inner_dim_size = desc.dimension(inner_dim_idx);
// Number of leading elements of an inner row that fall into the left padding,
// capped at the row length.
296 const Index output_inner_pad_before_size =
297 input_offsets[inner_dim_idx] < 0
298 ? numext::mini(numext::abs(input_offsets[inner_dim_idx]), output_inner_dim_size)
// Number of elements of an inner row that are copied from the input: whatever is
// left of the row after the leading padding, capped by the remaining input extent.
302 const Index output_inner_copy_size = numext::mini(
304 (output_inner_dim_size - output_inner_pad_before_size),
306 numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] + output_inner_pad_before_size), Index(0)));
308 eigen_assert(output_inner_copy_size >= 0);
// The rest of the row is trailing padding.
312 const Index output_inner_pad_after_size =
313 (output_inner_dim_size - output_inner_copy_size - output_inner_pad_before_size);
316 eigen_assert(output_inner_dim_size ==
317 (output_inner_pad_before_size + output_inner_copy_size + output_inner_pad_after_size));
// Current output coordinate and, per dimension, whether that coordinate lies in
// padding.
320 DSizes<Index, NumDims> output_coord = output_offsets;
321 DSizes<Index, NumDims> output_padded;
322 for (
int i = 0; i < NumDims; ++i) {
323 const int dim = IsColMajor ? i : NumDims - i - 1;
324 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
327 typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;
330 const typename TensorBlock::Storage block_storage = TensorBlock::prepareStorage(desc, scratch);
// Squeezing is enabled when there is more than one dimension, the inner
// dimension is unpadded (input extent equals output extent) and the block covers
// the whole inner dimension.
338 const bool squeeze_writes = NumDims > 1 &&
340 (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
342 (input_inner_dim_size == output_inner_dim_size);
// The dimension adjacent to the inner one is the one squeezed into the copy.
344 const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;
// One-past-the-last squeeze_dim coordinate that can be copied in one go: the
// smaller of the start of the right padding and the end of the block.
347 const Index squeeze_max_coord =
348 squeeze_writes ? numext::mini(
350 static_cast<Index
>(m_dimensions[squeeze_dim] - m_padding[squeeze_dim].second),
352 static_cast<Index
>(output_offsets[squeeze_dim] + desc.dimension(squeeze_dim)))
353 : static_cast<Index>(0);
// Main loop: `size` counts output elements produced so far.
356 for (Index size = 0; size < output_size;) {
// Check whether any non-inner coordinate is in padding.
358 bool is_padded =
false;
359 for (
int j = 1; j < NumDims; ++j) {
360 const int dim = IsColMajor ? j : NumDims - j - 1;
361 is_padded = output_padded[dim];
362 if (is_padded)
break;
// Fill one whole inner row with the padding value (source stride 0 re-reads the
// single padding value).
367 size += output_inner_dim_size;
369 LinCopy::template Run<LinCopy::Kind::FillLinear>(
typename LinCopy::Dst(output_offset, 1, block_storage.data()),
370 typename LinCopy::Src(0, 0, &m_paddingValue),
371 output_inner_dim_size);
373 }
else if (squeeze_writes) {
// Squeezed copy: copy squeeze_num inner rows with one linear copy.
375 const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
376 size += output_inner_dim_size * squeeze_num;
379 LinCopy::template Run<LinCopy::Kind::Linear>(
typename LinCopy::Dst(output_offset, 1, block_storage.data()),
380 typename LinCopy::Src(input_offset, 1, m_impl.data()),
381 output_inner_dim_size * squeeze_num);
// Advance the iterator by squeeze_num - 1 rows here; the iterator update at the
// end of the loop body accounts for the remaining one.
387 it[0].count += (squeeze_num - 1);
388 input_offset += it[0].input_stride * (squeeze_num - 1);
389 output_offset += it[0].output_stride * (squeeze_num - 1);
390 output_coord[squeeze_dim] += (squeeze_num - 1);
// General row: leading padding, copy from input, trailing padding.
394 size += output_inner_dim_size;
397 const Index out = output_offset;
399 LinCopy::template Run<LinCopy::Kind::FillLinear>(
typename LinCopy::Dst(out, 1, block_storage.data()),
400 typename LinCopy::Src(0, 0, &m_paddingValue),
401 output_inner_pad_before_size);
405 const Index out = output_offset + output_inner_pad_before_size;
406 const Index in = input_offset + output_inner_pad_before_size;
// A null input buffer is tolerated only when nothing has to be copied.
408 eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);
410 LinCopy::template Run<LinCopy::Kind::Linear>(
typename LinCopy::Dst(out, 1, block_storage.data()),
411 typename LinCopy::Src(in, 1, m_impl.data()),
412 output_inner_copy_size);
416 const Index out = output_offset + output_inner_pad_before_size + output_inner_copy_size;
418 LinCopy::template Run<LinCopy::Kind::FillLinear>(
typename LinCopy::Dst(out, 1, block_storage.data()),
419 typename LinCopy::Src(0, 0, &m_paddingValue),
420 output_inner_pad_after_size);
// Advance the multi-dimensional iterator over the non-inner dimensions: step the
// first dimension that has not reached its size, otherwise rewind it by its span.
// (Presumably the counter is also reset and the next dimension tried — those
// lines are not visible here.)
424 for (
int j = 0; j < NumDims - 1; ++j) {
425 const int dim = IsColMajor ? j + 1 : NumDims - j - 2;
427 if (++it[j].count < it[j].size) {
428 input_offset += it[j].input_stride;
429 output_offset += it[j].output_stride;
430 output_coord[dim] += 1;
431 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
435 input_offset -= it[j].input_span;
436 output_offset -= it[j].output_span;
437 output_coord[dim] -= it[j].size - 1;
438 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
442 return block_storage.AsTensorMaterializedBlock();
// Always returns NULL: this evaluator exposes no directly addressable buffer.
445 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data()
const {
return NULL; }
// Per-dimension iteration state used by block(): a position counter, the extent
// of the dimension, and the stride/span pairs for the input and the output
// buffer. All fields start at zero.
// NOTE(review): the field declarations and closing brace are not visible in this
// listing.
448 struct BlockIteratorState {
449 BlockIteratorState() : count(0), size(0), input_stride(0), input_span(0), output_stride(0), output_span(0) {}
// Returns true when output coordinate `index` along dimension `dim_index` lies
// in the left padding (index < first) or in the right padding
// (index >= output extent - second). Each side is skipped when its padding is
// statically known to be 0.
459 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool isPaddingAtIndexForDim(Index index,
int dim_index)
const {
460 return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
461 index < m_padding[dim_index].first) ||
462 (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
463 index >= m_dimensions[dim_index] - m_padding[dim_index].second);
// True when the left (first) padding of dimension `dim_index` is statically
// known to equal 0, as reported by index_pair_first_statically_eq.
466 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool isLeftPaddingCompileTimeZero(
int dim_index)
const {
467 return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
// True when the right (second) padding of dimension `dim_index` is statically
// known to equal 0, as reported by index_pair_second_statically_eq.
470 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool isRightPaddingCompileTimeZero(
int dim_index)
const {
471 return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
// Adds to `cost` the index-arithmetic cost attributable to dimension `i`.
// `reduction` is the ratio of the input extent to the padded output extent of
// that dimension; it scales part of the added cost. Nothing is added when the
// padded extent is 0.
// NOTE(review): two alternative cost additions are visible below; presumably
// they are selected by `first` (cheaper add-only cost for the first dimension,
// mul/div cost otherwise), but the if/else lines are not visible in this listing.
474 void updateCostPerDimension(TensorOpCost& cost,
int i,
bool first)
const {
475 const double in =
static_cast<double>(m_impl.dimensions()[i]);
476 const double out = in + m_padding[i].first + m_padding[i].second;
// Guard against division by zero below.
477 if (out == 0)
return;
478 const double reduction = in / out;
481 cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() + reduction * (1 * TensorOpCost::AddCost<Index>()));
483 cost += TensorOpCost(0, 0,
484 2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
485 reduction * (2 * TensorOpCost::MulCost<Index>() + 1 * TensorOpCost::DivCost<Index>()));
// Packet load for the ColMajor layout, starting at linear output position `index`.
// For each dimension, from outermost to innermost, the range
// [index, index + PacketSize - 1] is classified:
//   - entirely inside the left or the right padding: broadcast the padding value;
//   - entirely inside the unpadded interior (or the dimension has no padding
//     statically): peel this dimension's coordinate and continue inward;
//   - anything else: fall back to packetWithPossibleZero on the original index.
// NOTE(review): closing braces and the `else` lines that enclose the fallbacks
// are not visible in this listing.
490 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index)
const {
491 eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
// Keep the untouched index for the element-wise fallback.
493 const Index initialIndex = index;
494 Index inputIndex = 0;
496 for (
int i = NumDims - 1; i > 0; --i) {
497 const Index firstIdx = index;
498 const Index lastIdx = index + PacketSize - 1;
// Boundaries of the padding regions of dimension i, in linear units of the
// remaining index.
499 const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
500 const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
501 const Index lastPaddedRight = m_outputStrides[i + 1];
503 if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
505 return internal::pset1<PacketReturnType>(m_paddingValue);
506 }
else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
508 return internal::pset1<PacketReturnType>(m_paddingValue);
509 }
else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) ||
510 (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
512 const Index idx = index / m_outputStrides[i];
513 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
514 index -= idx * m_outputStrides[i];
// Fallback when none of the cases above applies.
517 return packetWithPossibleZero(initialIndex);
// Innermost dimension (0): same classification with unit stride.
521 const Index lastIdx = index + PacketSize - 1;
522 const Index firstIdx = index;
523 const Index lastPaddedLeft = m_padding[0].first;
524 const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
525 const Index lastPaddedRight = m_outputStrides[1];
527 if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
529 return internal::pset1<PacketReturnType>(m_paddingValue);
530 }
else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
532 return internal::pset1<PacketReturnType>(m_paddingValue);
533 }
else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) ||
534 (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
// Whole packet is inside the input: load it with an unaligned packet read from
// the argument evaluator.
536 inputIndex += (index - m_padding[0].first);
537 return m_impl.template packet<Unaligned>(inputIndex);
// Fallback when none of the cases above applies.
540 return packetWithPossibleZero(initialIndex);
// Packet load for the non-ColMajor layout, starting at linear output position
// `index`. Mirrors packetColMajor with the last dimension innermost: dimensions
// are visited from 0 up to NumDims - 2, and the stride of dimension i is read
// from m_outputStrides[i + 1].
// For each dimension the range [index, index + PacketSize - 1] is classified:
//   - entirely inside the left or the right padding: broadcast the padding value;
//   - entirely inside the unpadded interior (or the dimension has no padding
//     statically): peel this dimension's coordinate and continue inward;
//   - anything else: fall back to packetWithPossibleZero on the original index.
// NOTE(review): closing braces and the `else` lines that enclose the fallbacks
// are not visible in this listing.
543 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index)
const {
544 eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
// Keep the untouched index for the element-wise fallback.
546 const Index initialIndex = index;
547 Index inputIndex = 0;
549 for (
int i = 0; i < NumDims - 1; ++i) {
550 const Index firstIdx = index;
551 const Index lastIdx = index + PacketSize - 1;
// Boundaries of the padding regions of dimension i, in linear units of the
// remaining index.
552 const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i + 1];
553 const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i + 1];
554 const Index lastPaddedRight = m_outputStrides[i];
556 if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
558 return internal::pset1<PacketReturnType>(m_paddingValue);
559 }
else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
561 return internal::pset1<PacketReturnType>(m_paddingValue);
562 }
else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) ||
563 (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
565 const Index idx = index / m_outputStrides[i + 1];
566 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
567 index -= idx * m_outputStrides[i + 1];
// Fallback when none of the cases above applies.
570 return packetWithPossibleZero(initialIndex);
// Innermost dimension (NumDims - 1): same classification with unit stride.
574 const Index lastIdx = index + PacketSize - 1;
575 const Index firstIdx = index;
576 const Index lastPaddedLeft = m_padding[NumDims - 1].first;
577 const Index firstPaddedRight = (m_dimensions[NumDims - 1] - m_padding[NumDims - 1].second);
578 const Index lastPaddedRight = m_outputStrides[NumDims - 1];
580 if (!isLeftPaddingCompileTimeZero(NumDims - 1) && lastIdx < lastPaddedLeft) {
582 return internal::pset1<PacketReturnType>(m_paddingValue);
583 }
else if (!isRightPaddingCompileTimeZero(NumDims - 1) && firstIdx >= firstPaddedRight &&
584 lastIdx < lastPaddedRight) {
586 return internal::pset1<PacketReturnType>(m_paddingValue);
587 }
else if ((isLeftPaddingCompileTimeZero(NumDims - 1) && isRightPaddingCompileTimeZero(NumDims - 1)) ||
588 (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
// Whole packet is inside the input: load it with an unaligned packet read from
// the argument evaluator.
590 inputIndex += (index - m_padding[NumDims - 1].first);
591 return m_impl.template packet<Unaligned>(inputIndex);
// Fallback when none of the cases above applies.
594 return packetWithPossibleZero(initialIndex);
// Slow-path packet load: gathers PacketSize coefficients one at a time through
// coeff() into an aligned temporary buffer and loads that buffer as a packet.
// Used by the packet routines when a packet cannot be served by a broadcast or a
// direct input load.
// NOTE(review): the return statement and closing braces are not visible in this
// listing.
597 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index)
const {
598 EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
600 for (
int i = 0; i < PacketSize; ++i) {
601 values[i] = coeff(index + i);
603 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
// Evaluator state.
// Padded output dimensions.
607 Dimensions m_dimensions;
// Output strides; one entry more than the rank, the extra entry holding the
// product of all output dimensions (see the constructor).
608 array<Index, NumDims + 1> m_outputStrides;
// Strides of the unpadded input.
609 array<Index, NumDims> m_inputStrides;
// Evaluator of the expression being padded.
610 TensorEvaluator<ArgType, Device> m_impl;
// Per-dimension (first, second) padding amounts.
611 PaddingDimensions m_padding;
// Value written into padded positions.
613 Scalar m_paddingValue;
// Device this evaluator was constructed with.
615 const Device EIGEN_DEVICE_REF m_device;
The tensor base class.
Definition TensorForwardDeclarations.h:68
Tensor padding class. At the moment only padding with a constant value is supported.
Definition TensorPadding.h:53
Namespace containing all symbols from the Eigen library.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The tensor evaluator class.
Definition TensorEvaluator.h:30