#ifndef EIGEN_CXX11_TENSOR_TENSOR_ROLL_H
#define EIGEN_CXX11_TENSOR_TENSOR_ROLL_H

#include "./InternalHeaderCheck.h"

namespace Eigen {
namespace internal {
template <typename RollDimensions, typename XprType>
struct traits<TensorRollOp<RollDimensions, XprType> > : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template <typename RollDimensions, typename XprType>
struct eval<TensorRollOp<RollDimensions, XprType>, Eigen::Dense> {
  typedef const TensorRollOp<RollDimensions, XprType>& type;
};
template <typename RollDimensions, typename XprType>
struct nested<TensorRollOp<RollDimensions, XprType>, 1, typename eval<TensorRollOp<RollDimensions, XprType> >::type> {
  typedef TensorRollOp<RollDimensions, XprType> type;
};

}  // end namespace internal
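// Note added for exposition (not in the original header): the three partial
// specializations above register TensorRollOp with Eigen's expression-template
// machinery. traits<> forwards scalar/index/layout information from the wrapped
// expression, eval<> says the op is evaluated by const reference, and nested<>
// controls how the expression is stored inside larger expression trees.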
/** \class TensorRollOp
 * \brief Tensor roll (circular shift) elements class.
 */
template <typename RollDimensions, typename XprType>
class TensorRollOp : public TensorBase<TensorRollOp<RollDimensions, XprType>, WriteAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorRollOp>::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorRollOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorRollOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorRollOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorRollOp(const XprType& expr, const RollDimensions& roll_dims)
      : m_xpr(expr), m_roll_dims(roll_dims) {}

  EIGEN_DEVICE_FUNC const RollDimensions& roll() const { return m_roll_dims; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorRollOp)

 protected:
  typename XprType::Nested m_xpr;
  const RollDimensions m_roll_dims;
};
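// Illustrative usage sketch (added for exposition; not part of the original
// header). It assumes the roll expression is constructed directly with the
// public TensorRollOp constructor above; per coeff()/rollIndex() in the
// evaluator below, output coefficient i along a dimension of size n reads
// input coefficient (i + r) % n.
//
//   Eigen::Tensor<float, 2> input(4, 6);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> rolls = {1, 2};
//   Eigen::TensorRollOp<Eigen::array<Eigen::Index, 2>, Eigen::Tensor<float, 2> >
//       expr(input, rolls);
//   Eigen::Tensor<float, 2> rolled = expr;  // materializes the rolled tensor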
// Eval as rvalue
template <typename RollDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorRollOp<RollDimensions, ArgType>, Device> {
  typedef TensorRollOp<RollDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<RollDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = NumDims > 0,
    PreferBlockAccess = true,
    CoordAccess = false,
    RawAccess = false
  };

  typedef internal::TensorIntDivisor<Index> IndexDivisor;
  using TensorBlockDesc = internal::TensorBlockDescriptor<NumDims, Index>;
  using TensorBlockScratch = internal::TensorBlockScratchAllocator<Device>;
  using ArgTensorBlock = typename TensorEvaluator<const ArgType, Device>::TensorBlock;
  using TensorBlock = typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims, Layout, Index>;
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_rolls(op.roll()), m_device(device) {
    EIGEN_STATIC_ASSERT((NumDims > 0), Must_Have_At_Least_One_Dimension_To_Roll);

    // Compute strides of the input tensor.
    m_dimensions = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
        if (m_strides[i] > 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
      }
    } else {
      m_strides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
        if (m_strides[i] > 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
      }
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(nullptr);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }
  // Map an output coordinate i to the input coordinate it reads from:
  // (i + r) mod n, wrapped back into [0, n) so that negative shifts work too.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index roll(Index const i, Index const r, Index const n) const {
    auto const tmp = (i + r) % n;
    return tmp < Index(0) ? tmp + n : tmp;
  }
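  // Worked example (added for exposition, assuming the negative-wrap above):
  // with n = 5 and r = 2, output index 4 reads input index (4 + 2) % 5 == 1;
  // with r = -2, output index 0 reads input index -2 + 5 == 3.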
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array<Index, NumDims> rollCoords(array<Index, NumDims> const& coords) const {
    array<Index, NumDims> rolledCoords;
    for (int id = 0; id < NumDims; id++) {
      eigen_assert(coords[id] < m_dimensions[id]);
      rolledCoords[id] = roll(coords[id], m_rolls[id], m_dimensions[id]);
    }
    return rolledCoords;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rollIndex(Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    Index rolledIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        rolledIndex += roll(idx, m_rolls[i], m_dimensions[i]) * m_strides[i];
      }
      rolledIndex += roll(index, m_rolls[0], m_dimensions[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        rolledIndex += roll(idx, m_rolls[i], m_dimensions[i]) * m_strides[i];
      }
      rolledIndex += roll(index, m_rolls[NumDims - 1], m_dimensions[NumDims - 1]);
    }
    return rolledIndex;
  }
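  // Worked example (added for exposition): for a column-major 4x6 tensor with
  // rolls {1, 2}, the strides are {1, 4}. Linear output index 10 decomposes as
  // coords (2, 2); rolling gives (roll(2, 1, 4), roll(2, 2, 6)) = (3, 4),
  // i.e. input linear index 3 + 4 * 4 = 19, which is what rollIndex(10) returns.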
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    return m_impl.coeff(rollIndex(index));
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }
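  // Note added for exposition: because rolled coefficients are generally not
  // contiguous in the input, packet() gathers PacketSize coefficients one by
  // one through coeff() and then loads them from the aligned scratch buffer.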
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size).addCostPerCoeff({0, 0, 24});
  }
  struct BlockIteratorState {
    Index stride = 0;
    Index span = 0;
    Index size = 0;
    Index count = 0;
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
                                                          bool = false) const {
    static const bool is_col_major = static_cast<int>(Layout) == static_cast<int>(ColMajor);

    // Compute spatial coordinates of the first block element.
    array<Index, NumDims> coords;
    extract_coordinates(desc.offset(), coords);
    array<Index, NumDims> initial_coords = coords;

    // Offset into the destination block buffer.
    Index offset = 0;

    // Initialize the output block iterator state. Dimensions in this array are
    // always in inner-most -> outer-most order (column-major layout).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = is_col_major ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride);
      it[i].span = it[i].stride * (it[i].size - 1);
    }
    eigen_assert(it[0].stride == 1);

    const typename TensorBlock::Storage block_storage = TensorBlock::prepareStorage(desc, scratch);
    CoeffReturnType* block_buffer = block_storage.data();

    static const int inner_dim = is_col_major ? 0 : NumDims - 1;
    const Index inner_dim_size = it[0].size;

    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      // Copy the inner-most dimension, advancing the rolled input coordinate.
      Index i = 0;
      for (; i < inner_dim_size; ++i) {
        auto const rolled = rollCoords(coords);
        auto const index = is_col_major ? m_dimensions.IndexOfColMajor(rolled) : m_dimensions.IndexOfRowMajor(rolled);
        *(block_buffer + offset + i) = m_impl.coeff(index);
        coords[inner_dim]++;
      }
      coords[inner_dim] = initial_coords[inner_dim];

      // For a 1d tensor there is only one inner-most dimension to generate.
      if (NumDims == 1) break;

      // Update the offset and coordinates for the next inner-most block.
      for (i = 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          offset += it[i].stride;
          coords[is_col_major ? i : NumDims - 1 - i]++;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        coords[is_col_major ? i : NumDims - 1 - i] = initial_coords[is_col_major ? i : NumDims - 1 - i];
        offset -= it[i].span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
                                     TensorOpCost::DivCost<Index>());
    for (int i = 0; i < NumDims; ++i) {
      compute_cost += 2 * TensorOpCost::AddCost<Index>();
    }
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost, /*vectorized=*/false, PacketSize);
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return nullptr; }
 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  array<IndexDivisor, NumDims> m_fast_strides;
  TensorEvaluator<ArgType, Device> m_impl;
  RollDimensions m_rolls;
  const Device EIGEN_DEVICE_REF m_device;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void extract_coordinates(Index index, array<Index, NumDims>& coords) const {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[0] = index;
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[NumDims - 1] = index;
    }
  }
};
// Eval as lvalue
template <typename RollDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorRollOp<RollDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorRollOp<RollDimensions, ArgType>, Device> {
  typedef TensorEvaluator<const TensorRollOp<RollDimensions, ArgType>, Device> Base;
  typedef TensorRollOp<RollDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<RollDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;

  typedef internal::TensorBlockNotImplemented TensorBlock;
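  // Note added for exposition: this non-const specialization makes the rolled
  // view writable (WriteAccessors), so a roll expression can appear on the
  // left-hand side of an assignment; coeffRef()/writePacket() below route
  // stores through the same rollIndex() mapping used for reads.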
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return this->m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) const {
    return this->m_impl.coeffRef(this->rollIndex(index));
  }

  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) const {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
    EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index + i) = values[i];
    }
  }
};

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_ROLL_H