Eigen-unsupported 5.0.1-dev+284dcc12
TensorRoll.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2024 Tobias Wood tobias@spinicist.org.uk
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_ROLL_H
#define EIGEN_CXX11_TENSOR_TENSOR_ROLL_H
// IWYU pragma: private
#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
template <typename RollDimensions, typename XprType>
struct traits<TensorRollOp<RollDimensions, XprType> > : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template <typename RollDimensions, typename XprType>
struct eval<TensorRollOp<RollDimensions, XprType>, Eigen::Dense> {
  typedef const TensorRollOp<RollDimensions, XprType>& type;
};

template <typename RollDimensions, typename XprType>
struct nested<TensorRollOp<RollDimensions, XprType>, 1, typename eval<TensorRollOp<RollDimensions, XprType> >::type> {
  typedef TensorRollOp<RollDimensions, XprType> type;
};

}  // end namespace internal

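/** \class TensorRollOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor roll (circular shift) elements class.
  *
  * Rolls the elements of an expression along each dimension: the output
  * coefficient at position i along a rolled dimension of size n is read from
  * the input coefficient at position (i + roll) mod n.
  */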
template <typename RollDimensions, typename XprType>
class TensorRollOp : public TensorBase<TensorRollOp<RollDimensions, XprType>, WriteAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorRollOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorRollOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorRollOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorRollOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorRollOp(const XprType& expr, const RollDimensions& roll_dims)
      : m_xpr(expr), m_roll_dims(roll_dims) {}

  EIGEN_DEVICE_FUNC const RollDimensions& roll() const { return m_roll_dims; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorRollOp)

 protected:
  typename XprType::Nested m_xpr;
  const RollDimensions m_roll_dims;
};
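
// Example (illustrative only): rolling a 1-D tensor of size 5 by 2 maps
// output index i to input index (i + 2) % 5, so {0, 1, 2, 3, 4} becomes
// {2, 3, 4, 0, 1}.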

// Eval as rvalue
template <typename RollDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorRollOp<RollDimensions, ArgType>, Device> {
  typedef TensorRollOp<RollDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<RollDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = NumDims > 0,
    PreferBlockAccess = true,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  typedef internal::TensorIntDivisor<Index> IndexDivisor;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  using TensorBlockDesc = internal::TensorBlockDescriptor<NumDims, Index>;
  using TensorBlockScratch = internal::TensorBlockScratchAllocator<Device>;
  using ArgTensorBlock = typename TensorEvaluator<const ArgType, Device>::TensorBlock;
  using TensorBlock = typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims, Layout, Index>;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_rolls(op.roll()), m_device(device) {
    EIGEN_STATIC_ASSERT((NumDims > 0), Must_Have_At_Least_One_Dimension_To_Roll);

    // Precompute strides (and their fast integer divisors) used to map
    // linear indices to per-dimension coordinates.
    m_dimensions = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
        if (m_strides[i] > 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
      }
    } else {
      m_strides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
        if (m_strides[i] > 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(nullptr);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }

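  // Map coordinate i to its rolled position (i + r) mod n, normalized into
  // [0, n): the C++ % operator can return a negative remainder for negative
  // shifts (e.g. roll(0, -1, 5) yields 4, not -1).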
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index roll(Index const i, Index const r, Index const n) const {
    auto const tmp = (i + r) % n;
    if (tmp < 0) {
      return tmp + n;
    } else {
      return tmp;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE array<Index, NumDims> rollCoords(array<Index, NumDims> const& coords) const {
    array<Index, NumDims> rolledCoords;
    for (int id = 0; id < NumDims; id++) {
      eigen_assert(coords[id] < m_dimensions[id]);
      rolledCoords[id] = roll(coords[id], m_rolls[id], m_dimensions[id]);
    }
    return rolledCoords;
  }

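  // Convert a linear output index into the linear input index it reads from:
  // peel off one coordinate per dimension using the precomputed strides, roll
  // it, and accumulate the rolled coordinate back into a linear index.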
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rollIndex(Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    Index rolledIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        rolledIndex += roll(idx, m_rolls[i], m_dimensions[i]) * m_strides[i];
      }
      rolledIndex += roll(index, m_rolls[0], m_dimensions[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        rolledIndex += roll(idx, m_rolls[i], m_dimensions[i]) * m_strides[i];
      }
      rolledIndex += roll(index, m_rolls[NumDims - 1], m_dimensions[NumDims - 1]);
    }
    return rolledIndex;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    return m_impl.coeff(rollIndex(index));
  }

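  // Rolled coefficients are generally not contiguous in the input, so a
  // packet is assembled by gathering PacketSize coefficients one at a time
  // into an aligned buffer and loading the packet from there.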
  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size).addCostPerCoeff({0, 0, 24});
  }

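  // Per-dimension iteration state for block(): the output stride of the
  // dimension, the span it covers (stride * (size - 1)), its size, and the
  // current iteration count.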
  struct BlockIteratorState {
    Index stride;
    Index span;
    Index size;
    Index count;
  };

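  // Materialize a block of the rolled expression: iterate over the block in
  // the output's inner-to-outer dimension order and fetch each coefficient
  // from its rolled coordinates in the input.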
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
                                                          bool /*root_of_expr_ast*/ = false) const {
    static const bool is_col_major = static_cast<int>(Layout) == static_cast<int>(ColMajor);

    // Compute spatial coordinates for the first block element.
    array<Index, NumDims> coords;
    extract_coordinates(desc.offset(), coords);
    array<Index, NumDims> initial_coords = coords;
    Index offset = 0;  // Offset in the output block buffer.

    // Initialize the output block iterator state. Dimensions in this array
    // are always in inner-most to outer-most order (column-major layout).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = is_col_major ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride);
      it[i].span = it[i].stride * (it[i].size - 1);
      it[i].count = 0;
    }
    eigen_assert(it[0].stride == 1);

    // Prepare storage for the materialized block result.
    const typename TensorBlock::Storage block_storage = TensorBlock::prepareStorage(desc, scratch);
    CoeffReturnType* block_buffer = block_storage.data();

    static const int inner_dim = is_col_major ? 0 : NumDims - 1;
    const Index inner_dim_size = it[0].size;

    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      Index i = 0;
      for (; i < inner_dim_size; ++i) {
        auto const rolled = rollCoords(coords);
        auto const index = is_col_major ? m_dimensions.IndexOfColMajor(rolled) : m_dimensions.IndexOfRowMajor(rolled);
        *(block_buffer + offset + i) = m_impl.coeff(index);
        coords[inner_dim]++;
      }
      coords[inner_dim] = initial_coords[inner_dim];

      if (NumDims == 1) break;  // For a 1-d tensor only the single inner-most dimension needs to be generated.

      // Advance the outer-dimension iterators, updating the offset and
      // coordinates accordingly.
      for (i = 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          offset += it[i].stride;
          coords[is_col_major ? i : NumDims - 1 - i]++;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        coords[is_col_major ? i : NumDims - 1 - i] = initial_coords[is_col_major ? i : NumDims - 1 - i];
        offset -= it[i].span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
                                     TensorOpCost::DivCost<Index>());
    for (int i = 0; i < NumDims; ++i) {
      compute_cost += 2 * TensorOpCost::AddCost<Index>();
    }
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
  }

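  // The rolled result is never materialized as a whole, so there is no raw
  // buffer to expose.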
  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return nullptr; }

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  array<IndexDivisor, NumDims> m_fast_strides;
  TensorEvaluator<ArgType, Device> m_impl;
  RollDimensions m_rolls;
  const Device EIGEN_DEVICE_REF m_device;

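  // Decompose a linear index into per-dimension coordinates using the
  // precomputed strides; the fast divisors avoid hardware integer division.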
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void extract_coordinates(Index index, array<Index, NumDims>& coords) const {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[0] = index;
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[NumDims - 1] = index;
    }
  }
};

// Eval as lvalue

template <typename RollDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorRollOp<RollDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorRollOp<RollDimensions, ArgType>, Device> {
  typedef TensorEvaluator<const TensorRollOp<RollDimensions, ArgType>, Device> Base;
  typedef TensorRollOp<RollDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<RollDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    CoordAccess = false,
    RawAccess = false
  };
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return this->m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) const {
    return this->m_impl.coeffRef(this->rollIndex(index));
  }

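  // Scatter-style store: unpack the packet into an aligned buffer and write
  // each coefficient through coeffRef, since the rolled destinations are not
  // contiguous in the underlying expression.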
  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) const {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
    EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index + i) = values[i];
    }
  }
};

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_ROLL_H
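
A minimal usage sketch (illustrative only, not part of the header). It assumes
the roll() entry point on TensorBase that constructs a TensorRollOp (see
TensorForwardDeclarations.h); the expected values follow from the evaluator's
mapping output(i) = input((i + shift) mod size) along each dimension.

  #include <unsupported/Eigen/CXX11/Tensor>
  #include <iostream>

  int main() {
    Eigen::Tensor<int, 2> t(2, 3);
    t.setValues({{0, 1, 2}, {3, 4, 5}});

    // Roll by 1 along dimension 0 and by 2 along dimension 1:
    // rolled(i, j) = t((i + 1) % 2, (j + 2) % 3).
    Eigen::array<Eigen::Index, 2> shifts = {1, 2};
    Eigen::Tensor<int, 2> rolled = t.roll(shifts);  // assumed TensorBase API

    std::cout << rolled << std::endl;  // expected: {{5, 3, 4}, {2, 0, 1}}
    return 0;
  }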