Eigen-unsupported 3.4.1 (git rev 28ded8800c26864e537852658428ab44c8399e87)
 
TensorChipping.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H

namespace Eigen {

namespace internal {
template<DenseIndex DimId, typename XprType>
struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions - 1;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<DenseIndex DimId, typename XprType>
struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
{
  typedef const TensorChippingOp<DimId, XprType> EIGEN_DEVICE_REF type;
};

template<DenseIndex DimId, typename XprType>
struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type>
{
  typedef TensorChippingOp<DimId, XprType> type;
};

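// DimensionId records which dimension is being chipped. When DimId is a
// compile-time constant the dimension lives in the type and actualDim()
// simply returns that constant; the DimensionId<Dynamic> specialization
// below stores the dimension chosen at run time instead.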
template <DenseIndex DimId>
struct DimensionId
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
    EIGEN_UNUSED_VARIABLE(dim);
    eigen_assert(dim == DimId);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return DimId;
  }
};
template <>
struct DimensionId<Dynamic>
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) {
    eigen_assert(dim >= 0);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return actual_dim;
  }
 private:
  const DenseIndex actual_dim;
};


}  // end namespace internal


template <DenseIndex DimId, typename XprType>
class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> > {
 public:
  typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
      : m_xpr(expr), m_offset(offset), m_dim(dim) {
  }

  EIGEN_DEVICE_FUNC
  const Index offset() const { return m_offset; }
  EIGEN_DEVICE_FUNC
  const Index dim() const { return m_dim.actualDim(); }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp)

 protected:
  typename XprType::Nested m_xpr;
  const Index m_offset;
  const internal::DimensionId<DimId> m_dim;
};
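
// Usage sketch (illustrative, assuming the public TensorBase::chip API that
// builds this expression): chipping removes one dimension by fixing it at a
// given offset.
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 3> input(2, 3, 5);
//   input.setRandom();
//   Eigen::Tensor<float, 2> fixed_dim   = input.chip<1>(0);  // dim known at compile time
//   Eigen::Tensor<float, 2> dynamic_dim = input.chip(0, 1);  // dim chosen at run time
//
// Both results have dimensions (2, 5); the chipped dimension 1 disappears.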


// Eval as rvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims-1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets.
    IsAligned = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
    // Chipping of outer-most dimension is a trivial operation, because we can
    // read and write directly from the underlying tensor using single offset.
    IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
                      (static_cast<int>(Layout) == RowMajor && DimId == 0),
    // Chipping inner-most dimension.
    IsInnerChipping = (static_cast<int>(Layout) == ColMajor && DimId == 0) ||
                      (static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1),
    // Prefer block access if the underlying expression prefers it, otherwise
    // only if chipping is not trivial.
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess ||
                        !IsOuterChipping,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef internal::TensorBlockDescriptor<NumInputDims, Index>
      ArgTensorBlockDesc;
  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
  {
    EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(NumInputDims > m_dim.actualDim());

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);

    int j = 0;
    for (int i = 0; i < NumInputDims; ++i) {
      if (i != m_dim.actualDim()) {
        m_dimensions[j] = input_dims[i];
        ++j;
      }
    }

    m_stride = 1;
    m_inputStride = 1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < m_dim.actualDim(); ++i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    } else {
      for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    }
    m_inputStride *= input_dims[m_dim.actualDim()];
    m_inputOffset = m_stride * op.offset();
  }
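
  // Worked example of the stride bookkeeping above (illustrative numbers):
  // for a column-major input of dimensions (4, 3, 5) chipped along dim 1 at
  // offset 2, m_stride = 4 (product of the dims before the chip dim),
  // m_inputStride = 4 * 3 = 12 (input stride of the first dim after the chip
  // dim), and m_inputOffset = m_stride * offset = 8. Output coordinate (i, k)
  // therefore maps to input coordinate (i, 2, k).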

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    if (isInnerChipping()) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      Index inputIndex = index * m_inputStride + m_inputOffset;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = m_impl.coeff(inputIndex);
        inputIndex += m_inputStride;
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(m_stride > index);
      return m_impl.template packet<LoadMode>(index + m_inputOffset);
    } else {
      const Index idx = index / m_stride;
      const Index rem = index - idx * m_stride;
      if (rem + PacketSize <= m_stride) {
        Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
        return m_impl.template packet<LoadMode>(inputIndex);
      } else {
        // Cross the stride boundary. Fallback to slow path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = coeff(index);
          ++index;
        }
        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
        return rslt;
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double cost = 0;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
         m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
         m_dim.actualDim() == NumInputDims - 1)) {
      cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
                m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
                m_dim.actualDim() == 0)) {
      cost += TensorOpCost::AddCost<Index>();
    } else {
      cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
              3 * TensorOpCost::AddCost<Index>();
    }

    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool root_of_expr_ast = false) const {
    const Index chip_dim = m_dim.actualDim();

    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i]
          = i < chip_dim ? desc.dimension(i)
          : i > chip_dim ? desc.dimension(i - 1)
          : 1;
    }

    ArgTensorBlockDesc arg_desc(srcCoeff(desc.offset()), input_block_dims);

    // Try to reuse destination buffer for materializing argument block.
    if (desc.HasDestinationBuffer()) {
      DSizes<Index, NumInputDims> arg_destination_strides;
      for (int i = 0; i < NumInputDims; ++i) {
        arg_destination_strides[i]
            = i < chip_dim ? desc.destination().strides()[i]
            : i > chip_dim ? desc.destination().strides()[i - 1]
            : 0;  // for dimensions of size `1` stride should never be used.
      }

      arg_desc.template AddDestinationBuffer<Layout>(
          desc.destination().template data<ScalarNoConst>(),
          arg_destination_strides);
    }

    ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
    if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();

    if (arg_block.data() != NULL) {
      // Forward argument block buffer if possible.
      return TensorBlock(arg_block.kind(), arg_block.data(),
                         desc.dimensions());

    } else {
      // Assign argument block expression to a buffer.

      // Prepare storage for the materialized chipping result.
      const typename TensorBlock::Storage block_storage =
          TensorBlock::prepareStorage(desc, scratch);

      typedef internal::TensorBlockAssignment<
          ScalarNoConst, NumInputDims, typename ArgTensorBlock::XprType, Index>
          TensorBlockAssignment;

      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(
              arg_desc.dimensions(),
              internal::strides<Layout>(arg_desc.dimensions()),
              block_storage.data()),
          arg_block.expr());

      return block_storage.AsTensorMaterializedBlock();
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (isOuterChipping() && result) {
      return result + m_inputOffset;
    } else {
      return NULL;
    }
  }
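
  // Note: data() exposes a raw pointer only for outer chipping, because only
  // then is the chip a contiguous slice of the input buffer starting at
  // m_inputOffset; in every other case callers must go through
  // coeff()/packet().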
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex;
    if (isInnerChipping()) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      inputIndex = index * m_inputStride + m_inputOffset;
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so let's avoid the integer
      // division.
      eigen_assert(m_stride > index);
      inputIndex = index + m_inputOffset;
    } else {
      const Index idx = index / m_stride;
      inputIndex = idx * m_inputStride + m_inputOffset;
      index -= idx * m_stride;
      inputIndex += index;
    }
    return inputIndex;
  }
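
  // Continuing the (4, 3, 5) example from the constructor: in the general
  // branch, output index 13 (output coordinate (1, 3)) gives idx = 13 / 4 = 3,
  // inputIndex = 3 * 12 + 8 = 44, remainder 13 - 12 = 1, so srcCoeff()
  // returns 45, i.e. input coordinate (1, 2, 3). (Illustrative numbers only.)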

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isInnerChipping() const {
    return IsInnerChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == 0) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == NumInputDims - 1);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isOuterChipping() const {
    return IsOuterChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == NumInputDims-1) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == 0);
  }

  Dimensions m_dimensions;
  Index m_stride;
  Index m_inputOffset;
  Index m_inputStride;
  TensorEvaluator<ArgType, Device> m_impl;
  const internal::DimensionId<DimId> m_dim;
  const Device EIGEN_DEVICE_REF m_device;
};


// Eval as lvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
  : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims-1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)

    if (this->isInnerChipping()) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(this->m_stride == 1);
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        this->m_impl.coeffRef(inputIndex) = values[i];
        inputIndex += this->m_inputStride;
      }
    } else if (this->isOuterChipping()) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(this->m_stride > index);
      this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
    } else {
      const Index idx = index / this->m_stride;
      const Index rem = index - idx * this->m_stride;
      if (rem + PacketSize <= this->m_stride) {
        const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
        this->m_impl.template writePacket<StoreMode>(inputIndex, x);
      } else {
        // Cross stride boundary. Fallback to slow path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          this->coeffRef(index) = values[i];
          ++index;
        }
      }
    }
  }

  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(this->m_impl.data() != NULL);

    const Index chip_dim = this->m_dim.actualDim();

    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i] = i < chip_dim ? desc.dimension(i)
                          : i > chip_dim ? desc.dimension(i - 1)
                          : 1;
    }

    typedef TensorReshapingOp<const DSizes<Index, NumInputDims>,
                              const typename TensorBlock::XprType>
        TensorBlockExpr;

    typedef internal::TensorBlockAssignment<Scalar, NumInputDims,
                                            TensorBlockExpr, Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(
            input_block_dims,
            internal::strides<Layout>(this->m_impl.dimensions()),
            this->m_impl.data(), this->srcCoeff(desc.offset())),
        block.expr().reshape(input_block_dims));
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H