#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

#include "./InternalHeaderCheck.h"

namespace Eigen {
namespace internal {
template <typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType>> : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = array_size<NewDimensions>::value;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template <typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense> {
  typedef const TensorReshapingOp<NewDimensions, XprType> EIGEN_DEVICE_REF type;
};
template <typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1,
              typename eval<TensorReshapingOp<NewDimensions, XprType>>::type> {
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

}  // end namespace internal
template <typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> {
 public:
  typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
  typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
      : m_xpr(expr), m_dims(dims) {}

  EIGEN_DEVICE_FUNC const NewDimensions& dimensions() const { return m_dims; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)

 protected:
  typename XprType::Nested m_xpr;
  const NewDimensions m_dims;
};
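// A minimal usage sketch (illustrative, not part of the library itself):
// reshaping is normally reached through the public TensorBase::reshape() API,
// which constructs a TensorReshapingOp. The expression is lazy; nothing is
// copied until it is assigned or evaluated, and the total coefficient count
// must be preserved.
//
//   Eigen::Tensor<float, 2> input(2, 6);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> new_shape{{3, 4}};
//   Eigen::Tensor<float, 2> output = input.reshape(new_shape);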
// Eval as rvalue
template <typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> {
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;

  static constexpr int NumOutputDims = internal::array_size<Dimensions>::value;
  static constexpr int NumInputDims =
      internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  enum ReshapingKind {
    // We do not use layout information to determine the reshaping kind:
    // depending on the layout, `N` can be the inner or the outer dimension.
    OneByN = 0,  // expr.reshape(1, N)
    NByOne = 1,  // expr.reshape(N, 1)
    Runtime = 2  // Reshape dimensions are dynamic (specified at runtime).
  };

  static const ReshapingKind kind =
      (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(0, 1))   ? OneByN
      : (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(1, 1)) ? NByOne
                                                                                   : Runtime;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    // For trivial reshapes with raw access to the underlying data we provide
    // zero-overhead block access.
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess && NumInputDims > 0 && NumOutputDims > 0,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };
  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims, Layout, Index> TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions()) {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType data, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(data, std::move(done));
  }
#endif  // EIGEN_USE_THREADS
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { return m_impl.evalSubExprsIfNeeded(data); }
  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_impl.coeff(index); }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }
  // State used by block(); defined at class scope for C++03 compatibility.
  struct BlockIteratorState {
    Index stride;
    Index span;
    Index size;
    Index count;
  };
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
                                                          bool /*root_of_expr_ast*/ = false) const {
    eigen_assert(m_impl.data() != NULL);
    eigen_assert((kind == Runtime) || (kind == OneByN && desc.dimensions()[0] == 1) ||
                 (kind == NByOne && desc.dimensions()[1] == 1));

    if (kind == OneByN || kind == NByOne) {
      // We can guarantee at compile time that the block is just a contiguous
      // slice of the underlying expression memory buffer.
      return TensorBlock(internal::TensorBlockKind::kView, m_impl.data() + desc.offset(), desc.dimensions());
    } else {
      // Additional runtime checks decide whether the result is a view or a
      // block materialized in a temporary buffer.
      return TensorBlock::materialize(m_impl.data(), m_dimensions, desc, scratch);
    }
  }
  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return constCast(m_impl.data()); }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};
// Eval as lvalue
template <typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> {
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) {}

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<TensorEvaluator::NumOutputDims, Index> TensorBlockDesc;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const {
    return this->m_impl.coeffRef(index);
  }

  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) const {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }

  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(const TensorBlockDesc& desc, const TensorBlock& block) {
    eigen_assert(this->m_impl.data() != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr, Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(TensorBlockAssign::target(desc.dimensions(), internal::strides<Layout>(this->dimensions()),
                                                     this->m_impl.data(), desc.offset()),
                           block.expr());
  }
};
namespace internal {
template <typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType>> : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = array_size<StartIndices>::value;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template <typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense> {
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType> EIGEN_DEVICE_REF type;
};
template <typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1,
              typename eval<TensorSlicingOp<StartIndices, Sizes, XprType>>::type> {
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

}  // end namespace internal
template <typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType>> {
 public:
  typedef TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType>> Base;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices,
                                                        const Sizes& sizes)
      : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

  EIGEN_DEVICE_FUNC const StartIndices& startIndices() const { return m_indices; }
  EIGEN_DEVICE_FUNC const Sizes& sizes() const { return m_sizes; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)

 protected:
  typename XprType::Nested m_xpr;
  const StartIndices m_indices;
  const Sizes m_sizes;
};
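// A minimal usage sketch (illustrative): slicing is normally reached through
// the public TensorBase::slice() API. Since the op inherits assignment
// operators, a slice may also appear on the left-hand side of an assignment.
//
//   Eigen::Tensor<float, 2> t(4, 5);
//   t.setRandom();
//   Eigen::array<Eigen::Index, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::Index, 2> extents{{2, 3}};
//   Eigen::Tensor<float, 2> s = t.slice(offsets, extents);  // rows 1..2, cols 2..4
//   t.slice(offsets, extents) = s;                          // write back in place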
namespace internal {

// Decides whether a slice can be evaluated faster by copying each contiguous
// run of input values with a single memcpy.
// FIXME: figure out the exact threshold.
template <typename Index, typename Device, bool BlockAccess>
struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) {}
  EIGEN_DEVICE_FUNC bool operator()(Index total, Index contiguous) const {
    const bool prefer_block_evaluation = BlockAccess && total > 32 * 1024;
    return !prefer_block_evaluation && contiguous > threshold_;
  }

 private:
  Index threshold_;
};
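// For example, on a CPU device with 4 threads the threshold is 8: a slice of
// 1000 coefficients read in contiguous runs of 100 values takes the memcpy
// path (100 > 8, and 1000 <= 32 * 1024), whereas a slice with more than 32K
// coefficients prefers block evaluation whenever BlockAccess is available.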
// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index, bool BlockAccess>
struct MemcpyTriggerForSlicing<Index, GpuDevice, BlockAccess> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) {}
  EIGEN_DEVICE_FUNC bool operator()(Index, Index contiguous) const { return contiguous > 4 * 1024 * 1024; }
};
#endif
// The same holds for SYCL devices.
#ifdef EIGEN_USE_SYCL
template <typename Index, bool BlockAccess>
struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice, BlockAccess> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) {}
  EIGEN_DEVICE_FUNC bool operator()(Index, Index contiguous) const { return contiguous > 4 * 1024 * 1024; }
};
#endif

}  // namespace internal
// Eval as rvalue
template <typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> {
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Sizes>::value;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
                  // FIXME: Temporary workaround for a bug in slicing of bool tensors.
                  !internal::is_same<std::remove_const_t<Scalar>, bool>::value,
    PreferBlockAccess = true,
    CoordAccess = false,
    RawAccess = false
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) {
    m_is_identity = true;
    for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
      if (m_impl.dimensions()[i] != op.sizes()[i] || op.startIndices()[i] != 0) {
        m_is_identity = false;
      }
    }

    // No strides for scalars.
    if (NumDims == 0) return;

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't be used.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i - 1] * output_dims[i - 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    } else {
      m_inputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
      }

      // Don't initialize m_fastOutputStrides[NumDims - 1] since it won't be used.
      m_outputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i + 1] * output_dims[i + 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
      // Use memcpy if it's going to be faster than using the regular evaluation.
      const internal::MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
      if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
        EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
        for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src + offset),
                          contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(EvaluatorPointerType /*data*/, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS
  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    if (m_is_identity) {
      return m_impl.coeff(index);
    } else {
      return m_impl.coeff(srcCoeff(index));
    }
  }
  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + packetSize - 1 < internal::array_prod(dimensions()));

    if (m_is_identity) {
      return m_impl.template packet<LoadMode>(index);
    }

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims - 1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims - 1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    } else {
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize - 1] = m_impl.coeff(inputIndices[1]);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < packetSize - 1; ++i) {
        values[i] = coeff(index + i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size), m_impl.getResourceRequirements());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
                                                          bool /*root_of_expr_ast*/ = false) const {
    TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset()));
    TensorBlock block = m_impl.block(arg_desc, scratch);
    if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
    return block;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i + 1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i - 1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }
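  // Illustration of the logic above: for a ColMajor input of size (4, 5), the
  // slice with offsets (0, 2) and extents (4, 3) is contiguous in memory, so
  // data() returns m_impl.data() + 2 * 4; the slice with extents (2, 3) is not
  // contiguous, so data() returns NULL and forces coefficient-based access.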
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims - 1]);
    }
    return inputIndex;
  }
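  // Worked example of srcCoeff: for a ColMajor input of size (10, 10) sliced
  // with offsets (o0, o1) to an output of size (d0, d1), the output strides
  // are (1, d0). An output index k yields idx = k / d0, contributing
  // (idx + o1) * 10 to the input index; the remainder k - idx * d0 then
  // contributes (k % d0) + o0.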
  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  Dimensions m_dimensions;
  bool m_is_identity;
  const StartIndices m_offsets;
};
// Eval as lvalue
template <typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
    : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> {
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Sizes>::value;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = true,
    CoordAccess = false,
    RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const {
    if (this->m_is_identity) {
      return this->m_impl.coeffRef(index);
    } else {
      return this->m_impl.coeffRef(this->srcCoeff(index));
    }
  }
  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) const {
    if (this->m_is_identity) {
      this->m_impl.template writePacket<StoreMode>(index, x);
      return;
    }

    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims - 1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims - 1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    } else {
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize - 1];
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < packetSize - 1; ++i) {
        this->coeffRef(index + i) = values[i];
      }
    }
  }
  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(const TensorBlockDesc& desc, const TensorBlock& block) {
    TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset()));
    this->m_impl.writeBlock(arg_desc, block);
  }
};
namespace internal {
template <typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>> : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = array_size<StartIndices>::value;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template <typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense> {
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> EIGEN_DEVICE_REF type;
};
template <typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1,
              typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>>::type> {
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};

}  // end namespace internal
template <typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp
    : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>> {
 public:
  typedef TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>> Base;
  typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
  typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
  typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(const XprType& expr, const StartIndices& startIndices,
                                                                const StopIndices& stopIndices, const Strides& strides)
      : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices), m_strides(strides) {}

  EIGEN_DEVICE_FUNC const StartIndices& startIndices() const { return m_startIndices; }
  EIGEN_DEVICE_FUNC const StopIndices& stopIndices() const { return m_stopIndices; }
  EIGEN_DEVICE_FUNC const Strides& strides() const { return m_strides; }

  EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)

 protected:
  typename XprType::Nested m_xpr;
  const StartIndices m_startIndices;
  const StopIndices m_stopIndices;
  const Strides m_strides;
};
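// A minimal usage sketch (illustrative): strided slicing is normally reached
// through the public TensorBase::stridedSlice() API. Dimension i of the
// result covers [startIndices[i], stopIndices[i]) with step strides[i].
//
//   Eigen::Tensor<float, 2> t(6, 8);
//   t.setRandom();
//   Eigen::array<Eigen::Index, 2> start{{0, 0}};
//   Eigen::array<Eigen::Index, 2> stop{{6, 8}};
//   Eigen::array<Eigen::Index, 2> step{{2, 2}};
//   Eigen::Tensor<float, 2> every_other = t.stridedSlice(start, stop, step);  // 3 x 4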
// Eval as rvalue
template <typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> {
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Strides>::value;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef Strides Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()) {
    // Handle degenerate intervals by gracefully clamping and allowing
    // m_dimensions to be zero.
    DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
    for (ptrdiff_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        // Implies m_strides[i] < 0 by the assert above.
        startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
    const InputDimensions& input_dims = m_impl.dimensions();

    // Check for degenerate intervals and compute the output tensor shape.
    m_is_identity = true;
    for (int i = 0; i < NumDims; i++) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
      } else {
        m_dimensions[i] = (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
      if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) {
        m_is_identity = false;
      }
    }

    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i - 1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[0] since it won't be used.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i - 1] * output_dims[i - 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    } else {
      m_inputStrides[NumDims - 1] = m_strides[NumDims - 1];
      m_offsets[NumDims - 1] = startIndicesClamped[NumDims - 1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i + 1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[NumDims - 1] since it won't be used.
      m_outputStrides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i + 1] * output_dims[i + 1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    }
  }
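  // For example, with an input dimension of 10, start index 9, stop index -1
  // and stride -2, the clamped interval is -1 - 9 = -10, so the output
  // dimension is (-10 / -2) + 0 = 5: input coefficients 9, 7, 5, 3, 1.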
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    if (m_is_identity) {
      return m_impl.coeff(index);
    } else {
      return m_impl.coeff(srcCoeff(index));
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const { return NULL; }
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }

  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
#ifndef SYCL_DEVICE_ONLY
    return numext::maxi(min, numext::mini(max, value));
#else
    return cl::sycl::clamp(value, min, max);
#endif
  }
  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  bool m_is_identity;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  DSizes<Index, NumDims> m_startIndices;  // clamped startIndices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets;  // offset in a flattened shape
  const Strides m_strides;
};
// Eval as lvalue
template <typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
    : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> {
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Strides>::value;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : Base(op, device) {}

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const {
    if (this->m_is_identity) {
      return this->m_impl.coeffRef(index);
    } else {
      return this->m_impl.coeffRef(this->srcCoeff(index));
    }
  }
};

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H