10#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
11#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
// Traits specialization for TensorShufflingOp<Shuffle, XprType>.
// Derives from traits<XprType> and re-exports the wrapped expression's
// scalar, storage kind, index type, nested type, number of dimensions,
// layout and pointer type, so the shuffle node presents the same static
// properties as the expression it wraps.
16template<
typename Shuffle,
typename XprType>
17struct traits<TensorShufflingOp<Shuffle, XprType> > :
public traits<XprType>
19 typedef typename XprType::Scalar Scalar;
20 typedef traits<XprType> XprTraits;
21 typedef typename XprTraits::StorageKind StorageKind;
22 typedef typename XprTraits::Index
Index;
23 typedef typename XprType::Nested Nested;
// Nested with any reference qualifier stripped.
24 typedef typename remove_reference<Nested>::type _Nested;
25 static const int NumDimensions = XprTraits::NumDimensions;
26 static const int Layout = XprTraits::Layout;
27 typedef typename XprTraits::PointerType PointerType;
// eval specialization for dense storage: the evaluation type of a
// TensorShufflingOp is a const reference to the op itself.
30template<
typename Shuffle,
typename XprType>
31struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense>
33 typedef const TensorShufflingOp<Shuffle, XprType>& type;
// nested specialization: when a TensorShufflingOp is embedded in another
// expression, the nested type is the op by value (not a reference).
36template<
typename Shuffle,
typename XprType>
37struct nested<TensorShufflingOp<Shuffle, XprType>, 1, typename eval<TensorShufflingOp<Shuffle, XprType> >::type>
39 typedef TensorShufflingOp<Shuffle, XprType> type;
// Expression node pairing a tensor expression with a shuffle object.
// It stores the wrapped expression (m_xpr) and a copy of the shuffle
// (m_shuffle) and exposes both through const accessors; the constructor
// only stores its two arguments.
//
// Template parameters:
//   Shuffle - type of the shuffle object; the evaluators index it with
//             operator[] once per dimension.
//   XprType - type of the wrapped tensor expression.
49template <
typename Shuffle,
typename XprType>
50class TensorShufflingOp :
public TensorBase<TensorShufflingOp<Shuffle, XprType> > {
// Types forwarded from the traits/nested specializations for this op.
53 typedef typename Eigen::internal::traits<TensorShufflingOp>::Scalar Scalar;
55 typedef typename XprType::CoeffReturnType CoeffReturnType;
56 typedef typename Eigen::internal::nested<TensorShufflingOp>::type Nested;
57 typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind;
58 typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index;
// Stores the expression to shuffle and the shuffle to apply.
60 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(
const XprType& expr,
const Shuffle& shfl)
61 : m_xpr(expr), m_shuffle(shfl) {}
// Returns the stored shuffle.
64 const Shuffle& shufflePermutation()
const {
return m_shuffle; }
// Returns the wrapped expression.
67 const typename internal::remove_all<typename XprType::Nested>::type&
68 expression()
const {
return m_xpr; }
70 EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorShufflingOp)
// Wrapped expression, held as XprType::Nested.
74 typename XprType::Nested m_xpr;
// Shuffle held by value and immutable after construction.
75 const Shuffle m_shuffle;
// Type and constant declarations of the evaluator for a shuffle expression.
// NOTE(review): the struct header and several typedefs (Self, XprType,
// Dimensions, CoeffReturnType, ...) are not present in this listing; the
// names are used below, so confirm against the full header.
80template<
typename Shuffle,
typename ArgType,
typename Device>
85 typedef typename XprType::Index
Index;
// Rank of the shuffled tensor, taken from the argument evaluator's Dimensions.
86 static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
88 typedef typename XprType::Scalar
Scalar;
90 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
91 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
92 typedef StorageMemory<CoeffReturnType, Device> Storage;
93 typedef typename Storage::Type EvaluatorPointerType;
// Evaluator capability flags (entries of an enum whose opening line is not
// visible here). Packet access is advertised whenever the packet type holds
// more than one coefficient.
97 PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
99 PreferBlockAccess =
true,
// Layout is inherited from the argument evaluator.
100 Layout = TensorEvaluator<ArgType, Device>::Layout,
105 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
// Helper types used by the block-evaluation entry points below.
108 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
109 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
// NOTE(review): the remainder of this typedef is cut off in this listing.
111 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
// Builds the evaluator state from the shuffle expression `op`:
//   - m_shuffle / m_inverseShuffle : the shuffle and its inverse mapping,
//   - m_dimensions                 : output dims, input_dims permuted by shuffle,
//   - m_is_identity                : true iff shuffle[i] == i for every i,
//   - m_unshuffledInputStrides     : strides of the input in its own order,
//   - m_outputStrides / m_fastOutputStrides : strides of the shuffled output
//                                    and their fast-division counterparts,
//   - m_inputStrides               : input strides reordered by the shuffle.
// NOTE(review): part of the member-initializer list is missing from this
// listing (only m_impl is visible).
116 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
118 m_impl(op.expression(), device)
120 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
121 const Shuffle& shuffle = op.shufflePermutation();
// Assume identity until an entry with shuffle[i] != i is found.
122 m_is_identity =
true;
123 for (
int i = 0; i < NumDims; ++i) {
124 m_shuffle[i] =
static_cast<int>(shuffle[i]);
// Output dimension i takes the size of input dimension shuffle[i].
125 m_dimensions[i] = input_dims[shuffle[i]];
126 m_inverseShuffle[shuffle[i]] = i;
127 if (m_is_identity && shuffle[i] != i) {
128 m_is_identity =
false;
// ColMajor: dimension 0 has stride 1; strides accumulate upwards.
132 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
133 m_unshuffledInputStrides[0] = 1;
134 m_outputStrides[0] = 1;
136 for (
int i = 1; i < NumDims; ++i) {
137 m_unshuffledInputStrides[i] =
138 m_unshuffledInputStrides[i - 1] * input_dims[i - 1];
139 m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
// The divisor is built from 1 when the stride is not positive, so it is
// never constructed from a zero (or negative) stride.
140 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
141 m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
// Presumably the non-ColMajor branch (the `else` line is not present in this
// listing): the last dimension has stride 1; strides accumulate downwards.
144 m_unshuffledInputStrides[NumDims - 1] = 1;
145 m_outputStrides[NumDims - 1] = 1;
146 for (
int i = NumDims - 2; i >= 0; --i) {
147 m_unshuffledInputStrides[i] =
148 m_unshuffledInputStrides[i + 1] * input_dims[i + 1];
149 m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
150 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(
151 m_outputStrides[i] > 0 ? m_outputStrides[i] : Index(1));
// Stride in the input that corresponds to a unit step along output dim i.
155 for (
int i = 0; i < NumDims; ++i) {
156 m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]];
// Returns the dimensions of the shuffled (output) tensor, as computed by the
// constructor.
160 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dimensions; }
// Forwards to the argument evaluator with a NULL destination; the pointer
// passed to this function is unnamed and unused.
// NOTE(review): the return statement is not present in this listing.
162 EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(EvaluatorPointerType ) {
163 m_impl.evalSubExprsIfNeeded(NULL);
// Asynchronous variant, compiled only when EIGEN_USE_THREADS is defined.
// Forwards to the argument evaluator with a null destination and, once it
// completes, invokes `done(true)` regardless of the bool the argument
// evaluator reports to the wrapping lambda.
167#ifdef EIGEN_USE_THREADS
168 template <
typename EvalSubExprsCallback>
169 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(
170 EvaluatorPointerType, EvalSubExprsCallback done) {
171 m_impl.evalSubExprsIfNeededAsync(
nullptr, [done](
bool) { done(
true); });
// Evaluator cleanup hook.
// NOTE(review): the body is not present in this listing; presumably it
// forwards to the argument evaluator - confirm against the full header.
175 EIGEN_STRONG_INLINE
void cleanup() {
// Returns the coefficient at linear output position `index`.
// Two paths are visible: a direct read of the argument evaluator at the same
// index, and a read at the remapped position srcCoeff(index).
// NOTE(review): the condition selecting between them is missing from this
// listing; presumably it is m_is_identity - confirm.
179 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const
182 return m_impl.coeff(index);
184 return m_impl.coeff(srcCoeff(index));
// Generic packet loader: gathers PacketSize consecutive output coefficients
// one at a time through self.coeff() into an aligned scratch array, then
// loads that array as a packet.
// NOTE(review): the statement returning `rslt` is not present in this listing.
188 template <
int LoadMode,
typename Self,
bool ImplPacketAccess>
189 struct PacketLoader {
190 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
191 static PacketReturnType Run(
const Self& self, Index index) {
// Aligned staging buffer so pload can be used on it.
192 EIGEN_ALIGN_MAX
typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
194 for (
int i = 0; i < PacketSize; ++i) {
195 values[i] = self.coeff(index + i);
197 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
// Specialization selected when the third template argument is true (packet()
// passes the argument evaluator's PacketAccess flag there).
// For an identity shuffle the packet is read straight from the argument
// evaluator; the remaining code gathers coefficients one by one exactly like
// the generic loader.
// NOTE(review): the statement returning `rslt` is not present in this listing.
202 template<
int LoadMode,
typename Self>
203 struct PacketLoader<LoadMode, Self, true> {
204 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
205 static PacketReturnType Run(
const Self& self, Index index) {
// Fast path: no remapping needed, delegate the whole packet load.
206 if (self.m_is_identity) {
207 return self.m_impl.template packet<LoadMode>(index);
209 EIGEN_ALIGN_MAX
typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
211 for (
int i = 0; i < PacketSize; ++i) {
212 values[i] = self.coeff(index + i);
214 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
// Returns the packet of PacketSize coefficients starting at linear output
// position `index`.
// Statically requires PacketSize > 1 and asserts that the whole packet lies
// inside the output tensor, then dispatches to the PacketLoader variant chosen
// by the argument evaluator's PacketAccess flag.
220 template<
int LoadMode>
221 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const
223 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
224 eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
225 return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*
this, index);
// Reports the block shape/size this evaluator would like to be called with.
// The target size is the device's first-level cache size. If the inner
// dimension (dim 0 for ColMajor, the last dim otherwise) is moved by the
// shuffle, a uniform block shape is requested with an added per-coefficient
// compute cost of NumDims * 28; otherwise a skewed block shape is requested.
228 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
229 internal::TensorBlockResourceRequirements getResourceRequirements()
const {
230 static const int inner_dim =
231 Layout ==
static_cast<int>(
ColMajor) ? 0 : NumDims - 1;
233 const size_t target_size = m_device.firstLevelCacheSize();
// True when the shuffle maps the inner output dimension to a different
// input dimension.
234 const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
241 using BlockRequirements = internal::TensorBlockResourceRequirements;
242 if (inner_dim_shuffled) {
243 return BlockRequirements::uniform<Scalar>(target_size)
244 .addCostPerCoeff({0, 0, NumDims * 28});
246 return BlockRequirements::skewed<Scalar>(target_size);
250 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
251 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
252 bool root_of_expr_ast =
false)
const {
253 assert(m_impl.data() != NULL);
255 typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
257 typedef typename TensorBlockIO::Dst TensorBlockIODst;
258 typedef typename TensorBlockIO::Src TensorBlockIOSrc;
260 const typename TensorBlock::Storage block_storage =
261 TensorBlock::prepareStorage(
262 desc, scratch, root_of_expr_ast);
264 typename TensorBlockIO::Dimensions input_strides(m_unshuffledInputStrides);
265 TensorBlockIOSrc src(input_strides, m_impl.data(), srcCoeff(desc.offset()));
267 TensorBlockIODst dst(block_storage.dimensions(), block_storage.strides(),
268 block_storage.data());
270 typename TensorBlockIO::DimensionsMap dst_to_src_dim_map(m_shuffle);
271 TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);
273 return block_storage.AsTensorMaterializedBlock();
276 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(
bool vectorized)
const {
277 const double compute_cost = m_is_identity ? TensorOpCost::AddCost<Index>() :
278 NumDims * (2 * TensorOpCost::AddCost<Index>() +
279 2 * TensorOpCost::MulCost<Index>() +
280 TensorOpCost::DivCost<Index>());
281 return m_impl.costPerCoeff(vectorized) +
282 TensorOpCost(0, 0, compute_cost, m_is_identity , PacketSize);
// Always returns NULL: this evaluator does not expose a raw buffer for the
// shuffled result.
285 EIGEN_DEVICE_FUNC
typename Storage::Type data()
const {
return NULL; }
// SYCL hook taking a command-group handler.
// NOTE(review): the body and any surrounding preprocessor guard are not
// present in this listing; presumably it forwards `cgh` to the argument
// evaluator - confirm against the full header.
289 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void bind(cl::sycl::handler &cgh)
const {
// Converts a linear index expressed in input-block strides into the matching
// linear index expressed in output-block strides.
// The index is peeled one dimension at a time, outermost first (highest
// dimension for ColMajor, dimension 0 otherwise). Each per-dimension
// coordinate is re-weighted by the output block stride of the dimension the
// inverse shuffle maps it to; the innermost dimension takes the remainder.
// NOTE(review): `input_index` is used below but its parameter declaration is
// missing from this listing; it is presumably the first parameter and taken
// by value, since it is modified - confirm against the full header.
294 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(
296 const DSizes<Index, NumDims>& input_block_strides,
297 const DSizes<Index, NumDims>& output_block_strides,
298 const DSizes<internal::TensorIntDivisor<Index>, NumDims>& fast_input_block_strides)
const {
299 Index output_index = 0;
300 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
301 for (
int i = NumDims - 1; i > 0; --i) {
// Coordinate along input dimension i, using the precomputed fast divisor.
302 const Index idx = input_index / fast_input_block_strides[i];
303 output_index += idx * output_block_strides[m_inverseShuffle[i]];
304 input_index -= idx * input_block_strides[i];
// What remains is the coordinate along dimension 0.
306 return output_index + input_index *
307 output_block_strides[m_inverseShuffle[0]];
// Presumably the non-ColMajor branch (the `else` line is not present in this
// listing): same peeling, starting from dimension 0.
309 for (
int i = 0; i < NumDims - 1; ++i) {
310 const Index idx = input_index / fast_input_block_strides[i];
311 output_index += idx * output_block_strides[m_inverseShuffle[i]];
312 input_index -= idx * input_block_strides[i];
// What remains is the coordinate along the last dimension.
314 return output_index + input_index *
315 output_block_strides[m_inverseShuffle[NumDims - 1]];
319 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index)
const {
320 Index inputIndex = 0;
321 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
322 for (
int i = NumDims - 1; i > 0; --i) {
323 const Index idx = index / m_fastOutputStrides[i];
324 inputIndex += idx * m_inputStrides[i];
325 index -= idx * m_outputStrides[i];
327 return inputIndex + index * m_inputStrides[0];
329 for (
int i = 0; i < NumDims - 1; ++i) {
330 const Index idx = index / m_fastOutputStrides[i];
331 inputIndex += idx * m_inputStrides[i];
332 index -= idx * m_outputStrides[i];
334 return inputIndex + index * m_inputStrides[NumDims - 1];
// Evaluator state. All arrays are filled by the constructor.
// NOTE(review): m_is_identity is used throughout this evaluator but its
// declaration is not present in this listing.
// Dimensions of the shuffled output.
338 Dimensions m_dimensions;
// m_shuffle[i]: input dimension that feeds output dimension i.
340 array<int, NumDims> m_shuffle;
// m_inverseShuffle[j]: output dimension fed by input dimension j.
341 array<Index, NumDims> m_inverseShuffle;
// Strides of the shuffled output, plus fast-division counterparts.
342 array<Index, NumDims> m_outputStrides;
343 array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
// Input strides reordered by the shuffle (indexed by output dimension).
344 array<Index, NumDims> m_inputStrides;
// Input strides in the input's own dimension order.
345 array<Index, NumDims> m_unshuffledInputStrides;
347 const Device EIGEN_DEVICE_REF m_device;
// Evaluator of the wrapped expression.
348 TensorEvaluator<ArgType, Device> m_impl;
// Writable evaluator for a shuffle expression. It derives from the evaluator
// of the const-qualified expression (Base) and adds the write entry points
// below (coeffRef, writePacket, writeBlock).
// NOTE(review): the struct header line and the opening of the flags enum are
// not present in this listing.
353template<
typename Shuffle,
typename ArgType,
typename Device>
355 :
public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
357 typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Base;
359 typedef TensorShufflingOp<Shuffle, ArgType> XprType;
360 typedef typename XprType::Index Index;
// Rank, taken from the argument evaluator's Dimensions.
361 static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
362 typedef DSizes<Index, NumDims> Dimensions;
363 typedef typename XprType::Scalar Scalar;
364 typedef typename XprType::CoeffReturnType CoeffReturnType;
365 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
366 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
// Capability flags. Block access is offered only when the argument evaluator
// reports RawAccess; writeBlock below asserts a non-null data() pointer on
// the argument evaluator.
370 PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
371 BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
372 PreferBlockAccess =
true,
373 Layout = TensorEvaluator<ArgType, Device>::Layout,
377 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
380 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
// Constructs the writable evaluator from the shuffle expression and device.
// NOTE(review): the initializer list and body are not present in this
// listing; presumably it only forwards both arguments to Base - confirm.
383 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
// Returns a writable reference to the input coefficient that corresponds to
// linear output position `index` (remapped through srcCoeff).
387 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
389 return this->m_impl.coeffRef(this->srcCoeff(index));
// Writes packet `x` to the PacketSize output positions starting at `index`.
// The packet is first stored into an aligned scratch array and then scattered
// one coefficient at a time through coeffRef(), so each element lands at its
// own remapped input location.
392 template <
int StoreMode> EIGEN_STRONG_INLINE
393 void writePacket(Index index,
const PacketReturnType& x)
395 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
// Aligned staging buffer so pstore can be used on it.
397 EIGEN_ALIGN_MAX
typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
398 internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
400 for (
int i = 0; i < PacketSize; ++i) {
401 this->coeffRef(index+i) = values[i];
// Writes the contents of `block` into the region of the underlying buffer
// described by `desc`, applying the inverse of the shuffle.
// Requires the argument evaluator to expose a non-null data() pointer.
// NOTE(review): several statements are missing from this listing (the
// TensorBlockIO typedef name, the declaration of `mem`, and the tails of the
// TensorBlockAssignment::Run and `src` constructor calls); the comments below
// describe only what is visible.
405 template <
typename TensorBlock>
406 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writeBlock(
407 const TensorBlockDesc& desc,
const TensorBlock& block) {
408 eigen_assert(this->m_impl.data() != NULL);
410 typedef internal::TensorBlockIO<ScalarNoConst, Index, NumDims, Layout>
412 typedef typename TensorBlockIO::Dst TensorBlockIODst;
413 typedef typename TensorBlockIO::Src TensorBlockIOSrc;
415 const Scalar* block_buffer = block.data();
// When the block has no buffer of its own, allocate a temporary one on the
// device and evaluate the block expression into it.
420 if (block_buffer == NULL) {
421 mem = this->m_device.allocate(desc.size() *
sizeof(Scalar));
422 ScalarNoConst* buf =
static_cast<ScalarNoConst*
>(mem);
424 typedef internal::TensorBlockAssignment<
425 ScalarNoConst, NumDims,
typename TensorBlock::XprType, Index>
426 TensorBlockAssignment;
428 TensorBlockAssignment::Run(
429 TensorBlockAssignment::target(
430 desc.dimensions(), internal::strides<Layout>(desc.dimensions()),
// Source of the copy: strides are those of a dense block of desc.dimensions().
438 TensorBlockIOSrc src(internal::strides<Layout>(desc.dimensions()),
// Destination: the underlying buffer viewed with its unshuffled strides; the
// block's extents are permuted into input-dimension order.
442 typename TensorBlockIO::Dimensions output_strides(
443 this->m_unshuffledInputStrides);
444 typename TensorBlockIO::Dimensions output_dimensions;
445 for (
int i = 0; i < NumDims; ++i) {
446 output_dimensions[this->m_shuffle[i]] = desc.dimension(i);
448 TensorBlockIODst dst(output_dimensions, output_strides, this->m_impl.data(),
449 this->srcCoeff(desc.offset()));
// For the write direction the destination-to-source map is the inverse shuffle.
452 typename TensorBlockIO::DimensionsMap dst_to_src_dim_map;
453 for (
int i = 0; i < NumDims; ++i) {
454 dst_to_src_dim_map[i] =
static_cast<int>(this->m_inverseShuffle[i]);
456 TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);
// Release the temporary buffer if one was allocated above.
459 if (mem != NULL) this->m_device.deallocate(mem);
The tensor base class.
Definition TensorForwardDeclarations.h:56
Tensor shuffling class.
Definition TensorShuffling.h:50
Namespace containing all symbols from the Eigen library.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The tensor evaluator class.
Definition TensorEvaluator.h:27