10#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
11#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
16template<DenseIndex DimId,
typename XprType>
17struct traits<TensorChippingOp<DimId, XprType> > :
public traits<XprType>
19 typedef typename XprType::Scalar Scalar;
20 typedef traits<XprType> XprTraits;
21 typedef typename XprTraits::StorageKind StorageKind;
22 typedef typename XprTraits::Index
Index;
23 typedef typename XprType::Nested Nested;
24 typedef typename remove_reference<Nested>::type _Nested;
25 static const int NumDimensions = XprTraits::NumDimensions - 1;
26 static const int Layout = XprTraits::Layout;
27 typedef typename XprTraits::PointerType PointerType;
30template<DenseIndex DimId,
typename XprType>
31struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
33 typedef const TensorChippingOp<DimId, XprType> EIGEN_DEVICE_REF type;
36template<DenseIndex DimId,
typename XprType>
37struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type>
39 typedef TensorChippingOp<DimId, XprType> type;
42template <DenseIndex DimId>
45 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
46 EIGEN_UNUSED_VARIABLE(dim);
47 eigen_assert(dim == DimId);
49 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim()
const {
56 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) {
57 eigen_assert(dim >= 0);
59 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim()
const {
63 const DenseIndex actual_dim;
72template <DenseIndex DimId,
typename XprType>
73class TensorChippingOp :
public TensorBase<TensorChippingOp<DimId, XprType> > {
76 typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
78 typedef typename XprType::CoeffReturnType CoeffReturnType;
79 typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
80 typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
81 typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;
83 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(
const XprType& expr,
const Index offset,
const Index dim)
84 : m_xpr(expr), m_offset(offset), m_dim(dim) {
88 const Index offset()
const {
return m_offset; }
90 const Index dim()
const {
return m_dim.actualDim(); }
93 const typename internal::remove_all<typename XprType::Nested>::type&
94 expression()
const {
return m_xpr; }
96 EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp)
99 typename XprType::Nested m_xpr;
100 const Index m_offset;
101 const internal::DimensionId<DimId> m_dim;
106template<DenseIndex DimId,
typename ArgType,
typename Device>
110 static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
111 static const int NumDims = NumInputDims-1;
112 typedef typename XprType::Index
Index;
114 typedef typename XprType::Scalar
Scalar;
116 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
117 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
118 typedef StorageMemory<CoeffReturnType, Device> Storage;
119 typedef typename Storage::Type EvaluatorPointerType;
125 Layout = TensorEvaluator<ArgType, Device>::Layout,
126 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
127 BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
130 IsOuterChipping = (
static_cast<int>(Layout) ==
ColMajor && DimId == NumInputDims - 1) ||
131 (
static_cast<int>(Layout) ==
RowMajor && DimId == 0),
133 IsInnerChipping = (
static_cast<int>(Layout) ==
ColMajor && DimId == 0) ||
134 (
static_cast<int>(Layout) ==
RowMajor && DimId == NumInputDims - 1),
137 PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess ||
143 typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
146 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
147 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
149 typedef internal::TensorBlockDescriptor<NumInputDims, Index>
151 typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
154 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
159 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
160 : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
162 EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
163 eigen_assert(NumInputDims > m_dim.actualDim());
165 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
166 eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);
169 for (
int i = 0; i < NumInputDims; ++i) {
170 if (i != m_dim.actualDim()) {
171 m_dimensions[j] = input_dims[i];
178 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
179 for (
int i = 0; i < m_dim.actualDim(); ++i) {
180 m_stride *= input_dims[i];
181 m_inputStride *= input_dims[i];
184 for (
int i = NumInputDims-1; i > m_dim.actualDim(); --i) {
185 m_stride *= input_dims[i];
186 m_inputStride *= input_dims[i];
189 m_inputStride *= input_dims[m_dim.actualDim()];
190 m_inputOffset = m_stride * op.offset();
193 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dimensions; }
195 EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(EvaluatorPointerType) {
196 m_impl.evalSubExprsIfNeeded(NULL);
200 EIGEN_STRONG_INLINE
void cleanup() {
204 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const
206 return m_impl.coeff(srcCoeff(index));
209 template<
int LoadMode>
210 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const
212 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
213 eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
215 if (isInnerChipping()) {
217 eigen_assert(m_stride == 1);
218 Index inputIndex = index * m_inputStride + m_inputOffset;
219 EIGEN_ALIGN_MAX
typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
221 for (
int i = 0; i < PacketSize; ++i) {
222 values[i] = m_impl.coeff(inputIndex);
223 inputIndex += m_inputStride;
225 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
227 }
else if (isOuterChipping()) {
229 eigen_assert(m_stride > index);
230 return m_impl.template packet<LoadMode>(index + m_inputOffset);
232 const Index idx = index / m_stride;
233 const Index rem = index - idx * m_stride;
234 if (rem + PacketSize <= m_stride) {
235 Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
236 return m_impl.template packet<LoadMode>(inputIndex);
239 EIGEN_ALIGN_MAX
typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
241 for (
int i = 0; i < PacketSize; ++i) {
242 values[i] = coeff(index);
245 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
251 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
252 costPerCoeff(
bool vectorized)
const {
254 if ((
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor) &&
255 m_dim.actualDim() == 0) ||
256 (
static_cast<int>(Layout) ==
static_cast<int>(
RowMajor) &&
257 m_dim.actualDim() == NumInputDims - 1)) {
258 cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
259 }
else if ((
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor) &&
260 m_dim.actualDim() == NumInputDims - 1) ||
261 (
static_cast<int>(Layout) ==
static_cast<int>(
RowMajor) &&
262 m_dim.actualDim() == 0)) {
263 cost += TensorOpCost::AddCost<Index>();
265 cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
266 3 * TensorOpCost::AddCost<Index>();
269 return m_impl.costPerCoeff(vectorized) +
270 TensorOpCost(0, 0, cost, vectorized, PacketSize);
273 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
274 internal::TensorBlockResourceRequirements getResourceRequirements()
const {
275 const size_t target_size = m_device.lastLevelCacheSize();
276 return internal::TensorBlockResourceRequirements::merge(
277 internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
278 m_impl.getResourceRequirements());
281 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
282 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
283 bool root_of_expr_ast =
false)
const {
284 const Index chip_dim = m_dim.actualDim();
286 DSizes<Index, NumInputDims> input_block_dims;
287 for (
int i = 0; i < NumInputDims; ++i) {
289 = i < chip_dim ? desc.dimension(i)
290 : i > chip_dim ? desc.dimension(i - 1)
294 ArgTensorBlockDesc arg_desc(srcCoeff(desc.offset()), input_block_dims);
297 if (desc.HasDestinationBuffer()) {
298 DSizes<Index, NumInputDims> arg_destination_strides;
299 for (
int i = 0; i < NumInputDims; ++i) {
300 arg_destination_strides[i]
301 = i < chip_dim ? desc.destination().strides()[i]
302 : i > chip_dim ? desc.destination().strides()[i - 1]
306 arg_desc.template AddDestinationBuffer<Layout>(
307 desc.destination().template data<ScalarNoConst>(),
308 arg_destination_strides);
311 ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
312 if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
314 if (arg_block.data() != NULL) {
316 return TensorBlock(arg_block.kind(), arg_block.data(),
323 const typename TensorBlock::Storage block_storage =
324 TensorBlock::prepareStorage(desc, scratch);
326 typedef internal::TensorBlockAssignment<
327 ScalarNoConst, NumInputDims,
typename ArgTensorBlock::XprType, Index>
328 TensorBlockAssignment;
330 TensorBlockAssignment::Run(
331 TensorBlockAssignment::target(
332 arg_desc.dimensions(),
333 internal::strides<Layout>(arg_desc.dimensions()),
334 block_storage.data()),
337 return block_storage.AsTensorMaterializedBlock();
341 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename Storage::Type data()
const {
342 typename Storage::Type result = constCast(m_impl.data());
343 if (isOuterChipping() && result) {
344 return result + m_inputOffset;
351 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void bind(cl::sycl::handler &cgh)
const {
357 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index)
const
360 if (isInnerChipping()) {
362 eigen_assert(m_stride == 1);
363 inputIndex = index * m_inputStride + m_inputOffset;
364 }
else if (isOuterChipping()) {
367 eigen_assert(m_stride > index);
368 inputIndex = index + m_inputOffset;
370 const Index idx = index / m_stride;
371 inputIndex = idx * m_inputStride + m_inputOffset;
372 index -= idx * m_stride;
378 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool isInnerChipping()
const {
379 return IsInnerChipping ||
380 (
static_cast<int>(Layout) ==
ColMajor && m_dim.actualDim() == 0) ||
381 (
static_cast<int>(Layout) ==
RowMajor && m_dim.actualDim() == NumInputDims - 1);
384 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool isOuterChipping()
const {
385 return IsOuterChipping ||
386 (
static_cast<int>(Layout) ==
ColMajor && m_dim.actualDim() == NumInputDims-1) ||
387 (
static_cast<int>(Layout) ==
RowMajor && m_dim.actualDim() == 0);
390 Dimensions m_dimensions;
394 TensorEvaluator<ArgType, Device> m_impl;
395 const internal::DimensionId<DimId> m_dim;
396 const Device EIGEN_DEVICE_REF m_device;
401template<DenseIndex DimId,
typename ArgType,
typename Device>
403 :
public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
405 typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
406 typedef TensorChippingOp<DimId, ArgType> XprType;
407 static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
408 static const int NumDims = NumInputDims-1;
409 typedef typename XprType::Index Index;
410 typedef DSizes<Index, NumDims> Dimensions;
411 typedef typename XprType::Scalar Scalar;
412 typedef typename XprType::CoeffReturnType CoeffReturnType;
413 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
414 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
418 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
419 BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
420 Layout = TensorEvaluator<ArgType, Device>::Layout,
425 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
428 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
432 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
434 return this->m_impl.coeffRef(this->srcCoeff(index));
437 template <
int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
438 void writePacket(Index index,
const PacketReturnType& x)
440 EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
442 if (this->isInnerChipping()) {
444 eigen_assert(this->m_stride == 1);
445 EIGEN_ALIGN_MAX
typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
446 internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
447 Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
449 for (
int i = 0; i < PacketSize; ++i) {
450 this->m_impl.coeffRef(inputIndex) = values[i];
451 inputIndex += this->m_inputStride;
453 }
else if (this->isOuterChipping()) {
455 eigen_assert(this->m_stride > index);
456 this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
458 const Index idx = index / this->m_stride;
459 const Index rem = index - idx * this->m_stride;
460 if (rem + PacketSize <= this->m_stride) {
461 const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
462 this->m_impl.template writePacket<StoreMode>(inputIndex, x);
465 EIGEN_ALIGN_MAX
typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
466 internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
468 for (
int i = 0; i < PacketSize; ++i) {
469 this->coeffRef(index) = values[i];
476 template <
typename TensorBlock>
477 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writeBlock(
478 const TensorBlockDesc& desc,
const TensorBlock& block) {
479 assert(this->m_impl.data() != NULL);
481 const Index chip_dim = this->m_dim.actualDim();
483 DSizes<Index, NumInputDims> input_block_dims;
484 for (
int i = 0; i < NumInputDims; ++i) {
485 input_block_dims[i] = i < chip_dim ? desc.dimension(i)
486 : i > chip_dim ? desc.dimension(i - 1)
490 typedef TensorReshapingOp<const DSizes<Index, NumInputDims>,
491 const typename TensorBlock::XprType>
494 typedef internal::TensorBlockAssignment<Scalar, NumInputDims,
495 TensorBlockExpr, Index>
498 TensorBlockAssign::Run(
499 TensorBlockAssign::target(
501 internal::strides<Layout>(this->m_impl.dimensions()),
502 this->m_impl.data(), this->srcCoeff(desc.offset())),
503 block.expr().reshape(input_block_dims));
// NOTE(review): the lines below were Doxygen cross-reference residue left by
// the documentation extractor, preserved here as comments:
//   TensorBase — the tensor base class (TensorForwardDeclarations.h:56)
//   TensorChippingOp — TensorChipping.h:73
//   Eigen — namespace containing all symbols from the Eigen library;
//           EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
//   TensorEvaluator — the tensor evaluator class (TensorEvaluator.h:27)

#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H