11#ifndef EIGEN_MATRIX_PRODUCT_ALTIVEC_H
12#define EIGEN_MATRIX_PRODUCT_ALTIVEC_H
14#ifndef EIGEN_ALTIVEC_USE_CUSTOM_PACK
15#define EIGEN_ALTIVEC_USE_CUSTOM_PACK 1
18#include "MatrixProductCommon.h"
20#if !defined(EIGEN_ALTIVEC_DISABLE_MMA)
21#define EIGEN_ALTIVEC_DISABLE_MMA 0
25#if !EIGEN_ALTIVEC_DISABLE_MMA && defined(__has_builtin)
26#if __has_builtin(__builtin_mma_assemble_acc)
27 #define EIGEN_ALTIVEC_MMA_SUPPORT
32#if defined(EIGEN_ALTIVEC_MMA_SUPPORT)
34#if !defined(EIGEN_ALTIVEC_ENABLE_MMA_DYNAMIC_DISPATCH)
35#define EIGEN_ALTIVEC_ENABLE_MMA_DYNAMIC_DISPATCH 0
39#if EIGEN_ALTIVEC_ENABLE_MMA_DYNAMIC_DISPATCH && !EIGEN_COMP_LLVM
40#define EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH 1
43#define EIGEN_ALTIVEC_MMA_ONLY 1
48#if defined(EIGEN_ALTIVEC_MMA_ONLY) || defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
49 #include "MatrixProductMMA.h"
// Traits mapping a scalar type to its SIMD packet type, a 4-packet
// accumulator block, and the rhs packet type used by the GEMM kernels below.
// NOTE(review): the enclosing `struct quad_traits` header line and braces
// were elided by extraction — the typedefs below are its members.
64template<
typename Scalar>
 67 typedef typename packet_traits<Scalar>::type vectortype;
 68 typedef PacketBlock<vectortype,4> type;
 69 typedef vectortype rhstype;
 72 vectorsize = packet_traits<Scalar>::size,
// Specialization for double: vectors are Packet2d, and the rhs type is a
// block of two Packet2d (a Packet2d holds only two doubles, so a pair is
// needed where float kernels use one Packet4f).
79struct quad_traits<double>
 81 typedef Packet2d vectortype;
 82 typedef PacketBlock<vectortype,4> type;
 83 typedef PacketBlock<Packet2d,2> rhstype;
 86 vectorsize = packet_traits<double>::size,
// vec_perm byte-selection masks used to de-interleave complex vectors:
// GETREAL* gathers the real lanes of two complex vectors, GETIMAG* the
// imaginary lanes. Byte indices 0-15 select from the first operand,
// 16-31 from the second (see the 64-bit masks below, which are complete).
// NOTE(review): the tail of the 32-bit initializer lists was elided by
// extraction; code kept byte-identical.
96const static Packet16uc p16uc_GETREAL32 = { 0, 1, 2, 3,
101const static Packet16uc p16uc_GETIMAG32 = { 4, 5, 6, 7,
105const static Packet16uc p16uc_GETREAL64 = { 0, 1, 2, 3, 4, 5, 6, 7,
 16, 17, 18, 19, 20, 21, 22, 23};
109const static Packet16uc p16uc_GETIMAG64 = { 8, 9, 10, 11, 12, 13, 14, 15,
 24, 25, 26, 27, 28, 29, 30, 31};
// Returns element (i,j) of a selfadjoint matrix of which only one triangle
// is stored in `dt`: either the stored entry, the conjugate of the mirrored
// entry dt(j,i), or (on the diagonal) the real part alone.
// NOTE(review): the if/else headers selecting between the three cases were
// elided by extraction, so the exact conditions are not visible here.
130template<
typename Scalar,
typename Index,
int StorageOrder>
131EIGEN_ALWAYS_INLINE std::complex<Scalar> getAdjointVal(
Index i,
Index j, const_blas_data_mapper<std::complex<Scalar>,
Index, StorageOrder>& dt)
133 std::complex<Scalar> v;
// Mirrored triangle: conjugate of dt(j,i).
136 v.real( dt(j,i).
real());
137 v.imag(-dt(j,i).
imag());
// Stored triangle: dt(i,j) as-is.
140 v.real( dt(i,j).
real());
141 v.imag( dt(i,j).
imag());
// Diagonal: real part only (the imag(0) line was elided by extraction).
143 v.real( dt(i,j).
real());
149template<
typename Scalar,
typename Index,
int StorageOrder,
int N>
150EIGEN_STRONG_INLINE
void symm_pack_complex_rhs_helper(std::complex<Scalar>* blockB,
const std::complex<Scalar>* _rhs,
Index rhsStride,
Index rows,
Index cols,
Index k2)
152 const Index depth = k2 + rows;
153 const_blas_data_mapper<std::complex<Scalar>,
Index, StorageOrder> rhs(_rhs, rhsStride);
154 const Index vectorSize = N*quad_traits<Scalar>::vectorsize;
155 const Index vectorDelta = vectorSize * rows;
156 Scalar* blockBf =
reinterpret_cast<Scalar *
>(blockB);
158 Index rir = 0, rii, j = 0;
159 for(; j + vectorSize <= cols; j+=vectorSize)
161 rii = rir + vectorDelta;
163 for(
Index i = k2; i < depth; i++)
165 for(
Index k = 0; k < vectorSize; k++)
167 std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(i, j + k, rhs);
169 blockBf[rir + k] = v.real();
170 blockBf[rii + k] = v.imag();
183 for(
Index i = k2; i < depth; i++)
185 std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(i, j, rhs);
187 blockBf[rir] = v.real();
188 blockBf[rii] = v.imag();
198template<
typename Scalar,
typename Index,
int StorageOrder>
199EIGEN_STRONG_INLINE
void symm_pack_complex_lhs_helper(std::complex<Scalar>* blockA,
const std::complex<Scalar>* _lhs,
Index lhsStride,
Index cols,
Index rows)
201 const Index depth = cols;
202 const_blas_data_mapper<std::complex<Scalar>,
Index, StorageOrder> lhs(_lhs, lhsStride);
203 const Index vectorSize = quad_traits<Scalar>::vectorsize;
204 const Index vectorDelta = vectorSize * depth;
205 Scalar* blockAf = (Scalar *)(blockA);
207 Index rir = 0, rii, j = 0;
208 for(; j + vectorSize <= rows; j+=vectorSize)
210 rii = rir + vectorDelta;
212 for(
Index i = 0; i < depth; i++)
214 for(
Index k = 0; k < vectorSize; k++)
216 std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(j+k, i, lhs);
218 blockAf[rir + k] = v.real();
219 blockAf[rii + k] = v.imag();
230 rii = rir + ((rows - j) * depth);
232 for(
Index i = 0; i < depth; i++)
237 std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(k, i, lhs);
239 blockAf[rir] = v.real();
240 blockAf[rii] = v.imag();
249template<
typename Scalar,
typename Index,
int StorageOrder,
int N>
250EIGEN_STRONG_INLINE
void symm_pack_rhs_helper(Scalar* blockB,
const Scalar* _rhs,
Index rhsStride,
Index rows,
Index cols,
Index k2)
252 const Index depth = k2 + rows;
253 const_blas_data_mapper<Scalar, Index, StorageOrder> rhs(_rhs, rhsStride);
254 const Index vectorSize = quad_traits<Scalar>::vectorsize;
257 for(; j + N*vectorSize <= cols; j+=N*vectorSize)
260 for(; i < depth; i++)
262 for(
Index k = 0; k < N*vectorSize; k++)
265 blockB[ri + k] = rhs(j+k, i);
267 blockB[ri + k] = rhs(i, j+k);
275 for(
Index i = k2; i < depth; i++)
278 blockB[ri] = rhs(i, j);
280 blockB[ri] = rhs(j, i);
286template<
typename Scalar,
typename Index,
int StorageOrder>
287EIGEN_STRONG_INLINE
void symm_pack_lhs_helper(Scalar* blockA,
const Scalar* _lhs,
Index lhsStride,
Index cols,
Index rows)
289 const Index depth = cols;
290 const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs, lhsStride);
291 const Index vectorSize = quad_traits<Scalar>::vectorsize;
294 for(; j + vectorSize <= rows; j+=vectorSize)
298 for(; i < depth; i++)
300 for(
Index k = 0; k < vectorSize; k++)
303 blockA[ri + k] = lhs(j+k, i);
305 blockA[ri + k] = lhs(i, j+k);
313 for(
Index i = 0; i < depth; i++)
319 blockA[ri] = lhs(k, i);
321 blockA[ri] = lhs(i, k);
328template<
typename Index,
int nr,
int StorageOrder>
329struct symm_pack_rhs<std::complex<float>,
Index, nr, StorageOrder>
331 void operator()(std::complex<float>* blockB,
const std::complex<float>* _rhs,
Index rhsStride,
Index rows,
Index cols,
Index k2)
333 symm_pack_complex_rhs_helper<float, Index, StorageOrder, 1>(blockB, _rhs, rhsStride, rows, cols, k2);
337template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
338struct symm_pack_lhs<std::complex<float>,
Index, Pack1, Pack2_dummy, StorageOrder>
340 void operator()(std::complex<float>* blockA,
const std::complex<float>* _lhs,
Index lhsStride,
Index cols,
Index rows)
342 symm_pack_complex_lhs_helper<float, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);
348template<
typename Index,
int nr,
int StorageOrder>
349struct symm_pack_rhs<std::complex<double>,
Index, nr, StorageOrder>
351 void operator()(std::complex<double>* blockB,
const std::complex<double>* _rhs,
Index rhsStride,
Index rows,
Index cols,
Index k2)
353 symm_pack_complex_rhs_helper<double, Index, StorageOrder, 2>(blockB, _rhs, rhsStride, rows, cols, k2);
357template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
358struct symm_pack_lhs<std::complex<double>,
Index, Pack1, Pack2_dummy, StorageOrder>
360 void operator()(std::complex<double>* blockA,
const std::complex<double>* _lhs,
Index lhsStride,
Index cols,
Index rows)
362 symm_pack_complex_lhs_helper<double, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);
367template<
typename Index,
int nr,
int StorageOrder>
368struct symm_pack_rhs<float,
Index, nr, StorageOrder>
370 void operator()(
float* blockB,
const float* _rhs,
Index rhsStride,
Index rows,
Index cols,
Index k2)
372 symm_pack_rhs_helper<float, Index, StorageOrder, 1>(blockB, _rhs, rhsStride, rows, cols, k2);
376template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
377struct symm_pack_lhs<float,
Index, Pack1, Pack2_dummy, StorageOrder>
379 void operator()(
float* blockA,
const float* _lhs,
Index lhsStride,
Index cols,
Index rows)
381 symm_pack_lhs_helper<float, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);
386template<
typename Index,
int nr,
int StorageOrder>
387struct symm_pack_rhs<double,
Index, nr, StorageOrder>
389 void operator()(
double* blockB,
const double* _rhs,
Index rhsStride,
Index rows,
Index cols,
Index k2)
391 symm_pack_rhs_helper<double, Index, StorageOrder, 2>(blockB, _rhs, rhsStride, rows, cols, k2);
395template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
396struct symm_pack_lhs<double,
Index, Pack1, Pack2_dummy, StorageOrder>
398 void operator()(
double* blockA,
const double* _lhs,
Index lhsStride,
Index cols,
Index rows)
400 symm_pack_lhs_helper<double, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);
415template<
typename Scalar,
typename Packet,
typename Index,
int N>
416EIGEN_ALWAYS_INLINE
void storeBlock(Scalar* to, PacketBlock<Packet,N>& block)
418 const Index size = 16 /
sizeof(Scalar);
419 pstore<Scalar>(to + (0 * size), block.packet[0]);
420 pstore<Scalar>(to + (1 * size), block.packet[1]);
422 pstore<Scalar>(to + (2 * size), block.packet[2]);
425 pstore<Scalar>(to + (3 * size), block.packet[3]);
430template<
typename Scalar,
typename Index,
typename DataMapper,
typename Packet,
typename PacketC,
int StorageOrder,
bool Conjugate,
bool PanelMode,
bool UseLhs>
432 EIGEN_STRONG_INLINE
void operator()(std::complex<Scalar>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
434 const Index vectorSize = quad_traits<Scalar>::vectorsize;
435 const Index vectorDelta = vectorSize * ((PanelMode) ? stride : depth);
436 Index rir = ((PanelMode) ? (vectorSize*offset) : 0), rii;
437 Scalar* blockAt =
reinterpret_cast<Scalar *
>(blockA);
440 for(; j + vectorSize <= rows; j+=vectorSize)
444 rii = rir + vectorDelta;
446 for(; i + vectorSize <= depth; i+=vectorSize)
448 PacketBlock<Packet,4> blockr, blocki;
449 PacketBlock<PacketC,8> cblock;
452 bload<DataMapper, PacketC, Index, 2, StorageOrder, true, 4>(cblock, lhs, j, i);
454 bload<DataMapper, PacketC, Index, 2, StorageOrder, true, 4>(cblock, lhs, i, j);
457 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[4].v, p16uc_GETREAL32);
458 blockr.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[5].v, p16uc_GETREAL32);
459 blockr.packet[2] = vec_perm(cblock.packet[2].v, cblock.packet[6].v, p16uc_GETREAL32);
460 blockr.packet[3] = vec_perm(cblock.packet[3].v, cblock.packet[7].v, p16uc_GETREAL32);
462 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[4].v, p16uc_GETIMAG32);
463 blocki.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[5].v, p16uc_GETIMAG32);
464 blocki.packet[2] = vec_perm(cblock.packet[2].v, cblock.packet[6].v, p16uc_GETIMAG32);
465 blocki.packet[3] = vec_perm(cblock.packet[3].v, cblock.packet[7].v, p16uc_GETIMAG32);
469 blocki.packet[0] = -blocki.packet[0];
470 blocki.packet[1] = -blocki.packet[1];
471 blocki.packet[2] = -blocki.packet[2];
472 blocki.packet[3] = -blocki.packet[3];
475 if(((StorageOrder ==
RowMajor) && UseLhs) || (((StorageOrder ==
ColMajor) && !UseLhs)))
481 storeBlock<Scalar, Packet, Index, 4>(blockAt + rir, blockr);
482 storeBlock<Scalar, Packet, Index, 4>(blockAt + rii, blocki);
487 for(; i < depth; i++)
489 PacketBlock<Packet,1> blockr, blocki;
490 PacketBlock<PacketC,2> cblock;
492 if(((StorageOrder ==
ColMajor) && UseLhs) || (((StorageOrder ==
RowMajor) && !UseLhs)))
495 cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i);
496 cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 2, i);
498 cblock.packet[0] = lhs.template loadPacket<PacketC>(i, j + 0);
499 cblock.packet[1] = lhs.template loadPacket<PacketC>(i, j + 2);
503 cblock.packet[0] = pload2(lhs(j + 0, i), lhs(j + 1, i));
504 cblock.packet[1] = pload2(lhs(j + 2, i), lhs(j + 3, i));
506 cblock.packet[0] = pload2(lhs(i, j + 0), lhs(i, j + 1));
507 cblock.packet[1] = pload2(lhs(i, j + 2), lhs(i, j + 3));
511 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL32);
512 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG32);
516 blocki.packet[0] = -blocki.packet[0];
519 pstore<Scalar>(blockAt + rir, blockr.packet[0]);
520 pstore<Scalar>(blockAt + rii, blocki.packet[0]);
526 rir += ((PanelMode) ? (vectorSize*(2*stride - depth)) : vectorDelta);
531 if(PanelMode) rir -= (offset*(vectorSize - 1));
535 rii = rir + ((PanelMode) ? stride : depth);
537 for(
Index i = 0; i < depth; i++)
539 blockAt[rir] = lhs(i, j).real();
542 blockAt[rii] = -lhs(i, j).imag();
544 blockAt[rii] = lhs(i, j).imag();
550 rir += ((PanelMode) ? (2*stride - depth) : depth);
555 if(PanelMode) rir += (offset*(rows - j - vectorSize));
556 rii = rir + (((PanelMode) ? stride : depth) * (rows - j));
558 for(
Index i = 0; i < depth; i++)
563 blockAt[rir] = lhs(k, i).real();
566 blockAt[rii] = -lhs(k, i).imag();
568 blockAt[rii] = lhs(k, i).imag();
580template<
typename Scalar,
typename Index,
typename DataMapper,
typename Packet,
int StorageOrder,
bool PanelMode,
bool UseLhs>
582 EIGEN_STRONG_INLINE
void operator()(Scalar* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
584 const Index vectorSize = quad_traits<Scalar>::vectorsize;
587 for(; j + vectorSize <= rows; j+=vectorSize)
591 if(PanelMode) ri += vectorSize*offset;
593 for(; i + vectorSize <= depth; i+=vectorSize)
595 PacketBlock<Packet,4> block;
598 bload<DataMapper, Packet, Index, 4, StorageOrder, false, 4>(block, lhs, j, i);
600 bload<DataMapper, Packet, Index, 4, StorageOrder, false, 4>(block, lhs, i, j);
602 if(((StorageOrder ==
RowMajor) && UseLhs) || ((StorageOrder ==
ColMajor) && !UseLhs))
607 storeBlock<Scalar, Packet, Index, 4>(blockA + ri, block);
611 for(; i < depth; i++)
613 if(((StorageOrder ==
RowMajor) && UseLhs) || ((StorageOrder ==
ColMajor) && !UseLhs))
616 blockA[ri+0] = lhs(j+0, i);
617 blockA[ri+1] = lhs(j+1, i);
618 blockA[ri+2] = lhs(j+2, i);
619 blockA[ri+3] = lhs(j+3, i);
621 blockA[ri+0] = lhs(i, j+0);
622 blockA[ri+1] = lhs(i, j+1);
623 blockA[ri+2] = lhs(i, j+2);
624 blockA[ri+3] = lhs(i, j+3);
629 lhsV = lhs.template loadPacket<Packet>(j, i);
631 lhsV = lhs.template loadPacket<Packet>(i, j);
633 pstore<Scalar>(blockA + ri, lhsV);
639 if(PanelMode) ri += vectorSize*(stride - offset - depth);
644 if(PanelMode) ri += offset;
648 for(
Index i = 0; i < depth; i++)
650 blockA[ri] = lhs(i, j);
654 if(PanelMode) ri += stride - depth;
659 if(PanelMode) ri += offset*(rows - j);
661 for(
Index i = 0; i < depth; i++)
666 blockA[ri] = lhs(k, i);
676template<
typename Index,
typename DataMapper,
int StorageOrder,
bool PanelMode>
677struct dhs_pack<double,
Index, DataMapper, Packet2d, StorageOrder, PanelMode, true>
679 EIGEN_STRONG_INLINE
void operator()(
double* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
681 const Index vectorSize = quad_traits<double>::vectorsize;
684 for(; j + vectorSize <= rows; j+=vectorSize)
688 if(PanelMode) ri += vectorSize*offset;
690 for(; i + vectorSize <= depth; i+=vectorSize)
692 PacketBlock<Packet2d,2> block;
695 block.packet[0] = lhs.template loadPacket<Packet2d>(j + 0, i);
696 block.packet[1] = lhs.template loadPacket<Packet2d>(j + 1, i);
700 block.packet[0] = lhs.template loadPacket<Packet2d>(j, i + 0);
701 block.packet[1] = lhs.template loadPacket<Packet2d>(j, i + 1);
704 storeBlock<double, Packet2d, Index, 2>(blockA + ri, block);
708 for(; i < depth; i++)
712 blockA[ri+0] = lhs(j+0, i);
713 blockA[ri+1] = lhs(j+1, i);
715 Packet2d lhsV = lhs.template loadPacket<Packet2d>(j, i);
716 pstore<double>(blockA + ri, lhsV);
722 if(PanelMode) ri += vectorSize*(stride - offset - depth);
727 if(PanelMode) ri += offset*(rows - j);
729 for(
Index i = 0; i < depth; i++)
734 blockA[ri] = lhs(k, i);
743template<
typename Index,
typename DataMapper,
int StorageOrder,
bool PanelMode>
744struct dhs_pack<double,
Index, DataMapper, Packet2d, StorageOrder, PanelMode, false>
746 EIGEN_STRONG_INLINE
void operator()(
double* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
748 const Index vectorSize = quad_traits<double>::vectorsize;
751 for(; j + 2*vectorSize <= cols; j+=2*vectorSize)
755 if(PanelMode) ri += offset*(2*vectorSize);
757 for(; i + vectorSize <= depth; i+=vectorSize)
759 PacketBlock<Packet2d,4> block;
762 PacketBlock<Packet2d,2> block1, block2;
763 block1.packet[0] = rhs.template loadPacket<Packet2d>(i, j + 0);
764 block1.packet[1] = rhs.template loadPacket<Packet2d>(i, j + 1);
765 block2.packet[0] = rhs.template loadPacket<Packet2d>(i, j + 2);
766 block2.packet[1] = rhs.template loadPacket<Packet2d>(i, j + 3);
771 pstore<double>(blockB + ri , block1.packet[0]);
772 pstore<double>(blockB + ri + 2, block2.packet[0]);
773 pstore<double>(blockB + ri + 4, block1.packet[1]);
774 pstore<double>(blockB + ri + 6, block2.packet[1]);
776 block.packet[0] = rhs.template loadPacket<Packet2d>(i + 0, j + 0);
777 block.packet[1] = rhs.template loadPacket<Packet2d>(i + 0, j + 2);
778 block.packet[2] = rhs.template loadPacket<Packet2d>(i + 1, j + 0);
779 block.packet[3] = rhs.template loadPacket<Packet2d>(i + 1, j + 2);
781 storeBlock<double, Packet2d, Index, 4>(blockB + ri, block);
786 for(; i < depth; i++)
790 blockB[ri+0] = rhs(i, j+0);
791 blockB[ri+1] = rhs(i, j+1);
795 blockB[ri+0] = rhs(i, j+2);
796 blockB[ri+1] = rhs(i, j+3);
798 Packet2d rhsV = rhs.template loadPacket<Packet2d>(i, j);
799 pstore<double>(blockB + ri, rhsV);
803 rhsV = rhs.template loadPacket<Packet2d>(i, j + 2);
804 pstore<double>(blockB + ri, rhsV);
809 if(PanelMode) ri += (2*vectorSize)*(stride - offset - depth);
812 if(PanelMode) ri += offset;
816 for(
Index i = 0; i < depth; i++)
818 blockB[ri] = rhs(i, j);
822 if(PanelMode) ri += stride - depth;
828template<
typename Index,
typename DataMapper,
typename Packet,
typename PacketC,
int StorageOrder,
bool Conjugate,
bool PanelMode>
829struct dhs_cpack<double,
Index, DataMapper, Packet, PacketC, StorageOrder, Conjugate, PanelMode, true>
831 EIGEN_STRONG_INLINE
void operator()(std::complex<double>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
833 const Index vectorSize = quad_traits<double>::vectorsize;
834 const Index vectorDelta = vectorSize * ((PanelMode) ? stride : depth);
835 Index rir = ((PanelMode) ? (vectorSize*offset) : 0), rii;
836 double* blockAt =
reinterpret_cast<double *
>(blockA);
839 for(; j + vectorSize <= rows; j+=vectorSize)
843 rii = rir + vectorDelta;
845 for(; i + vectorSize <= depth; i+=vectorSize)
847 PacketBlock<Packet,2> blockr, blocki;
848 PacketBlock<PacketC,4> cblock;
852 cblock.packet[0] = lhs.template loadPacket<PacketC>(j, i + 0);
853 cblock.packet[1] = lhs.template loadPacket<PacketC>(j, i + 1);
855 cblock.packet[2] = lhs.template loadPacket<PacketC>(j + 1, i + 0);
856 cblock.packet[3] = lhs.template loadPacket<PacketC>(j + 1, i + 1);
858 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[2].v, p16uc_GETREAL64);
859 blockr.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[3].v, p16uc_GETREAL64);
861 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[2].v, p16uc_GETIMAG64);
862 blocki.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[3].v, p16uc_GETIMAG64);
864 cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i);
865 cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 1, i);
867 cblock.packet[2] = lhs.template loadPacket<PacketC>(j + 0, i + 1);
868 cblock.packet[3] = lhs.template loadPacket<PacketC>(j + 1, i + 1);
870 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64);
871 blockr.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETREAL64);
873 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64);
874 blocki.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETIMAG64);
879 blocki.packet[0] = -blocki.packet[0];
880 blocki.packet[1] = -blocki.packet[1];
883 storeBlock<double, Packet, Index, 2>(blockAt + rir, blockr);
884 storeBlock<double, Packet, Index, 2>(blockAt + rii, blocki);
889 for(; i < depth; i++)
891 PacketBlock<Packet,1> blockr, blocki;
892 PacketBlock<PacketC,2> cblock;
894 cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i);
895 cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 1, i);
897 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64);
898 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64);
902 blocki.packet[0] = -blocki.packet[0];
905 pstore<double>(blockAt + rir, blockr.packet[0]);
906 pstore<double>(blockAt + rii, blocki.packet[0]);
912 rir += ((PanelMode) ? (vectorSize*(2*stride - depth)) : vectorDelta);
917 if(PanelMode) rir += (offset*(rows - j - vectorSize));
918 rii = rir + (((PanelMode) ? stride : depth) * (rows - j));
920 for(
Index i = 0; i < depth; i++)
925 blockAt[rir] = lhs(k, i).real();
928 blockAt[rii] = -lhs(k, i).imag();
930 blockAt[rii] = lhs(k, i).imag();
941template<
typename Index,
typename DataMapper,
typename Packet,
typename PacketC,
int StorageOrder,
bool Conjugate,
bool PanelMode>
942struct dhs_cpack<double,
Index, DataMapper, Packet, PacketC, StorageOrder, Conjugate, PanelMode, false>
944 EIGEN_STRONG_INLINE
void operator()(std::complex<double>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
946 const Index vectorSize = quad_traits<double>::vectorsize;
947 const Index vectorDelta = 2*vectorSize * ((PanelMode) ? stride : depth);
948 Index rir = ((PanelMode) ? (2*vectorSize*offset) : 0), rii;
949 double* blockBt =
reinterpret_cast<double *
>(blockB);
952 for(; j + 2*vectorSize <= cols; j+=2*vectorSize)
956 rii = rir + vectorDelta;
958 for(; i < depth; i++)
960 PacketBlock<PacketC,4> cblock;
961 PacketBlock<Packet,2> blockr, blocki;
963 bload<DataMapper, PacketC, Index, 2, ColMajor, false, 4>(cblock, rhs, i, j);
965 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64);
966 blockr.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETREAL64);
968 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64);
969 blocki.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETIMAG64);
973 blocki.packet[0] = -blocki.packet[0];
974 blocki.packet[1] = -blocki.packet[1];
977 storeBlock<double, Packet, Index, 2>(blockBt + rir, blockr);
978 storeBlock<double, Packet, Index, 2>(blockBt + rii, blocki);
984 rir += ((PanelMode) ? (2*vectorSize*(2*stride - depth)) : vectorDelta);
987 if(PanelMode) rir -= (offset*(2*vectorSize - 1));
991 rii = rir + ((PanelMode) ? stride : depth);
993 for(
Index i = 0; i < depth; i++)
995 blockBt[rir] = rhs(i, j).real();
998 blockBt[rii] = -rhs(i, j).imag();
1000 blockBt[rii] = rhs(i, j).imag();
1006 rir += ((PanelMode) ? (2*stride - depth) : depth);
1016template<
typename Packet,
bool NegativeAccumulate,
int N>
1017EIGEN_ALWAYS_INLINE
void pger_common(PacketBlock<Packet,N>* acc,
const Packet& lhsV,
const Packet* rhsV)
1019 if(NegativeAccumulate)
1021 acc->packet[0] = vec_nmsub(lhsV, rhsV[0], acc->packet[0]);
1023 acc->packet[1] = vec_nmsub(lhsV, rhsV[1], acc->packet[1]);
1026 acc->packet[2] = vec_nmsub(lhsV, rhsV[2], acc->packet[2]);
1029 acc->packet[3] = vec_nmsub(lhsV, rhsV[3], acc->packet[3]);
1032 acc->packet[0] = vec_madd(lhsV, rhsV[0], acc->packet[0]);
1034 acc->packet[1] = vec_madd(lhsV, rhsV[1], acc->packet[1]);
1037 acc->packet[2] = vec_madd(lhsV, rhsV[2], acc->packet[2]);
1040 acc->packet[3] = vec_madd(lhsV, rhsV[3], acc->packet[3]);
1045template<
int N,
typename Scalar,
typename Packet,
bool NegativeAccumulate>
1046EIGEN_ALWAYS_INLINE
void pger(PacketBlock<Packet,N>* acc,
const Scalar* lhs,
const Packet* rhsV)
1048 Packet lhsV = pload<Packet>(lhs);
1050 pger_common<Packet, NegativeAccumulate, N>(acc, lhsV, rhsV);
1053template<
typename Scalar,
typename Packet,
typename Index, const Index remaining_rows>
1054EIGEN_ALWAYS_INLINE
void loadPacketRemaining(
const Scalar* lhs, Packet &lhsV)
1057 lhsV = vec_xl_len((Scalar *)lhs, remaining_rows *
sizeof(Scalar));
1062 }
while (++i < remaining_rows);
1066template<
int N,
typename Scalar,
typename Packet,
typename Index,
bool NegativeAccumulate, const Index remaining_rows>
1067EIGEN_ALWAYS_INLINE
void pger(PacketBlock<Packet,N>* acc,
const Scalar* lhs,
const Packet* rhsV)
1070 loadPacketRemaining<Scalar, Packet, Index, remaining_rows>(lhs, lhsV);
1072 pger_common<Packet, NegativeAccumulate, N>(acc, lhsV, rhsV);
1076template<
int N,
typename Packet,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1077EIGEN_ALWAYS_INLINE
void pgerc_common(PacketBlock<Packet,N>* accReal, PacketBlock<Packet,N>* accImag,
const Packet &lhsV,
const Packet &lhsVi,
const Packet* rhsV,
const Packet* rhsVi)
1079 pger_common<Packet, false, N>(accReal, lhsV, rhsV);
1082 pger_common<Packet, ConjugateRhs, N>(accImag, lhsV, rhsVi);
1083 EIGEN_UNUSED_VARIABLE(lhsVi);
1086 pger_common<Packet, ConjugateLhs == ConjugateRhs, N>(accReal, lhsVi, rhsVi);
1087 pger_common<Packet, ConjugateRhs, N>(accImag, lhsV, rhsVi);
1089 EIGEN_UNUSED_VARIABLE(rhsVi);
1091 pger_common<Packet, ConjugateLhs, N>(accImag, lhsVi, rhsV);
1095template<
int N,
typename Scalar,
typename Packet,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1096EIGEN_ALWAYS_INLINE
void pgerc(PacketBlock<Packet,N>* accReal, PacketBlock<Packet,N>* accImag,
const Scalar* lhs_ptr,
const Scalar* lhs_ptr_imag,
const Packet* rhsV,
const Packet* rhsVi)
1098 Packet lhsV = ploadLhs<Scalar, Packet>(lhs_ptr);
1100 if(!LhsIsReal) lhsVi = ploadLhs<Scalar, Packet>(lhs_ptr_imag);
1101 else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);
1103 pgerc_common<N, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(accReal, accImag, lhsV, lhsVi, rhsV, rhsVi);
1106template<
typename Scalar,
typename Packet,
typename Index,
bool LhsIsReal, const Index remaining_rows>
1107EIGEN_ALWAYS_INLINE
void loadPacketRemaining(
const Scalar* lhs_ptr,
const Scalar* lhs_ptr_imag, Packet &lhsV, Packet &lhsVi)
1110 lhsV = vec_xl_len((Scalar *)lhs_ptr, remaining_rows *
sizeof(Scalar));
1111 if(!LhsIsReal) lhsVi = vec_xl_len((Scalar *)lhs_ptr_imag, remaining_rows *
sizeof(Scalar));
1112 else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);
1116 lhsV[i] = lhs_ptr[i];
1117 if(!LhsIsReal) lhsVi[i] = lhs_ptr_imag[i];
1118 }
while (++i < remaining_rows);
1119 if(LhsIsReal) EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);
1123template<
int N,
typename Scalar,
typename Packet,
typename Index,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal, const Index remaining_rows>
1124EIGEN_ALWAYS_INLINE
void pgerc(PacketBlock<Packet,N>* accReal, PacketBlock<Packet,N>* accImag,
const Scalar* lhs_ptr,
const Scalar* lhs_ptr_imag,
const Packet* rhsV,
const Packet* rhsVi)
1127 loadPacketRemaining<Scalar, Packet, Index, LhsIsReal, remaining_rows>(lhs_ptr, lhs_ptr_imag, lhsV, lhsVi);
1129 pgerc_common<N, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(accReal, accImag, lhsV, lhsVi, rhsV, rhsVi);
1132template<
typename Scalar,
typename Packet>
1133EIGEN_ALWAYS_INLINE Packet ploadLhs(
const Scalar* lhs)
1135 return ploadu<Packet>(lhs);
1139template<
typename Scalar,
typename Packet,
int N>
1140EIGEN_ALWAYS_INLINE
void bsetzero(PacketBlock<Packet,N>& acc)
1142 acc.packet[0] = pset1<Packet>((Scalar)0);
1144 acc.packet[1] = pset1<Packet>((Scalar)0);
1147 acc.packet[2] = pset1<Packet>((Scalar)0);
1150 acc.packet[3] = pset1<Packet>((Scalar)0);
1155template<
typename Packet,
int N>
1156EIGEN_ALWAYS_INLINE
void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ,
const Packet& pAlpha)
1158 acc.packet[0] = pmadd(pAlpha, accZ.packet[0], acc.packet[0]);
1160 acc.packet[1] = pmadd(pAlpha, accZ.packet[1], acc.packet[1]);
1163 acc.packet[2] = pmadd(pAlpha, accZ.packet[2], acc.packet[2]);
1166 acc.packet[3] = pmadd(pAlpha, accZ.packet[3], acc.packet[3]);
1170template<
typename Packet,
int N>
1171EIGEN_ALWAYS_INLINE
void bscalec_common(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ,
const Packet& pAlpha)
1173 acc.packet[0] = pmul<Packet>(accZ.packet[0], pAlpha);
1175 acc.packet[1] = pmul<Packet>(accZ.packet[1], pAlpha);
1178 acc.packet[2] = pmul<Packet>(accZ.packet[2], pAlpha);
1181 acc.packet[3] = pmul<Packet>(accZ.packet[3], pAlpha);
1186template<
typename Packet,
int N>
1187EIGEN_ALWAYS_INLINE
void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag,
const Packet& bReal,
const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag)
1189 bscalec_common<Packet, N>(cReal, aReal, bReal);
1191 bscalec_common<Packet, N>(cImag, aImag, bReal);
1193 pger_common<Packet, true, N>(&cReal, bImag, aImag.packet);
1195 pger_common<Packet, false, N>(&cImag, bImag, aReal.packet);
1198template<
typename Packet,
int N>
1199EIGEN_ALWAYS_INLINE
void band(PacketBlock<Packet,N>& acc,
const Packet& pMask)
1201 acc.packet[0] = pand(acc.packet[0], pMask);
1203 acc.packet[1] = pand(acc.packet[1], pMask);
1206 acc.packet[2] = pand(acc.packet[2], pMask);
1209 acc.packet[3] = pand(acc.packet[3], pMask);
1213template<
typename Packet,
int N>
1214EIGEN_ALWAYS_INLINE
void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag,
const Packet& bReal,
const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag,
const Packet& pMask)
1216 band<Packet, N>(aReal, pMask);
1217 band<Packet, N>(aImag, pMask);
1219 bscalec<Packet,N>(aReal, aImag, bReal, bImag, cReal, cImag);
1223template<
typename DataMapper,
typename Packet,
typename Index, const Index accCols,
int StorageOrder,
bool Complex,
int N>
1224EIGEN_ALWAYS_INLINE
void bload(PacketBlock<Packet,N*(Complex?2:1)>& acc,
const DataMapper& res,
Index row,
Index col)
1227 acc.packet[0] = res.template loadPacket<Packet>(row + 0, col);
1229 acc.packet[1] = res.template loadPacket<Packet>(row + 1, col);
1232 acc.packet[2] = res.template loadPacket<Packet>(row + 2, col);
1235 acc.packet[3] = res.template loadPacket<Packet>(row + 3, col);
1238 acc.packet[0+N] = res.template loadPacket<Packet>(row + 0, col + accCols);
1240 acc.packet[1+N] = res.template loadPacket<Packet>(row + 1, col + accCols);
1243 acc.packet[2+N] = res.template loadPacket<Packet>(row + 2, col + accCols);
1246 acc.packet[3+N] = res.template loadPacket<Packet>(row + 3, col + accCols);
1250 acc.packet[0] = res.template loadPacket<Packet>(row, col + 0);
1252 acc.packet[1] = res.template loadPacket<Packet>(row, col + 1);
1255 acc.packet[2] = res.template loadPacket<Packet>(row, col + 2);
1258 acc.packet[3] = res.template loadPacket<Packet>(row, col + 3);
1261 acc.packet[0+N] = res.template loadPacket<Packet>(row + accCols, col + 0);
1263 acc.packet[1+N] = res.template loadPacket<Packet>(row + accCols, col + 1);
1266 acc.packet[2+N] = res.template loadPacket<Packet>(row + accCols, col + 2);
1269 acc.packet[3+N] = res.template loadPacket<Packet>(row + accCols, col + 3);
// Lane masks for partial rows: mask4R has the first R 32-bit lanes set
// (all-ones) and the rest zero; mask21 is the 64-bit analogue for one
// remaining Packet2d lane. Consumed by bmask/band.
1275const static Packet4i mask41 = { -1, 0, 0, 0 };
1276const static Packet4i mask42 = { -1, -1, 0, 0 };
1277const static Packet4i mask43 = { -1, -1, -1, 0 };
1279const static Packet2l mask21 = { -1, 0 };
1281template<
typename Packet>
1282EIGEN_ALWAYS_INLINE Packet bmask(
const int remaining_rows)
1284 if (remaining_rows == 0) {
1285 return pset1<Packet>(
float(0.0));
1287 switch (remaining_rows) {
1288 case 1:
return Packet(mask41);
1289 case 2:
return Packet(mask42);
1290 default:
return Packet(mask43);
1296EIGEN_ALWAYS_INLINE Packet2d bmask<Packet2d>(
const int remaining_rows)
1298 if (remaining_rows == 0) {
1299 return pset1<Packet2d>(
double(0.0));
1301 return Packet2d(mask21);
1305template<
typename Packet,
int N>
1306EIGEN_ALWAYS_INLINE
void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ,
const Packet& pAlpha,
const Packet& pMask)
1308 band<Packet, N>(accZ, pMask);
1310 bscale<Packet, N>(acc, accZ, pAlpha);
// Broadcasts up to N scalars from `a` into the packets a0..a3, marking the
// unused outputs to silence warnings.
// NOTE(review): the `if (N > k)` / `else` structure around each
// pset1/EIGEN_UNUSED_VARIABLE pair was elided by extraction.
1313template<
typename Packet,
int N> EIGEN_ALWAYS_INLINE
void
1314pbroadcastN_old(
const __UNPACK_TYPE__(Packet) *a,
 1315 Packet& a0, Packet& a1, Packet& a2, Packet& a3)
 1317 a0 = pset1<Packet>(a[0]);
 1319 a1 = pset1<Packet>(a[1]);
 1321 EIGEN_UNUSED_VARIABLE(a1);
 1324 a2 = pset1<Packet>(a[2]);
 1326 EIGEN_UNUSED_VARIABLE(a2);
 1329 a3 = pset1<Packet>(a[3]);
 1331 EIGEN_UNUSED_VARIABLE(a3);
1336EIGEN_ALWAYS_INLINE
void pbroadcastN_old<Packet4f,4>(
const float* a, Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
1338 pbroadcast4<Packet4f>(a, a0, a1, a2, a3);
1342EIGEN_ALWAYS_INLINE
void pbroadcastN_old<Packet2d,4>(
const double* a, Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
1344 a1 = pload<Packet2d>(a);
1345 a3 = pload<Packet2d>(a + 2);
1346 a0 = vec_splat(a1, 0);
1347 a1 = vec_splat(a1, 1);
1348 a2 = vec_splat(a3, 0);
1349 a3 = vec_splat(a3, 1);
1352template<
typename Packet,
int N> EIGEN_ALWAYS_INLINE
void
1353pbroadcastN(
const __UNPACK_TYPE__(Packet) *a,
1354 Packet& a0, Packet& a1, Packet& a2, Packet& a3)
1356 a0 = pset1<Packet>(a[0]);
1358 a1 = pset1<Packet>(a[1]);
1360 EIGEN_UNUSED_VARIABLE(a1);
1363 a2 = pset1<Packet>(a[2]);
1365 EIGEN_UNUSED_VARIABLE(a2);
1368 a3 = pset1<Packet>(a[3]);
1370 EIGEN_UNUSED_VARIABLE(a3);
1374template<> EIGEN_ALWAYS_INLINE
void
1375pbroadcastN<Packet4f,4>(
const float *a,
1376 Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
1378 a3 = pload<Packet4f>(a);
1379 a0 = vec_splat(a3, 0);
1380 a1 = vec_splat(a3, 1);
1381 a2 = vec_splat(a3, 2);
1382 a3 = vec_splat(a3, 3);
// ---------------------------------------------------------------------------
// Row-remainder (partial row) depth-peeling helpers for the real GEMM kernel.
// NOTE(review): this region is a garbled source extraction -- original line
// numbers are fused onto the code and some continuation/'} else {' lines are
// missing. Code is kept byte-identical; only comments were added.
// ---------------------------------------------------------------------------
// Expands func(i) for the 8 peel slots used by PEEL_ROW unrolling.
1389#define MICRO_UNROLL_PEEL(func) \
1390 func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7)
// Zeroes the accumulator of peel slots > 0 (slot 0 is zeroed by the caller).
1392#define MICRO_ZERO_PEEL(peel) \
1393 if ((PEEL_ROW > peel) && (peel != 0)) { \
1394 bsetzero<Scalar, Packet, accRows>(accZero##peel); \
1396 EIGEN_UNUSED_VARIABLE(accZero##peel); \
1399#define MICRO_ZERO_PEEL_ROW \
1400 MICRO_UNROLL_PEEL(MICRO_ZERO_PEEL);
// Broadcasts 4 rhs values and performs one rank-1 update (pger) per active
// peel slot, then advances the lhs/rhs streams by a full peel stride.
1402#define MICRO_WORK_PEEL(peel) \
1403 if (PEEL_ROW > peel) { \
1404 pbroadcastN<Packet,accRows>(rhs_ptr + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
1405 pger<accRows, Scalar, Packet, false>(&accZero##peel, lhs_ptr + (remaining_rows * peel), rhsV##peel); \
1407 EIGEN_UNUSED_VARIABLE(rhsV##peel); \
1410#define MICRO_WORK_PEEL_ROW \
1411 Packet rhsV0[4], rhsV1[4], rhsV2[4], rhsV3[4], rhsV4[4], rhsV5[4], rhsV6[4], rhsV7[4]; \
1412 MICRO_UNROLL_PEEL(MICRO_WORK_PEEL); \
1413 lhs_ptr += (remaining_rows * PEEL_ROW); \
1414 rhs_ptr += (accRows * PEEL_ROW);
// Pairwise reduction of the peeled accumulators back into accZero0.
1416#define MICRO_ADD_PEEL(peel, sum) \
1417 if (PEEL_ROW > peel) { \
1418 for (Index i = 0; i < accRows; i++) { \
1419 accZero##sum.packet[i] += accZero##peel.packet[i]; \
1423#define MICRO_ADD_PEEL_ROW \
1424 MICRO_ADD_PEEL(4, 0) MICRO_ADD_PEEL(5, 1) MICRO_ADD_PEEL(6, 2) MICRO_ADD_PEEL(7, 3) \
1425 MICRO_ADD_PEEL(2, 0) MICRO_ADD_PEEL(3, 1) MICRO_ADD_PEEL(1, 0)
1427template<
typename Scalar,
typename Packet,
typename Index, const Index accRows, const Index remaining_rows>
1428EIGEN_ALWAYS_INLINE
void MICRO_EXTRA_ROW(
1429 const Scalar* &lhs_ptr,
1430 const Scalar* &rhs_ptr,
1431 PacketBlock<Packet,accRows> &accZero)
1434 pbroadcastN<Packet,accRows>(rhs_ptr, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);
1435 pger<accRows, Scalar, Packet, false>(&accZero, lhs_ptr, rhsV);
1436 lhs_ptr += remaining_rows;
1440template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows, const Index accCols, const Index remaining_rows>
1441EIGEN_ALWAYS_INLINE
void gemm_unrolled_row_iteration(
1442 const DataMapper& res,
1443 const Scalar* lhs_base,
1444 const Scalar* rhs_base,
1452 const Packet& pAlpha,
1453 const Packet& pMask)
1455 const Scalar* rhs_ptr = rhs_base;
1456 const Scalar* lhs_ptr = lhs_base + row*strideA + remaining_rows*offsetA;
1457 PacketBlock<Packet,accRows> accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7, acc;
1459 bsetzero<Scalar, Packet, accRows>(accZero0);
1461 Index remaining_depth = (col + quad_traits<Scalar>::rows < cols) ? depth : (depth & -quad_traits<Scalar>::rows);
1463 if (remaining_depth >= PEEL_ROW) {
1467 EIGEN_POWER_PREFETCH(rhs_ptr);
1468 EIGEN_POWER_PREFETCH(lhs_ptr);
1470 }
while ((k += PEEL_ROW) + PEEL_ROW <= remaining_depth);
1473 for(; k < remaining_depth; k++)
1475 MICRO_EXTRA_ROW<Scalar, Packet, Index, accRows, remaining_rows>(lhs_ptr, rhs_ptr, accZero0);
1478 if ((remaining_depth == depth) && (rows >= accCols))
1480 bload<DataMapper, Packet, Index, 0, ColMajor, false, accRows>(acc, res, row, 0);
1481 bscale<Packet,accRows>(acc, accZero0, pAlpha, pMask);
1482 res.template storePacketBlock<Packet,accRows>(row, 0, acc);
1484 for(; k < depth; k++)
1487 pbroadcastN<Packet,accRows>(rhs_ptr, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);
1488 pger<accRows, Scalar, Packet, Index, false, remaining_rows>(&accZero0, lhs_ptr, rhsV);
1489 lhs_ptr += remaining_rows;
1493 for(
Index j = 0; j < accRows; j++) {
1494 accZero0.packet[j] = vec_mul(pAlpha, accZero0.packet[j]);
1495 for(
Index i = 0; i < remaining_rows; i++) {
1496 res(row + i, j) += accZero0.packet[j][i];
1502template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows, const Index accCols>
1503EIGEN_ALWAYS_INLINE
void gemm_extra_row(
1504 const DataMapper& res,
1505 const Scalar* lhs_base,
1506 const Scalar* rhs_base,
1514 Index remaining_rows,
1515 const Packet& pAlpha,
1516 const Packet& pMask)
1518 switch(remaining_rows) {
1520 gemm_unrolled_row_iteration<Scalar, Packet, DataMapper, Index, accRows, accCols, 1>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, pAlpha, pMask);
1523 if (
sizeof(Scalar) ==
sizeof(
float)) {
1524 gemm_unrolled_row_iteration<Scalar, Packet, DataMapper, Index, accRows, accCols, 2>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, pAlpha, pMask);
1528 if (
sizeof(Scalar) ==
sizeof(
float)) {
1529 gemm_unrolled_row_iteration<Scalar, Packet, DataMapper, Index, accRows, accCols, 3>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, pAlpha, pMask);
// ---------------------------------------------------------------------------
// Micro-kernel helpers for the real GEMM main loop: up to 8 LHS row-blocks
// are processed per iteration, selected at compile time by unroll_factor.
// NOTE(review): garbled source extraction -- original line numbers are fused
// onto the code and some '} else {' / '#define' lines are missing. Code is
// kept byte-identical; only comments were added.
// ---------------------------------------------------------------------------
1535#define MICRO_UNROLL(func) \
1536 func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7)
// Expands the per-iteration loads (func2) followed by the per-iteration
// multiply-accumulate work (func) for all 8 row-block slots.
1538#define MICRO_UNROLL_WORK(func, func2, peel) \
1539 MICRO_UNROLL(func2); \
1540 func(0,peel) func(1,peel) func(2,peel) func(3,peel) \
1541 func(4,peel) func(5,peel) func(6,peel) func(7,peel)
// Loads one LHS packet for row-block `iter` and advances its pointer.
1543#define MICRO_LOAD_ONE(iter) \
1544 if (unroll_factor > iter) { \
1545 lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr##iter); \
1546 lhs_ptr##iter += accCols; \
1548 EIGEN_UNUSED_VARIABLE(lhsV##iter); \
1551#define MICRO_WORK_ONE(iter, peel) \
1552 if (unroll_factor > iter) { \
1553 pger_common<Packet, false, accRows>(&accZero##iter, lhsV##iter, rhsV##peel); \
1556#define MICRO_TYPE_PEEL4(func, func2, peel) \
1557 if (PEEL > peel) { \
1558 Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \
1559 pbroadcastN<Packet,accRows>(rhs_ptr + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
1560 MICRO_UNROLL_WORK(func, func2, peel) \
1562 EIGEN_UNUSED_VARIABLE(rhsV##peel); \
1565#define MICRO_UNROLL_TYPE_PEEL(M, func, func1, func2) \
1566 Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M], rhsV4[M], rhsV5[M], rhsV6[M], rhsV7[M]; \
1567 func(func1,func2,0); func(func1,func2,1); \
1568 func(func1,func2,2); func(func1,func2,3); \
1569 func(func1,func2,4); func(func1,func2,5); \
1570 func(func1,func2,6); func(func1,func2,7);
// Single (non-peeled) depth step.
1572#define MICRO_UNROLL_TYPE_ONE(M, func, func1, func2) \
1574 func(func1,func2,0);
// One fully peeled depth step: PEEL rhs columns consumed per pass.
1576#define MICRO_ONE_PEEL4 \
1577 MICRO_UNROLL_TYPE_PEEL(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
1578 rhs_ptr += (accRows * PEEL);
// NOTE(review): the '#define MICRO_ONE4 \' header line is missing from this
// extraction; the following two lines are its body.
1581 MICRO_UNROLL_TYPE_ONE(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
1584#define MICRO_DST_PTR_ONE(iter) \
1585 if (unroll_factor > iter) { \
1586 bsetzero<Scalar, Packet, accRows>(accZero##iter); \
1588 EIGEN_UNUSED_VARIABLE(accZero##iter); \
1591#define MICRO_DST_PTR MICRO_UNROLL(MICRO_DST_PTR_ONE)
// Computes the packed-LHS base pointer of each active row-block.
1593#define MICRO_SRC_PTR_ONE(iter) \
1594 if (unroll_factor > iter) { \
1595 lhs_ptr##iter = lhs_base + ( (row/accCols) + iter )*strideA*accCols; \
1597 EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \
1600#define MICRO_SRC_PTR MICRO_UNROLL(MICRO_SRC_PTR_ONE)
// Prefetches each active LHS stream.
1602#define MICRO_PREFETCH_ONE(iter) \
1603 if (unroll_factor > iter) { \
1604 EIGEN_POWER_PREFETCH(lhs_ptr##iter); \
1607#define MICRO_PREFETCH MICRO_UNROLL(MICRO_PREFETCH_ONE)
// Loads the result block, scales the accumulator by alpha, stores it back.
1609#define MICRO_STORE_ONE(iter) \
1610 if (unroll_factor > iter) { \
1611 bload<DataMapper, Packet, Index, 0, ColMajor, false, accRows>(acc, res, row + iter*accCols, 0); \
1612 bscale<Packet,accRows>(acc, accZero##iter, pAlpha); \
1613 res.template storePacketBlock<Packet,accRows>(row + iter*accCols, 0, acc); \
1616#define MICRO_STORE MICRO_UNROLL(MICRO_STORE_ONE)
1618template<
int unroll_factor,
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows, const Index accCols>
1619EIGEN_STRONG_INLINE
void gemm_unrolled_iteration(
1620 const DataMapper& res,
1621 const Scalar* lhs_base,
1622 const Scalar* rhs_base,
1626 const Packet& pAlpha)
1628 const Scalar* rhs_ptr = rhs_base;
1629 const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, * lhs_ptr7 = NULL;
1630 PacketBlock<Packet,accRows> accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7;
1631 PacketBlock<Packet,accRows> acc;
1637 for(; k + PEEL <= depth; k+= PEEL)
1639 EIGEN_POWER_PREFETCH(rhs_ptr);
1643 for(; k < depth; k++)
1649 row += unroll_factor*accCols;
1652template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows, const Index accCols>
1653EIGEN_ALWAYS_INLINE
void gemm_cols(
1654 const DataMapper& res,
1655 const Scalar* blockA,
1656 const Scalar* blockB,
1665 Index remaining_rows,
1666 const Packet& pAlpha,
1667 const Packet& pMask)
1669 const DataMapper res3 = res.getSubMapper(0, col);
1671 const Scalar* rhs_base = blockB + col*strideB + accRows*offsetB;
1672 const Scalar* lhs_base = blockA + accCols*offsetA;
1676 while(row + MAX_UNROLL*accCols <= rows) {
1677 gemm_unrolled_iteration<MAX_UNROLL, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1679 switch( (rows-row)/accCols ) {
1682 gemm_unrolled_iteration<7, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1687 gemm_unrolled_iteration<6, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1692 gemm_unrolled_iteration<5, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1697 gemm_unrolled_iteration<4, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1702 gemm_unrolled_iteration<3, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1707 gemm_unrolled_iteration<2, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1712 gemm_unrolled_iteration<1, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1720 if(remaining_rows > 0)
1722 gemm_extra_row<Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, blockA, rhs_base, depth, strideA, offsetA, row, col, rows, cols, remaining_rows, pAlpha, pMask);
1726template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accCols>
1727EIGEN_STRONG_INLINE
void gemm_extra_cols(
1728 const DataMapper& res,
1729 const Scalar* blockA,
1730 const Scalar* blockB,
1739 Index remaining_rows,
1740 const Packet& pAlpha,
1741 const Packet& pMask)
1743 for (; col < cols; col++) {
1744 gemm_cols<Scalar, Packet, DataMapper, Index, 1, accCols>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlpha, pMask);
1751template<
typename Scalar,
typename Index,
typename Packet,
typename RhsPacket,
typename DataMapper, const Index accRows, const Index accCols>
1752EIGEN_STRONG_INLINE
void gemm(
const DataMapper& res,
const Scalar* blockA,
const Scalar* blockB,
Index rows,
Index depth,
Index cols, Scalar alpha,
Index strideA,
Index strideB,
Index offsetA,
Index offsetB)
1754 const Index remaining_rows = rows % accCols;
1756 if( strideA == -1 ) strideA = depth;
1757 if( strideB == -1 ) strideB = depth;
1759 const Packet pAlpha = pset1<Packet>(alpha);
1760 const Packet pMask = bmask<Packet>((
const int)(remaining_rows));
1763 for(; col + accRows <= cols; col += accRows)
1765 gemm_cols<Scalar, Packet, DataMapper, Index, accRows, accCols>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlpha, pMask);
1768 gemm_extra_cols<Scalar, Packet, DataMapper, Index, accCols>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlpha, pMask);
// Helper constants for the complex kernels:
//  - accColsC: complex columns per packet block (two scalars per complex).
//  - advanceRows/advanceCols: packed-operand stride multiplier -- 2 when the
//    operand has separate real and imaginary planes, 1 when it is real.
//  - PEEL_COMPLEX / PEEL_COMPLEX_ROW: depth-peeling factors for the complex
//    main loop and the partial-row loop.
1771#define accColsC (accCols / 2)
1772#define advanceRows ((LhsIsReal) ? 1 : 2)
1773#define advanceCols ((RhsIsReal) ? 1 : 2)
1776#define PEEL_COMPLEX 3
1777#define PEEL_COMPLEX_ROW 3
// ---------------------------------------------------------------------------
// Row-remainder depth-peeling helpers for the complex GEMM kernel: 4 peel
// slots, separate real/imag accumulators and rhs streams.
// NOTE(review): garbled source extraction -- original line numbers are fused
// onto the code and some continuation/'} else {' lines are missing. Code is
// kept byte-identical; only comments were added.
// ---------------------------------------------------------------------------
1779#define MICRO_COMPLEX_UNROLL_PEEL(func) \
1780 func(0) func(1) func(2) func(3)
// Zeroes the real and imaginary accumulators of peel slots > 0.
1782#define MICRO_COMPLEX_ZERO_PEEL(peel) \
1783 if ((PEEL_COMPLEX_ROW > peel) && (peel != 0)) { \
1784 bsetzero<Scalar, Packet, accRows>(accReal##peel); \
1785 bsetzero<Scalar, Packet, accRows>(accImag##peel); \
1787 EIGEN_UNUSED_VARIABLE(accReal##peel); \
1788 EIGEN_UNUSED_VARIABLE(accImag##peel); \
1791#define MICRO_COMPLEX_ZERO_PEEL_ROW \
1792 MICRO_COMPLEX_UNROLL_PEEL(MICRO_COMPLEX_ZERO_PEEL);
// Broadcast + complex rank-1 update (pgerc) for each active peel slot; the
// imaginary rhs stream is only read when RhsIsReal is false. Afterwards the
// lhs/rhs stream pointers are advanced by a full peel stride.
1794#define MICRO_COMPLEX_WORK_PEEL(peel) \
1795 if (PEEL_COMPLEX_ROW > peel) { \
1796 pbroadcastN_old<Packet,accRows>(rhs_ptr_real + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
1797 if(!RhsIsReal) pbroadcastN_old<Packet,accRows>(rhs_ptr_imag + (accRows * peel), rhsVi##peel[0], rhsVi##peel[1], rhsVi##peel[2], rhsVi##peel[3]); \
1798 pgerc<accRows, Scalar, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##peel, &accImag##peel, lhs_ptr_real + (remaining_rows * peel), lhs_ptr_imag + (remaining_rows * peel), rhsV##peel, rhsVi##peel); \
1800 EIGEN_UNUSED_VARIABLE(rhsV##peel); \
1801 EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
1804#define MICRO_COMPLEX_WORK_PEEL_ROW \
1805 Packet rhsV0[4], rhsV1[4], rhsV2[4], rhsV3[4]; \
1806 Packet rhsVi0[4], rhsVi1[4], rhsVi2[4], rhsVi3[4]; \
1807 MICRO_COMPLEX_UNROLL_PEEL(MICRO_COMPLEX_WORK_PEEL); \
1808 lhs_ptr_real += (remaining_rows * PEEL_COMPLEX_ROW); \
1809 if(!LhsIsReal) lhs_ptr_imag += (remaining_rows * PEEL_COMPLEX_ROW); \
1810 else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag); \
1811 rhs_ptr_real += (accRows * PEEL_COMPLEX_ROW); \
1812 if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX_ROW); \
1813 else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);
// Pairwise reduction of the peeled real/imag accumulators into slot 0.
1815#define MICRO_COMPLEX_ADD_PEEL(peel, sum) \
1816 if (PEEL_COMPLEX_ROW > peel) { \
1817 for (Index i = 0; i < accRows; i++) { \
1818 accReal##sum.packet[i] += accReal##peel.packet[i]; \
1819 accImag##sum.packet[i] += accImag##peel.packet[i]; \
1823#define MICRO_COMPLEX_ADD_PEEL_ROW \
1824 MICRO_COMPLEX_ADD_PEEL(2, 0) MICRO_COMPLEX_ADD_PEEL(3, 1) \
1825 MICRO_COMPLEX_ADD_PEEL(1, 0)
1827template<
typename Scalar,
typename Packet,
typename Index, const Index accRows,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal, const Index remaining_rows>
1828EIGEN_ALWAYS_INLINE
void MICRO_COMPLEX_EXTRA_ROW(
1829 const Scalar* &lhs_ptr_real,
const Scalar* &lhs_ptr_imag,
1830 const Scalar* &rhs_ptr_real,
const Scalar* &rhs_ptr_imag,
1831 PacketBlock<Packet,accRows> &accReal, PacketBlock<Packet,accRows> &accImag)
1833 Packet rhsV[4], rhsVi[4];
1834 pbroadcastN_old<Packet,accRows>(rhs_ptr_real, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);
1835 if(!RhsIsReal) pbroadcastN_old<Packet,accRows>(rhs_ptr_imag, rhsVi[0], rhsVi[1], rhsVi[2], rhsVi[3]);
1836 pgerc<accRows, Scalar, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi);
1837 lhs_ptr_real += remaining_rows;
1838 if(!LhsIsReal) lhs_ptr_imag += remaining_rows;
1839 else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);
1840 rhs_ptr_real += accRows;
1841 if(!RhsIsReal) rhs_ptr_imag += accRows;
1842 else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);
1845template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal, const Index remaining_rows>
1846EIGEN_ALWAYS_INLINE
void gemm_unrolled_complex_row_iteration(
1847 const DataMapper& res,
1848 const Scalar* lhs_base,
1849 const Scalar* rhs_base,
1858 const Packet& pAlphaReal,
1859 const Packet& pAlphaImag,
1860 const Packet& pMask)
1862 const Scalar* rhs_ptr_real = rhs_base;
1863 const Scalar* rhs_ptr_imag = NULL;
1864 if(!RhsIsReal) rhs_ptr_imag = rhs_base + accRows*strideB;
1865 else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);
1866 const Scalar* lhs_ptr_real = lhs_base + advanceRows*row*strideA + remaining_rows*offsetA;
1867 const Scalar* lhs_ptr_imag = NULL;
1868 if(!LhsIsReal) lhs_ptr_imag = lhs_ptr_real + remaining_rows*strideA;
1869 else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);
1870 PacketBlock<Packet,accRows> accReal0, accImag0, accReal1, accImag1, accReal2, accImag2, accReal3, accImag3;
1871 PacketBlock<Packet,accRows> taccReal, taccImag;
1872 PacketBlock<Packetc,accRows> acc0, acc1;
1873 PacketBlock<Packetc,accRows*2> tRes;
1875 bsetzero<Scalar, Packet, accRows>(accReal0);
1876 bsetzero<Scalar, Packet, accRows>(accImag0);
1878 Index remaining_depth = (col + quad_traits<Scalar>::rows < cols) ? depth : (depth & -quad_traits<Scalar>::rows);
1880 if (remaining_depth >= PEEL_COMPLEX_ROW) {
1881 MICRO_COMPLEX_ZERO_PEEL_ROW
1884 EIGEN_POWER_PREFETCH(rhs_ptr_real);
1886 EIGEN_POWER_PREFETCH(rhs_ptr_imag);
1888 EIGEN_POWER_PREFETCH(lhs_ptr_real);
1890 EIGEN_POWER_PREFETCH(lhs_ptr_imag);
1892 MICRO_COMPLEX_WORK_PEEL_ROW
1893 }
while ((k += PEEL_COMPLEX_ROW) + PEEL_COMPLEX_ROW <= remaining_depth);
1894 MICRO_COMPLEX_ADD_PEEL_ROW
1896 for(; k < remaining_depth; k++)
1898 MICRO_COMPLEX_EXTRA_ROW<Scalar, Packet, Index, accRows, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal, remaining_rows>(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real, rhs_ptr_imag, accReal0, accImag0);
1901 if ((remaining_depth == depth) && (rows >= accCols))
1903 bload<DataMapper, Packetc, Index, accColsC, ColMajor, true, accRows>(tRes, res, row, 0);
1904 bscalec<Packet,accRows>(accReal0, accImag0, pAlphaReal, pAlphaImag, taccReal, taccImag, pMask);
1905 bcouple<Packet, Packetc, accRows>(taccReal, taccImag, tRes, acc0, acc1);
1906 res.template storePacketBlock<Packetc,accRows>(row + 0, 0, acc0);
1907 res.template storePacketBlock<Packetc,accRows>(row + accColsC, 0, acc1);
1909 for(; k < depth; k++)
1911 Packet rhsV[4], rhsVi[4];
1912 pbroadcastN_old<Packet,accRows>(rhs_ptr_real, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);
1913 if(!RhsIsReal) pbroadcastN_old<Packet,accRows>(rhs_ptr_imag, rhsVi[0], rhsVi[1], rhsVi[2], rhsVi[3]);
1914 pgerc<accRows, Scalar, Packet, Index, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal, remaining_rows>(&accReal0, &accImag0, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi);
1915 lhs_ptr_real += remaining_rows;
1916 if(!LhsIsReal) lhs_ptr_imag += remaining_rows;
1917 rhs_ptr_real += accRows;
1918 if(!RhsIsReal) rhs_ptr_imag += accRows;
1921 bscalec<Packet,accRows>(accReal0, accImag0, pAlphaReal, pAlphaImag, taccReal, taccImag);
1922 bcouple_common<Packet, Packetc, accRows>(taccReal, taccImag, acc0, acc1);
1924 if ((
sizeof(Scalar) ==
sizeof(
float)) && (remaining_rows == 1))
1926 for(
Index j = 0; j < accRows; j++) {
1927 res(row + 0, j) += pfirst<Packetc>(acc0.packet[j]);
1930 for(
Index j = 0; j < accRows; j++) {
1931 PacketBlock<Packetc,1> acc2;
1932 acc2.packet[0] = res.template loadPacket<Packetc>(row + 0, j) + acc0.packet[j];
1933 res.template storePacketBlock<Packetc,1>(row + 0, j, acc2);
1934 if(remaining_rows > accColsC) {
1935 res(row + accColsC, j) += pfirst<Packetc>(acc1.packet[j]);
1942template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1943EIGEN_ALWAYS_INLINE
void gemm_complex_extra_row(
1944 const DataMapper& res,
1945 const Scalar* lhs_base,
1946 const Scalar* rhs_base,
1955 Index remaining_rows,
1956 const Packet& pAlphaReal,
1957 const Packet& pAlphaImag,
1958 const Packet& pMask)
1960 switch(remaining_rows) {
1962 gemm_unrolled_complex_row_iteration<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal, 1>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, pAlphaReal, pAlphaImag, pMask);
1965 if (
sizeof(Scalar) ==
sizeof(
float)) {
1966 gemm_unrolled_complex_row_iteration<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal, 2>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, pAlphaReal, pAlphaImag, pMask);
1970 if (
sizeof(Scalar) ==
sizeof(
float)) {
1971 gemm_unrolled_complex_row_iteration<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal, 3>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, pAlphaReal, pAlphaImag, pMask);
// ---------------------------------------------------------------------------
// Micro-kernel helpers for the complex GEMM main loop: up to 4 LHS
// row-blocks per iteration, with separate real/imag accumulators.
// NOTE(review): garbled source extraction -- original line numbers are fused
// onto the code and some '} else {' lines are missing. Code is kept
// byte-identical; only comments were added.
// ---------------------------------------------------------------------------
1977#define MICRO_COMPLEX_UNROLL(func) \
1978 func(0) func(1) func(2) func(3)
// Expands the per-iteration loads (func2) then the per-iteration work (func).
1980#define MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \
1981 MICRO_COMPLEX_UNROLL(func2); \
1982 func(0,peel) func(1,peel) func(2,peel) func(3,peel)
// Loads the real (and, when present, imaginary at imag_delta) LHS packet of
// row-block `iter` and advances its pointer.
1984#define MICRO_COMPLEX_LOAD_ONE(iter) \
1985 if (unroll_factor > iter) { \
1986 lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr_real##iter); \
1988 lhsVi##iter = ploadLhs<Scalar, Packet>(lhs_ptr_real##iter + imag_delta); \
1990 EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
1992 lhs_ptr_real##iter += accCols; \
1994 EIGEN_UNUSED_VARIABLE(lhsV##iter); \
1995 EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
1998#define MICRO_COMPLEX_WORK_ONE4(iter, peel) \
1999 if (unroll_factor > iter) { \
2000 pgerc_common<accRows, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \
2003#define MICRO_COMPLEX_TYPE_PEEL4(func, func2, peel) \
2004 if (PEEL_COMPLEX > peel) { \
2005 Packet lhsV0, lhsV1, lhsV2, lhsV3; \
2006 Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3; \
2007 pbroadcastN_old<Packet,accRows>(rhs_ptr_real + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
2009 pbroadcastN_old<Packet,accRows>(rhs_ptr_imag + (accRows * peel), rhsVi##peel[0], rhsVi##peel[1], rhsVi##peel[2], rhsVi##peel[3]); \
2011 EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
2013 MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \
2015 EIGEN_UNUSED_VARIABLE(rhsV##peel); \
2016 EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
2019#define MICRO_COMPLEX_UNROLL_TYPE_PEEL(M, func, func1, func2) \
2020 Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M]; \
2021 Packet rhsVi0[M], rhsVi1[M], rhsVi2[M], rhsVi3[M]; \
2022 func(func1,func2,0); func(func1,func2,1); \
2023 func(func1,func2,2); func(func1,func2,3);
// Single (non-peeled) depth step.
2025#define MICRO_COMPLEX_UNROLL_TYPE_ONE(M, func, func1, func2) \
2026 Packet rhsV0[M], rhsVi0[M];\
2027 func(func1,func2,0);
// One fully peeled depth step: PEEL_COMPLEX rhs columns consumed per pass.
2029#define MICRO_COMPLEX_ONE_PEEL4 \
2030 MICRO_COMPLEX_UNROLL_TYPE_PEEL(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE); \
2031 rhs_ptr_real += (accRows * PEEL_COMPLEX); \
2032 if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX);
2034#define MICRO_COMPLEX_ONE4 \
2035 MICRO_COMPLEX_UNROLL_TYPE_ONE(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE); \
2036 rhs_ptr_real += accRows; \
2037 if(!RhsIsReal) rhs_ptr_imag += accRows;
// Zeroes the real/imag accumulators of each active row-block.
2039#define MICRO_COMPLEX_DST_PTR_ONE(iter) \
2040 if (unroll_factor > iter) { \
2041 bsetzero<Scalar, Packet, accRows>(accReal##iter); \
2042 bsetzero<Scalar, Packet, accRows>(accImag##iter); \
2044 EIGEN_UNUSED_VARIABLE(accReal##iter); \
2045 EIGEN_UNUSED_VARIABLE(accImag##iter); \
2048#define MICRO_COMPLEX_DST_PTR MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_DST_PTR_ONE)
// Computes the packed-LHS base pointer of each active row-block.
2050#define MICRO_COMPLEX_SRC_PTR_ONE(iter) \
2051 if (unroll_factor > iter) { \
2052 lhs_ptr_real##iter = lhs_base + ( ((advanceRows*row)/accCols) + iter*advanceRows )*strideA*accCols; \
2054 EIGEN_UNUSED_VARIABLE(lhs_ptr_real##iter); \
2057#define MICRO_COMPLEX_SRC_PTR MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_SRC_PTR_ONE)
// Prefetches each active LHS stream.
2059#define MICRO_COMPLEX_PREFETCH_ONE(iter) \
2060 if (unroll_factor > iter) { \
2061 EIGEN_POWER_PREFETCH(lhs_ptr_real##iter); \
2064#define MICRO_COMPLEX_PREFETCH MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_PREFETCH_ONE)
// Loads the result block, scales/couples real+imag accumulators, stores back.
2066#define MICRO_COMPLEX_STORE_ONE(iter) \
2067 if (unroll_factor > iter) { \
2068 bload<DataMapper, Packetc, Index, accColsC, ColMajor, true, accRows>(tRes, res, row + iter*accCols, 0); \
2069 bscalec<Packet,accRows>(accReal##iter, accImag##iter, pAlphaReal, pAlphaImag, taccReal, taccImag); \
2070 bcouple<Packet, Packetc, accRows>(taccReal, taccImag, tRes, acc0, acc1); \
2071 res.template storePacketBlock<Packetc,accRows>(row + iter*accCols + 0, 0, acc0); \
2072 res.template storePacketBlock<Packetc,accRows>(row + iter*accCols + accColsC, 0, acc1); \
2075#define MICRO_COMPLEX_STORE MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_STORE_ONE)
2077template<
int unroll_factor,
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2078EIGEN_STRONG_INLINE
void gemm_complex_unrolled_iteration(
2079 const DataMapper& res,
2080 const Scalar* lhs_base,
2081 const Scalar* rhs_base,
2086 const Packet& pAlphaReal,
2087 const Packet& pAlphaImag)
2089 const Scalar* rhs_ptr_real = rhs_base;
2090 const Scalar* rhs_ptr_imag = NULL;
2091 const Index imag_delta = accCols*strideA;
2093 rhs_ptr_imag = rhs_base + accRows*strideB;
2095 EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);
2097 const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_real1 = NULL;
2098 const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_real3 = NULL;
2099 PacketBlock<Packet,accRows> accReal0, accImag0, accReal1, accImag1;
2100 PacketBlock<Packet,accRows> accReal2, accImag2, accReal3, accImag3;
2101 PacketBlock<Packet,accRows> taccReal, taccImag;
2102 PacketBlock<Packetc,accRows> acc0, acc1;
2103 PacketBlock<Packetc,accRows*2> tRes;
2105 MICRO_COMPLEX_SRC_PTR
2106 MICRO_COMPLEX_DST_PTR
2109 for(; k + PEEL_COMPLEX <= depth; k+= PEEL_COMPLEX)
2111 EIGEN_POWER_PREFETCH(rhs_ptr_real);
2113 EIGEN_POWER_PREFETCH(rhs_ptr_imag);
2115 MICRO_COMPLEX_PREFETCH
2116 MICRO_COMPLEX_ONE_PEEL4
2118 for(; k < depth; k++)
2124 row += unroll_factor*accCols;
2127template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2128EIGEN_ALWAYS_INLINE
void gemm_complex_cols(
2129 const DataMapper& res,
2130 const Scalar* blockA,
2131 const Scalar* blockB,
2140 Index remaining_rows,
2141 const Packet& pAlphaReal,
2142 const Packet& pAlphaImag,
2143 const Packet& pMask)
2145 const DataMapper res3 = res.getSubMapper(0, col);
2147 const Scalar* rhs_base = blockB + advanceCols*col*strideB + accRows*offsetB;
2148 const Scalar* lhs_base = blockA + accCols*offsetA;
2151#define MAX_COMPLEX_UNROLL 3
2152 while(row + MAX_COMPLEX_UNROLL*accCols <= rows) {
2153 gemm_complex_unrolled_iteration<MAX_COMPLEX_UNROLL, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
2155 switch( (rows-row)/accCols ) {
2156#if MAX_COMPLEX_UNROLL > 4
2158 gemm_complex_unrolled_iteration<4, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
2161#if MAX_COMPLEX_UNROLL > 3
2163 gemm_complex_unrolled_iteration<3, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
2166#if MAX_COMPLEX_UNROLL > 2
2168 gemm_complex_unrolled_iteration<2, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
2171#if MAX_COMPLEX_UNROLL > 1
2173 gemm_complex_unrolled_iteration<1, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
2179#undef MAX_COMPLEX_UNROLL
2181 if(remaining_rows > 0)
2183 gemm_complex_extra_row<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, blockA, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
2187template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2188EIGEN_STRONG_INLINE
void gemm_complex_extra_cols(
2189 const DataMapper& res,
2190 const Scalar* blockA,
2191 const Scalar* blockB,
2200 Index remaining_rows,
2201 const Packet& pAlphaReal,
2202 const Packet& pAlphaImag,
2203 const Packet& pMask)
2205 for (; col < cols; col++) {
2206 gemm_complex_cols<Scalar, Packet, Packetc, DataMapper, Index, 1, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
2210template<
typename LhsScalar,
typename RhsScalar,
typename Scalarc,
typename Scalar,
typename Index,
typename Packet,
typename Packetc,
typename RhsPacket,
typename DataMapper, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2211EIGEN_STRONG_INLINE
void gemm_complex(
const DataMapper& res,
const LhsScalar* blockAc,
const RhsScalar* blockBc,
Index rows,
Index depth,
Index cols, Scalarc alpha,
Index strideA,
Index strideB,
Index offsetA,
Index offsetB)
2213 const Index remaining_rows = rows % accCols;
2215 if( strideA == -1 ) strideA = depth;
2216 if( strideB == -1 ) strideB = depth;
2218 const Packet pAlphaReal = pset1<Packet>(alpha.real());
2219 const Packet pAlphaImag = pset1<Packet>(alpha.imag());
2220 const Packet pMask = bmask<Packet>((
const int)(remaining_rows));
2222 const Scalar* blockA = (Scalar *) blockAc;
2223 const Scalar* blockB = (Scalar *) blockBc;
2226 for(; col + accRows <= cols; col += accRows)
2228 gemm_complex_cols<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
2231 gemm_complex_extra_cols<Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
2241template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2242struct gemm_pack_lhs<double,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2244 void operator()(
double* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2247template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2248void gemm_pack_lhs<double, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>
2249 ::operator()(
double* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2251 dhs_pack<double, Index, DataMapper, Packet2d, ColMajor, PanelMode, true> pack;
2252 pack(blockA, lhs, depth, rows, stride, offset);
2255template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2256struct gemm_pack_lhs<double,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2258 void operator()(
double* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2261template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2262void gemm_pack_lhs<double, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>
2263 ::operator()(
double* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2265 dhs_pack<double, Index, DataMapper, Packet2d, RowMajor, PanelMode, true> pack;
2266 pack(blockA, lhs, depth, rows, stride, offset);
2269#if EIGEN_ALTIVEC_USE_CUSTOM_PACK
2270template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2271struct gemm_pack_rhs<double,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2273 void operator()(
double* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2276template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2277void gemm_pack_rhs<double, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
2278 ::operator()(
double* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2280 dhs_pack<double, Index, DataMapper, Packet2d, ColMajor, PanelMode, false> pack;
2281 pack(blockB, rhs, depth, cols, stride, offset);
2284template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2285struct gemm_pack_rhs<double,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2287 void operator()(
double* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2290template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2291void gemm_pack_rhs<double, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
2292 ::operator()(
double* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2294 dhs_pack<double, Index, DataMapper, Packet2d, RowMajor, PanelMode, false> pack;
2295 pack(blockB, rhs, depth, cols, stride, offset);
2299template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2300struct gemm_pack_lhs<float,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2302 void operator()(
float* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2305template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2306void gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>
2307 ::operator()(
float* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2309 dhs_pack<float, Index, DataMapper, Packet4f, RowMajor, PanelMode, true> pack;
2310 pack(blockA, lhs, depth, rows, stride, offset);
2313template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2314struct gemm_pack_lhs<float,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2316 void operator()(
float* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2319template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2320void gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>
2321 ::operator()(
float* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2323 dhs_pack<float, Index, DataMapper, Packet4f, ColMajor, PanelMode, true> pack;
2324 pack(blockA, lhs, depth, rows, stride, offset);
2327template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2328struct gemm_pack_lhs<std::complex<float>,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2330 void operator()(std::complex<float>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2333template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2334void gemm_pack_lhs<std::complex<float>,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2335 ::operator()(std::complex<float>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2337 dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, RowMajor, Conjugate, PanelMode, true> pack;
2338 pack(blockA, lhs, depth, rows, stride, offset);
2341template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2342struct gemm_pack_lhs<std::complex<float>,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2344 void operator()(std::complex<float>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2347template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2348void gemm_pack_lhs<std::complex<float>,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2349 ::operator()(std::complex<float>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2351 dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, ColMajor, Conjugate, PanelMode, true> pack;
2352 pack(blockA, lhs, depth, rows, stride, offset);
2355#if EIGEN_ALTIVEC_USE_CUSTOM_PACK
2356template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2357struct gemm_pack_rhs<float,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2359 void operator()(
float* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2362template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2363void gemm_pack_rhs<float, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
2364 ::operator()(
float* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2366 dhs_pack<float, Index, DataMapper, Packet4f, ColMajor, PanelMode, false> pack;
2367 pack(blockB, rhs, depth, cols, stride, offset);
2370template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2371struct gemm_pack_rhs<float,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2373 void operator()(
float* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2376template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2377void gemm_pack_rhs<float, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
2378 ::operator()(
float* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2380 dhs_pack<float, Index, DataMapper, Packet4f, RowMajor, PanelMode, false> pack;
2381 pack(blockB, rhs, depth, cols, stride, offset);
2385template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2386struct gemm_pack_rhs<std::complex<float>,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2388 void operator()(std::complex<float>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2391template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2392void gemm_pack_rhs<std::complex<float>,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2393 ::operator()(std::complex<float>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2395 dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, ColMajor, Conjugate, PanelMode, false> pack;
2396 pack(blockB, rhs, depth, cols, stride, offset);
2399template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2400struct gemm_pack_rhs<std::complex<float>,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2402 void operator()(std::complex<float>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2405template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2406void gemm_pack_rhs<std::complex<float>,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2407 ::operator()(std::complex<float>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2409 dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, RowMajor, Conjugate, PanelMode, false> pack;
2410 pack(blockB, rhs, depth, cols, stride, offset);
2413template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2414struct gemm_pack_lhs<std::complex<double>,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2416 void operator()(std::complex<double>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2419template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2420void gemm_pack_lhs<std::complex<double>,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2421 ::operator()(std::complex<double>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2423 dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, RowMajor, Conjugate, PanelMode, true> pack;
2424 pack(blockA, lhs, depth, rows, stride, offset);
2427template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2428struct gemm_pack_lhs<std::complex<double>,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2430 void operator()(std::complex<double>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2433template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2434void gemm_pack_lhs<std::complex<double>,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2435 ::operator()(std::complex<double>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2437 dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, ColMajor, Conjugate, PanelMode, true> pack;
2438 pack(blockA, lhs, depth, rows, stride, offset);
2441template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2442struct gemm_pack_rhs<std::complex<double>,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2444 void operator()(std::complex<double>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2447template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2448void gemm_pack_rhs<std::complex<double>,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2449 ::operator()(std::complex<double>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2451 dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, ColMajor, Conjugate, PanelMode, false> pack;
2452 pack(blockB, rhs, depth, cols, stride, offset);
2455template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2456struct gemm_pack_rhs<std::complex<double>,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2458 void operator()(std::complex<double>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2461template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2462void gemm_pack_rhs<std::complex<double>,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2463 ::operator()(std::complex<double>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2465 dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, RowMajor, Conjugate, PanelMode, false> pack;
2466 pack(blockB, rhs, depth, cols, stride, offset);
2470template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2471struct gebp_kernel<float, float,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2473 typedef typename quad_traits<float>::vectortype Packet;
2474 typedef typename quad_traits<float>::rhstype RhsPacket;
2476 void operator()(
const DataMapper& res,
const float* blockA,
const float* blockB,
2481template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2482void gebp_kernel<float, float, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2483 ::operator()(
const DataMapper& res,
const float* blockA,
const float* blockB,
2487 const Index accRows = quad_traits<float>::rows;
2488 const Index accCols = quad_traits<float>::size;
2489 void (*gemm_function)(
const DataMapper&,
const float*,
const float*,
Index,
Index,
Index, float,
Index,
Index,
Index,
Index);
2491 #if defined(EIGEN_ALTIVEC_MMA_ONLY)
2493 gemm_function = &Eigen::internal::gemmMMA<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2494 #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
2495 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2496 gemm_function = &Eigen::internal::gemmMMA<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2499 gemm_function = &Eigen::internal::gemm<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2502 gemm_function = &Eigen::internal::gemm<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2504 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2507template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2508struct gebp_kernel<std::complex<float>, std::complex<float>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2510 typedef Packet4f Packet;
2511 typedef Packet2cf Packetc;
2512 typedef Packet4f RhsPacket;
2514 void operator()(
const DataMapper& res,
const std::complex<float>* blockA,
const std::complex<float>* blockB,
2519template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2520void gebp_kernel<std::complex<float>, std::complex<float>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2521 ::operator()(
const DataMapper& res,
const std::complex<float>* blockA,
const std::complex<float>* blockB,
2525 const Index accRows = quad_traits<float>::rows;
2526 const Index accCols = quad_traits<float>::size;
2527 void (*gemm_function)(
const DataMapper&,
const std::complex<float>*,
const std::complex<float>*,
2530 #if defined(EIGEN_ALTIVEC_MMA_ONLY)
2532 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2533 #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
2534 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2535 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2538 gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2541 gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2543 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2546template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2547struct gebp_kernel<float, std::complex<float>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2549 typedef Packet4f Packet;
2550 typedef Packet2cf Packetc;
2551 typedef Packet4f RhsPacket;
2553 void operator()(
const DataMapper& res,
const float* blockA,
const std::complex<float>* blockB,
2558template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2559void gebp_kernel<float, std::complex<float>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2560 ::operator()(
const DataMapper& res,
const float* blockA,
const std::complex<float>* blockB,
2564 const Index accRows = quad_traits<float>::rows;
2565 const Index accCols = quad_traits<float>::size;
2566 void (*gemm_function)(
const DataMapper&,
const float*,
const std::complex<float>*,
2568 #if defined(EIGEN_ALTIVEC_MMA_ONLY)
2570 gemm_function = &Eigen::internal::gemm_complexMMA<float, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2571 #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
2572 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2573 gemm_function = &Eigen::internal::gemm_complexMMA<float, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2576 gemm_function = &Eigen::internal::gemm_complex<float, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2579 gemm_function = &Eigen::internal::gemm_complex<float, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2581 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2584template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2585struct gebp_kernel<std::complex<float>, float,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2587 typedef Packet4f Packet;
2588 typedef Packet2cf Packetc;
2589 typedef Packet4f RhsPacket;
2591 void operator()(
const DataMapper& res,
const std::complex<float>* blockA,
const float* blockB,
2596template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2597void gebp_kernel<std::complex<float>, float,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2598 ::operator()(
const DataMapper& res,
const std::complex<float>* blockA,
const float* blockB,
2602 const Index accRows = quad_traits<float>::rows;
2603 const Index accCols = quad_traits<float>::size;
2604 void (*gemm_function)(
const DataMapper&,
const std::complex<float>*,
const float*,
2606 #if defined(EIGEN_ALTIVEC_MMA_ONLY)
2608 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, float, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2609 #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
2610 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2611 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, float, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2614 gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, float, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2617 gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, float, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2619 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2622template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2623struct gebp_kernel<double, double,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2625 typedef typename quad_traits<double>::vectortype Packet;
2626 typedef typename quad_traits<double>::rhstype RhsPacket;
2628 void operator()(
const DataMapper& res,
const double* blockA,
const double* blockB,
2633template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2634void gebp_kernel<double, double, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2635 ::operator()(
const DataMapper& res,
const double* blockA,
const double* blockB,
2639 const Index accRows = quad_traits<double>::rows;
2640 const Index accCols = quad_traits<double>::size;
2641 void (*gemm_function)(
const DataMapper&,
const double*,
const double*,
Index,
Index,
Index, double,
Index,
Index,
Index,
Index);
2643 #if defined(EIGEN_ALTIVEC_MMA_ONLY)
2645 gemm_function = &Eigen::internal::gemmMMA<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2646 #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
2647 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2648 gemm_function = &Eigen::internal::gemmMMA<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2651 gemm_function = &Eigen::internal::gemm<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2654 gemm_function = &Eigen::internal::gemm<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2656 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2659template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2660struct gebp_kernel<std::complex<double>, std::complex<double>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2662 typedef quad_traits<double>::vectortype Packet;
2663 typedef Packet1cd Packetc;
2664 typedef quad_traits<double>::rhstype RhsPacket;
2666 void operator()(
const DataMapper& res,
const std::complex<double>* blockA,
const std::complex<double>* blockB,
2671template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2672void gebp_kernel<std::complex<double>, std::complex<double>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2673 ::operator()(
const DataMapper& res,
const std::complex<double>* blockA,
const std::complex<double>* blockB,
2677 const Index accRows = quad_traits<double>::rows;
2678 const Index accCols = quad_traits<double>::size;
2679 void (*gemm_function)(
const DataMapper&,
const std::complex<double>*,
const std::complex<double>*,
2681 #if defined(EIGEN_ALTIVEC_MMA_ONLY)
2683 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2684 #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
2685 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2686 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2689 gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2692 gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2694 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2697template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2698struct gebp_kernel<std::complex<double>, double,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2700 typedef quad_traits<double>::vectortype Packet;
2701 typedef Packet1cd Packetc;
2702 typedef quad_traits<double>::rhstype RhsPacket;
2704 void operator()(
const DataMapper& res,
const std::complex<double>* blockA,
const double* blockB,
2709template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2710void gebp_kernel<std::complex<double>, double,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2711 ::operator()(
const DataMapper& res,
const std::complex<double>* blockA,
const double* blockB,
2715 const Index accRows = quad_traits<double>::rows;
2716 const Index accCols = quad_traits<double>::size;
2717 void (*gemm_function)(
const DataMapper&,
const std::complex<double>*,
const double*,
2719 #if defined(EIGEN_ALTIVEC_MMA_ONLY)
2721 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, double, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2722 #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
2723 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2724 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, double, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2727 gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, double, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2730 gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, double, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2732 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2735template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2736struct gebp_kernel<double, std::complex<double>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2738 typedef quad_traits<double>::vectortype Packet;
2739 typedef Packet1cd Packetc;
2740 typedef quad_traits<double>::rhstype RhsPacket;
2742 void operator()(
const DataMapper& res,
const double* blockA,
const std::complex<double>* blockB,
2747template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2748void gebp_kernel<double, std::complex<double>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2749 ::operator()(
const DataMapper& res,
const double* blockA,
const std::complex<double>* blockB,
2753 const Index accRows = quad_traits<double>::rows;
2754 const Index accCols = quad_traits<double>::size;
2755 void (*gemm_function)(
const DataMapper&,
const double*,
const std::complex<double>*,
2757 #if defined(EIGEN_ALTIVEC_MMA_ONLY)
2759 gemm_function = &Eigen::internal::gemm_complexMMA<double, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2760 #elif defined(EIGEN_ALTIVEC_MMA_DYNAMIC_DISPATCH)
2761 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2762 gemm_function = &Eigen::internal::gemm_complexMMA<double, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2765 gemm_function = &Eigen::internal::gemm_complex<double, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2768 gemm_function = &Eigen::internal::gemm_complex<double, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2770 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
@ ColMajor
Definition Constants.h:319
@ RowMajor
Definition Constants.h:321
Namespace containing all symbols from the Eigen library.
Definition B01_Experimental.dox:1
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_real_op< typename Derived::Scalar >, const Derived > real(const Eigen::ArrayBase< Derived > &x)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_imag_op< typename Derived::Scalar >, const Derived > imag(const Eigen::ArrayBase< Derived > &x)