Eigen  5.0.1-dev+284dcc12
 
Loading...
Searching...
No Matches
Eigen::internal::unrolls::transB< Scalar > Class Template Reference

#include <Eigen/src/Core/arch/AVX512/TrsmKernel.h>

Detailed Description

template<typename Scalar>
class Eigen::internal::unrolls::transB< Scalar >

Unrolls for copyBToRowMajor

Idea: 1) Load a block of right-hand sides to registers (using loadB). 2) Convert the block from column-major to row-major (transposeLxL). 3) Store the blocks from registers either to a temp array (toTemp == true), or back to B (toTemp == false).

We use at most EIGEN_AVX_MAX_NUM_ACC avx registers to store the blocks of B. The remaining registers are used as temps for transposing.

Blocks will be of size Lx{U1,U2,U3}. packetIndexOffset is used to index between these subblocks. For fp32, PacketSize = 2*EIGEN_AVX_MAX_NUM_ROW, so we reinterpret packets as packets half the size (zmm -> ymm).

Static Public Member Functions

template<int64_t endN, int64_t counter, int64_t packetIndexOffset, bool remM, int64_t remN_>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_loadB (Scalar *B_arr, int64_t LDB, PacketBlock< vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS > &ymm, int64_t remM_=0)
 
template<int64_t endN, int64_t counter, bool toTemp, bool remM, int64_t remN_>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_loadBBlock (Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_, PacketBlock< vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS > &ymm, int64_t remM_=0)
 
template<int64_t endN, int64_t counter, int64_t packetIndexOffset, bool remK, bool remM>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_storeB (Scalar *B_arr, int64_t LDB, PacketBlock< vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS > &ymm, int64_t rem_=0)
 
template<int64_t endN, int64_t counter, bool toTemp, bool remM, int64_t remK_>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_storeBBlock (Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_, PacketBlock< vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS > &ymm, int64_t remM_=0)
 
template<int64_t unrollN, bool toTemp, bool remM>
static EIGEN_ALWAYS_INLINE void transB_kernel (Scalar *B_arr, int64_t LDB, Scalar *B_temp, int64_t LDB_, PacketBlock< vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS > &ymm, int64_t remM_=0)
 

Member Function Documentation

◆ aux_loadB()

template<typename Scalar>
template<int64_t endN, int64_t counter, int64_t packetIndexOffset, bool remM, int64_t remN_>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> Eigen::internal::unrolls::transB< Scalar >::aux_loadB ( Scalar * B_arr,
int64_t LDB,
PacketBlock< vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS > & ymm,
int64_t remM_ = 0 )
inlinestatic

aux_loadB

1-D unroll for(startN = 0; startN < endN; startN++)

◆ aux_loadBBlock()

template<typename Scalar>
template<int64_t endN, int64_t counter, bool toTemp, bool remM, int64_t remN_>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> Eigen::internal::unrolls::transB< Scalar >::aux_loadBBlock ( Scalar * B_arr,
int64_t LDB,
Scalar * B_temp,
int64_t LDB_,
PacketBlock< vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS > & ymm,
int64_t remM_ = 0 )
inlinestatic

aux_loadBBlock

1-D unroll for(startN = 0; startN < endN; startN += EIGEN_AVX_MAX_NUM_ROW)

◆ aux_storeB()

template<typename Scalar>
template<int64_t endN, int64_t counter, int64_t packetIndexOffset, bool remK, bool remM>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> Eigen::internal::unrolls::transB< Scalar >::aux_storeB ( Scalar * B_arr,
int64_t LDB,
PacketBlock< vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS > & ymm,
int64_t rem_ = 0 )
inlinestatic

aux_storeB

1-D unroll for(startN = 0; startN < endN; startN++)

◆ aux_storeBBlock()

template<typename Scalar>
template<int64_t endN, int64_t counter, bool toTemp, bool remM, int64_t remK_>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> Eigen::internal::unrolls::transB< Scalar >::aux_storeBBlock ( Scalar * B_arr,
int64_t LDB,
Scalar * B_temp,
int64_t LDB_,
PacketBlock< vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS > & ymm,
int64_t remM_ = 0 )
inlinestatic

aux_storeBBlock

1-D unroll for(startN = 0; startN < endN; startN += EIGEN_AVX_MAX_NUM_ROW)

◆ transB_kernel()

template<typename Scalar>
template<int64_t unrollN, bool toTemp, bool remM>
static EIGEN_ALWAYS_INLINE void Eigen::internal::unrolls::transB< Scalar >::transB_kernel ( Scalar * B_arr,
int64_t LDB,
Scalar * B_temp,
int64_t LDB_,
PacketBlock< vecHalf, EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS > & ymm,
int64_t remM_ = 0 )
inlinestatic

Unrolls needed for each case:

  • AVX512 fp32 48 32 16 8 4 2 1
  • AVX512 fp64 24 16 8 4 2 1

For fp32, L and U1 are 1:2, so for the U3/U2 cases the loads/stores need to be split up.


The documentation for this class was generated from the following file: