Eigen  5.0.1-dev+284dcc12
 
Loading...
Searching...
No Matches
Eigen::internal::unrolls::trsm< Scalar > Class Template Reference

#include <Eigen/src/Core/arch/AVX512/TrsmKernel.h>

Detailed Description

template<typename Scalar>
class Eigen::internal::unrolls::trsm< Scalar >

Unrolls for triSolveKernel

Idea: 1) Load a block of right-hand sides to registers in RHSInPacket (using loadRHS). 2) Do triangular solve with RHSInPacket and a small block of A (triangular matrix) stored in AInPacket (using triSolveMicroKernel). 3) Store final results (in avx registers) back into memory (using storeRHS).

RHSInPacket uses at most EIGEN_AVX_MAX_NUM_ACC avx registers and AInPacket uses at most EIGEN_AVX_MAX_NUM_ROW registers.

Static Public Member Functions

template<int64_t currM, int64_t endK, int64_t counter>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0 &&currM >=0)> aux_divRHSByDiag (PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket)
 
template<bool isFWDSolve, int64_t endM, int64_t endK, int64_t counter, bool krem>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_loadRHS (Scalar *B_arr, int64_t LDB, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, int64_t rem=0)
 
template<bool isFWDSolve, int64_t endM, int64_t endK, int64_t counter, bool krem>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_storeRHS (Scalar *B_arr, int64_t LDB, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, int64_t rem=0)
 
template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t endM, int64_t counter, int64_t numK>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_triSolveMicroKernel (Scalar *A_arr, int64_t LDA, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket)
 
template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t initM, int64_t endM, int64_t endK, int64_t counter, int64_t currentM>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> aux_updateRHS (Scalar *A_arr, int64_t LDA, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket)
 
template<int64_t currM, int64_t endK>
static EIGEN_ALWAYS_INLINE void divRHSByDiag (PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket)
 
template<bool isFWDSolve, int64_t endM, int64_t endK, bool krem = false>
static EIGEN_ALWAYS_INLINE void loadRHS (Scalar *B_arr, int64_t LDB, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, int64_t rem=0)
 
template<bool isFWDSolve, int64_t endM, int64_t endK, bool krem = false>
static EIGEN_ALWAYS_INLINE void storeRHS (Scalar *B_arr, int64_t LDB, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, int64_t rem=0)
 
template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t endM, int64_t numK>
static EIGEN_ALWAYS_INLINE void triSolveMicroKernel (Scalar *A_arr, int64_t LDA, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket)
 
template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t startM, int64_t endM, int64_t endK, int64_t currentM>
static EIGEN_ALWAYS_INLINE void updateRHS (Scalar *A_arr, int64_t LDA, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket)
 

Member Function Documentation

◆ aux_divRHSByDiag()

template<typename Scalar>
template<int64_t currM, int64_t endK, int64_t counter>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0 &&currM >=0)> Eigen::internal::unrolls::trsm< Scalar >::aux_divRHSByDiag ( PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > & RHSInPacket,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > & AInPacket )
inlinestatic

aux_divRHSByDiag

currM may be -1; the (currM >= 0) condition in enable_if guards against this case.

1-D unroll for(startK = 0; startK < endK; startK++)

◆ aux_loadRHS()

template<typename Scalar>
template<bool isFWDSolve, int64_t endM, int64_t endK, int64_t counter, bool krem>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> Eigen::internal::unrolls::trsm< Scalar >::aux_loadRHS ( Scalar * B_arr,
int64_t LDB,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > & RHSInPacket,
int64_t rem = 0 )
inlinestatic

aux_loadRHS

2-D unroll for(startM = 0; startM < endM; startM++) for(startK = 0; startK < endK; startK++)

◆ aux_storeRHS()

template<typename Scalar>
template<bool isFWDSolve, int64_t endM, int64_t endK, int64_t counter, bool krem>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> Eigen::internal::unrolls::trsm< Scalar >::aux_storeRHS ( Scalar * B_arr,
int64_t LDB,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > & RHSInPacket,
int64_t rem = 0 )
inlinestatic

aux_storeRHS

2-D unroll for(startM = 0; startM < endM; startM++) for(startK = 0; startK < endK; startK++)

◆ aux_triSolveMicroKernel()

template<typename Scalar>
template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t endM, int64_t counter, int64_t numK>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> Eigen::internal::unrolls::trsm< Scalar >::aux_triSolveMicroKernel ( Scalar * A_arr,
int64_t LDA,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > & RHSInPacket,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > & AInPacket )
inlinestatic

aux_triSolveMicroKernel

1-D unroll for(startM = 0; startM < endM; startM++)

◆ aux_updateRHS()

template<typename Scalar>
template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t initM, int64_t endM, int64_t endK, int64_t counter, int64_t currentM>
static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> Eigen::internal::unrolls::trsm< Scalar >::aux_updateRHS ( Scalar * A_arr,
int64_t LDA,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > & RHSInPacket,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > & AInPacket )
inlinestatic

aux_updateRHS

2-D unroll for(startM = initM; startM < endM; startM++) for(startK = 0; startK < endK; startK++)

◆ divRHSByDiag()

template<typename Scalar>
template<int64_t currM, int64_t endK>
static EIGEN_ALWAYS_INLINE void Eigen::internal::unrolls::trsm< Scalar >::divRHSByDiag ( PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > & RHSInPacket,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > & AInPacket )
inlinestatic

Only used if Triangular matrix has non-unit diagonal values

◆ loadRHS()

template<typename Scalar>
template<bool isFWDSolve, int64_t endM, int64_t endK, bool krem = false>
static EIGEN_ALWAYS_INLINE void Eigen::internal::unrolls::trsm< Scalar >::loadRHS ( Scalar * B_arr,
int64_t LDB,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > & RHSInPacket,
int64_t rem = 0 )
inlinestatic

Load an endM x endK block of B into RHSInPacket. Masked loads are used for cases where endK is not a multiple of PacketSize.

◆ storeRHS()

template<typename Scalar>
template<bool isFWDSolve, int64_t endM, int64_t endK, bool krem = false>
static EIGEN_ALWAYS_INLINE void Eigen::internal::unrolls::trsm< Scalar >::storeRHS ( Scalar * B_arr,
int64_t LDB,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > & RHSInPacket,
int64_t rem = 0 )
inlinestatic

Store the endM x endK block held in RHSInPacket back to B. Masked stores are used for cases where endK is not a multiple of PacketSize.

◆ triSolveMicroKernel()

template<typename Scalar>
template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t endM, int64_t numK>
static EIGEN_ALWAYS_INLINE void Eigen::internal::unrolls::trsm< Scalar >::triSolveMicroKernel ( Scalar * A_arr,
int64_t LDA,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > & RHSInPacket,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > & AInPacket )
inlinestatic

endM: dimension of A; 1 <= endM <= EIGEN_AVX_MAX_NUM_ROW. numK: number of avx registers to use for each row of B (e.g. fp32: 48 rhs => 3 avx registers used); 1 <= numK <= 3. isFWDSolve: true => forward substitution, false => backward substitution. isUnitDiag: true => triangular matrix has unit diagonal.

◆ updateRHS()

template<typename Scalar>
template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t startM, int64_t endM, int64_t endK, int64_t currentM>
static EIGEN_ALWAYS_INLINE void Eigen::internal::unrolls::trsm< Scalar >::updateRHS ( Scalar * A_arr,
int64_t LDA,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > & RHSInPacket,
PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > & AInPacket )
inlinestatic

Update right-hand sides (stored in avx registers) by traversing along the column A_{i,currentM}, where currentM <= i <= endM, and broadcasting each value to AInPacket.


The documentation for this class was generated from the following file: