template<typename Scalar>
class Eigen::internal::unrolls::trsm< Scalar >
Unrolls for triSolveKernel
Idea: 1) Load a block of right-hand sides into registers in RHSInPacket (using loadRHS). 2) Perform the triangular solve with RHSInPacket and a small block of A (the triangular matrix) stored in AInPacket (using triSolveMicroKernel). 3) Store the final results (held in AVX registers) back into memory (using storeRHS).
RHSInPacket uses at most EIGEN_AVX_MAX_NUM_ACC AVX registers and AInPacket uses at most EIGEN_AVX_MAX_NUM_ROW AVX registers.
|
| template<int64_t currM, int64_t endK, int64_t counter> |
| static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0 && currM >= 0)> | aux_divRHSByDiag (PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket) |
| |
| template<bool isFWDSolve, int64_t endM, int64_t endK, int64_t counter, bool krem> |
| static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> | aux_loadRHS (Scalar *B_arr, int64_t LDB, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, int64_t rem=0) |
| |
| template<bool isFWDSolve, int64_t endM, int64_t endK, int64_t counter, bool krem> |
| static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> | aux_storeRHS (Scalar *B_arr, int64_t LDB, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, int64_t rem=0) |
| |
| template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t endM, int64_t counter, int64_t numK> |
| static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> | aux_triSolveMicroKernel (Scalar *A_arr, int64_t LDA, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket) |
| |
| template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t initM, int64_t endM, int64_t endK, int64_t counter, int64_t currentM> |
| static EIGEN_ALWAYS_INLINE std::enable_if_t<(counter > 0)> | aux_updateRHS (Scalar *A_arr, int64_t LDA, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket) |
| |
| template<int64_t currM, int64_t endK> |
| static EIGEN_ALWAYS_INLINE void | divRHSByDiag (PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket) |
| |
| template<bool isFWDSolve, int64_t endM, int64_t endK, bool krem = false> |
| static EIGEN_ALWAYS_INLINE void | loadRHS (Scalar *B_arr, int64_t LDB, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, int64_t rem=0) |
| |
| template<bool isFWDSolve, int64_t endM, int64_t endK, bool krem = false> |
| static EIGEN_ALWAYS_INLINE void | storeRHS (Scalar *B_arr, int64_t LDB, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, int64_t rem=0) |
| |
| template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t endM, int64_t numK> |
| static EIGEN_ALWAYS_INLINE void | triSolveMicroKernel (Scalar *A_arr, int64_t LDA, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket) |
| |
| template<bool isARowMajor, bool isFWDSolve, bool isUnitDiag, int64_t startM, int64_t endM, int64_t endK, int64_t currentM> |
| static EIGEN_ALWAYS_INLINE void | updateRHS (Scalar *A_arr, int64_t LDA, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ACC > &RHSInPacket, PacketBlock< vec, EIGEN_AVX_MAX_NUM_ROW > &AInPacket) |
| |