Mentions légales du service

Skip to content
Snippets Groups Projects
Commit e5005374 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

coreblas: Update the getrf kernels to take into account some blocked updates

parent dac5d83e
No related branches found
No related tags found
1 merge request!405GETRF: Add a blocked version of the LU partial pivoting algorithm
......@@ -12,7 +12,7 @@
* @version 1.3.0
* @author Mathieu Faverge
* @author Matthieu Kuhn
* @date 2023-08-22
* @date 2023-09-11
* @precisions normal z -> c d s
*
*/
......@@ -20,6 +20,7 @@
#include "coreblas.h"
static const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
static const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
/**
******************************************************************************
......@@ -83,13 +84,18 @@ static const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
*
*/
int
CORE_zgetrf_panel_diag( int m, int n, int h, int m0,
CORE_zgetrf_panel_diag( int m, int n, int h, int m0, int ib,
CHAMELEON_Complex64_t *A, int lda,
CHAMELEON_Complex64_t *U, int ldu,
int *IPIV, CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv )
{
CHAMELEON_Complex64_t *subA = A + lda * h + h;
CHAMELEON_Complex64_t alpha;
/* Compute localn, the number of columns to update inside the block */
int localnt = chameleon_ceil( chameleon_min(m, n), ib );
int localn = (( h / ib ) == (localnt-1)) ? n - ( h / ib ) * ib : ib;
if ( h > 0 ) {
CHAMELEON_Complex64_t *L, *pivrow;
......@@ -110,6 +116,12 @@ CORE_zgetrf_panel_diag( int m, int n, int h, int m0,
A + h-1, lda );
}
if( (U != NULL) && (h%ib != 0 ) )
{
cblas_zcopy( n, pivrow, 1,
U + ((h-1)%ib), ldu );
}
/* Store the pivot */
IPIV[h-1] = pivot + 1;
......@@ -136,11 +148,30 @@ CORE_zgetrf_panel_diag( int m, int n, int h, int m0,
if ( h < chameleon_min( m, n ) ) {
/* Applying the update */
cblas_zgeru(CblasColMajor, m-h, n-h,
CBLAS_SADDR(mzone),
L, 1,
pivrow + h, 1,
subA, lda );
if ( (h % ib) != 0 ) {
/* We apply a BLAS-2 operation with a single outer-product */
cblas_zgeru( CblasColMajor, m-h, localn-(h%ib),
CBLAS_SADDR(mzone),
L, 1,
pivrow + h, 1,
subA, lda );
}
else {
/* We apply a BLAS-3 operation with a matrix-matrix product */
const CHAMELEON_Complex64_t *L = A + (h-ib) * lda + h;
const CHAMELEON_Complex64_t *subU = U + h * ldu;
LAPACKE_zlacpy_work(
LAPACK_COL_MAJOR, 'A', ib, n - h,
subU, ldu, A + h * lda + (h-ib), lda );
cblas_zgemm( CblasColMajor,
(CBLAS_TRANSPOSE)CblasNoTrans, (CBLAS_TRANSPOSE)CblasNoTrans,
m - h, n - h, ib,
CBLAS_SADDR(mzone), L, lda,
subU, ldu,
CBLAS_SADDR( zone), subA, lda );
}
}
else {
return 0;
......@@ -225,13 +256,18 @@ CORE_zgetrf_panel_diag( int m, int n, int h, int m0,
*
*/
int
CORE_zgetrf_panel_offdiag( int m, int n, int h, int m0,
CORE_zgetrf_panel_offdiag( int m, int n, int h, int m0, int ib,
CHAMELEON_Complex64_t *A, int lda,
CHAMELEON_Complex64_t *U, int ldu,
CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv )
{
CHAMELEON_Complex64_t *subA = A + lda * h;
CHAMELEON_Complex64_t alpha;
/* Compute the number of columns to update inside the block */
int localnt = chameleon_ceil( n, ib );
int localn = (( h / ib ) == (localnt-1)) ? n - ( h / ib ) * ib : ib;
if ( h != 0 ) {
CHAMELEON_Complex64_t *L, *pivrow;
......@@ -266,11 +302,26 @@ CORE_zgetrf_panel_offdiag( int m, int n, int h, int m0,
*/
if ( h < n ) {
/* Applying the update */
cblas_zgeru(CblasColMajor, m, n-h,
CBLAS_SADDR(mzone),
L, 1,
pivrow + h, 1,
subA, lda );
if ( (h % ib) != 0 ) {
/* We apply a BLAS-2 operation with a single outer-product */
cblas_zgeru( CblasColMajor, m, localn-(h%ib),
CBLAS_SADDR(mzone),
L, 1,
pivrow + h, 1,
subA, lda );
}
else {
/* We apply a BLAS-3 operation with a matrix-matrix product */
const CHAMELEON_Complex64_t *L = A + (h-ib) * lda;
const CHAMELEON_Complex64_t *subU = U + h * ldu;
cblas_zgemm( CblasColMajor,
(CBLAS_TRANSPOSE)CblasNoTrans, (CBLAS_TRANSPOSE)CblasNoTrans,
m, n - h, ib,
CBLAS_SADDR(mzone), L, lda,
subU, ldu,
CBLAS_SADDR( zone), subA, lda );
}
}
else {
return 0;
......
......@@ -22,7 +22,7 @@
* @author Cedric Castagnede
* @author Florent Pruvost
* @author Matthieu Kuhn
* @date 2023-08-31
* @date 2023-09-11
* @precisions normal z -> c d s
*
*/
......@@ -87,12 +87,14 @@ int CORE_zgetrf_incpiv(int M, int N, int IB,
int CORE_zgetrf_nopiv(int M, int N, int IB,
CHAMELEON_Complex64_t *A, int LDA,
int *INFO);
int CORE_zgetrf_panel_diag( int m, int n, int h, int m0,
int CORE_zgetrf_panel_diag( int m, int n, int h, int m0, int ib,
CHAMELEON_Complex64_t *A, int lda,
CHAMELEON_Complex64_t *U, int ldu,
int *IPIV,
CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv);
int CORE_zgetrf_panel_offdiag( int m, int n, int h, int m0,
int CORE_zgetrf_panel_offdiag( int m, int n, int h, int m0, int ib,
CHAMELEON_Complex64_t *A, int lda,
CHAMELEON_Complex64_t *U, int ldu,
CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv );
int CORE_zgetrf_reclap(int M, int N,
CHAMELEON_Complex64_t *A, int LDA,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment