Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 3b968ca8 authored by Matthieu KUHN's avatar Matthieu KUHN Committed by Mathieu Faverge
Browse files

coreblas: Add kernels for getrf

parent 0d7356bb
No related branches found
No related tags found
1 merge request!387GETRF: Add sequential kernels
...@@ -53,6 +53,7 @@ set(ZSRC ...@@ -53,6 +53,7 @@ set(ZSRC
core_zgetrf.c core_zgetrf.c
core_zgetrf_incpiv.c core_zgetrf_incpiv.c
core_zgetrf_nopiv.c core_zgetrf_nopiv.c
core_zgetrf_panel.c
core_zgram.c core_zgram.c
core_zhe2ge.c core_zhe2ge.c
core_zherfb.c core_zherfb.c
......
/**
*
* @file core_zgetrf_panel.c
*
* @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon core_zgetrf with partial pivoting CPU kernel
*
* @version 1.2.0
* @author Mathieu Faverge
* @author Matthieu Kuhn
* @date 2022-02-22
* @precisions normal z -> c d s
*
*/
#include "coreblas/lapacke.h"
#include "coreblas.h"
static const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
/**
******************************************************************************
*
* @ingroup CORE_CHAMELEON_Complex64_t
*
* CORE_zgetrf_panel_diag computes the LU factorization of a single column on
* the diagonal block. Works in combination with CORE_zgetrf_panel_offdiag() to
* perform the full LU factorization with partial pivoting of a single column in
* a panel.
*
*******************************************************************************
*
* @param[in] m
* The number of rows of the matrix A. m >= 0.
*
* @param[in] n
* The number of columns of the matrix A. n >= 0.
*
* @param[in] h
* The index of the column to factorize in the matrix A.
*
* @param[in,out] A
* On entry, the matrix A where column h-1 needs to be factorized, and
* pivot for column h needs to be selected.
* The lower pentagonal part m-by-(h-1) are the L factors of the LU
* decomposition of A for the first (h-1) columns/rows.
* The upper pentagonal part (h-1)-by-n are the U factors of the LU
* decomposition of A for the first (h-1) columns/rows.
* On exit, A is factorized up to the (h-1)^th column/row and the
* remaining part is updated. The pivot of column h is selected.
*
* @param[in] lda
* The leading dimension of the array A. lda >= max(1,m).
*
* @param[in,out] IPIV
* On entry, the pivot array of size min(m,n) with the first h-2 columns initialized.
* On exit, IPIV[h-1] is updated with the selected pivot for the previous column.
*
* @param[in,out] nextpiv
* On entry, the allocated and initialized CHAM_piv_t structure to
* store the information related to pivot at stage h.
* On exit, the diagrow is initialized with the diagonal h row from A,
* and the pivot row is updated if a better pivot is found for step h.
*
* @param[in] prevpiv
* Holds the CHAM_piv_t structure of the previous (h-1) step with the
* (h-1) diagonal row, and the pivot selected row for (h-1) step.
* Not referenced if h is equal to 0.
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval <0 if -i, the i-th argument had an illegal value
*
*/
int
CORE_zgetrf_panel_diag( int m, int n, int h, int m0,
CHAMELEON_Complex64_t *A, int lda,
int *IPIV, CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv )
{
CHAMELEON_Complex64_t *subA = A + lda * h + h;
CHAMELEON_Complex64_t alpha;
if ( h > 0 ) {
CHAMELEON_Complex64_t *L, *pivrow;
int pivot_blkm0 = prevpiv->blkm0;
int pivot_blkidx = prevpiv->blkidx;
int pivot = pivot_blkm0 + pivot_blkidx;
pivrow = (CHAMELEON_Complex64_t *)(prevpiv->pivrow);
/*
* Copy the previously selected row to the right place if it's not the original
* diagonal row or if does not belong to the diagonal block
*/
if( (pivot_blkm0 != m0) ||
( (pivot_blkm0 == m0) && (pivot_blkidx != (h-1)) ) )
{
cblas_zcopy( n, pivrow, 1,
A + h-1, lda );
}
/* Store the pivot */
IPIV[h-1] = pivot + 1;
/*
* Copy the diagonal row in place of the selected one (previous pivot) to the right place if:
* - we own the previous pivot
* - pivot index was not last step index
*/
if( (pivot_blkm0 == m0 ) &&
(pivot_blkidx != h-1) )
{
cblas_zcopy( n, prevpiv->diagrow, 1,
A + pivot_blkidx, lda );
}
/* Take the pointer to the element below the current diagonal element */
L = A + lda * (h-1) + h;
if ( h < m ) {
/* Scaling off-diagonal terms */
alpha = (CHAMELEON_Complex64_t)1. / pivrow[h-1];
cblas_zscal( m-h, CBLAS_SADDR( alpha ), L, 1 );
}
/*
* h is compared only to n, because if we are on the last column of a
* tile, m might be much smaller than n, and still we need to apply
* the geru call. If this is the diagonal tile, we will just look for
* the next maximum for nothing.
*/
if ( h < n ) {
/* Applying the update */
cblas_zgeru(CblasColMajor, m-h, n-h,
CBLAS_SADDR(mzone),
L, 1,
pivrow + h, 1,
subA, lda );
}
else {
return 0;
}
}
/* Looking for the local max index */
{
CHAMELEON_Complex64_t *next_pivrow = (CHAMELEON_Complex64_t *)(nextpiv->pivrow);
int index;
index = cblas_izamax( m-h, subA, 1 );
alpha = subA[index];
if ( cabs(alpha) > cabs(next_pivrow[h]) ) {
/* We save this index as a potential pivot for the next column of the panel */
nextpiv->blkm0 = m0; /* block index */
nextpiv->blkidx = index + h; /* index in local numbering */
/* Save the row holding the selected pivot */
cblas_zcopy( n, A + index + h, lda,
next_pivrow, 1 );
}
}
/* Store current diagonal row (in full) into pivot structure */
cblas_zcopy( n, A + h, lda,
nextpiv->diagrow, 1 );
return 0;
}
/**
******************************************************************************
*
* @ingroup CORE_CHAMELEON_Complex64_t
*
* CORE_zgetrf_panel_offdiag computes the LU factorization of a single column on
* an off-diagonal block. Works in combination with CORE_zgetrf_panel_diag() to
* perform the full LU factorization with partial pivoting of a single column in
* a panel.
*
*******************************************************************************
*
* @param[in] m
* The number of rows of the matrix A. m >= 0.
*
* @param[in] n
* The number of columns of the matrix A. n >= 0.
*
* @param[in] h
* The index of the column to factorize in the matrix A.
*
* @param[in,out] A
* On entry, the matrix A where column h-1 needs to be factorized, and
* pivot for column h needs to be selected.
* The lower pentagonal part m-by-(h-1) are the L factors of the LU
* decomposition of A for the first (h-1) columns/rows.
* The upper pentagonal part (h-1)-by-n are the U factors of the LU
* decomposition of A for the first (h-1) columns/rows.
* On exit, A is factorized up to the (h-1)^th column/row and the
* remaining part is updated. The pivot of column h is selected.
*
* @param[in] lda
* The leading dimension of the array A. lda >= max(1,m).
*
* @param[in,out] nextpiv
* On entry, the allocated and initialized CHAM_piv_t structure to
* store the information related to pivot at stage h.
* On exit, the diagrow is initialized with the diagonal h row from A,
* and the pivot row is updated if a better pivot is found for step h.
*
* @param[in] prevpiv
* Holds the CHAM_piv_t structure of the previous (h-1) step with the
* (h-1) diagonal row, and the pivot selected row for (h-1) step.
* Not referenced if h is equal to 0.
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval <0 if -i, the i-th argument had an illegal value
*
*/
int
CORE_zgetrf_panel_offdiag( int m, int n, int h, int m0,
CHAMELEON_Complex64_t *A, int lda,
CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv )
{
CHAMELEON_Complex64_t *subA = A + lda * h;
CHAMELEON_Complex64_t alpha;
if ( h != 0 ) {
CHAMELEON_Complex64_t *L, *pivrow;
int pivot_blkm0 = prevpiv->blkm0;
int pivot_blkidx = prevpiv->blkidx;
pivrow = (CHAMELEON_Complex64_t *)(prevpiv->pivrow);
/*
* Copy the diagonal row in place of the selected one (previous pivot) to the right place if:
* - the previous pivot block was this one
* - it was not the previous row ??
*/
if( pivot_blkm0 == m0 )
{
cblas_zcopy( n, prevpiv->diagrow, 1,
A + pivot_blkidx, lda );
}
/* Take the pointer to the element below the current diagonal element */
L = A + lda * (h-1);
/* Scaling off-diagonal terms */
alpha = (CHAMELEON_Complex64_t)1. / pivrow[h-1];
cblas_zscal( m, CBLAS_SADDR( alpha ), L, 1 );
/*
* h is compared only to n, because if we are on the last column of a
* tile, m might be much smaller than n, and still we need to apply
* the geru call. If this is the diagonal tile, we will just look for
* the next maximum for nothing.
*/
if ( h < n ) {
/* Applying the update */
cblas_zgeru(CblasColMajor, m, n-h,
CBLAS_SADDR(mzone),
L, 1,
pivrow + h, 1,
subA, lda );
}
else {
return 0;
}
}
/* Looking for the local max index */
{
CHAMELEON_Complex64_t *next_pivrow = (CHAMELEON_Complex64_t *) nextpiv->pivrow;
int index;
index = cblas_izamax( m, subA, 1 );
alpha = subA[index];
if ( cabs(alpha) > cabs(next_pivrow[h]) )
{
/* We save this index as a potential pivot for the next column of the panel */
nextpiv->blkm0 = m0; /* block index */
nextpiv->blkidx = index; /* index in local numbering */
/* Save the row holding the selected pivot */
cblas_zcopy( n, A + index, lda,
next_pivrow, 1 );
}
}
return 0;
}
...@@ -86,6 +86,13 @@ int CORE_zgetrf_incpiv(int M, int N, int IB, ...@@ -86,6 +86,13 @@ int CORE_zgetrf_incpiv(int M, int N, int IB,
int CORE_zgetrf_nopiv(int M, int N, int IB, int CORE_zgetrf_nopiv(int M, int N, int IB,
CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *A, int LDA,
int *INFO); int *INFO);
int CORE_zgetrf_panel_diag( int m, int n, int h, int m0,
CHAMELEON_Complex64_t *A, int lda,
int *IPIV,
CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv);
int CORE_zgetrf_panel_offdiag( int m, int n, int h, int m0,
CHAMELEON_Complex64_t *A, int lda,
CHAM_pivot_t *nextpiv, const CHAM_pivot_t *prevpiv );
int CORE_zgetrf_reclap(int M, int N, int CORE_zgetrf_reclap(int M, int N,
CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *A, int LDA,
int *IPIV, int *info); int *IPIV, int *info);
......
...@@ -35,6 +35,16 @@ BEGIN_C_DECLS ...@@ -35,6 +35,16 @@ BEGIN_C_DECLS
#define CHAMELEON_TILE_DESC (1 << 1) #define CHAMELEON_TILE_DESC (1 << 1)
#define CHAMELEON_TILE_HMAT (1 << 2) #define CHAMELEON_TILE_HMAT (1 << 2)
/**
* @brief CHAMELEON structure to hold pivot informations for the LU factorization with partial pivoting
*/
typedef struct chameleon_pivot_s {
int blkm0; /**> The row index of the first row in the tile where the pivot has been selected */
int blkidx; /**> The relative row index in the tile where the pivot has been selected */
void *pivrow; /**> The copy of the row with the selected pivot */
void *diagrow; /**> The copy of the diagonal row to permute */
} CHAM_pivot_t;
typedef struct chameleon_tile_s { typedef struct chameleon_tile_s {
#if defined(CHAMELEON_KERNELS_TRACE) #if defined(CHAMELEON_KERNELS_TRACE)
char *name; char *name;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment