Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 0d58113d authored by Matthieu KUHN's avatar Matthieu KUHN Committed by Mathieu Faverge
Browse files

getrf: Add a version with no pivoting per column

parent f340a05a
No related branches found
No related tags found
1 merge request !365 GETRF: Add a version without pivoting column per column
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -16,13 +16,14 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Matthieu Kuhn
* @date 2022-02-22
* @date 2023-02-21
* @precisions normal z -> s d c
*
*/
#include "control/common.h"
#define A(m,n) A, m, n
#define U(m,n) &(ws->U), m, n
/*
* All the functions below are panel factorization variants.
......@@ -44,10 +45,10 @@
* The runtime options data structure to pass through all insert_task calls.
*/
static inline void
chameleon_pzgetrf_panel_facto_nopiv( void *ws,
CHAM_desc_t *A,
int k,
RUNTIME_option_t *options )
chameleon_pzgetrf_panel_facto_nopiv( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A,
int k,
RUNTIME_option_t *options )
{
const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t) 1.0;
int m, tempkm, tempkn, tempmm;
......@@ -60,7 +61,7 @@ chameleon_pzgetrf_panel_facto_nopiv( void *ws,
*/
INSERT_TASK_zgetrf_nopiv(
options,
tempkm, tempkn, 32, A->mb,
tempkm, tempkn, ws->ib, A->mb,
A(k, k), 0);
for (m = k+1; m < A->mt; m++) {
......@@ -73,24 +74,61 @@ chameleon_pzgetrf_panel_facto_nopiv( void *ws,
A(m, k) );
}
}
/**
 * Panel factorization of panel k, processed column by column without pivoting.
 *
 * For every column of the diagonal tile, one "diag" task is submitted on
 * A(k,k) and U(k,k), followed by one "trsm" task for each tile A(m,k) located
 * below the diagonal. Once every column has been submitted, U(k,k) is flushed.
 *
 * @param ws       GETRF workspace; its U descriptor is passed to every task.
 * @param A        Descriptor of the matrix being processed.
 * @param k        Index of the panel (diagonal tile) to factorize.
 * @param options  Runtime options forwarded to all task insertions.
 */
static inline void
chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws,
                                            CHAM_desc_t                *A,
                                            int                         k,
                                            RUNTIME_option_t           *options )
{
    int col, row;
    int diag_rows, diag_cols, row_count, ncols;

    /* Size of the diagonal tile: the last tile row/column uses the remainder. */
    if ( k == A->mt - 1 ) {
        diag_rows = A->m - k * A->mb;
    }
    else {
        diag_rows = A->mb;
    }

    if ( k == A->nt - 1 ) {
        diag_cols = A->n - k * A->nb;
    }
    else {
        diag_cols = A->nb;
    }

    ncols = chameleon_min( diag_rows, diag_cols );

    /* One pass per column of the diagonal tile, no pivoting involved. */
    for ( col = 0; col < ncols; col++ ) {
        /* Diagonal tile first; the last argument is A->mb * k. */
        INSERT_TASK_zgetrf_panel_nopiv_percol_diag(
            options, diag_rows, diag_cols, col,
            A( k, k ), U( k, k ), A->mb * k );

        /* Then every tile of the panel located below the diagonal. */
        for ( row = k + 1; row < A->mt; row++ ) {
            if ( row == ( A->mt - 1 ) ) {
                row_count = A->m - row * A->mb;
            }
            else {
                row_count = A->mb;
            }

            INSERT_TASK_zgetrf_panel_nopiv_percol_trsm(
                options, row_count, diag_cols, col,
                A( row, k ), U( k, k ) );
        }
    }

    RUNTIME_data_flush( options->sequence, U( k, k ) );
}
/**
 * Panel factorization entry point for step k.
 *
 * Forwards its arguments unchanged to the per-column no-pivoting variant when
 * GETRF_NOPIV_PER_COLUMN is defined at compile time, and to the regular
 * no-pivoting variant otherwise.
 */
static inline void
chameleon_pzgetrf_panel_facto( void *ws,
CHAM_desc_t *A,
int k,
RUNTIME_option_t *options )
chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A,
int k,
RUNTIME_option_t *options )
{
#if defined(GETRF_NOPIV_PER_COLUMN)
chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options );
#else
chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options );
#endif
}
/**
* Permutation of the panel n at step k
*/
static inline void
chameleon_pzgetrf_panel_permute( void *ws,
CHAM_desc_t *A,
int k,
int n,
RUNTIME_option_t *options )
chameleon_pzgetrf_panel_permute( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A,
int k,
int n,
RUNTIME_option_t *options )
{
(void)ws;
(void)A;
......@@ -100,11 +138,11 @@ chameleon_pzgetrf_panel_permute( void *ws,
}
static inline void
chameleon_pzgetrf_panel_update( void *ws,
CHAM_desc_t *A,
int k,
int n,
RUNTIME_option_t *options )
chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A,
int k,
int n,
RUNTIME_option_t *options )
{
const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t) 1.0;
const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
......@@ -141,10 +179,10 @@ chameleon_pzgetrf_panel_update( void *ws,
/**
* Parallel tile LU factorization with no pivoting - dynamic scheduling
*/
void chameleon_pzgetrf( void *ws,
CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
......
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -18,7 +18,7 @@
* @author Cedric Castagnede
* @author Florent Pruvost
* @author Matthieu Kuhn
* @date 2022-09-19
* @date 2023-02-21
*
* @precisions normal z -> s d c
*
......@@ -52,14 +52,26 @@
void *
CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
{
CHAM_context_t *chamctxt;
CHAM_context_t *chamctxt;
struct chameleon_pzgetrf_s *options;
chamctxt = chameleon_context_self();
/* Without a context, no workspace is created and NULL is returned. */
if ( chamctxt == NULL ) {
return NULL;
}
return NULL;
/* Zero-initialized workspace structure; this pointer is what the function returns. */
/* NOTE(review): the calloc() result is dereferenced on the next line without a NULL check. */
options = calloc( 1, sizeof( struct chameleon_pzgetrf_s ) );
options->ib = CHAMELEON_IB;
#if defined(GETRF_NOPIV_PER_COLUMN)
/* The U descriptor is only initialized when the per-column variant is compiled in. */
chameleon_desc_init( &(options->U), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, 1, A->nb, A->nb,
A->mt, A->nt * A->nb, 0, 0,
A->mt, A->nt * A->nb, A->p, A->q,
NULL, NULL, A->get_rankof_init );
#endif
return options;
}
/**
......@@ -84,14 +96,13 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
void
CHAMELEON_zgetrf_WS_Free( const CHAM_desc_t *A, void *user_ws )
{
CHAM_context_t *chamctxt;
/* View the opaque user pointer as the GETRF workspace structure. */
struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)user_ws;
chamctxt = chameleon_context_self();
if ( chamctxt == NULL ) {
return;
}
#if defined(GETRF_NOPIV_PER_COLUMN)
/* The U descriptor is destroyed only when the per-column variant is compiled in. */
/* NOTE(review): ws is not checked for NULL before its U member is accessed. */
chameleon_desc_destroy( &(ws->U) );
#endif
(void)user_ws;
/* Release the workspace structure itself. */
free( ws );
}
#if defined(NOT_AVAILABLE_YET)
......
......@@ -21,7 +21,7 @@
* @author Florent Pruvost
* @author Alycia Lisito
* @author Matthieu Kuhn
* @date 2022-09-19
* @date 2023-02-21
* @precisions normal z -> c d s
*
*/
......@@ -37,6 +37,14 @@ struct chameleon_pzgemm_s {
CHAM_desc_t WB;
};
/**
* @brief Data structure to handle the GETRF workspaces with partial pivoting
*/
struct chameleon_pzgetrf_s {
int ib; /* Internal blocking parameter */
/* Workspace descriptor; CHAMELEON_zgetrf_WS_Alloc only initializes it when
 * GETRF_NOPIV_PER_COLUMN is defined. */
CHAM_desc_t U;
};
/**
* @brief Data structure to handle the Centering-Scaled workspaces
*/
......@@ -78,7 +86,7 @@ void chameleon_pzgepdf_qdwh( cham_mtxtype_t trans, CHAM_desc_t *descU, CHAM_desc
void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzgetrf( void *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment