Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 7fc32871, authored by Ana Hourcau, committed by Mathieu Faverge
Browse files

Prevent the conversion to half precision for diagonal tiles

parent 70fc142c
No related branches found
No related tags found
1 merge request: !488 "Mixed precision"
...@@ -28,8 +28,10 @@ ...@@ -28,8 +28,10 @@
#define W( desc, m, n ) (desc), (m), (n) #define W( desc, m, n ) (desc), (m), (n)
static inline void static inline void
chameleon_pzgered_frb( cham_uplo_t uplo, chameleon_pzgered_frb( cham_uplo_t uplo,
CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt, CHAM_desc_t *A,
CHAM_desc_t *Wnorm,
CHAM_desc_t *Welt,
RUNTIME_option_t *options ) RUNTIME_option_t *options )
{ {
double alpha = 1.0; double alpha = 1.0;
...@@ -155,14 +157,17 @@ chameleon_pzgered_frb( cham_uplo_t uplo, ...@@ -155,14 +157,17 @@ chameleon_pzgered_frb( cham_uplo_t uplo,
/** /**
* *
*/ */
void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A, void chameleon_pzgered( cham_uplo_t uplo,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) double prec,
CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
RUNTIME_option_t options; RUNTIME_option_t options;
CHAM_desc_t Wcol; CHAM_desc_t Wcol;
CHAM_desc_t Welt; CHAM_desc_t Welt;
double gnorm, threshold, eps; double gnorm, threshold, eps, eps_diag, threshold_diag;
int workmt, worknt; int workmt, worknt;
int m, n; int m, n;
...@@ -202,37 +207,36 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A, ...@@ -202,37 +207,36 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A,
/** /**
* Reduce the precision of the tiles if possible * Reduce the precision of the tiles if possible
*/ */
eps_diag = CHAMELEON_slamch();
if ( prec < 0. ) { if ( prec < 0. ) {
#if !defined(CHAMELEON_SIMULATION) eps = CHAMELEON_dlamch();
eps = LAPACKE_dlamch_work('e');
#else
#if defined(PRECISION_z) || defined(PRECISION_d)
eps = 1.e-15;
#else
eps = 1.e-7;
#endif
#endif
} }
else { else {
eps = prec; eps = prec;
} }
threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt)); threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt));
threshold_diag = ( eps < eps_diag ) ? threshold : (eps_diag * gnorm) / (double)(chameleon_min(A->mt, A->nt));
#if defined(CHAMELEON_DEBUG_GERED) #if defined(CHAMELEON_DEBUG_GERED)
fprintf( stderr, fprintf( stderr,
"[%2d] The norm of A is: %e\n" "[%2d] The norm of A is: %e\n"
"[%2d] The requested precision is: %e\n" "[%2d] The requested precision is: %e\n"
"[%2d] The computed threshold is: %e\n", "[%2d] The computed threshold is: %e\n"
"[%2d] The threshold diag is : %e\n",
A->myrank, gnorm, A->myrank, gnorm,
A->myrank, eps, A->myrank, eps,
A->myrank, threshold ); A->myrank, threshold,
A->myrank, threshold_diag );
#endif #endif
for(m = 0; m < A->mt; m++) {
for(m = 0; m < A->mt; m++)
{
int tempmm = ( m == (A->mt-1) ) ? A->m - m * A->mb : A->mb; int tempmm = ( m == (A->mt-1) ) ? A->m - m * A->mb : A->mb;
int nmin = ( uplo == ChamUpper ) ? m : 0; int nmin = ( uplo == ChamUpper ) ? m : 0;
int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, A->nt) : A->nt; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, A->nt) : A->nt;
for(n = nmin; n < nmax; n++) { for(n = nmin; n < nmax; n++)
{
int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb; int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb;
/* /*
...@@ -241,8 +245,14 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A, ...@@ -241,8 +245,14 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A,
* ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low}) * ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low})
* ||A_{i,j}||_F < threshold / u_{low} * ||A_{i,j}||_F < threshold / u_{low}
*/ */
INSERT_TASK_zgered( &options, threshold, if ( m == n ) {
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); INSERT_TASK_zgered( &options, threshold_diag,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
}
else {
INSERT_TASK_zgered( &options, threshold,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
}
} }
} }
...@@ -250,6 +260,6 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A, ...@@ -250,6 +260,6 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A,
RUNTIME_sequence_wait( chamctxt, sequence ); RUNTIME_sequence_wait( chamctxt, sequence );
chameleon_desc_destroy( &Wcol ); chameleon_desc_destroy( &Wcol );
RUNTIME_options_ws_free(&options); RUNTIME_options_ws_free( &options );
RUNTIME_options_finalize(&options, chamctxt); RUNTIME_options_finalize( &options, chamctxt );
} }
...@@ -28,8 +28,11 @@ ...@@ -28,8 +28,11 @@
#define W(desc, m, n) (desc), (m), (n) #define W(desc, m, n) (desc), (m), (n)
static inline void static inline void
chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, chameleon_pzhered_frb( cham_trans_t trans,
CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt, cham_uplo_t uplo,
CHAM_desc_t *A,
CHAM_desc_t *Wnorm,
CHAM_desc_t *Welt,
RUNTIME_option_t *options ) RUNTIME_option_t *options )
{ {
double alpha = 1.0; double alpha = 1.0;
...@@ -84,8 +87,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -84,8 +87,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
{ {
int tempnn = (n == (NT - 1)) ? N - n * A->nb : A->nb; int tempnn = (n == (NT - 1)) ? N - n * A->nb : A->nb;
if (n == m) if ( n == m ) {
{
if ( trans == ChamConjTrans ) { if ( trans == ChamConjTrans ) {
INSERT_TASK_zhessq( INSERT_TASK_zhessq(
options, ChamEltwise, uplo, tempmm, options, ChamEltwise, uplo, tempmm,
...@@ -97,8 +99,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -97,8 +99,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
A(m, n), W( Wnorm, m, n) ); A(m, n), W( Wnorm, m, n) );
} }
} }
else else {
{
INSERT_TASK_zgessq( INSERT_TASK_zgessq(
options, ChamEltwise, tempmm, tempnn, options, ChamEltwise, tempmm, tempnn,
A(m, n), W( Wnorm, m, n )); A(m, n), W( Wnorm, m, n ));
...@@ -166,7 +167,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -166,7 +167,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
{ {
for (n = 0; n < A->q; n++) for (n = 0; n < A->q; n++)
{ {
if ((m != 0) || (n != 0)) if ( ( m != 0 ) || ( n != 0 ) )
{ {
INSERT_TASK_dlacpy( INSERT_TASK_dlacpy(
options, options,
...@@ -180,14 +181,18 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, ...@@ -180,14 +181,18 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo,
/** /**
* *
*/ */
void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_desc_t *A, void chameleon_pzhered( cham_trans_t trans,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) cham_uplo_t uplo,
double prec,
CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
RUNTIME_option_t options; RUNTIME_option_t options;
CHAM_desc_t Wcol; CHAM_desc_t Wcol;
CHAM_desc_t Welt; CHAM_desc_t Welt;
double gnorm, threshold, eps; double gnorm, threshold, eps, eps_diag, threshold_diag;
int workmt, worknt; int workmt, worknt;
int m, n; int m, n;
...@@ -205,22 +210,22 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_ ...@@ -205,22 +210,22 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_
RUNTIME_options_ws_alloc(&options, 1, 0); RUNTIME_options_ws_alloc(&options, 1, 0);
/* Matrix to store the norm of each element */ /* Matrix to store the norm of each element */
chameleon_desc_init(&Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q, A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q,
NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg); NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
/* Matrix to compute the global frobenius norm */ /* Matrix to compute the global frobenius norm */
chameleon_desc_init(&Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
workmt * 2, worknt, 0, 0, workmt * 2, worknt, A->p, A->q, workmt * 2, worknt, 0, 0, workmt * 2, worknt, A->p, A->q,
NULL, NULL, NULL, NULL); NULL, NULL, NULL, NULL );
chameleon_pzhered_frb( trans, uplo, A, &Wcol, &Welt, &options ); chameleon_pzhered_frb( trans, uplo, A, &Wcol, &Welt, &options );
CHAMELEON_Desc_Flush(&Wcol, sequence); CHAMELEON_Desc_Flush( &Wcol, sequence );
CHAMELEON_Desc_Flush(&Welt, sequence); CHAMELEON_Desc_Flush( &Welt, sequence );
CHAMELEON_Desc_Flush(A, sequence); CHAMELEON_Desc_Flush( A, sequence );
RUNTIME_sequence_wait(chamctxt, sequence); RUNTIME_sequence_wait( chamctxt, sequence );
gnorm = *((double *)Welt.get_blkaddr(&Welt, A->myrank / A->q, A->myrank % A->q)); gnorm = *((double *)Welt.get_blkaddr(&Welt, A->myrank / A->q, A->myrank % A->q));
chameleon_desc_destroy(&Welt); chameleon_desc_destroy(&Welt);
...@@ -228,33 +233,28 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_ ...@@ -228,33 +233,28 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_
/** /**
* Reduce the precision of the tiles if possible * Reduce the precision of the tiles if possible
*/ */
if (prec < 0.) eps_diag = CHAMELEON_slamch();
{ if (prec < 0.) {
#if !defined(CHAMELEON_SIMULATION) eps = CHAMELEON_dlamch();
eps = LAPACKE_dlamch_work('e');
#else
#if defined(PRECISION_z) || defined(PRECISION_d)
eps = 1.e-15;
#else
eps = 1.e-7;
#endif
#endif
} }
else else {
{
eps = prec; eps = prec;
} }
threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt)); threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt));
threshold_diag = (eps < eps_diag) ? threshold : (eps_diag * gnorm) / (double)(chameleon_min(A->mt, A->nt));
#if defined(CHAMELEON_DEBUG_GERED) #if defined(CHAMELEON_DEBUG_GERED)
fprintf(stderr, fprintf( stderr,
"[%2d] The norm of A is: %e\n" "[%2d] The norm of A is: %e\n"
"[%2d] The requested precision is: %e\n" "[%2d] The requested precision is: %e\n"
"[%2d] The computed threshold is: %e\n", "[%2d] The computed threshold is: %e\n"
A->myrank, gnorm, "[%2d] The threshold diag is: %e\n",
A->myrank, eps, A->myrank, gnorm,
A->myrank, threshold); A->myrank, eps,
A->myrank, threshold,
A->myrank, threshold_diag );
#endif #endif
for (m = 0; m < A->mt; m++) for (m = 0; m < A->mt; m++)
{ {
int tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; int tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb;
...@@ -271,15 +271,21 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_ ...@@ -271,15 +271,21 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_
* ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low}) * ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low})
* ||A_{i,j}||_F < threshold / u_{low} * ||A_{i,j}||_F < threshold / u_{low}
*/ */
INSERT_TASK_zgered( &options, threshold, if ( m == n ) {
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); INSERT_TASK_zgered( &options, threshold_diag,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
}
else {
INSERT_TASK_zgered( &options, threshold,
tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) );
}
} }
} }
CHAMELEON_Desc_Flush(A, sequence); CHAMELEON_Desc_Flush( A, sequence );
RUNTIME_sequence_wait(chamctxt, sequence); RUNTIME_sequence_wait( chamctxt, sequence );
chameleon_desc_destroy(&Wcol); chameleon_desc_destroy( &Wcol );
RUNTIME_options_ws_free(&options); RUNTIME_options_ws_free( &options );
RUNTIME_options_finalize(&options, chamctxt); RUNTIME_options_finalize( &options, chamctxt );
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment