diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index cc25fb7d233c7206ced2d9e5fc59766e1bef25fe..3d6a7caf4dedb592cd1aa8422212bd091c287687 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -40,11 +40,12 @@ set(CHAMELEON_CONTROL ../control/context.c ../control/control.c ../control/descriptor.c - ../control/descriptor_rec.c ../control/descriptor_helpers.c ../control/descriptor_ipiv.c - ../control/workspace.c + ../control/descriptor_rec.c + ../control/lamch.c ../control/tile.c + ../control/workspace.c ../control/chameleon_f77.c ../control/chameleon_mf77.c map.c diff --git a/compute/pzgepdf_qdwh.c b/compute/pzgepdf_qdwh.c index 2f640e7a798bae6232d18bee57e035134ac81524..de7a82cb1a1f8e99c79bbea4a3551e8c5549d835 100644 --- a/compute/pzgepdf_qdwh.c +++ b/compute/pzgepdf_qdwh.c @@ -604,15 +604,7 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t double normest, Unorm; int it, itconv, facto = -1; -#if !defined(CHAMELEON_SIMULATION) - double eps = LAPACKE_dlamch_work('e'); -#else -#if defined(PRECISION_z) || defined(PRECISION_d) - double eps = 1.e-15; -#else - double eps = 1.e-7; -#endif -#endif + double eps = CHAMELEON_dlamch(); double tol1 = 5. * eps; double tol3 = pow( tol1, 1./3. ); double id_flops_ratio = ( _zgepdf_qdwh_opt_id == 1 ) ? 
.5 : 1.5; diff --git a/compute/pzgered.c b/compute/pzgered.c index c43e9a0e4704b958bbe6380032f8c32ee3b060fe..e7feeed9d7ba8f0a3e433295e1d5af449035d531 100644 --- a/compute/pzgered.c +++ b/compute/pzgered.c @@ -28,8 +28,10 @@ #define W( desc, m, n ) (desc), (m), (n) static inline void -chameleon_pzgered_frb( cham_uplo_t uplo, - CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt, +chameleon_pzgered_frb( cham_uplo_t uplo, + CHAM_desc_t *A, + CHAM_desc_t *Wnorm, + CHAM_desc_t *Welt, RUNTIME_option_t *options ) { double alpha = 1.0; @@ -155,14 +157,17 @@ chameleon_pzgered_frb( cham_uplo_t uplo, /** * */ -void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A, - RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +void chameleon_pzgered( cham_uplo_t uplo, + double prec, + CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) { CHAM_context_t *chamctxt; RUNTIME_option_t options; CHAM_desc_t Wcol; CHAM_desc_t Welt; - double gnorm, threshold, eps; + double gnorm, threshold, eps, eps_diag, threshold_diag; int workmt, worknt; int m, n; @@ -202,37 +207,36 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A, /** * Reduce the precision of the tiles if possible */ + eps_diag = CHAMELEON_slamch(); if ( prec < 0. ) { -#if !defined(CHAMELEON_SIMULATION) - eps = LAPACKE_dlamch_work('e'); -#else -#if defined(PRECISION_z) || defined(PRECISION_d) - eps = 1.e-15; -#else - eps = 1.e-7; -#endif -#endif + eps = CHAMELEON_dlamch(); } else { eps = prec; } threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt)); + threshold_diag = ( eps < eps_diag ) ? 
threshold : (eps_diag * gnorm) / (double)(chameleon_min(A->mt, A->nt)); #if defined(CHAMELEON_DEBUG_GERED) fprintf( stderr, "[%2d] The norm of A is: %e\n" "[%2d] The requested precision is: %e\n" - "[%2d] The computed threshold is: %e\n", + "[%2d] The computed threshold is: %e\n" + "[%2d] The threshold diag is : %e\n", A->myrank, gnorm, A->myrank, eps, - A->myrank, threshold ); + A->myrank, threshold, + A->myrank, threshold_diag ); #endif - for(m = 0; m < A->mt; m++) { + + for(m = 0; m < A->mt; m++) + { int tempmm = ( m == (A->mt-1) ) ? A->m - m * A->mb : A->mb; int nmin = ( uplo == ChamUpper ) ? m : 0; int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, A->nt) : A->nt; - for(n = nmin; n < nmax; n++) { + for(n = nmin; n < nmax; n++) + { int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb; /* @@ -241,8 +245,14 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A, * ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low}) * ||A_{i,j}||_F < threshold / u_{low} */ - INSERT_TASK_zgered( &options, threshold, - tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); + if ( m == n ) { + INSERT_TASK_zgered( &options, threshold_diag, + tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); + } + else { + INSERT_TASK_zgered( &options, threshold, + tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); + } } } @@ -250,6 +260,6 @@ void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A, RUNTIME_sequence_wait( chamctxt, sequence ); chameleon_desc_destroy( &Wcol ); - RUNTIME_options_ws_free(&options); - RUNTIME_options_finalize(&options, chamctxt); + RUNTIME_options_ws_free( &options ); + RUNTIME_options_finalize( &options, chamctxt ); } diff --git a/compute/pzhered.c b/compute/pzhered.c index cc32f4243c81dd2e63656cf9972850e53dddbd83..869d748cf49c4d131ccb90292b492bb3658167da 100644 --- a/compute/pzhered.c +++ b/compute/pzhered.c @@ -28,8 +28,11 @@ #define W(desc, m, n) (desc), (m), (n) static inline void -chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, 
- CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt, +chameleon_pzhered_frb( cham_trans_t trans, + cham_uplo_t uplo, + CHAM_desc_t *A, + CHAM_desc_t *Wnorm, + CHAM_desc_t *Welt, RUNTIME_option_t *options ) { double alpha = 1.0; @@ -84,8 +87,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, { int tempnn = (n == (NT - 1)) ? N - n * A->nb : A->nb; - if (n == m) - { + if ( n == m ) { if ( trans == ChamConjTrans ) { INSERT_TASK_zhessq( options, ChamEltwise, uplo, tempmm, @@ -97,8 +99,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, A(m, n), W( Wnorm, m, n) ); } } - else - { + else { INSERT_TASK_zgessq( options, ChamEltwise, tempmm, tempnn, A(m, n), W( Wnorm, m, n )); @@ -166,7 +167,7 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, { for (n = 0; n < A->q; n++) { - if ((m != 0) || (n != 0)) + if ( ( m != 0 ) || ( n != 0 ) ) { INSERT_TASK_dlacpy( options, @@ -180,14 +181,18 @@ chameleon_pzhered_frb( cham_trans_t trans, cham_uplo_t uplo, /** * */ -void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_desc_t *A, - RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +void chameleon_pzhered( cham_trans_t trans, + cham_uplo_t uplo, + double prec, + CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) { CHAM_context_t *chamctxt; RUNTIME_option_t options; CHAM_desc_t Wcol; CHAM_desc_t Welt; - double gnorm, threshold, eps; + double gnorm, threshold, eps, eps_diag, threshold_diag; int workmt, worknt; int m, n; @@ -205,22 +210,22 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_ RUNTIME_options_ws_alloc(&options, 1, 0); /* Matrix to store the norm of each element */ - chameleon_desc_init(&Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, - A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q, - NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg); + chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 
2, + A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q, + NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); /* Matrix to compute the global frobenius norm */ - chameleon_desc_init(&Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, - workmt * 2, worknt, 0, 0, workmt * 2, worknt, A->p, A->q, - NULL, NULL, NULL, NULL); + chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2, + workmt * 2, worknt, 0, 0, workmt * 2, worknt, A->p, A->q, + NULL, NULL, NULL, NULL ); chameleon_pzhered_frb( trans, uplo, A, &Wcol, &Welt, &options ); - CHAMELEON_Desc_Flush(&Wcol, sequence); - CHAMELEON_Desc_Flush(&Welt, sequence); - CHAMELEON_Desc_Flush(A, sequence); + CHAMELEON_Desc_Flush( &Wcol, sequence ); + CHAMELEON_Desc_Flush( &Welt, sequence ); + CHAMELEON_Desc_Flush( A, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); + RUNTIME_sequence_wait( chamctxt, sequence ); gnorm = *((double *)Welt.get_blkaddr(&Welt, A->myrank / A->q, A->myrank % A->q)); chameleon_desc_destroy(&Welt); @@ -228,33 +233,28 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_ /** * Reduce the precision of the tiles if possible */ - if (prec < 0.) - { -#if !defined(CHAMELEON_SIMULATION) - eps = LAPACKE_dlamch_work('e'); -#else -#if defined(PRECISION_z) || defined(PRECISION_d) - eps = 1.e-15; -#else - eps = 1.e-7; -#endif -#endif + eps_diag = CHAMELEON_slamch(); + if (prec < 0.) { + eps = CHAMELEON_dlamch(); } - else - { + else { eps = prec; } threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt)); + threshold_diag = (eps < eps_diag) ? 
threshold : (eps_diag * gnorm) / (double)(chameleon_min(A->mt, A->nt)); #if defined(CHAMELEON_DEBUG_GERED) - fprintf(stderr, - "[%2d] The norm of A is: %e\n" - "[%2d] The requested precision is: %e\n" - "[%2d] The computed threshold is: %e\n", - A->myrank, gnorm, - A->myrank, eps, - A->myrank, threshold); + fprintf( stderr, + "[%2d] The norm of A is: %e\n" + "[%2d] The requested precision is: %e\n" + "[%2d] The computed threshold is: %e\n" + "[%2d] The threshold diag is: %e\n", + A->myrank, gnorm, + A->myrank, eps, + A->myrank, threshold, + A->myrank, threshold_diag ); #endif + for (m = 0; m < A->mt; m++) { int tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; @@ -271,15 +271,21 @@ void chameleon_pzhered( cham_trans_t trans, cham_uplo_t uplo, double prec, CHAM_ * ||A_{i,j}||_F < u_{high} * || A ||_F / (nt * u_{low}) * ||A_{i,j}||_F < threshold / u_{low} */ - INSERT_TASK_zgered( &options, threshold, - tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); + if ( m == n ) { + INSERT_TASK_zgered( &options, threshold_diag, + tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); + } + else { + INSERT_TASK_zgered( &options, threshold, + tempmm, tempnn, A( m, n ), W( &Wcol, m, n ) ); + } } } - CHAMELEON_Desc_Flush(A, sequence); - RUNTIME_sequence_wait(chamctxt, sequence); + CHAMELEON_Desc_Flush( A, sequence ); + RUNTIME_sequence_wait( chamctxt, sequence ); - chameleon_desc_destroy(&Wcol); - RUNTIME_options_ws_free(&options); - RUNTIME_options_finalize(&options, chamctxt); + chameleon_desc_destroy( &Wcol ); + RUNTIME_options_ws_free( &options ); + RUNTIME_options_finalize( &options, chamctxt ); } diff --git a/control/lamch.c b/control/lamch.c new file mode 100644 index 0000000000000000000000000000000000000000..7dccb23f07fd4db5ef46b5ddcb971c2ae9d03a33 --- /dev/null +++ b/control/lamch.c @@ -0,0 +1,56 @@ +/** + * + * @file lamch.c + * + * @copyright 2024-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon lamch wrapper to factorize the test with simulation compilations + * + * @version 1.3.0 + * @author Mathieu Faverge + * @date 2024-09-18 + * + */ +#include "control/common.h" +#if !defined(CHAMELEON_SIMULATION) +#include "coreblas/lapacke.h" +#endif + +/** + * + * Wrapper to the LAPACKE_slamch_work('e') call that returns a pre-defined value + * if compiled with SimGrid. + * + * @return The epsilon value for single precision + * + */ +float +CHAMELEON_slamch( void ) +{ +#if !defined(CHAMELEON_SIMULATION) + return LAPACKE_slamch_work( 'e' ); +#else + return 1.e-7; +#endif +} + +/** + * + * Wrapper to the LAPACKE_dlamch_work('e') call that returns a pre-defined value + * if compiled with SimGrid. + * + * @return The epsilon value for double precision + * + */ +double +CHAMELEON_dlamch( void ) +{ +#if !defined(CHAMELEON_SIMULATION) + return LAPACKE_dlamch_work( 'e' ); +#else + return 1.e-15; +#endif +} diff --git a/example/lapack_to_chameleon/step0.h b/example/lapack_to_chameleon/step0.h index 60c98fc18c20ac9c32d7186660f194f73dc24439..1f940475fcae8b141e0da8c90a8b581b69a08c55 100644 --- a/example/lapack_to_chameleon/step0.h +++ b/example/lapack_to_chameleon/step0.h @@ -96,11 +96,7 @@ static void read_args(int argc, char *argv[], int *iparam){ * Print a header message to summarize main parameters */ 
static void print_header(char *prog_name, int * iparam) { -#if defined(CHAMELEON_SIMULATION) - double eps = 0.; -#else - double eps = LAPACKE_dlamch_work( 'e' ); -#endif + double eps = LAPACKE_dlamch_work( 'e' ); printf( "#\n" "# CHAMELEON %d.%d.%d, %s\n" diff --git a/example/lapack_to_chameleon/step2.h b/example/lapack_to_chameleon/step2.h index 470466c287f018a6a8e9c70318ec8b6ba53eeb19..1c43fc94446886240e588e72b82334ee76e65ada 100644 --- a/example/lapack_to_chameleon/step2.h +++ b/example/lapack_to_chameleon/step2.h @@ -88,11 +88,7 @@ static void read_args(int argc, char *argv[], int *iparam){ * Print a header message to summarize main parameters */ static void print_header(char *prog_name, int * iparam) { -#if defined(CHAMELEON_SIMULATION) - double eps = 0.; -#else - double eps = LAPACKE_dlamch_work( 'e' ); -#endif + double eps = LAPACKE_dlamch_work( 'e' ); printf( "#\n" "# CHAMELEON %d.%d.%d, %s\n" diff --git a/example/lapack_to_chameleon/step3.h b/example/lapack_to_chameleon/step3.h index 63c1ff0f47806e36477223c6eaf593877a6072bf..cbeeccaad4bff639ef8c266789fc28dec17dcfb4 100644 --- a/example/lapack_to_chameleon/step3.h +++ b/example/lapack_to_chameleon/step3.h @@ -88,11 +88,7 @@ static void read_args(int argc, char *argv[], int *iparam){ * Print a header message to summarize main parameters */ static void print_header(char *prog_name, int * iparam) { -#if defined(CHAMELEON_SIMULATION) - double eps = 0.; -#else - double eps = LAPACKE_dlamch_work( 'e' ); -#endif + double eps = LAPACKE_dlamch_work( 'e' ); printf( "#\n" "# CHAMELEON %d.%d.%d, %s\n" diff --git a/example/lapack_to_chameleon/step4.h b/example/lapack_to_chameleon/step4.h index 28f0c4ca3ce3960fc901f38cd199bfc602f5d04d..d5b1a1c255d73b712f56e5e6af001e9424e2bac2 100644 --- a/example/lapack_to_chameleon/step4.h +++ b/example/lapack_to_chameleon/step4.h @@ -88,11 +88,7 @@ static void read_args(int argc, char *argv[], int *iparam){ * Print a header message to summarize main parameters */ static void 
print_header(char *prog_name, int * iparam) { -#if defined(CHAMELEON_SIMULATION) - double eps = 0.; -#else - double eps = LAPACKE_dlamch_work( 'e' ); -#endif + double eps = LAPACKE_dlamch_work( 'e' ); printf( "#\n" "# CHAMELEON %d.%d.%d, %s\n" diff --git a/example/lapack_to_chameleon/step5.h b/example/lapack_to_chameleon/step5.h index e2ebf14d99f2d1feed0627ac45cc5f1b02c07778..8ecf015e4a41daaec0b5d85d1f0dd57f834d8f0f 100644 --- a/example/lapack_to_chameleon/step5.h +++ b/example/lapack_to_chameleon/step5.h @@ -103,11 +103,7 @@ static void read_args(int argc, char *argv[], int *iparam){ * Print a header message to summarize main parameters */ static void print_header(char *prog_name, int * iparam) { -#if defined(CHAMELEON_SIMULATION) - double eps = 0.; -#else - double eps = LAPACKE_dlamch_work( 'e' ); -#endif + double eps = LAPACKE_dlamch_work( 'e' ); printf( "#\n" "# CHAMELEON %d.%d.%d, %s\n" diff --git a/example/lapack_to_chameleon/step6.h b/example/lapack_to_chameleon/step6.h index d60683987471c510aafa20ce55a6a87f0e358bcb..98043e257eedaa0225d06eb5f20877bdcd43880e 100644 --- a/example/lapack_to_chameleon/step6.h +++ b/example/lapack_to_chameleon/step6.h @@ -115,11 +115,7 @@ static void read_args(int argc, char *argv[], int *iparam){ * Print a header message to summarize main parameters */ static void print_header(char *prog_name, int * iparam) { -#if defined(CHAMELEON_SIMULATION) - double eps = 0.; -#else - double eps = LAPACKE_dlamch_work( 'e' ); -#endif + double eps = LAPACKE_dlamch_work( 'e' ); printf( "#\n" "# CHAMELEON %d.%d.%d, %s\n" diff --git a/example/lapack_to_chameleon/step7.h b/example/lapack_to_chameleon/step7.h index ad975a20ba97de204732af99a660a8f4b080638f..7622f5ddbbe669e4cdf2810d761c0aa74181f2fb 100644 --- a/example/lapack_to_chameleon/step7.h +++ b/example/lapack_to_chameleon/step7.h @@ -165,11 +165,7 @@ static void read_args(int argc, char *argv[], int *iparam){ * Print a header message to summarize main parameters */ static void 
print_header(char *prog_name, int * iparam) { -#if defined(CHAMELEON_SIMULATION) - double eps = 0.; -#else - double eps = LAPACKE_dlamch_work( 'e' ); -#endif + double eps = LAPACKE_dlamch_work( 'e' ); printf( "#\n" "# CHAMELEON %d.%d.%d, %s\n" diff --git a/include/chameleon.h b/include/chameleon.h index 7aeaf7d331227c35cd34288db6a95b68fc50f2fc..cb7e23dedaf7330ab0deffef714f6d9bc01fb7b1 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -227,6 +227,10 @@ int CHAMELEON_Ipiv_Gather( CHAM_ipiv_t *ipivdesc, int root ); void CHAMELEON_Ipiv_Print ( const CHAM_ipiv_t *ipiv ); +/* Numerical helpers */ +float CHAMELEON_slamch( void ); +double CHAMELEON_dlamch( void ); + /** * * @ingroup Control diff --git a/runtime/starpu/codelets/codelet_zgered.c b/runtime/starpu/codelets/codelet_zgered.c index d7a132200d603dca976f5c3d90ecf69cd2a33ab3..430db00ff7881550a473fee6bf24478ddbf9b9a8 100644 --- a/runtime/starpu/codelets/codelet_zgered.c +++ b/runtime/starpu/codelets/codelet_zgered.c @@ -71,8 +71,7 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, #if defined(CHAMELEON_USE_MPI) /* Backup the MPI tag */ - if (A->myrank == tileA->rank) - { + if ( A->myrank == tileA->rank ) { tag = starpu_mpi_data_get_tag( *handleAin ); } #endif /* defined(CHAMELEON_USE_MPI) */ @@ -89,39 +88,41 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, #if defined(CHAMELEON_DEBUG_GERED) fprintf( stderr, "[%2d] Convert the tile ( %d, %d ) to half precision\n", - A->myrank, Am, An); + A->myrank, Am, An); #endif - starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexHalf ); + if ( A->myrank == tileA->rank ) + { + starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexHalf ); - rt_shm_starpu_insert_task( - &cl_dlag2h, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_R, *handleAin, - STARPU_W, handleAout, - STARPU_PRIORITY, options->priority, - STARPU_EXECUTE_ON_WORKER, options->workerid, + rt_shm_starpu_insert_task( + &cl_dlag2h, + 
STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_R, *handleAin, + STARPU_W, handleAout, + STARPU_PRIORITY, options->priority, + STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "dlag2h", + STARPU_NAME, "dlag2h", #endif - 0); + 0); - starpu_data_unregister_no_coherency( *handleAin ); - *handleAin = handleAout; - tileA->flttype = ChamComplexHalf; - starpu_mpi_data_register( handleAout, tag, tileA->rank ); - } - else - { - tileA->flttype = ChamComplexHalf; - if (*handleAin != NULL) + starpu_data_unregister_no_coherency( *handleAin ); + *handleAin = handleAout; + tileA->flttype = ChamComplexHalf; + starpu_mpi_data_register( handleAout, tag, tileA->rank ); + } + else { - starpu_data_unregister_no_coherency(*handleAin); - *handleAin = NULL; + tileA->flttype = ChamComplexHalf; + if ( *handleAin != NULL ) + { + starpu_data_unregister_no_coherency( *handleAin ); + *handleAin = NULL; + } } + return; } - return; - } #endif #endif @@ -129,11 +130,7 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, /* * Check for single precision */ -#if !defined(CHAMELEON_SIMULATION) - u_low = LAPACKE_slamch_work('e'); -#else - u_low = 1e-8; -#endif + u_low = CHAMELEON_slamch(); if ( lnorm < (threshold / u_low) ) { #if defined(CHAMELEON_DEBUG_GERED) @@ -141,34 +138,34 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options, "[%2d] Convert the tile ( %d, %d ) to single precision\n", A->myrank, Am, An ); #endif - if (A->myrank == tileA->rank) + if ( A->myrank == tileA->rank ) { starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexFloat ); rt_shm_starpu_insert_task( &cl_zlag2c, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_R, *handleAin, - STARPU_W, handleAout, - STARPU_PRIORITY, options->priority, + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_R, *handleAin, + STARPU_W, handleAout, + STARPU_PRIORITY, options->priority, 
STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zlag2c", + STARPU_NAME, "zlag2c", #endif 0); starpu_data_unregister_no_coherency( *handleAin ); - *handleAin = handleAout; + *handleAin = handleAout; tileA->flttype = ChamComplexFloat; starpu_mpi_data_register( *handleAin, tag, tileA->rank ); } else { tileA->flttype = ChamComplexFloat; - if (*handleAin != NULL) + if ( *handleAin != NULL ) { - starpu_data_unregister_no_coherency(*handleAin); + starpu_data_unregister_no_coherency( *handleAin ); *handleAin = NULL; } } diff --git a/runtime/starpu/codelets/codelet_zgerst.c b/runtime/starpu/codelets/codelet_zgerst.c index f0fbdc1a40cffc2f88f46f8cb32807acb0cd5720..c2faaf9398f2f5455fe3a85d6d3d5c42bfeafb8e 100644 --- a/runtime/starpu/codelets/codelet_zgerst.c +++ b/runtime/starpu/codelets/codelet_zgerst.c @@ -39,7 +39,8 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, handleAin = A->schedopt; handleAin += ((int64_t)A->lmt) * nn + mm; - if ( tileA->flttype == ChamComplexDouble ) { + if ( tileA->flttype == ChamComplexDouble ) + { starpu_data_handle_t *copy = handleAin; /* Remove first copy */ @@ -59,12 +60,12 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, return; } - if (A->myrank != tileA->rank) + if ( A->myrank != tileA->rank ) { tileA->flttype = ChamComplexDouble; - if (*handleAin != NULL) + if ( *handleAin != NULL ) { - starpu_data_unregister_no_coherency(*handleAin); + starpu_data_unregister_no_coherency( *handleAin ); *handleAin = NULL; } return; @@ -79,9 +80,9 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, switch( tileA->flttype ) { #if defined(CHAMELEON_USE_CUDA) && (CUDA_VERSION >= 7500) #if defined(PRECISION_d) - /* - * Restore from half precision - */ + /* + * Restore from half precision + */ case ChamComplexHalf: assert( options->withcuda ); #if defined(CHAMELEON_DEBUG_GERED) @@ -91,14 +92,14 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, #endif 
rt_shm_starpu_insert_task( &cl_hlag2d, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_R, *handleAin, - STARPU_W, handleAout, - STARPU_PRIORITY, options->priority, + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_R, *handleAin, + STARPU_W, handleAout, + STARPU_PRIORITY, options->priority, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "hlag2d", + STARPU_NAME, "hlag2d", #endif 0); break; @@ -108,19 +109,20 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, case ChamComplexFloat: #if defined(CHAMELEON_DEBUG_GERED) fprintf( stderr, - "[%2d] Convert back the tile ( %d, %d ) from half precision\n", + "[%2d] Convert back the tile ( %d, %d ) from single precision\n", A->myrank, Am, An ); #endif + rt_shm_starpu_insert_task( &cl_clag2z, - STARPU_VALUE, &m, sizeof(int), - STARPU_VALUE, &n, sizeof(int), - STARPU_R, *handleAin, - STARPU_W, handleAout, - STARPU_PRIORITY, options->priority, + STARPU_VALUE, &m, sizeof(int), + STARPU_VALUE, &n, sizeof(int), + STARPU_R, *handleAin, + STARPU_W, handleAout, + STARPU_PRIORITY, options->priority, STARPU_EXECUTE_ON_WORKER, options->workerid, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "clag2z", + STARPU_NAME, "clag2z", #endif 0); break; @@ -130,7 +132,7 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options, } starpu_data_unregister_no_coherency( *handleAin ); - *handleAin = handleAout; + *handleAin = handleAout; tileA->flttype = ChamComplexDouble; starpu_mpi_data_register( handleAout, tag, tileA->rank ); } diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c index 9b7336c18fc0803900b087f878ca287c0dc97ac5..4c2dd37c0fa0b44dfd6ba4fd66a3cd99361683ae 100644 --- a/testing/chameleon_ztesting.c +++ b/testing/chameleon_ztesting.c @@ -167,7 +167,6 @@ int main (int argc, char **argv) { testing_options_init( &options ); -#if !defined(CHAMELEON_SIMULATION) /* Let's initialize the accuracy for 
the checks */ { #if (defined(PRECISION_z) || defined(PRECISION_d)) @@ -178,10 +177,9 @@ int main (int argc, char **argv) { else #endif { - testing_setaccuracy( LAPACKE_dlamch_work('e') ); + testing_setaccuracy( CHAMELEON_dlamch() ); } } -#endif rc = CHAMELEON_Init( options.threads, options.gpus ); if ( rc != CHAMELEON_SUCCESS ) { diff --git a/testing/parameters.c b/testing/parameters.c index 5beb021c61b6abdc0d020e53f9dd408efa938cce..9607d83e901a779375cd70703ec49dec7fc96f10 100644 --- a/testing/parameters.c +++ b/testing/parameters.c @@ -13,6 +13,7 @@ * @author Mathieu Faverge * @author Alycia Lisito * @author Lionel Eyraud-Dubois + * @author Lucas Barros De Assis * @date 2023-07-05 * */ @@ -489,6 +490,12 @@ parameters_desc_create( const char *id, CHAM_desc_t **descptr, cham_flttype_t dt mtxfmt = -mtxfmt; /* Inverse sign to get the defined values */ + if ( cham_is_mixed( dtyp ) && ( (void*)mtxfmt != CHAMELEON_MAT_ALLOC_TILE ) ) + { + fprintf( stderr, "parameters_desc_create: Mixed precision descriptors can only be used with tiled allocation. Please enforce '--mtxfmt=1' \n" ); + return CHAMELEON_ERR_ILLEGAL_VALUE; + } + if ( !custom ) { int P = parameters_getvalue_int( "P" ); int Q = parameters_compute_q( P ); diff --git a/testing/vendor_ztesting.c b/testing/vendor_ztesting.c index 72b484b214e496a8fd84aad8a1038b915ff3bffd..fd358eed9e022e752a801d201eac831f4a028093 100644 --- a/testing/vendor_ztesting.c +++ b/testing/vendor_ztesting.c @@ -118,10 +118,8 @@ int main (int argc, char **argv) { testing_options_init( &options ); -#if !defined(CHAMELEON_SIMULATION) /* Let's initialize the accuracy for the checks */ - testing_setaccuracy( LAPACKE_dlamch_work('e') ); -#endif + testing_setaccuracy( CHAMELEON_dlamch() ); rc = CHAMELEON_Init( options.threads, 0 ); if ( rc != CHAMELEON_SUCCESS ) {