diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c
index 967e83f2fbb24a949412fb127a2f5f5cac9137f9..165801efb0e1bf4cfc5f5f36edf33def54063797 100644
--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -113,11 +113,13 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
                                int                         k,
                                RUNTIME_option_t           *options )
 {
-#if defined(GETRF_NOPIV_PER_COLUMN)
-    chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options );
-#else
-    chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options );
-#endif
+    /* TODO: Should be replaced by a function pointer */
+    if ( ws->alg == ChamGetrfNoPivPerColumn ) {
+        chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options );
+    }
+    else {
+        chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options );
+    }
 }
 
 /**
@@ -180,9 +182,9 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
  *  Parallel tile LU factorization with no pivoting - dynamic scheduling
  */
 void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
-                        CHAM_desc_t        *A,
-                        RUNTIME_sequence_t *sequence,
-                        RUNTIME_request_t  *request )
+                        CHAM_desc_t                *A,
+                        RUNTIME_sequence_t         *sequence,
+                        RUNTIME_request_t          *request )
 {
     CHAM_context_t  *chamctxt;
     RUNTIME_option_t options;
diff --git a/compute/zgetrf.c b/compute/zgetrf.c
index c99ae3f80b2d2c3afaee44832c10a5519bbccdf6..bcb8ee0c8c560863cf27248f180ce6af0d11e628 100644
--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -52,26 +52,44 @@
 void *
 CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
 {
-    CHAM_context_t *chamctxt;
-    struct chameleon_pzgetrf_s *options;
+    CHAM_context_t             *chamctxt;
+    struct chameleon_pzgetrf_s *ws;
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
         return NULL;
     }
 
-    options = calloc( 1, sizeof( struct chameleon_pzgetrf_s ) );
-    options->ib = CHAMELEON_IB;
+    ws = calloc( 1, sizeof( struct chameleon_pzgetrf_s ) );
+    ws->alg = ChamGetrfNoPiv;
+    ws->ib  = CHAMELEON_IB;
+
+    {
+        char *algostr = chameleon_getenv( "CHAMELEON_GETRF_ALGO" );
+
+        if ( algostr ) {
+            if ( strcasecmp( algostr, "nopiv" ) == 0 ) { /* strcasecmp() returns 0 on a match */
+                ws->alg = ChamGetrfNoPiv;
+            }
+            else if ( strcasecmp( algostr, "nopivpercolumn" ) == 0 ) {
+                ws->alg = ChamGetrfNoPivPerColumn;
+            }
+            else {
+                fprintf( stderr, "ERROR: CHAMELEON_GETRF_ALGO is not one of NoPiv, NoPivPerColumn => Switch back to NoPiv\n" );
+            }
+        }
+        chameleon_cleanenv( algostr );
+    }
 
-#if defined(GETRF_NOPIV_PER_COLUMN)
-    chameleon_desc_init( &(options->U), CHAMELEON_MAT_ALLOC_TILE,
-                         ChamComplexDouble, 1, A->nb, A->nb,
-                         A->mt, A->nt * A->nb, 0, 0,
-                         A->mt, A->nt * A->nb, A->p, A->q,
-                         NULL, NULL, A->get_rankof_init );
-#endif
+    if ( ws->alg == ChamGetrfNoPivPerColumn ) {
+        chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE,
+                             ChamComplexDouble, 1, A->nb, A->nb,
+                             A->mt, A->nt * A->nb, 0, 0,
+                             A->mt, A->nt * A->nb, A->p, A->q,
+                             NULL, NULL, A->get_rankof_init );
+    }
 
-    return options;
+    return ws;
 }
 
 /**
@@ -94,14 +112,13 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
  *
  */
 void
-CHAMELEON_zgetrf_WS_Free( const CHAM_desc_t *A, void *user_ws )
+CHAMELEON_zgetrf_WS_Free( void *user_ws )
 {
     struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)user_ws;
 
-#if defined(GETRF_NOPIV_PER_COLUMN)
-    chameleon_desc_destroy( &(ws->U) );
-#endif
-
+    if ( ws->alg == ChamGetrfNoPivPerColumn ) {
+        chameleon_desc_destroy( &(ws->U) );
+    }
     free( ws );
 }
 
@@ -210,7 +227,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA )
     chameleon_sequence_wait( chamctxt, sequence );
 
     /* Cleanup the temporary data */
-    CHAMELEON_zgetrf_WS_Free( &descAt, ws );
+    CHAMELEON_zgetrf_WS_Free( ws );
     chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
 
     status = sequence->status;
@@ -275,7 +292,7 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A )
     CHAMELEON_Desc_Flush( A, sequence );
 
     chameleon_sequence_wait( chamctxt, sequence );
-    CHAMELEON_zgetrf_WS_Free( A, ws );
+    CHAMELEON_zgetrf_WS_Free( ws );
 
     status = sequence->status;
     chameleon_sequence_destroy( chamctxt, sequence );
@@ -321,7 +338,8 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A,
                              RUNTIME_sequence_t *sequence,
                              RUNTIME_request_t  *request )
 {
-    CHAM_context_t *chamctxt;
+    CHAM_context_t             *chamctxt;
+    struct chameleon_pzgetrf_s *ws;
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
@@ -364,7 +382,19 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A,
         return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
     }
 
+    if ( user_ws == NULL ) {
+        ws = CHAMELEON_zgetrf_WS_Alloc( A );
+    }
+    else {
+        ws = user_ws;
+    }
+
-    chameleon_pzgetrf( user_ws, A, sequence, request );
+    chameleon_pzgetrf( ws, A, sequence, request ); /* ws, not user_ws: user_ws may be NULL here */
+    if ( user_ws == NULL ) {
+        CHAMELEON_Desc_Flush( A, sequence );
+        chameleon_sequence_wait( chamctxt, sequence );
+        CHAMELEON_zgetrf_WS_Free( ws );
+    }
 
     return CHAMELEON_SUCCESS;
 }
diff --git a/control/compute_z.h b/control/compute_z.h
index bf9fdc8dca77911c439b8071b2264180bbf0ce19..1dd9b13055672181c935ebf67fe8ff1db6b0199e 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -41,8 +41,9 @@ struct chameleon_pzgemm_s {
  * @brief Data structure to handle the GETRF workspaces with partial pivoting
  */
 struct chameleon_pzgetrf_s {
-    int         ib; /* Internal blocking parameter */
-    CHAM_desc_t U;
+    cham_getrf_t alg; /* GETRF algorithm variant */
+    int          ib;  /* Internal blocking parameter */
+    CHAM_desc_t  U;
 };
 
 /**
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index e587d015c4239a90fb10c228d9bb41faaca7dda1..1f04c61aad249f13d4653e94886eba891236da35 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -324,7 +324,7 @@ void CHAMELEON_zcesca_WS_Free( void *ws );
 void *CHAMELEON_zgram_WS_Alloc( const CHAM_desc_t *A );
 void  CHAMELEON_zgram_WS_Free( void *ws );
 void *CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A );
-void  CHAMELEON_zgetrf_WS_Free( const CHAM_desc_t *A, void *ws );
+void  CHAMELEON_zgetrf_WS_Free( void *ws );
 
 int CHAMELEON_Alloc_Workspace_zgesv_incpiv(        int N, CHAM_desc_t **descL, int **IPIV, int p, int q);
 int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, int **IPIV, int p, int q);
diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h
index ba252005b72f862af1896ca1cac14ab0742f62a3..d3d2b43178ad3f0dcca7532a5c3f9b233875eb54 100644
--- a/include/chameleon/constants.h
+++ b/include/chameleon/constants.h
@@ -194,6 +194,14 @@ typedef enum chameleon_gemm_e {
     ChamGemmAlgSummaC
 } cham_gemm_t;
 
+/**
+ * @brief Chameleon GETRF algorithm variants
+ */
+typedef enum chameleon_getrf_e {
+    ChamGetrfNoPiv,
+    ChamGetrfNoPivPerColumn,
+} cham_getrf_t;
+
 #define ChameleonTrd            1001
 #define ChameleonBrd            1002
 
diff --git a/testing/testing_zgetrf.c b/testing/testing_zgetrf.c
index e075741c3f99764cae4b5c2eea957d5cc79d0dab..b41ee056dfbcdfb89590841041c11014b9f296d6 100644
--- a/testing/testing_zgetrf.c
+++ b/testing/testing_zgetrf.c
@@ -122,7 +122,7 @@ testing_zgetrf_desc( run_arg_list_t *args, int check )
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
     if ( ws != NULL ) {
-        CHAMELEON_zgetrf_WS_Free( descA, ws );
+        CHAMELEON_zgetrf_WS_Free( ws );
     }
 
     CHAMELEON_Desc_Destroy( &descA );