diff --git a/compute/zgesv_nopiv.c b/compute/zgesv_nopiv.c index 3ad6b0c255f8abe018936a3065921d770dc2cd5b..cf5c756eb00259ded3da6bc1c8dcfb5965768089 100644 --- a/compute/zgesv_nopiv.c +++ b/compute/zgesv_nopiv.c @@ -93,6 +93,7 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS, RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; CHAM_desc_t descAl, descAt; CHAM_desc_t descBl, descBt; + void *ws = NULL; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -139,7 +140,8 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS, B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request ); /* Call the tile interface */ - CHAMELEON_zgesv_nopiv_Tile_Async( &descAt, &descBt, sequence, &request ); + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( &descAt ); + CHAMELEON_zgesv_nopiv_Tile_Async( &descAt, &descBt, ws, sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, @@ -150,6 +152,7 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS, chameleon_sequence_wait( chamctxt, sequence ); /* Cleanup the temporary data */ + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt ); @@ -196,10 +199,11 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS, */ int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B ) { - CHAM_context_t *chamctxt; + CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + void *ws; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -208,12 +212,15 @@ int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B ) } chameleon_sequence_create( chamctxt, &sequence ); - CHAMELEON_zgesv_nopiv_Tile_Async( A, B, sequence, &request ); + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A ); + CHAMELEON_zgesv_nopiv_Tile_Async( A, B, ws, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); CHAMELEON_Desc_Flush( B, sequence ); chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); + status = sequence->status; chameleon_sequence_destroy( chamctxt, sequence ); return status; @@ -249,10 +256,14 @@ int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B ) * @sa CHAMELEON_zcgesv_Tile_Async * */ -int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *B, - RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A, + CHAM_desc_t *B, + void *user_ws, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) { - CHAM_context_t *chamctxt; + CHAM_context_t *chamctxt; + struct chameleon_pzgetrf_nopiv_s *ws; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -295,11 +306,23 @@ int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *B, return CHAMELEON_SUCCESS; */ - chameleon_pzgetrf_nopiv( NULL, A, sequence, request ); + if ( user_ws == NULL ) { + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A ); + } + else { + ws = user_ws; + } + chameleon_pzgetrf_nopiv( ws, A, sequence, request ); chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request ); chameleon_pztrsm( ChamLeft, ChamUpper, ChamNoTrans, ChamNonUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request ); + if ( user_ws == NULL ) { + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Desc_Flush( B, sequence ); + chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); + } return CHAMELEON_SUCCESS; } diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index c139111befda8146f20194193b908b87b5548fa1..9b085201650e266247ade2308e6522b58d4070a9 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -211,7 +211,7 @@ int CHAMELEON_zgeqrf_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, RUNTIME_sequence int CHAMELEON_zgeqrs_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zgesv_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgesv_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -int CHAMELEON_zgesv_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int CHAMELEON_zgesv_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, void * ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, double *S, CHAM_desc_t *T, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zgetrf_Tile_Async(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/testing/testing_zgesv_nopiv.c b/testing/testing_zgesv_nopiv.c index 8855334349498a9a28f1ca2cc3e70cfe1f95ea9e..d1fd9b1df9c7b745c178f42b69892b781c87efed 100644 --- a/testing/testing_zgesv_nopiv.c +++ b/testing/testing_zgesv_nopiv.c @@ -13,7 +13,7 @@ * @author Lucas Barros de Assis * @author Mathieu Faverge * @author Alycia Lisito - * @date 2023-07-05 + * @date 2024-10-17 * @precisions normal z -> c d s * */ @@ -64,7 +64,7 @@ testing_zgesv_nopiv_desc( run_arg_list_t *args, int check ) /* Calculates the solution */ testing_start( &test_data ); if ( async ) { - hres = CHAMELEON_zgesv_nopiv_Tile_Async( descA, descX, + hres = CHAMELEON_zgesv_nopiv_Tile_Async( descA, descX, NULL, test_data.sequence, &test_data.request ); CHAMELEON_Desc_Flush( descA, test_data.sequence ); CHAMELEON_Desc_Flush( descX, test_data.sequence );