diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c index 1f45349147c331212cc6b72e4b802683fc69424e..3409e97f7a80cfb0844b4eefa66d6646be6239f1 100644 --- a/compute/zgetrf_nopiv.c +++ b/compute/zgetrf_nopiv.c @@ -167,6 +167,7 @@ int CHAMELEON_zgetrf_nopiv( int M, int N, CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = NULL; RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + void *ws = NULL; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -207,7 +208,8 @@ int CHAMELEON_zgetrf_nopiv( int M, int N, A, NB, NB, LDA, N, M, N, sequence, &request ); /* Call the tile interface */ - CHAMELEON_zgetrf_nopiv_Tile_Async( &descAt, sequence, &request ); + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( &descAt ); + CHAMELEON_zgetrf_nopiv_Tile_Async( &descAt, ws, sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, @@ -216,6 +218,7 @@ int CHAMELEON_zgetrf_nopiv( int M, int N, chameleon_sequence_wait( chamctxt, sequence ); /* Cleanup the temporary data */ + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); status = sequence->status; @@ -259,10 +262,11 @@ int CHAMELEON_zgetrf_nopiv( int M, int N, */ int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A ) { - CHAM_context_t *chamctxt; + CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + void *ws; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -271,11 +275,14 @@ int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A ) } chameleon_sequence_create( chamctxt, &sequence ); - CHAMELEON_zgetrf_nopiv_Tile_Async( A, sequence, &request ); + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A ); + CHAMELEON_zgetrf_nopiv_Tile_Async( A, ws, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); + status = sequence->status; chameleon_sequence_destroy( chamctxt, sequence ); return status; @@ -314,11 +321,13 @@ int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A ) * @sa CHAMELEON_zgetrs_Tile_Async * */ -int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A, - RUNTIME_sequence_t *sequence, - RUNTIME_request_t *request ) +int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A, + void *user_ws, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) { - CHAM_context_t *chamctxt; + CHAM_context_t *chamctxt; + struct chameleon_pzgetrf_nopiv_s *ws; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -353,7 +362,19 @@ int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A, return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); } - chameleon_pzgetrf_nopiv( NULL, A, sequence, request ); + if ( user_ws == NULL ) { + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A ); + } + else { + ws = user_ws; + } + chameleon_pzgetrf_nopiv( ws, A, sequence, request ); + + if ( user_ws == NULL ) { + CHAMELEON_Desc_Flush( A, sequence ); + chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); + } return CHAMELEON_SUCCESS; } diff --git a/control/chameleon_zf77.c b/control/chameleon_zf77.c index 70541b792b2b3ee527c1737af4f314a86a7f57d3..64fad284a4f118b79bef98e0c622f415e3a2ac0a 100644 --- a/control/chameleon_zf77.c +++ b/control/chameleon_zf77.c @@ -11,7 +11,7 @@ * * @brief Chameleon Fortran77 computational routines * - * @version 1.2.0 + * @version 1.3.0 * @comment This file has been automatically generated * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @comment This file is automatically generated by tools/genf77interface.pl @@ -21,7 +21,8 @@ * @author Cedric Castagnede * @author Florent Pruvost * @author Alycia Lisito - * @date 2022-02-22 + * @author Matthieu Kuhn + * @date 2024-10-17 * @precisions normal z -> c d s * */ @@ -727,7 +728,7 @@ void CHAMELEON_ZGETRF_INCPIV_TILE_ASYNC(CHAM_desc_t *A, CHAM_desc_t *L, int *IPI { *info = CHAMELEON_zgetrf_incpiv_Tile_Async(A, L, IPIV, sequence, request); } void CHAMELEON_ZGETRF_NOPIV_TILE_ASYNC(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request, int *info) -{ *info = CHAMELEON_zgetrf_nopiv_Tile_Async(A, sequence, request); } +{ *info = CHAMELEON_zgetrf_nopiv_Tile_Async(A, NULL, sequence, request); } //void CHAMELEON_ZGETRI_TILE_ASYNC(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request, int *info) //{ *info = CHAMELEON_zgetri_Tile_Async(A, IPIV, W, sequence, request); } diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index 9bd22083b926e2e51051848557500a5c83571e47..c139111befda8146f20194193b908b87b5548fa1 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -24,7 +24,7 @@ * @author Alycia Lisito * @author Matthieu Kuhn * @author Ana Hourcau - * @date 2024-07-17 + * @date 2024-10-17 * @precisions normal z -> c d s * */ @@ -215,7 +215,7 @@ int CHAMELEON_zgesv_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_seq int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, double *S, CHAM_desc_t *T, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zgetrf_Tile_Async(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, void * ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/testing/testing_zgetrf_nopiv.c b/testing/testing_zgetrf_nopiv.c index afa44ede3d749a4caa695a8f780ee9f30615e793..657513eaf26371399a7402e1e0b305afdc8fb854 100644 --- a/testing/testing_zgetrf_nopiv.c +++ b/testing/testing_zgetrf_nopiv.c @@ -13,7 +13,9 @@ * @author Lucas Barros de Assis * @author Mathieu Faverge * @author Alycia Lisito - * @date 2023-07-05 + * @author Lucas Barros De Assis + * @author Matthieu Kuhn + * @date 2024-10-17 * @precisions normal z -> c d s * */ @@ -53,7 +55,7 @@ testing_zgetrf_nopiv_desc( run_arg_list_t *args, int check ) /* Calculates the solution */ testing_start( &test_data ); if ( async ) { - hres = CHAMELEON_zgetrf_nopiv_Tile_Async( descA, test_data.sequence, &test_data.request ); + hres = CHAMELEON_zgetrf_nopiv_Tile_Async( descA, NULL, test_data.sequence, &test_data.request ); CHAMELEON_Desc_Flush( descA, test_data.sequence ); } else {