diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c index 165801efb0e1bf4cfc5f5f36edf33def54063797..c4501d591df9b485e1fd2624f10e3a41eb4a4f2d 100644 --- a/compute/pzgetrf.c +++ b/compute/pzgetrf.c @@ -183,6 +183,7 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, */ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, + CHAM_desc_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { diff --git a/compute/zgetrf.c b/compute/zgetrf.c index bcb8ee0c8c560863cf27248f180ce6af0d11e628..72c595373df2f4c95bf844597993b1c7ed8aec38 100644 --- a/compute/zgetrf.c +++ b/compute/zgetrf.c @@ -167,7 +167,7 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws ) * */ int -CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA ) +CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ) { int NB; int status; @@ -271,7 +271,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA ) * */ int -CHAMELEON_zgetrf_Tile( CHAM_desc_t *A ) +CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV ) { CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = NULL; @@ -287,8 +287,7 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A ) chameleon_sequence_create( chamctxt, &sequence ); ws = CHAMELEON_zgetrf_WS_Alloc( A ); - CHAMELEON_zgetrf_Tile_Async( A, ws, sequence, &request ); - + CHAMELEON_zgetrf_Tile_Async( A, IPIV, ws, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); chameleon_sequence_wait( chamctxt, sequence ); @@ -334,6 +333,7 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A ) */ int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, + CHAM_desc_t *IPIV, void *user_ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) @@ -375,12 +375,24 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid first descriptor" ); return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } + if ( chameleon_desc_check( IPIV ) != CHAMELEON_SUCCESS 
) { + chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid second descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } /* Check input arguments */ if ( A->nb != A->mb ) { chameleon_error( "CHAMELEON_zgetrf_Tile", "only square tiles supported" ); return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } + if ( IPIV->mb != A->mb ) { + chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must have the same number of rows as tiles of A" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } + if ( IPIV->nb != 1 ) { + chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must be vectors with only one column per tile" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } if ( user_ws == NULL ) { ws = CHAMELEON_zgetrf_WS_Alloc( A ); @@ -389,7 +401,7 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, ws = user_ws; } - chameleon_pzgetrf( user_ws, A, sequence, request ); + chameleon_pzgetrf( ws, A, IPIV, sequence, request ); if ( user_ws == NULL ) { CHAMELEON_Desc_Flush( A, sequence ); diff --git a/control/compute_z.h b/control/compute_z.h index 1dd9b13055672181c935ebf67fe8ff1db6b0199e..634bd2d5cf8088a8cab84724a9c1d82498d70610 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -87,7 +87,7 @@ void chameleon_pzgepdf_qdwh( cham_mtxtype_t trans, CHAM_desc_t *descU, CHAM_desc void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t 
*T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_desc_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index 1f04c61aad249f13d4653e94886eba891236da35..da5a5bc651ad2e4803f2c05425ad923bb58c8daf 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -53,7 +53,7 @@ int CHAMELEON_zgesvd(cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_ //int CHAMELEON_zgetrf(int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV); int CHAMELEON_zgetrf_incpiv(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descL, int *IPIV); int CHAMELEON_zgetrf_nopiv(int M, int N, CHAMELEON_Complex64_t *A, int LDA); -int CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA ); +int CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ); //int CHAMELEON_zgetri(int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV); //int CHAMELEON_zgetrs(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB); int CHAMELEON_zgetrs_incpiv(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descL, int *IPIV, CHAMELEON_Complex64_t *B, int LDB); @@ -134,7 +134,7 @@ int CHAMELEON_zgesvd_Tile(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, dou 
//int CHAMELEON_zgetrf_Tile(CHAM_desc_t *A, int *IPIV); int CHAMELEON_zgetrf_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV); int CHAMELEON_zgetrf_nopiv_Tile(CHAM_desc_t *A); -int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A ); +int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV ); //int CHAMELEON_zgetri_Tile(CHAM_desc_t *A, int *IPIV); //int CHAMELEON_zgetrs_Tile(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B); int CHAMELEON_zgetrs_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B); @@ -211,7 +211,7 @@ int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t * //int CHAMELEON_zgetrf_Tile_Async(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrs_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);