diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake index 37bce4cbc6b44869ec837c914b731794ad9f360c..fdf4c92ee3e218e8c1eb5354c1bcddcbef10756a 160000 --- a/cmake_modules/morse_cmake +++ b/cmake_modules/morse_cmake @@ -1 +1 @@ -Subproject commit 37bce4cbc6b44869ec837c914b731794ad9f360c +Subproject commit fdf4c92ee3e218e8c1eb5354c1bcddcbef10756a diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index aef8d85d2ad1a0a198296b7b1353d5ef78a1dc9e..9c0ceed426a0bbed8395a24af6772b44310fd22d 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -196,8 +196,7 @@ set(ZSRC # OTHERS ################## pztile2band.c - #pzgebrd_gb2bd.c - pzgebrd_ge2gb.c + pzgebrd.c #pzgetrf_reclap.c #pzgetrf_rectil.c #pzhegst.c diff --git a/compute/pzgebrd.c b/compute/pzgebrd.c new file mode 100644 index 0000000000000000000000000000000000000000..273824cd6251cd86b12555cd9d12157faeb3597b --- /dev/null +++ b/compute/pzgebrd.c @@ -0,0 +1,320 @@ +/** + * + * @file pzgebrd.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgebrd parallel algorithm + * + * @version 1.2.0 + * @author Hatem Ltaief + * @author Azzam Haidar + * @author Mathieu Faverge + * @author Alycia Lisito + * @date 2022-02-22 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" +#if !defined(CHAMELEON_SIMULATION) +#include "coreblas/lapacke.h" +#endif + +void chameleon_pzgebrd_ge2gb( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + int k; + int tempkm, tempkn; + CHAM_desc_t *A1, *A2, *T1, *D1 = NULL; + + if ( A->m >= A->n ){ + for ( k = 0; k < A->nt; k++ ) { + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + + A1 = chameleon_desc_submatrix( A, k*A->mb, k*A->nb, A->m-k*A->mb, tempkn ); + A2 = chameleon_desc_submatrix( A, k*A->mb, (k+1)*A->nb, A->m-k*A->mb, A->n-(k+1)*A->nb ); + T1 = chameleon_desc_submatrix( T, k*T->mb, k*T->nb, T->m-k*T->mb, T->nb ); + if ( D != NULL ) { + D1 = chameleon_desc_submatrix( D, k*D->mb, k*D->nb, D->m-k*D->mb, tempkn ); + } + + chameleon_pzgeqrf( genD, A1, T1, D1, sequence, request ); + chameleon_pzunmqr( 0, ChamLeft, ChamConjTrans, A1, A2, T1, D1, sequence, request ); + + free( A1 ); + free( A2 ); + free( T1 ); + if ( D != NULL ) { + free( D1 ); + } + + if ( k+1 < A->nt ) { + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + + A1 = chameleon_desc_submatrix( A, k*A->mb, (k+1)*A->nb, tempkm, A->n-(k+1)*A->nb ); + A2 = chameleon_desc_submatrix( A, (k+1)*A->mb, (k+1)*A->nb, A->m-(k+1)*A->mb, A->n-(k+1)*A->nb ); + T1 = chameleon_desc_submatrix( T, k*T->mb, (k+1)*T->nb, T->mb, T->n-(k+1)*T->nb ); + if ( D != NULL ) { + D1 = chameleon_desc_submatrix( D, k*D->mb, (k+1)*D->nb, tempkm, D->n-(k+1)*D->nb ); + } + + chameleon_pzgelqf( genD, A1, T1, D1, sequence, request ); + chameleon_pzunmlq( 0, ChamRight, ChamConjTrans, A1, A2, T1, D1, sequence, request ); + + free( A1 ); + free( A2 ); + free( T1 ); + if ( D != NULL ) { + free( D1 ); + } + } + } + } + else { + for ( k = 0; k < A->mt; k++ ) { + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + + A1 = chameleon_desc_submatrix( A, k*A->mb, k*A->nb, tempkm, A->n-k*A->nb ); + A2 = chameleon_desc_submatrix( A, (k+1)*A->mb, k*A->nb, A->m-(k+1)*A->mb, A->n-k*A->nb ); + T1 = chameleon_desc_submatrix( T, k*T->mb, k*T->nb, T->mb, T->n-k*T->nb ); + if ( D != NULL ) { + D1 = chameleon_desc_submatrix( D, k*D->mb, k*D->nb, tempkm, D->n-k*D->nb ); + } + + chameleon_pzgelqf( genD, A1, T1, D1, sequence, request ); + chameleon_pzunmlq( 0, ChamRight, ChamConjTrans, A1, A2, T1, D1, sequence, request ); + + free( A1 ); + free( A2 ); + free( T1 ); + if ( D != NULL ) { + free( D1 ); + } + + if ( k+1 < A->mt ) { + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + + A1 = chameleon_desc_submatrix( A, (k+1)*A->mb, k*A->nb, A->m-(k+1)*A->mb, tempkn ); + A2 = chameleon_desc_submatrix( A, (k+1)*A->mb, (k+1)*A->nb, A->m-(k+1)*A->mb, A->n-(k+1)*A->nb ); + T1 = chameleon_desc_submatrix( T, (k+1)*T->mb, k*T->nb, T->m-(k+1)*T->mb, T->nb ); + if ( D != NULL ) { + D1 = chameleon_desc_submatrix( D, (k+1)*D->mb, k*D->nb, D->m-(k+1)*D->mb, tempkn ); + } + + chameleon_pzgeqrf( genD, A1, T1, D1, sequence, request ); + chameleon_pzunmqr( 0, ChamLeft, ChamConjTrans, A1, A2, T1, D1, sequence, request ); + + free( A1 ); + free( A2 ); + free( T1 ); + if ( D != NULL ) { + free( D1 ); + } + } + } + } + + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Desc_Flush( T, sequence ); + if ( D != NULL ) { + CHAMELEON_Desc_Flush( D, sequence ); + } +} + +int chameleon_pzgebrd_gb2bd( cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, + CHAMELEON_Complex64_t *U, int LDU, + CHAMELEON_Complex64_t *VT, int LDVT, + double *E, double *S, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + RUNTIME_option_t options; + CHAM_desc_t descAB; + cham_uplo_t uplo; + int M, N, MINMN, NB, LDAB, ABn; + int info; + int KL, KU; + + chamctxt = chameleon_context_self(); + if ( sequence->status != CHAMELEON_SUCCESS ) { + return sequence->status; + } + RUNTIME_options_init( &options, chamctxt, sequence, request ); + + M = A->m; + N = A->n; + MINMN = chameleon_min(M, N); + NB = A->mb; + LDAB = NB + 1; + uplo = M >= N ? ChamUpper : ChamLower; + ABn = MINMN; + + /* Allocate band structure */ + chameleon_zdesc_alloc( descAB, LDAB, NB, /* mb, nb */ + LDAB, ABn, /* lm, ln */ + 0, 0, /* i, j */ + LDAB, ABn, /* m, n */ + NULL ); + + /* Convert matrix to band form */ + chameleon_pztile2band( uplo, A, &descAB, sequence, request ); + + /* NCC = 0, C = NULL, we do not update any matrix with new singular vectors */ + /* On exit, AB = U (S +~ E) VT */ + KL = uplo == ChamUpper ? 0 : NB; + KU = uplo == ChamUpper ? NB : 0; + + /* Manage the case where only singular values are required */ + char gbbrd_vect; + if ( jobu == ChamNoVec ) { + if ( jobvt == ChamNoVec ) { + gbbrd_vect = 'N'; + } + else { + gbbrd_vect = 'P'; + } + } + else { + if ( jobvt == ChamNoVec ) { + gbbrd_vect = 'Q'; + } + else { + gbbrd_vect = 'B'; + } + } + + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Desc_Flush( &descAB, sequence ); + chameleon_sequence_wait( chamctxt, sequence ); + +#if !defined(CHAMELEON_SIMULATION) + info = LAPACKE_zgbbrd( LAPACK_COL_MAJOR, gbbrd_vect, M, N, 0, KL, KU, + (CHAMELEON_Complex64_t *) descAB.mat, LDAB, S, E, + U, LDU, VT, LDVT, NULL, 1 ); + if ( info != 0 ) { + fprintf( stderr, "CHAMELEON_zgesvd_Tile_Async: LAPACKE_zgbbrd = %d\n", info ); + } + assert( info == 0 ); +#endif /* !defined(CHAMELEON_SIMULATION) */ + + chameleon_desc_destroy( &descAB ); + + RUNTIME_options_finalize( &options, chamctxt ); +} + +int chameleon_pzgebrd( int genD, cham_job_t jobu, cham_job_t jobvt, + CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, + CHAMELEON_Complex64_t *U, int LDU, + CHAMELEON_Complex64_t *VT, int LDVT, + double *E, double *S, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + RUNTIME_option_t options; + CHAM_desc_t *subA, *subT, *subUVT, *subD; + CHAM_desc_t descUl, descUt; + CHAM_desc_t descVTl, descVTt; + int M, N, NB; + + chamctxt = chameleon_context_self(); + if ( sequence->status != CHAMELEON_SUCCESS ) { + return sequence->status; + } + RUNTIME_options_init( &options, chamctxt, sequence, request ); + + chameleon_pzgebrd_ge2gb( genD, A, T, D, sequence, request ); + chameleon_pzgebrd_gb2bd( jobu, jobvt, A, U, LDU, VT, LDVT, E, S, sequence, request ); + + /* Update U and Vt according to jobu and jobvt */ + subA = NULL; + subT = NULL; + subUVT = NULL; + subD = NULL; + M = A->m; + N = A->n; + NB = A->mb; + + if ( jobu != ChamNoVec ) { + chameleon_zlap2tile( chamctxt, &descUl, &descUt, ChamDescInout, ChamUpperLower, + U, NB, NB, LDU, M, M, M, sequence, request ); + + if ( M < N ) { + subA = chameleon_desc_submatrix( A, chameleon_min(A->mb, A->m), 0, + chameleon_max(0, A->m - A->mb), A->n ); + subUVT = chameleon_desc_submatrix( &descUt, chameleon_min(descUt.mb, descUt.m), 0, + chameleon_max(0, descUt.m - descUt.mb), descUt.n); + subT = chameleon_desc_submatrix( T, chameleon_min(T->mb, T->m), 0, + chameleon_max(0, T->m - T->mb), T->n ); + if ( D != NULL ) { + subD = chameleon_desc_submatrix( D, chameleon_min(D->mb, D->m), 0, + chameleon_max(0, D->m - D->mb), D->n ); + } + chameleon_pzunmqr( 0, ChamLeft, ChamNoTrans, subA, subUVT, subT, subD, sequence, request ); + + free( subA ); + free( subUVT ); + free( subT ); + if ( D != NULL ) { + free( subD ); + } + } + else { + chameleon_pzunmqr( 0, ChamLeft, ChamNoTrans, A, &descUt, T, D, sequence, request ); + } + + chameleon_ztile2lap( chamctxt, &descUl, &descUt, ChamDescInout, ChamUpperLower, sequence, request ); + } + if ( jobvt != ChamNoVec ) { + chameleon_zlap2tile( chamctxt, &descVTl, &descVTt, ChamDescInout, ChamUpperLower, + VT, NB, NB, LDVT, N, N, N, sequence, request ); + + if ( M < N ){ + chameleon_pzunmlq( 0, ChamRight, ChamNoTrans, A, &descVTt, T, D, sequence, request ); + } + else { + subA = chameleon_desc_submatrix( A, 0, chameleon_min(A->nb, A->n), + A->m, chameleon_max(0, A->n - A->nb) ); + subUVT = chameleon_desc_submatrix( &descVTt, 0, chameleon_min(descVTt.nb, descVTt.n), + descVTt.m, chameleon_max(0, descVTt.n - descVTt.nb) ); + subT = chameleon_desc_submatrix( T, 0, chameleon_min(T->nb, T->n), + T->m, chameleon_max(0, T->n - T->nb) ); + if ( D != NULL ) { + subD = chameleon_desc_submatrix( D, 0, chameleon_min(D->nb, D->n), + D->m, chameleon_max(0, D->n - D->nb) ); + } + + chameleon_pzunmlq( 0, ChamRight, ChamNoTrans, subA, subUVT, subT, subD, sequence, request ); + + free( subA ); + free( subUVT ); + free( subT ); + if ( D != NULL ) { + free( subD ); + } + } + + chameleon_ztile2lap( chamctxt, &descVTl, &descVTt, + ChamDescInout, ChamUpperLower, sequence, request ); + } + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Desc_Flush( T, sequence ); + if ( D != NULL ) { + CHAMELEON_Desc_Flush( D, sequence ); + } + chameleon_sequence_wait( chamctxt, sequence ); + + /* Cleanup the temporary data */ + if ( jobu != ChamNoVec ) { + chameleon_ztile2lap_cleanup( chamctxt, &descUl, &descUt ); + } + if ( jobvt != ChamNoVec ) { + chameleon_ztile2lap_cleanup( chamctxt, &descVTl, &descVTt ); + } + + RUNTIME_options_finalize( &options, chamctxt ); +} diff --git a/compute/pzgebrd_ge2gb.c b/compute/pzgebrd_ge2gb.c deleted file mode 100644 index 1e7b82d9ad47627da15309cf754fd671d78e9d70..0000000000000000000000000000000000000000 --- a/compute/pzgebrd_ge2gb.c +++ /dev/null @@ -1,104 +0,0 @@ -/** - * - * @file pzgebrd_ge2gb.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon zgebrd_ge2gb parallel algorithm - * - * @version 1.2.0 - * @author Hatem Ltaief - * @author Azzam Haidar - * @author Mathieu Faverge - * @date 2022-02-22 - * @precisions normal z -> s d c - * - */ -#include "control/common.h" - -void chameleon_pzgebrd_ge2gb( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, - RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) -{ - int k; - int tempkm, tempkn; - CHAM_desc_t *A1, *A2, *T1, *D1 = NULL; - - if (A->m >= A->n){ - for (k = 0; k < A->nt; k++) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - - A1 = chameleon_desc_submatrix(A, k*A->mb, k*A->nb, A->m-k*A->mb, tempkn); - A2 = chameleon_desc_submatrix(A, k*A->mb, (k+1)*A->nb, A->m-k*A->mb, A->n-(k+1)*A->nb); - T1 = chameleon_desc_submatrix(T, k*T->mb, k*T->nb, T->m-k*T->mb, T->nb ); - if ( D != NULL ) { - D1 = chameleon_desc_submatrix(D, k*D->mb, k*D->nb, D->m-k*D->mb, tempkn); - } - - chameleon_pzgeqrf( genD, A1, T1, D1, - sequence, request); - - chameleon_pzunmqr( 0, ChamLeft, ChamConjTrans, - A1, A2, T1, D1, - sequence, request); - - if (k+1 < A->nt){ - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - - A1 = chameleon_desc_submatrix(A, k*A->mb, (k+1)*A->nb, tempkm, A->n-(k+1)*A->nb); - A2 = chameleon_desc_submatrix(A, (k+1)*A->mb, (k+1)*A->nb, A->m-(k+1)*A->mb, A->n-(k+1)*A->nb); - T1 = chameleon_desc_submatrix(T, k*T->mb, (k+1)*T->nb, T->mb, T->n-(k+1)*T->nb); - if ( D != NULL ) { - D1 = chameleon_desc_submatrix(D, k*D->mb, (k+1)*D->nb, tempkm, D->n-(k+1)*D->nb); - } - - chameleon_pzgelqf( genD, A1, T1, D1, - sequence, request); - - chameleon_pzunmlq( 0, ChamRight, ChamConjTrans, - A1, A2, T1, D1, - sequence, request); - } - } - } - else{ - for (k = 0; k < A->mt; k++) { - tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; - - A1 = chameleon_desc_submatrix(A, k*A->mb, k*A->nb, tempkm, A->n-k*A->nb); - A2 = chameleon_desc_submatrix(A, (k+1)*A->mb, k*A->nb, A->m-(k+1)*A->mb, A->n-k*A->nb); - T1 = chameleon_desc_submatrix(T, k*T->mb, k*T->nb, T->mb, T->n-k*T->nb); - if ( D != NULL ) { - D1 = chameleon_desc_submatrix(D, k*D->mb, k*D->nb, tempkm, D->n-k*D->nb); - } - chameleon_pzgelqf( genD, A1, T1, D1, - sequence, request); - - chameleon_pzunmlq( 0, ChamRight, ChamConjTrans, - A1, A2, T1, D1, - sequence, request); - - if (k+1 < A->mt){ - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - - A1 = chameleon_desc_submatrix(A, (k+1)*A->mb, k*A->nb, A->m-(k+1)*A->mb, tempkn); - A2 = chameleon_desc_submatrix(A, (k+1)*A->mb, (k+1)*A->nb, A->m-(k+1)*A->mb, A->n-(k+1)*A->nb); - T1 = chameleon_desc_submatrix(T, (k+1)*T->mb, k*T->nb, T->m-(k+1)*T->mb, T->nb ); - if ( D != NULL ) { - D1 = chameleon_desc_submatrix(D, (k+1)*D->mb, k*D->nb, D->m-(k+1)*D->mb, tempkn); - } - - chameleon_pzgeqrf( genD, A1, T1, D1, - sequence, request); - - chameleon_pzunmqr( 0, ChamLeft, ChamConjTrans, - A1, A2, T1, D1, - sequence, request); - } - } - } -} diff --git a/compute/zgesvd.c b/compute/zgesvd.c index e85f280719c04804d87d8ede3377f8c2e76e2881..59ef98d92feee5297509b78d75613083e10eda0f 100644 --- a/compute/zgesvd.c +++ b/compute/zgesvd.c @@ -15,6 +15,7 @@ * @author Gregoire Pichon * @author Mathieu Faverge * @author Raphael Boucherie + * @author Alycia Lisito * @date 2022-02-22 * @precisions normal z -> s d c * @@ -51,30 +52,30 @@ * @param[in] jobu * Specifies options for computing all or part of the matrix U. * Intended usage: - * = ChamVec = 'A'(lapack): all M columns of U are returned - * in array U; - * = ChamNoVec = 'N': no columns of U (no left singular vectors) - * are computed. - * = ChamSVec = 'S': the first min(m,n) columns of U (the left - * singular vectors) are returned in the array U; - * NOT SUPPORTTED YET - * = ChamOVec = 'O': the first min(m,n) columns of U (the left - * singular vectors) are overwritten on the array A; - * NOT SUPPORTTED YET + * = ChamAllVec = 'A'(lapack): all M columns of U are returned + * in array U; + * = ChamNoVec = 'N': no columns of U (no left singular vectors) + * are computed. + * = ChamSVec = 'S': the first min(m,n) columns of U (the left + * singular vectors) are returned in the array U; + * NOT SUPPORTED YET + * = ChamOVec = 'O': the first min(m,n) columns of U (the left + * singular vectors) are overwritten on the array A; + * NOT SUPPORTED YET * * @param[in] jobvt * Specifies options for computing all or part of the matrix V^H. * Intended usage: - * = ChamVec = 'A'(lapack): all N rows of V^H are returned - * in the array VT; - * = ChamNoVec = 'N': no rows of V^H (no right singular vectors) - * are computed. - * = ChamSVec = 'S': the first min(m,n) rows of V^H (the right - * singular vectors) are returned in the array VT; - * NOT SUPPORTTED YET - * = ChamOVec = 'O': the first min(m,n) rows of V^H (the right - * singular vectors) are overwritten on the array A; - * NOT SUPPORTTED YET + * = ChamAllVec = 'A'(lapack): all N rows of V^H are returned + * in the array VT; + * = ChamNoVec = 'N': no rows of V^H (no right singular vectors) + * are computed. + * = ChamSVec = 'S': the first min(m,n) rows of V^H (the right + * singular vectors) are returned in the array VT; + * NOT SUPPORTED YET + * = ChamOVec = 'O': the first min(m,n) rows of V^H (the right + * singular vectors) are overwritten on the array A; + * NOT SUPPORTED YET * * Note: jobu and jobvt cannot both be ChamOVec. * @@ -87,14 +88,14 @@ * @param[in,out] A * On entry, the M-by-N matrix A. * On exit, - * if JOBU = 'O', A is overwritten with the first min(m,n) - * columns of U (the left singular vectors, - * stored columnwise); - * if JOBVT = 'O', A is overwritten with the first min(m,n) - * rows of V^H (the right singular vectors, - * stored rowwise); - * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A - * are destroyed. + * if jobu == ChamOVec, A is overwritten with the first min(m,n) + * columns of U (the left singular vectors, + * stored columnwise); + * if jobvt == ChamOVec, A is overwritten with the first min(m,n) + * rows of V^H (the right singular vectors, + * stored rowwise); + * if jobu != ChamOVec and jobvt != ChamOVec, the content of A + * is destroyed. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). @@ -107,26 +108,28 @@ * On exit, contains auxiliary factorization data. * * @param[out] U - * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. - * If JOBU = 'A', U contains the M-by-M unitary matrix U; - * if JOBU = 'S', U contains the first min(m,n) columns of U - * (the left singular vectors, stored columnwise); - * if JOBU = 'N' or 'O', U is not referenced. + * If jobu == ChamAllVec, U contains the M-by-M unitary matrix U; + * if jobu == ChamSVec, U contains the first min(m,n) columns of U + * (the left singular vectors, stored columnwise); + * if jobu == ChamNoVec or ChamOVec, U is not referenced. + * * @param[in] LDU - * The leading dimension of the array U. LDU >= 1; if - * JOBU = 'S' or 'A', LDU >= M. + * The leading dimension of the array U. LDU >= 1; + * if jobu == ChamSVec or ChamAllVec, LDU >= M. * * @param[out] VT - * If JOBVT = 'A', VT contains the N-by-N unitary matrix - * V^H; - * if JOBVT = 'S', VT contains the first min(m,n) rows of - * V^H (the right singular vectors, stored rowwise); - * if JOBVT = 'N' or 'O', VT is not referenced. + * If jobvt == ChamAllVec, VT contains the N-by-N unitary matrix + * V^H; + * if jobvt == ChamSVec, VT contains the first min(m,n) rows of + * V^H (the right singular vectors, stored rowwise); + * if jobvt == ChamNoVec or ChamOVec, VT is not referenced. + * * @param[in] LDVT - * The leading dimension of the array VT. LDVT >= 1; if - * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). + * The leading dimension of the array VT. LDVT >= 1; + * if jobvt == ChamAllVec, LDVT >= N; + * if jobvt == ChamSVec, LDVT >= min(M,N). * ******************************************************************************* * @@ -158,49 +161,50 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t descAl, descAt; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zgesvd", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgesvd", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } + assert( chamctxt->scheduler != RUNTIME_SCHED_PARSEC ); /* Check input arguments */ - if (jobu != ChamNoVec && jobu != ChamVec) { - chameleon_error("CHAMELEON_zgesvd", "illegal value of jobu"); + if ( (jobu != ChamNoVec) && (jobu != ChamAllVec) ) { + chameleon_error( "CHAMELEON_zgesvd", "illegal value of jobu" ); return -1; } - if (jobvt != ChamNoVec && jobvt != ChamVec) { - chameleon_error("CHAMELEON_zgesvd", "illegal value of jobvt"); + if ( (jobvt != ChamNoVec) && (jobvt != ChamAllVec) ) { + chameleon_error( "CHAMELEON_zgesvd", "illegal value of jobvt" ); return -2; } - if (M < 0) { - chameleon_error("CHAMELEON_zgesvd", "illegal value of M"); + if ( M < 0 ) { + chameleon_error( "CHAMELEON_zgesvd", "illegal value of M") ; return -3; } - if (N < 0) { - chameleon_error("CHAMELEON_zgesvd", "illegal value of N"); + if ( N < 0 ) { + chameleon_error( "CHAMELEON_zgesvd", "illegal value of N" ); return -4; } - if (LDA < chameleon_max(1, M)) { - chameleon_error("CHAMELEON_zgesvd", "illegal value of LDA"); + if ( LDA < chameleon_max(1, M) ) { + chameleon_error( "CHAMELEON_zgesvd", "illegal value of LDA" ); return -6; } - if (LDU < 1) { - chameleon_error("CHAMELEON_zgesvd", "illegal value of LDU"); + if ( LDU < 1 ) { + chameleon_error( "CHAMELEON_zgesvd", "illegal value of LDU" ); return -9; } - if (LDVT < 1) { - chameleon_error("CHAMELEON_zgesvd", "illegal value of LDVT"); + if ( LDVT < 1 ) { + chameleon_error( "CHAMELEON_zgesvd", "illegal value of LDVT" ); return -11; } /* Quick return */ - if (chameleon_min(M, N) == 0) { + if ( chameleon_min(M, N ) == 0 ) { return CHAMELEON_SUCCESS; } /* Tune NB & IB depending on M & N; Set NBNB */ - status = chameleon_tune(CHAMELEON_FUNC_ZGESVD, M, N, 0); - if (status != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zgesvd", "chameleon_tune() failed"); + status = chameleon_tune( CHAMELEON_FUNC_ZGESVD, M, N, 0 ); + if ( status != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zgesvd", "chameleon_tune() failed" ); return status; } @@ -220,6 +224,7 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt, chameleon_ztile2lap( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower, sequence, &request ); + CHAMELEON_Desc_Flush( descT, sequence ); chameleon_sequence_wait( chamctxt, sequence ); /* Cleanup the temporary data */ @@ -248,44 +253,44 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt, * @param[in] jobu * Specifies options for computing all or part of the matrix U. * Intended usage: - * = ChamVec = 'A'(lapack): all M columns of U are returned - * in array U; - * = ChamNoVec = 'N': no columns of U (no left singular vectors) - * are computed. - * = ChamSVec = 'S': the first min(m,n) columns of U (the left - * singular vectors) are returned in the array U; - * NOT SUPPORTTED YET - * = ChamOVec = 'O': the first min(m,n) columns of U (the left - * singular vectors) are overwritten on the array A; - * NOT SUPPORTTED YET + * = ChamAllVec = 'A'(lapack): all M columns of U are returned + * in array U; + * = ChamNoVec = 'N': no columns of U (no left singular vectors) + * are computed. + * = ChamSVec = 'S': the first min(m,n) columns of U (the left + * singular vectors) are returned in the array U; + * NOT SUPPORTED YET + * = ChamOVec = 'O': the first min(m,n) columns of U (the left + * singular vectors) are overwritten on the array A; + * NOT SUPPORTED YET * * @param[in] jobvt * Specifies options for computing all or part of the matrix V^H. * Intended usage: - * = ChamVec = 'A'(lapack): all N rows of V^H are returned - * in the array VT; - * = ChamNoVec = 'N': no rows of V^H (no right singular vectors) - * are computed. - * = ChamSVec = 'S': the first min(m,n) rows of V^H (the right - * singular vectors) are returned in the array VT; - * NOT SUPPORTTED YET - * = ChamOVec = 'O': the first min(m,n) rows of V^H (the right - * singular vectors) are overwritten on the array A; - * NOT SUPPORTTED YET + * = ChamAllVec = 'A'(lapack): all N rows of V^H are returned + * in the array VT; + * = ChamNoVec = 'N': no rows of V^H (no right singular vectors) + * are computed. + * = ChamSVec = 'S': the first min(m,n) rows of V^H (the right + * singular vectors) are returned in the array VT; + * NOT SUPPORTED YET + * = ChamOVec = 'O': the first min(m,n) rows of V^H (the right + * singular vectors) are overwritten on the array A; + * NOT SUPPORTED YET * * Note: jobu and jobvt cannot both be ChamOVec. * * @param[in,out] A * On entry, the M-by-N matrix A. * On exit, - * if JOBU = 'O', A is overwritten with the first min(m,n) - * columns of U (the left singular vectors, - * stored columnwise); - * if JOBVT = 'O', A is overwritten with the first min(m,n) - * rows of V^H (the right singular vectors, - * stored rowwise); - * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A - * are destroyed. + * if jobu == ChamOVec, A is overwritten with the first min(m,n) + * columns of U (the left singular vectors, + * stored columnwise); + * if jobvt == ChamOVec, A is overwritten with the first min(m,n) + * rows of V^H (the right singular vectors, + * stored rowwise); + * if jobu != ChamOVec and jobvt != ChamOVec, the content of A + * is destroyed. * * @param[out] S * The singular values of A, sorted so that S(i) >= S(i+1). @@ -295,26 +300,28 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt, * On exit, contains auxiliary factorization data. * * @param[out] U - * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. - * If JOBU = 'A', U contains the M-by-M unitary matrix U; - * if JOBU = 'S', U contains the first min(m,n) columns of U - * (the left singular vectors, stored columnwise); - * if JOBU = 'N' or 'O', U is not referenced. + * If jobu == ChamAllVec, U contains the M-by-M unitary matrix U; + * if jobu == ChamSVec, U contains the first min(m,n) columns of U + * (the left singular vectors, stored columnwise); + * if jobu == ChamNoVec or ChamOVec, U is not referenced. + * * @param[in] LDU - * The leading dimension of the array U. LDU >= 1; if - * JOBU = 'S' or 'A', LDU >= M. + * The leading dimension of the array U. LDU >= 1; + * if jobu == ChamSVec or ChamAllVec, LDU >= M. * * @param[out] VT - * If JOBVT = 'A', VT contains the N-by-N unitary matrix - * V^H; - * if JOBVT = 'S', VT contains the first min(m,n) rows of - * V^H (the right singular vectors, stored rowwise); - * if JOBVT = 'N' or 'O', VT is not referenced. + * If jobvt == ChamAllVec, VT contains the N-by-N unitary matrix + * V^H; + * if jobvt == ChamSVec, VT contains the first min(m,n) rows of + * V^H (the right singular vectors, stored rowwise); + * if jobvt == ChamNoVec or ChamOVec, VT is not referenced. + * * @param[in] LDVT - * The leading dimension of the array VT. LDVT >= 1; if - * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). + * The leading dimension of the array VT. LDVT >= 1; + * if jobvt == ChamAllVec, LDVT >= N; + * if jobvt == ChamSVec, LDVT >= min(M,N). * ******************************************************************************* * @@ -342,10 +349,11 @@ int CHAMELEON_zgesvd_Tile( cham_job_t jobu, cham_job_t jobvt, int status; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zgesvd_Tile", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgesvd_Tile", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } + assert( chamctxt->scheduler != RUNTIME_SCHED_PARSEC ); chameleon_sequence_create( chamctxt, &sequence ); CHAMELEON_zgesvd_Tile_Async( jobu, jobvt, A, S, T, U, LDU, VT, LDVT, sequence, &request ); @@ -399,246 +407,129 @@ int CHAMELEON_zgesvd_Tile_Async( cham_job_t jobu, cham_job_t jobvt, { CHAM_desc_t descA; CHAM_desc_t descT; - CHAM_desc_t descUl, descUt; - CHAM_desc_t descVTl, descVTt; - CHAM_desc_t descAB; CHAM_desc_t D, *Dptr = NULL; - CHAM_desc_t *subA, *subT, *subUVT; double *E; - int M, N, MINMN, NB, LDAB; - cham_uplo_t uplo; -#if !defined(CHAMELEON_SIMULATION) - int KL, KU, nru, ncvt; -#endif + int M, N, MINMN, NB; CHAM_context_t *chamctxt; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zgesvd_Tile_Async", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgesvd_Tile_Async", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } - if (sequence == NULL) { - chameleon_fatal_error("CHAMELEON_zgesvd_Tile_Async", "NULL sequence"); + assert( chamctxt->scheduler != RUNTIME_SCHED_PARSEC ); + if ( sequence == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgesvd_Tile_Async", "NULL sequence" ); return CHAMELEON_ERR_UNALLOCATED; } - if (request == NULL) { - chameleon_fatal_error("CHAMELEON_zgesvd_Tile_Async", "NULL request"); + if ( request == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgesvd_Tile_Async", "NULL request" ); return CHAMELEON_ERR_UNALLOCATED; } /* Check sequence status */ - if (sequence->status == CHAMELEON_SUCCESS) { + if ( sequence->status == CHAMELEON_SUCCESS ) { request->status = CHAMELEON_SUCCESS; } else { - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED ); } /* Check descriptors for correctness */ - if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zgesvd_Tile_Async", "invalid first descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check(A) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zgesvd_Tile_Async", "invalid first descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } else { descA = *A; } - if (chameleon_desc_check(T) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zgesvd_Tile_Async", "invalid fourth descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check(T) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zgesvd_Tile_Async", "invalid fourth descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } else { descT = *T; } /* Check input arguments */ - if (jobu != ChamNoVec && jobu != ChamVec) { - chameleon_error("CHAMELEON_zgesvd_Tile_Async", "illegal value of jobu"); + if ( (jobu != ChamNoVec) && (jobu != ChamAllVec) ) { + chameleon_error( "CHAMELEON_zgesvd_Tile_Async", "illegal value of jobu" ); return CHAMELEON_ERR_NOT_SUPPORTED; } - if (jobvt != ChamNoVec && jobvt != ChamVec) { - chameleon_error("CHAMELEON_zgesvd_Tile_Async", "illegal value of jobvt"); + if ( (jobvt != ChamNoVec) && (jobvt != ChamAllVec) ) { + chameleon_error( "CHAMELEON_zgesvd_Tile_Async", "illegal value of jobvt" ); return CHAMELEON_ERR_NOT_SUPPORTED; } - if (descA.nb != descA.mb) { - chameleon_error("CHAMELEON_zgesvd_Tile_Async", "only square tiles supported"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( descA.nb != descA.mb ) { + chameleon_error( "CHAMELEON_zgesvd_Tile_Async", "only square tiles supported" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } M = descA.m; N = descA.n; MINMN = chameleon_min(M, N); NB = descA.mb; - LDAB = NB + 1; - uplo = M >= N ? ChamUpper : ChamLower; - #if defined(CHAMELEON_COPY_DIAG) { - chameleon_zdesc_alloc_diag( &D, A->mb, A->m, A->n, A->p, A->q ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, A->n ); Dptr = &D; } #endif - /* Reduction to band */ - chameleon_pzgebrd_ge2gb( 1, &descA, &descT, Dptr, - sequence, request ); - - /* Allocate band structure */ - chameleon_zdesc_alloc( descAB, - LDAB, NB, /* mb, nb */ - LDAB, N, /* lm, ln */ - 0, 0, /* i, j */ - LDAB, N, /* m, n */ - ); - - /* Convert matrix to band form */ - chameleon_pztile2band( uplo, - &descA, &descAB, - sequence, request ); E = malloc( MINMN * sizeof(double) ); - if (E == NULL) { - chameleon_error("CHAMELEON_zheevd_Tile_Async", "malloc(E) failed"); - free(E); + if ( E == NULL ) { + chameleon_error( "CHAMELEON_zgesvd_Tile_Async", "malloc(E) failed" ); + free( E ); return CHAMELEON_ERR_OUT_OF_RESOURCES; } - memset(E, 0, MINMN * sizeof(double) ); + memset( E, 0, MINMN * sizeof(double) ); -#if !defined(CHAMELEON_SIMULATION) - { - char gbbrd_vect; - int info; - - /* NCC = 0, C = NULL, we do not update any matrix with new singular vectors */ - /* On exit, AB = U (S +~ E) VT */ - if (uplo == ChamUpper){ - KL = 0; - KU = NB; - } - else{ - KL = NB; - KU = 0; - } - - /* Manage the case where only singular values are required */ - if (jobu == ChamNoVec) { - nru = 0; - if (jobvt == ChamNoVec) { - gbbrd_vect = 'N'; - ncvt = 0; - } - else { - gbbrd_vect = 'P'; - ncvt = N; - } - } - else { - nru = M; - if (jobvt == ChamNoVec) { - gbbrd_vect = 'Q'; - ncvt = 0; - } - else { - gbbrd_vect = 'B'; - ncvt = N; - } - } - - chameleon_sequence_wait( chamctxt, sequence ); - - info = LAPACKE_zgbbrd( LAPACK_COL_MAJOR, - gbbrd_vect, - M, N, - 0, KL, KU, - (CHAMELEON_Complex64_t *) descAB.mat, LDAB, - S, E, - U, LDU, - VT, LDVT, - NULL, 1 ); - if (info != 0) { - fprintf(stderr, "CHAMELEON_zgesvd_Tile_Async: LAPACKE_zgbbrd = %d\n", info ); - } - } -#else - chameleon_sequence_wait( chamctxt, sequence ); -#endif /* !defined(CHAMELEON_SIMULATION) */ - chameleon_desc_destroy( &descAB ); - - subA = NULL; - subT = NULL; - subUVT = NULL; - - if ( jobu != ChamNoVec ) { - chameleon_zlap2tile( chamctxt, &descUl, &descUt, ChamDescInout, ChamUpperLower, - U, NB, NB, LDU, M, M, M, sequence, request ); - - if ( M < N ){ - subA = chameleon_desc_submatrix(&descA, descA.mb, 0, descA.m -descA.mb, descA.n-descA.nb); - subUVT = chameleon_desc_submatrix(&descUt, descUt.mb, 0, descUt.m-descUt.mb, descUt.n); - subT = chameleon_desc_submatrix(&descT, descT.mb, 0, descT.m -descT.mb, descT.n-descT.nb); - - chameleon_pzunmqr( 0, ChamLeft, ChamNoTrans, - subA, subUVT, subT, Dptr, - sequence, request ); - - free(subA); free(subUVT); free(subT); - } - else { - chameleon_pzunmqr( 0, ChamLeft, ChamNoTrans, - &descA, &descUt, &descT, Dptr, - sequence, request ); - } - - chameleon_ztile2lap( chamctxt, &descUl, &descUt, - ChamDescInout, ChamUpperLower, sequence, request ); - } - - if ( jobvt != ChamNoVec ) { - chameleon_zlap2tile( chamctxt, &descVTl, &descVTt, ChamDescInout, ChamUpperLower, - VT, NB, NB, LDVT, N, N, N, sequence, request ); - - if ( M < N ){ - chameleon_pzunmlq( 0, ChamRight, ChamNoTrans, - &descA, &descVTt, &descT, Dptr, - sequence, request ); - } - else { - subA = chameleon_desc_submatrix(&descA, 0, descA.nb, descA.m-descA.mb, descA.n -descA.nb ); - subUVT = chameleon_desc_submatrix(&descVTt, 0, descVTt.nb, descVTt.m, descVTt.n-descVTt.nb); - subT = chameleon_desc_submatrix(&descT, 0, descT.nb, descT.m-descT.mb, descT.n -descT.nb ); - - chameleon_pzunmlq( 0, ChamRight, ChamNoTrans, - subA, subUVT, subT, Dptr, - sequence, request ); - - free(subA); free(subUVT); free(subT); - } - - chameleon_ztile2lap( chamctxt, &descVTl, &descVTt, - ChamDescInout, ChamUpperLower, sequence, request ); - } - chameleon_sequence_wait( chamctxt, sequence ); - - /* Cleanup the temporary data */ - if ( jobu != ChamNoVec ) { - chameleon_ztile2lap_cleanup( chamctxt, &descUl, &descUt ); - } - if ( jobvt != ChamNoVec ) { - chameleon_ztile2lap_cleanup( chamctxt, &descVTl, &descVTt ); - } + /* Reduction to band + bidiagonal */ + chameleon_pzgebrd( 1, jobu, jobvt, &descA, &descT, Dptr, + U, LDU, VT, LDVT, E, S, sequence, request ); /* Solve the bidiagonal SVD problem */ /* On exit, U and VT are updated with bidiagonal matrix singular vectors */ #if !defined(CHAMELEON_SIMULATION) { - int info = LAPACKE_zbdsqr( LAPACK_COL_MAJOR, 'U', - MINMN, ncvt, nru, 0, - S, E, - VT, LDVT, U, LDU, NULL, 1 ); - if (info != 0) { + int nru, ncvt; + switch ( jobu ) { + case ChamNoVec : + nru = 0; + break; + case ChamOVec : + case ChamAllVec : + nru = M; + break; + case ChamSVec : + nru = MINMN; + break; + default: + ; + } + switch ( jobvt ) { + case ChamNoVec : + ncvt = 0; + break; + case ChamOVec : + case ChamAllVec : + ncvt = N; + break; + case ChamSVec : + ncvt = MINMN; + break; + default: + ; + } + cham_uplo_t uplo = M >= N ? ChamUpper : ChamLower; + int info = LAPACKE_zbdsqr( LAPACK_COL_MAJOR, chameleon_lapack_const(uplo), MINMN, + ncvt, nru, 0, S, E, VT, LDVT, U, LDU, NULL, 1 ); + if ( info != 0 ) { fprintf(stderr, "CHAMELEON_zgesvd_Tile_Async: LAPACKE_zbdsqr = %d\n", info ); } } #endif /* !defined(CHAMELEON_SIMULATION) */ - free(E); - if ( Dptr ) { + free( E ); + if ( Dptr != NULL ) { chameleon_desc_destroy( Dptr ); } (void)D; diff --git a/control/chameleon_zf77.c b/control/chameleon_zf77.c index 717548e3102c1283c689f4434d627c915d291fcb..cf1b8d94ebd98bf55ae746818056e17a9e44d075 100644 --- a/control/chameleon_zf77.c +++ b/control/chameleon_zf77.c @@ -44,7 +44,7 @@ //#define CHAMELEON_ZGESV CHAMELEON_FNAME(zgesv , ZGESV ) #define CHAMELEON_ZGESV_INCPIV CHAMELEON_FNAME(zgesv_incpiv , ZGESV_INCPIV ) #define CHAMELEON_ZGESV_NOPIV CHAMELEON_FNAME(zgesv_nopiv , ZGESV_NOPIV ) -//#define CHAMELEON_ZGESVD CHAMELEON_FNAME(zgesvd , ZGESVD ) +#define CHAMELEON_ZGESVD CHAMELEON_FNAME(zgesvd , ZGESVD ) //#define CHAMELEON_ZGETMI CHAMELEON_FNAME(zgetmi , ZGETMI ) //#define CHAMELEON_ZGETMI_ASYNC CHAMELEON_FNAME(zgetmi_async , ZGETMI_ASYNC ) //#define CHAMELEON_ZGETRF CHAMELEON_FNAME(zgetrf , ZGETRF ) @@ -121,7 +121,7 @@ //#define CHAMELEON_ZGESV_TILE CHAMELEON_TILE_FNAME(zgesv , ZGESV ) #define CHAMELEON_ZGESV_INCPIV_TILE CHAMELEON_TILE_FNAME(zgesv_incpiv , ZGESV_INCPIV ) #define CHAMELEON_ZGESV_NOPIV_TILE CHAMELEON_TILE_FNAME(zgesv_nopiv , ZGESV_NOPIV ) -//#define CHAMELEON_ZGESVD_TILE CHAMELEON_TILE_FNAME(zgesvd , ZGESVD ) +#define CHAMELEON_ZGESVD_TILE CHAMELEON_TILE_FNAME(zgesvd , ZGESVD ) //#define CHAMELEON_ZGETRF_TILE CHAMELEON_TILE_FNAME(zgetrf , ZGETRF ) #define CHAMELEON_ZGETRF_INCPIV_TILE CHAMELEON_TILE_FNAME(zgetrf_incpiv, ZGETRF_INCPIV) #define CHAMELEON_ZGETRF_NOPIV_TILE CHAMELEON_TILE_FNAME(zgetrf_nopiv , ZGETRF_NOPIV ) @@ -195,7 +195,7 @@ //#define CHAMELEON_ZGESV_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zgesv , ZGESV ) #define CHAMELEON_ZGESV_INCPIV_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zgesv_incpiv , ZGESV_INCPIV ) #define CHAMELEON_ZGESV_NOPIV_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zgesv_nopiv , ZGESV_NOPIV ) -//#define CHAMELEON_ZGESVD_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zgesvd , ZGESVD ) +#define CHAMELEON_ZGESVD_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zgesvd , ZGESVD ) //#define CHAMELEON_ZGETRF_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zgetrf , ZGETRF ) #define CHAMELEON_ZGETRF_INCPIV_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zgetrf_incpiv, ZGETRF_INCPIV) #define CHAMELEON_ZGETRF_NOPIV_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zgetrf_nopiv , ZGETRF_NOPIV ) diff --git a/control/compute_z.h b/control/compute_z.h index 16b8978e8dcc0ef2bf1c9c54d784130377914f30..d4acf6976e8eb512ff15f9095821292ecca91b27 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -64,8 +64,11 @@ int chameleon_zshift(CHAM_context_t *chamctxt, int m, int n, CHAMELEON_Complex64 /** * Declarations of parallel functions (dynamic scheduling) - alphabetical order */ -void chameleon_pzgebrd_gb2bd(cham_uplo_t uplo, CHAM_desc_t *A, double *D, double *E, CHAM_desc_t *T, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int chameleon_pzgebrd_gb2bd( cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT, + double *E, double *S, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgebrd_ge2gb( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int chameleon_pzgebrd( int genD, cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT, + double *E, double *S, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); @@ -174,7 +177,7 @@ void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, c /** * Gram function prototypes */ -void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +void chameleon_pzgram( struct chameleon_pzgram_s *ws, cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); /** * LAPACK/Tile Descriptor accesses @@ -200,7 +203,7 @@ chameleon_zdesc_alloc_diag( CHAM_desc_t *descA, int nb, int m, int n, int p, int #define chameleon_zdesc_alloc( descA, mb, nb, lm, ln, i, j, m, n, free) \ { \ int rc; \ - rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_TILE, \ + rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_GLOBAL, \ ChamComplexDouble, (mb), (nb), ((mb)*(nb)), \ (m), (n), (i), (j), (m), (n), 1, 1, \ NULL, NULL, NULL ); \ diff --git a/coreblas/compute/global.c b/coreblas/compute/global.c index e08888e0d66482d80d025aefa789e1d453ab54e6..8b72d7b85c45239d8c7a8c3fb36d2814d3f103eb 100644 --- a/coreblas/compute/global.c +++ b/coreblas/compute/global.c @@ -162,9 +162,12 @@ char *chameleon_lapack_constants[] = "", // 300 "No vectors", // 301 ChamNoVec - "Vectors needed", // 302 ChamVec - "I", // 303 ChamIvec - "", "", "", "", "", "", + "Vectors needed", // 302 ChamVec, ChamSVDvrange + "I", // 303 ChamIvec, ChamSVDirange + "A", // 304 ChamAllVec, ChamSVDall + "S", // 305 ChamSVec + "O", // 306 ChamOVec + "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", diff --git a/cudablas/compute/cudaglobal.c b/cudablas/compute/cudaglobal.c index 4c460a0c7b1bd67197393d0acb17288c4578dd3f..130fa700f4b086a2914976097398f88dd116a2ab 100644 --- a/cudablas/compute/cudaglobal.c +++ b/cudablas/compute/cudaglobal.c @@ -103,9 +103,12 @@ int chameleon_cublas_constants[] = 0, // 299 0, // 300 0, // 301 ChamNoVec - 0, // 302 ChamVec - 0, // 303 ChamIvec - 0, 0, 0, 0, 0, 0, + 0, // 302 ChamVec, ChamSVDvrange + 0, // 303 ChamIvec, ChamSVDirange + 0, // 304 ChamAllVec, ChamSVDall + 0, // 305 ChamSVec + 0, // 306 ChamOVec + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -115,10 +118,10 @@ int chameleon_cublas_constants[] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 390 - 0, // 391 - 0, // 392 + 0, // 391 Forward + 0, // 392 Backward 0, 0, 0, 0, 0, 0, 0, 0, - 0, // 401 - 0, // 402 + 0, // 401 Columnwise + 0, // 402 Rowwise 0, 0, 0, 0, 0, 0, 0, 0 // Remember to add a coma! }; diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt old mode 100755 new mode 100644 diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h index 79bce66b4c3524f31db76357b425ce30272a83d7..71b18d8d41dfb2b87126dc54937b42c5413d4c37 100644 --- a/include/chameleon/constants.h +++ b/include/chameleon/constants.h @@ -156,9 +156,12 @@ typedef enum chameleon_sym_e { * @brief Singular/Eigen vector job description */ typedef enum chameleon_job_e { - ChamNoVec = 301, - ChamVec = 302, - ChamIvec = 303, + ChamNoVec = 301, + ChamVec = 302, + ChamIvec = 303, + ChamAllVec = 304, + ChamSVec = 305, + ChamOVec = 306, } cham_job_t; /** diff --git a/runtime/parsec/codelets/codelet_zlacpy.c b/runtime/parsec/codelets/codelet_zlacpy.c index 65b9bc0542dae69579db42ef476705cebfa4bca7..ded9c005b1c0a8f4cba97ae677ee00abb4edd7df 100644 --- a/runtime/parsec/codelets/codelet_zlacpy.c +++ b/runtime/parsec/codelets/codelet_zlacpy.c @@ -29,7 +29,7 @@ CORE_zlacpy_parsec( parsec_execution_stream_t *context, cham_uplo_t uplo; int M; int N; - CHAMELEON_Complex64_t *A; + const CHAMELEON_Complex64_t *A; int LDA; CHAMELEON_Complex64_t *B; int LDB; @@ -72,7 +72,7 @@ CORE_zlacpyx_parsec( parsec_execution_stream_t *context, int M; int N; int displA; - CHAMELEON_Complex64_t *A; + const CHAMELEON_Complex64_t *A; int LDA; int displB; CHAMELEON_Complex64_t *B; @@ -95,7 +95,7 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_zlacpyx_parsec, options->priority, "lacpy", + PARSEC_dtd_taskpool, CORE_zlacpyx_parsec, options->priority, "lacpyx", sizeof(cham_uplo_t), &uplo, VALUE, sizeof(int), &m, VALUE, sizeof(int), &n, VALUE, diff --git a/runtime/quark/codelets/codelet_zlacpy.c b/runtime/quark/codelets/codelet_zlacpy.c index 90a9ae5fbca18270d2e75f4e95c408c14096ba93..1f4b15798b6bb6a6e0676396af33c10c5bf695da 100644 --- a/runtime/quark/codelets/codelet_zlacpy.c +++ b/runtime/quark/codelets/codelet_zlacpy.c @@ -80,7 +80,7 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, { quark_option_t *opt = (quark_option_t*)(options->schedopt); DAG_CORE_LACPY; - QUARK_Insert_Task(opt->quark, CORE_zlacpy_quark, (Quark_Task_Flags*)opt, + QUARK_Insert_Task(opt->quark, CORE_zlacpyx_quark, (Quark_Task_Flags*)opt, sizeof(int), &uplo, VALUE, sizeof(int), &m, VALUE, sizeof(int), &n, VALUE, diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index eed4d7d4961772f864ed689f97ced39eaa922c21..a2216fb2d7d20a565a751562b4521a3ba46495fb 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -77,6 +77,7 @@ set(ZSRC_WO_STDAPI testing_zsysv.c testing_zgenm2.c testing_zgesv_nopiv.c + testing_zgesvd.c testing_zgetrf_nopiv.c testing_zgetrs_nopiv.c testing_zgeqrf.c @@ -120,6 +121,7 @@ set(ZSRC testing_zcheck_blas.c testing_zcheck_facto.c testing_zcheck_qr_lq.c + testing_zcheck_svd.c testing_zcheck_polar_decomp.c testing_zprint.c ) diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index 6d42d203ada128a6b5fb606c2b217929fe09ba34..502add004a084acfc4fdbef08151bca551285c98 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -54,7 +54,7 @@ if (NOT CHAMELEON_SIMULATION) #set( TESTS ${TESTS} geqrs gelqs ) #set( TESTS ${TESTS} geqrs_hqr gelqs_hqr ) set( TESTS ${TESTS} gels gels_hqr ) - set( TESTS ${TESTS} genm2 gepdf_qr gepdf_qdwh ) + set( TESTS ${TESTS} genm2 gepdf_qr gepdf_qdwh gesvd ) set( TESTS ${TESTS} cesca gram ) foreach( cat ${TEST_CATEGORIES} ) @@ -69,6 +69,7 @@ if (NOT CHAMELEON_SIMULATION) if ( ${cat} STREQUAL "mpi" ) set ( PREFIX mpiexec --bind-to none -n ${NP} ) + list( REMOVE_ITEM TESTSTMP gesvd ) else() set ( PREFIX "" ) endif() diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c index ca5204ccccb63dbc0bcd106dabfcacb153a6958e..61ae3d8f50059f4df87c04965bfe1e8f9dc8fbd3 100644 --- a/testing/chameleon_ztesting.c +++ b/testing/chameleon_ztesting.c @@ -111,6 +111,10 @@ parameter_t parameters[] = { { "llvl", "Tree used for low level reduction insides nodes", -22, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestValInt, {0}, NULL, pread_int, sprint_int }, { "hlvl", "Tree used for high level reduction between nodes", -23, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestValInt, {0}, NULL, pread_int, sprint_int }, { "domino", "Enable/Disable the domino between upper and lower trees", -24, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 6, TestValInt, {0}, NULL, pread_int, sprint_int }, + + { NULL, "SVD parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, + { "jobu", "Value of the jobu parameter", -50, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestJob, {0}, NULL, pread_job, sprint_job }, + { "jobvt", "Value of the jobvt parameter", -51, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestJob, {0}, NULL, pread_job, sprint_job }, #endif { "tsub", "Graph submission time in s", 999, PARAM_OUTPUT, 2, 13, TestValFixdbl, {0}, NULL, pread_fixdbl, sprint_fixdbl }, diff --git a/testing/input/gesvd.in b/testing/input/gesvd.in new file mode 100644 index 0000000000000000000000000000000000000000..909b6787d359c95ca4470217c2aeb81b6ee728e9 --- /dev/null +++ b/testing/input/gesvd.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GESVD +# nb: Tile size +# M: Number of rows of matrix A and C +# N: Number of columns of matrix B and C +# LDA: Leading dimension of matrix A +# JobU: value of jobu +# JobVt: value of jobvt + +op = gesvd +nb = 3, 4, 16, 17 +m = 15, 32, 37 +n = 13, 24, 35 +jobu = NoVec, AllVec +jobvt = NoVec, AllVec +lda = 41 diff --git a/testing/run_list.c b/testing/run_list.c index 1d13d861d65084c6d569d1c2deb9a413c2b8b13d..708248e0dfd9627bcfd85e71476d398b739a9196 100644 --- a/testing/run_list.c +++ b/testing/run_list.c @@ -441,6 +441,32 @@ run_arg_get_side( run_arg_list_t *arglist, const char *name, cham_side_t defval return rval.side; } +/** + * @brief Searches for a cham_job_t value by its name. + * + * @param[inout] arglist + * The list of arguments. + * On exit, if the argument was not in the list, the default value is + * stored in it. + * + * @param[in] name + * The name of the argument to look for. + * + * @param[in] defval + * The default value if no argument is found with this name. This value + * is added to the list if not found. + * + * @retval The value of the argument _name_. + */ +cham_job_t +run_arg_get_job( run_arg_list_t *arglist, const char *name, cham_job_t defval ) +{ + val_t val, rval; + val.job = defval; + rval = run_arg_get( arglist, name, val ); + return rval.job; +} + /** * @brief Searches for a cham_normtype_t value by its name. * @@ -736,6 +762,7 @@ run_print_header_partial( const char **list, int human, char *str ) case TestUplo: case TestDiag: case TestSide: + case TestJob: case TestNormtype: case TestString: rc = sprintf( str, " %-*s", param->psize, *pname ); diff --git a/testing/testing_zcheck.h b/testing/testing_zcheck.h index 2906ca16a17caa41e33d5cc8766c12f51f50e604..ab8d86977e5805fca7e649843a26040487a20fbe 100644 --- a/testing/testing_zcheck.h +++ b/testing/testing_zcheck.h @@ -99,6 +99,12 @@ static inline int check_zqc ( run_arg_list_t *args, cham_side_t side, static inline int check_zqc_std ( run_arg_list_t *args, cham_side_t side, cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t *C, CHAMELEON_Complex64_t *CC, int LDC, CHAMELEON_Complex64_t *Q, int LDQ ) { return 0; } +/* SVD check */ +static inline int check_zgesvd_std ( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_Complex64_t *Ainit, CHAMELEON_Complex64_t *A, int LDA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ) { return 0; } +static inline int check_zgesvd ( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *descAinit, CHAM_desc_t *descA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ) { return 0; } + /* Polar decomposition check */ static inline int check_zgepdf_qr ( run_arg_list_t *args, CHAM_desc_t *descA1, CHAM_desc_t *descA2, CHAM_desc_t *descQ1, CHAM_desc_t *descQ2, CHAM_desc_t *descAF1 ) { return 0; } @@ -172,6 +178,11 @@ int check_zqc ( run_arg_list_t *args, cham_side_t side, cham_trans_t t int check_zqc_std ( run_arg_list_t *args, cham_side_t side, cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t *C, CHAMELEON_Complex64_t *CC, int LDC, CHAMELEON_Complex64_t *Q, int LDQ ); +/* SVD check */ +int check_zgesvd_std ( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_Complex64_t *Ainit, CHAMELEON_Complex64_t *A, int LDA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ); +int check_zgesvd ( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *descAinit, CHAM_desc_t *descA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ); /* Polar decomposition check */ int check_zgepdf_qr ( run_arg_list_t *args, CHAM_desc_t *descA1, CHAM_desc_t *descA2, CHAM_desc_t *descQ1, CHAM_desc_t *descQ2, CHAM_desc_t *descAF1 ); diff --git a/testing/testing_zcheck_facto.c b/testing/testing_zcheck_facto.c index c09a699345c56b4f84e7597f8fdfd186556a00a4..a9ad49b3ab0e92aa277404ec94699ac984d7fede 100644 --- a/testing/testing_zcheck_facto.c +++ b/testing/testing_zcheck_facto.c @@ -427,7 +427,7 @@ int check_zsolve( run_arg_list_t *args, cham_mtxtype_t matrix_type, cham_trans_t } Rnorm = CHAMELEON_zlange_Tile( ChamOneNorm, descB ); - result = Rnorm / ( Anorm * Xnorm * chameleon_max( M, N ) * eps ); + result = Rnorm / ( Anorm * Xnorm * max( M, N ) * eps ); run_arg_add_double( args, "||A||", Anorm ); run_arg_add_double( args, "||X||", Xnorm ); diff --git a/testing/testing_zcheck_polar_decomp.c b/testing/testing_zcheck_polar_decomp.c index 761f592972188dd37ec0cd290ab7248f619ad175..b8469b232136314c03806227e848a961c096526b 100644 --- a/testing/testing_zcheck_polar_decomp.c +++ b/testing/testing_zcheck_polar_decomp.c @@ -36,10 +36,6 @@ #include "testing_zcheck.h" #include <chameleon/flops.h> -#ifndef max -#define max( _a_, _b_ ) ( (_a_) > (_b_) ? (_a_) : (_b_) ) -#endif - /** ******************************************************************************** * diff --git a/testing/testing_zcheck_qr_lq.c b/testing/testing_zcheck_qr_lq.c index 0de341de191bb7434cbdf7a603701508326d8083..9676a3c4921c94ead85657bf430c6e528ec85fb6 100644 --- a/testing/testing_zcheck_qr_lq.c +++ b/testing/testing_zcheck_qr_lq.c @@ -36,10 +36,6 @@ #include "testing_zcheck.h" #include <chameleon/flops.h> -#ifndef max -#define max( _a_, _b_ ) ( (_a_) > (_b_) ? (_a_) : (_b_) ) -#endif - /** ******************************************************************************** * @@ -870,10 +866,10 @@ int check_zgels( run_arg_list_t *args, cham_trans_t trans, CHAM_desc_t *descA, C * Whether the matrix A is transposed, conjugate transposed or not transposed. * * @param[in] M - * The number of rows of the matrix A. + * The number of rows of the matrix A. * * @param[in] N - * The number of columns of the matrix A. + * The number of columns of the matrix A. * * @param[in] A * The matrix A. diff --git a/testing/testing_zcheck_svd.c b/testing/testing_zcheck_svd.c new file mode 100644 index 0000000000000000000000000000000000000000..d6c0aa499d5429435357bf10884678414624cf92 --- /dev/null +++ b/testing/testing_zcheck_svd.c @@ -0,0 +1,261 @@ +/** + * + * @file testing_zcheck_svd.c + * + * @copyright 2019-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon CHAMELEON_Complex64_t auxiliary testings routines + * + * @version 1.2.0 + * @author Alycia Lisito + * @date 2022-03-30 + * @precisions normal z -> c d s + * + */ +#include "../control/common.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <chameleon.h> + +#if !defined(CHAMELEON_SIMULATION) + +#include <coreblas/lapacke.h> +#if defined(CHAMELEON_USE_MPI) +#include <mpi.h> +#endif +#include "testings.h" +#include "testing_zcheck.h" +#include <chameleon/flops.h> + +/** + ******************************************************************************** + * + * @ingroup testing + * + * @brief Checks if the Chameleon SVD algorithm works: Ainit = U * mat( S ) * Vt. + * - U and Vt should be orthogonal. + * - Sinit and S should be the same. + * - Ainit = U * mat( S ) * Vt. + * + ******************************************************************************* + * + * @param[in] jobu + * Specifies options for computing all or part of the matrix U. + * + * @param[in] jobvt + * Specifies options for computing all or part of the matrix V^H. + * + * @param[in] M + * The number of rows of the matrices Ainit, A and U. + * + * @param[in] N + * The number of columns of the matrices Ainit, A and Vt. + * + * @param[in] Ainit + * The matrix Ainit (initial matrix A). + * + * @param[in] A + * The matrix A after the SVD, can contain parts of the matrix U or Vt + * or nothing (depending on the values of jobu and jobvt). + * + * @param[in] LDA + * The leading dimension of the matrices A and Ainit. + * + * @param[in] Sinit + * The vector with the singular values of the matrix Ainit + * (contains the K = min(M, N) singular values of Ainit). + * + * @param[in] S + * The vector with the singular values of the matrix Ainit + * computed by the Chameleon SVD algorithm. + * + * @param[in] U + * The orthogonal matrix U computed by the Chameleon SVD algorithm can + * contain all of U, a part of U or nothing (NULL) depending on the value of jobu; + * if jobu == AllVec : dimension: M * M + * if jobu == SVec : dimension: M * min(M, N) + * if jobu == NoVec or OVec : U = NULL + * + * @param[in] LDU + * The leading dimension of the matrix U. + * + * @param[in] Vt + * The orthogonal matrix Vt computed by the Chameleon SVD algorithm can + * contain all of Vt, a part of Vt or nothing (NULL) depending on the value of jobvt; + * if jobuvt == AllVec : dimension: N * N + * if jobvt == SVec : dimension: min(M, N) * N + * if jobvt == NoVec or OVec : Vt = NULL + * + * @param[in] LDVt + * The leading dimension of the matrix Vt. + * + * @retval 0 successfull comparison + * + ******************************************************************************* + */ +int check_zgesvd_std( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_Complex64_t *Ainit, CHAMELEON_Complex64_t *A, int LDA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ) +{ + int info_solution = 0; + double result; + int k; + int K = chameleon_min(M, N); + double eps = LAPACKE_dlamch_work('e'); + + /* Checks if U is orthogonal */ + switch ( jobu ) { + case ChamAllVec: + info_solution += check_zortho_std( args, M, M, U, LDU ); + break; + case ChamSVec: + info_solution += check_zortho_std( args, M, K, U, LDU ); + break; + case ChamOVec: + info_solution += check_zortho_std( args, M, K, A, LDA ); + break; + default: + ; + } + + /* Checks if Vt is orthogonal */ + switch ( jobvt ) { + case ChamAllVec: + info_solution += check_zortho_std( args, N, N, Vt, LDVt ); + break; + case ChamSVec: + info_solution += check_zortho_std( args, K, N, Vt, LDVt ); + break; + case ChamOVec: + info_solution += check_zortho_std( args, K, N, A, LDA ); + break; + default: + ; + } + + /* checks if Sinit and S are the same */ + double maxSV = chameleon_max( fabs(Sinit[0]), fabs(S[0]) ); + double maxSVdiff = fabs( fabs(Sinit[0]) - fabs(S[0]) ); + double maxtmp, maxdifftmp; + + for ( k = 1; k < K; k++ ) { + maxdifftmp = fabs( fabs(Sinit[k]) - fabs(S[k]) ); + maxtmp = chameleon_max( fabs(Sinit[k]), fabs(S[k]) ); + + /* Update */ + maxSV = chameleon_max( maxtmp, maxSV ); + maxSVdiff = chameleon_max( maxdifftmp, maxSVdiff ); + } + + result = maxSVdiff / ( K * eps * maxSV ); + + if ( isnan(result) || isinf(result) || (result > 60.0) ) { + info_solution += 1; + } + + if ( (jobu == ChamAllVec) && (jobvt == ChamAllVec) ) { + /* To do: create mat( S ) */ + } + result = info_solution; + + run_arg_add_double( args, "||R||", result ); + + (void)Ainit; + return info_solution; +} + +/** + ******************************************************************************** + * + * @ingroup testing + * + * @brief Checks if the Chameleon SVD algorithm works: descAinit = U * mat( S ) * Vt. + * - U and Vt should be orthogonal. + * - Sinit and S should be the same. + * - descAinit = U * mat( S ) * Vt. + * + ******************************************************************************* + * + * @param[in] jobu + * Specifies options for computing all or part of the matrix U. + * + * @param[in] jobvt + * Specifies options for computing all or part of the matrix V^H. + * + * @param[in] descAinit + * The descriptor of the matrix Ainit (initial matrix A). + * + * @param[in] descA + * The descriptor of the matrix A after the SVD, can contain parts of the matrix + * U or Vt or nothing (depending on the values of jobu and jobvt). + * + * @param[in] Sinit + * The vector with the singular values of the matrix Ainit + * (contains the K = min(M, N) singular values of Ainit). + * + * @param[in] S + * The vector with the singular values of the matrix Ainit + * computed by the Chameleon SVD algorithm. + * + * @param[in] U + * The orthogonal matrix U computed by the Chameleon SVD algorithm can + * contain all of U, a part of U or nothing (NULL) depending on the value of jobu; + * if jobu == AllVec : dimension: M * M + * if jobu == SVec : dimension: M * min(M, N) + * if jobu == NoVec or OVec : U = NULL + * + * @param[in] LDU + * The leading dimension of the matrix U. + * + * @param[in] Vt + * The orthogonal matrix Vt computed by the Chameleon SVD algorithm can + * contain all of Vt, a part of Vt or nothing (NULL) depending on the value of jobvt; + * if jobuvt == AllVec : dimension: N * N + * if jobvt == SVec : dimension: min(M, N) * N + * if jobvt == NoVec or OVec : Vt = NULL + * + * @param[in] LDVt + * The leading dimension of the matrix Vt. + * + * @retval 0 successfull comparison + * + ******************************************************************************* + */ +int check_zgesvd( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *descAinit, CHAM_desc_t *descA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ) +{ + int info_solution; + int rank = CHAMELEON_Comm_rank(); + CHAMELEON_Complex64_t *Ainit, *A; + int M = descA->m; + int N = descA->n; + int LDA = descA->lm; + + if ( rank == 0 ) { + Ainit = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + A = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + } + + CHAMELEON_zDesc2Lap( ChamUpperLower, descAinit, Ainit, LDA ); + CHAMELEON_zDesc2Lap( ChamUpperLower, descA, A, LDA ); + + if ( rank == 0 ) { + + info_solution = check_zgesvd_std( args, jobu, jobvt, M, N, Ainit, A, LDA, Sinit, S, U, LDU, Vt, LDVt ); + + free( Ainit ); + free( A ); + } + +#if defined(CHAMELEON_USE_MPI) + MPI_Bcast( &info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD ); +#endif + + return info_solution; +} + +#endif /* defined(CHAMELEON_SIMULATION) */ diff --git a/testing/testing_zgesvd.c b/testing/testing_zgesvd.c new file mode 100644 index 0000000000000000000000000000000000000000..a95eeb7338297bcc225a50197cdf487316711c09 --- /dev/null +++ b/testing/testing_zgesvd.c @@ -0,0 +1,302 @@ +/** + * + * @file testing_zgesvd.c + * + * @copyright 2019-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgesvd testing + * + * @version 1.2.0 + * @author Alycia Lisito + * @date 2022-03-30 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testings.h" +#include "testing_zcheck.h" +#include <chameleon/flops.h> + +static cham_fixdbl_t +flops_zgesvd( int M, int N, int K, cham_job_t jobu, cham_job_t jobvt ) +{ + cham_fixdbl_t flops = flops_zgebrd( M, N ); + + switch ( jobu ) { + case ChamAllVec: + flops += flops_zunmqr( ChamLeft, M, N, N ); + break; + case ChamOVec: + case ChamSVec: + flops += flops_zunmqr( ChamLeft, M, K, K ); + break; + case ChamNoVec: + break; + default: + ; + } + + switch ( jobvt ) { + case ChamAllVec: + flops += flops_zunmlq( ChamRight, M, N, M ); + break; + case ChamOVec: + case ChamSVec: + flops += flops_zunmlq( ChamRight, K, N, K ); + break; + case ChamNoVec: + break; + default: + ; + } + + return -1; +} + +int +testing_zgesvd_desc( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int async = parameters_getvalue_int( "async" ); + intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" ); + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = chameleon_min( M, N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + double cond = run_arg_get_double( args, "cond", 1.e16 ); + int mode = run_arg_get_int( args, "mode", 4 ); + int Q = parameters_compute_q( P ); + cham_job_t jobu = run_arg_get_job( args, "jobu", ChamNoVec ); + cham_job_t jobvt = run_arg_get_job( args, "jobvt", ChamNoVec ); + int runtime; + + /* Descriptors */ + CHAM_desc_t *descA, *descT, *descA0; + CHAMELEON_Complex64_t *U, *Vt = NULL; + double *S, *D; + int LDU = M; + int LDVt = N; + int Un, Vtn; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + CHAMELEON_Get( CHAMELEON_RUNTIME, &runtime ); + if ( runtime == RUNTIME_SCHED_PARSEC ) { + if ( CHAMELEON_Comm_rank() == 0 ) { + fprintf( stderr, + "SKIPPED: The SVD is not supported with PaRSEC\n" ); + } + return -1; + } + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Alloc_Workspace_zgesvd( M, N, &descT, P, Q ); + + if ( (jobu == ChamAllVec) || (jobu == ChamSVec) ) { + Un = ( jobu == ChamSVec ) ? K : M; + U = malloc( LDU*Un*sizeof(CHAMELEON_Complex64_t) ); + } + else { + U = NULL; + } + + if ( (jobvt == ChamAllVec) || (jobvt == ChamSVec) ) { + LDVt = ( jobvt == ChamSVec ) ? K : N; + Vtn = N; + Vt = malloc( LDVt*N*sizeof(CHAMELEON_Complex64_t) ); + } + else { + Vt = NULL; + } + + /* Generate the diagonal of eigen/singular values */ + D = malloc( K*sizeof(double) ); + S = malloc( K*sizeof(double) ); + + /* Fills the matrix with random values */ + hres = CHAMELEON_zlatms_Tile( ChamDistUniform, seedA, ChamNonsymPosv, D, mode, cond, 1., descA ); + if ( hres != 0 ) { + return hres; + } + /* + * descA0 is defined here because of the cost of zlatms. To copy descA in descA0 + * now prevents to call it again later in the check (indeed descA is modified + * with the call to CHAMELEON_zgepdf_qdwh_Tile[_Async]). + */ + if ( check ) { + descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_GLOBAL ); + CHAMELEON_zlacpy_Tile( ChamUpperLower, descA, descA0 ); + } + + /* Calculates the solution */ + testing_start( &test_data ); + if ( async ) { + hres = CHAMELEON_zgesvd_Tile_Async( jobu, jobvt, descA, S, descT, U, LDU, Vt, LDVt, test_data.sequence, &test_data.request ); + CHAMELEON_Desc_Flush( descA, test_data.sequence ); + CHAMELEON_Desc_Flush( descT, test_data.sequence ); + } + else { + hres = CHAMELEON_zgesvd_Tile( jobu, jobvt, descA, S, descT, U, LDU, Vt, LDVt ); + } + test_data.hres = hres; + testing_stop( &test_data, flops_zgesvd( M, N, K, jobu, jobvt ) ); + + /* Checks the factorisation and residue */ + if ( check ) { + + hres += check_zgesvd( args, jobu, jobvt, descA0, descA, D, S, U, LDU, Vt, LDVt ); + CHAMELEON_Desc_Destroy( &descA0 ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descT ); + free( S ); + free( D ); + if ( (jobu == ChamAllVec) || (jobu == ChamSVec) ) { + free( U ); + } + if ( (jobvt == ChamAllVec) || (jobvt == ChamSVec) ) { + free( Vt ); + } + + return hres; +} + +int +testing_zgesvd_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = chameleon_min( M, N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + double cond = run_arg_get_double( args, "cond", 1.e16 ); + int mode = run_arg_get_int( args, "mode", 4 ); + cham_job_t jobu = run_arg_get_job( args, "jobu", ChamNoVec ); + cham_job_t jobvt = run_arg_get_job( args, "jobvt", ChamNoVec ); + int runtime; + + /* Descriptors */ + CHAM_desc_t *descT; + CHAMELEON_Complex64_t *A, *A0, *U, *Vt; + double *S, *D; + int LDU = M; + int LDVt = N; + int Un, Vtn; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + CHAMELEON_Get( CHAMELEON_RUNTIME, &runtime ); + if ( runtime == RUNTIME_SCHED_PARSEC ) { + if ( CHAMELEON_Comm_rank() == 0 ) { + fprintf( stderr, + "SKIPPED: The SVD is not supported with PaRSEC\n" ); + } + return -1; + } + + /* Creates the matrices */ + A = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Alloc_Workspace_zgesvd( M, N, &descT, 1, 1 ); + + if ( (jobu == ChamAllVec) || (jobu == ChamSVec) ) { + Un = ( jobu == ChamSVec ) ? K : M; + U = malloc( LDU*Un*sizeof(CHAMELEON_Complex64_t) ); + } + else { + U = NULL; + } + + if ( (jobvt == ChamAllVec) || (jobvt == ChamSVec) ) { + LDVt = ( jobvt == ChamSVec ) ? K : N; + Vtn = N; + Vt = malloc( LDVt*N*sizeof(CHAMELEON_Complex64_t) ); + } + else { + Vt = NULL; + } + + /* Generate the diagonal of eigen/singular values */ + D = malloc( K*sizeof(double) ); + S = malloc( K*sizeof(double) ); + + /* Fills the matrix with random values */ + hres = CHAMELEON_zlatms( M, N, ChamDistUniform, seedA, ChamNonsymPosv, D, mode, cond, 1., A, LDA ); + if ( hres != 0 ) { + return hres; + } + /* + * A0 is defined here because of the cost of zlatms. To copy A in A0 + * now prevents to call it again later in the check (indeed A is modified + * with the call to CHAMELEON_zgepdf_qdwh). + */ + if ( check ) { + A0 = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zlacpy( ChamUpperLower, M, N, A, LDA, A0, LDA ); + } + + /* Calculates the solution */ + testing_start( &test_data ); + hres = CHAMELEON_zgesvd( jobu, jobvt, M, N, A, LDA, S, descT, U, LDU, Vt, LDVt ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgesvd( M, N, K, jobu, jobvt ) ); + + /* Checks the factorisation and residue */ + if ( check ) { + + hres += check_zgesvd_std( args, jobu, jobvt, M, N, A0, A, LDA, D, S, U, LDU, Vt, LDVt ); + free( A0 ); + } + + free( A ); + free( S ); + if ( (jobu == ChamAllVec) || (jobu == ChamSVec) ) { + free( U ); + } + if ( (jobvt == ChamAllVec) || (jobvt == ChamSVec) ) { + free( Vt ); + } + CHAMELEON_Desc_Destroy( &descT ); + + return hres; +} + +testing_t test_zgesvd; +const char *zgesvd_params[] = { "mtxfmt", "nb", "jobu", "jobvt", "m", "n", "lda", "seedA", NULL }; +const char *zgesvd_output[] = { NULL }; +const char *zgesvd_outchk[] = { "||R||", "||I-QQ'||", "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgesvd_init( void ) __attribute__( ( constructor ) ); +void +testing_zgesvd_init( void ) +{ + test_zgesvd.name = "zgesvd"; + test_zgesvd.helper = "Singular Value Decomposition"; + test_zgesvd.params = zgesvd_params; + test_zgesvd.output = zgesvd_output; + test_zgesvd.outchk = zgesvd_outchk; + test_zgesvd.fptr_desc = testing_zgesvd_desc; + test_zgesvd.fptr_std = testing_zgesvd_std; + test_zgesvd.next = NULL; + + testing_register( &test_zgesvd ); +} diff --git a/testing/testings.h b/testing/testings.h index a938f45b746927e84aa5893df42af24315e17ff1..2832223f4edeac53eba444a13c5e8249fc4592ed 100644 --- a/testing/testings.h +++ b/testing/testings.h @@ -43,6 +43,7 @@ typedef enum valtype_ { TestUplo, TestDiag, TestSide, + TestJob, TestNormtype, TestString, } valtype_e; @@ -56,6 +57,7 @@ union val_u { cham_uplo_t uplo; cham_diag_t diag; cham_side_t side; + cham_job_t job; cham_normtype_t ntype; CHAMELEON_Complex64_t zval; CHAMELEON_Complex32_t cval; @@ -164,6 +166,7 @@ val_t pread_trans ( const char *str ); val_t pread_uplo ( const char *str ); val_t pread_diag ( const char *str ); val_t pread_side ( const char *str ); +val_t pread_job ( const char *str ); val_t pread_norm ( const char *str ); val_t pread_string ( const char *str ); @@ -180,6 +183,7 @@ char *sprint_trans ( val_t val, int human, int nbchar, char *str_in ); char *sprint_uplo ( val_t val, int human, int nbchar, char *str_in ); char *sprint_diag ( val_t val, int human, int nbchar, char *str_in ); char *sprint_side ( val_t val, int human, int nbchar, char *str_in ); +char *sprint_job ( val_t val, int human, int nbchar, char *str_in ); char *sprint_norm ( val_t val, int human, int nbchar, char *str_in ); char *sprint_string ( val_t val, int human, int nbchar, char *str_in ); char *sprint_check ( val_t val, int human, int nbchar, char *str_in ); @@ -202,6 +206,7 @@ cham_trans_t run_arg_get_trans ( run_arg_list_t *arglist, const char cham_uplo_t run_arg_get_uplo ( run_arg_list_t *arglist, const char *name, cham_uplo_t defval ); cham_diag_t run_arg_get_diag ( run_arg_list_t *arglist, const char *name, cham_diag_t defval ); cham_side_t run_arg_get_side ( run_arg_list_t *arglist, const char *name, cham_side_t defval ); +cham_job_t run_arg_get_job ( run_arg_list_t *arglist, const char *name, cham_job_t defval ); cham_normtype_t run_arg_get_ntype ( run_arg_list_t *arglist, const char *name, cham_normtype_t defval ); int run_arg_add_int ( run_arg_list_t *arglist, const char *name, int defval ); diff --git a/testing/values.c b/testing/values.c index 0a1b7db8d142fde1b3bd7a6d96737a81a9921a68..678b9e31c46b456c3cb714b664f1327392c89fe7 100644 --- a/testing/values.c +++ b/testing/values.c @@ -11,6 +11,7 @@ * @version 1.2.0 * @author Lucas Barros de Assis * @author Mathieu Faverge + * @author Alycia Lisito * @date 2022-02-22 * */ @@ -258,6 +259,76 @@ val_t pread_side( const char *str ) return val; } +/** + * @brief Convert the input string to a cham_job_t + * @param[in] str + * The input string + * @return The cham_job_t read. + */ +val_t pread_job( const char *str ) +{ + val_t val; + val.job = ChamNoVec; + + if ( ( strcasecmp( "ChamNoVec", str ) == 0 ) || + ( strcasecmp( "NoVec", str ) == 0 ) || + ( strcasecmp( "N", str ) == 0 ) ) + { + val.job = ChamNoVec; + } + else if ( ( strcasecmp( "ChamVec", str ) == 0 ) || + ( strcasecmp( "Vec", str ) == 0 ) ) + { + val.job = ChamVec; + } + else if ( ( strcasecmp( "ChamIvec", str ) == 0 ) || + ( strcasecmp( "Ivec", str ) == 0 ) || + ( strcasecmp( "I", str ) == 0 ) ) + { + val.job = ChamIvec; + } + else if ( ( strcasecmp( "ChamAllVec", str ) == 0 ) || + ( strcasecmp( "AllVec", str ) == 0 ) || + ( strcasecmp( "A", str ) == 0 ) ) + { + val.job = ChamAllVec; + } + else if ( ( strcasecmp( "ChamSVec", str ) == 0 ) || + ( strcasecmp( "SVec", str ) == 0 ) || + ( strcasecmp( "S", str ) == 0 ) ) + { + val.job = ChamSVec; + } + else if ( ( strcasecmp( "ChamOVec", str ) == 0 ) || + ( strcasecmp( "OVec", str ) == 0 ) || + ( strcasecmp( "O", str ) == 0 ) ) + { + val.job = ChamOVec; + } + else { + int v = atoi( str ); + if ( (v == ChamVec) || (v == (ChamVec-ChamNoVec)) ) { + val.job = ChamVec; + } + else if ( (v == ChamIvec) || (v == (ChamIvec-ChamNoVec)) ) { + val.job = ChamIvec; + } + else if ( (v == ChamAllVec) || (v == (ChamAllVec-ChamNoVec)) ) { + val.job = ChamAllVec; + } + else if ( (v == ChamSVec) || (v == (ChamSVec-ChamNoVec)) ) { + val.job = ChamSVec; + } + else if ( (v == ChamOVec) || (v == (ChamOVec-ChamNoVec)) ) { + val.job = ChamOVec; + } + else { + val.job = ChamNoVec; + } + } + return val; +} + /** * @brief Convert the input string to a cham_normtype_t * @param[in] str @@ -497,6 +568,47 @@ char *sprint_side( val_t val, int human, int nbchar, char *str_in ) return str_in+rc; } +/** + * @brief Convert the input string to a cham_job_t + * @param[in] str + * The input string + * @return The cham_job_t read. + */ +char *sprint_job( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + char *name; + switch( val.job ) { + case ChamNoVec: + name = "No Vec"; + break; + case ChamVec: + name = "Vec"; + break; + case ChamIvec: + name = "Ivec"; + break; + case ChamAllVec: + name = "All Vec"; + break; + case ChamSVec: + name = "S Vec"; + break; + case ChamOVec: + name = "O Vec"; + break; + default: + name = "ERR"; + } + rc = sprintf( str_in, " %-*s", nbchar, name ); + } + else { + rc = sprintf( str_in, ";%d", val.job ); + } + return str_in+rc; +} + /** * @brief Convert the input string to a cham_normtype_t * @param[in] str