diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt old mode 100755 new mode 100644 diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index eed4d7d4961772f864ed689f97ced39eaa922c21..a2216fb2d7d20a565a751562b4521a3ba46495fb 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -77,6 +77,7 @@ set(ZSRC_WO_STDAPI testing_zsysv.c testing_zgenm2.c testing_zgesv_nopiv.c + testing_zgesvd.c testing_zgetrf_nopiv.c testing_zgetrs_nopiv.c testing_zgeqrf.c @@ -120,6 +121,7 @@ set(ZSRC testing_zcheck_blas.c testing_zcheck_facto.c testing_zcheck_qr_lq.c + testing_zcheck_svd.c testing_zcheck_polar_decomp.c testing_zprint.c ) diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index 6d42d203ada128a6b5fb606c2b217929fe09ba34..502add004a084acfc4fdbef08151bca551285c98 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -54,7 +54,7 @@ if (NOT CHAMELEON_SIMULATION) #set( TESTS ${TESTS} geqrs gelqs ) #set( TESTS ${TESTS} geqrs_hqr gelqs_hqr ) set( TESTS ${TESTS} gels gels_hqr ) - set( TESTS ${TESTS} genm2 gepdf_qr gepdf_qdwh ) + set( TESTS ${TESTS} genm2 gepdf_qr gepdf_qdwh gesvd ) set( TESTS ${TESTS} cesca gram ) foreach( cat ${TEST_CATEGORIES} ) @@ -69,6 +69,7 @@ if (NOT CHAMELEON_SIMULATION) if ( ${cat} STREQUAL "mpi" ) set ( PREFIX mpiexec --bind-to none -n ${NP} ) + list( REMOVE_ITEM TESTSTMP gesvd ) else() set ( PREFIX "" ) endif() diff --git a/testing/input/gesvd.in b/testing/input/gesvd.in new file mode 100644 index 0000000000000000000000000000000000000000..ac35a8ecb91acfa3f46ae652a1905dc6f3fb7eec --- /dev/null +++ b/testing/input/gesvd.in @@ -0,0 +1,14 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GESVD +# nb: Tile size +# M: Number of rows of matrix A and C +# N: Number of columns of matrix B and C +# LDA: Leading dimension of matrix A + +op = gesvd +nb = 3, 4, 16, 17 +m = 15, 32, 37 +n = 13, 24, 35 +lda = 41 diff --git a/testing/testing_zcheck.h b/testing/testing_zcheck.h index 2906ca16a17caa41e33d5cc8766c12f51f50e604..ab8d86977e5805fca7e649843a26040487a20fbe 100644 --- a/testing/testing_zcheck.h +++ b/testing/testing_zcheck.h @@ -99,6 +99,12 @@ static inline int check_zqc ( run_arg_list_t *args, cham_side_t side, static inline int check_zqc_std ( run_arg_list_t *args, cham_side_t side, cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t *C, CHAMELEON_Complex64_t *CC, int LDC, CHAMELEON_Complex64_t *Q, int LDQ ) { return 0; } +/* SVD check */ +static inline int check_zgesvd_std ( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_Complex64_t *Ainit, CHAMELEON_Complex64_t *A, int LDA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ) { return 0; } +static inline int check_zgesvd ( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *descAinit, CHAM_desc_t *descA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ) { return 0; } + /* Polar decomposition check */ static inline int check_zgepdf_qr ( run_arg_list_t *args, CHAM_desc_t *descA1, CHAM_desc_t *descA2, CHAM_desc_t *descQ1, CHAM_desc_t *descQ2, CHAM_desc_t *descAF1 ) { return 0; } @@ -172,6 +178,11 @@ int check_zqc ( run_arg_list_t *args, cham_side_t side, cham_trans_t t int check_zqc_std ( run_arg_list_t *args, cham_side_t side, cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t *C, CHAMELEON_Complex64_t *CC, int LDC, CHAMELEON_Complex64_t *Q, int LDQ ); +/* SVD check */ +int check_zgesvd_std ( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_Complex64_t *Ainit, CHAMELEON_Complex64_t *A, int LDA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ); +int check_zgesvd ( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *descAinit, CHAM_desc_t *descA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ); /* Polar decomposition check */ int check_zgepdf_qr ( run_arg_list_t *args, CHAM_desc_t *descA1, CHAM_desc_t *descA2, CHAM_desc_t *descQ1, CHAM_desc_t *descQ2, CHAM_desc_t *descAF1 ); diff --git a/testing/testing_zcheck_qr_lq.c b/testing/testing_zcheck_qr_lq.c index 0de341de191bb7434cbdf7a603701508326d8083..247adba1a0a68cb80b140b4b418e818eb33dce59 100644 --- a/testing/testing_zcheck_qr_lq.c +++ b/testing/testing_zcheck_qr_lq.c @@ -870,10 +870,10 @@ int check_zgels( run_arg_list_t *args, cham_trans_t trans, CHAM_desc_t *descA, C * Whether the matrix A is transposed, conjugate transposed or not transposed. * * @param[in] M - * The number of rows of the matrix A. + * The number of rows of the matrix A. * * @param[in] N - * The number of columns of the matrix A. + * The number of columns of the matrix A. * * @param[in] A * The matrix A. diff --git a/testing/testing_zcheck_svd.c b/testing/testing_zcheck_svd.c new file mode 100644 index 0000000000000000000000000000000000000000..6df281921fabc0c7209a5b35a96d2be09425cee8 --- /dev/null +++ b/testing/testing_zcheck_svd.c @@ -0,0 +1,256 @@ +/** + * + * @file testing_zcheck_svd.c + * + * @copyright 2019-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon CHAMELEON_Complex64_t auxiliary testings routines + * + * @version 1.2.0 + * @author Alycia Lisito + * @date 2022-03-30 + * @precisions normal z -> c d s + * + */ +#include "../control/common.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <chameleon.h> + +#if !defined(CHAMELEON_SIMULATION) + +#include <coreblas/lapacke.h> +#if defined(CHAMELEON_USE_MPI) +#include <mpi.h> +#endif +#include "testings.h" +#include "testing_zcheck.h" +#include <chameleon/flops.h> + +#ifndef max +#define max( _a_, _b_ ) ( (_a_) > (_b_) ? (_a_) : (_b_) ) +#endif + +/** + ******************************************************************************** + * + * @ingroup testing + * + * @brief Checks if the Chameleon SVD algorithm works: Ainit = U * mat( S ) * Vt. + * - U and Vt should be orthogonal. + * - Sinit and S should be the same. + * - Ainit = U * mat( S ) * Vt. + * + ******************************************************************************* + * + * @param[in] jobu + * Specifies options for computing all or part of the matrix U. + * + * @param[in] jobvt + * Specifies options for computing all or part of the matrix V^H. + * + * @param[in] M + * The number of rows of the matrices Ainit, A and U. + * + * @param[in] N + * The number of columns of the matrices Ainit, A and Vt. + * + * @param[in] Ainit + * The matrix Ainit (initial matrix A). + * + * @param[in] A + * The matrix A after the SVD, can contain parts of the matrix U or Vt + * or nothing (depending on the values of jobu and jobvt). + * + * @param[in] LDA + * The leading dimension of the matrices A and Ainit. + * + * @param[in] Sinit + * The vector with the singular values of the matrix Ainit + * (contains the K = min(M, N) singular values of Ainit). + * + * @param[in] S + * The vector with the singular values of the matrix Ainit + * computed by the Chameleon SVD algorithm. + * + * @param[in] U + * The orthogonal matrix U computed by the Chameleon SVD algorithm can + * contain all of U, a part of U or nothing (NULL) depending on the value of jobu; + * dimension: M * K = min(M, N). + * + * @param[in] LDU + * The leading dimension of the matrix U. + * + * @param[in] Vt + * The orthogonal matrix Vt computed by the Chameleon SVD algorithm can + * contain all of Vt, a part of Vt or nothing (NULL) depending on the value of jobvt; + * dimension: K = min(M, N) * N. + * + * @param[in] LDVt + * The leading dimension of the matrix Vt. + * + * @retval 0 successfull comparison + * + ******************************************************************************* + */ +int check_zgesvd_std( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_Complex64_t *Ainit, CHAMELEON_Complex64_t *A, int LDA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ) +{ + int info_solution = 0; + double result; + int K = chameleon_min(M, N); + double eps = LAPACKE_dlamch_work('e'); + + /* Checks if U is orthogonal */ + switch ( jobu ) { + case ChamAllVec: + info_solution += check_zortho_std( args, M, M, U, LDU ); + break; + case ChamSVec: + info_solution += check_zortho_std( args, M, K, U, LDU ); + break; + case ChamOVec: + info_solution += check_zortho_std( args, M, K, A, LDA ); + break; + default: + ; + } + + /* Checks if Vt is orthogonal */ + switch ( jobvt ) { + case ChamAllVec: + info_solution += check_zortho_std( args, N, N, Vt, LDVt ); + break; + case ChamSVec: + info_solution += check_zortho_std( args, K, N, Vt, LDVt ); + break; + case ChamOVec: + info_solution += check_zortho_std( args, K, N, A, LDA ); + break; + default: + ; + } + + /* checks if Sinit and S are the same */ + double maxSV = chameleon_max( fabs(Sinit[0]), fabs(S[0]) ); + double maxSVdiff = fabs( fabs(Sinit[0]) - fabs(S[0]) ); + double maxtmp, maxdifftmp; + + for ( int k = 1; k < K; k++ ) { + maxdifftmp = fabs( fabs(Sinit[k]) - fabs(S[k]) ); + maxtmp = chameleon_max( fabs(Sinit[k]), fabs(S[k]) ); + + /* Update */ + maxSV = chameleon_max( maxtmp, maxSV ); + maxSVdiff = chameleon_max( maxdifftmp, maxSVdiff ); + } + + result = maxSVdiff / ( K * eps * maxSV ); + + if ( isnan(result) || isinf(result) || (result > 60.0) ) { + info_solution += 1; + } + + if ( jobu == ChamAllVec && jobvt == ChamAllVec ) { + /* To do: create mat( S ) */ + } + result = info_solution; + + run_arg_add_double( args, "||R||", result ); + + (void)Ainit; + return info_solution; +} + +/** + ******************************************************************************** + * + * @ingroup testing + * + * @brief Checks if the Chameleon SVD algorithm works: descAinit = U * mat( S ) * Vt. + * - U and Vt should be orthogonal. + * - Sinit and S should be the same. + * - descAinit = U * mat( S ) * Vt. + * + ******************************************************************************* + * + * @param[in] jobu + * Specifies options for computing all or part of the matrix U. + * + * @param[in] jobvt + * Specifies options for computing all or part of the matrix V^H. + * + * @param[in] descAinit + * The descriptor of the matrix Ainit (initial matrix A). + * + * @param[in] descA + * The descriptor of the matrix A after the SVD, can contain parts of the matrix + * U or Vt or nothing (depending on the values of jobu and jobvt). + * + * @param[in] Sinit + * The vector with the singular values of the matrix Ainit + * (contains the K = min(M, N) singular values of Ainit). + * + * @param[in] S + * The vector with the singular values of the matrix Ainit + * computed by the Chameleon SVD algorithm. + * + * @param[in] U + * The orthogonal matrix U computed by the Chameleon SVD algorithm can + * contain all of U, a part of U or nothing (NULL) depending on the value of jobu; + * dimension: M * K = min(M, N). + * + * @param[in] LDU + * The leading dimension of the matrix U. + * + * @param[in] Vt + * The orthogonal matrix Vt computed by the Chameleon SVD algorithm can + * contain all of Vt, a part of Vt or nothing (NULL) depending on the value of jobvt; + * dimension: K = min(M, N) * N. + * + * @param[in] LDVt + * The leading dimension of the matrix Vt. + * + * @retval 0 successfull comparison + * + ******************************************************************************* + */ +int check_zgesvd( run_arg_list_t *args, cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *descAinit, CHAM_desc_t *descA, + double *Sinit, double *S, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *Vt, int LDVt ) +{ + int info_solution; + int rank = CHAMELEON_Comm_rank(); + CHAMELEON_Complex64_t *Ainit, *A; + int M = descA->m; + int N = descA->n; + int LDA = descA->lm; + + if ( rank == 0 ) { + Ainit = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + A = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + } + + CHAMELEON_zDesc2Lap( ChamUpperLower, descAinit, Ainit, LDA ); + CHAMELEON_zDesc2Lap( ChamUpperLower, descA, A, LDA ); + + if ( rank == 0 ) { + + info_solution = check_zgesvd_std( args, jobu, jobvt, M, N, Ainit, A, LDA, Sinit, S, U, LDU, Vt, LDVt ); + + free( Ainit ); + free( A ); + } + +#if defined(CHAMELEON_USE_MPI) + MPI_Bcast( &info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD ); +#endif + + return info_solution; +} + +#endif /* defined(CHAMELEON_SIMULATION) */ diff --git a/testing/testing_zgesvd.c b/testing/testing_zgesvd.c new file mode 100644 index 0000000000000000000000000000000000000000..04582eb1dd9bc92fe3193f038301c1a5456346cc --- /dev/null +++ b/testing/testing_zgesvd.c @@ -0,0 +1,253 @@ +/** + * + * @file testing_zgesvd.c + * + * @copyright 2019-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgesvd testing + * + * @version 1.2.0 + * @author Alycia Lisito + * @date 2022-03-30 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testings.h" +#include "testing_zcheck.h" +#include <chameleon/flops.h> + +static cham_fixdbl_t +flops_zgesvd( int M, int N ) +{ + cham_fixdbl_t flops = flops_zgebrd( M, N ); + return flops; +} + +int +testing_zgesvd_desc( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int async = parameters_getvalue_int( "async" ); + intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" ); + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = chameleon_min( M, N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + double cond = run_arg_get_double( args, "cond", 1.e16 ); + int mode = run_arg_get_int( args, "mode", 4 ); + int Q = parameters_compute_q( P ); + + /* Descriptors */ + CHAM_desc_t *descA, *descT, *descA0; + CHAMELEON_Complex64_t *U, *Vt = NULL; + double *S, *D; + int LDU = M; + int LDVt = N; + int Un, Vtn; + cham_job_t jobu = ChamAllVec; + cham_job_t jobvt = ChamAllVec; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Alloc_Workspace_zgesvd( M, N, &descT, P, Q ); + + if ( (jobu == ChamAllVec) || (jobu == ChamSVec) ) { + Un = ( jobu == ChamSVec ) ? K : M; + U = malloc( LDU*Un*sizeof(CHAMELEON_Complex64_t) ); + } + else { + U = NULL; + } + + if ( (jobvt == ChamAllVec) || (jobvt == ChamSVec) ) { + LDVt = ( jobvt == ChamSVec ) ? K : N; + Vtn = N; + Vt = malloc( LDVt*N*sizeof(CHAMELEON_Complex64_t) ); + } + else { + Vt = NULL; + } + + /* Generate the diagonal of eigen/singular values */ + D = malloc( K*sizeof(double) ); + S = malloc( K*sizeof(double) ); + + /* Fills the matrix with random values */ + hres = CHAMELEON_zlatms_Tile( ChamDistUniform, seedA, ChamNonsymPosv, D, mode, cond, 1., descA ); + if ( hres != 0 ) { + return hres; + } + /* + * descA0 is defined here because of the cost of zlatms. To copy descA in descA0 + * now prevents to call it again later in the check (indeed descA is modified + * with the call to CHAMELEON_zgepdf_qdwh_Tile[_Async]). + */ + if ( check ) { + descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_GLOBAL ); + CHAMELEON_zlacpy_Tile( ChamUpperLower, descA, descA0 ); + } + + /* Calculates the solution */ + testing_start( &test_data ); + if ( async ) { + hres = CHAMELEON_zgesvd_Tile_Async( jobu, jobvt, descA, S, descT, U, LDU, Vt, LDVt, test_data.sequence, &test_data.request ); + CHAMELEON_Desc_Flush( descA, test_data.sequence ); + CHAMELEON_Desc_Flush( descT, test_data.sequence ); + } + else { + hres = CHAMELEON_zgesvd_Tile( jobu, jobvt, descA, S, descT, U, LDU, Vt, LDVt ); + } + test_data.hres = hres; + testing_stop( &test_data, flops_zgesvd( M, N ) ); + + /* Checks the factorisation and residue */ + if ( check ) { + + hres += check_zgesvd( args, jobu, jobvt, descA0, descA, D, S, U, LDU, Vt, LDVt ); + CHAMELEON_Desc_Destroy( &descA0 ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descT ); + free( S ); + free( D ); + if ( jobu == ChamAllVec || jobu == ChamSVec ) { + free( U ); + } + if ( jobvt == ChamAllVec || jobvt == ChamSVec ) { + free( Vt ); + } + + return hres; +} + +int +testing_zgesvd_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = chameleon_min( M, N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + double cond = run_arg_get_double( args, "cond", 1.e16 ); + int mode = run_arg_get_int( args, "mode", 4 ); + + /* Descriptors */ + CHAM_desc_t *descT; + CHAMELEON_Complex64_t *A, *A0, *U, *Vt; + double *S, *D; + int LDU = M; + int LDVt = N; + int Un, Vtn; + cham_job_t jobu = ChamAllVec; + cham_job_t jobvt = ChamAllVec; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + A = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Alloc_Workspace_zgesvd( M, N, &descT, 1, 1 ); + + if ( (jobu == ChamAllVec) || (jobu == ChamSVec) ) { + Un = ( jobu == ChamSVec ) ? K : M; + U = malloc( LDU*Un*sizeof(CHAMELEON_Complex64_t) ); + } + else { + U = NULL; + } + + if ( (jobvt == ChamAllVec) || (jobvt == ChamSVec) ) { + LDVt = ( jobvt == ChamSVec ) ? K : N; + Vtn = N; + Vt = malloc( LDVt*N*sizeof(CHAMELEON_Complex64_t) ); + } + else { + Vt = NULL; + } + + /* Generate the diagonal of eigen/singular values */ + D = malloc( K*sizeof(double) ); + S = malloc( K*sizeof(double) ); + + /* Fills the matrix with random values */ + hres = CHAMELEON_zlatms( M, N, ChamDistUniform, seedA, ChamNonsymPosv, D, mode, cond, 1., A, LDA ); + if ( hres != 0 ) { + return hres; + } + /* + * A0 is defined here because of the cost of zlatms. To copy A in A0 + * now prevents to call it again later in the check (indeed A is modified + * with the call to CHAMELEON_zgepdf_qdwh). + */ + if ( check ) { + A0 = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zlacpy( ChamUpperLower, M, N, A, LDA, A0, LDA ); + } + + /* Calculates the solution */ + testing_start( &test_data ); + hres = CHAMELEON_zgesvd( jobu, jobvt, M, N, A, LDA, S, descT, U, LDU, Vt, LDVt ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgesvd( M, N ) ); + + /* Checks the factorisation and residue */ + if ( check ) { + + hres += check_zgesvd_std( args, jobu, jobvt, M, N, A0, A, LDA, D, S, U, LDU, Vt, LDVt ); + free( A0 ); + } + + free( A ); + free( S ); + if ( jobu == ChamAllVec || jobu == ChamSVec ) { + free( U ); + } + if ( jobvt == ChamAllVec || jobvt == ChamSVec ) { + free( Vt ); + } + CHAMELEON_Desc_Destroy( &descT ); + + return hres; +} + +testing_t test_zgesvd; +const char *zgesvd_params[] = { "mtxfmt", "nb", "m", "n", "lda", "seedA", NULL }; +const char *zgesvd_output[] = { NULL }; +const char *zgesvd_outchk[] = { "||R||", "||I-QQ'||", "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgesvd_init( void ) __attribute__( ( constructor ) ); +void +testing_zgesvd_init( void ) +{ + test_zgesvd.name = "zgesvd"; + test_zgesvd.helper = "Singular Value Decomposition"; + test_zgesvd.params = zgesvd_params; + test_zgesvd.output = zgesvd_output; + test_zgesvd.outchk = zgesvd_outchk; + test_zgesvd.fptr_desc = testing_zgesvd_desc; + test_zgesvd.fptr_std = testing_zgesvd_std; + test_zgesvd.next = NULL; + + testing_register( &test_zgesvd ); +}