diff --git a/testing/testing_zcesca.c b/testing/testing_zcesca.c index 71accd144f4ae94606c98acb8ca7527085a4c362..0e8df8ad53c77b3d89e9b3373a9ae095ae0b70ef 100644 --- a/testing/testing_zcesca.c +++ b/testing/testing_zcesca.c @@ -92,6 +92,42 @@ testing_zcesca_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zcesca_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + + /* Matrix */ + CHAMELEON_Complex64_t *A; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Create the matrix (size computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Fill the matrix with random values */ + CHAMELEON_zplrnt( M, N, A, LDA, seedA ); + + /* Compute the centered-scaled matrix transformation */ + testing_start( &test_data ); + hres = CHAMELEON_zcesca( 1, 1, ChamColumnwise, M, N, A, LDA, NULL, NULL ); + test_data.hres = hres; + testing_stop( &test_data, flops_zcesca( M, N ) ); + + free( A ); + + (void)check; + return hres; +} + testing_t test_zcesca; const char *zcesca_params[] = { "mtxfmt", "nb", "trans", "m", "n", "lda", "seedA", NULL }; const char *zcesca_output[] = { NULL }; @@ -110,7 +146,7 @@ testing_zcesca_init( void ) test_zcesca.output = zcesca_output; test_zcesca.outchk = zcesca_outchk; test_zcesca.fptr_desc = testing_zcesca_desc; - test_zcesca.fptr_std = NULL; + test_zcesca.fptr_std = testing_zcesca_std; test_zcesca.next = NULL; testing_register( &test_zcesca ); diff --git a/testing/testing_zgeadd.c b/testing/testing_zgeadd.c index 642d4ec20ca6f08da225e80b6a0c41b0bd17b345..ab0b79b4c1e372ed7346a0f3f82214c05f035ab9 100644 --- a/testing/testing_zgeadd.c +++ b/testing/testing_zgeadd.c @@ -116,6 +116,66 @@ testing_zgeadd_desc( run_arg_list_t 
*args, int check ) return hres; } +int +testing_zgeadd_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", ( ( trans == ChamNoTrans ) ? M : N ) ); + int LDB = run_arg_get_int( args, "LDB", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + CHAMELEON_Complex64_t alpha = testing_zalea(); + CHAMELEON_Complex64_t beta = testing_zalea(); + + /* Matrices and dimensions of A */ + int Am, An; + CHAMELEON_Complex64_t *A, *B; + + alpha = run_arg_get_complex64( args, "alpha", alpha ); + beta = run_arg_get_complex64( args, "beta", beta ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + Am = (trans == ChamNoTrans)? M : N; + An = (trans == ChamNoTrans)? 
N : M; + + /* Create the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*An*sizeof(CHAMELEON_Complex64_t) ); + B = malloc( (size_t)LDB*N *sizeof(CHAMELEON_Complex64_t) ); + + /* Fill the matrix with random values */ + CHAMELEON_zplrnt( Am, An, A, LDA, seedA ); + CHAMELEON_zplrnt( M, N, B, LDB, seedB ); + + /* Compute the sum */ + testing_start( &test_data ); + hres = CHAMELEON_zgeadd( trans, M, N, alpha, A, LDA, beta, B, LDB ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgeadd( M, N ) ); + + /* Check the solution */ + if ( check ) { + CHAMELEON_Complex64_t *B0 = malloc( (size_t)LDB*N *sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplrnt( M, N, B0, LDB, seedB ); + + hres += check_zsum_std( args, ChamUpperLower, trans, M, N, alpha, A, LDA, beta, B0, B, LDB ); + + free( B0 ); + } + + free( A ); + free( B ); + + return hres; +} + testing_t test_zgeadd; const char *zgeadd_params[] = { "mtxfmt", "nb", "trans", "m", "n", "lda", "ldb", "alpha", "beta", "seedA", "seedB", NULL }; @@ -135,7 +195,7 @@ testing_zgeadd_init( void ) test_zgeadd.output = zgeadd_output; test_zgeadd.outchk = zgeadd_outchk; test_zgeadd.fptr_desc = testing_zgeadd_desc; - test_zgeadd.fptr_std = NULL; + test_zgeadd.fptr_std = testing_zgeadd_std; test_zgeadd.next = NULL; testing_register( &test_zgeadd ); diff --git a/testing/testing_zgelqf.c b/testing/testing_zgelqf.c index 8171ee543ac9f5aada28cfa5f6f89c79f49fe50a..d9209a3a654052380be45557a90f578d76442a26 100644 --- a/testing/testing_zgelqf.c +++ b/testing/testing_zgelqf.c @@ -100,6 +100,74 @@ testing_zgelqf_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgelqf_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + 
int RH = run_arg_get_int( args, "qra", 4 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + int K = chameleon_min( M, N ); + + /* Matrix and workspace descriptor */ + CHAMELEON_Complex64_t *A; + CHAM_desc_t *descT; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + if ( RH > 0 ) { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder ); + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH ); + } + else { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder ); + } + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descT, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( M, N, A, LDA, seedA ); + + /* Calculates the solution */ + testing_start( &test_data ); + hres = CHAMELEON_zgelqf( M, N, A, LDA, descT ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgelqf( M, N ) ); + + /* Checks the factorisation and orthogonality */ + if ( check ) { + CHAMELEON_Complex64_t *Qlap = malloc( (size_t)N*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + CHAMELEON_zplrnt( M, N, A0, LDA, seedA ); + + CHAMELEON_zunglq( N, N, K, A, LDA, descT, Qlap, N ); + + hres += check_zgelqf_std( args, N, N, K, A0, A, LDA, Qlap, N ); + hres += check_zortho_std( args, N, N, Qlap, N ); + + free( A0 ); + free( Qlap ); + } + + free( A ); + CHAMELEON_Desc_Destroy( &descT ); + + return hres; +} + testing_t test_zgelqf; const char *zgelqf_params[] = { "mtxfmt", "nb", "ib", "m", "n", "lda", "qra", "seedA", NULL }; const char *zgelqf_output[] = { NULL }; @@ -118,7 +186,7 @@ testing_zgelqf_init( void ) test_zgelqf.output = zgelqf_output; test_zgelqf.outchk = zgelqf_outchk; test_zgelqf.fptr_desc = testing_zgelqf_desc; - test_zgelqf.fptr_std = NULL; + test_zgelqf.fptr_std = testing_zgelqf_std; test_zgelqf.next = NULL; testing_register( &test_zgelqf ); 
diff --git a/testing/testing_zgelqf_hqr.c b/testing/testing_zgelqf_hqr.c index 1851246698cf35b57fffe0909dc4ab5bc28ad410..45cfb63f8db479553f8976af03c8f8b4b567c8d4 100644 --- a/testing/testing_zgelqf_hqr.c +++ b/testing/testing_zgelqf_hqr.c @@ -111,6 +111,83 @@ testing_zgelqf_hqr_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgelqf_hqr_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int qr_a = run_arg_get_int( args, "qra", -1 ); + int qr_p = run_arg_get_int( args, "qrp", -1 ); + int llvl = run_arg_get_int( args, "llvl", -1 ); + int hlvl = run_arg_get_int( args, "hlvl", -1 ); + int domino = run_arg_get_int( args, "domino", -1 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + int K = chameleon_min( M, N ); + + /* Matrix, workspace descriptors and libhqr tree */ + CHAMELEON_Complex64_t *A; + CHAM_desc_t *descTS, *descTT; + libhqr_tree_t qrtree; + libhqr_matrix_t matrix; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTT, P, Q ); + + /* Initialize matrix tree */ + matrix.mt = descTS->mt; + matrix.nt = descTS->nt; + matrix.nodes = P * Q; + matrix.p = P; + + libhqr_init_hqr( &qrtree, LIBHQR_LQ, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( M, N, A, LDA, seedA ); + + /* Calculates the solution */ + testing_start( &test_data ); + hres = CHAMELEON_zgelqf_param( &qrtree, M, N, A, LDA, descTS, 
descTT ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgelqf( M, N ) ); + + /* Checks the factorisation and orthogonality */ + if ( check ) { + CHAMELEON_Complex64_t *Qlap = malloc( (size_t)N*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + CHAMELEON_zplrnt( M, N, A0, LDA, seedA ); + + CHAMELEON_zunglq_param( &qrtree, N, N, K, A, LDA, descTS, descTT, Qlap, N ); + + hres += check_zgelqf_std( args, N, N, K, A0, A, LDA, Qlap, N ); + hres += check_zortho_std( args, N, N, Qlap, N ); + + free( A0 ); + free( Qlap ); + } + + free( A ); + CHAMELEON_Desc_Destroy( &descTS ); + CHAMELEON_Desc_Destroy( &descTT ); + libhqr_finalize( &qrtree ); + + return hres; +} + testing_t test_zgelqf_hqr; const char *zgelqf_hqr_params[] = { "mtxfmt", "nb", "ib", "m", "n", "lda", "qra", "qrp", "llvl", "hlvl", "domino", "seedA", NULL }; @@ -130,7 +207,7 @@ testing_zgelqf_hqr_init( void ) test_zgelqf_hqr.output = zgelqf_hqr_output; test_zgelqf_hqr.outchk = zgelqf_hqr_outchk; test_zgelqf_hqr.fptr_desc = testing_zgelqf_hqr_desc; - test_zgelqf_hqr.fptr_std = NULL; + test_zgelqf_hqr.fptr_std = testing_zgelqf_hqr_std; test_zgelqf_hqr.next = NULL; testing_register( &test_zgelqf_hqr ); diff --git a/testing/testing_zgels.c b/testing/testing_zgels.c index 4a58e34aa503ccf758178004a915536dfdf24f47..3ae6de342389819772b1c28b078c5cfa4176d8ba 100644 --- a/testing/testing_zgels.c +++ b/testing/testing_zgels.c @@ -140,6 +140,84 @@ testing_zgels_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgels_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int 
maxMN = chameleon_max( M, N ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", M ); + int LDB = run_arg_get_int( args, "LDB", maxMN ); + int RH = run_arg_get_int( args, "qra", 4 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + + /* Matrices and workspace descriptor */ + CHAMELEON_Complex64_t *A, *X; + CHAM_desc_t *descT; + + /* Make sure trans is only Notrans or ConjTrans */ + trans = ( trans == ChamNoTrans ) ? trans : ChamConjTrans; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + if ( RH > 0 ) { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder ); + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH ); + } + else { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder ); + } + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N* sizeof(CHAMELEON_Complex64_t) ); + X = malloc( (size_t)LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descT, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( M, N, A, LDA, seedA ); + CHAMELEON_zplrnt( maxMN, NRHS, X, LDB, seedB ); + + /* Computes the solution */ + testing_start( &test_data ); + hres = CHAMELEON_zgels( trans, M, N, NRHS, A, LDA, descT, X, LDB ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgels( trans, M, N, NRHS ) ); + + if ( check ) { + CHAMELEON_Complex64_t *A0, *B; + + A0 = malloc( (size_t)LDA*N* sizeof(CHAMELEON_Complex64_t) ); + B = malloc( (size_t)LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + + CHAMELEON_zplrnt( M, N, A0, LDA, seedA ); + CHAMELEON_zplrnt( maxMN, NRHS, B, LDB, seedB ); + + /* Check the factorization and the residual; accumulate so a failed solve is not masked */ + hres += check_zgels_std( args, trans, M, N, NRHS, A0, LDA, X, LDB, B, LDB ); + + free( A0 ); + free( B ); + } + + free( A ); + CHAMELEON_Desc_Destroy( &descT ); + free( X ); + + return hres; +} + testing_t test_zgels; const char 
*zgels_params[] = { "mtxfmt", "nb", "ib", "trans", "m", "n", "k", "lda", "ldb", "qra", "seedA", "seedB", NULL }; @@ -159,7 +237,7 @@ testing_zgels_init( void ) test_zgels.output = zgels_output; test_zgels.outchk = zgels_outchk; test_zgels.fptr_desc = testing_zgels_desc; - test_zgels.fptr_std = NULL; + test_zgels.fptr_std = testing_zgels_std; test_zgels.next = NULL; testing_register( &test_zgels ); diff --git a/testing/testing_zgels_hqr.c b/testing/testing_zgels_hqr.c index ee19283a6a10eeb787cf8aa948f94f85572da486..b4ad33d5b42d82d79c629838d36349d339063d54 100644 --- a/testing/testing_zgels_hqr.c +++ b/testing/testing_zgels_hqr.c @@ -148,6 +148,91 @@ testing_zgels_hqr_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgels_hqr_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int maxMN = chameleon_max( M, N ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", M ); + int LDB = run_arg_get_int( args, "LDB", maxMN ); + int qr_a = run_arg_get_int( args, "qra", -1 ); + int qr_p = run_arg_get_int( args, "qrp", -1 ); + int llvl = run_arg_get_int( args, "llvl", -1 ); + int hlvl = run_arg_get_int( args, "hlvl", -1 ); + int domino = run_arg_get_int( args, "domino", -1 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + + /* Matrices, workspace descriptors and libhqr tree */ + CHAMELEON_Complex64_t *A, *X; + CHAM_desc_t *descTS, *descTT; + libhqr_tree_t qrtree; + libhqr_matrix_t matrix; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + 
+ /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N* sizeof(CHAMELEON_Complex64_t) ); + X = malloc( (size_t)LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTT, P, Q ); + + /* Initialize matrix tree */ + matrix.mt = descTS->mt; + matrix.nt = descTS->nt; + matrix.nodes = P * Q; + matrix.p = P; + + libhqr_init_hqr( &qrtree, ( M >= N ) ? LIBHQR_QR : LIBHQR_LQ, &matrix, + llvl, hlvl, qr_a, qr_p, domino, 0 ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( M, N, A, LDA, seedA ); + CHAMELEON_zplrnt( maxMN, NRHS, X, LDB, seedB ); + + /* Computes the solution */ + testing_start( &test_data ); + hres = CHAMELEON_zgels_param( &qrtree, trans, M, N, NRHS, A, LDA, descTS, descTT, X, LDB ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgels_hqr( trans, M, N, NRHS ) ); + + if ( check ) { + CHAMELEON_Complex64_t *A0, *B; + + A0 = malloc( (size_t)LDA*N* sizeof(CHAMELEON_Complex64_t) ); + B = malloc( (size_t)LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + + CHAMELEON_zplrnt( M, N, A0, LDA, seedA ); + CHAMELEON_zplrnt( maxMN, NRHS, B, LDB, seedB ); + + /* Check the factorization and the residual; accumulate so a failed solve is not masked */ + hres += check_zgels_std( args, trans, M, N, NRHS, A0, LDA, X, LDB, B, LDB ); + + free( A0 ); + free( B ); + } + + free( A ); + CHAMELEON_Desc_Destroy( &descTS ); + CHAMELEON_Desc_Destroy( &descTT ); + free( X ); + libhqr_finalize( &qrtree ); + + return hres; +} + testing_t test_zgels_hqr; const char *zgels_hqr_params[] = { "mtxfmt", "nb", "ib", "trans", "m", "n", "k", "lda", "ldb", "qra", "qra", "qrp", @@ -169,7 +254,7 @@ testing_zgels_hqr_init( void ) test_zgels_hqr.output = zgels_hqr_output; test_zgels_hqr.outchk = zgels_hqr_outchk; test_zgels_hqr.fptr_desc = testing_zgels_hqr_desc; - test_zgels_hqr.fptr_std = NULL; + test_zgels_hqr.fptr_std = testing_zgels_hqr_std; test_zgels_hqr.next = NULL; testing_register( &test_zgels_hqr ); diff --git a/testing/testing_zgenm2.c 
b/testing/testing_zgenm2.c index 3beff91f0d6db9895ff72a951b8d3a33f0944590..46ca16531284ec7c0e0f790c22ec6221e26b2c46 100644 --- a/testing/testing_zgenm2.c +++ b/testing/testing_zgenm2.c @@ -125,6 +125,81 @@ testing_zgenm2_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgenm2_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int minMN = chameleon_min( M, N ); + double cond = run_arg_get_double( args, "cond", 1.e16 ); + int mode = run_arg_get_int( args, "mode", 4 ); + double tol = 1.e-1; + + /* Matrix, singular values and norms */ + double norm; + CHAMELEON_Complex64_t *A; + double *D, dmax = 1.; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Generate the diagonal of eigen/singular values */ + D = malloc( minMN * sizeof(double) ); +#if !defined(CHAMELEON_SIMULATION) + hres = CORE_dlatm1( mode, cond, 0, ChamDistUniform, seedA, D, minMN ); + if ( hres != 0 ) { + free( D ); + return hres; + } + + /* Save the largest absolute value */ + hres = cblas_idamax( minMN, D, 1 ); + dmax = fabs( D[hres] ); +#else + (void)mode; +#endif + + /* Creates the matrix (size computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Fills the matrix with random values; A is released on failure */ + hres = CHAMELEON_zlatms( M, N, ChamDistUniform, seedA, ChamNonsymPosv, D, 0, cond, 0., A, LDA ); + free( D ); + if ( hres != 0 ) { + free( A ); + return hres; + } + + /* Calculates the norm */ + testing_start( &test_data ); + norm = CHAMELEON_zgenm2( tol, M, N, A, LDA ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgenm2( M, N ) ); + + /* Checks the solution */ + if ( check ) { + double res = fabs(dmax - norm) / (dmax * tol); + + run_arg_add_double( args, "||A||", dmax ); + run_arg_add_double( args, 
"||B||", norm ); + run_arg_add_double( args, "||R||", res ); + + if ( isnan(res) || isinf(res) || (res > 10.0) ) { + hres = 1; + } + } + + free( A ); + + return hres; +} + testing_t test_zgenm2; const char *zgenm2_params[] = { "mtxfmt", "nb", "m", "n", "lda", "seedA", "cond", "mode", NULL }; const char *zgenm2_output[] = { NULL }; @@ -143,7 +218,7 @@ testing_zgenm2_init( void ) test_zgenm2.output = zgenm2_output; test_zgenm2.outchk = zgenm2_outchk; test_zgenm2.fptr_desc = testing_zgenm2_desc; - test_zgenm2.fptr_std = NULL; + test_zgenm2.fptr_std = testing_zgenm2_std; test_zgenm2.next = NULL; testing_register( &test_zgenm2 ); diff --git a/testing/testing_zgepdf_qdwh.c b/testing/testing_zgepdf_qdwh.c index c1be3ce09239ff3855089e47ac1e5176dc6ecafb..17cb18a4ac953d90b61fe565c52f0f652a2be4f3 100644 --- a/testing/testing_zgepdf_qdwh.c +++ b/testing/testing_zgepdf_qdwh.c @@ -51,7 +51,7 @@ testing_zgepdf_qdwh_desc( run_arg_list_t *args, int check ) int runtime; /* Descriptors */ - CHAM_desc_t *descA, *descA0, *descH; + CHAM_desc_t *descA, *descH, *descA0; gepdf_info_t info; CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); @@ -86,7 +86,11 @@ testing_zgepdf_qdwh_desc( run_arg_list_t *args, int check ) if ( hres != 0 ) { return hres; } - + /* + * descA0 is created here because of the cost of zlatms: copying descA into descA0 + * now avoids generating the matrix again later in the check (descA is overwritten + * by the call to CHAMELEON_zgepdf_qdwh_Tile[_Async]). 
+ */ if ( check ) { descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_GLOBAL ); CHAMELEON_zlacpy_Tile( ChamUpperLower, descA, descA0 ); @@ -120,6 +124,83 @@ testing_zgepdf_qdwh_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgepdf_qdwh_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + double cond = run_arg_get_double( args, "cond", 1.e16 ); + int mode = run_arg_get_int( args, "mode", 4 ); + int runtime; + + /* Matrices */ + CHAMELEON_Complex64_t *A, *H, *A0; + gepdf_info_t info; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + CHAMELEON_Get( CHAMELEON_RUNTIME, &runtime ); + if ( runtime == RUNTIME_SCHED_PARSEC ) { + fprintf( stderr, "SKIPPED: The QDWH polar decomposition is not supported with PaRSEC\n" ); + return -1; + } + + if ( N > M ) { + fprintf( stderr, "SKIPPED: The QDWH polar decomposition is performed only when M >= N\n" ); + return -1; + } + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + H = malloc( (size_t)LDB*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Fills the matrix with random values; A and H are released on failure */ + hres = CHAMELEON_zlatms( M, N, ChamDistUniform, seedA, ChamNonsymPosv, NULL, mode, cond, 1., A, LDA ); + if ( hres != 0 ) { + free( A ); + free( H ); + return hres; + } + /* + * A0 is created here because of the cost of zlatms: copying A into A0 + * now avoids generating the matrix again later in the check (A is overwritten + * by the call to CHAMELEON_zgepdf_qdwh). 
+ */ + if ( check ) { + A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zlacpy( ChamUpperLower, M, N, A, LDA, A0, LDA ); + } + + /* Computes the polar decomposition */ + testing_start( &test_data ); + hres = CHAMELEON_zgepdf_qdwh( M, N, A, LDA, H, LDB, &info ); + test_data.hres = hres; + testing_stop( &test_data, info.flops ); + + /* Checks the solution */ + if ( check ) { + hres += check_zxxpd_std ( args, M, N, A0, A, LDA, H, LDB ); + hres += check_zortho_std( args, M, N, A, LDA ); + + free( A0 ); + } + + free( A ); + free( H ); + + return hres; +} + testing_t test_zgepdf_qdwh; const char *zgepdf_qdwh_params[] = { "mtxfmt", "nb", "ib", "m", "n", "lda", "ldb", "seedA", "cond", "mode", NULL }; @@ -139,7 +220,7 @@ testing_zgepdf_qdwh_init( void ) test_zgepdf_qdwh.output = zgepdf_qdwh_output; test_zgepdf_qdwh.outchk = zgepdf_qdwh_outchk; test_zgepdf_qdwh.fptr_desc = testing_zgepdf_qdwh_desc; - test_zgepdf_qdwh.fptr_std = NULL; + test_zgepdf_qdwh.fptr_std = testing_zgepdf_qdwh_std; test_zgepdf_qdwh.next = NULL; testing_register( &test_zgepdf_qdwh ); diff --git a/testing/testing_zgeqrf.c b/testing/testing_zgeqrf.c index d672ec08d2db92294a0fd09921f6df55b61dbb3c..62f491540278c9c27f6bfa8c17a0b4d7341662b3 100644 --- a/testing/testing_zgeqrf.c +++ b/testing/testing_zgeqrf.c @@ -100,6 +100,74 @@ testing_zgeqrf_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgeqrf_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int RH = run_arg_get_int( args, "qra", 4 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + int K = chameleon_min( M, N ); + + /* Matrix and workspace descriptor */ + 
CHAMELEON_Complex64_t *A; + CHAM_desc_t *descT; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + if ( RH > 0 ) { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder ); + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH ); + } + else { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder ); + } + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descT, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( M, N, A, LDA, seedA ); + + /* Calculates the solution */ + testing_start( &test_data ); + hres = CHAMELEON_zgeqrf( M, N, A, LDA, descT ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgeqrf( M, N ) ); + + /* Checks the factorisation and orthogonality */ + if ( check ) { + CHAMELEON_Complex64_t *Qlap = malloc( (size_t)M*M*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + CHAMELEON_zplrnt( M, N, A0, LDA, seedA ); + + CHAMELEON_zungqr( M, M, K, A, LDA, descT, Qlap, M ); + + hres += check_zgeqrf_std( args, M, M, K, A0, A, LDA, Qlap, M ); + hres += check_zortho_std( args, M, M, Qlap, M ); + + free( A0 ); + free( Qlap ); + } + + free( A ); + CHAMELEON_Desc_Destroy( &descT ); + + return hres; +} + testing_t test_zgeqrf; const char *zgeqrf_params[] = { "mtxfmt", "nb", "ib", "m", "n", "lda", "qra", "seedA", NULL }; const char *zgeqrf_output[] = { NULL }; @@ -118,7 +186,7 @@ testing_zgeqrf_init( void ) test_zgeqrf.output = zgeqrf_output; test_zgeqrf.outchk = zgeqrf_outchk; test_zgeqrf.fptr_desc = testing_zgeqrf_desc; - test_zgeqrf.fptr_std = NULL; + test_zgeqrf.fptr_std = testing_zgeqrf_std; test_zgeqrf.next = NULL; testing_register( &test_zgeqrf ); diff --git a/testing/testing_zgeqrf_hqr.c b/testing/testing_zgeqrf_hqr.c index e7100350e81285119e4f2ec3e847b1c82081d83e..94ed1f2b7ba4699a803bf7d3a8e0f7144a05cb1b 100644 --- 
a/testing/testing_zgeqrf_hqr.c +++ b/testing/testing_zgeqrf_hqr.c @@ -111,6 +111,83 @@ testing_zgeqrf_hqr_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgeqrf_hqr_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int qr_a = run_arg_get_int( args, "qra", -1 ); + int qr_p = run_arg_get_int( args, "qrp", -1 ); + int llvl = run_arg_get_int( args, "llvl", -1 ); + int hlvl = run_arg_get_int( args, "hlvl", -1 ); + int domino = run_arg_get_int( args, "domino", -1 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + int K = chameleon_min( M, N ); + + /* Descriptors */ + CHAMELEON_Complex64_t *A; + CHAM_desc_t *descTS, *descTT; + libhqr_tree_t qrtree; + libhqr_matrix_t matrix; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* Creates the matrices */ + A = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTT, P, Q ); + + /* Initialize matrix tree */ + matrix.mt = descTS->mt; + matrix.nt = descTS->nt; + matrix.nodes = P * Q; + matrix.p = P; + + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( M, N, A, LDA, seedA ); + + /* Calculates the solution */ + testing_start( &test_data ); + hres = CHAMELEON_zgeqrf_param( &qrtree, M, N, A, LDA, descTS, descTT ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgeqrf( M, N ) ); + + /* Checks the factorisation and orthogonality */ + if ( check ) { + 
CHAMELEON_Complex64_t *Qlap = malloc( M*M*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Complex64_t *A0 = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + CHAMELEON_zplrnt( M, N, A0, LDA, seedA ); + + CHAMELEON_zungqr_param( &qrtree, M, M, K, A, LDA, descTS, descTT, Qlap, M ); + + hres += check_zgeqrf_std( args, M, M, K, A0, A, LDA, Qlap, M ); + hres += check_zortho_std( args, M, M, Qlap, M ); + + free( A0 ); + free( Qlap ); + } + + free( A ); + CHAMELEON_Desc_Destroy( &descTS ); + CHAMELEON_Desc_Destroy( &descTT ); + libhqr_finalize( &qrtree ); + + return hres; +} + testing_t test_zgeqrf_hqr; const char *zgeqrf_hqr_params[] = { "mtxfmt", "nb", "ib", "m", "n", "lda", "qra", "qrp", "llvl", "hlvl", "domino", "seedA", NULL }; @@ -130,7 +207,7 @@ testing_zgeqrf_hqr_init( void ) test_zgeqrf_hqr.output = zgeqrf_hqr_output; test_zgeqrf_hqr.outchk = zgeqrf_hqr_outchk; test_zgeqrf_hqr.fptr_desc = testing_zgeqrf_hqr_desc; - test_zgeqrf_hqr.fptr_std = NULL; + test_zgeqrf_hqr.fptr_std = testing_zgeqrf_hqr_std; test_zgeqrf_hqr.next = NULL; testing_register( &test_zgeqrf_hqr ); diff --git a/testing/testing_zgesv.c b/testing/testing_zgesv.c index 9ac90fede0a20a134e81866119933b4040333bf3..b97fba5ab582141b386ff306156c12a625befc92 100644 --- a/testing/testing_zgesv.c +++ b/testing/testing_zgesv.c @@ -105,6 +105,67 @@ testing_zgesv_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgesv_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + + /* Descriptors */ + CHAMELEON_Complex64_t *A, *X; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* 
Creates the matrices */ + A = malloc( LDA*N* sizeof(CHAMELEON_Complex64_t) ); + X = malloc( LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( N, N, A, LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, X, LDB, seedB ); + + /* Calculates the solution */ + testing_start( &test_data ); + hres = CHAMELEON_zgesv_nopiv( N, NRHS, A, LDA, X, LDB ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgesv( N, NRHS ) ); + + /* Checks the factorisation and residue */ + if ( check ) { + CHAMELEON_Complex64_t *A0, *B; + + /* Check the factorization */ + A0 = malloc( LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplrnt( N, N, A0, LDA, seedA ); + + hres += check_zxxtrf_std( args, ChamGeneral, ChamUpperLower, N, N, A0, A, LDA ); + + /* Check the solve */ + B = malloc( LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplrnt( N, N, A0, LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, B, LDB, seedB ); + + hres += check_zsolve_std( args, ChamGeneral, ChamNoTrans, ChamUpperLower, N, NRHS, A0, LDA, X, B, LDB ); + + free( A0 ); + free( B ); + } + + free( A ); + free( X ); + + return hres; +} + testing_t test_zgesv; const char *zgesv_params[] = { "mtxfmt", "nb", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; const char *zgesv_output[] = { NULL }; @@ -123,7 +184,7 @@ testing_zgesv_init( void ) test_zgesv.output = zgesv_output; test_zgesv.outchk = zgesv_outchk; test_zgesv.fptr_desc = testing_zgesv_desc; - test_zgesv.fptr_std = NULL; + test_zgesv.fptr_std = testing_zgesv_std; test_zgesv.next = NULL; testing_register( &test_zgesv ); diff --git a/testing/testing_zgetrf.c b/testing/testing_zgetrf.c index af695a37ad786fb9c0b43258379582ac4d26e376..c8c28e71ca1f3ef369fad8166a68f064a3bfea5b 100644 --- a/testing/testing_zgetrf.c +++ b/testing/testing_zgetrf.c @@ -78,6 +78,51 @@ testing_zgetrf_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgetrf_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = 
{ .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + + /* Matrix buffer */ + CHAMELEON_Complex64_t *A; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix (size computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( M, N, A, LDA, seedA ); + + /* Runs and times the factorization without pivoting */ + testing_start( &test_data ); + hres = CHAMELEON_zgetrf_nopiv( M, N, A, LDA ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgetrf( M, N ) ); + + /* Checks the factorisation against A0, regenerated from the same seed */ + if ( check ) { + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplrnt( M, N, A0, LDA, seedA ); + + hres += check_zxxtrf_std( args, ChamGeneral, ChamUpperLower, M, N, A0, A, LDA ); + + free( A0 ); + } + + free( A ); + + return hres; +} + testing_t test_zgetrf; const char *zgetrf_params[] = { "mtxfmt", "nb", "m", "n", "lda", "seedA", NULL }; const char *zgetrf_output[] = { NULL }; @@ -96,7 +141,7 @@ testing_zgetrf_init( void ) test_zgetrf.output = zgetrf_output; test_zgetrf.outchk = zgetrf_outchk; test_zgetrf.fptr_desc = testing_zgetrf_desc; - test_zgetrf.fptr_std = NULL; + test_zgetrf.fptr_std = testing_zgetrf_std; test_zgetrf.next = NULL; testing_register( &test_zgetrf ); diff --git a/testing/testing_zgetrs.c b/testing/testing_zgetrs.c index cc417bd40be7b12f638243000079d4710c8135f3..f292423055d8968be3f4a0a1f549862be519a8a5 100644 --- a/testing/testing_zgetrs.c +++ b/testing/testing_zgetrs.c @@ -95,6 +95,64 @@ testing_zgetrs_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgetrs_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb
= run_arg_get_int( args, "nb", 320 ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + + /* Matrix buffers */ + CHAMELEON_Complex64_t *A, *X; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N* sizeof(CHAMELEON_Complex64_t) ); + X = malloc( (size_t)LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( N, N, A, LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, X, LDB, seedB ); + + hres = CHAMELEON_zgetrf_nopiv( N, N, A, LDA ); + assert( hres == 0 ); + + /* Calculates the solution (only the solve is timed) */ + testing_start( &test_data ); + hres += CHAMELEON_zgetrs_nopiv( ChamNoTrans, N, NRHS, A, LDA, X, LDB ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgetrs( N, NRHS ) ); + + /* Checks the solve against A0 and B, regenerated from the same seeds */ + if ( check ) { + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N* sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Complex64_t *B = malloc( (size_t)LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + + CHAMELEON_zplrnt( N, N, A0, LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, B, LDB, seedB ); + + hres += check_zsolve_std( args, ChamGeneral, ChamNoTrans, ChamUpperLower, + N, NRHS, A0, LDA, X, B, LDB ); + + free( A0 ); + free( B ); + } + + free( A ); + free( X ); + + return hres; +} + testing_t test_zgetrs; const char *zgetrs_params[] = { "mtxfmt", "nb", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; const char *zgetrs_output[] = { NULL }; @@ -113,7 +171,7 @@ testing_zgetrs_init( void ) test_zgetrs.output = zgetrs_output; test_zgetrs.outchk = zgetrs_outchk; test_zgetrs.fptr_desc = testing_zgetrs_desc; - test_zgetrs.fptr_std = NULL; + test_zgetrs.fptr_std = testing_zgetrs_std; test_zgetrs.next = NULL; testing_register( &test_zgetrs ); diff --git
a/testing/testing_zgram.c b/testing/testing_zgram.c index b7b8f81cdfe8c0b95580788a91b26bb5c938485f..bd351ed3689afa7e2fcc34b647a1c48b798c7b2a 100644 --- a/testing/testing_zgram.c +++ b/testing/testing_zgram.c @@ -92,6 +92,42 @@ testing_zgram_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zgram_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + + /* Matrix buffer */ + CHAMELEON_Complex64_t *A; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Create the matrix (size computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Fill the matrix with random values */ + CHAMELEON_zplghe( (double)N, uplo, N, A, LDA, seedA ); + + /* Compute the gram matrix transformation */ + testing_start( &test_data ); + hres = CHAMELEON_zgram( uplo, N, A, LDA ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgram( N ) ); + + free( A ); + + (void)check; + return hres; +} + testing_t test_zgram; const char *zgram_params[] = { "mtxfmt", "nb", "uplo", "n", "n", "lda", "seedA", NULL }; const char *zgram_output[] = { NULL }; @@ -110,7 +146,7 @@ testing_zgram_init( void ) test_zgram.output = zgram_output; test_zgram.outchk = zgram_outchk; test_zgram.fptr_desc = testing_zgram_desc; - test_zgram.fptr_std = NULL; + test_zgram.fptr_std = testing_zgram_std; test_zgram.next = NULL; testing_register( &test_zgram ); diff --git a/testing/testing_zlacpy.c b/testing/testing_zlacpy.c index df9d9787bfb209364a0df5f96165da85322fb144..963a62519796c5400aa694169282438bae9e6384 100644 --- a/testing/testing_zlacpy.c +++ b/testing/testing_zlacpy.c @@ -113,6 +113,52 @@ testing_zlacpy_desc( run_arg_list_t *args, int check ) return hres; } +int
+testing_zlacpy_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int LDB = run_arg_get_int( args, "LDB", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + + /* Matrix buffers */ + CHAMELEON_Complex64_t *A, *B; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates two different matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + B = malloc( (size_t)LDB*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Fills each matrix with different random values */ + CHAMELEON_zplrnt( M, N, A, LDA, seedA ); + /* We use seedA + 1, just to create a variation in B */ + CHAMELEON_zplrnt( M, N, B, LDB, seedA + 1 ); + + /* Makes a copy of A to B */ + testing_start( &test_data ); + hres = CHAMELEON_zlacpy( uplo, M, N, A, LDA, B, LDB ); + test_data.hres = hres; + testing_stop( &test_data, flops_zlacpy( uplo, M, N ) ); + + /* Checks their differences */ + if ( check ) { + hres += check_zmatrices_std( args, uplo, M, N, A, LDA, B, LDB ); + } + + free( A ); + free( B ); + + return hres; +} + testing_t test_zlacpy; const char *zlacpy_params[] = { "mtxfmt", "nb", "uplo", "m", "n", "lda", "ldb", "seedA", NULL }; const char *zlacpy_output[] = { NULL }; @@ -131,7 +177,7 @@ testing_zlacpy_init( void ) test_zlacpy.output = zlacpy_output; test_zlacpy.outchk = zlacpy_outchk; test_zlacpy.fptr_desc = testing_zlacpy_desc; - test_zlacpy.fptr_std = NULL; + test_zlacpy.fptr_std = testing_zlacpy_std; test_zlacpy.next = NULL; testing_register( &test_zlacpy ); diff --git a/testing/testing_zlascal.c b/testing/testing_zlascal.c index 407622ca4972a17ac3f5a15416ee1cb1a67859d6..988af04f853102f9bf4c7653e108162aa33488e7 100644 --- a/testing/testing_zlascal.c +++
b/testing/testing_zlascal.c @@ -107,6 +107,53 @@ testing_zlascal_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zlascal_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + CHAMELEON_Complex64_t alpha = run_arg_get_complex64( args, "alpha", 1. ); + int seedA = run_arg_get_int( args, "seedA", random() ); + + /* Matrix buffer */ + CHAMELEON_Complex64_t *A; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix (size computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( M, N, A, LDA, seedA ); + + /* Scales the matrix */ + testing_start( &test_data ); + hres = CHAMELEON_zlascal( uplo, M, N, alpha, A, LDA ); + test_data.hres = hres; + testing_stop( &test_data, flops_zlascal( uplo, M, N ) ); + + /* Checks the result against Ainit, regenerated from the same seed */ + if ( check ) { + CHAMELEON_Complex64_t *Ainit = malloc ( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplrnt( M, N, Ainit, LDA, seedA ); + + hres += check_zscale_std( args, uplo, M, N, alpha, Ainit, A, LDA ); + + free( Ainit ); + } + + free( A ); + + return hres; +} + testing_t test_zlascal; const char *zlascal_params[] = { "mtxfmt", "nb", "uplo", "m", "n", "lda", "alpha", "seedA", NULL }; const char *zlascal_output[] = { NULL }; @@ -125,7 +172,7 @@ testing_zlascal_init( void ) test_zlascal.output = zlascal_output; test_zlascal.outchk = zlascal_outchk; test_zlascal.fptr_desc = testing_zlascal_desc; - test_zlascal.fptr_std = NULL; + test_zlascal.fptr_std = testing_zlascal_std; test_zlascal.next = NULL; testing_register( &test_zlascal ); diff --git a/testing/testing_zlauum.c b/testing/testing_zlauum.c index
47c7fd326d78bcca81008280135f9757adecbcda..71859dbcd40f5dc79eb7bcf3b60d5c5ba3a653d9 100644 --- a/testing/testing_zlauum.c +++ b/testing/testing_zlauum.c @@ -84,6 +84,50 @@ testing_zlauum_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zlauum_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + + /* Matrix buffer */ + CHAMELEON_Complex64_t *A; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix (size computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Initialises the matrix from seedA */ + CHAMELEON_zplghe( 0., uplo, N, A, LDA, seedA ); + + /* Calculates the matrix product */ + testing_start( &test_data ); + hres = CHAMELEON_zlauum( uplo, N, A, LDA ); + test_data.hres = hres; + testing_stop( &test_data, flops_zlauum( N ) ); + + if ( check ) { + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplghe( 0., uplo, N, A0, LDA, seedA ); + + hres += check_zlauum_std( args, uplo, N, A0, A, LDA ); + + free( A0 ); + } + + free( A ); + + return hres; +} + testing_t test_zlauum; const char *zlauum_params[] = { "mtxfmt", "nb", "uplo", "n", "lda", "seedA", NULL }; const char *zlauum_output[] = { NULL }; @@ -102,7 +146,7 @@ testing_zlauum_init( void ) test_zlauum.output = zlauum_output; test_zlauum.outchk = zlauum_outchk; test_zlauum.fptr_desc = testing_zlauum_desc; - test_zlauum.fptr_std = NULL; + test_zlauum.fptr_std = testing_zlauum_std; test_zlauum.next = NULL; testing_register( &test_zlauum ); diff --git a/testing/testing_zplrnk.c b/testing/testing_zplrnk.c index 96d2b9747368cfc163cb5df1c3b7527c9611e700..7b0b0ba8e08f7eb8790ed8252acdb4e29a1ff58b
100644 --- a/testing/testing_zplrnk.c +++ b/testing/testing_zplrnk.c @@ -73,6 +73,45 @@ testing_zplrnk_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zplrnk_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = run_arg_get_int( args, "K", N ); + int LDC = run_arg_get_int( args, "LDC", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + + /* Matrix buffer */ + CHAMELEON_Complex64_t *C; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix (size computed in size_t to avoid int overflow) */ + C = malloc ( (size_t)LDC*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Calculates the random rank-k matrix */ + testing_start( &test_data ); + hres = CHAMELEON_zplrnk( M, N, K, C, LDC, seedA, seedB ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgemm( M, N, K ) ); + + /* Checks the solution; accumulates so a generator failure is not masked */ + if ( check ) { + hres += check_zrankk_std( args, M, N, K, C, LDC ); + } + + free( C ); + + return hres; +} + testing_t test_zplrnk; const char *zplrnk_params[] = { "nb", "m", "n", "k", "ldc", "seedA", "seedB", NULL }; const char *zplrnk_output[] = { NULL }; @@ -91,7 +130,7 @@ testing_zplrnk_init( void ) test_zplrnk.output = zplrnk_output; test_zplrnk.outchk = zplrnk_outchk; test_zplrnk.fptr_desc = testing_zplrnk_desc; - test_zplrnk.fptr_std = NULL; + test_zplrnk.fptr_std = testing_zplrnk_std; test_zplrnk.next = NULL; testing_register( &test_zplrnk ); diff --git a/testing/testing_zsysv.c b/testing/testing_zsysv.c index a56b97de75d56d0a162961fb0ec7453fd4a7fddb..e73d555e0556e0fbbbe0969993b534c6f5968912 100644 --- a/testing/testing_zsysv.c +++ b/testing/testing_zsysv.c @@ -105,6 +105,68 @@ testing_zsysv_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zsysv_std( run_arg_list_t *args, int check )
+{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + + /* Matrix buffers */ + CHAMELEON_Complex64_t *A, *X; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N* sizeof(CHAMELEON_Complex64_t) ); + X = malloc( (size_t)LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + + /* Fills the matrix with random values */ + CHAMELEON_zplgsy( (double)N, uplo, N, A, LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, X, LDB, seedB ); + + /* Calculates the solution */ + testing_start( &test_data ); + hres = CHAMELEON_zsysv( uplo, N, NRHS, A, LDA, X, LDB ); + test_data.hres = hres; + testing_stop( &test_data, flops_zsysv( N, NRHS ) ); + + /* Checks the factorisation and residue */ + if ( check ) { + CHAMELEON_Complex64_t *A0, *B; + + /* Check the factorization */ + A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplgsy( (double)N, uplo, N, A0, LDA, seedA ); + + hres += check_zxxtrf_std( args, ChamSymmetric, uplo, N, N, A0, A, LDA ); + + /* Check the solve (A0 is regenerated before being reused) */ + B = malloc( (size_t)LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplrnt( N, NRHS, B, LDB, seedB ); + + CHAMELEON_zplgsy( (double)N, uplo, N, A0, LDA, seedA ); + hres += check_zsolve_std( args, ChamSymmetric, ChamNoTrans, uplo, N, NRHS, A0, LDA, X, B, LDB ); + + free( A0 ); + free( B ); + } + + free( A ); + free( X ); + + return hres; +} + testing_t test_zsysv; const char *zsysv_params[] = { "mtxfmt", "nb", "uplo", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; @@ -124,7 +186,7 @@ testing_zsysv_init( void ) test_zsysv.output = zsysv_output;
test_zsysv.outchk = zsysv_outchk; test_zsysv.fptr_desc = testing_zsysv_desc; - test_zsysv.fptr_std = NULL; + test_zsysv.fptr_std = testing_zsysv_std; test_zsysv.next = NULL; testing_register( &test_zsysv ); diff --git a/testing/testing_zsytrf.c b/testing/testing_zsytrf.c index a643e4915b219abe9d94b6789271cdd8a8b830ab..a2407f18fad629307976d0cffc38f2bd5cfe0995 100644 --- a/testing/testing_zsytrf.c +++ b/testing/testing_zsytrf.c @@ -78,6 +78,51 @@ testing_zsytrf_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zsytrf_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + + /* Matrix buffer */ + CHAMELEON_Complex64_t *A; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix (size computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Fills the matrix with random values */ + CHAMELEON_zplgsy( (double)N, uplo, N, A, LDA, seedA ); + + /* Runs and times the factorization */ + testing_start( &test_data ); + hres = CHAMELEON_zsytrf( uplo, N, A, LDA ); + test_data.hres = hres; + testing_stop( &test_data, flops_zpotrf( N ) ); + + /* Checks the factorisation against A0, regenerated from the same seed */ + if ( check ) { + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplgsy( (double)N, uplo, N, A0, LDA, seedA ); + + hres += check_zxxtrf_std( args, ChamSymmetric, uplo, N, N, A0, A, LDA ); + + free( A0 ); + } + + free( A ); + + return hres; +} + testing_t test_zsytrf; const char *zsytrf_params[] = { "mtxfmt", "nb", "uplo", "n", "lda", "seedA", NULL }; const char *zsytrf_output[] = { NULL }; @@ -96,7 +141,7 @@ testing_zsytrf_init( void ) test_zsytrf.output = zsytrf_output; test_zsytrf.outchk = zsytrf_outchk;
test_zsytrf.fptr_desc = testing_zsytrf_desc; - test_zsytrf.fptr_std = NULL; + test_zsytrf.fptr_std = testing_zsytrf_std; test_zsytrf.next = NULL; testing_register( &test_zsytrf ); diff --git a/testing/testing_zsytrs.c b/testing/testing_zsytrs.c index eb08f427530646b0eb079e5da03aef71a9011402..8750654a0b88530a0eb5ec538f178a08ce893d77 100644 --- a/testing/testing_zsytrs.c +++ b/testing/testing_zsytrs.c @@ -95,6 +95,64 @@ testing_zsytrs_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zsytrs_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + + /* Matrix buffers */ + CHAMELEON_Complex64_t *A, *X; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N* sizeof(CHAMELEON_Complex64_t) ); + X = malloc( (size_t)LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + + /* Fills the matrix with random values */ + CHAMELEON_zplgsy( (double)N, uplo, N, A, LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, X, LDB, seedB ); + + hres = CHAMELEON_zsytrf( uplo, N, A, LDA ); + assert( hres == 0 ); + + /* Calculates the solution (only the solve is timed) */ + testing_start( &test_data ); + hres += CHAMELEON_zsytrs( uplo, N, NRHS, A, LDA, X, LDB ); + test_data.hres = hres; + testing_stop( &test_data, 0 /*flops_zsytrs( N, NRHS )*/ ); + + /* Checks the solve against A0 and B, regenerated from the same seeds */ + if ( check ) { + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N* sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Complex64_t *B = malloc( (size_t)LDB*NRHS*sizeof(CHAMELEON_Complex64_t) ); + + CHAMELEON_zplgsy( (double)N, uplo, N, A0,
LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, B, LDB, seedB ); + + hres += check_zsolve_std( args, ChamSymmetric, ChamNoTrans, uplo, N, NRHS, A0, LDA, X, B, LDB ); + + free( A0 ); + free( B ); + } + + free( A ); + free( X ); + + return hres; +} + testing_t test_zsytrs; const char *zsytrs_params[] = { "mtxfmt", "nb", "uplo", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; @@ -114,7 +172,7 @@ testing_zsytrs_init( void ) test_zsytrs.output = zsytrs_output; test_zsytrs.outchk = zsytrs_outchk; test_zsytrs.fptr_desc = testing_zsytrs_desc; - test_zsytrs.fptr_std = NULL; + test_zsytrs.fptr_std = testing_zsytrs_std; test_zsytrs.next = NULL; testing_register( &test_zsytrs ); diff --git a/testing/testing_ztradd.c b/testing/testing_ztradd.c index e594586a2152f605b6966469948146a23966087c..895c4eddabe918638bf5003306d3cc42bf29fbdb 100644 --- a/testing/testing_ztradd.c +++ b/testing/testing_ztradd.c @@ -150,6 +150,93 @@ testing_ztradd_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_ztradd_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", ( ( trans == ChamNoTrans ) ? M : N ) ); + int LDB = run_arg_get_int( args, "LDB", M ); + CHAMELEON_Complex64_t alpha = testing_zalea(); + CHAMELEON_Complex64_t beta = testing_zalea(); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + + /* Descriptors */ + int Am, An; + CHAMELEON_Complex64_t *A, *B; + cham_uplo_t uplo_inv = uplo; + + if ( (uplo != ChamUpperLower) && (trans != ChamNoTrans) ) { + uplo_inv = (uplo == ChamUpper) ?
ChamLower : ChamUpper; + } + + alpha = run_arg_get_complex64( args, "alpha", alpha ); + beta = run_arg_get_complex64( args, "beta", beta ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + if ( trans != ChamNoTrans ) { + Am = N; + An = M; + } + else { + Am = M; + An = N; + } + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*An*sizeof(CHAMELEON_Complex64_t) ); + B = malloc( (size_t)LDB*N* sizeof(CHAMELEON_Complex64_t) ); + + /* Fills the matrix with random values */ + switch ( uplo ) { + case ChamUpper: + case ChamLower: + CHAMELEON_zplgtr( 0., uplo_inv, Am, An, A, LDA, seedA ); + CHAMELEON_zplgtr( 0., uplo, M, N, B, LDB, seedB ); + break; + case ChamUpperLower: + default: + CHAMELEON_zplrnt( Am, An, A, LDA, seedA ); + CHAMELEON_zplrnt( M, N, B, LDB, seedB ); + break; + } + + /* Calculates the sum */ + testing_start( &test_data ); + hres = CHAMELEON_ztradd( uplo, trans, M, N, alpha, A, LDA, beta, B, LDB ); + test_data.hres = hres; + testing_stop( &test_data, flops_ztradd( uplo, M, N ) ); + + /* Checks the solution against B0, regenerated from the same seed */ + if ( check ) { + CHAMELEON_Complex64_t *B0 = malloc( (size_t)LDB*N* sizeof(CHAMELEON_Complex64_t) ); + + if ( uplo == ChamUpperLower ) { + CHAMELEON_zplrnt( M, N, B0, LDB, seedB ); + } + else { + CHAMELEON_zplgtr( 0., uplo, M, N, B0, LDB, seedB ); + } + hres += check_zsum_std( args, uplo, trans, M, N, alpha, A, LDA, beta, B0, B, LDB ); + + free( B0 ); + } + + free( A ); + free( B ); + + return hres; +} + testing_t test_ztradd; const char *ztradd_params[] = { "mtxfmt", "nb", "trans", "uplo", "m", "n", "lda", "ldb", "alpha", "beta", "seedA", "seedB", NULL }; @@ -169,7 +256,7 @@ testing_ztradd_init( void ) test_ztradd.output = ztradd_output; test_ztradd.outchk = ztradd_outchk; test_ztradd.fptr_desc = testing_ztradd_desc; - test_ztradd.fptr_std = NULL; + test_ztradd.fptr_std = testing_ztradd_std; test_ztradd.next = NULL; testing_register( &test_ztradd ); diff --git a/testing/testing_ztrtri.c b/testing/testing_ztrtri.c index
6f7c653fdb57cf1f26a6badbfe0bb3ef47f56585..521fe54ed3738cd189216ed6ffe929a4620f6e21 100644 --- a/testing/testing_ztrtri.c +++ b/testing/testing_ztrtri.c @@ -80,6 +80,52 @@ testing_ztrtri_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_ztrtri_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + cham_diag_t diag = run_arg_get_diag( args, "diag", ChamNonUnit ); + int N = run_arg_get_int( args, "N", 1000 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + + /* Matrix buffer */ + CHAMELEON_Complex64_t *A; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix (size computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + + /* Initialises the matrix from seedA */ + CHAMELEON_zplghe( (double)N, uplo, N, A, LDA, seedA ); + + /* Calculates the inverse of the matrix */ + testing_start( &test_data ); + hres = CHAMELEON_ztrtri( uplo, diag, N, A, LDA ); + test_data.hres = hres; + testing_stop( &test_data, flops_ztrtri( N ) ); + + /* Checks the inverse against A0, regenerated from the same seed */ + if ( check ) { + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplghe( (double)N, uplo, N, A0, LDA, seedA ); + + hres += check_ztrtri_std( args, ChamTriangular, uplo, diag, N, A0, A, LDA ); + + free( A0 ); + } + + free( A ); + + return hres; +} + testing_t test_ztrtri; const char *ztrtri_params[] = { "mtxfmt", "nb", "uplo", "diag", "n", "lda", "seedA", NULL }; const char *ztrtri_output[] = { NULL }; @@ -98,7 +144,7 @@ testing_ztrtri_init( void ) test_ztrtri.output = ztrtri_output; test_ztrtri.outchk = ztrtri_outchk; test_ztrtri.fptr_desc = testing_ztrtri_desc; - test_ztrtri.fptr_std = NULL; + test_ztrtri.fptr_std = testing_ztrtri_std; test_ztrtri.next = NULL; testing_register( &test_ztrtri ); diff --git
a/testing/testing_zunglq.c b/testing/testing_zunglq.c index 2d9ff38eaae00f8bf0ee1f30742b54f46f547cd4..a96bc23c4cb54c709d02aee850f8bc85b46f83f6 100644 --- a/testing/testing_zunglq.c +++ b/testing/testing_zunglq.c @@ -114,6 +114,82 @@ testing_zunglq_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zunglq_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = run_arg_get_int( args, "K", chameleon_min( M, N ) ); + int LDA = run_arg_get_int( args, "LDA", M ); + int RH = run_arg_get_int( args, "qra", 0 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + + /* Matrix buffers and workspace descriptor */ + CHAMELEON_Complex64_t *A, *Qlap; + CHAM_desc_t *descT; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + if ( M > N ) { + fprintf( stderr, "SKIPPED: Incorrect parameters for unglq (M > N)\n" ); + return -1; + } + + if ( K > M ) { + fprintf( stderr, "SKIPPED: Incorrect parameters for unglq (K > M)\n" ); + return -1; + } + + if ( RH > 0 ) { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder ); + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH ); + } + else { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder ); + } + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + Qlap = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Alloc_Workspace_zgels( K, N, &descT, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( K, N, A, LDA, seedA ); + hres = CHAMELEON_zgelqf( K, N, A, LDA, descT ); + + /* Calculates the solution (status accumulates on top of the factorization) */ + testing_start( &test_data ); + hres += CHAMELEON_zunglq( M, N, K, A, LDA, descT,
Qlap, LDA ); + test_data.hres = hres; + testing_stop( &test_data, flops_zunglq( M, N, K ) ); + + /* Checks the factorisation and orthogonality */ + if ( check ) { + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplrnt( K, N, A0, LDA, seedA ); + + hres += check_zortho_std( args, M, N, Qlap, LDA ); + hres += check_zgelqf_std( args, M, N, K, A0, A, LDA, Qlap, LDA ); + + free( A0 ); + } + + free( A ); + CHAMELEON_Desc_Destroy( &descT ); + free( Qlap ); + + return hres; +} + testing_t test_zunglq; const char *zunglq_params[] = { "mtxfmt", "nb", "ib", "m", "n", "k", "lda", "qra", "seedA", NULL }; const char *zunglq_output[] = { NULL }; @@ -132,7 +208,7 @@ testing_zunglq_init( void ) test_zunglq.output = zunglq_output; test_zunglq.outchk = zunglq_outchk; test_zunglq.fptr_desc = testing_zunglq_desc; - test_zunglq.fptr_std = NULL; + test_zunglq.fptr_std = testing_zunglq_std; test_zunglq.next = NULL; testing_register( &test_zunglq ); diff --git a/testing/testing_zunglq_hqr.c b/testing/testing_zunglq_hqr.c index 2bec55f53e33eca4ec8424e3d641101ac240d892..981cd4d7416a3515bcb1d521c7dcb488488a4997 100644 --- a/testing/testing_zunglq_hqr.c +++ b/testing/testing_zunglq_hqr.c @@ -124,6 +124,91 @@ testing_zunglq_hqr_desc( run_arg_list_t *args, int check ) return hres; } +int +testing_zunglq_hqr_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = run_arg_get_int( args, "K", chameleon_min( M, N ) ); + int LDA = run_arg_get_int( args, "LDA", M ); + int qr_a = run_arg_get_int( args, "qra", -1 ); + int qr_p = run_arg_get_int( args, "qrp", -1 ); + int llvl = run_arg_get_int( args, "llvl", -1 ); + int hlvl = run_arg_get_int( args, "hlvl", -1 ); +
int domino = run_arg_get_int( args, "domino", -1 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + + /* Matrix buffers, workspace descriptors and reduction tree */ + CHAMELEON_Complex64_t *A, *Qlap; + CHAM_desc_t *descTS, *descTT; + libhqr_tree_t qrtree; + libhqr_matrix_t matrix; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + if ( M > N ) { + fprintf( stderr, "SKIPPED: Incorrect parameters for unglq_hqr (M > N)\n" ); + return -1; + } + + if ( K > M ) { + fprintf( stderr, "SKIPPED: Incorrect parameters for unglq_hqr (K > M)\n" ); + return -1; + } + + /* Creates the matrices (sizes computed in size_t to avoid int overflow) */ + A = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + Qlap = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Alloc_Workspace_zgels( K, N, &descTS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( K, N, &descTT, P, Q ); + + /* Initialize matrix tree */ + matrix.mt = descTS->mt; + matrix.nt = descTS->nt; + matrix.nodes = P * Q; + matrix.p = P; + + libhqr_init_hqr( &qrtree, LIBHQR_LQ, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( K, N, A, LDA, seedA ); + hres = CHAMELEON_zgelqf_param( &qrtree, K, N, A, LDA, descTS, descTT ); + + /* Calculates the solution (status accumulates on top of the factorization) */ + testing_start( &test_data ); + hres += CHAMELEON_zunglq_param( &qrtree, M, N, K, A, LDA, descTS, descTT, Qlap, LDA ); + test_data.hres = hres; + testing_stop( &test_data, flops_zunglq( M, N, K ) ); + + /* Checks the factorisation and orthogonality */ + if ( check ) { + CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA*N*sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_zplrnt( K, N, A0, LDA, seedA ); + + hres += check_zortho_std( args, M, N, Qlap, LDA ); + hres += check_zgelqf_std( args, M, N, K, A0, A, LDA, Qlap, LDA ); + + free( A0 ); + } + + free( A ); + CHAMELEON_Desc_Destroy( &descTS ); + CHAMELEON_Desc_Destroy( &descTT ); + free( Qlap ); + libhqr_finalize( &qrtree ); + + return hres; +} + testing_t test_zunglq_hqr; const
char *zunglq_hqr_params[] = { "mtxfmt", "nb", "ib", "m", "n", "k", "lda", "qra", "qrp", "llvl", "hlvl", "domino", "seedA", NULL }; @@ -143,7 +228,7 @@ testing_zunglq_hqr_init( void ) test_zunglq_hqr.output = zunglq_hqr_output; test_zunglq_hqr.outchk = zunglq_hqr_outchk; test_zunglq_hqr.fptr_desc = testing_zunglq_hqr_desc; - test_zunglq_hqr.fptr_std = NULL; + test_zunglq_hqr.fptr_std = testing_zunglq_hqr_std; test_zunglq_hqr.next = NULL; testing_register( &test_zunglq_hqr ); diff --git a/testing/testing_zungqr.c b/testing/testing_zungqr.c index f5fb684290c60061d2a5497dbb6bd0ca5f2f2636..a2a22b968596d0662b42ac282bd4f8b05630bb18 100644 --- a/testing/testing_zungqr.c +++ b/testing/testing_zungqr.c @@ -114,6 +114,82 @@ testing_zungqr_desc( run_arg_list_t *args, int check ) return hres; }
+/**
+ * Test of CHAMELEON_zungqr() on plain arrays allocated with malloc()
+ * (leading dimension LDA).
+ *
+ * A random M-by-K matrix is factorized with CHAMELEON_zgeqrf(), then
+ * CHAMELEON_zungqr() is run between testing_start() and testing_stop().
+ * When check is non-zero, the result is handed to check_zortho_std() and
+ * check_zgeqrf_std().
+ *
+ * @param[in] args   Run arguments: nb, ib, N, M, K, LDA, qra and seedA are read.
+ * @param[in] check  Non-zero to run the checks after the computation.
+ *
+ * @return -1 when the sizes are rejected (N > M or K > N); otherwise the
+ *         accumulated return codes of the factorization, of zungqr and of
+ *         the checks.
+ */
+int
+testing_zungqr_std( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int nb    = run_arg_get_int( args, "nb", 320 );
+    int ib    = run_arg_get_int( args, "ib", 48 );
+    int P     = parameters_getvalue_int( "P" );
+    int N     = run_arg_get_int( args, "N", 1000 );
+    int M     = run_arg_get_int( args, "M", N );
+    int K     = run_arg_get_int( args, "K", chameleon_min( M, N ) );
+    int LDA   = run_arg_get_int( args, "LDA", M );
+    int RH    = run_arg_get_int( args, "qra", 0 );
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int Q     = parameters_compute_q( P );
+
+    /* Matrices and workspace descriptor */
+    CHAMELEON_Complex64_t *A, *Qlap;
+    CHAM_desc_t           *descT;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* Rejects the sizes before anything is allocated */
+    if ( N > M ) {
+        fprintf( stderr, "SKIPPED: Incorrect parameters for ungqr (N > M)\n" );
+        return -1;
+    }
+
+    if ( K > N ) {
+        fprintf( stderr, "SKIPPED: Incorrect parameters for ungqr (K > N)\n" );
+        return -1;
+    }
+
+    /* A positive qra selects the tree Householder mode with that size */
+    if ( RH > 0 ) {
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder );
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH );
+    }
+    else {
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder );
+    }
+
+    /* Creates the matrices: the sizes are computed in size_t so that the
+     * LDA*K and LDA*N products cannot overflow int */
+    A    = malloc( (size_t)LDA * (size_t)K * sizeof(CHAMELEON_Complex64_t) );
+    Qlap = malloc( (size_t)LDA * (size_t)N * sizeof(CHAMELEON_Complex64_t) );
+    CHAMELEON_Alloc_Workspace_zgels( M, K, &descT, P, Q );
+
+    /* Fills the matrix with random values and factorizes it */
+    CHAMELEON_zplrnt( M, K, A, LDA, seedA );
+    hres = CHAMELEON_zgeqrf( M, K, A, LDA, descT );
+
+    /* Calculates the solution; the factorization status is kept in hres */
+    testing_start( &test_data );
+    hres += CHAMELEON_zungqr( M, N, K, A, LDA, descT, Qlap, LDA );
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zungqr( M, N, K ) );
+
+    /* Checks the factorisation and orthogonality */
+    if ( check ) {
+        /* A0 is regenerated from the same seed as A */
+        CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA * (size_t)K * sizeof(CHAMELEON_Complex64_t) );
+        CHAMELEON_zplrnt( M, K, A0, LDA, seedA );
+
+        hres += check_zortho_std( args, M, N, Qlap, LDA );
+        hres += check_zgeqrf_std( args, M, N, K, A0, A, LDA, Qlap, LDA );
+
+        free( A0 );
+    }
+
+    free( A );
+    CHAMELEON_Desc_Destroy( &descT );
+    free( Qlap );
+
+    return hres;
+}
+
testing_t test_zungqr; const char *zungqr_params[] = { "mtxfmt", "nb", "ib", "m", "n", "k", "lda", "qra", "seedA", NULL }; const char *zungqr_output[] = { NULL }; @@ -132,7 +208,7 @@ testing_zungqr_init( void ) test_zungqr.output = zungqr_output; test_zungqr.outchk = zungqr_outchk; test_zungqr.fptr_desc = testing_zungqr_desc; - test_zungqr.fptr_std = NULL; + test_zungqr.fptr_std = testing_zungqr_std; test_zungqr.next = NULL; testing_register( &test_zungqr ); diff --git a/testing/testing_zungqr_hqr.c b/testing/testing_zungqr_hqr.c index 02def386337da59c015944e4861ec6925597e35e..40b16a1e06a57bd4e7ff9bc9457a3d2f9d7fb981 100644 --- a/testing/testing_zungqr_hqr.c +++ b/testing/testing_zungqr_hqr.c @@ -124,6 +124,91 @@ testing_zungqr_hqr_desc( run_arg_list_t *args, int check ) return hres; }
+/**
+ * Test of CHAMELEON_zungqr_param() on plain arrays allocated with malloc()
+ * (leading dimension LDA).
+ *
+ * A libhqr tree is initialized from the tile counts of the workspace
+ * descriptor, a random M-by-K matrix is factorized with
+ * CHAMELEON_zgeqrf_param(), then CHAMELEON_zungqr_param() is run between
+ * testing_start() and testing_stop(). When check is non-zero, the result is
+ * handed to check_zortho_std() and check_zgeqrf_std().
+ *
+ * @param[in] args   Run arguments: nb, ib, N, M, K, LDA, qra, qrp, llvl,
+ *                   hlvl, domino and seedA are read.
+ * @param[in] check  Non-zero to run the checks after the computation.
+ *
+ * @return -1 when the sizes are rejected (N > M or K > N); otherwise the
+ *         accumulated return codes of the factorization, of zungqr_param
+ *         and of the checks.
+ */
+int
+testing_zungqr_hqr_std( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int nb     = run_arg_get_int( args, "nb", 320 );
+    int ib     = run_arg_get_int( args, "ib", 48 );
+    int P      = parameters_getvalue_int( "P" );
+    int N      = run_arg_get_int( args, "N", 1000 );
+    int M      = run_arg_get_int( args, "M", N );
+    int K      = run_arg_get_int( args, "K", chameleon_min( M, N ) );
+    int LDA    = run_arg_get_int( args, "LDA", M );
+    int qr_a   = run_arg_get_int( args, "qra", -1 );
+    int qr_p   = run_arg_get_int( args, "qrp", -1 );
+    int llvl   = run_arg_get_int( args, "llvl", -1 );
+    int hlvl   = run_arg_get_int( args, "hlvl", -1 );
+    int domino = run_arg_get_int( args, "domino", -1 );
+    int seedA  = run_arg_get_int( args, "seedA", random() );
+    int Q      = parameters_compute_q( P );
+
+    /* Matrices, workspace descriptors and reduction tree */
+    CHAMELEON_Complex64_t *A, *Qlap;
+    CHAM_desc_t           *descTS, *descTT;
+    libhqr_tree_t          qrtree;
+    libhqr_matrix_t        matrix;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* Rejects the sizes before anything is allocated */
+    if ( N > M ) {
+        fprintf( stderr, "SKIPPED: Incorrect parameters for ungqr_hqr (N > M)\n" );
+        return -1;
+    }
+
+    if ( K > N ) {
+        fprintf( stderr, "SKIPPED: Incorrect parameters for ungqr_hqr (K > N)\n" );
+        return -1;
+    }
+
+    /* Creates the matrices: the sizes are computed in size_t so that the
+     * LDA*K and LDA*N products cannot overflow int */
+    A    = malloc( (size_t)LDA * (size_t)K * sizeof(CHAMELEON_Complex64_t) );
+    Qlap = malloc( (size_t)LDA * (size_t)N * sizeof(CHAMELEON_Complex64_t) );
+    CHAMELEON_Alloc_Workspace_zgels( M, K, &descTS, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( M, K, &descTT, P, Q );
+
+    /* Initialize matrix tree from the tile counts of the workspace */
+    matrix.mt    = descTS->mt;
+    matrix.nt    = descTS->nt;
+    matrix.nodes = P * Q;
+    matrix.p     = P;
+
+    libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 );
+
+    /* Fills the matrix with random values and factorizes it */
+    CHAMELEON_zplrnt( M, K, A, LDA, seedA );
+    hres = CHAMELEON_zgeqrf_param( &qrtree, M, K, A, LDA, descTS, descTT );
+
+    /* Calculates the solution; the factorization status is kept in hres */
+    testing_start( &test_data );
+    hres += CHAMELEON_zungqr_param( &qrtree, M, N, K, A, LDA, descTS, descTT, Qlap, LDA );
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zungqr( M, N, K ) );
+
+    /* Checks the factorisation and orthogonality */
+    if ( check ) {
+        /* A0 is regenerated from the same seed as A */
+        CHAMELEON_Complex64_t *A0 = malloc( (size_t)LDA * (size_t)K * sizeof(CHAMELEON_Complex64_t) );
+        CHAMELEON_zplrnt( M, K, A0, LDA, seedA );
+
+        hres += check_zortho_std( args, M, N, Qlap, LDA );
+        hres += check_zgeqrf_std( args, M, N, K, A0, A, LDA, Qlap, LDA );
+
+        free( A0 );
+    }
+
+    free( A );
+    CHAMELEON_Desc_Destroy( &descTS );
+    CHAMELEON_Desc_Destroy( &descTT );
+    free( Qlap );
+    libhqr_finalize( &qrtree );
+
+    return hres;
+}
+
testing_t test_zungqr_hqr; const char *zungqr_hqr_params[] = { "mtxfmt", "nb", "ib", "m", "n", "k", "lda", "qra", "qrp", "llvl", "hlvl", "domino", "seedA", NULL }; @@ -143,7 +228,7 @@ testing_zungqr_hqr_init( void ) test_zungqr_hqr.output = zungqr_hqr_output; test_zungqr_hqr.outchk = zungqr_hqr_outchk; test_zungqr_hqr.fptr_desc = testing_zungqr_hqr_desc; - test_zungqr_hqr.fptr_std = NULL; + test_zungqr_hqr.fptr_std = testing_zungqr_hqr_std; test_zungqr_hqr.next = NULL; testing_register( &test_zungqr_hqr ); diff --git a/testing/testing_zunmlq.c b/testing/testing_zunmlq.c index 49a2e0565e3abc13a8f17d6283b53418dae1ad30..7506473b933ba31bf2ed357848e6b6d586b6c56b 100644 --- a/testing/testing_zunmlq.c +++ b/testing/testing_zunmlq.c @@ -119,6 +119,86 @@ testing_zunmlq_desc( run_arg_list_t *args, int check ) return hres; }
+/**
+ * Test of CHAMELEON_zunmlq() on plain arrays allocated with malloc().
+ *
+ * A random K-by-An matrix (An = M for ChamLeft, N otherwise) is factorized
+ * with CHAMELEON_zgelqf(), then CHAMELEON_zunmlq() is applied to a random
+ * M-by-N matrix C between testing_start() and testing_stop(). When check is
+ * non-zero, Q is generated with CHAMELEON_zunglq() and the result is handed
+ * to check_zqc_std().
+ *
+ * @param[in] args   Run arguments: nb, ib, side, trans, N, M, K, LDA, LDC,
+ *                   qra, seedA and seedC are read.
+ * @param[in] check  Non-zero to run the check after the computation.
+ *
+ * @return The accumulated return codes of the factorization, of zunmlq and,
+ *         when requested, of the Q generation and of the check.
+ */
+int
+testing_zunmlq_std( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int          nb    = run_arg_get_int( args, "nb", 320 );
+    int          ib    = run_arg_get_int( args, "ib", 48 );
+    int          P     = parameters_getvalue_int( "P" );
+    cham_side_t  side  = run_arg_get_side( args, "side", ChamLeft );
+    cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans );
+    int          N     = run_arg_get_int( args, "N", 1000 );
+    int          M     = run_arg_get_int( args, "M", N );
+    /* NOTE(review): K defaults to N here while testing_zunmlq_hqr_std
+     * defaults to min(M,N) - confirm which one is intended */
+    int          K     = run_arg_get_int( args, "K", N );
+    int          LDA   = run_arg_get_int( args, "LDA", K );
+    int          LDC   = run_arg_get_int( args, "LDC", M );
+    int          RH    = run_arg_get_int( args, "qra", 4 );
+    int          seedA = run_arg_get_int( args, "seedA", random() );
+    int          seedC = run_arg_get_int( args, "seedC", random() );
+    int          Q     = parameters_compute_q( P );
+
+    /* Matrices and workspace descriptor */
+    int                    An;
+    CHAMELEON_Complex64_t *A, *C;
+    CHAM_desc_t           *descT;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* A positive qra selects the tree Householder mode with that size */
+    if ( RH > 0 ) {
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder );
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH );
+    }
+    else {
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder );
+    }
+
+    /* Calculates the number of columns of A according to the side */
+    An = ( side == ChamLeft ) ? M : N;
+
+    /* Creates the matrices: the sizes are computed in size_t so that the
+     * LDA*An and LDC*N products cannot overflow int */
+    A = malloc( (size_t)LDA * (size_t)An * sizeof(CHAMELEON_Complex64_t) );
+    C = malloc( (size_t)LDC * (size_t)N * sizeof(CHAMELEON_Complex64_t) );
+    CHAMELEON_Alloc_Workspace_zgels( K, An, &descT, P, Q );
+
+    /* Fills the matrix with random values */
+    CHAMELEON_zplrnt( K, An, A, LDA, seedA );
+    CHAMELEON_zplrnt( M, N, C, LDC, seedC );
+
+    /* Computes the factorization */
+    hres = CHAMELEON_zgelqf( K, An, A, LDA, descT );
+
+    /* Computes unmlq */
+    testing_start( &test_data );
+    hres += CHAMELEON_zunmlq( side, trans, M, N, K, A, LDA, descT, C, LDC );
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zunmlq( side, M, N, K ) );
+
+    /* Generates Q explicitly and hands it to check_zqc_std() with C and C0 */
+    if ( check ) {
+        CHAMELEON_Complex64_t *C0   = malloc( (size_t)LDC * (size_t)N * sizeof(CHAMELEON_Complex64_t) );
+        CHAMELEON_Complex64_t *Qlap = malloc( (size_t)An * (size_t)An * sizeof(CHAMELEON_Complex64_t) );
+
+        /* C0 is regenerated from the same seed as C */
+        CHAMELEON_zplrnt( M, N, C0, LDC, seedC );
+        hres += CHAMELEON_zunglq( An, An, K, A, LDA, descT, Qlap, An );
+
+        hres += check_zqc_std( args, side, trans, M, N, C0, C, LDC, Qlap, An );
+
+        free( C0 );
+        free( Qlap );
+    }
+
+    free( A );
+    CHAMELEON_Desc_Destroy( &descT );
+    free( C );
+
+    return hres;
+}
+
testing_t test_zunmlq; const char *zunmlq_params[] = { "mtxfmt", "nb", "ib", "side", "trans", "m", "n", "k", "lda", "ldc", "qra", "seedA", "seedC", NULL }; @@
-138,7 +218,7 @@ testing_zunmlq_init( void ) test_zunmlq.output = zunmlq_output; test_zunmlq.outchk = zunmlq_outchk; test_zunmlq.fptr_desc = testing_zunmlq_desc; - test_zunmlq.fptr_std = NULL; + test_zunmlq.fptr_std = testing_zunmlq_std; test_zunmlq.next = NULL; testing_register( &test_zunmlq ); diff --git a/testing/testing_zunmlq_hqr.c b/testing/testing_zunmlq_hqr.c index 6cb3010565c1cb6ef76a929e1ac8ab6f83947ac6..06289c0107113896c72395888d3d8c8aa6c1b2e0 100644 --- a/testing/testing_zunmlq_hqr.c +++ b/testing/testing_zunmlq_hqr.c @@ -129,6 +129,95 @@ testing_zunmlq_hqr_desc( run_arg_list_t *args, int check ) return hres; }
+/**
+ * Test of CHAMELEON_zunmlq_param() on plain arrays allocated with malloc().
+ *
+ * A libhqr tree is initialized from the tile counts of the workspace
+ * descriptor, a random K-by-An matrix (An = M for ChamLeft, N otherwise) is
+ * factorized with CHAMELEON_zgelqf_param(), then CHAMELEON_zunmlq_param() is
+ * applied to a random M-by-N matrix C between testing_start() and
+ * testing_stop(). When check is non-zero, Q is generated with
+ * CHAMELEON_zunglq_param() and the result is handed to check_zqc_std().
+ *
+ * @param[in] args   Run arguments: nb, ib, side, trans, N, M, K, LDA, LDC,
+ *                   qra, qrp, llvl, hlvl, domino, seedA and seedC are read.
+ * @param[in] check  Non-zero to run the check after the computation.
+ *
+ * @return The accumulated return codes of the factorization, of
+ *         zunmlq_param and, when requested, of the Q generation and of the
+ *         check.
+ */
+int
+testing_zunmlq_hqr_std( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int          nb     = run_arg_get_int( args, "nb", 320 );
+    int          ib     = run_arg_get_int( args, "ib", 48 );
+    int          P      = parameters_getvalue_int( "P" );
+    cham_side_t  side   = run_arg_get_side( args, "side", ChamLeft );
+    cham_trans_t trans  = run_arg_get_trans( args, "trans", ChamNoTrans );
+    int          N      = run_arg_get_int( args, "N", 1000 );
+    int          M      = run_arg_get_int( args, "M", N );
+    int          K      = run_arg_get_int( args, "K", chameleon_min( M, N ) );
+    int          LDA    = run_arg_get_int( args, "LDA", K );
+    int          LDC    = run_arg_get_int( args, "LDC", M );
+    int          qr_a   = run_arg_get_int( args, "qra", -1 );
+    int          qr_p   = run_arg_get_int( args, "qrp", -1 );
+    int          llvl   = run_arg_get_int( args, "llvl", -1 );
+    int          hlvl   = run_arg_get_int( args, "hlvl", -1 );
+    int          domino = run_arg_get_int( args, "domino", -1 );
+    int          seedA  = run_arg_get_int( args, "seedA", random() );
+    int          seedC  = run_arg_get_int( args, "seedC", random() );
+    int          Q      = parameters_compute_q( P );
+
+    /* Matrices, workspace descriptors and reduction tree */
+    int                    An;
+    CHAMELEON_Complex64_t *A, *C;
+    CHAM_desc_t           *descTS, *descTT;
+    libhqr_tree_t          qrtree;
+    libhqr_matrix_t        matrix;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* Calculates the number of columns of A according to the side */
+    An = ( side == ChamLeft ) ? M : N;
+
+    /* Creates the matrices: the sizes are computed in size_t so that the
+     * LDA*An and LDC*N products cannot overflow int */
+    A = malloc( (size_t)LDA * (size_t)An * sizeof(CHAMELEON_Complex64_t) );
+    C = malloc( (size_t)LDC * (size_t)N * sizeof(CHAMELEON_Complex64_t) );
+    CHAMELEON_Alloc_Workspace_zgels( K, An, &descTS, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( K, An, &descTT, P, Q );
+
+    /* Initialize matrix tree from the tile counts of the workspace */
+    matrix.mt    = descTS->mt;
+    matrix.nt    = descTS->nt;
+    matrix.nodes = P * Q;
+    matrix.p     = P;
+
+    libhqr_init_hqr( &qrtree, LIBHQR_LQ, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 );
+
+    /* Fills the matrix with random values */
+    CHAMELEON_zplrnt( K, An, A, LDA, seedA );
+    CHAMELEON_zplrnt( M, N, C, LDC, seedC );
+
+    /* Computes the factorization */
+    hres = CHAMELEON_zgelqf_param( &qrtree, K, An, A, LDA, descTS, descTT );
+
+    /* Computes unmlq_hqr */
+    testing_start( &test_data );
+    hres += CHAMELEON_zunmlq_param( &qrtree, side, trans, M, N, K, A, LDA, descTS, descTT, C, LDC );
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zunmlq( side, M, N, K ) );
+
+    /* Generates Q explicitly and hands it to check_zqc_std() with C and C0 */
+    if ( check ) {
+        CHAMELEON_Complex64_t *C0   = malloc( (size_t)LDC * (size_t)N * sizeof(CHAMELEON_Complex64_t) );
+        CHAMELEON_Complex64_t *Qlap = malloc( (size_t)An * (size_t)An * sizeof(CHAMELEON_Complex64_t) );
+
+        /* C0 is regenerated from the same seed as C */
+        CHAMELEON_zplrnt( M, N, C0, LDC, seedC );
+        hres += CHAMELEON_zunglq_param( &qrtree, An, An, K, A, LDA, descTS, descTT, Qlap, An );
+
+        hres += check_zqc_std( args, side, trans, M, N, C0, C, LDC, Qlap, An );
+
+        free( C0 );
+        free( Qlap );
+    }
+
+    free( A );
+    CHAMELEON_Desc_Destroy( &descTS );
+    CHAMELEON_Desc_Destroy( &descTT );
+    free( C );
+    libhqr_finalize( &qrtree );
+
+    return hres;
+}
+
testing_t test_zunmlq_hqr; const char *zunmlq_hqr_params[] = { "mtxfmt", "nb", "ib", "side", "trans", "m", "n", "k", "lda", "ldc", "qra", "qrp", @@ -149,7 +238,7 @@ testing_zunmlq_hqr_init( void ) test_zunmlq_hqr.output = zunmlq_hqr_output; test_zunmlq_hqr.outchk = zunmlq_hqr_outchk; test_zunmlq_hqr.fptr_desc = testing_zunmlq_hqr_desc; -
test_zunmlq_hqr.fptr_std = NULL; + test_zunmlq_hqr.fptr_std = testing_zunmlq_hqr_std; test_zunmlq_hqr.next = NULL; testing_register( &test_zunmlq_hqr ); diff --git a/testing/testing_zunmqr.c b/testing/testing_zunmqr.c index cb0a94ed817b289a93c052da68486e9872971e1c..7c0e9efac691108416b126b0610497ab79e4f07e 100644 --- a/testing/testing_zunmqr.c +++ b/testing/testing_zunmqr.c @@ -119,6 +119,86 @@ testing_zunmqr_desc( run_arg_list_t *args, int check ) return hres; }
+/**
+ * Test of CHAMELEON_zunmqr() on plain arrays allocated with malloc().
+ *
+ * A random Am-by-K matrix (Am = M for ChamLeft, N otherwise) is factorized
+ * with CHAMELEON_zgeqrf(), then CHAMELEON_zunmqr() is applied to a random
+ * M-by-N matrix C between testing_start() and testing_stop(). When check is
+ * non-zero, Q is generated with CHAMELEON_zungqr() and the result is handed
+ * to check_zqc_std().
+ *
+ * @param[in] args   Run arguments: nb, ib, side, trans, N, M, K, LDA, LDC,
+ *                   qra, seedA and seedC are read.
+ * @param[in] check  Non-zero to run the check after the computation.
+ *
+ * @return The accumulated return codes of the factorization, of zunmqr and,
+ *         when requested, of the Q generation and of the check.
+ */
+int
+testing_zunmqr_std( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int          nb    = run_arg_get_int( args, "nb", 320 );
+    int          ib    = run_arg_get_int( args, "ib", 48 );
+    int          P     = parameters_getvalue_int( "P" );
+    cham_side_t  side  = run_arg_get_side( args, "side", ChamLeft );
+    cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans );
+    int          N     = run_arg_get_int( args, "N", 1000 );
+    int          M     = run_arg_get_int( args, "M", N );
+    int          K     = run_arg_get_int( args, "K", chameleon_min( M, N ) );
+    int          LDA   = run_arg_get_int( args, "LDA", ( side == ChamLeft ) ? M : N );
+    int          LDC   = run_arg_get_int( args, "LDC", M );
+    int          RH    = run_arg_get_int( args, "qra", 4 );
+    int          seedA = run_arg_get_int( args, "seedA", random() );
+    int          seedC = run_arg_get_int( args, "seedC", random() );
+    int          Q     = parameters_compute_q( P );
+
+    /* Matrices and workspace descriptor */
+    int                    Am;
+    CHAMELEON_Complex64_t *A, *C;
+    CHAM_desc_t           *descT;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* A positive qra selects the tree Householder mode with that size */
+    if ( RH > 0 ) {
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder );
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH );
+    }
+    else {
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder );
+    }
+
+    /* Calculates the number of rows of A according to the side */
+    Am = ( side == ChamLeft ) ? M : N;
+
+    /* Creates the matrices: the sizes are computed in size_t so that the
+     * LDA*K and LDC*N products cannot overflow int */
+    A = malloc( (size_t)LDA * (size_t)K * sizeof(CHAMELEON_Complex64_t) );
+    C = malloc( (size_t)LDC * (size_t)N * sizeof(CHAMELEON_Complex64_t) );
+    CHAMELEON_Alloc_Workspace_zgels( Am, K, &descT, P, Q );
+
+    /* Fills the matrix with random values */
+    CHAMELEON_zplrnt( Am, K, A, LDA, seedA );
+    CHAMELEON_zplrnt( M, N, C, LDC, seedC );
+
+    /* Computes the factorization */
+    hres = CHAMELEON_zgeqrf( Am, K, A, LDA, descT );
+
+    /* Computes unmqr */
+    testing_start( &test_data );
+    hres += CHAMELEON_zunmqr( side, trans, M, N, K, A, LDA, descT, C, LDC );
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zunmqr( side, M, N, K ) );
+
+    /* Generates Q explicitly and hands it to check_zqc_std() with C and C0 */
+    if ( check ) {
+        CHAMELEON_Complex64_t *C0   = malloc( (size_t)LDC * (size_t)N * sizeof(CHAMELEON_Complex64_t) );
+        CHAMELEON_Complex64_t *Qlap = malloc( (size_t)Am * (size_t)Am * sizeof(CHAMELEON_Complex64_t) );
+
+        /* C0 is regenerated from the same seed as C */
+        CHAMELEON_zplrnt( M, N, C0, LDC, seedC );
+        hres += CHAMELEON_zungqr( Am, Am, K, A, LDA, descT, Qlap, Am );
+
+        hres += check_zqc_std( args, side, trans, M, N, C0, C, LDC, Qlap, Am );
+
+        free( C0 );
+        free( Qlap );
+    }
+
+    free( A );
+    CHAMELEON_Desc_Destroy( &descT );
+    free( C );
+
+    return hres;
+}
+
testing_t test_zunmqr; const char *zunmqr_params[] = { "mtxfmt", "nb", "ib", "side", "trans", "m", "n", "k", "lda", "ldc", "qra", "seedA", "seedC", NULL }; @@ -138,7 +218,7 @@ testing_zunmqr_init( void ) test_zunmqr.output = zunmqr_output; test_zunmqr.outchk = zunmqr_outchk; test_zunmqr.fptr_desc = testing_zunmqr_desc; - test_zunmqr.fptr_std = NULL; + test_zunmqr.fptr_std = testing_zunmqr_std; test_zunmqr.next = NULL; testing_register( &test_zunmqr ); diff --git a/testing/testing_zunmqr_hqr.c b/testing/testing_zunmqr_hqr.c index b213356d39150445c4903288af4b5068ce95afc3..ce77e0b414b4a2fa4ec79b4fb95f86fdd990c3f1 100644 --- a/testing/testing_zunmqr_hqr.c +++ b/testing/testing_zunmqr_hqr.c @@ -129,6 +129,95 @@ testing_zunmqr_hqr_desc( run_arg_list_t *args, int check ) return hres; }
+/**
+ * Test of CHAMELEON_zunmqr_param() on plain arrays allocated with malloc().
+ *
+ * A libhqr tree is initialized from the tile counts of the workspace
+ * descriptor, a random Am-by-K matrix (Am = M for ChamLeft, N otherwise) is
+ * factorized with CHAMELEON_zgeqrf_param(), then CHAMELEON_zunmqr_param() is
+ * applied to a random M-by-N matrix C between testing_start() and
+ * testing_stop(). When check is non-zero, Q is generated with
+ * CHAMELEON_zungqr_param() and the result is handed to check_zqc_std().
+ *
+ * @param[in] args   Run arguments: nb, ib, side, trans, N, M, K, LDA, LDC,
+ *                   qra, qrp, llvl, hlvl, domino, seedA and seedC are read.
+ * @param[in] check  Non-zero to run the check after the computation.
+ *
+ * @return The accumulated return codes of the factorization, of
+ *         zunmqr_param and, when requested, of the Q generation and of the
+ *         check.
+ */
+int
+testing_zunmqr_hqr_std( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int          nb     = run_arg_get_int( args, "nb", 320 );
+    int          ib     = run_arg_get_int( args, "ib", 48 );
+    int          P      = parameters_getvalue_int( "P" );
+    cham_side_t  side   = run_arg_get_side( args, "side", ChamLeft );
+    cham_trans_t trans  = run_arg_get_trans( args, "trans", ChamNoTrans );
+    int          N      = run_arg_get_int( args, "N", 1000 );
+    int          M      = run_arg_get_int( args, "M", N );
+    int          K      = run_arg_get_int( args, "K", chameleon_min( M, N ) );
+    int          LDA    = run_arg_get_int( args, "LDA", ( side == ChamLeft ) ? M : N );
+    int          LDC    = run_arg_get_int( args, "LDC", M );
+    int          qr_a   = run_arg_get_int( args, "qra", -1 );
+    int          qr_p   = run_arg_get_int( args, "qrp", -1 );
+    int          llvl   = run_arg_get_int( args, "llvl", -1 );
+    int          hlvl   = run_arg_get_int( args, "hlvl", -1 );
+    int          domino = run_arg_get_int( args, "domino", -1 );
+    int          seedA  = run_arg_get_int( args, "seedA", random() );
+    int          seedC  = run_arg_get_int( args, "seedC", random() );
+    int          Q      = parameters_compute_q( P );
+
+    /* Matrices, workspace descriptors and reduction tree */
+    int                    Am;
+    CHAMELEON_Complex64_t *A, *C;
+    CHAM_desc_t           *descTS, *descTT;
+    libhqr_tree_t          qrtree;
+    libhqr_matrix_t        matrix;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* Calculates the number of rows of A according to the side */
+    Am = ( side == ChamLeft ) ? M : N;
+
+    /* Creates the matrices: the sizes are computed in size_t so that the
+     * LDA*K and LDC*N products cannot overflow int */
+    A = malloc( (size_t)LDA * (size_t)K * sizeof(CHAMELEON_Complex64_t) );
+    C = malloc( (size_t)LDC * (size_t)N * sizeof(CHAMELEON_Complex64_t) );
+    CHAMELEON_Alloc_Workspace_zgels( Am, K, &descTS, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( Am, K, &descTT, P, Q );
+
+    /* Initialize matrix tree from the tile counts of the workspace */
+    matrix.mt    = descTS->mt;
+    matrix.nt    = descTS->nt;
+    matrix.nodes = P * Q;
+    matrix.p     = P;
+
+    libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 );
+
+    /* Fills the matrix with random values */
+    CHAMELEON_zplrnt( Am, K, A, LDA, seedA );
+    CHAMELEON_zplrnt( M, N, C, LDC, seedC );
+
+    /* Computes the factorization */
+    hres = CHAMELEON_zgeqrf_param( &qrtree, Am, K, A, LDA, descTS, descTT );
+
+    /* Computes unmqr_hqr */
+    testing_start( &test_data );
+    hres += CHAMELEON_zunmqr_param( &qrtree, side, trans, M, N, K, A, LDA, descTS, descTT, C, LDC );
+    test_data.hres = hres;
+    testing_stop( &test_data, flops_zunmqr( side, M, N, K ) );
+
+    /* Generates Q explicitly and hands it to check_zqc_std() with C and C0 */
+    if ( check ) {
+        CHAMELEON_Complex64_t *C0   = malloc( (size_t)LDC * (size_t)N * sizeof(CHAMELEON_Complex64_t) );
+        CHAMELEON_Complex64_t *Qlap = malloc( (size_t)Am * (size_t)Am * sizeof(CHAMELEON_Complex64_t) );
+
+        /* C0 is regenerated from the same seed as C */
+        CHAMELEON_zplrnt( M, N, C0, LDC, seedC );
+        hres += CHAMELEON_zungqr_param( &qrtree, Am, Am, K, A, LDA, descTS, descTT, Qlap, Am );
+
+        hres += check_zqc_std( args, side, trans, M, N, C0, C, LDC, Qlap, Am );
+
+        free( C0 );
+        free( Qlap );
+    }
+
+    free( A );
+    CHAMELEON_Desc_Destroy( &descTS );
+    CHAMELEON_Desc_Destroy( &descTT );
+    free( C );
+    libhqr_finalize( &qrtree );
+
+    return hres;
+}
+
testing_t test_zunmqr_hqr; const char *zunmqr_hqr_params[] = { "mtxfmt", "nb", "ib", "side", "trans", "m", "n", "k", "lda", "ldc", "qra", "qrp", @@ -149,7 +238,7 @@ testing_zunmqr_hqr_init( void ) test_zunmqr_hqr.output = zunmqr_hqr_output; test_zunmqr_hqr.outchk = zunmqr_hqr_outchk; test_zunmqr_hqr.fptr_desc = testing_zunmqr_hqr_desc; - test_zunmqr_hqr.fptr_std = NULL; + test_zunmqr_hqr.fptr_std =
testing_zunmqr_hqr_std; test_zunmqr_hqr.next = NULL; testing_register( &test_zunmqr_hqr );