diff --git a/testing/testing_zcheck.c b/testing/testing_zcheck.c
index 7569e18cd57a602f491945d57327423c43f4c1a4..f3ba7d628eb9da15c5f7bbf993055b82d98524d6 100644
--- a/testing/testing_zcheck.c
+++ b/testing/testing_zcheck.c
@@ -491,52 +491,107 @@ int check_zscale( run_arg_list_t *args, cham_uplo_t uplo, CHAMELEON_Complex64_t
  *
  *******************************************************************************
  */
+/*
+ * Standard (LAPACK layout) core of the GEMM check.
+ *
+ * Computes alpha * op(A) * op(B) + beta * Cref in place in Cref with
+ * cblas_zgemm, subtracts C from it, and compares the max norm of the
+ * residual, scaled by K * eps and the operand norms, against a threshold
+ * of 10. The four norms are recorded in args.
+ *
+ * Cref is overwritten with the residual. Returns 0 when the check
+ * passes, 1 otherwise (including a NaN or infinite residual).
+ */
+int check_zgemm_std( run_arg_list_t *args, cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, int M, int N, int K, CHAMELEON_Complex64_t *A, int LDA,
+                     CHAMELEON_Complex64_t *B, int LDB, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *Cref, CHAMELEON_Complex64_t *C, int LDC )
+{
+    int info_solution = 0;
+    double Anorm, Bnorm, Crefnorm, Rnorm, result;
+    CHAMELEON_Complex64_t mzone = -1.0;
+
+    /* Computes the norms of A and B; the stored shape and the norm type depend on the transposition */
+    if ( transA == ChamNoTrans ) {
+        Anorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, K, A, LDA );
+    } else {
+        Anorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'O', K, M, A, LDA );
+    }
+    if ( transB == ChamNoTrans ) {
+        Bnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', K, N, B, LDB );
+    } else {
+        Bnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'O', N, K, B, LDB );
+    }
+
+    /* Max norm of the initial Cref, taken before it is overwritten below */
+    Crefnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL );
+
+    double eps = LAPACKE_dlamch_work('e');
+
+    /* Computes the reference product into Cref, then subtracts C so that Cref holds the residual */
+    cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K,
+                 CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC );
+    cblas_zaxpy( LDC * N, CBLAS_SADDR(mzone), C, 1, Cref, 1 );
+
+    /* Max norm of the residual */
+    Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL );
+
+    if ( ( alpha != 0. ) || (beta != 0. ) ) {
+        result = Rnorm / ( ( cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm ) * K * eps );
+    }
+    else {
+        result = Rnorm;
+    }
+    run_arg_add_double( args, "||A||", Anorm );
+    run_arg_add_double( args, "||B||", Bnorm );
+    run_arg_add_double( args, "||C||", Crefnorm );
+    run_arg_add_double( args, "||R||", Rnorm );
+
+    /* Verifies if the result is inside a threshold */
+    if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
+        info_solution = 1;
+    }
+    else {
+        info_solution = 0;
+    }
+
+    return info_solution;
+}
 int check_zgemm( run_arg_list_t *args, cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA,
                  CHAM_desc_t *descB, CHAMELEON_Complex64_t beta, CHAM_desc_t *descCref, CHAM_desc_t *descC )
 {
     int An, LDA, Bn, LDB;
-    int info_solution        = 0;
-    int M                    = descC->m;
-    int N                    = descC->n;
-    int K                    = (transA != ChamNoTrans)? descA->m : descA->n;
-    int LDC                  = descC->m;
-    int rank                 = CHAMELEON_Comm_rank();
-    double Anorm, Bnorm, Crefnorm, Rnorm, result;
-    CHAMELEON_Complex64_t *A = NULL;
-    CHAMELEON_Complex64_t *B = NULL;
-    CHAMELEON_Complex64_t *C = NULL;
+    int info_solution           = 0;
+    int M                       = descC->m;
+    int N                       = descC->n;
+    int K                       = (transA != ChamNoTrans)? descA->m : descA->n;
+    int LDC                     = descC->m;
+    int rank                    = CHAMELEON_Comm_rank();
+    CHAMELEON_Complex64_t *A    = NULL;
+    CHAMELEON_Complex64_t *B    = NULL;
+    CHAMELEON_Complex64_t *C    = NULL;
     CHAMELEON_Complex64_t *Cref = NULL;
-    CHAMELEON_Complex64_t mzone = -1.0;
 
-    /* Calculates the dimensions according to the transposition */
     if ( transA == ChamNoTrans ) {
-        Anorm = CHAMELEON_zlange_Tile(ChamInfNorm, descA);
-        LDA = M;
         An  = K;
+        LDA = M;
     } else {
-        Anorm = CHAMELEON_zlange_Tile(ChamOneNorm, descA);
-        LDA = K;
         An  = M;
+        LDA = K;
     }
     if ( transB == ChamNoTrans ) {
-        Bnorm = CHAMELEON_zlange_Tile(ChamInfNorm, descB);
-        LDB = K;
         Bn  = N;
+        LDB = K;
+
     } else {
-        Bnorm = CHAMELEON_zlange_Tile(ChamOneNorm, descB);
-        LDB = N;
         Bn  = K;
+        LDB = N;
     }
 
-    /* Computes the norms for comparing */
-    Crefnorm = CHAMELEON_zlange_Tile(ChamMaxNorm, descCref);
-
     /* Creates the LAPACK version of the matrices */
     if ( rank == 0 ) {
-        A    = (CHAMELEON_Complex64_t *)malloc(An*LDA*sizeof(CHAMELEON_Complex64_t));
-        B    = (CHAMELEON_Complex64_t *)malloc(Bn*LDB*sizeof(CHAMELEON_Complex64_t));
-        Cref = (CHAMELEON_Complex64_t *)malloc(N *LDC*sizeof(CHAMELEON_Complex64_t));
-        C    = (CHAMELEON_Complex64_t *)malloc(N *LDC*sizeof(CHAMELEON_Complex64_t));
+        A    = (CHAMELEON_Complex64_t *)malloc(LDA*An*sizeof(CHAMELEON_Complex64_t));
+        B    = (CHAMELEON_Complex64_t *)malloc(LDB*Bn*sizeof(CHAMELEON_Complex64_t));
+        Cref = (CHAMELEON_Complex64_t *)malloc(LDC*N*sizeof(CHAMELEON_Complex64_t));
+        C    = (CHAMELEON_Complex64_t *)malloc(LDC*N*sizeof(CHAMELEON_Complex64_t));
     }
 
     CHAMELEON_zDesc2Lap( ChamUpperLower, descA,    A,    LDA );
@@ -545,35 +600,9 @@ int check_zgemm( run_arg_list_t *args, cham_trans_t transA, cham_trans_t transB,
     CHAMELEON_zDesc2Lap( ChamUpperLower, descC,    C,    LDC );
 
     if ( rank == 0 ) {
-        double eps = LAPACKE_dlamch_work('e');
-
-        /* Makes the multiplication with the core function */
-        cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K,
-                     CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC );
-        cblas_zaxpy(LDC * N, CBLAS_SADDR(mzone), C, 1, Cref, 1);
-
-        /* Calculates the norm with the core function's result */
-        Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL );
-
-        if ( ( alpha != 0. ) || (beta != 0. ) ) {
-            result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * K * eps);
-        }
-        else {
-            result = Rnorm;
-        }
-        run_arg_add_double( args, "||A||", Anorm );
-        run_arg_add_double( args, "||B||", Bnorm );
-        run_arg_add_double( args, "||C||", Crefnorm );
-        run_arg_add_double( args, "||R||", Rnorm );
-
-        /* Verifies if the result is inside a threshold */
-        if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
-            info_solution = 1;
-        }
-        else {
-            info_solution = 0;
-        }
-
+
+        info_solution = check_zgemm_std( args, transA, transB, alpha, M, N, K, A, LDA, B, LDB, beta, Cref, C, LDC );
+
         free(A);
         free(B);
         free(C);
diff --git a/testing/testing_zcheck.h b/testing/testing_zcheck.h
index 4f86784dfd58d4fe7be29d90c4f18a7dc1b7e5ab..b2f69378ede2f3cf396c5dd874f1db8c7cc988fc 100644
--- a/testing/testing_zcheck.h
+++ b/testing/testing_zcheck.h
@@ -44,6 +44,8 @@ static inline int check_znorm ( run_arg_list_t *args, cham_mtxtype_t mtx
 static inline int check_zsum      ( run_arg_list_t *args, cham_uplo_t uplo, cham_trans_t trans, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA,
                                     CHAMELEON_Complex64_t beta, CHAM_desc_t *descBref, CHAM_desc_t *descBcham ) { return 0; }
 static inline int check_zscale    ( run_arg_list_t *args, cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA1, CHAM_desc_t *descA2 ) { return 0; }
+static inline int check_zgemm_std ( run_arg_list_t *args, cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, int M, int N, int K, CHAMELEON_Complex64_t *A, int LDA,
+                                    CHAMELEON_Complex64_t *B, int LDB, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *Cref, CHAMELEON_Complex64_t *C, int LDC ) { return 0; }
 static inline int check_zgemm     ( run_arg_list_t *args, cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, CHAM_desc_t *descB,
                                     CHAMELEON_Complex64_t beta, CHAM_desc_t *descCref, CHAM_desc_t *descC ) { return 0; }
 static inline int check_zsymm     ( run_arg_list_t *args, cham_mtxtype_t mtxtype, cham_side_t side, cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, CHAM_desc_t *descB,
@@ -87,6 +89,8 @@ int check_znorm ( run_arg_list_t *args, cham_mtxtype_t mtxtype, cham_nor
 int check_zsum      ( run_arg_list_t *args, cham_uplo_t uplo, cham_trans_t trans, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA,
                       CHAMELEON_Complex64_t beta, CHAM_desc_t *descBref, CHAM_desc_t *descBcham );
 int check_zscale    ( run_arg_list_t *args, cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA1, CHAM_desc_t *descA2 );
+int check_zgemm_std ( run_arg_list_t *args, cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, int M, int N, int K, CHAMELEON_Complex64_t *A, int LDA,
+                      CHAMELEON_Complex64_t *B, int LDB, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *Cref, CHAMELEON_Complex64_t *C, int LDC );
 int check_zgemm     ( run_arg_list_t *args, cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, CHAM_desc_t *descB,
                       CHAMELEON_Complex64_t beta, CHAM_desc_t *descCref, CHAM_desc_t *descC );
 int check_zsymm     ( run_arg_list_t *args, cham_mtxtype_t mtxtype, cham_side_t side, cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, CHAM_desc_t *descB,