From a731cd2f1611054cb70a40d1a321aa77d51493b5 Mon Sep 17 00:00:00 2001 From: Florent Pruvost <florent.pruvost@inria.fr> Date: Mon, 23 Mar 2015 16:09:12 +0000 Subject: [PATCH] z_check_gemm is buggy: copy the body solves the problem - TODO: understand the error --- timing/time_zgemm.c | 27 ++++++++++++++++++++++----- timing/time_zgemm_tile.c | 27 ++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/timing/time_zgemm.c b/timing/time_zgemm.c index 029e712e6..3497315d3 100644 --- a/timing/time_zgemm.c +++ b/timing/time_zgemm.c @@ -58,11 +58,28 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) /* Check the solution */ if (check) { - dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K, - alpha, A, LDA, B, LDB, beta, C, C2, LDC, - &(dparam[IPARAM_ANORM]), - &(dparam[IPARAM_BNORM]), - &(dparam[IPARAM_XNORM])); +// dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K, +// &alpha, A, LDA, B, LDB, &beta, C, C2, LDC, +// &(dparam[IPARAM_ANORM]), +// &(dparam[IPARAM_BNORM]), +// &(dparam[IPARAM_XNORM])); + + MORSE_Complex64_t beta_const = -1.0; + double *work = (double *)malloc(max(K,max(M, N))* sizeof(double)); + + dparam[IPARAM_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + dparam[IPARAM_BNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C, LDC, work); + + cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)MorseNoTrans, (CBLAS_TRANSPOSE)MorseNoTrans, M, N, K, + CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C2, LDC); + + dparam[IPARAM_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + + cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), C, 1, C2, 1); + + dparam[IPARAM_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + + free(work); free(C2); } diff --git a/timing/time_zgemm_tile.c b/timing/time_zgemm_tile.c index b6db750c1..594b02114 100644 --- a/timing/time_zgemm_tile.c +++ b/timing/time_zgemm_tile.c @@ -60,11 +60,28 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) PASTE_TILE_TO_LAPACK( descB, B, check, MORSE_Complex64_t, LDB, N ); PASTE_TILE_TO_LAPACK( descC, C, check, MORSE_Complex64_t, LDC, N ); - dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K, - alpha, A, LDA, B, LDB, beta, C, C2, LDC, - &(dparam[IPARAM_ANORM]), - &(dparam[IPARAM_BNORM]), - &(dparam[IPARAM_XNORM])); +// dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K, +// alpha, A, LDA, B, LDB, beta, C, C2, LDC, +// &(dparam[IPARAM_ANORM]), +// &(dparam[IPARAM_BNORM]), +// &(dparam[IPARAM_XNORM])); + + MORSE_Complex64_t beta_const = -1.0; + double *work = (double *)malloc(max(K,max(M, N))* sizeof(double)); + + dparam[IPARAM_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + dparam[IPARAM_BNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C, LDC, work); + + cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)MorseNoTrans, (CBLAS_TRANSPOSE)MorseNoTrans, M, N, K, + CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C2, LDC); + + dparam[IPARAM_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + + cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), C, 1, C2, 1); + + dparam[IPARAM_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + + free(work); free(A); free(B); free(C); free(C2); } -- GitLab