From a731cd2f1611054cb70a40d1a321aa77d51493b5 Mon Sep 17 00:00:00 2001
From: Florent Pruvost <florent.pruvost@inria.fr>
Date: Mon, 23 Mar 2015 16:09:12 +0000
Subject: [PATCH] z_check_gemm is buggy: copying its body inline solves the
 problem - TODO: understand the error

---
 timing/time_zgemm.c      | 27 ++++++++++++++++++++++-----
 timing/time_zgemm_tile.c | 27 ++++++++++++++++++++++-----
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/timing/time_zgemm.c b/timing/time_zgemm.c
index 029e712e6..3497315d3 100644
--- a/timing/time_zgemm.c
+++ b/timing/time_zgemm.c
@@ -58,11 +58,28 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
     /* Check the solution */
     if (check)
     {
-        dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K,
-                                           alpha, A, LDA, B, LDB, beta, C, C2, LDC,
-                                           &(dparam[IPARAM_ANORM]), 
-                                           &(dparam[IPARAM_BNORM]), 
-                                           &(dparam[IPARAM_XNORM]));
+//        dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K,
+//                                           &alpha, A, LDA, B, LDB, &beta, C, C2, LDC,
+//                                           &(dparam[IPARAM_ANORM]),
+//                                           &(dparam[IPARAM_BNORM]),
+//                                           &(dparam[IPARAM_XNORM]));
+
+        MORSE_Complex64_t beta_const = -1.0;
+        double *work = (double *)malloc(max(K,max(M, N))* sizeof(double));
+
+        dparam[IPARAM_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2,   LDC, work);
+        dparam[IPARAM_BNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C, LDC, work);
+
+        cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)MorseNoTrans, (CBLAS_TRANSPOSE)MorseNoTrans, M, N, K,
+                    CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C2, LDC);
+
+        dparam[IPARAM_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work);
+
+        cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), C, 1, C2, 1);
+
+        dparam[IPARAM_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work);
+
+        free(work);
         free(C2);
     }
 
diff --git a/timing/time_zgemm_tile.c b/timing/time_zgemm_tile.c
index b6db750c1..594b02114 100644
--- a/timing/time_zgemm_tile.c
+++ b/timing/time_zgemm_tile.c
@@ -60,11 +60,28 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
         PASTE_TILE_TO_LAPACK( descB, B, check, MORSE_Complex64_t, LDB, N );
         PASTE_TILE_TO_LAPACK( descC, C, check, MORSE_Complex64_t, LDC, N );
 
-        dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K,
-                                           alpha, A, LDA, B, LDB, beta, C, C2, LDC,
-                                           &(dparam[IPARAM_ANORM]),
-                                           &(dparam[IPARAM_BNORM]),
-                                           &(dparam[IPARAM_XNORM]));
+//        dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K,
+//                                           alpha, A, LDA, B, LDB, beta, C, C2, LDC,
+//                                           &(dparam[IPARAM_ANORM]),
+//                                           &(dparam[IPARAM_BNORM]),
+//                                           &(dparam[IPARAM_XNORM]));
+
+        MORSE_Complex64_t beta_const = -1.0;
+        double *work = (double *)malloc(max(K,max(M, N))* sizeof(double));
+
+        dparam[IPARAM_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2,   LDC, work);
+        dparam[IPARAM_BNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C, LDC, work);
+
+        cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)MorseNoTrans, (CBLAS_TRANSPOSE)MorseNoTrans, M, N, K,
+                    CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C2, LDC);
+
+        dparam[IPARAM_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work);
+
+        cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), C, 1, C2, 1);
+
+        dparam[IPARAM_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work);
+
+        free(work);
         free(A); free(B); free(C); free(C2);
     }
 
-- 
GitLab