diff --git a/timing/time_zgemm.c b/timing/time_zgemm.c index 029e712e685f9299d672ed178e31a5f9a19edb90..3497315d346bae2f342da1c8edccaab079dd0c46 100644 --- a/timing/time_zgemm.c +++ b/timing/time_zgemm.c @@ -58,11 +58,28 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) /* Check the solution */ if (check) { - dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K, - alpha, A, LDA, B, LDB, beta, C, C2, LDC, - &(dparam[IPARAM_ANORM]), - &(dparam[IPARAM_BNORM]), - &(dparam[IPARAM_XNORM])); +// dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K, +// &alpha, A, LDA, B, LDB, &beta, C, C2, LDC, +// &(dparam[IPARAM_ANORM]), +// &(dparam[IPARAM_BNORM]), +// &(dparam[IPARAM_XNORM])); + + MORSE_Complex64_t beta_const = -1.0; + double *work = (double *)malloc(max(K,max(M, N))* sizeof(double)); + + dparam[IPARAM_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + dparam[IPARAM_BNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C, LDC, work); + + cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)MorseNoTrans, (CBLAS_TRANSPOSE)MorseNoTrans, M, N, K, + CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C2, LDC); + + dparam[IPARAM_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + + cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), C, 1, C2, 1); + + dparam[IPARAM_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + + free(work); free(C2); } diff --git a/timing/time_zgemm_tile.c b/timing/time_zgemm_tile.c index b6db750c1444bcdcaca07f1ee4c20e324848a0bc..594b02114dcc525e17c71ee0815739daa781456b 100644 --- a/timing/time_zgemm_tile.c +++ b/timing/time_zgemm_tile.c @@ -60,11 +60,28 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) PASTE_TILE_TO_LAPACK( descB, B, check, MORSE_Complex64_t, LDB, N ); PASTE_TILE_TO_LAPACK( descC, C, check, MORSE_Complex64_t, LDC, N ); - dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K, - alpha, A, LDA, B, LDB, beta, C, C2, LDC, - &(dparam[IPARAM_ANORM]), - &(dparam[IPARAM_BNORM]), - &(dparam[IPARAM_XNORM])); +// dparam[IPARAM_RES] = z_check_gemm( MorseNoTrans, MorseNoTrans, M, N, K, +// alpha, A, LDA, B, LDB, beta, C, C2, LDC, +// &(dparam[IPARAM_ANORM]), +// &(dparam[IPARAM_BNORM]), +// &(dparam[IPARAM_XNORM])); + + MORSE_Complex64_t beta_const = -1.0; + double *work = (double *)malloc(max(K,max(M, N))* sizeof(double)); + + dparam[IPARAM_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + dparam[IPARAM_BNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C, LDC, work); + + cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)MorseNoTrans, (CBLAS_TRANSPOSE)MorseNoTrans, M, N, K, + CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), C2, LDC); + + dparam[IPARAM_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + + cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), C, 1, C2, 1); + + dparam[IPARAM_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), M, N, C2, LDC, work); + + free(work); free(A); free(B); free(C); free(C2); }