diff --git a/coreblas/compute/core_zpamm.c b/coreblas/compute/core_zpamm.c index f3b9d3aa429b7299c6427cb79a7a7fb331fae276..d3c750fdbd92953ecf519b6759cfb3d4ac25ab4c 100644 --- a/coreblas/compute/core_zpamm.c +++ b/coreblas/compute/core_zpamm.c @@ -284,18 +284,22 @@ CORE_zpamm(int op, cham_side_t side, cham_store_t storev, } /**/ - if (op==ChameleonW) { + if ( op == ChameleonW ) + { info = CORE_zpamm_w( - side, trans, uplo, M, N, K, L, vi2, vi3, - A1, LDA1, A2, LDA2, V, LDV, W, LDW); - if (info != 0) - return info; - } else if (op==ChameleonA2) { + side, trans, uplo, M, N, K, L, vi2, vi3, + A1, LDA1, A2, LDA2, V, LDV, W, LDW); + } + else if ( op == ChameleonA2 ) + { info = CORE_zpamm_a2( - side, trans, uplo, M, N, K, L, vi2, vi3, - A2, LDA2, V, LDV, W, LDW); - if (info != 0) - return info; + side, trans, uplo, M, N, K, L, vi2, vi3, + A2, LDA2, V, LDV, W, LDW); + } + + + if ( info != 0 ) { + return info; } return CHAMELEON_SUCCESS; @@ -303,28 +307,26 @@ CORE_zpamm(int op, cham_side_t side, cham_store_t storev, /**/ static inline int -CORE_zpamm_w(cham_side_t side, cham_trans_t trans, cham_uplo_t uplo, - int M, int N, int K, int L, - int vi2, int vi3, - const CHAMELEON_Complex64_t *A1, int LDA1, - CHAMELEON_Complex64_t *A2, int LDA2, - const CHAMELEON_Complex64_t *V, int LDV, - CHAMELEON_Complex64_t *W, int LDW) +CORE_zpamm_w( cham_side_t side, cham_trans_t trans, cham_uplo_t uplo, + int M, int N, int K, int L, int vi2, int vi3, + const CHAMELEON_Complex64_t *A1, int LDA1, + CHAMELEON_Complex64_t *A2, int LDA2, + const CHAMELEON_Complex64_t *V, int LDV, + CHAMELEON_Complex64_t *W, int LDW ) { /* * W = A1 + op(V) * A2 or W = A1 + A2 * op(V) */ - int j; - static CHAMELEON_Complex64_t zone = 1.0; - static CHAMELEON_Complex64_t zzero = 0.0; + static CHAMELEON_Complex64_t zone = 1.0; + static CHAMELEON_Complex64_t zzero = 0.0; - if (side == ChamLeft) { - - if (((trans == ChamConjTrans) && (uplo == ChamUpper)) || - ((trans == ChamNoTrans) && (uplo == ChamLower))) { + if ( side == ChamLeft ) { + if ( ((trans == ChamConjTrans) && (uplo == ChamUpper)) || + ((trans == ChamNoTrans) && (uplo == ChamLower)) ) + { /* * W = A1 + V' * A2 */ @@ -445,32 +447,29 @@ CORE_zpamm_w(cham_side_t side, cham_trans_t trans, cham_uplo_t uplo, /**/ static inline int -CORE_zpamm_a2(cham_side_t side, cham_trans_t trans, cham_uplo_t uplo, - int M, int N, int K, int L, - int vi2, int vi3, - CHAMELEON_Complex64_t *A2, int LDA2, - const CHAMELEON_Complex64_t *V, int LDV, - CHAMELEON_Complex64_t *W, int LDW) +CORE_zpamm_a2( cham_side_t side, cham_trans_t trans, cham_uplo_t uplo, + int M, int N, int K, int L, int vi2, int vi3, + CHAMELEON_Complex64_t *A2, int LDA2, + const CHAMELEON_Complex64_t *V, int LDV, + CHAMELEON_Complex64_t *W, int LDW ) { - - /* - * A2 = A2 + op(V) * W or A2 = A2 + W * op(V) - */ - + /* + * A2 = A2 + op(V) * W or A2 = A2 + W * op(V) + */ int j; static CHAMELEON_Complex64_t zone = 1.0; - static CHAMELEON_Complex64_t mzone = -1.0; + static CHAMELEON_Complex64_t mzone = -1.0; - if (side == ChamLeft) { - - if (((trans == ChamConjTrans) && (uplo == ChamUpper)) || - ((trans == ChamNoTrans) && (uplo == ChamLower))) { + if ( side == ChamLeft ) { + if ( ((trans == ChamConjTrans) && (uplo == ChamUpper)) || + ((trans == ChamNoTrans) && (uplo == ChamLower)) ) + { printf("Left Upper/ConjTrans & Lower/NoTrans not implemented yet\n"); return CHAMELEON_ERR_NOT_SUPPORTED; - } - else { //trans + else + { //trans /* * A2 = A2 - V * W diff --git a/coreblas/compute/core_zparfb.c b/coreblas/compute/core_zparfb.c index de9756506e88cd00975d30d3714c2cd881c3e6e1..05775accf2e45a8d26485e31a3636a9e456c121d 100644 --- a/coreblas/compute/core_zparfb.c +++ b/coreblas/compute/core_zparfb.c @@ -12,13 +12,8 @@ * @brief Chameleon core_zparfb CPU kernel * * @version 1.2.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 - * @author Dulceneia Becker - * @author Mathieu Faverge - * @author Emmanuel Agullo - * @author Cedric Castagnede * @author Florent Pruvost + * @author Mathieu Faverge * @date 2022-02-22 * @precisions normal z -> c d s * @@ -134,19 +129,19 @@ ******************************************************************************* * * @retval CHAMELEON_SUCCESS successful exit - * @retval <0 if -i, the i-th argument had an illegal value - * + * @retval <0 if -i, the i-th argument had an illegal value + * @retval The recommended LWORK value, if LWORK == -1 on entry. */ /* This kernel is never traced so return type on previous line for convert2eztrace.pl script */ int -CORE_zparfb(cham_side_t side, cham_trans_t trans, - cham_dir_t direct, cham_store_t storev, - int M1, int N1, int M2, int N2, int K, int L, - CHAMELEON_Complex64_t *A1, int LDA1, - CHAMELEON_Complex64_t *A2, int LDA2, - const CHAMELEON_Complex64_t *V, int LDV, - const CHAMELEON_Complex64_t *T, int LDT, - CHAMELEON_Complex64_t *WORK, int LDWORK) +CORE_zparfb( cham_side_t side, cham_trans_t trans, + cham_dir_t direct, cham_store_t storev, + int M1, int N1, int M2, int N2, int K, int L, + CHAMELEON_Complex64_t *A1, int LDA1, + CHAMELEON_Complex64_t *A2, int LDA2, + const CHAMELEON_Complex64_t *V, int LDV, + const CHAMELEON_Complex64_t *T, int LDT, + CHAMELEON_Complex64_t *WORK, int LDWORK ) { static CHAMELEON_Complex64_t zone = 1.0; static CHAMELEON_Complex64_t mzone = -1.0; @@ -194,13 +189,13 @@ CORE_zparfb(cham_side_t side, cham_trans_t trans, } /* Quick return */ - if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0)) + if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0)) { return CHAMELEON_SUCCESS; + } if (direct == ChamDirForward) { if (side == ChamLeft) { - /* * Column or Rowwise / Forward / Left * ---------------------------------- diff --git a/coreblas/compute/core_zttmlq.c b/coreblas/compute/core_zttmlq.c index dcd962c008cf2178401916a36bd3ef244d2b32bd..9e7f433fef780d2259b9f1327f85df07cfc3e6f9 100644 --- a/coreblas/compute/core_zttmlq.c +++ b/coreblas/compute/core_zttmlq.c @@ -252,9 +252,9 @@ int CORE_zttmlq(cham_side_t side, cham_trans_t trans, mi1, ni1, mi2, ni2, kb, l, A1 + LDA1 * jc + ic, LDA1, A2, LDA2, - V + i, LDV, + V + i, LDV, T + LDT * i, LDT, - WORK, LDWORK); + WORK, LDWORK ); } return CHAMELEON_SUCCESS; } diff --git a/coreblas/compute/core_zttmqr.c b/coreblas/compute/core_zttmqr.c index 4770c22c069cc1f764db0a0b8c97a0b94d2f0456..5229c4725a60cb27f530741f49d81257e87ecb3f 100644 --- a/coreblas/compute/core_zttmqr.c +++ b/coreblas/compute/core_zttmqr.c @@ -6,6 +6,7 @@ * Tennessee Research Foundation. All rights reserved. * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * Univ. Bordeaux. All rights reserved. + * *** * * @brief Chameleon core_zttmqr CPU kernel @@ -206,11 +207,13 @@ int CORE_zttmqr(cham_side_t side, cham_trans_t trans, } /* Quick return */ - if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0)) + if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0)) { return CHAMELEON_SUCCESS; + } - if (((side == ChamLeft) && (trans != ChamNoTrans)) - || ((side == ChamRight) && (trans == ChamNoTrans))) { + if ( ((side == ChamLeft ) && (trans != ChamNoTrans)) || + ((side == ChamRight) && (trans == ChamNoTrans)) ) + { i1 = 0; i3 = IB; } @@ -249,5 +252,3 @@ int CORE_zttmqr(cham_side_t side, cham_trans_t trans, } return CHAMELEON_SUCCESS; } - - diff --git a/cudablas/compute/cuda_zttmlq.c b/cudablas/compute/cuda_zttmlq.c index c46a589810172fe52e30cdafa1aaab50851a7e85..845fc38d38a398da9f3cb9aa27f983218e3159f7 100644 --- a/cudablas/compute/cuda_zttmlq.c +++ b/cudablas/compute/cuda_zttmlq.c @@ -55,12 +55,12 @@ CUDA_zttmlq( cham_side_t side, cham_trans_t trans, if (N1 < 0) { return -4; } - if ( (M2 < 0) || - ( (M2 != M1) && (side == ChamRight) ) ){ + if ((M2 < 0) || + ( (side == ChamRight) && (M1 != M2) ) ) { return -5; } - if ( (N2 < 0) || - ( (N2 != N1) && (side == ChamLeft) ) ){ + if ((N2 < 0) || + ( (side == ChamLeft) && (N1 != N2) ) ) { return -6; } if ((K < 0) || diff --git a/cudablas/compute/cuda_zttmqr.c b/cudablas/compute/cuda_zttmqr.c index e18466a0ed3bbcf797affb5d4113a886ddb79b79..23238e79248ead40c80a3157fd69f46ba3f0617e 100644 --- a/cudablas/compute/cuda_zttmqr.c +++ b/cudablas/compute/cuda_zttmqr.c @@ -32,7 +32,7 @@ CUDA_zttmqr( cham_side_t side, cham_trans_t trans, cuDoubleComplex *WORK, int LWORK, cublasHandle_t handle ) { - int i, i1, i3; + int i, i1, i3; int NQ; int kb, l; int ic = 0; @@ -131,10 +131,10 @@ CUDA_zttmqr( cham_side_t side, cham_trans_t trans, CUDA_zparfb( side, trans, ChamDirForward, ChamColumnwise, mi1, ni1, mi2, ni2, kb, l, - A1 + LDA1*jc+ic, LDA1, + A1 + LDA1 * jc + ic, LDA1, A2, LDA2, - V + LDV*i, LDV, - T + LDT*i, LDT, + V + LDV * i, LDV, + T + LDT * i, LDT, WORK, LWORK, handle ); } return CHAMELEON_SUCCESS;