From aa0fa3e9fe58dedbe363afc700c87c3a1382f30b Mon Sep 17 00:00:00 2001 From: BARROS DE ASSIS Lucas Date: Tue, 2 Jul 2019 16:16:18 +0200 Subject: [PATCH] Fix codelets --- runtime/starpu/codelets/codelet_dzasum.c | 13 ++-- runtime/starpu/codelets/codelet_zbuild.c | 15 +++-- runtime/starpu/codelets/codelet_zgeadd.c | 43 +++++++------ runtime/starpu/codelets/codelet_zgelqt.c | 29 +++++---- runtime/starpu/codelets/codelet_zgemm.c | 50 +++++++++------ runtime/starpu/codelets/codelet_zgeqrt.c | 29 +++++---- runtime/starpu/codelets/codelet_zgessm.c | 35 ++++++----- runtime/starpu/codelets/codelet_zgessq.c | 13 ++-- runtime/starpu/codelets/codelet_zgetrf.c | 12 ++-- .../starpu/codelets/codelet_zgetrf_incpiv.c | 21 ++++--- .../starpu/codelets/codelet_zgetrf_nopiv.c | 16 ++--- runtime/starpu/codelets/codelet_zgram.c | 32 ++++++---- runtime/starpu/codelets/codelet_zhe2ge.c | 21 ++++--- runtime/starpu/codelets/codelet_zhemm.c | 51 +++++++++------ runtime/starpu/codelets/codelet_zher2k.c | 43 ++++++++----- runtime/starpu/codelets/codelet_zherfb.c | 45 +++++++------ runtime/starpu/codelets/codelet_zherk.c | 37 ++++++----- runtime/starpu/codelets/codelet_zhessq.c | 4 +- runtime/starpu/codelets/codelet_zlacpy.c | 29 +++++---- runtime/starpu/codelets/codelet_zlag2c.c | 45 ++++++++----- runtime/starpu/codelets/codelet_zlange.c | 15 +++-- runtime/starpu/codelets/codelet_zlanhe.c | 14 +++-- runtime/starpu/codelets/codelet_zlansy.c | 13 ++-- runtime/starpu/codelets/codelet_zlantr.c | 12 ++-- runtime/starpu/codelets/codelet_zlascal.c | 19 +++--- runtime/starpu/codelets/codelet_zlaset.c | 17 ++--- runtime/starpu/codelets/codelet_zlaset2.c | 16 ++--- runtime/starpu/codelets/codelet_zlatro.c | 20 +++--- runtime/starpu/codelets/codelet_zlauum.c | 14 +++-- runtime/starpu/codelets/codelet_zplghe.c | 13 ++-- runtime/starpu/codelets/codelet_zplgsy.c | 13 ++-- runtime/starpu/codelets/codelet_zplrnt.c | 13 ++-- runtime/starpu/codelets/codelet_zpotrf.c | 12 ++-- 
runtime/starpu/codelets/codelet_zssssm.c | 45 ++++++------- runtime/starpu/codelets/codelet_zsymm.c | 47 ++++++++------ runtime/starpu/codelets/codelet_zsyr2k.c | 39 +++++++----- runtime/starpu/codelets/codelet_zsyrk.c | 34 +++++----- runtime/starpu/codelets/codelet_zsyssq.c | 11 ++-- .../starpu/codelets/codelet_zsytrf_nopiv.c | 13 ++-- runtime/starpu/codelets/codelet_ztplqt.c | 29 ++++----- runtime/starpu/codelets/codelet_ztpmlqt.c | 51 ++++++++------- runtime/starpu/codelets/codelet_ztpmqrt.c | 52 ++++++++------- runtime/starpu/codelets/codelet_ztpqrt.c | 28 ++++----- runtime/starpu/codelets/codelet_ztradd.c | 30 ++++----- runtime/starpu/codelets/codelet_ztrasm.c | 12 ++-- runtime/starpu/codelets/codelet_ztrmm.c | 34 +++++----- runtime/starpu/codelets/codelet_ztrsm.c | 33 +++++----- runtime/starpu/codelets/codelet_ztrssq.c | 11 ++-- runtime/starpu/codelets/codelet_ztrtri.c | 13 ++-- .../starpu/codelets/codelet_ztsmlq_hetra1.c | 37 ++++++----- .../starpu/codelets/codelet_ztsmqr_hetra1.c | 36 +++++------ runtime/starpu/codelets/codelet_ztstrf.c | 45 ++++++------- runtime/starpu/codelets/codelet_zunmlq.c | 63 ++++++++++--------- runtime/starpu/codelets/codelet_zunmqr.c | 63 ++++++++++--------- 54 files changed, 857 insertions(+), 643 deletions(-) diff --git a/runtime/starpu/codelets/codelet_dzasum.c b/runtime/starpu/codelets/codelet_dzasum.c index 0e94fc672..869927a85 100644 --- a/runtime/starpu/codelets/codelet_dzasum.c +++ b/runtime/starpu/codelets/codelet_dzasum.c @@ -15,6 +15,7 @@ * @comment This file has been automatically generated * from Plasma 2.6.0 for CHAMELEON 0.9.2 * @author Mathieu Faverge + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -30,13 +31,15 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg) int M; int N; CHAMELEON_Complex64_t *A; - int lda; + int ldA; double *work; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - 
starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda); - CORE_dzasum(storev, uplo, M, N, A, lda, work); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N); + CORE_dzasum(storev, uplo, M, N, A, ldA, work); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -47,7 +50,7 @@ CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func) void INSERT_TASK_dzasum( const RUNTIME_option_t *options, cham_store_t storev, cham_uplo_t uplo, int M, int N, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, const CHAM_desc_t *B, int Bm, int Bn ) { struct starpu_codelet *codelet = &cl_dzasum; @@ -65,7 +68,6 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options, STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, &N, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(B, double, Bm, Bn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, @@ -73,4 +75,5 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options, STARPU_NAME, "dzasum", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c index d10574c7c..e329dc377 100644 --- a/runtime/starpu/codelets/codelet_zbuild.c +++ b/runtime/starpu/codelets/codelet_zbuild.c @@ -20,6 +20,7 @@ * @author Emmanuel Agullo * @author Cedric Castagnede * @author Guillaume Sylvand + * @author Lucas Barros de Assis * @date 2016-09-08 * @precisions normal z -> c d s * @@ -31,19 +32,21 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) { CHAMELEON_Complex64_t *A; - int ld; + int ldA; void *user_data; - void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ; + void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ldA, void *user_data) ; int row_min, row_max, col_min, col_max; A = 
(CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback ); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &user_data, &user_build_callback ); /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max] * (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran) * and store it at the address 'buffer' with leading dimension 'ld' */ - user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data); + user_build_callback(row_min, row_max, col_min, col_max, A, ldA, user_data); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -54,7 +57,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func) void INSERT_TASK_zbuild( const RUNTIME_option_t *options, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, void *user_data, void* user_build_callback ) { @@ -77,7 +80,6 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options, STARPU_VALUE, &col_min, sizeof(int), STARPU_VALUE, &col_max, sizeof(int), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &user_data, sizeof(void*), STARPU_VALUE, &user_build_callback, sizeof(void*), STARPU_PRIORITY, options->priority, @@ -86,4 +88,5 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options, STARPU_NAME, "zbuild", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c index d4105c89d..9e8ec52ef 100644 --- a/runtime/starpu/codelets/codelet_zgeadd.c +++ b/runtime/starpu/codelets/codelet_zgeadd.c @@ -17,6 +17,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros 
de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -32,15 +33,18 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg) int N; CHAMELEON_Complex64_t alpha; const CHAMELEON_Complex64_t *A; - int LDA; + int ldA; CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t *B; - int LDB; + int ldB; A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); - CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + + starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta); + CORE_zgeadd(trans, M, N, alpha, A, ldA, beta, B, ldB); return; } @@ -52,22 +56,24 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg) int N; cuDoubleComplex alpha; const cuDoubleComplex *A; - int lda; + int ldA; cuDoubleComplex beta; cuDoubleComplex *B; - int ldb; + int ldB; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta); RUNTIME_getStream( stream ); CUDA_zgeadd( trans, M, N, - &alpha, A, lda, - &beta, B, ldb, + &alpha, A, ldA, + &beta, B, ldB, stream); #ifndef STARPU_CUDA_ASYNC @@ -118,22 +124,22 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) * Scalar factor of A. * * @param[in] A - * Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M + * Matrix of size ldA-by-N, if trans = ChamNoTrans, ldA-by-M * otherwise. * - * @param[in] LDA - * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * @param[in] ldA + * Leading dimension of the array A. ldA >= max(1,k), with k=M, if * trans = ChamNoTrans, and k=N otherwise. 
* * @param[in] beta * Scalar factor of B. * * @param[in,out] B - * Matrix of size LDB-by-N. + * Matrix of size ldB-by-N. * On exit, B = alpha * op(A) + beta * B * - * @param[in] LDB - * Leading dimension of the array B. LDB >= max(1,M) + * @param[in] ldB + * Leading dimension of the array B. ldB >= max(1,M) * ******************************************************************************* * @@ -143,8 +149,8 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) */ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldB ) { struct starpu_codelet *codelet = &cl_zgeadd; void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; @@ -161,16 +167,15 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zgeadd", #endif 0); + (void)ldA; (void)nb; } diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c index 2bdf26fe7..21e38b440 100644 --- a/runtime/starpu/codelets/codelet_zgelqt.c +++ b/runtime/starpu/codelets/codelet_zgelqt.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -34,20 +35,22 @@ static void 
cl_zgelqt_cpu_func(void *descr[], void *cl_arg) int n; int ib; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *T; - int ldt; + int ldT; CHAMELEON_Complex64_t *TAU, *WORK; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */ + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &h_work); WORK = TAU + chameleon_max( m, n ); - CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt ); - CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK); + CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldT ); + CORE_zgelqt(m, n, ib, A, ldA, T, ldT, TAU, WORK); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -93,16 +96,16 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) * with the array TAU, represent the unitary tile Q as a * product of elementary reflectors (see Further Details). * - * @param[in] LDA - * The leading dimension of the array A. LDA >= max(1,M). + * @param[in] ldA + * The leading dimension of the array A. ldA >= max(1,M). * * @param[out] T * The IB-by-N triangular factor T of the block reflector. * T is upper triangular by block (economic storage); * The rest of the array is not referenced. * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. + * @param[in] ldT + * The leading dimension of the array T. ldT >= IB. 
* * @param[out] TAU * The scalar factors of the elementary reflectors (see Further @@ -118,8 +121,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) */ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, int m, int n, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *T, int Tm, int Tn, int ldT) { (void)nb; struct starpu_codelet *codelet = &cl_zgelqt; @@ -137,9 +140,7 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), /* max( nb * (ib+1), ib * (ib+nb) ) */ STARPU_SCRATCH, options->ws_worker, /* /\* ib*n + 3*ib*ib + max(m,n) *\/ */ @@ -150,4 +151,6 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, STARPU_NAME, "zgelqt", #endif 0); + (void)ldT; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 3ac4d9ee0..397926015 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -36,22 +37,26 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) int k; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *B; - int ldb; + int ldB; CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t *C; - int ldc; + int ldC; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &transA, &transB, 
&m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); + + starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta); CORE_zgemm(transA, transB, m, n, k, - alpha, A, lda, - B, ldb, - beta, C, ldc); + alpha, A, ldA, + B, ldB, + beta, C, ldC); } #ifdef CHAMELEON_USE_CUDA @@ -64,26 +69,30 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) int k; cuDoubleComplex alpha; const cuDoubleComplex *A; - int lda; + int ldA; const cuDoubleComplex *B; - int ldb; + int ldB; cuDoubleComplex beta; cuDoubleComplex *C; - int ldc; + int ldC; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); + + starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta); RUNTIME_getStream( stream ); CUDA_zgemm( transA, transB, m, n, k, - &alpha, A, lda, - B, ldb, - &beta, C, ldc, + &alpha, A, ldA, + B, ldB, + &beta, C, ldC, stream); #ifndef STARPU_CUDA_ASYNC @@ -108,9 +117,9 @@ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zgemm(const RUNTIME_option_t *options, cham_trans_t transA, cham_trans_t transB, int m, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC) { 
(void)nb; struct starpu_codelet *codelet = &cl_zgemm; @@ -131,16 +140,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options, STARPU_VALUE, &k, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zgemm", #endif 0); + + (void)ldA; + (void)ldB; + (void)ldC; } diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c index ff78c7ec5..ddc681630 100644 --- a/runtime/starpu/codelets/codelet_zgeqrt.c +++ b/runtime/starpu/codelets/codelet_zgeqrt.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -34,21 +35,23 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg) int n; int ib; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *T; - int ldt; + int ldT; CHAMELEON_Complex64_t *TAU, *WORK; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + n * ib */ + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &h_work); WORK = TAU + chameleon_max( m, n ); - CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldt ); - CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK); + CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldT ); + 
CORE_zgeqrt(m, n, ib, A, ldA, T, ldT, TAU, WORK); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -95,16 +98,16 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func) * with the array TAU, represent the unitary tile Q as a * product of elementary reflectors (see Further Details). * - * @param[in] LDA - * The leading dimension of the array A. LDA >= max(1,M). + * @param[in] ldA + * The leading dimension of the array A. ldA >= max(1,M). * * @param[out] T * The IB-by-N triangular factor T of the block reflector. * T is upper triangular by block (economic storage); * The rest of the array is not referenced. * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. + * @param[in] ldT + * The leading dimension of the array T. ldT >= IB. * * @param[out] TAU * The scalar factors of the elementary reflectors (see Further @@ -120,8 +123,8 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func) */ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, int m, int n, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *T, int Tm, int Tn, int ldT) { (void)nb; struct starpu_codelet *codelet = &cl_zgeqrt; @@ -139,9 +142,7 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), /* max( nb * (ib+1), ib * (ib+nb) ) */ STARPU_SCRATCH, options->ws_worker, /* ib * (m+3*ib) + max(m,n) */ @@ -152,4 +153,6 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, STARPU_NAME, "zgeqrt", #endif 0); + (void)ldT; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c index e4278006b..26ce1b2a2 100644 --- a/runtime/starpu/codelets/codelet_zgessm.c +++ 
b/runtime/starpu/codelets/codelet_zgessm.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -34,16 +35,21 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg) int k; int ib; int *IPIV; - int ldl; + int ldL; CHAMELEON_Complex64_t *D; - int ldd; + int ldD; CHAMELEON_Complex64_t *A; - int lda; + int ldA; D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda); - CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda); + + ldL = STARPU_MATRIX_GET_LD( descr[0] ); + ldD = STARPU_MATRIX_GET_LD( descr[1] ); + ldA = STARPU_MATRIX_GET_LD( descr[2] ); + + starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV); + CORE_zgessm(m, n, k, ib, IPIV, D, ldD, A, ldA); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -80,15 +86,15 @@ CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func) * @param[in] L * The M-by-K lower triangular tile. * - * @param[in] LDL - * The leading dimension of the array L. LDL >= max(1,M). + * @param[in] ldL + * The leading dimension of the array L. ldL >= max(1,M). * * @param[in,out] A * On entry, the M-by-N tile A. * On exit, updated by the application of L. * - * @param[in] LDA - * The leading dimension of the array A. LDA >= max(1,M). + * @param[in] ldA + * The leading dimension of the array A. ldA >= max(1,M). 
* ******************************************************************************* * @@ -100,9 +106,9 @@ CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func) void INSERT_TASK_zgessm( const RUNTIME_option_t *options, int m, int n, int k, int ib, int nb, int *IPIV, - const CHAM_desc_t *L, int Lm, int Ln, int ldl, - const CHAM_desc_t *D, int Dm, int Dn, int ldd, - const CHAM_desc_t *A, int Am, int An, int lda ) + const CHAM_desc_t *L, int Lm, int Ln, int ldL, + const CHAM_desc_t *D, int Dm, int Dn, int ldD, + const CHAM_desc_t *A, int Am, int An, int ldA ) { (void)nb; struct starpu_codelet *codelet = &cl_zgessm; @@ -122,15 +128,14 @@ void INSERT_TASK_zgessm( const RUNTIME_option_t *options, STARPU_VALUE, &ib, sizeof(int), STARPU_VALUE, &IPIV, sizeof(int*), STARPU_R, RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln), - STARPU_VALUE, &ldl, sizeof(int), STARPU_R, RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn), - STARPU_VALUE, &ldd, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zgessm", #endif 0); + (void)ldD; + (void)ldL; } diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c index c9141aefb..165177411 100644 --- a/runtime/starpu/codelets/codelet_zgessq.c +++ b/runtime/starpu/codelets/codelet_zgessq.c @@ -15,6 +15,7 @@ * @comment This file has been automatically generated * from Plasma 2.6.0 for CHAMELEON 0.9.2 * @author Mathieu Faverge + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -29,13 +30,15 @@ static void cl_zgessq_cpu_func(void *descr[], void *cl_arg) int m; int n; CHAMELEON_Complex64_t *A; - int lda; + int ldA; double *SCALESUMSQ; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - 
starpu_codelet_unpack_args(cl_arg, &storev, &m, &n, &lda); - CORE_zgessq( storev, m, n, A, lda, SCALESUMSQ ); + starpu_codelet_unpack_args(cl_arg, &storev, &m, &n); + CORE_zgessq( storev, m, n, A, ldA, SCALESUMSQ ); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -46,7 +49,7 @@ CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func) void INSERT_TASK_zgessq( const RUNTIME_option_t *options, cham_store_t storev, int m, int n, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) { struct starpu_codelet *codelet = &cl_zgessq; @@ -63,7 +66,6 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, @@ -71,4 +73,5 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options, STARPU_NAME, "zgessq", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c index 6711c4de7..3ce6227d9 100644 --- a/runtime/starpu/codelets/codelet_zgetrf.c +++ b/runtime/starpu/codelets/codelet_zgetrf.c @@ -17,6 +17,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -30,7 +31,7 @@ static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg) int m; int n; CHAMELEON_Complex64_t *A; - int lda; + int ldA; int *IPIV; cham_bool_t check_info; int iinfo; @@ -39,9 +40,10 @@ static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg) int info = 0; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &IPIV, &check_info, &iinfo, &sequence, 
&request); - CORE_zgetrf( m, n, A, lda, IPIV, &info ); + starpu_codelet_unpack_args(cl_arg, &m, &n, &IPIV, &check_info, &iinfo, &sequence, &request); + CORE_zgetrf( m, n, A, ldA, IPIV, &info ); if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); @@ -56,7 +58,7 @@ CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func) void INSERT_TASK_zgetrf( const RUNTIME_option_t *options, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, int *IPIV, cham_bool_t check_info, int iinfo ) { @@ -73,7 +75,6 @@ void INSERT_TASK_zgetrf( const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &IPIV, sizeof(int*), STARPU_VALUE, &check_info, sizeof(cham_bool_t), STARPU_VALUE, &iinfo, sizeof(int), @@ -85,4 +86,5 @@ void INSERT_TASK_zgetrf( const RUNTIME_option_t *options, STARPU_NAME, "zgetrf", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c index 5e033500f..b1216bb05 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -34,7 +35,7 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg) int n; int ib; CHAMELEON_Complex64_t *A; - int lda, ldl; + int ldA, ldL; int *IPIV; cham_bool_t check_info; int iinfo; @@ -43,9 +44,11 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg) int info = 0; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldL = STARPU_MATRIX_GET_LD( descr[1] ); - 
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request); - CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info); + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request); + CORE_zgetrf_incpiv(m, n, ib, A, ldA, IPIV, &info); if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); @@ -91,8 +94,8 @@ CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func) * On exit, the factors L and U from the factorization * A = P*L*U; the unit diagonal elements of L are not stored. * - * @param[in] LDA - * The leading dimension of the array A. LDA >= max(1,M). + * @param[in] ldA + * The leading dimension of the array A. ldA >= max(1,M). * * @param[out] IPIV * The pivot indices; for 1 <= i <= min(M,N), row i of the @@ -114,8 +117,8 @@ CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func) void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options, int m, int n, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *L, int Lm, int Ln, int ldl, + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *L, int Lm, int Ln, int ldL, int *IPIV, cham_bool_t check_info, int iinfo) { @@ -136,9 +139,7 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_W, RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln), - STARPU_VALUE, &ldl, sizeof(int), STARPU_VALUE, &IPIV, sizeof(int*), STARPU_VALUE, &check_info, sizeof(cham_bool_t), STARPU_VALUE, &iinfo, sizeof(int), @@ -152,4 +153,6 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options, STARPU_NAME, "zgetrf_incpiv", #endif 0); + (void)ldL; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c 
b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c index 4b108067c..34daf9495 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c @@ -16,6 +16,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -33,16 +34,17 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg) int n; int ib; CHAMELEON_Complex64_t *A; - int lda; + int ldA; int iinfo; RUNTIME_sequence_t *sequence; RUNTIME_request_t *request; int info = 0; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo, &sequence, &request); - CORE_zgetrf_nopiv(m, n, ib, A, lda, &info); + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &iinfo, &sequence, &request); + CORE_zgetrf_nopiv(m, n, ib, A, ldA, &info); if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); @@ -88,8 +90,8 @@ CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func) * On exit, the factors L and U from the factorization * A = P*L*U; the unit diagonal elements of L are not stored. * - * @param[in] LDA - * The leading dimension of the array A. LDA >= max(1,M). + * @param[in] ldA + * The leading dimension of the array A. ldA >= max(1,M). 
* ******************************************************************************* * @@ -104,7 +106,7 @@ CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func) void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options, int m, int n, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, int iinfo) { (void)nb; @@ -121,7 +123,6 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &iinfo, sizeof(int), STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*), @@ -131,4 +132,5 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options, STARPU_NAME, "zgetrf_nopiv", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zgram.c b/runtime/starpu/codelets/codelet_zgram.c index 56af2b407..f8997ad1c 100644 --- a/runtime/starpu/codelets/codelet_zgram.c +++ b/runtime/starpu/codelets/codelet_zgram.c @@ -12,6 +12,7 @@ * @version 0.9.2 * @author Mathieu Faverge * @author Florent Pruvost + * @author Lucas Barros de Assis * @date 2019-04-16 * @precisions normal z -> c d s * @@ -25,24 +26,29 @@ static void cl_zgram_cpu_func(void *descr[], void *cl_arg) cham_uplo_t uplo; int m, n, mt, nt; double *Di; - int lddi; + int ldDI; double *Dj; - int lddj; + int ldDJ; double *D; double *A; - int lda; + int ldA; Di = (double *)STARPU_MATRIX_GET_PTR(descr[0]); Dj = (double *)STARPU_MATRIX_GET_PTR(descr[1]); D = (double *)STARPU_MATRIX_GET_PTR(descr[2]); A = (double *)STARPU_MATRIX_GET_PTR(descr[3]); - starpu_codelet_unpack_args(cl_arg, &uplo, &m, &n, &mt, &nt, &lddi, &lddj, &lda); + + ldDI = STARPU_MATRIX_GET_LD( descr[0] ); + ldDJ = STARPU_MATRIX_GET_LD( descr[1] ); + ldA = STARPU_MATRIX_GET_LD( descr[3] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &m, &n, &mt, 
&nt); CORE_zgram( uplo, m, n, mt, nt, - Di, lddi, - Dj, lddj, + Di, ldDI, + Dj, ldDJ, D, - A, lda); + A, ldA); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -54,10 +60,10 @@ CODELETS_CPU(zgram, 4, cl_zgram_cpu_func) void INSERT_TASK_zgram( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int mt, int nt, - const CHAM_desc_t *Di, int Dim, int Din, int lddi, - const CHAM_desc_t *Dj, int Djm, int Djn, int lddj, + const CHAM_desc_t *Di, int Dim, int Din, int ldDI, + const CHAM_desc_t *Dj, int Djm, int Djn, int ldDJ, const CHAM_desc_t *D, int Dm, int Dn, - CHAM_desc_t *A, int Am, int An, int lda) + CHAM_desc_t *A, int Am, int An, int ldA) { struct starpu_codelet *codelet = &cl_zgram; void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL; @@ -77,16 +83,16 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options, STARPU_VALUE, &mt, sizeof(int), STARPU_VALUE, &nt, sizeof(int), STARPU_R, RTBLKADDR(Di, double, Dim, Din), - STARPU_VALUE, &lddi, sizeof(int), STARPU_R, RTBLKADDR(Dj, double, Djm, Djn), - STARPU_VALUE, &lddj, sizeof(int), STARPU_R, RTBLKADDR(D, double, Dm, Dn), STARPU_RW, RTBLKADDR(A, double, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zgram", #endif 0); + (void)ldA; + (void)ldDJ; + (void)ldDI; } diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c index c1d97a668..e740f97dd 100644 --- a/runtime/starpu/codelets/codelet_zhe2ge.c +++ b/runtime/starpu/codelets/codelet_zhe2ge.c @@ -13,6 +13,7 @@ * * @version 0.9.2 * @author Mathieu Faverge + * @author Lucas Barros de Assis * @date 2016-12-09 * @precisions normal z -> c d s * @@ -27,14 +28,18 @@ static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg) int M; int N; const CHAMELEON_Complex64_t *A; - int LDA; + int ldA; CHAMELEON_Complex64_t *B; - int LDB; + int ldB; A = (const CHAMELEON_Complex64_t 
*)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB); - CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N); + CORE_zhe2ge(uplo, M, N, A, ldA, B, ldB); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -51,8 +56,8 @@ CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func) void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB) { (void)mb; struct starpu_codelet *codelet = &cl_zhe2ge; @@ -69,13 +74,13 @@ void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zhe2ge", #endif 0); + (void)ldB; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 3dbcae1b3..df1b09627 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c * @@ -35,22 +36,27 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg) int N; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; CHAMELEON_Complex64_t *B; - int LDB; + int ldB; CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t *C; 
- int LDC; + int ldC; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); + + starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &beta); CORE_zhemm(side, uplo, M, N, - alpha, A, LDA, - B, LDB, - beta, C, LDC); + alpha, A, ldA, + B, ldB, + beta, C, ldC); } #ifdef CHAMELEON_USE_CUDA @@ -62,26 +68,31 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) int N; cuDoubleComplex alpha; const cuDoubleComplex *A; - int LDA; + int ldA; const cuDoubleComplex *B; - int LDB; + int ldB; cuDoubleComplex beta; cuDoubleComplex *C; - int LDC; + int ldC; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); + + starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &beta); RUNTIME_getStream(stream); CUDA_zhemm( side, uplo, M, N, - &alpha, A, LDA, - B, LDB, - &beta, C, LDC, + &alpha, A, ldA, + B, ldB, + &beta, C, ldC, stream); #ifndef STARPU_CUDA_ASYNC @@ -106,9 +117,9 @@ CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zhemm(const RUNTIME_option_t *options, cham_side_t side, cham_uplo_t uplo, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) + 
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC) { (void)nb; struct starpu_codelet *codelet = &cl_zhemm; @@ -128,16 +139,16 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zhemm", #endif 0); + (void)ldC; + (void)ldB; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 2cf491f62..bdd303ea5 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c * @@ -35,19 +36,24 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg) int k; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *B; - int ldb; + int ldB; double beta; CHAMELEON_Complex64_t *C; - int ldc; + int ldC; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( 
descr[2] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); CORE_zher2k(uplo, trans, - n, k, alpha, A, lda, B, ldb, beta, C, ldc); + n, k, alpha, A, ldA, B, ldB, beta, C, ldC); } #ifdef CHAMELEON_USE_CUDA @@ -59,22 +65,27 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) int k; cuDoubleComplex alpha; const cuDoubleComplex *A; - int lda; + int ldA; const cuDoubleComplex *B; - int ldb; + int ldB; double beta; cuDoubleComplex *C; - int ldc; + int ldC; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); RUNTIME_getStream(stream); CUDA_zher2k( uplo, trans, - n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, + n, k, &alpha, A, ldA, B, ldB, &beta, C, ldC, stream); #ifndef STARPU_CUDA_ASYNC @@ -99,9 +110,9 @@ CODELETS(zher2k, 3, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zher2k(const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC) { (void)nb; struct starpu_codelet *codelet = &cl_zher2k; @@ -121,16 +132,16 @@ void INSERT_TASK_zher2k(const RUNTIME_option_t *options, STARPU_VALUE, &k, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, 
CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_VALUE, &beta, sizeof(double), STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zher2k", #endif 0); + (void)ldC; + (void)ldB; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c index 8974ad765..f0bbcb02e 100644 --- a/runtime/starpu/codelets/codelet_zherfb.c +++ b/runtime/starpu/codelets/codelet_zherfb.c @@ -13,6 +13,7 @@ * * @version 0.9.2 * @author Hatem Ltaief + * @author Lucas Barros de Assis * @date 2016-12-09 * @precisions normal z -> c d s * @@ -29,22 +30,26 @@ static void cl_zherfb_cpu_func(void *descr[], void *cl_arg) int ib; int nb; const CHAMELEON_Complex64_t *A; - int lda; + int ldA; const CHAMELEON_Complex64_t *T; - int ldt; + int ldT; CHAMELEON_Complex64_t *C; - int ldc; + int ldC; CHAMELEON_Complex64_t *WORK; - int ldwork; + int ldWORK; A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &k, &ib, &nb, &lda, &ldt, &ldc, &ldwork); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); - CORE_zherfb(uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork); + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &k, &ib, &nb, &ldWORK); + + CORE_zherfb(uplo, n, k, ib, nb, A, ldA, T, ldT, C, ldC, WORK, ldWORK); } #if defined(CHAMELEON_USE_CUDA) @@ -56,13 +61,13 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg) int ib; int nb; const 
cuDoubleComplex *A; - int lda; + int ldA; const cuDoubleComplex *T; - int ldt; + int ldT; cuDoubleComplex *C; - int ldc; + int ldC; cuDoubleComplex *WORK; - int ldwork; + int ldWORK; RUNTIME_getStream(stream); @@ -71,9 +76,13 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg) C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &k, &ib, &nb, &lda, &ldt, &ldc, &ldwork); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &k, &ib, &nb, &ldWORK); - CUDA_zherfb( uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); + CUDA_zherfb( uplo, n, k, ib, nb, A, ldA, T, ldT, C, ldC, WORK, ldWORK, stream ); #ifndef STARPU_CUDA_ASYNC cudaStreamSynchronize( stream ); @@ -95,9 +104,9 @@ CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zherfb(const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *T, int Tm, int Tn, int ldT, + const CHAM_desc_t *C, int Cm, int Cn, int ldC) { struct starpu_codelet *codelet = &cl_zherfb; void (*callback)(void*) = options->profiling ? 
cl_zherfb_callback : NULL; @@ -116,11 +125,8 @@ void INSERT_TASK_zherfb(const RUNTIME_option_t *options, STARPU_VALUE, &ib, sizeof(int), STARPU_VALUE, &nb, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), STARPU_SCRATCH, options->ws_worker, STARPU_VALUE, &nb, sizeof(int), STARPU_PRIORITY, options->priority, @@ -129,4 +135,7 @@ void INSERT_TASK_zherfb(const RUNTIME_option_t *options, STARPU_NAME, "zherfb", #endif 0); + (void)ldC; + (void)ldT; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 059705fd2..89ac73aa3 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c * @@ -35,18 +36,22 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg) int k; double alpha; CHAMELEON_Complex64_t *A; - int lda; + int ldA; double beta; CHAMELEON_Complex64_t *C; - int ldc; + int ldC; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldC = STARPU_MATRIX_GET_LD( descr[1] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); CORE_zherk(uplo, trans, n, k, - alpha, A, lda, - beta, C, ldc); + alpha, A, ldA, + beta, C, ldC); } #ifdef CHAMELEON_USE_CUDA @@ -58,22 +63,26 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) int k; double alpha; const cuDoubleComplex *A; - int lda; + int ldA; double beta; cuDoubleComplex *C; - int 
ldc; + int ldC; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldC = STARPU_MATRIX_GET_LD( descr[1] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); RUNTIME_getStream(stream); CUDA_zherk( uplo, trans, n, k, - &alpha, A, lda, - &beta, C, ldc, + &alpha, A, ldA, + &beta, C, ldC, stream); #ifndef STARPU_CUDA_ASYNC @@ -98,8 +107,8 @@ CODELETS(zherk, 2, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zherk(const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, - double alpha, const CHAM_desc_t *A, int Am, int An, int lda, - double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) + double alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC) { (void)nb; struct starpu_codelet *codelet = &cl_zherk; @@ -118,14 +127,14 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options, STARPU_VALUE, &k, sizeof(int), STARPU_VALUE, &alpha, sizeof(double), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &beta, sizeof(double), STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zherk", #endif 0); + (void)ldC; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zhessq.c b/runtime/starpu/codelets/codelet_zhessq.c index c1b18c8bc..884f02aa1 100644 --- a/runtime/starpu/codelets/codelet_zhessq.c +++ b/runtime/starpu/codelets/codelet_zhessq.c @@ -24,10 +24,10 @@ void INSERT_TASK_zhessq( const RUNTIME_option_t *options, cham_store_t storev, cham_uplo_t uplo, int n, - const CHAM_desc_t *A, int Am, int An, int lda, + 
const CHAM_desc_t *A, int Am, int An, int ldA, const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) { INSERT_TASK_zsyssq( options, storev, uplo, n, - A, Am, An, lda, + A, Am, An, ldA, SCALESUMSQ, SCALESUMSQm, SCALESUMSQn ); } diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index 9893f4f29..1417bf5c0 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ b/runtime/starpu/codelets/codelet_zlacpy.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -35,14 +36,18 @@ static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg) int displA; int displB; const CHAMELEON_Complex64_t *A; - int LDA; + int ldA; CHAMELEON_Complex64_t *B; - int LDB; + int ldB; A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &LDA, &displB, &LDB); - CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &displB); + CORE_zlacpy(uplo, M, N, A + displA, ldA, B + displB, ldB); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -58,8 +63,8 @@ CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func) */ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int nb, - int displA, const CHAM_desc_t *A, int Am, int An, int lda, - int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + int displA, const CHAM_desc_t *A, int Am, int An, int ldA, + int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldB ) { (void)nb; struct starpu_codelet *codelet = &cl_zlacpy; @@ -77,24 +82,24 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &displA, 
sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &displB, sizeof(int), STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zlacpy", #endif 0); + (void)ldB; + (void)ldA; } void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB ) { INSERT_TASK_zlacpyx( options, uplo, m, n, nb, - 0, A, Am, An, lda, - 0, B, Bm, Bn, ldb ); + 0, A, Am, An, ldA, + 0, B, Bm, Bn, ldB ); } diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c index 79a12d480..21823b861 100644 --- a/runtime/starpu/codelets/codelet_zlag2c.c +++ b/runtime/starpu/codelets/codelet_zlag2c.c @@ -17,6 +17,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions mixed zc -> ds * @@ -30,14 +31,18 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) int m; int n; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex32_t *B; - int ldb; + int ldB; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb); - CORE_zlag2c( m, n, A, lda, B, ldb); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + + starpu_codelet_unpack_args(cl_arg, &m, &n); + CORE_zlag2c( m, n, A, ldA, B, ldB); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -53,8 +58,8 @@ CODELETS_CPU(zlag2c, 1, cl_zlag2c_cpu_func) */ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, int m, int
n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB) { (void)nb; struct starpu_codelet *codelet = &cl_zlag2c; @@ -70,15 +75,17 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_W, RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zlag2c", #endif 0); + (void)ldB; + (void)ldA; + (void)ldB; + (void)ldA; } #if !defined(CHAMELEON_SIMULATION) @@ -87,14 +94,18 @@ static void cl_clag2z_cpu_func(void *descr[], void *cl_arg) int m; int n; CHAMELEON_Complex32_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *B; - int ldb; + int ldB; A = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb); - CORE_clag2z( m, n, A, lda, B, ldb); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + + starpu_codelet_unpack_args(cl_arg, &m, &n); + CORE_clag2z( m, n, A, ldA, B, ldB); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -105,8 +116,8 @@ CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func) void INSERT_TASK_clag2z(const RUNTIME_option_t *options, int m, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB) { (void)nb; struct starpu_codelet *codelet = &cl_clag2z; @@ -122,13 +133,15 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, 
CHAMELEON_Complex32_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "clag2z", #endif 0); + (void)ldB; + (void)ldA; + (void)ldB; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c index 7d6297db6..7ed044638 100644 --- a/runtime/starpu/codelets/codelet_zlange.c +++ b/runtime/starpu/codelets/codelet_zlange.c @@ -17,6 +17,7 @@ * @author Julien Langou * @author Henricus Bouwmeester * @author Mathieu Faverge + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -32,14 +33,17 @@ static void cl_zlange_cpu_func(void *descr[], void *cl_arg) int M; int N; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; double *work; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &M, &N, &LDA); - CORE_zlange( norm, M, N, A, LDA, work, normA ); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &norm, &M, &N); + CORE_zlange( norm, M, N, A, ldA, work, normA ); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -50,7 +54,7 @@ CODELETS_CPU(zlange, 3, cl_zlange_cpu_func) void INSERT_TASK_zlange( const RUNTIME_option_t *options, cham_normtype_t norm, int M, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *A, int Am, int An, int ldA, const CHAM_desc_t *B, int Bm, int Bn ) { (void)NB; @@ -68,7 +72,6 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options, STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, &N, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &LDA, sizeof(int), STARPU_SCRATCH, options->ws_worker, STARPU_W, RTBLKADDR(B, double, Bm, 
Bn), STARPU_PRIORITY, options->priority, @@ -77,6 +80,7 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options, STARPU_NAME, "zlange", #endif 0); + (void)ldA; } #if !defined(CHAMELEON_SIMULATION) @@ -122,4 +126,5 @@ void INSERT_TASK_zlange_max(const RUNTIME_option_t *options, STARPU_NAME, "zlange_max", #endif 0); + } diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c index 6ad883725..3428c08fa 100644 --- a/runtime/starpu/codelets/codelet_zlanhe.c +++ b/runtime/starpu/codelets/codelet_zlanhe.c @@ -17,6 +17,7 @@ * @author Julien Langou * @author Henricus Bouwmeester * @author Mathieu Faverge + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c * @@ -32,14 +33,17 @@ static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg) cham_uplo_t uplo; int N; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; double *work; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); - CORE_zlanhe( norm, uplo, N, A, LDA, work, normA); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N); + CORE_zlanhe( norm, uplo, N, A, ldA, work, normA); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -50,7 +54,7 @@ CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func) void INSERT_TASK_zlanhe(const RUNTIME_option_t *options, cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *A, int Am, int An, int ldA, const CHAM_desc_t *B, int Bm, int Bn) { struct starpu_codelet *codelet = &cl_zlanhe; @@ -67,7 +71,6 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options, STARPU_VALUE, &uplo, sizeof(int), STARPU_VALUE, &N, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &LDA, sizeof(int), STARPU_SCRATCH, options->ws_worker, 
STARPU_W, RTBLKADDR(B, double, Bm, Bn), STARPU_PRIORITY, options->priority, @@ -76,6 +79,7 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options, STARPU_NAME, "zlanhe", #endif 0); + (void)ldA; (void)NB; } diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c index cba12c33f..42a4a4ee4 100644 --- a/runtime/starpu/codelets/codelet_zlansy.c +++ b/runtime/starpu/codelets/codelet_zlansy.c @@ -17,6 +17,7 @@ * @author Julien Langou * @author Henricus Bouwmeester * @author Mathieu Faverge + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -32,14 +33,16 @@ static void cl_zlansy_cpu_func(void *descr[], void *cl_arg) cham_uplo_t uplo; int N; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; double *work; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA); - CORE_zlansy( norm, uplo, N, A, LDA, work, normA); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N); + CORE_zlansy( norm, uplo, N, A, ldA, work, normA); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -50,7 +53,7 @@ CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func) void INSERT_TASK_zlansy( const RUNTIME_option_t *options, cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *A, int Am, int An, int ldA, const CHAM_desc_t *B, int Bm, int Bn ) { (void)NB; @@ -68,7 +71,6 @@ void INSERT_TASK_zlansy( const RUNTIME_option_t *options, STARPU_VALUE, &uplo, sizeof(int), STARPU_VALUE, &N, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &LDA, sizeof(int), STARPU_SCRATCH, options->ws_worker, STARPU_W, RTBLKADDR(B, double, Bm, Bn), STARPU_PRIORITY, options->priority, @@ -77,4 +79,5 @@ void INSERT_TASK_zlansy( const RUNTIME_option_t 
*options, STARPU_NAME, "zlansy", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c index f76696297..1154bfd47 100644 --- a/runtime/starpu/codelets/codelet_zlantr.c +++ b/runtime/starpu/codelets/codelet_zlantr.c @@ -29,14 +29,16 @@ static void cl_zlantr_cpu_func(void *descr[], void *cl_arg) cham_normtype_t norm, uplo, diag; int M, N; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; double *work; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N, &LDA); - CORE_zlantr( norm, uplo, diag, M, N, A, LDA, work, normA); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N); + CORE_zlantr( norm, uplo, diag, M, N, A, ldA, work, normA); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -48,7 +50,7 @@ CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func) void INSERT_TASK_zlantr( const RUNTIME_option_t *options, cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, int M, int N, int NB, - const CHAM_desc_t *A, int Am, int An, int LDA, + const CHAM_desc_t *A, int Am, int An, int ldA, const CHAM_desc_t *B, int Bm, int Bn ) { struct starpu_codelet *codelet = &cl_zlantr; @@ -67,7 +69,6 @@ void INSERT_TASK_zlantr( const RUNTIME_option_t *options, STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, &N, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &LDA, sizeof(int), STARPU_SCRATCH, options->ws_worker, STARPU_W, RTBLKADDR(B, double, Bm, Bn), STARPU_PRIORITY, options->priority, @@ -76,6 +77,7 @@ void INSERT_TASK_zlantr( const RUNTIME_option_t *options, STARPU_NAME, "zlantr", #endif 0); + (void)ldA; (void)NB; } diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index 19b761154..c454234a5 100644 --- 
a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -15,6 +15,7 @@ * @comment This file has been automatically generated * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Dalal Sukkari + * @author Lucas Barros de Assis * @date 2016-11-30 * @precisions normal z -> c d s * @@ -30,11 +31,13 @@ static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) int N; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); - CORE_zlascal(uplo, M, N, alpha, A, LDA); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha); + CORE_zlascal(uplo, M, N, alpha, A, ldA); return; } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -64,10 +67,10 @@ CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func) * Scalar factor of A. * * @param[in] A - * Matrix of size LDA-by-N. + * Matrix of size ldA-by-N. * - * @param[in] LDA - * Leading dimension of the array A. LDA >= max(1,M) + * @param[in] ldA + * Leading dimension of the array A. 
ldA >= max(1,M) * ******************************************************************************* * @@ -79,7 +82,7 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int nb, CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *A, int Am, int An, int lda) + const CHAM_desc_t *A, int Am, int An, int ldA) { (void)nb; struct starpu_codelet *codelet = &cl_zlascal; @@ -96,11 +99,11 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zlascal", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c index 2ad9dc2e9..ba27e1925 100644 --- a/runtime/starpu/codelets/codelet_zlaset.c +++ b/runtime/starpu/codelets/codelet_zlaset.c @@ -18,6 +18,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -34,11 +35,13 @@ static void cl_zlaset_cpu_func(void *descr[], void *cl_arg) CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta, &LDA); - CORE_zlaset(uplo, M, N, alpha, beta, A, LDA); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta); + CORE_zlaset(uplo, M, N, alpha, beta, A, ldA); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -78,14 +81,14 @@ CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func) * On entry, the M-by-N tile A. * On exit, A has been set accordingly. * - * @param[in] LDA - * The leading dimension of the array A. 
LDA >= max(1,M). + * @param[in] ldA + * The leading dimension of the array A. ldA >= max(1,M). * */ void INSERT_TASK_zlaset(const RUNTIME_option_t *options, cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, - const CHAM_desc_t *A, int Am, int An, int LDA) + const CHAM_desc_t *A, int Am, int An, int ldA) { struct starpu_codelet *codelet = &cl_zlaset; @@ -103,11 +106,11 @@ void INSERT_TASK_zlaset(const RUNTIME_option_t *options, STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &LDA, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zlaset", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c index 2cba6a547..c7efc2a9c 100644 --- a/runtime/starpu/codelets/codelet_zlaset2.c +++ b/runtime/starpu/codelets/codelet_zlaset2.c @@ -18,6 +18,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -33,11 +34,12 @@ static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg) int N; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); - CORE_zlaset2(uplo, M, N, alpha, A, LDA); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha); + CORE_zlaset2(uplo, M, N, alpha, A, ldA); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -75,13 +77,13 @@ CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func) * On entry, the M-by-N tile A. * On exit, A has been set to alpha accordingly. * - * @param[in] LDA - * The leading dimension of the array A. LDA >= max(1,M). 
+ * @param[in] ldA + * The leading dimension of the array A. ldA >= max(1,M). * */ void INSERT_TASK_zlaset2(const RUNTIME_option_t *options, cham_uplo_t uplo, int M, int N, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int LDA) + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA) { struct starpu_codelet *codelet = &cl_zlaset2; @@ -98,11 +100,11 @@ void INSERT_TASK_zlaset2(const RUNTIME_option_t *options, STARPU_VALUE, &N, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &LDA, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zlaset2", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c index 5c635e08f..f2a1b0779 100644 --- a/runtime/starpu/codelets/codelet_zlatro.c +++ b/runtime/starpu/codelets/codelet_zlatro.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2016-12-09 * @precisions normal z -> c d s * @@ -34,14 +35,17 @@ static void cl_zlatro_cpu_func(void *descr[], void *cl_arg) int M; int N; const CHAMELEON_Complex64_t *A; - int LDA; + int ldA; CHAMELEON_Complex64_t *B; - int LDB; + int ldB; A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &LDA, &LDB); - CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N); + CORE_zlatro(uplo, trans, M, N, A, ldA, B, ldB); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -58,8 +62,8 @@ CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func) void INSERT_TASK_zlatro( const RUNTIME_option_t 
*options, cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB ) { struct starpu_codelet *codelet = &cl_zlatro; void (*callback)(void*) = NULL; @@ -76,14 +80,14 @@ void INSERT_TASK_zlatro( const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zlatro", #endif 0); + (void)ldA; + (void)ldB; (void)mb; } diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c index 361be4a06..31f742e60 100644 --- a/runtime/starpu/codelets/codelet_zlauum.c +++ b/runtime/starpu/codelets/codelet_zlauum.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -32,11 +33,13 @@ static void cl_zlauum_cpu_func(void *descr[], void *cl_arg) cham_uplo_t uplo; int N; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA); - CORE_zlauum(uplo, N, A, LDA); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &uplo, &N); + CORE_zlauum(uplo, N, A, ldA); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -52,7 +55,7 @@ CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func) */ void INSERT_TASK_zlauum( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda ) + const CHAM_desc_t *A, int Am, int An, int ldA ) { (void)nb; 
struct starpu_codelet *codelet = &cl_zlauum; @@ -67,11 +70,12 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options, STARPU_VALUE, &uplo, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zlauum", #endif 0); + + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c index 39cf74565..fc3a062f7 100644 --- a/runtime/starpu/codelets/codelet_zplghe.c +++ b/runtime/starpu/codelets/codelet_zplghe.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c * @@ -35,15 +36,17 @@ static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) int m; int n; CHAMELEON_Complex64_t *A; - int lda; + int ldA; int bigM; int m0; int n0; unsigned long long int seed; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); - CORE_zplghe( bump, m, n, A, lda, bigM, m0, n0, seed ); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &bigM, &m0, &n0, &seed ); + CORE_zplghe( bump, m, n, A, ldA, bigM, m0, n0, seed ); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -53,7 +56,7 @@ static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func) void INSERT_TASK_zplghe( const RUNTIME_option_t *options, - double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int ldA, int bigM, int m0, int n0, unsigned long long int seed ) { struct starpu_codelet *codelet = &cl_zplghe; @@ -69,7 +72,6 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options, STARPU_VALUE, &m, 
sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &bigM, sizeof(int), STARPU_VALUE, &m0, sizeof(int), STARPU_VALUE, &n0, sizeof(int), @@ -80,4 +82,5 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options, STARPU_NAME, "zplghe", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c index 216e61f39..e98a40ea0 100644 --- a/runtime/starpu/codelets/codelet_zplgsy.c +++ b/runtime/starpu/codelets/codelet_zplgsy.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -35,15 +36,17 @@ static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) int m; int n; CHAMELEON_Complex64_t *A; - int lda; + int ldA; int bigM; int m0; int n0; unsigned long long int seed; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); - CORE_zplgsy( bump, m, n, A, lda, bigM, m0, n0, seed ); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &bigM, &m0, &n0, &seed ); + CORE_zplgsy( bump, m, n, A, ldA, bigM, m0, n0, seed ); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -53,7 +56,7 @@ static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func) void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, - CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int ldA, int bigM, int m0, int n0, unsigned long long int seed ) { @@ -70,7 +73,6 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, 
An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &bigM, sizeof(int), STARPU_VALUE, &m0, sizeof(int), STARPU_VALUE, &n0, sizeof(int), @@ -81,4 +83,5 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, STARPU_NAME, "zplgsy", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c index f3b984c6b..8ec8b9609 100644 --- a/runtime/starpu/codelets/codelet_zplrnt.c +++ b/runtime/starpu/codelets/codelet_zplrnt.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -32,15 +33,17 @@ static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) int m; int n; CHAMELEON_Complex64_t *A; - int lda; + int ldA; int bigM; int m0; int n0; unsigned long long int seed; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &bigM, &m0, &n0, &seed ); - CORE_zplrnt( m, n, A, lda, bigM, m0, n0, seed ); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + + starpu_codelet_unpack_args(cl_arg, &m, &n, &bigM, &m0, &n0, &seed ); + CORE_zplrnt( m, n, A, ldA, bigM, m0, n0, seed ); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -50,7 +53,7 @@ static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func) void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, - int m, int n, const CHAM_desc_t *A, int Am, int An, int lda, + int m, int n, const CHAM_desc_t *A, int Am, int An, int ldA, int bigM, int m0, int n0, unsigned long long int seed ) { @@ -66,7 +69,6 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &bigM, sizeof(int), STARPU_VALUE, &m0, sizeof(int), STARPU_VALUE, &n0, sizeof(int), @@ -77,4 +79,5 @@ void 
INSERT_TASK_zplrnt( const RUNTIME_option_t *options, STARPU_NAME, "zplrnt", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c index 9de8abdd5..c0bead183 100644 --- a/runtime/starpu/codelets/codelet_zpotrf.c +++ b/runtime/starpu/codelets/codelet_zpotrf.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -32,16 +33,17 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) cham_uplo_t uplo; int n; CHAMELEON_Complex64_t *A; - int lda; + int ldA; int iinfo; RUNTIME_sequence_t *sequence; RUNTIME_request_t *request; int info = 0; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo, &sequence, &request); - CORE_zpotrf(uplo, n, A, lda, &info); + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &iinfo, &sequence, &request); + CORE_zpotrf(uplo, n, A, ldA, &info); if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); @@ -61,7 +63,7 @@ CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func) */ void INSERT_TASK_zpotrf(const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, int iinfo) { (void)nb; @@ -77,7 +79,6 @@ void INSERT_TASK_zpotrf(const RUNTIME_option_t *options, STARPU_VALUE, &uplo, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &iinfo, sizeof(int), STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), STARPU_VALUE, &(options->request), sizeof(RUNTIME_request_t*), @@ -88,4 +89,5 @@ void INSERT_TASK_zpotrf(const RUNTIME_option_t *options, STARPU_NAME, "zpotrf", #endif 0); + 
(void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c index 33f7a5eca..a46520512 100644 --- a/runtime/starpu/codelets/codelet_zssssm.c +++ b/runtime/starpu/codelets/codelet_zssssm.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -36,21 +37,25 @@ static void cl_zssssm_cpu_func(void *descr[], void *cl_arg) int k; int ib; CHAMELEON_Complex64_t *A1; - int lda1; + int ldA1; CHAMELEON_Complex64_t *A2; - int lda2; + int ldA2; CHAMELEON_Complex64_t *L1; - int ldl1; + int ldL1; CHAMELEON_Complex64_t *L2; - int ldl2; + int ldL2; int *IPIV; A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); L1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); L2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV); - CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV); + ldA1 = STARPU_MATRIX_GET_LD( descr[0] ); + ldA2 = STARPU_MATRIX_GET_LD( descr[1] ); + ldL1 = STARPU_MATRIX_GET_LD( descr[2] ); + ldL2 = STARPU_MATRIX_GET_LD( descr[3] ); + starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &IPIV); + CORE_zssssm(m1, n1, m2, n2, k, ib, A1, ldA1, A2, ldA2, L1, ldL1, L2, ldL2, IPIV); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -95,28 +100,28 @@ CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func) * On entry, the M1-by-N1 tile A1. * On exit, A1 is updated by the application of L (L1 L2). * - * @param[in] LDA1 - * The leading dimension of the array A1. LDA1 >= max(1,M1). + * @param[in] ldA1 + * The leading dimension of the array A1. ldA1 >= max(1,M1). * * @param[in,out] A2 * On entry, the M2-by-N2 tile A2. * On exit, A2 is updated by the application of L (L1 L2). 
* - * @param[in] LDA2 - * The leading dimension of the array A2. LDA2 >= max(1,M2). + * @param[in] ldA2 + * The leading dimension of the array A2. ldA2 >= max(1,M2). * * @param[in] L1 * The IB-by-K lower triangular tile as returned by * CORE_ztstrf. * - * @param[in] LDL1 - * The leading dimension of the array L1. LDL1 >= max(1,IB). + * @param[in] ldL1 + * The leading dimension of the array L1. ldL1 >= max(1,IB). * * @param[in] L2 * The M2-by-K tile as returned by CORE_ztstrf. * - * @param[in] LDL2 - * The leading dimension of the array L2. LDL2 >= max(1,M2). + * @param[in] ldL2 + * The leading dimension of the array L2. ldL2 >= max(1,M2). * * @param[in] IPIV * The pivot indices array of size K as returned by @@ -130,10 +135,10 @@ CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func) */ void INSERT_TASK_zssssm( const RUNTIME_option_t *options, int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *L1, int L1m, int L1n, int ldl1, - const CHAM_desc_t *L2, int L2m, int L2n, int ldl2, + const CHAM_desc_t *A1, int A1m, int A1n, int ldA1, + const CHAM_desc_t *A2, int A2m, int A2n, int ldA2, + const CHAM_desc_t *L1, int L1m, int L1n, int ldL1, + const CHAM_desc_t *L2, int L2m, int L2n, int ldL2, const int *IPIV ) { (void)nb; @@ -156,13 +161,9 @@ void INSERT_TASK_zssssm( const RUNTIME_option_t *options, STARPU_VALUE, &k, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), STARPU_R, RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n), - STARPU_VALUE, &ldl1, sizeof(int), STARPU_R, RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n), - STARPU_VALUE, &ldl2, sizeof(int), STARPU_VALUE, &IPIV, sizeof(int*), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, diff --git 
a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 88d161a26..fc22d08f9 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -35,22 +36,25 @@ static void cl_zsymm_cpu_func(void *descr[], void *cl_arg) int N; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; CHAMELEON_Complex64_t *B; - int LDB; + int ldB; CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t *C; - int LDC; + int ldC; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); + starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &beta); CORE_zsymm(side, uplo, M, N, - alpha, A, LDA, - B, LDB, - beta, C, LDC); + alpha, A, ldA, + B, ldB, + beta, C, ldC); } #ifdef CHAMELEON_USE_CUDA @@ -62,26 +66,29 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) int N; cuDoubleComplex alpha; const cuDoubleComplex *A; - int LDA; + int ldA; const cuDoubleComplex *B; - int LDB; + int ldB; cuDoubleComplex beta; cuDoubleComplex *C; - int LDC; + int ldC; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); + 
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &beta); RUNTIME_getStream(stream); CUDA_zsymm( side, uplo, M, N, - &alpha, A, LDA, - B, LDB, - &beta, C, LDC, + &alpha, A, ldA, + B, ldB, + &beta, C, ldC, stream); #ifndef STARPU_CUDA_ASYNC @@ -106,9 +113,9 @@ CODELETS(zsymm, 3, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zsymm(const RUNTIME_option_t *options, cham_side_t side, cham_uplo_t uplo, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC) { (void)nb; struct starpu_codelet *codelet = &cl_zsymm; @@ -128,16 +135,16 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zsymm", #endif 0); + (void)ldC; + (void)ldB; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index 26e135bf7..45b5377a7 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d 
s * @@ -35,19 +36,22 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg) int k; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *B; - int ldb; + int ldB; CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t *C; - int ldc; + int ldC; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); CORE_zsyr2k(uplo, trans, - n, k, alpha, A, lda, B, ldb, beta, C, ldc); + n, k, alpha, A, ldA, B, ldB, beta, C, ldC); } #ifdef CHAMELEON_USE_CUDA @@ -59,22 +63,25 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) int k; cuDoubleComplex alpha; const cuDoubleComplex *A; - int lda; + int ldA; const cuDoubleComplex *B; - int ldb; + int ldB; cuDoubleComplex beta; cuDoubleComplex *C; - int ldc; + int ldC; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); RUNTIME_getStream(stream); CUDA_zsyr2k( uplo, trans, - n, k, &alpha, A, lda, B, ldb, &beta, C, ldc, + n, k, &alpha, A, ldA, B, ldB, &beta, C, ldC, stream); #ifndef STARPU_CUDA_ASYNC @@ -99,9 +106,9 @@ CODELETS(zsyr2k, 3, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zsyr2k(const RUNTIME_option_t 
*options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC) { (void)nb; struct starpu_codelet *codelet = &cl_zsyr2k; @@ -121,16 +128,16 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, STARPU_VALUE, &k, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zsyr2k", #endif 0); + (void)ldC; + (void)ldB; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 3d3368546..cd195c591 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -35,18 +36,21 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg) int k; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t *C; - int ldc; + int ldC; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - 
starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); + + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldC = STARPU_MATRIX_GET_LD( descr[1] ); + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); CORE_zsyrk(uplo, trans, n, k, - alpha, A, lda, - beta, C, ldc); + alpha, A, ldA, + beta, C, ldC); } #ifdef CHAMELEON_USE_CUDA @@ -58,22 +62,24 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) int k; cuDoubleComplex alpha; const cuDoubleComplex *A; - int lda; + int ldA; cuDoubleComplex beta; cuDoubleComplex *C; - int ldc; + int ldC; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldC = STARPU_MATRIX_GET_LD( descr[1] ); + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta); RUNTIME_getStream(stream); CUDA_zsyrk( uplo, trans, n, k, - &alpha, A, lda, - &beta, C, ldc, + &alpha, A, ldA, + &beta, C, ldC, stream); #ifndef STARPU_CUDA_ASYNC @@ -98,8 +104,8 @@ CODELETS(zsyrk, 2, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int n, int k, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC) { (void)nb; struct starpu_codelet *codelet = &cl_zsyrk; @@ -118,14 +124,14 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, STARPU_VALUE, &k, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &beta, 
sizeof(CHAMELEON_Complex64_t), STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "zsyrk", #endif 0); + (void)ldC; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c index 30980d669..678fb8d02 100644 --- a/runtime/starpu/codelets/codelet_zsyssq.c +++ b/runtime/starpu/codelets/codelet_zsyssq.c @@ -29,13 +29,14 @@ static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg) cham_uplo_t uplo; int n; CHAMELEON_Complex64_t *A; - int lda; + int ldA; double *SCALESUMSQ; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &n, &lda); - CORE_zsyssq( storev, uplo, n, A, lda, SCALESUMSQ ); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &n); + CORE_zsyssq( storev, uplo, n, A, ldA, SCALESUMSQ ); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -46,7 +47,7 @@ CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func) void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, cham_store_t storev, cham_uplo_t uplo, int n, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) { struct starpu_codelet *codelet = &cl_zsyssq; @@ -63,7 +64,6 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, STARPU_VALUE, &uplo, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, @@ -71,4 +71,5 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, STARPU_NAME, "zsyssq", #endif 0); + (void)ldA; } diff --git 
a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c index 1200f2f77..3f4d7f107 100644 --- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c @@ -19,6 +19,7 @@ * @author Cedric Castagnede * @author Florent Pruvost * @author Marc Sergent + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c * @@ -32,13 +33,13 @@ static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg) cham_uplo_t uplo; int n; CHAMELEON_Complex64_t *A; - int lda; + int ldA; int iinfo; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo); - CORE_zsytf2_nopiv(uplo, n, A, lda); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + starpu_codelet_unpack_args(cl_arg, &uplo, &n, &iinfo); + CORE_zsytf2_nopiv(uplo, n, A, ldA); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -49,7 +50,7 @@ CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func) void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, int iinfo ) { (void)nb; @@ -65,7 +66,6 @@ void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options, STARPU_VALUE, &uplo, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &iinfo, sizeof(int), /* STARPU_SCRATCH, options->ws_worker, */ STARPU_PRIORITY, options->priority, @@ -74,4 +74,5 @@ void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options, STARPU_NAME, "zsytrf_nopiv", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c index e15fde664..3809706d5 100644 --- a/runtime/starpu/codelets/codelet_ztplqt.c +++ b/runtime/starpu/codelets/codelet_ztplqt.c @@ -13,6 +13,7 @@ * * @version 0.9.2 * @author Mathieu 
Faverge + * @author Lucas Barros de Assis * @date 2018-01-31 * @precisions normal z -> s d c * @@ -28,24 +29,25 @@ static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg) int L; int ib; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *B; - int ldb; + int ldB; CHAMELEON_Complex64_t *T; - int ldt; + int ldT; CHAMELEON_Complex64_t *WORK; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldT = STARPU_MATRIX_GET_LD( descr[2] ); + starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib ); - starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib, - &lda, &ldb, &ldt ); - - CORE_zlaset( ChamUpperLower, ib, M, 0., 0., T, ldt ); + CORE_zlaset( ChamUpperLower, ib, M, 0., 0., T, ldT ); CORE_ztplqt( M, N, L, ib, - A, lda, B, ldb, T, ldt, WORK ); + A, ldA, B, ldB, T, ldT, WORK ); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -56,9 +58,9 @@ CODELETS_CPU(ztplqt, 4, cl_ztplqt_cpu_func) void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, int M, int N, int L, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB, + const CHAM_desc_t *T, int Tm, int Tn, int ldT ) { struct starpu_codelet *codelet = &cl_ztplqt; void (*callback)(void*) = options->profiling ? 
cl_ztplqt_callback : NULL; @@ -76,11 +78,8 @@ void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, STARPU_VALUE, &L, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), /* Other options */ STARPU_SCRATCH, options->ws_worker, STARPU_PRIORITY, options->priority, @@ -92,6 +91,8 @@ void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, STARPU_NAME, (L == 0) ? "ztplqs" : "ztplqt", #endif 0); + (void)ldB; + (void)ldA; (void)ib; (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c index 025c60f0c..15f9be5a2 100644 --- a/runtime/starpu/codelets/codelet_ztpmlqt.c +++ b/runtime/starpu/codelets/codelet_ztpmlqt.c @@ -11,6 +11,7 @@ * * @version 0.9.2 * @author Mathieu Faverge + * @author Lucas Barros de Assis * @date 2018-01-31 * @precisions normal z -> s d c * @@ -29,13 +30,13 @@ static void cl_ztpmlqt_cpu_func(void *descr[], void *cl_arg) int L; int ib; const CHAMELEON_Complex64_t *V; - int ldv; + int ldV; const CHAMELEON_Complex64_t *T; - int ldt; + int ldT; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *B; - int ldb; + int ldB; CHAMELEON_Complex64_t *WORK; size_t lwork; @@ -44,12 +45,14 @@ static void cl_ztpmlqt_cpu_func(void *descr[], void *cl_arg) A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ - - starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, - &ldv, &ldt, &lda, &ldb, &lwork ); + ldV = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); + ldA = STARPU_MATRIX_GET_LD( descr[2] ); + ldB = STARPU_MATRIX_GET_LD( descr[3] ); + 
starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, &lwork ); CORE_ztpmlqt( side, trans, M, N, K, L, ib, - V, ldv, T, ldt, A, lda, B, ldb, WORK ); + V, ldV, T, ldT, A, ldA, B, ldB, WORK ); (void)lwork; } @@ -65,13 +68,13 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg) int L; int ib; const cuDoubleComplex *V; - int ldv; + int ldV; const cuDoubleComplex *T; - int ldt; + int ldT; cuDoubleComplex *A; - int lda; + int ldA; cuDoubleComplex *B; - int ldb; + int ldB; cuDoubleComplex *W; size_t lwork; @@ -80,15 +83,18 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg) A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */ + ldV = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); + ldA = STARPU_MATRIX_GET_LD( descr[2] ); + ldB = STARPU_MATRIX_GET_LD( descr[3] ); - starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, - &ldv, &ldt, &lda, &ldb, &lwork ); + starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, &lwork ); RUNTIME_getStream(stream); CUDA_ztpmlqt( side, trans, M, N, K, L, ib, - V, ldv, T, ldt, A, lda, B, ldb, + V, ldV, T, ldT, A, ldA, B, ldB, W, lwork, stream ); #ifndef STARPU_CUDA_ASYNC @@ -106,10 +112,10 @@ CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYN void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + const CHAM_desc_t *V, int Vm, int Vn, int ldV, + const CHAM_desc_t *T, int Tm, int Tn, int ldT, + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB ) { struct 
starpu_codelet *codelet = &cl_ztpmlqt; void (*callback)(void*) = options->profiling ? cl_ztpmlqt_callback : NULL; @@ -131,13 +137,9 @@ void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, STARPU_VALUE, &L, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_R, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), - STARPU_VALUE, &ldv, sizeof(int), STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_VALUE, &(options->ws_wsize), sizeof(size_t), /* Other options */ STARPU_SCRATCH, options->ws_worker, @@ -150,6 +152,9 @@ void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, STARPU_NAME, (( L == 0 ) ? "ztsmlq" : "ztpmlqt"), #endif 0); + (void)ldA; + (void)ldT; + (void)ldV; (void)ib; (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c index b149751e6..ff225663d 100644 --- a/runtime/starpu/codelets/codelet_ztpmqrt.c +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -11,6 +11,7 @@ * * @version 0.9.2 * @author Mathieu Faverge + * @author Lucas Barros de Assis * @date 2016-12-16 * @precisions normal z -> s d c * @@ -29,13 +30,13 @@ static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg) int L; int ib; const CHAMELEON_Complex64_t *V; - int ldv; + int ldV; const CHAMELEON_Complex64_t *T; - int ldt; + int ldT; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *B; - int ldb; + int ldB; CHAMELEON_Complex64_t *WORK; size_t lwork; @@ -44,12 +45,14 @@ static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg) A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ - - starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, 
&K, &L, &ib, - &ldv, &ldt, &lda, &ldb, &lwork ); + ldV = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); + ldA = STARPU_MATRIX_GET_LD( descr[2] ); + ldB = STARPU_MATRIX_GET_LD( descr[3] ); + starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, &lwork ); CORE_ztpmqrt( side, trans, M, N, K, L, ib, - V, ldv, T, ldt, A, lda, B, ldb, WORK ); + V, ldV, T, ldT, A, ldA, B, ldB, WORK ); (void)lwork; } @@ -66,13 +69,13 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) int L; int ib; const cuDoubleComplex *V; - int ldv; + int ldV; const cuDoubleComplex *T; - int ldt; + int ldT; cuDoubleComplex *A; - int lda; + int ldA; cuDoubleComplex *B; - int ldb; + int ldB; cuDoubleComplex *W; size_t lwork; @@ -81,15 +84,17 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 3*ib*nb */ - - starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, - &ldv, &ldt, &lda, &ldb, &lwork ); + ldV = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); + ldA = STARPU_MATRIX_GET_LD( descr[2] ); + ldB = STARPU_MATRIX_GET_LD( descr[3] ); + starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, &lwork ); RUNTIME_getStream(stream); CUDA_ztpmqrt( side, trans, M, N, K, L, ib, - V, ldv, T, ldt, A, lda, B, ldb, + V, ldV, T, ldT, A, ldA, B, ldB, W, lwork, stream ); #ifndef STARPU_CUDA_ASYNC @@ -107,10 +112,10 @@ CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYN void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, int M, int N, int K, int L, int ib, int nb, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, 
int ldb ) + const CHAM_desc_t *V, int Vm, int Vn, int ldV, + const CHAM_desc_t *T, int Tm, int Tn, int ldT, + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB ) { struct starpu_codelet *codelet = &cl_ztpmqrt; void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL; @@ -132,13 +137,9 @@ void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, STARPU_VALUE, &L, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_R, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), - STARPU_VALUE, &ldv, sizeof(int), STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_VALUE, &(options->ws_wsize), sizeof(size_t), /* Other options */ STARPU_SCRATCH, options->ws_worker, @@ -151,6 +152,9 @@ void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, STARPU_NAME, (( L == 0 ) ? 
"ztsmqr" : "ztpmqrt"), #endif 0); + (void)ldA; + (void)ldT; + (void)ldV; (void)ib; (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c index dbe95343f..26962b5cb 100644 --- a/runtime/starpu/codelets/codelet_ztpqrt.c +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -28,24 +28,25 @@ static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg) int L; int ib; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *B; - int ldb; + int ldB; CHAMELEON_Complex64_t *T; - int ldt; + int ldT; CHAMELEON_Complex64_t *WORK; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + ldT = STARPU_MATRIX_GET_LD( descr[2] ); + starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib ); - starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib, - &lda, &ldb, &ldt ); - - CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldt ); + CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldT ); CORE_ztpqrt( M, N, L, ib, - A, lda, B, ldb, T, ldt, WORK ); + A, ldA, B, ldB, T, ldT, WORK ); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -56,9 +57,9 @@ CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func) void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, int M, int N, int L, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB, + const CHAM_desc_t *T, int Tm, int Tn, int ldT ) { struct starpu_codelet *codelet = &cl_ztpqrt; void (*callback)(void*) = options->profiling ? 
cl_ztpqrt_callback : NULL; @@ -76,11 +77,8 @@ void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, STARPU_VALUE, &L, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), /* Other options */ STARPU_SCRATCH, options->ws_worker, STARPU_PRIORITY, options->priority, @@ -92,6 +90,8 @@ void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, STARPU_NAME, "ztpqrt", #endif 0); + (void)ldB; + (void)ldA; (void)ib; (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index 8204416f5..ada8343d7 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -15,6 +15,7 @@ * @comment This file has been automatically generated * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Mathieu Faverge + * @author Lucas Barros de Assis * @date 2015-11-03 * @precisions normal z -> c d s * @@ -31,15 +32,17 @@ static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) int N; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t *B; - int LDB; + int ldB; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); - CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &beta); + CORE_ztradd(uplo, trans, M, N, alpha, A, ldA, beta, B, ldB); return; } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -85,22 +88,22 @@ CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func) * 
Scalar factor of A. * * @param[in] A - * Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M + * Matrix of size ldA-by-N, if trans = ChamNoTrans, ldA-by-M * otherwise. * - * @param[in] LDA - * Leading dimension of the array A. LDA >= max(1,k), with k=M, if + * @param[in] ldA + * Leading dimension of the array A. ldA >= max(1,k), with k=M, if * trans = ChamNoTrans, and k=N otherwise. * * @param[in] beta * Scalar factor of B. * * @param[in,out] B - * Matrix of size LDB-by-N. + * Matrix of size ldB-by-N. * On exit, B = alpha * op(A) + beta * B * - * @param[in] LDB - * Leading dimension of the array B. LDB >= max(1,M) + * @param[in] ldB + * Leading dimension of the array B. ldB >= max(1,M) * ******************************************************************************* * @@ -110,8 +113,8 @@ CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func) */ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldB ) { struct starpu_codelet *codelet = &cl_ztradd; void (*callback)(void*) = options->profiling ? 
cl_zgeadd_callback : NULL; @@ -129,16 +132,15 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "ztradd", #endif 0); + (void)ldA; (void)nb; } diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c index d2528c349..a616b14c8 100644 --- a/runtime/starpu/codelets/codelet_ztrasm.c +++ b/runtime/starpu/codelets/codelet_ztrasm.c @@ -15,6 +15,7 @@ * @comment This file has been automatically generated * from Plasma 2.6.0 for CHAMELEON 0.9.2 * @author Mathieu Faverge + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -31,13 +32,14 @@ static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg) int M; int N; CHAMELEON_Complex64_t *A; - int lda; + int ldA; double *work; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda); - CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N); + CORE_ztrasm(storev, uplo, diag, M, N, A, ldA, work); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -48,7 +50,7 @@ CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func) void INSERT_TASK_ztrasm( const RUNTIME_option_t *options, cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, const CHAM_desc_t *B, int Bm, int Bn ) { struct starpu_codelet 
*codelet = &cl_ztrasm; @@ -67,7 +69,6 @@ void INSERT_TASK_ztrasm( const RUNTIME_option_t *options, STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, &N, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(B, double, Bm, Bn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, @@ -75,4 +76,5 @@ void INSERT_TASK_ztrasm( const RUNTIME_option_t *options, STARPU_NAME, "ztrasm", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index 32fb27c69..322c2326c 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -37,18 +38,21 @@ static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg) int N; CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; CHAMELEON_Complex64_t *B; - int LDB; + int ldB; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + + starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha); CORE_ztrmm(side, uplo, transA, diag, M, N, - alpha, A, LDA, - B, LDB); + alpha, A, ldA, + B, ldB); } #ifdef CHAMELEON_USE_CUDA @@ -62,13 +66,15 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) int N; cuDoubleComplex alpha; const cuDoubleComplex *A; - int LDA; + int ldA; cuDoubleComplex *B; - int LDB; + int ldB; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, 
&M, &N, &alpha, &LDA, &LDB); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha); RUNTIME_getStream(stream); @@ -76,8 +82,8 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) side, uplo, transA, diag, M, N, - &alpha, A, LDA, - B, LDB, + &alpha, A, ldA, + B, ldB, stream); #ifndef STARPU_CUDA_ASYNC @@ -103,8 +109,8 @@ CODELETS(ztrmm, 2, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB) { (void)nb; struct starpu_codelet *codelet = &cl_ztrmm; @@ -125,13 +131,13 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "ztrmm", #endif 0); + (void)ldB; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index 03c554110..1d3281bdb 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -37,18 +38,20 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg) int n; 
CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *B; - int ldb; + int ldB; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha); CORE_ztrsm(side, uplo, transA, diag, m, n, - alpha, A, lda, - B, ldb); + alpha, A, ldA, + B, ldB); } #ifdef CHAMELEON_USE_CUDA @@ -62,21 +65,23 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) int n; cuDoubleComplex alpha; const cuDoubleComplex *A; - int lda; + int ldA; cuDoubleComplex *B; - int ldb; + int ldB; A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldB = STARPU_MATRIX_GET_LD( descr[1] ); + starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha); RUNTIME_getStream(stream); CUDA_ztrsm( side, uplo, transA, diag, m, n, - &alpha, A, lda, - B, ldb, + &alpha, A, ldA, + B, ldB, stream); #ifndef STARPU_CUDA_ASYNC @@ -101,8 +106,8 @@ CODELETS(ztrsm, 2, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_ztrsm(const RUNTIME_option_t *options, cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, int m, int n, int nb, - CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *B, int Bm, int Bn, int ldb) + CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *B, int Bm, int Bn, int ldB) { (void)nb; struct starpu_codelet *codelet = &cl_ztrsm; @@ -123,13 +128,13 @@ void INSERT_TASK_ztrsm(const 
RUNTIME_option_t *options, STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), - STARPU_VALUE, &ldb, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) STARPU_NAME, "ztrsm", #endif 0); + (void)ldB; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c index 110ba5001..4374a51f4 100644 --- a/runtime/starpu/codelets/codelet_ztrssq.c +++ b/runtime/starpu/codelets/codelet_ztrssq.c @@ -30,13 +30,14 @@ static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg) int m; int n; CHAMELEON_Complex64_t *A; - int lda; + int ldA; double *SCALESUMSQ; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]); - starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &m, &n, &lda); - CORE_ztrssq( uplo, diag, m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1]); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &m, &n); + CORE_ztrssq( uplo, diag, m, n, A, ldA, &SCALESUMSQ[0], &SCALESUMSQ[1]); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -48,7 +49,7 @@ CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func) void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_diag_t diag, int m, int n, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn ) { struct starpu_codelet *codelet = &cl_ztrssq; @@ -66,7 +67,6 @@ void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, 
SCALESUMSQn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, @@ -74,4 +74,5 @@ void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, STARPU_NAME, "ztrssq", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c index 1106e602f..50b492106 100644 --- a/runtime/starpu/codelets/codelet_ztrtri.c +++ b/runtime/starpu/codelets/codelet_ztrtri.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -33,16 +34,16 @@ static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) cham_diag_t diag; int N; CHAMELEON_Complex64_t *A; - int LDA; + int ldA; int iinfo; RUNTIME_sequence_t *sequence; RUNTIME_request_t *request; int info = 0; A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); - - starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo, &sequence, &request); - CORE_ztrtri(uplo, diag, N, A, LDA, &info); + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &iinfo, &sequence, &request); + CORE_ztrtri(uplo, diag, N, A, ldA, &info); if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); @@ -63,7 +64,7 @@ CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func) void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_diag_t diag, int n, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, + const CHAM_desc_t *A, int Am, int An, int ldA, int iinfo ) { (void)nb; @@ -80,7 +81,6 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, STARPU_VALUE, &diag, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_VALUE, &iinfo, sizeof(int), STARPU_VALUE, &(options->sequence), sizeof(RUNTIME_sequence_t*), STARPU_VALUE, 
&(options->request), sizeof(RUNTIME_request_t*), @@ -90,4 +90,5 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options, STARPU_NAME, "ztrtri", #endif 0); + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c index b5cad6b05..9056a098b 100644 --- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c @@ -15,6 +15,7 @@ * @author Hatem Ltaief * @author Mathieu Faverge * @author Azzam Haidar + * @author Lucas Barros de Assis * @date 2016-12-09 * @precisions normal z -> c d s * @@ -35,27 +36,29 @@ static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg) int ib; int nb; CHAMELEON_Complex64_t *A1; - int lda1; + int ldA1; CHAMELEON_Complex64_t *A2; - int lda2; + int ldA2; CHAMELEON_Complex64_t *V; - int ldv; + int ldV; CHAMELEON_Complex64_t *T; - int ldt; + int ldT; CHAMELEON_Complex64_t *WORK; - int ldwork; + int ldWORK; A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */ - - starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, - &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork); + ldA1 = STARPU_MATRIX_GET_LD( descr[0] ); + ldA2 = STARPU_MATRIX_GET_LD( descr[1] ); + ldV = STARPU_MATRIX_GET_LD( descr[2] ); + ldT = STARPU_MATRIX_GET_LD( descr[3] ); + starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, &nb, &ldWORK); CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, - ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); + ib, A1, ldA1, A2, ldA2, V, ldV, T, ldT, WORK, ldWORK); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -72,15 +75,15 @@ CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func) void 
INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) + const CHAM_desc_t *A1, int A1m, int A1n, int ldA1, + const CHAM_desc_t *A2, int A2m, int A2n, int ldA2, + const CHAM_desc_t *V, int Vm, int Vn, int ldV, + const CHAM_desc_t *T, int Tm, int Tn, int ldT ) { struct starpu_codelet *codelet = &cl_ztsmlq_hetra1; void (*callback)(void*) = options->profiling ? cl_ztsmlq_hetra1_callback : NULL; - int ldwork = side == ChamLeft ? ib : nb; + int ldWORK = side == ChamLeft ? ib : nb; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A1, A1m, A1n); @@ -101,15 +104,11 @@ void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options, STARPU_VALUE, &ib, sizeof(int), STARPU_VALUE, &nb, sizeof(int), STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), STARPU_R, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), - STARPU_VALUE, &ldv, sizeof(int), STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), STARPU_SCRATCH, options->ws_worker, - STARPU_VALUE, &ldwork, sizeof(int), + STARPU_VALUE, &ldWORK, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c index 0320559bd..9f3c2d3fb 100644 --- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c @@ -15,6 +15,7 @@ * @author Hatem Ltaief * @author Mathieu Faverge * @author Azzam Haidar + * @author Lucas Barros de Assis * @date 2016-12-09 * 
@precisions normal z -> c d s * @@ -34,28 +35,31 @@ static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg) int k; int ib; CHAMELEON_Complex64_t *A1; - int lda1; + int ldA1; CHAMELEON_Complex64_t *A2; - int lda2; + int ldA2; CHAMELEON_Complex64_t *V; - int ldv; + int ldV; CHAMELEON_Complex64_t *T; - int ldt; + int ldT; /* TODO: manage workspace */ CHAMELEON_Complex64_t *WORK; - int ldwork; + int ldWORK; A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); V = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); - + ldA1 = STARPU_MATRIX_GET_LD( descr[0] ); + ldA2 = STARPU_MATRIX_GET_LD( descr[1] ); + ldV = STARPU_MATRIX_GET_LD( descr[2] ); + ldT = STARPU_MATRIX_GET_LD( descr[3] ); starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, - &ib, &lda1, &lda2, &ldv, &ldt, &ldwork); + &ib, &ldWORK); CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, - ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork); + ib, A1, ldA1, A2, ldA2, V, ldV, T, ldT, WORK, ldWORK); } #endif /* !defined(CHAMELEON_SIMULATION) */ @@ -72,15 +76,15 @@ CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func) void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, - const CHAM_desc_t *A1, int A1m, int A1n, int lda1, - const CHAM_desc_t *A2, int A2m, int A2n, int lda2, - const CHAM_desc_t *V, int Vm, int Vn, int ldv, - const CHAM_desc_t *T, int Tm, int Tn, int ldt ) + const CHAM_desc_t *A1, int A1m, int A1n, int ldA1, + const CHAM_desc_t *A2, int A2m, int A2n, int ldA2, + const CHAM_desc_t *V, int Vm, int Vn, int ldV, + const CHAM_desc_t *T, int Tm, int Tn, int ldT ) { struct starpu_codelet *codelet = &cl_ztsmqr_hetra1; void (*callback)(void*) = options->profiling ? 
cl_ztsmqr_hetra1_callback : NULL; - int ldwork = side == ChamLeft ? ib : nb; + int ldWORK = side == ChamLeft ? ib : nb; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_RW(A1, A1m, A1n); @@ -100,15 +104,11 @@ void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options, STARPU_VALUE, &k, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_RW, RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), - STARPU_VALUE, &lda1, sizeof(int), STARPU_RW, RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), - STARPU_VALUE, &lda2, sizeof(int), STARPU_R, RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn), - STARPU_VALUE, &ldv, sizeof(int), STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), STARPU_SCRATCH, options->ws_worker, - STARPU_VALUE, &ldwork, sizeof(int), + STARPU_VALUE, &ldWORK, sizeof(int), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c index 4d740fc86..a711810e9 100644 --- a/runtime/starpu/codelets/codelet_ztstrf.c +++ b/runtime/starpu/codelets/codelet_ztstrf.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -35,14 +36,14 @@ static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg) int ib; int nb; CHAMELEON_Complex64_t *U; - int ldu; + int ldU; CHAMELEON_Complex64_t *A; - int lda; + int ldA; CHAMELEON_Complex64_t *L; - int ldl; + int ldL; int *IPIV; CHAMELEON_Complex64_t *WORK; - int ldwork; + int ldWORK; cham_bool_t check_info; int iinfo; RUNTIME_sequence_t *sequence; @@ -53,12 +54,14 @@ static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg) A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); L = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); - - 
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, - &IPIV, &d_work, &ldwork, &check_info, &iinfo, + ldU = STARPU_MATRIX_GET_LD( descr[0] ); + ldA = STARPU_MATRIX_GET_LD( descr[1] ); + ldL = STARPU_MATRIX_GET_LD( descr[2] ); + starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, + &IPIV, &d_work, &ldWORK, &check_info, &iinfo, &sequence, &request); - CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info); + CORE_ztstrf(m, n, ib, nb, U, ldU, A, ldA, L, ldL, IPIV, WORK, ldWORK, &info); if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); @@ -98,22 +101,22 @@ CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func) * On entry, the NB-by-N upper triangular tile. * On exit, the new factor U from the factorization * - * @param[in] LDU - * The leading dimension of the array U. LDU >= max(1,NB). + * @param[in] ldU + * The leading dimension of the array U. ldU >= max(1,NB). * * @param[in,out] A * On entry, the M-by-N tile to be factored. * On exit, the factor L from the factorization * - * @param[in] LDA - * The leading dimension of the array A. LDA >= max(1,M). + * @param[in] ldA + * The leading dimension of the array A. ldA >= max(1,M). * * @param[in,out] L * On entry, the IB-by-N lower triangular tile. * On exit, the interchanged rows form the tile A in case of pivoting. * - * @param[in] LDL - * The leading dimension of the array L. LDL >= max(1,IB). + * @param[in] ldL + * The leading dimension of the array L. ldL >= max(1,IB). * * @param[out] IPIV * The pivot indices; for 1 <= i <= min(M,N), row i of the @@ -121,7 +124,7 @@ CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func) * * @param[in,out] WORK * - * @param[in] LDWORK + * @param[in] ldWORK * The dimension of the array WORK. 
* * @param[out] INFO @@ -138,9 +141,9 @@ CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func) */ void INSERT_TASK_ztstrf( const RUNTIME_option_t *options, int m, int n, int ib, int nb, - const CHAM_desc_t *U, int Um, int Un, int ldu, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *L, int Lm, int Ln, int ldl, + const CHAM_desc_t *U, int Um, int Un, int ldU, + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *L, int Lm, int Ln, int ldL, int *IPIV, cham_bool_t check_info, int iinfo ) { @@ -162,11 +165,8 @@ void INSERT_TASK_ztstrf( const RUNTIME_option_t *options, STARPU_VALUE, &ib, sizeof(int), STARPU_VALUE, &nb, sizeof(int), STARPU_RW, RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un), - STARPU_VALUE, &ldu, sizeof(int), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_W, RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln), - STARPU_VALUE, &ldl, sizeof(int), STARPU_VALUE, &IPIV, sizeof(int*), STARPU_SCRATCH, options->ws_worker, STARPU_VALUE, &d_work, sizeof(CHAMELEON_starpu_ws_t *), @@ -181,4 +181,7 @@ void INSERT_TASK_ztstrf( const RUNTIME_option_t *options, STARPU_NAME, "ztstrf", #endif 0); + (void)ldL; + (void)ldA; + (void)ldU; } diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c index be36f957d..dd9c8d7dd 100644 --- a/runtime/starpu/codelets/codelet_zunmlq.c +++ b/runtime/starpu/codelets/codelet_zunmlq.c @@ -20,6 +20,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -37,24 +38,26 @@ static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg) int k; int ib; const CHAMELEON_Complex64_t *A; - int lda; + int ldA; const CHAMELEON_Complex64_t *T; - int ldt; + int ldT; CHAMELEON_Complex64_t *C; - int ldc; + int ldC; CHAMELEON_Complex64_t *WORK; - int ldwork; + int ldWORK; A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); 
T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, &ldWORK); CORE_zunmlq(side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork); + A, ldA, T, ldT, C, ldC, WORK, ldWORK); } #if defined(CHAMELEON_USE_CUDA) @@ -68,21 +71,23 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) int ib; const cuDoubleComplex *A, *T; cuDoubleComplex *C, *WORK; - int lda, ldt, ldc, ldwork; + int ldA, ldT, ldC, ldWORK; - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, &ldWORK); A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); RUNTIME_getStream(stream); CUDA_zunmlqt( side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); + A, ldA, T, ldT, C, ldC, WORK, ldWORK, stream ); #ifndef STARPU_CUDA_ASYNC cudaStreamSynchronize( stream ); @@ -140,37 +145,37 @@ CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) * The inner-blocking size. IB >= 0. 
* * @param[in] A - * Dimension: (LDA,M) if SIDE = ChamLeft, - * (LDA,N) if SIDE = ChamRight, + * Dimension: (ldA,M) if SIDE = ChamLeft, + * (ldA,N) if SIDE = ChamRight, * The i-th row must contain the vector which defines the * elementary reflector H(i), for i = 1,2,...,k, as returned by * CORE_zgelqt in the first k rows of its array argument A. * - * @param[in] LDA - * The leading dimension of the array A. LDA >= max(1,K). + * @param[in] ldA + * The leading dimension of the array A. ldA >= max(1,K). * * @param[in] T * The IB-by-K triangular factor T of the block reflector. * T is upper triangular by block (economic storage); * The rest of the array is not referenced. * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. + * @param[in] ldT + * The leading dimension of the array T. ldT >= IB. * * @param[in,out] C * On entry, the M-by-N tile C. * On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q. * - * @param[in] LDC - * The leading dimension of the array C. LDC >= max(1,M). + * @param[in] ldC + * The leading dimension of the array C. ldC >= max(1,M). * * @param[in,out] WORK - * On exit, if INFO = 0, WORK(1) returns the optimal LDWORK. + * On exit, if INFO = 0, WORK(1) returns the optimal ldWORK. * - * @param[in] LDWORK + * @param[in] ldWORK * The dimension of the array WORK. - * If SIDE = ChamLeft, LDWORK >= max(1,N); - * if SIDE = ChamRight, LDWORK >= max(1,M). + * If SIDE = ChamLeft, ldWORK >= max(1,N); + * if SIDE = ChamRight, ldWORK >= max(1,M). 
* ******************************************************************************* * @@ -181,9 +186,9 @@ CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zunmlq( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, int m, int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc ) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *T, int Tm, int Tn, int ldT, + const CHAM_desc_t *C, int Cm, int Cn, int ldC ) { struct starpu_codelet *codelet = &cl_zunmlq; void (*callback)(void*) = options->profiling ? cl_zunmlq_callback : NULL; @@ -203,11 +208,8 @@ void INSERT_TASK_zunmlq( const RUNTIME_option_t *options, STARPU_VALUE, &k, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), /* ib * nb */ STARPU_SCRATCH, options->ws_worker, STARPU_VALUE, &nb, sizeof(int), @@ -217,4 +219,7 @@ void INSERT_TASK_zunmlq( const RUNTIME_option_t *options, STARPU_NAME, "zunmlq", #endif 0); + + (void)ldT; + (void)ldA; } diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c index 8ff98bc79..a8ef47db4 100644 --- a/runtime/starpu/codelets/codelet_zunmqr.c +++ b/runtime/starpu/codelets/codelet_zunmqr.c @@ -19,6 +19,7 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede + * @author Lucas Barros de Assis * @date 2014-11-16 * @precisions normal z -> c d s * @@ -36,24 +37,26 @@ static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg) int k; int ib; const CHAMELEON_Complex64_t *A; - int lda; + int ldA; const CHAMELEON_Complex64_t *T; - int ldt; + int ldT; 
CHAMELEON_Complex64_t *C; - int ldc; + int ldC; CHAMELEON_Complex64_t *WORK; - int ldwork; +int ldWORK; A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); T = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, &ldWORK); CORE_zunmqr(side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork); + A, ldA, T, ldT, C, ldC, WORK, ldWORK); } #if defined(CHAMELEON_USE_CUDA) @@ -67,21 +70,23 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) int ib; const cuDoubleComplex *A, *T; cuDoubleComplex *C, *WORK; - int lda, ldt, ldc, ldwork; + int ldA, ldT, ldC, ldWORK; - starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, - &lda, &ldt, &ldc, &ldwork); + starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, &ldWORK); A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + ldA = STARPU_MATRIX_GET_LD( descr[0] ); + ldT = STARPU_MATRIX_GET_LD( descr[1] ); + ldC = STARPU_MATRIX_GET_LD( descr[2] ); RUNTIME_getStream(stream); CUDA_zunmqrt( side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); + A, ldA, T, ldT, C, ldC, WORK, ldWORK, stream ); #ifndef STARPU_CUDA_ASYNC cudaStreamSynchronize( stream ); @@ -139,38 +144,38 @@ CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) * The inner-blocking size. IB >= 0. 
* * @param[in] A - * Dimension: (LDA,K) + * Dimension: (ldA,K) * The i-th column must contain the vector which defines the * elementary reflector H(i), for i = 1,2,...,k, as returned by * CORE_zgeqrt in the first k columns of its array argument A. * - * @param[in] LDA + * @param[in] ldA * The leading dimension of the array A. - * If SIDE = ChamLeft, LDA >= max(1,M); - * if SIDE = ChamRight, LDA >= max(1,N). + * If SIDE = ChamLeft, ldA >= max(1,M); + * if SIDE = ChamRight, ldA >= max(1,N). * * @param[in] T * The IB-by-K triangular factor T of the block reflector. * T is upper triangular by block (economic storage); * The rest of the array is not referenced. * - * @param[in] LDT - * The leading dimension of the array T. LDT >= IB. + * @param[in] ldT + * The leading dimension of the array T. ldT >= IB. * * @param[in,out] C * On entry, the M-by-N tile C. * On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q. * - * @param[in] LDC - * The leading dimension of the array C. LDC >= max(1,M). + * @param[in] ldC + * The leading dimension of the array C. ldC >= max(1,M). * * @param[in,out] WORK - * On exit, if INFO = 0, WORK(1) returns the optimal LDWORK. + * On exit, if INFO = 0, WORK(1) returns the optimal ldWORK. * - * @param[in] LDWORK + * @param[in] ldWORK * The dimension of the array WORK. - * If SIDE = ChamLeft, LDWORK >= max(1,N); - * if SIDE = ChamRight, LDWORK >= max(1,M). + * If SIDE = ChamLeft, ldWORK >= max(1,N); + * if SIDE = ChamRight, ldWORK >= max(1,M). 
* ******************************************************************************* * @@ -181,9 +186,9 @@ CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zunmqr( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, int m, int n, int k, int ib, int nb, - const CHAM_desc_t *A, int Am, int An, int lda, - const CHAM_desc_t *T, int Tm, int Tn, int ldt, - const CHAM_desc_t *C, int Cm, int Cn, int ldc ) + const CHAM_desc_t *A, int Am, int An, int ldA, + const CHAM_desc_t *T, int Tm, int Tn, int ldT, + const CHAM_desc_t *C, int Cm, int Cn, int ldC ) { struct starpu_codelet *codelet = &cl_zunmqr; void (*callback)(void*) = options->profiling ? cl_zunmqr_callback : NULL; @@ -203,11 +208,8 @@ void INSERT_TASK_zunmqr( const RUNTIME_option_t *options, STARPU_VALUE, &k, sizeof(int), STARPU_VALUE, &ib, sizeof(int), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), - STARPU_VALUE, &lda, sizeof(int), STARPU_R, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), - STARPU_VALUE, &ldt, sizeof(int), STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), - STARPU_VALUE, &ldc, sizeof(int), /* ib * nb */ STARPU_SCRATCH, options->ws_worker, STARPU_VALUE, &nb, sizeof(int), @@ -217,4 +219,7 @@ void INSERT_TASK_zunmqr( const RUNTIME_option_t *options, STARPU_NAME, "zunmqr", #endif 0); + + (void)ldT; + (void)ldA; } -- GitLab