Attention : une mise à jour du serveur va être effectuée le lundi 17 mai entre 13h et 13h30. Cette mise à jour va générer une interruption du service de quelques minutes.

Commit aa0fa3e9 authored by BARROS DE ASSIS Lucas, committed by Mathieu Faverge

Fix codelets

parent 9b777d34
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 0.9.2 * from Plasma 2.6.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Lucas Barros de Assis
* @date 2014-11-16 * @date 2014-11-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -30,13 +31,15 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg) ...@@ -30,13 +31,15 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
int M; int M;
int N; int N;
CHAMELEON_Complex64_t *A; CHAMELEON_Complex64_t *A;
int lda; int ldA;
double *work; double *work;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda); ldA = STARPU_MATRIX_GET_LD( descr[0] );
CORE_dzasum(storev, uplo, M, N, A, lda, work);
starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N);
CORE_dzasum(storev, uplo, M, N, A, ldA, work);
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -47,7 +50,7 @@ CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func) ...@@ -47,7 +50,7 @@ CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func)
void INSERT_TASK_dzasum( const RUNTIME_option_t *options, void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
cham_store_t storev, cham_uplo_t uplo, int M, int N, cham_store_t storev, cham_uplo_t uplo, int M, int N,
const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *B, int Bm, int Bn ) const CHAM_desc_t *B, int Bm, int Bn )
{ {
struct starpu_codelet *codelet = &cl_dzasum; struct starpu_codelet *codelet = &cl_dzasum;
...@@ -65,7 +68,6 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options, ...@@ -65,7 +68,6 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, &M, sizeof(int),
STARPU_VALUE, &N, sizeof(int), STARPU_VALUE, &N, sizeof(int),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(B, double, Bm, Bn), STARPU_RW, RTBLKADDR(B, double, Bm, Bn),
STARPU_PRIORITY, options->priority, STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback, STARPU_CALLBACK, callback,
...@@ -73,4 +75,5 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options, ...@@ -73,4 +75,5 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
STARPU_NAME, "dzasum", STARPU_NAME, "dzasum",
#endif #endif
0); 0);
(void)ldA;
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Guillaume Sylvand * @author Guillaume Sylvand
* @author Lucas Barros de Assis
* @date 2016-09-08 * @date 2016-09-08
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -31,19 +32,21 @@ ...@@ -31,19 +32,21 @@
static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
{ {
CHAMELEON_Complex64_t *A; CHAMELEON_Complex64_t *A;
int ld; int ldA;
void *user_data; void *user_data;
void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ; void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ldA, void *user_data) ;
int row_min, row_max, col_min, col_max; int row_min, row_max, col_min, col_max;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback ); ldA = STARPU_MATRIX_GET_LD( descr[0] );
starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &user_data, &user_build_callback );
/* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max] /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
* (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran) * (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran)
* and store it at the address 'buffer' with leading dimension 'ld' * and store it at the address 'buffer' with leading dimension 'ld'
*/ */
user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data); user_build_callback(row_min, row_max, col_min, col_max, A, ldA, user_data);
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -54,7 +57,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) ...@@ -54,7 +57,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func) CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
void INSERT_TASK_zbuild( const RUNTIME_option_t *options, void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *A, int Am, int An, int ldA,
void *user_data, void* user_build_callback ) void *user_data, void* user_build_callback )
{ {
...@@ -77,7 +80,6 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options, ...@@ -77,7 +80,6 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
STARPU_VALUE, &col_min, sizeof(int), STARPU_VALUE, &col_min, sizeof(int),
STARPU_VALUE, &col_max, sizeof(int), STARPU_VALUE, &col_max, sizeof(int),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_VALUE, &user_data, sizeof(void*), STARPU_VALUE, &user_data, sizeof(void*),
STARPU_VALUE, &user_build_callback, sizeof(void*), STARPU_VALUE, &user_build_callback, sizeof(void*),
STARPU_PRIORITY, options->priority, STARPU_PRIORITY, options->priority,
...@@ -86,4 +88,5 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options, ...@@ -86,4 +88,5 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
STARPU_NAME, "zbuild", STARPU_NAME, "zbuild",
#endif #endif
0); 0);
(void)ldA;
} }
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16 * @date 2014-11-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -32,15 +33,18 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg) ...@@ -32,15 +33,18 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
int N; int N;
CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t alpha;
const CHAMELEON_Complex64_t *A; const CHAMELEON_Complex64_t *A;
int LDA; int ldA;
CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t beta;
CHAMELEON_Complex64_t *B; CHAMELEON_Complex64_t *B;
int LDB; int ldB;
A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); ldA = STARPU_MATRIX_GET_LD( descr[0] );
CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB); ldB = STARPU_MATRIX_GET_LD( descr[1] );
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta);
CORE_zgeadd(trans, M, N, alpha, A, ldA, beta, B, ldB);
return; return;
} }
...@@ -52,22 +56,24 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg) ...@@ -52,22 +56,24 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
int N; int N;
cuDoubleComplex alpha; cuDoubleComplex alpha;
const cuDoubleComplex *A; const cuDoubleComplex *A;
int lda; int ldA;
cuDoubleComplex beta; cuDoubleComplex beta;
cuDoubleComplex *B; cuDoubleComplex *B;
int ldb; int ldB;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb); ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldB = STARPU_MATRIX_GET_LD( descr[1] );
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta);
RUNTIME_getStream( stream ); RUNTIME_getStream( stream );
CUDA_zgeadd( CUDA_zgeadd(
trans, trans,
M, N, M, N,
&alpha, A, lda, &alpha, A, ldA,
&beta, B, ldb, &beta, B, ldB,
stream); stream);
#ifndef STARPU_CUDA_ASYNC #ifndef STARPU_CUDA_ASYNC
...@@ -118,22 +124,22 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) ...@@ -118,22 +124,22 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
* Scalar factor of A. * Scalar factor of A.
* *
* @param[in] A * @param[in] A
* Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M * Matrix of size ldA-by-N, if trans = ChamNoTrans, ldA-by-M
* otherwise. * otherwise.
* *
* @param[in] LDA * @param[in] ldA
* Leading dimension of the array A. LDA >= max(1,k), with k=M, if * Leading dimension of the array A. ldA >= max(1,k), with k=M, if
* trans = ChamNoTrans, and k=N otherwise. * trans = ChamNoTrans, and k=N otherwise.
* *
* @param[in] beta * @param[in] beta
* Scalar factor of B. * Scalar factor of B.
* *
* @param[in,out] B * @param[in,out] B
* Matrix of size LDB-by-N. * Matrix of size ldB-by-N.
* On exit, B = alpha * op(A) + beta * B * On exit, B = alpha * op(A) + beta * B
* *
* @param[in] LDB * @param[in] ldB
* Leading dimension of the array B. LDB >= max(1,M) * Leading dimension of the array B. ldB >= max(1,M)
* *
******************************************************************************* *******************************************************************************
* *
...@@ -143,8 +149,8 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) ...@@ -143,8 +149,8 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
*/ */
void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
cham_trans_t trans, int m, int n, int nb, cham_trans_t trans, int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldB )
{ {
struct starpu_codelet *codelet = &cl_zgeadd; struct starpu_codelet *codelet = &cl_zgeadd;
void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
...@@ -161,16 +167,15 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, ...@@ -161,16 +167,15 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t),
STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_PRIORITY, options->priority, STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback, STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME) #if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zgeadd", STARPU_NAME, "zgeadd",
#endif #endif
0); 0);
(void)ldA;
(void)nb; (void)nb;
} }
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16 * @date 2014-11-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -34,20 +35,22 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg) ...@@ -34,20 +35,22 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
int n; int n;
int ib; int ib;
CHAMELEON_Complex64_t *A; CHAMELEON_Complex64_t *A;
int lda; int ldA;
CHAMELEON_Complex64_t *T; CHAMELEON_Complex64_t *T;
int ldt; int ldT;
CHAMELEON_Complex64_t *TAU, *WORK; CHAMELEON_Complex64_t *TAU, *WORK;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */ TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */
ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldT = STARPU_MATRIX_GET_LD( descr[1] );
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &h_work);
WORK = TAU + chameleon_max( m, n ); WORK = TAU + chameleon_max( m, n );
CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt ); CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldT );
CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK); CORE_zgelqt(m, n, ib, A, ldA, T, ldT, TAU, WORK);
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -93,16 +96,16 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) ...@@ -93,16 +96,16 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
* with the array TAU, represent the unitary tile Q as a * with the array TAU, represent the unitary tile Q as a
* product of elementary reflectors (see Further Details). * product of elementary reflectors (see Further Details).
* *
* @param[in] LDA * @param[in] ldA
* The leading dimension of the array A. LDA >= max(1,M). * The leading dimension of the array A. ldA >= max(1,M).
* *
* @param[out] T * @param[out] T
* The IB-by-N triangular factor T of the block reflector. * The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage); * T is upper triangular by block (economic storage);
* The rest of the array is not referenced. * The rest of the array is not referenced.
* *
* @param[in] LDT * @param[in] ldT
* The leading dimension of the array T. LDT >= IB. * The leading dimension of the array T. ldT >= IB.
* *
* @param[out] TAU * @param[out] TAU
* The scalar factors of the elementary reflectors (see Further * The scalar factors of the elementary reflectors (see Further
...@@ -118,8 +121,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) ...@@ -118,8 +121,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
*/ */
void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
int m, int n, int ib, int nb, int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *T, int Tm, int Tn, int ldt) const CHAM_desc_t *T, int Tm, int Tn, int ldT)
{ {
(void)nb; (void)nb;
struct starpu_codelet *codelet = &cl_zgelqt; struct starpu_codelet *codelet = &cl_zgelqt;
...@@ -137,9 +140,7 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, ...@@ -137,9 +140,7 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &ib, sizeof(int), STARPU_VALUE, &ib, sizeof(int),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
/* max( nb * (ib+1), ib * (ib+nb) ) */ /* max( nb * (ib+1), ib * (ib+nb) ) */
STARPU_SCRATCH, options->ws_worker, STARPU_SCRATCH, options->ws_worker,
/* /\* ib*n + 3*ib*ib + max(m,n) *\/ */ /* /\* ib*n + 3*ib*ib + max(m,n) *\/ */
...@@ -150,4 +151,6 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, ...@@ -150,4 +151,6 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
STARPU_NAME, "zgelqt", STARPU_NAME, "zgelqt",
#endif #endif
0); 0);
(void)ldT;
(void)ldA;
} }
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16 * @date 2014-11-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -36,22 +37,26 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) ...@@ -36,22 +37,26 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
int k; int k;
CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t alpha;
CHAMELEON_Complex64_t *A; CHAMELEON_Complex64_t *A;
int lda; int ldA;
CHAMELEON_Complex64_t *B; CHAMELEON_Complex64_t *B;
int ldb; int ldB;
CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t beta;
CHAMELEON_Complex64_t *C; CHAMELEON_Complex64_t *C;
int ldc; int ldC;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldB = STARPU_MATRIX_GET_LD( descr[1] );
ldC = STARPU_MATRIX_GET_LD( descr[2] );
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
CORE_zgemm(transA, transB, CORE_zgemm(transA, transB,
m, n, k, m, n, k,
alpha, A, lda, alpha, A, ldA,
B, ldb, B, ldB,
beta, C, ldc); beta, C, ldC);
} }
#ifdef CHAMELEON_USE_CUDA #ifdef CHAMELEON_USE_CUDA
...@@ -64,26 +69,30 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) ...@@ -64,26 +69,30 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
int k; int k;
cuDoubleComplex alpha; cuDoubleComplex alpha;
const cuDoubleComplex *A; const cuDoubleComplex *A;
int lda; int ldA;
const cuDoubleComplex *B; const cuDoubleComplex *B;
int ldb; int ldB;
cuDoubleComplex beta; cuDoubleComplex beta;
cuDoubleComplex *C; cuDoubleComplex *C;
int ldc; int ldC;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldB = STARPU_MATRIX_GET_LD( descr[1] );
ldC = STARPU_MATRIX_GET_LD( descr[2] );
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
RUNTIME_getStream( stream ); RUNTIME_getStream( stream );
CUDA_zgemm( CUDA_zgemm(
transA, transB, transA, transB,
m, n, k, m, n, k,
&alpha, A, lda, &alpha, A, ldA,
B, ldb, B, ldB,
&beta, C, ldc, &beta, C, ldC,
stream); stream);
#ifndef STARPU_CUDA_ASYNC #ifndef STARPU_CUDA_ASYNC
...@@ -108,9 +117,9 @@ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC) ...@@ -108,9 +117,9 @@ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
void INSERT_TASK_zgemm(const RUNTIME_option_t *options, void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB, cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb, int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *B, int Bm, int Bn, int ldb, const CHAM_desc_t *B, int Bm, int Bn, int ldB,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC)
{ {
(void)nb; (void)nb;
struct starpu_codelet *codelet = &cl_zgemm; struct starpu_codelet *codelet = &cl_zgemm;
...@@ -131,16 +140,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options, ...@@ -131,16 +140,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
STARPU_VALUE, &k, sizeof(int), STARPU_VALUE, &k, sizeof(int),
STARPU_VALUE, &alpha, sizeof(CHA