Attention : une mise à jour du serveur va être effectuée le lundi 17 mai entre 13h et 13h30. Cette mise à jour va générer une interruption du service de quelques minutes.

Commit d5e458e1 authored by BARROS DE ASSIS Lucas

Merge branch 'master' of gitlab.inria.fr:solverstack/chameleon

parents 985080e9 2be2ccd3
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
* @comment This file has been automatically generated * @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 0.9.2 * from Plasma 2.6.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Lucas Barros de Assis
* @date 2014-11-16 * @date 2014-11-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -30,13 +31,15 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg) ...@@ -30,13 +31,15 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
int M; int M;
int N; int N;
CHAMELEON_Complex64_t *A; CHAMELEON_Complex64_t *A;
int lda; int ldA;
double *work; double *work;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
work = (double *)STARPU_MATRIX_GET_PTR(descr[1]); work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda); ldA = STARPU_MATRIX_GET_LD( descr[0] );
CORE_dzasum(storev, uplo, M, N, A, lda, work);
starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N);
CORE_dzasum(storev, uplo, M, N, A, ldA, work);
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -47,7 +50,7 @@ CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func) ...@@ -47,7 +50,7 @@ CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func)
void INSERT_TASK_dzasum( const RUNTIME_option_t *options, void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
cham_store_t storev, cham_uplo_t uplo, int M, int N, cham_store_t storev, cham_uplo_t uplo, int M, int N,
const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *B, int Bm, int Bn ) const CHAM_desc_t *B, int Bm, int Bn )
{ {
struct starpu_codelet *codelet = &cl_dzasum; struct starpu_codelet *codelet = &cl_dzasum;
...@@ -65,7 +68,6 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options, ...@@ -65,7 +68,6 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, &M, sizeof(int),
STARPU_VALUE, &N, sizeof(int), STARPU_VALUE, &N, sizeof(int),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(B, double, Bm, Bn), STARPU_RW, RTBLKADDR(B, double, Bm, Bn),
STARPU_PRIORITY, options->priority, STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback, STARPU_CALLBACK, callback,
...@@ -73,4 +75,5 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options, ...@@ -73,4 +75,5 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
STARPU_NAME, "dzasum", STARPU_NAME, "dzasum",
#endif #endif
0); 0);
(void)ldA;
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Guillaume Sylvand * @author Guillaume Sylvand
* @author Lucas Barros de Assis
* @date 2016-09-08 * @date 2016-09-08
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -31,19 +32,21 @@ ...@@ -31,19 +32,21 @@
static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
{ {
CHAMELEON_Complex64_t *A; CHAMELEON_Complex64_t *A;
int ld; int ldA;
void *user_data; void *user_data;
void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ; void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ldA, void *user_data) ;
int row_min, row_max, col_min, col_max; int row_min, row_max, col_min, col_max;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback ); ldA = STARPU_MATRIX_GET_LD( descr[0] );
starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &user_data, &user_build_callback );
/* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max] /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
* (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran) * (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran)
* and store it at the address 'buffer' with leading dimension 'ld' * and store it at the address 'buffer' with leading dimension 'ld'
*/ */
user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data); user_build_callback(row_min, row_max, col_min, col_max, A, ldA, user_data);
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -54,7 +57,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) ...@@ -54,7 +57,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func) CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
void INSERT_TASK_zbuild( const RUNTIME_option_t *options, void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *A, int Am, int An, int ldA,
void *user_data, void* user_build_callback ) void *user_data, void* user_build_callback )
{ {
...@@ -77,7 +80,6 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options, ...@@ -77,7 +80,6 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
STARPU_VALUE, &col_min, sizeof(int), STARPU_VALUE, &col_min, sizeof(int),
STARPU_VALUE, &col_max, sizeof(int), STARPU_VALUE, &col_max, sizeof(int),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_VALUE, &user_data, sizeof(void*), STARPU_VALUE, &user_data, sizeof(void*),
STARPU_VALUE, &user_build_callback, sizeof(void*), STARPU_VALUE, &user_build_callback, sizeof(void*),
STARPU_PRIORITY, options->priority, STARPU_PRIORITY, options->priority,
...@@ -86,4 +88,5 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options, ...@@ -86,4 +88,5 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
STARPU_NAME, "zbuild", STARPU_NAME, "zbuild",
#endif #endif
0); 0);
(void)ldA;
} }
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16 * @date 2014-11-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -32,15 +33,18 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg) ...@@ -32,15 +33,18 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
int N; int N;
CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t alpha;
const CHAMELEON_Complex64_t *A; const CHAMELEON_Complex64_t *A;
int LDA; int ldA;
CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t beta;
CHAMELEON_Complex64_t *B; CHAMELEON_Complex64_t *B;
int LDB; int ldB;
A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); ldA = STARPU_MATRIX_GET_LD( descr[0] );
CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB); ldB = STARPU_MATRIX_GET_LD( descr[1] );
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta);
CORE_zgeadd(trans, M, N, alpha, A, ldA, beta, B, ldB);
return; return;
} }
...@@ -52,22 +56,24 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg) ...@@ -52,22 +56,24 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
int N; int N;
cuDoubleComplex alpha; cuDoubleComplex alpha;
const cuDoubleComplex *A; const cuDoubleComplex *A;
int lda; int ldA;
cuDoubleComplex beta; cuDoubleComplex beta;
cuDoubleComplex *B; cuDoubleComplex *B;
int ldb; int ldB;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb); ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldB = STARPU_MATRIX_GET_LD( descr[1] );
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta);
RUNTIME_getStream( stream ); RUNTIME_getStream( stream );
CUDA_zgeadd( CUDA_zgeadd(
trans, trans,
M, N, M, N,
&alpha, A, lda, &alpha, A, ldA,
&beta, B, ldb, &beta, B, ldB,
stream); stream);
#ifndef STARPU_CUDA_ASYNC #ifndef STARPU_CUDA_ASYNC
...@@ -118,22 +124,22 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) ...@@ -118,22 +124,22 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
* Scalar factor of A. * Scalar factor of A.
* *
* @param[in] A * @param[in] A
* Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M * Matrix of size ldA-by-N, if trans = ChamNoTrans, ldA-by-M
* otherwise. * otherwise.
* *
* @param[in] LDA * @param[in] ldA
* Leading dimension of the array A. LDA >= max(1,k), with k=M, if * Leading dimension of the array A. ldA >= max(1,k), with k=M, if
* trans = ChamNoTrans, and k=N otherwise. * trans = ChamNoTrans, and k=N otherwise.
* *
* @param[in] beta * @param[in] beta
* Scalar factor of B. * Scalar factor of B.
* *
* @param[in,out] B * @param[in,out] B
* Matrix of size LDB-by-N. * Matrix of size ldB-by-N.
* On exit, B = alpha * op(A) + beta * B * On exit, B = alpha * op(A) + beta * B
* *
* @param[in] LDB * @param[in] ldB
* Leading dimension of the array B. LDB >= max(1,M) * Leading dimension of the array B. ldB >= max(1,M)
* *
******************************************************************************* *******************************************************************************
* *
...@@ -143,8 +149,8 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) ...@@ -143,8 +149,8 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
*/ */
void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
cham_trans_t trans, int m, int n, int nb, cham_trans_t trans, int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb ) CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldB )
{ {
struct starpu_codelet *codelet = &cl_zgeadd; struct starpu_codelet *codelet = &cl_zgeadd;
void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
...@@ -161,16 +167,15 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, ...@@ -161,16 +167,15 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t),
STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_PRIORITY, options->priority, STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback, STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME) #if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zgeadd", STARPU_NAME, "zgeadd",
#endif #endif
0); 0);
(void)ldA;
(void)nb; (void)nb;
} }
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16 * @date 2014-11-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -34,20 +35,22 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg) ...@@ -34,20 +35,22 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
int n; int n;
int ib; int ib;
CHAMELEON_Complex64_t *A; CHAMELEON_Complex64_t *A;
int lda; int ldA;
CHAMELEON_Complex64_t *T; CHAMELEON_Complex64_t *T;
int ldt; int ldT;
CHAMELEON_Complex64_t *TAU, *WORK; CHAMELEON_Complex64_t *TAU, *WORK;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */ TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */
ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldT = STARPU_MATRIX_GET_LD( descr[1] );
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &h_work);
WORK = TAU + chameleon_max( m, n ); WORK = TAU + chameleon_max( m, n );
CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt ); CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldT );
CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK); CORE_zgelqt(m, n, ib, A, ldA, T, ldT, TAU, WORK);
} }
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
...@@ -93,16 +96,16 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) ...@@ -93,16 +96,16 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
* with the array TAU, represent the unitary tile Q as a * with the array TAU, represent the unitary tile Q as a
* product of elementary reflectors (see Further Details). * product of elementary reflectors (see Further Details).
* *
* @param[in] LDA * @param[in] ldA
* The leading dimension of the array A. LDA >= max(1,M). * The leading dimension of the array A. ldA >= max(1,M).
* *
* @param[out] T * @param[out] T
* The IB-by-N triangular factor T of the block reflector. * The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage); * T is upper triangular by block (economic storage);
* The rest of the array is not referenced. * The rest of the array is not referenced.
* *
* @param[in] LDT * @param[in] ldT
* The leading dimension of the array T. LDT >= IB. * The leading dimension of the array T. ldT >= IB.
* *
* @param[out] TAU * @param[out] TAU
* The scalar factors of the elementary reflectors (see Further * The scalar factors of the elementary reflectors (see Further
...@@ -118,8 +121,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) ...@@ -118,8 +121,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
*/ */
void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
int m, int n, int ib, int nb, int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *T, int Tm, int Tn, int ldt) const CHAM_desc_t *T, int Tm, int Tn, int ldT)
{ {
(void)nb; (void)nb;
struct starpu_codelet *codelet = &cl_zgelqt; struct starpu_codelet *codelet = &cl_zgelqt;
...@@ -137,9 +140,7 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, ...@@ -137,9 +140,7 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &ib, sizeof(int), STARPU_VALUE, &ib, sizeof(int),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
/* max( nb * (ib+1), ib * (ib+nb) ) */ /* max( nb * (ib+1), ib * (ib+nb) ) */
STARPU_SCRATCH, options->ws_worker, STARPU_SCRATCH, options->ws_worker,
/* /\* ib*n + 3*ib*ib + max(m,n) *\/ */ /* /\* ib*n + 3*ib*ib + max(m,n) *\/ */
...@@ -150,4 +151,6 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, ...@@ -150,4 +151,6 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
STARPU_NAME, "zgelqt", STARPU_NAME, "zgelqt",
#endif #endif
0); 0);
(void)ldT;
(void)ldA;
} }
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16 * @date 2014-11-16
* @precisions normal z -> c d s * @precisions normal z -> c d s
* *
...@@ -36,22 +37,26 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg) ...@@ -36,22 +37,26 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
int k; int k;
CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t alpha;
CHAMELEON_Complex64_t *A; CHAMELEON_Complex64_t *A;
int lda; int ldA;
CHAMELEON_Complex64_t *B; CHAMELEON_Complex64_t *B;
int ldb; int ldB;
CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t beta;
CHAMELEON_Complex64_t *C; CHAMELEON_Complex64_t *C;
int ldc; int ldC;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldB = STARPU_MATRIX_GET_LD( descr[1] );
ldC = STARPU_MATRIX_GET_LD( descr[2] );
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
CORE_zgemm(transA, transB, CORE_zgemm(transA, transB,
m, n, k, m, n, k,
alpha, A, lda, alpha, A, ldA,
B, ldb, B, ldB,
beta, C, ldc); beta, C, ldC);
} }
#ifdef CHAMELEON_USE_CUDA #ifdef CHAMELEON_USE_CUDA
...@@ -64,26 +69,30 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) ...@@ -64,26 +69,30 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
int k; int k;
cuDoubleComplex alpha; cuDoubleComplex alpha;
const cuDoubleComplex *A; const cuDoubleComplex *A;
int lda; int ldA;
const cuDoubleComplex *B; const cuDoubleComplex *B;
int ldb; int ldB;
cuDoubleComplex beta; cuDoubleComplex beta;
cuDoubleComplex *C; cuDoubleComplex *C;
int ldc; int ldC;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc); ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldB = STARPU_MATRIX_GET_LD( descr[1] );
ldC = STARPU_MATRIX_GET_LD( descr[2] );
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
RUNTIME_getStream( stream ); RUNTIME_getStream( stream );
CUDA_zgemm( CUDA_zgemm(
transA, transB, transA, transB,
m, n, k, m, n, k,
&alpha, A, lda, &alpha, A, ldA,
B, ldb, B, ldB,
&beta, C, ldc, &beta, C, ldC,
stream); stream);
#ifndef STARPU_CUDA_ASYNC #ifndef STARPU_CUDA_ASYNC
...@@ -108,9 +117,9 @@ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC) ...@@ -108,9 +117,9 @@ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
void INSERT_TASK_zgemm(const RUNTIME_option_t *options, void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB, cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb, int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *B, int Bm, int Bn, int ldb, const CHAM_desc_t *B, int Bm, int Bn, int ldB,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc) CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC)
{ {
(void)nb; (void)nb;
struct starpu_codelet *codelet = &cl_zgemm; struct starpu_codelet *codelet = &cl_zgemm;
...@@ -131,16 +140,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options, ...@@ -131,16 +140,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,