Commit aa0fa3e9, authored by BARROS DE ASSIS Lucas, committed by Mathieu Faverge

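Fix codelets: read the leading dimensions from the StarPU matrix handles (STARPU_MATRIX_GET_LD) instead of packing them as STARPU_VALUE task arguments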

parent 9b777d34
......@@ -15,6 +15,7 @@
* @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
......@@ -30,13 +31,15 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
int M;
int N;
CHAMELEON_Complex64_t *A;
int lda;
int ldA;
double *work;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda);
CORE_dzasum(storev, uplo, M, N, A, lda, work);
ldA = STARPU_MATRIX_GET_LD( descr[0] );
starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N);
CORE_dzasum(storev, uplo, M, N, A, ldA, work);
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -47,7 +50,7 @@ CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func)
void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
cham_store_t storev, cham_uplo_t uplo, int M, int N,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *B, int Bm, int Bn )
{
struct starpu_codelet *codelet = &cl_dzasum;
......@@ -65,7 +68,6 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
STARPU_VALUE, &M, sizeof(int),
STARPU_VALUE, &N, sizeof(int),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(B, double, Bm, Bn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
......@@ -73,4 +75,5 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
STARPU_NAME, "dzasum",
#endif
0);
(void)ldA;
}
......@@ -20,6 +20,7 @@
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Guillaume Sylvand
* @author Lucas Barros de Assis
* @date 2016-09-08
* @precisions normal z -> c d s
*
......@@ -31,19 +32,21 @@
/**
 * CPU implementation of the zbuild codelet.
 *
 * Takes the tile address and its leading dimension from the StarPU matrix
 * handle descr[0], unpacks the tile bounds and the user callback from cl_arg,
 * and invokes the callback exactly once to fill the tile.
 *
 * @param[in,out] descr
 *          StarPU data interfaces; descr[0] is the tile to build.
 *
 * @param[in] cl_arg
 *          Packed codelet arguments, unpacked in this order: row_min, row_max,
 *          col_min, col_max, user_data, user_build_callback.
 */
static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
{
    CHAMELEON_Complex64_t *A;
    int ldA;
    void *user_data;
    void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ldA, void *user_data) ;
    int row_min, row_max, col_min, col_max;

    A   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
    /* The leading dimension is read from the data handle, not from cl_arg. */
    ldA = STARPU_MATRIX_GET_LD( descr[0] );
    starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &user_data, &user_build_callback );

    /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
     * (with both min and max values included in the intervals; indices start at 0 like in C, NOT 1 like in Fortran)
     * and store it at the address 'buffer' with leading dimension 'ldA'
     */
    user_build_callback(row_min, row_max, col_min, col_max, A, ldA, user_data);
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -54,7 +57,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *A, int Am, int An, int ldA,
void *user_data, void* user_build_callback )
{
......@@ -77,7 +80,6 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
STARPU_VALUE, &col_min, sizeof(int),
STARPU_VALUE, &col_max, sizeof(int),
STARPU_W, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_VALUE, &user_data, sizeof(void*),
STARPU_VALUE, &user_build_callback, sizeof(void*),
STARPU_PRIORITY, options->priority,
......@@ -86,4 +88,5 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
STARPU_NAME, "zbuild",
#endif
0);
(void)ldA;
}
......@@ -17,6 +17,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
......@@ -32,15 +33,18 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
int N;
CHAMELEON_Complex64_t alpha;
const CHAMELEON_Complex64_t *A;
int LDA;
int ldA;
CHAMELEON_Complex64_t beta;
CHAMELEON_Complex64_t *B;
int LDB;
int ldB;
A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB);
CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB);
ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldB = STARPU_MATRIX_GET_LD( descr[1] );
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta);
CORE_zgeadd(trans, M, N, alpha, A, ldA, beta, B, ldB);
return;
}
......@@ -52,22 +56,24 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
int N;
cuDoubleComplex alpha;
const cuDoubleComplex *A;
int lda;
int ldA;
cuDoubleComplex beta;
cuDoubleComplex *B;
int ldb;
int ldB;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb);
ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldB = STARPU_MATRIX_GET_LD( descr[1] );
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta);
RUNTIME_getStream( stream );
CUDA_zgeadd(
trans,
M, N,
&alpha, A, lda,
&beta, B, ldb,
&alpha, A, ldA,
&beta, B, ldB,
stream);
#ifndef STARPU_CUDA_ASYNC
......@@ -118,22 +124,22 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
* Scalar factor of A.
*
* @param[in] A
* Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M
* Matrix of size ldA-by-N, if trans = ChamNoTrans, ldA-by-M
* otherwise.
*
* @param[in] LDA
* Leading dimension of the array A. LDA >= max(1,k), with k=M, if
* @param[in] ldA
* Leading dimension of the array A. ldA >= max(1,k), with k=M, if
* trans = ChamNoTrans, and k=N otherwise.
*
* @param[in] beta
* Scalar factor of B.
*
* @param[in,out] B
* Matrix of size LDB-by-N.
* Matrix of size ldB-by-N.
* On exit, B = alpha * op(A) + beta * B
*
* @param[in] LDB
* Leading dimension of the array B. LDB >= max(1,M)
* @param[in] ldB
* Leading dimension of the array B. ldB >= max(1,M)
*
*******************************************************************************
*
......@@ -143,8 +149,8 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
*/
void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
cham_trans_t trans, int m, int n, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn, int ldB )
{
struct starpu_codelet *codelet = &cl_zgeadd;
void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
......@@ -161,16 +167,15 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t),
STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zgeadd",
#endif
0);
(void)ldA;
(void)nb;
}
......@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
......@@ -34,20 +35,22 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
int n;
int ib;
CHAMELEON_Complex64_t *A;
int lda;
int ldA;
CHAMELEON_Complex64_t *T;
int ldt;
int ldT;
CHAMELEON_Complex64_t *TAU, *WORK;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */
ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldT = STARPU_MATRIX_GET_LD( descr[1] );
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &h_work);
WORK = TAU + chameleon_max( m, n );
CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt );
CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK);
CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldT );
CORE_zgelqt(m, n, ib, A, ldA, T, ldT, TAU, WORK);
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -93,16 +96,16 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
* with the array TAU, represent the unitary tile Q as a
* product of elementary reflectors (see Further Details).
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,M).
* @param[in] ldA
* The leading dimension of the array A. ldA >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
* @param[in] ldT
* The leading dimension of the array T. ldT >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
......@@ -118,8 +121,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
*/
void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt)
const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *T, int Tm, int Tn, int ldT)
{
(void)nb;
struct starpu_codelet *codelet = &cl_zgelqt;
......@@ -137,9 +140,7 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
/* max( nb * (ib+1), ib * (ib+nb) ) */
STARPU_SCRATCH, options->ws_worker,
/* /\* ib*n + 3*ib*ib + max(m,n) *\/ */
......@@ -150,4 +151,6 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
STARPU_NAME, "zgelqt",
#endif
0);
(void)ldT;
(void)ldA;
}
......@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
......@@ -36,22 +37,26 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
int k;
CHAMELEON_Complex64_t alpha;
CHAMELEON_Complex64_t *A;
int lda;
int ldA;
CHAMELEON_Complex64_t *B;
int ldb;
int ldB;
CHAMELEON_Complex64_t beta;
CHAMELEON_Complex64_t *C;
int ldc;
int ldC;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldB = STARPU_MATRIX_GET_LD( descr[1] );
ldC = STARPU_MATRIX_GET_LD( descr[2] );
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
CORE_zgemm(transA, transB,
m, n, k,
alpha, A, lda,
B, ldb,
beta, C, ldc);
alpha, A, ldA,
B, ldB,
beta, C, ldC);
}
#ifdef CHAMELEON_USE_CUDA
......@@ -64,26 +69,30 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
int k;
cuDoubleComplex alpha;
const cuDoubleComplex *A;
int lda;
int ldA;
const cuDoubleComplex *B;
int ldb;
int ldB;
cuDoubleComplex beta;
cuDoubleComplex *C;
int ldc;
int ldC;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldB = STARPU_MATRIX_GET_LD( descr[1] );
ldC = STARPU_MATRIX_GET_LD( descr[2] );
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
RUNTIME_getStream( stream );
CUDA_zgemm(
transA, transB,
m, n, k,
&alpha, A, lda,
B, ldb,
&beta, C, ldc,
&alpha, A, ldA,
B, ldB,
&beta, C, ldC,
stream);
#ifndef STARPU_CUDA_ASYNC
......@@ -108,9 +117,9 @@ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
cham_trans_t transA, cham_trans_t transB,
int m, int n, int k, int nb,
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *B, int Bm, int Bn, int ldB,
CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC)
{
(void)nb;
struct starpu_codelet *codelet = &cl_zgemm;
......@@ -131,16 +140,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
STARPU_VALUE, &k, sizeof(int),
STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t),
STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
STARPU_VALUE, &ldc, sizeof(int),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zgemm",
#endif
0);
(void)ldA;
(void)ldB;
(void)ldC;
}
......@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
......@@ -34,21 +35,23 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
int n;
int ib;
CHAMELEON_Complex64_t *A;
int lda;
int ldA;
CHAMELEON_Complex64_t *T;
int ldt;
int ldT;
CHAMELEON_Complex64_t *TAU, *WORK;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
T = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + n * ib */
ldA = STARPU_MATRIX_GET_LD( descr[0] );
ldT = STARPU_MATRIX_GET_LD( descr[1] );
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work);
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &h_work);
WORK = TAU + chameleon_max( m, n );
CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldt );
CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK);
CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldT );
CORE_zgeqrt(m, n, ib, A, ldA, T, ldT, TAU, WORK);
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -95,16 +98,16 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
* with the array TAU, represent the unitary tile Q as a
* product of elementary reflectors (see Further Details).
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,M).
* @param[in] ldA
* The leading dimension of the array A. ldA >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
* @param[in] ldT
* The leading dimension of the array T. ldT >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
......@@ -120,8 +123,8 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
*/
void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
int m, int n, int ib, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *T, int Tm, int Tn, int ldt)
const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *T, int Tm, int Tn, int ldT)
{
(void)nb;
struct starpu_codelet *codelet = &cl_zgeqrt;
......@@ -139,9 +142,7 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_W, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
/* max( nb * (ib+1), ib * (ib+nb) ) */
STARPU_SCRATCH, options->ws_worker,
/* ib * (m+3*ib) + max(m,n) */
......@@ -152,4 +153,6 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
STARPU_NAME, "zgeqrt",
#endif
0);
(void)ldT;
(void)ldA;
}
......@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
......@@ -34,16 +35,21 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
int k;
int ib;
int *IPIV;
int ldl;
int ldL;
CHAMELEON_Complex64_t *D;
int ldd;
int ldD;
CHAMELEON_Complex64_t *A;
int lda;
int ldA;
D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda);
CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda);
ldL = STARPU_MATRIX_GET_LD( descr[0] );
ldD = STARPU_MATRIX_GET_LD( descr[1] );
ldA = STARPU_MATRIX_GET_LD( descr[2] );
starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV);
CORE_zgessm(m, n, k, ib, IPIV, D, ldD, A, ldA);
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -80,15 +86,15 @@ CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
* @param[in] L
* The M-by-K lower triangular tile.
*
* @param[in] LDL
* The leading dimension of the array L. LDL >= max(1,M).
* @param[in] ldL
* The leading dimension of the array L. ldL >= max(1,M).
*
* @param[in,out] A
* On entry, the M-by-N tile A.
* On exit, updated by the application of L.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,M).
* @param[in] ldA
* The leading dimension of the array A. ldA >= max(1,M).
*
*******************************************************************************
*
......@@ -100,9 +106,9 @@ CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
int m, int n, int k, int ib, int nb,
int *IPIV,
const CHAM_desc_t *L, int Lm, int Ln, int ldl,
const CHAM_desc_t *D, int Dm, int Dn, int ldd,
const CHAM_desc_t *A, int Am, int An, int lda )
const CHAM_desc_t *L, int Lm, int Ln, int ldL,
const CHAM_desc_t *D, int Dm, int Dn, int ldD,
const CHAM_desc_t *A, int Am, int An, int ldA )
{
(void)nb;
struct starpu_codelet *codelet = &cl_zgessm;
......@@ -122,15 +128,14 @@ void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
STARPU_VALUE, &ib, sizeof(int),
STARPU_VALUE, &IPIV, sizeof(int*),
STARPU_R, RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln),
STARPU_VALUE, &ldl, sizeof(int),
STARPU_R, RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn),
STARPU_VALUE, &ldd, sizeof(int),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zgessm",
#endif
0);
(void)ldD;
(void)ldL;
}
......@@ -15,6 +15,7 @@
* @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
......@@ -29,13 +30,15 @@ static void cl_zgessq_cpu_func(void *descr[], void *cl_arg)
int m;
int n;
CHAMELEON_Complex64_t *A;
int lda;
int ldA;
double *SCALESUMSQ;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
ldA = STARPU_MATRIX_GET_LD( descr[0] );
SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &storev, &m, &n, &lda);
CORE_zgessq( storev, m, n, A, lda, SCALESUMSQ );
starpu_codelet_unpack_args(cl_arg, &storev, &m, &n);
CORE_zgessq( storev, m, n, A, ldA, SCALESUMSQ );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -46,7 +49,7 @@ CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func)
void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
cham_store_t storev, int m, int n,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *A, int Am, int An, int ldA,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
{
struct starpu_codelet *codelet = &cl_zgessq;
......@@ -63,7 +66,6 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
......@@ -71,4 +73,5 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
STARPU_NAME, "zgessq",
#endif
0);
(void)ldA;
}
......@@ -17,6 +17,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
......@@ -30,7 +31,7 @@ static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg)
int m;
int n;
CHAMELEON_Complex64_t *A;
int lda;
int ldA;
int *IPIV;
cham_bool_t check_info;
int iinfo;
......@@ -39,9 +40,10 @@ static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg)
int info = 0;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
ldA = STARPU_MATRIX_GET_LD( descr[0] );
starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &IPIV, &check_info, &iinfo, &sequence, &request);
CORE_zgetrf( m, n, A, lda, IPIV, &info );
starpu_codelet_unpack_args(cl_arg, &m, &n, &IPIV, &check_info, &iinfo, &sequence, &request);
CORE_zgetrf( m, n, A, ldA, IPIV, &info );
if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
......@@ -56,7 +58,7 @@ CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func)
void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
int m, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *A, int Am, int An, int ldA,
int *IPIV,
cham_bool_t check_info, int iinfo )
{
......@@ -73,7 +75,6 @@ void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_VALUE, &IPIV, sizeof(int*),
STARPU_VALUE, &check_info, sizeof(cham_bool_t),
STARPU_VALUE, &iinfo, sizeof(int),
......@@ -85,4 +86,5 @@ void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
STARPU_NAME, "zgetrf",
#endif
0);
(void)ldA;
}
......@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
......@@ -34,7 +35,7 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)