Commit a4f56680 authored by Mathieu Faverge

Fix distributed norms

parent 3c0f1115
......@@ -32,7 +32,7 @@
static inline void
chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
CHAM_desc_t *Wcol, CHAM_desc_t *Welt,
RUNTIME_option_t *options)
RUNTIME_option_t *options )
{
int m, n;
int minMNT = chameleon_min( A->mt, A->nt );
......@@ -58,7 +58,7 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
if ( (n == m) && (uplo != ChamUpperLower) ) {
if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_ztrasm(
options,
ChamColumnwise, uplo, diag, tempmm, tempnn,
......@@ -95,7 +95,8 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
INSERT_TASK_dlange(
options,
ChamMaxNorm, 1, tempnn, A->nb,
W( Wcol, 0, n), 1, W( Welt, 0, n));
W( Wcol, 0, n ), 1,
W( Welt, 0, n ) );
}
/**
......@@ -105,7 +106,8 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for(n = Q; n < NT; n++) {
INSERT_TASK_dlange_max(
options,
W( Welt, 0, n), W( Welt, 0, n%Q) );
W( Welt, 0, n ),
W( Welt, 0, n%Q ) );
}
/**
......@@ -115,7 +117,8 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for(n = 1; n < Q; n++) {
INSERT_TASK_dlange_max(
options,
W( Welt, 0, n), W( Welt, 0, 0) );
W( Welt, 0, n ),
W( Welt, 0, 0 ) );
}
}
......@@ -247,13 +250,14 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
INSERT_TASK_zlange(
options,
ChamMaxNorm, tempmm, tempnn, A->nb,
A(m, n), ldam, W( Welt, m, n));
A(m, n), ldam, W( Welt, m, n ));
}
if ( n >= Q ) {
INSERT_TASK_dlange_max(
options,
W( Welt, m, n), W( Welt, m, n%Q) );
W( Welt, m, n ),
W( Welt, m, n%Q ) );
}
}
......@@ -264,7 +268,8 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
for(n = 1; n < Q; n++) {
INSERT_TASK_dlange_max(
options,
W( Welt, m, n), W( Welt, m, 0) );
W( Welt, m, n ),
W( Welt, m, 0 ) );
}
}
......@@ -275,7 +280,8 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
for(m = P; m < MT; m++) {
INSERT_TASK_dlange_max(
options,
W( Welt, m, 0), W( Welt, m%P, 0) );
W( Welt, m, 0 ),
W( Welt, m%P, 0 ) );
}
/**
......@@ -285,7 +291,8 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
for(m = 1; m < P; m++) {
INSERT_TASK_dlange_max(
options,
W( Welt, m, 0), W( Welt, 0, 0) );
W( Welt, m, 0 ),
W( Welt, 0, 0 ) );
}
}
......@@ -382,7 +389,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
double alpha = 0.0;
double beta = 0.0;
int workn, workmt, worknt;
int workmt, worknt;
int m, n, wcol_init = 0;
chamctxt = chameleon_context_self();
......@@ -395,7 +402,6 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
workn = chameleon_max( A->n, A->q );
switch ( norm ) {
case ChamOneNorm:
......@@ -502,7 +508,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
*/
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
if ( (m != 0) && (n != 0) ) {
if ( (m != 0) || (n != 0) ) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
......@@ -514,7 +520,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
CHAMELEON_Desc_Flush( &Welt, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
*result = *(double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q );
*result = *((double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q ));
if ( wcol_init ) {
chameleon_desc_destroy( &Wcol );
......
......@@ -315,7 +315,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
double alpha = 0.0;
double beta = 0.0;
int workn, workmt, worknt;
int workmt, worknt;
int m, n, wcol_init = 0;
chamctxt = chameleon_context_self();
......@@ -328,7 +328,6 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
workn = chameleon_max( A->n, A->q );
switch ( norm ) {
case ChamOneNorm:
......@@ -415,7 +414,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
*/
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
if ( (m != 0) && (n != 0) ) {
if ( (m != 0) || (n != 0) ) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
......
......@@ -77,11 +77,8 @@
* Global shortcuts
*/
#define CHAMELEON_RANK chameleon_rank(chamctxt)
#define CHAMELEON_SIZE chamctxt->world_size
#define CHAMELEON_GRPSIZE chamctxt->group_size
#define CHAMELEON_NB chamctxt->nb
#define CHAMELEON_IB chamctxt->ib
#define CHAMELEON_SCHEDULING chamctxt->scheduling
#define CHAMELEON_RHBLK chamctxt->rhblock
#define CHAMELEON_TRANSLATION chamctxt->translation
#define CHAMELEON_PARALLEL chamctxt->parallel_enabled
......
......@@ -109,8 +109,6 @@ typedef struct chameleon_context_s {
int my_mpi_rank;
int mpi_comm_size;
#endif
int world_size;
int group_size;
/* Boolean flags */
cham_bool_t warnings_enabled;
......
......@@ -57,18 +57,18 @@
*
*/
void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn )
const CHAM_desc_t *IN, int INm, int INn,
const CHAM_desc_t *OUT, int OUTm, int OUTn )
{
double *scalesum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
double *scl = RTBLKADDR(SCLSSQ, double, SCLSSQm, SCLSSQn);
#pragma omp task depend(in: scalesum[0]) depend(inout: scl[0])
double *sclssq_in = RTBLKADDR(IN, double, INm, INn );
double *sclssq_out = RTBLKADDR(OUT, double, OUTm, OUTn);
#pragma omp task depend(in: sclssq_in[0]) depend(inout: sclssq_out[0])
{
if( scl[0] < scalesum[0] ) {
scl[1] = scalesum[1] + (scl[1] * (( scl[0] / scalesum[0] ) * ( scl[0] / scalesum[0] )));
scl[0] = scalesum[0];
if( sclssq_out[0] < sclssq_in[0] ) {
sclssq_out[1] = sclssq_in[1] + (sclssq_out[1] * (( sclssq_out[0] / sclssq_in[0] ) * ( sclssq_out[0] / sclssq_in[0] )));
sclssq_out[0] = sclssq_in[0];
} else {
scl[1] = scl[1] + (scalesum[1] * (( scalesum[0] / scl[0] ) * ( scalesum[0] / scl[0] )));
sclssq_out[1] = sclssq_out[1] + (sclssq_in[1] * (( sclssq_in[0] / sclssq_out[0] ) * ( sclssq_in[0] / sclssq_out[0] )));
}
}
}
......
......@@ -56,17 +56,20 @@ static inline int
CORE_zplssq_parsec( parsec_execution_stream_t *context,
parsec_task_t *this_task )
{
double *SCALESUMSQ;
double *SCLSSQ;
double *SCLSSQ_IN;
double *SCLSSQ_OUT;
parsec_dtd_unpack_args(
this_task, &SCALESUMSQ, &SCLSSQ );
this_task, &SCLSSQ_IN, &SCLSSQ_OUT );
if( SCLSSQ[0] < SCALESUMSQ[0] ) {
SCLSSQ[1] = SCALESUMSQ[1] + (SCLSSQ[1] * (( SCLSSQ[0] / SCALESUMSQ[0] ) * ( SCLSSQ[0] / SCALESUMSQ[0] )));
SCLSSQ[0] = SCALESUMSQ[0];
assert( SCLSSQ_OUT[0] >= 0. );
if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
SCLSSQ_OUT[0] = SCLSSQ_IN[0];
} else {
SCLSSQ[1] = SCLSSQ[1] + (SCALESUMSQ[1] * (( SCALESUMSQ[0] / SCLSSQ[0] ) * ( SCALESUMSQ[0] / SCLSSQ[0] )));
if ( SCLSSQ_OUT[0] > 0 ) {
SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
}
}
(void)context;
......
......@@ -26,16 +26,19 @@
void CORE_zplssq_quark(Quark *quark)
{
double *SCALESUMSQ;
double *SCLSSQ;
double *SCLSSQ_IN;
double *SCLSSQ_OUT;
quark_unpack_args_2( quark, SCALESUMSQ, SCLSSQ );
quark_unpack_args_2( quark, SCLSSQ_IN, SCLSSQ_OUT );
if( SCLSSQ[0] < SCALESUMSQ[0] ) {
SCLSSQ[1] = SCALESUMSQ[1] + (SCLSSQ[1] * (( SCLSSQ[0] / SCALESUMSQ[0] ) * ( SCLSSQ[0] / SCALESUMSQ[0] )));
SCLSSQ[0] = SCALESUMSQ[0];
assert( SCLSSQ_OUT[0] >= 0. );
if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
SCLSSQ_OUT[0] = SCLSSQ_IN[0];
} else {
SCLSSQ[1] = SCLSSQ[1] + (SCALESUMSQ[1] * (( SCALESUMSQ[0] / SCLSSQ[0] ) * ( SCALESUMSQ[0] / SCLSSQ[0] )));
if ( SCLSSQ_OUT[0] > 0 ) {
SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
}
}
}
......
......@@ -120,13 +120,13 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
int M;
int N;
CHAMELEON_Complex64_t alpha;
CHAMELEON_Complex64_t *A;
const CHAMELEON_Complex64_t *A;
int LDA;
CHAMELEON_Complex64_t beta;
CHAMELEON_Complex64_t *B;
int LDB;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB);
CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB);
......
......@@ -32,17 +32,17 @@
*
*/
void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
cham_uplo_t uplo, int m, int n, int nb,
int displA, const CHAM_desc_t *A, int Am, int An, int lda,
int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
cham_uplo_t uplo, int m, int n, int nb,
int displA, const CHAM_desc_t *A, int Am, int An, int lda,
int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
{
(void)nb;
struct starpu_codelet *codelet = &cl_zlacpy;
void (*callback)(void*) = options->profiling ? cl_zlacpy_callback : NULL;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_W(B, Bm, Bn);
CHAMELEON_ACCESS_R( A, Am, An );
CHAMELEON_ACCESS_W( B, Bm, Bn );
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
......@@ -65,13 +65,13 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
}
void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
cham_uplo_t uplo, int m, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb)
cham_uplo_t uplo, int m, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb)
{
INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
0, A, Am, An, lda,
0, B, Bm, Bn, ldb );
0, A, Am, An, lda,
0, B, Bm, Bn, ldb );
}
#if !defined(CHAMELEON_SIMULATION)
......
......@@ -85,24 +85,25 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
struct starpu_codelet *codelet = &cl_clag2z;
void (*callback)(void*) = options->profiling ? cl_clag2z_callback : NULL;
if ( chameleon_desc_islocal( A, Am, An ) ||
chameleon_desc_islocal( B, Bm, Bn ) )
{
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R( A, Am, An );
CHAMELEON_ACCESS_W( B, Bm, Bn );
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "clag2z",
STARPU_NAME, "clag2z",
#endif
0);
}
0);
}
......
......@@ -70,7 +70,7 @@ static void cl_zlange_cpu_func(void *descr[], void *cl_arg)
work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &norm, &M, &N, &LDA);
CORE_zlange( norm, M, N, A, LDA, work, normA);
CORE_zlange( norm, M, N, A, LDA, work, normA );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -86,34 +86,35 @@ void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
struct starpu_codelet *codelet = &cl_zlange_max;
void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL;
if ( chameleon_desc_islocal( A, Am, An ) ||
chameleon_desc_islocal( B, Bm, Bn ) )
{
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_R, RTBLKADDR(A, double, Am, An),
STARPU_RW, RTBLKADDR(B, double, Bm, Bn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R( A, Am, An );
CHAMELEON_ACCESS_RW( B, Bm, Bn );
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_R, RTBLKADDR(A, double, Am, An),
STARPU_RW, RTBLKADDR(B, double, Bm, Bn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zlange_max",
STARPU_NAME, "zlange_max",
#endif
0);
}
0);
}
#if !defined(CHAMELEON_SIMULATION)
static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg)
{
double *A;
double *normA;
A = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
normA = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
double *B;
if ( *A > *normA )
*normA = *A;
A = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
if ( *A > *B ) {
*B = *A;
}
(void)cl_arg;
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......
......@@ -55,21 +55,21 @@
*
*/
void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn )
const CHAM_desc_t *SCLSSQ_IN, int SCLSSQ_INm, int SCLSSQ_INn,
const CHAM_desc_t *SCLSSQ_OUT, int SCLSSQ_OUTm, int SCLSSQ_OUTn )
{
struct starpu_codelet *codelet = &cl_zplssq;
void (*callback)(void*) = options->profiling ? cl_zplssq_callback : NULL;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(SCALESUMSQ, SCALESUMSQm, SCALESUMSQn);
CHAMELEON_ACCESS_RW(SCLSSQ, SCLSSQm, SCLSSQn);
CHAMELEON_ACCESS_R( SCLSSQ_IN, SCLSSQ_INm, SCLSSQ_INn );
CHAMELEON_ACCESS_RW( SCLSSQ_OUT, SCLSSQ_OUTm, SCLSSQ_OUTn );
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_R, RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn),
STARPU_RW, RTBLKADDR(SCLSSQ, double, SCLSSQm, SCLSSQn),
STARPU_R, RTBLKADDR( SCLSSQ_IN, double, SCLSSQ_INm, SCLSSQ_INn ),
STARPU_RW, RTBLKADDR( SCLSSQ_OUT, double, SCLSSQ_OUTm, SCLSSQ_OUTn ),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
......@@ -82,17 +82,20 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
#if !defined(CHAMELEON_SIMULATION)
static void cl_zplssq_cpu_func(void *descr[], void *cl_arg)
{
double *SCALESUMSQ;
double *SCLSSQ;
double *SCLSSQ_IN;
double *SCLSSQ_OUT;
SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
SCLSSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
SCLSSQ_IN = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
if( SCLSSQ[0] < SCALESUMSQ[0] ) {
SCLSSQ[1] = SCALESUMSQ[1] + (SCLSSQ[1] * (( SCLSSQ[0] / SCALESUMSQ[0] ) * ( SCLSSQ[0] / SCALESUMSQ[0] )));
SCLSSQ[0] = SCALESUMSQ[0];
assert( SCLSSQ_OUT[0] >= 0. );
if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
SCLSSQ_OUT[0] = SCLSSQ_IN[0];
} else {
SCLSSQ[1] = SCLSSQ[1] + (SCALESUMSQ[1] * (( SCALESUMSQ[0] / SCLSSQ[0] ) * ( SCALESUMSQ[0] / SCLSSQ[0] )));
if ( SCLSSQ_OUT[0] > 0 ) {
SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
}
}
(void)cl_arg;
......@@ -110,17 +113,19 @@ void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
struct starpu_codelet *codelet = &cl_zplssq2;
void (*callback)(void*) = options->profiling ? cl_zplssq2_callback : NULL;
if ( chameleon_desc_islocal( RESULT, RESULTm, RESULTn ) ) {
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_RW, RTBLKADDR(RESULT, double, RESULTm, RESULTn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_RW( RESULT, RESULTm, RESULTn );
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_RW, RTBLKADDR(RESULT, double, RESULTm, RESULTn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zplssq2",
STARPU_NAME, "zplssq2",
#endif
0);
}
0);
}
......
......@@ -66,8 +66,8 @@ static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg)
int lda;
double *work;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda);
CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work);
}
......
......@@ -182,6 +182,7 @@ int main (int argc, char **argv)
CHAMELEON_Disable(CHAMELEON_AUTOTUNING);
CHAMELEON_Set(CHAMELEON_TILE_SIZE, nb );
CHAMELEON_Set(CHAMELEON_INNER_BLOCK_SIZE, ib );
CHAMELEON_user_tag_size( 64, 54 );
argc -= 6;
argv += 6;
......
......@@ -51,11 +51,9 @@ int testing_zlange(int argc, char **argv)
/* Allocate Data */
CHAMELEON_Complex64_t *A = (CHAMELEON_Complex64_t *)malloc(LDAxN*sizeof(CHAMELEON_Complex64_t));
double *work = (double*) malloc(max(M,N)*sizeof(double));
double *work = (double*) malloc(max(M,N)*sizeof(double));
double normcham, normlapack, result;
RUNTIME_comm_set_tag_sizes( 31, 16 );
eps = LAPACKE_dlamch_work('e');
printf("\n");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment