Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 30b1fa7d authored by Mathieu Faverge
Browse files

Merge branch 'fixnorms' into 'master'

Fix distributed norms

See merge request !136
parents 3c0f1115 a4f56680
No related branches found
No related tags found
1 merge request: !136 "Fix distributed norms"
Showing changes with 132 additions and 120 deletions
......@@ -32,7 +32,7 @@
static inline void
chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
CHAM_desc_t *Wcol, CHAM_desc_t *Welt,
RUNTIME_option_t *options)
RUNTIME_option_t *options )
{
int m, n;
int minMNT = chameleon_min( A->mt, A->nt );
......@@ -58,7 +58,7 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
int ldam = BLKLDD( A, m );
if ( (n == m) && (uplo != ChamUpperLower) ) {
if ( (n == m) && (uplo != ChamUpperLower) ) {
INSERT_TASK_ztrasm(
options,
ChamColumnwise, uplo, diag, tempmm, tempnn,
......@@ -95,7 +95,8 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
INSERT_TASK_dlange(
options,
ChamMaxNorm, 1, tempnn, A->nb,
W( Wcol, 0, n), 1, W( Welt, 0, n));
W( Wcol, 0, n ), 1,
W( Welt, 0, n ) );
}
/**
......@@ -105,7 +106,8 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for(n = Q; n < NT; n++) {
INSERT_TASK_dlange_max(
options,
W( Welt, 0, n), W( Welt, 0, n%Q) );
W( Welt, 0, n ),
W( Welt, 0, n%Q ) );
}
/**
......@@ -115,7 +117,8 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for(n = 1; n < Q; n++) {
INSERT_TASK_dlange_max(
options,
W( Welt, 0, n), W( Welt, 0, 0) );
W( Welt, 0, n ),
W( Welt, 0, 0 ) );
}
}
......@@ -247,13 +250,14 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
INSERT_TASK_zlange(
options,
ChamMaxNorm, tempmm, tempnn, A->nb,
A(m, n), ldam, W( Welt, m, n));
A(m, n), ldam, W( Welt, m, n ));
}
if ( n >= Q ) {
INSERT_TASK_dlange_max(
options,
W( Welt, m, n), W( Welt, m, n%Q) );
W( Welt, m, n ),
W( Welt, m, n%Q ) );
}
}
......@@ -264,7 +268,8 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
for(n = 1; n < Q; n++) {
INSERT_TASK_dlange_max(
options,
W( Welt, m, n), W( Welt, m, 0) );
W( Welt, m, n ),
W( Welt, m, 0 ) );
}
}
......@@ -275,7 +280,8 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
for(m = P; m < MT; m++) {
INSERT_TASK_dlange_max(
options,
W( Welt, m, 0), W( Welt, m%P, 0) );
W( Welt, m, 0 ),
W( Welt, m%P, 0 ) );
}
/**
......@@ -285,7 +291,8 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
for(m = 1; m < P; m++) {
INSERT_TASK_dlange_max(
options,
W( Welt, m, 0), W( Welt, 0, 0) );
W( Welt, m, 0 ),
W( Welt, 0, 0 ) );
}
}
......@@ -382,7 +389,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
double alpha = 0.0;
double beta = 0.0;
int workn, workmt, worknt;
int workmt, worknt;
int m, n, wcol_init = 0;
chamctxt = chameleon_context_self();
......@@ -395,7 +402,6 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
workn = chameleon_max( A->n, A->q );
switch ( norm ) {
case ChamOneNorm:
......@@ -502,7 +508,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
*/
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
if ( (m != 0) && (n != 0) ) {
if ( (m != 0) || (n != 0) ) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
......@@ -514,7 +520,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
CHAMELEON_Desc_Flush( &Welt, sequence );
RUNTIME_sequence_wait(chamctxt, sequence);
*result = *(double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q );
*result = *((double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q ));
if ( wcol_init ) {
chameleon_desc_destroy( &Wcol );
......
......@@ -315,7 +315,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
double alpha = 0.0;
double beta = 0.0;
int workn, workmt, worknt;
int workmt, worknt;
int m, n, wcol_init = 0;
chamctxt = chameleon_context_self();
......@@ -328,7 +328,6 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
workn = chameleon_max( A->n, A->q );
switch ( norm ) {
case ChamOneNorm:
......@@ -415,7 +414,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
*/
for(m = 0; m < A->p; m++) {
for(n = 0; n < A->q; n++) {
if ( (m != 0) && (n != 0) ) {
if ( (m != 0) || (n != 0) ) {
INSERT_TASK_dlacpy(
&options,
ChamUpperLower, 1, 1, 1,
......
......@@ -77,11 +77,8 @@
* Global shortcuts
*/
#define CHAMELEON_RANK chameleon_rank(chamctxt)
#define CHAMELEON_SIZE chamctxt->world_size
#define CHAMELEON_GRPSIZE chamctxt->group_size
#define CHAMELEON_NB chamctxt->nb
#define CHAMELEON_IB chamctxt->ib
#define CHAMELEON_SCHEDULING chamctxt->scheduling
#define CHAMELEON_RHBLK chamctxt->rhblock
#define CHAMELEON_TRANSLATION chamctxt->translation
#define CHAMELEON_PARALLEL chamctxt->parallel_enabled
......
......@@ -109,8 +109,6 @@ typedef struct chameleon_context_s {
int my_mpi_rank;
int mpi_comm_size;
#endif
int world_size;
int group_size;
/* Boolean flags */
cham_bool_t warnings_enabled;
......
......@@ -57,18 +57,18 @@
*
*/
void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn )
const CHAM_desc_t *IN, int INm, int INn,
const CHAM_desc_t *OUT, int OUTm, int OUTn )
{
double *scalesum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
double *scl = RTBLKADDR(SCLSSQ, double, SCLSSQm, SCLSSQn);
#pragma omp task depend(in: scalesum[0]) depend(inout: scl[0])
double *sclssq_in = RTBLKADDR(IN, double, INm, INn );
double *sclssq_out = RTBLKADDR(OUT, double, OUTm, OUTn);
#pragma omp task depend(in: sclssq_in[0]) depend(inout: sclssq_out[0])
{
if( scl[0] < scalesum[0] ) {
scl[1] = scalesum[1] + (scl[1] * (( scl[0] / scalesum[0] ) * ( scl[0] / scalesum[0] )));
scl[0] = scalesum[0];
if( sclssq_out[0] < sclssq_in[0] ) {
sclssq_out[1] = sclssq_in[1] + (sclssq_out[1] * (( sclssq_out[0] / sclssq_in[0] ) * ( sclssq_out[0] / sclssq_in[0] )));
sclssq_out[0] = sclssq_in[0];
} else {
scl[1] = scl[1] + (scalesum[1] * (( scalesum[0] / scl[0] ) * ( scalesum[0] / scl[0] )));
sclssq_out[1] = sclssq_out[1] + (sclssq_in[1] * (( sclssq_in[0] / sclssq_out[0] ) * ( sclssq_in[0] / sclssq_out[0] )));
}
}
}
......
......@@ -56,17 +56,20 @@ static inline int
CORE_zplssq_parsec( parsec_execution_stream_t *context,
parsec_task_t *this_task )
{
double *SCALESUMSQ;
double *SCLSSQ;
double *SCLSSQ_IN;
double *SCLSSQ_OUT;
parsec_dtd_unpack_args(
this_task, &SCALESUMSQ, &SCLSSQ );
this_task, &SCLSSQ_IN, &SCLSSQ_OUT );
if( SCLSSQ[0] < SCALESUMSQ[0] ) {
SCLSSQ[1] = SCALESUMSQ[1] + (SCLSSQ[1] * (( SCLSSQ[0] / SCALESUMSQ[0] ) * ( SCLSSQ[0] / SCALESUMSQ[0] )));
SCLSSQ[0] = SCALESUMSQ[0];
assert( SCLSSQ_OUT[0] >= 0. );
if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
SCLSSQ_OUT[0] = SCLSSQ_IN[0];
} else {
SCLSSQ[1] = SCLSSQ[1] + (SCALESUMSQ[1] * (( SCALESUMSQ[0] / SCLSSQ[0] ) * ( SCALESUMSQ[0] / SCLSSQ[0] )));
if ( SCLSSQ_OUT[0] > 0 ) {
SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
}
}
(void)context;
......
......@@ -26,16 +26,19 @@
void CORE_zplssq_quark(Quark *quark)
{
double *SCALESUMSQ;
double *SCLSSQ;
double *SCLSSQ_IN;
double *SCLSSQ_OUT;
quark_unpack_args_2( quark, SCALESUMSQ, SCLSSQ );
quark_unpack_args_2( quark, SCLSSQ_IN, SCLSSQ_OUT );
if( SCLSSQ[0] < SCALESUMSQ[0] ) {
SCLSSQ[1] = SCALESUMSQ[1] + (SCLSSQ[1] * (( SCLSSQ[0] / SCALESUMSQ[0] ) * ( SCLSSQ[0] / SCALESUMSQ[0] )));
SCLSSQ[0] = SCALESUMSQ[0];
assert( SCLSSQ_OUT[0] >= 0. );
if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
SCLSSQ_OUT[0] = SCLSSQ_IN[0];
} else {
SCLSSQ[1] = SCLSSQ[1] + (SCALESUMSQ[1] * (( SCALESUMSQ[0] / SCLSSQ[0] ) * ( SCALESUMSQ[0] / SCLSSQ[0] )));
if ( SCLSSQ_OUT[0] > 0 ) {
SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
}
}
}
......
......@@ -120,13 +120,13 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
int M;
int N;
CHAMELEON_Complex64_t alpha;
CHAMELEON_Complex64_t *A;
const CHAMELEON_Complex64_t *A;
int LDA;
CHAMELEON_Complex64_t beta;
CHAMELEON_Complex64_t *B;
int LDB;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB);
CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB);
......
......@@ -32,17 +32,17 @@
*
*/
void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
cham_uplo_t uplo, int m, int n, int nb,
int displA, const CHAM_desc_t *A, int Am, int An, int lda,
int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
cham_uplo_t uplo, int m, int n, int nb,
int displA, const CHAM_desc_t *A, int Am, int An, int lda,
int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
{
(void)nb;
struct starpu_codelet *codelet = &cl_zlacpy;
void (*callback)(void*) = options->profiling ? cl_zlacpy_callback : NULL;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(A, Am, An);
CHAMELEON_ACCESS_W(B, Bm, Bn);
CHAMELEON_ACCESS_R( A, Am, An );
CHAMELEON_ACCESS_W( B, Bm, Bn );
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
......@@ -65,13 +65,13 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
}
void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
cham_uplo_t uplo, int m, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb)
cham_uplo_t uplo, int m, int n, int nb,
const CHAM_desc_t *A, int Am, int An, int lda,
const CHAM_desc_t *B, int Bm, int Bn, int ldb)
{
INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
0, A, Am, An, lda,
0, B, Bm, Bn, ldb );
0, A, Am, An, lda,
0, B, Bm, Bn, ldb );
}
#if !defined(CHAMELEON_SIMULATION)
......
......@@ -85,24 +85,25 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
struct starpu_codelet *codelet = &cl_clag2z;
void (*callback)(void*) = options->profiling ? cl_clag2z_callback : NULL;
if ( chameleon_desc_islocal( A, Am, An ) ||
chameleon_desc_islocal( B, Bm, Bn ) )
{
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R( A, Am, An );
CHAMELEON_ACCESS_W( B, Bm, Bn );
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_R, RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_W, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "clag2z",
STARPU_NAME, "clag2z",
#endif
0);
}
0);
}
......
......@@ -70,7 +70,7 @@ static void cl_zlange_cpu_func(void *descr[], void *cl_arg)
work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &norm, &M, &N, &LDA);
CORE_zlange( norm, M, N, A, LDA, work, normA);
CORE_zlange( norm, M, N, A, LDA, work, normA );
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......@@ -86,34 +86,35 @@ void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
struct starpu_codelet *codelet = &cl_zlange_max;
void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL;
if ( chameleon_desc_islocal( A, Am, An ) ||
chameleon_desc_islocal( B, Bm, Bn ) )
{
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_R, RTBLKADDR(A, double, Am, An),
STARPU_RW, RTBLKADDR(B, double, Bm, Bn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R( A, Am, An );
CHAMELEON_ACCESS_RW( B, Bm, Bn );
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_R, RTBLKADDR(A, double, Am, An),
STARPU_RW, RTBLKADDR(B, double, Bm, Bn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zlange_max",
STARPU_NAME, "zlange_max",
#endif
0);
}
0);
}
#if !defined(CHAMELEON_SIMULATION)
static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg)
{
double *A;
double *normA;
A = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
normA = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
double *B;
if ( *A > *normA )
*normA = *A;
A = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
if ( *A > *B ) {
*B = *A;
}
(void)cl_arg;
}
#endif /* !defined(CHAMELEON_SIMULATION) */
......
......@@ -55,21 +55,21 @@
*
*/
void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
const CHAM_desc_t *SCLSSQ, int SCLSSQm, int SCLSSQn )
const CHAM_desc_t *SCLSSQ_IN, int SCLSSQ_INm, int SCLSSQ_INn,
const CHAM_desc_t *SCLSSQ_OUT, int SCLSSQ_OUTm, int SCLSSQ_OUTn )
{
struct starpu_codelet *codelet = &cl_zplssq;
void (*callback)(void*) = options->profiling ? cl_zplssq_callback : NULL;
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_R(SCALESUMSQ, SCALESUMSQm, SCALESUMSQn);
CHAMELEON_ACCESS_RW(SCLSSQ, SCLSSQm, SCLSSQn);
CHAMELEON_ACCESS_R( SCLSSQ_IN, SCLSSQ_INm, SCLSSQ_INn );
CHAMELEON_ACCESS_RW( SCLSSQ_OUT, SCLSSQ_OUTm, SCLSSQ_OUTn );
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_R, RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn),
STARPU_RW, RTBLKADDR(SCLSSQ, double, SCLSSQm, SCLSSQn),
STARPU_R, RTBLKADDR( SCLSSQ_IN, double, SCLSSQ_INm, SCLSSQ_INn ),
STARPU_RW, RTBLKADDR( SCLSSQ_OUT, double, SCLSSQ_OUTm, SCLSSQ_OUTn ),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
......@@ -82,17 +82,20 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
#if !defined(CHAMELEON_SIMULATION)
static void cl_zplssq_cpu_func(void *descr[], void *cl_arg)
{
double *SCALESUMSQ;
double *SCLSSQ;
double *SCLSSQ_IN;
double *SCLSSQ_OUT;
SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
SCLSSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
SCLSSQ_IN = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
if( SCLSSQ[0] < SCALESUMSQ[0] ) {
SCLSSQ[1] = SCALESUMSQ[1] + (SCLSSQ[1] * (( SCLSSQ[0] / SCALESUMSQ[0] ) * ( SCLSSQ[0] / SCALESUMSQ[0] )));
SCLSSQ[0] = SCALESUMSQ[0];
assert( SCLSSQ_OUT[0] >= 0. );
if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
SCLSSQ_OUT[1] = SCLSSQ_IN[1] + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
SCLSSQ_OUT[0] = SCLSSQ_IN[0];
} else {
SCLSSQ[1] = SCLSSQ[1] + (SCALESUMSQ[1] * (( SCALESUMSQ[0] / SCLSSQ[0] ) * ( SCALESUMSQ[0] / SCLSSQ[0] )));
if ( SCLSSQ_OUT[0] > 0 ) {
SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1] * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
}
}
(void)cl_arg;
......@@ -110,17 +113,19 @@ void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
struct starpu_codelet *codelet = &cl_zplssq2;
void (*callback)(void*) = options->profiling ? cl_zplssq2_callback : NULL;
if ( chameleon_desc_islocal( RESULT, RESULTm, RESULTn ) ) {
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_RW, RTBLKADDR(RESULT, double, RESULTm, RESULTn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
CHAMELEON_BEGIN_ACCESS_DECLARATION;
CHAMELEON_ACCESS_RW( RESULT, RESULTm, RESULTn );
CHAMELEON_END_ACCESS_DECLARATION;
starpu_insert_task(
starpu_mpi_codelet(codelet),
STARPU_RW, RTBLKADDR(RESULT, double, RESULTm, RESULTn),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME, "zplssq2",
STARPU_NAME, "zplssq2",
#endif
0);
}
0);
}
......
......@@ -66,8 +66,8 @@ static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg)
int lda;
double *work;
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda);
CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work);
}
......
......@@ -182,6 +182,7 @@ int main (int argc, char **argv)
CHAMELEON_Disable(CHAMELEON_AUTOTUNING);
CHAMELEON_Set(CHAMELEON_TILE_SIZE, nb );
CHAMELEON_Set(CHAMELEON_INNER_BLOCK_SIZE, ib );
CHAMELEON_user_tag_size( 64, 54 );
argc -= 6;
argv += 6;
......
......@@ -51,11 +51,9 @@ int testing_zlange(int argc, char **argv)
/* Allocate Data */
CHAMELEON_Complex64_t *A = (CHAMELEON_Complex64_t *)malloc(LDAxN*sizeof(CHAMELEON_Complex64_t));
double *work = (double*) malloc(max(M,N)*sizeof(double));
double *work = (double*) malloc(max(M,N)*sizeof(double));
double normcham, normlapack, result;
RUNTIME_comm_set_tag_sizes( 31, 16 );
eps = LAPACKE_dlamch_work('e');
printf("\n");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment