diff --git a/compute/pzlange.c b/compute/pzlange.c index e72a19c28c416eb9e387e61ca3c993c085f40228..2bb14d7a2124b4a8328597063b601adc8d38d716 100644 --- a/compute/pzlange.c +++ b/compute/pzlange.c @@ -517,7 +517,11 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia } } + if ( wcol_init ) { + CHAMELEON_Desc_Flush( &Wcol, sequence ); + } CHAMELEON_Desc_Flush( &Welt, sequence ); + CHAMELEON_Desc_Flush( A, sequence ); RUNTIME_sequence_wait(chamctxt, sequence); *result = *((double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q )); diff --git a/compute/pzlansy.c b/compute/pzlansy.c index 6a0e2bb0086e4892d7eab3deadb7644093c1fb81..5763df96dc4c25fd7bc097699ec354e59b62a28d 100644 --- a/compute/pzlansy.c +++ b/compute/pzlansy.c @@ -423,7 +423,11 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra } } + if ( wcol_init ) { + CHAMELEON_Desc_Flush( &Wcol, sequence ); + } CHAMELEON_Desc_Flush( &Welt, sequence ); + CHAMELEON_Desc_Flush( A, sequence ); RUNTIME_sequence_wait(chamctxt, sequence); *result = *(double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q ); diff --git a/compute/zlange.c b/compute/zlange.c index cb8ff14bf9b74a53ee17bb54d386302b125aeeb6..522c8c204369a4042b34ae17b2f592177e924883 100644 --- a/compute/zlange.c +++ b/compute/zlange.c @@ -188,7 +188,7 @@ double CHAMELEON_zlange(cham_normtype_t norm, int M, int N, * @sa CHAMELEON_slange_Tile * */ -double CHAMELEON_zlange_Tile(cham_normtype_t norm, CHAM_desc_t *A ) +double CHAMELEON_zlange_Tile( cham_normtype_t norm, CHAM_desc_t *A ) { CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = NULL; diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h index 8265e990686f22639acc23d0919348d017947cb2..ca0ae0e21faba5d75b9bcecc86ee09ab99d78174 100644 --- a/include/chameleon/tasks_z.h +++ b/include/chameleon/tasks_z.h @@ -34,6 +34,10 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options, cham_store_t storev, cham_uplo_t uplo, int M, int N, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn ); +void INSERT_TASK_zaxpy( const RUNTIME_option_t *options, + int M, CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An, int incA, + const CHAM_desc_t *B, int Bm, int Bn, int incB ); void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, cham_trans_t trans, int m, int n, int nb, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda, diff --git a/runtime/parsec/codelets/codelet_zaxpy.c b/runtime/parsec/codelets/codelet_zaxpy.c index d0d1c169a88bd5ac929573aae39826777ebc4ca8..cc79a219c7074395d5a64ac2672af6f7480ffe1d 100644 --- a/runtime/parsec/codelets/codelet_zaxpy.c +++ b/runtime/parsec/codelets/codelet_zaxpy.c @@ -49,11 +49,11 @@ void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, parsec_dtd_taskpool_insert_task( PARSEC_dtd_taskpool, CORE_zaxpy_parsec, options->priority, "axpy", - sizeof(int), &M, VALUE, + sizeof(int), &M, VALUE, sizeof(CHAMELEON_Complex64_t), &alpha, VALUE, - PASSED_BY_REF, RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT, - sizeof(int), &incA, VALUE, - PASSED_BY_REF, RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INOUT | AFFINITY, - sizeof(int), &incB, VALUE, + PASSED_BY_REF, RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT, + sizeof(int), &incA, VALUE, + PASSED_BY_REF, RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INOUT | AFFINITY, + sizeof(int), &incB, VALUE, PARSEC_DTD_ARG_END ); } diff --git a/runtime/quark/codelets/codelet_zaxpy.c b/runtime/quark/codelets/codelet_zaxpy.c index 5e8f0870ec4aff3ebfe03a8bca290e89c6a97371..2ced4ad717b3ce18e8bb17834625ba1ba59ead51 100644 --- a/runtime/quark/codelets/codelet_zaxpy.c +++ b/runtime/quark/codelets/codelet_zaxpy.c @@ -37,7 +37,7 @@ void CORE_zaxpy_quark(Quark *quark) } void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, - int M, CHAMELEON_Complex64_t *alpha, + int M, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int incA, const CHAM_desc_t *B, int Bm, int Bn, int incB) { @@ -45,10 +45,10 @@ void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, DAG_CORE_AXPY; QUARK_Insert_Task(opt->quark, CORE_zaxpy_quark, (Quark_Task_Flags*)opt, sizeof(int), &M, VALUE, - sizeof(CHAMELEON_Complex64_t), alpha, VALUE, - sizeof(CHAMELEON_Complex64_t)*M, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, + sizeof(CHAMELEON_Complex64_t), &alpha, VALUE, + sizeof(CHAMELEON_Complex64_t)*M, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, sizeof(int), &incA, VALUE, - sizeof(CHAMELEON_Complex64_t)*M, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INOUT, + sizeof(CHAMELEON_Complex64_t)*M, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INOUT, sizeof(int), &incB, VALUE, 0); } diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c index 0a35f27f4ffc5c4964cc39dbd004487d4c6ef998..0b70bb6f9f6ecf46599184cb56d4797c401383dd 100644 --- a/runtime/starpu/codelets/codelet_zaxpy.c +++ b/runtime/starpu/codelets/codelet_zaxpy.c @@ -36,7 +36,7 @@ void INSERT_TASK_zaxpy(const RUNTIME_option_t *options, starpu_insert_task( starpu_mpi_codelet(codelet), STARPU_VALUE, &M, sizeof(int), - STARPU_VALUE, alpha, sizeof(CHAMELEON_Complex64_t), + STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_VALUE, &incA, sizeof(int), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),