From 829a343c2cf9c1d6a98382ee76208b492c6a9063 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Fri, 26 Jun 2020 18:03:53 +0200 Subject: [PATCH] Reduce the data access types whenever it's possible to get more flexibility with the allocation on the fly --- runtime/starpu/codelets/codelet_zaxpy.c | 4 ++++ runtime/starpu/codelets/codelet_zgeadd.c | 8 +++++++- runtime/starpu/codelets/codelet_zgemm.c | 8 +++++++- runtime/starpu/codelets/codelet_zgram.c | 20 ++++++++++---------- runtime/starpu/codelets/codelet_zhe2ge.c | 10 +++++----- runtime/starpu/codelets/codelet_zhemm.c | 8 +++++++- runtime/starpu/codelets/codelet_zher2k.c | 8 +++++++- runtime/starpu/codelets/codelet_zherk.c | 8 +++++++- runtime/starpu/codelets/codelet_zlascal.c | 20 ++++++++++++++------ runtime/starpu/codelets/codelet_zsymm.c | 8 +++++++- runtime/starpu/codelets/codelet_zsyr2k.c | 8 +++++++- runtime/starpu/codelets/codelet_zsyrk.c | 8 +++++++- runtime/starpu/codelets/codelet_ztradd.c | 8 +++++++- runtime/starpu/codelets/codelet_ztrmm.c | 7 ++++++- runtime/starpu/include/runtime_codelets.h | 2 +- 15 files changed, 103 insertions(+), 32 deletions(-) diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c index 88cf08e02..7857b25e8 100644 --- a/runtime/starpu/codelets/codelet_zaxpy.c +++ b/runtime/starpu/codelets/codelet_zaxpy.c @@ -52,6 +52,10 @@ void INSERT_TASK_zaxpy( const RUNTIME_option_t *options, starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + if ( alpha == 0. 
) { + return; + } + CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(B, Bm, Bn); diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c index 27ce15a01..20d1b23bd 100644 --- a/runtime/starpu/codelets/codelet_zgeadd.c +++ b/runtime/starpu/codelets/codelet_zgeadd.c @@ -144,10 +144,16 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn ) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, B, Bm, Bn ); + } + struct starpu_codelet *codelet = &cl_zgeadd; void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -162,7 +168,7 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 034136bed..1eed0875f 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -108,11 +108,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. 
) { + return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zgemm; void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -131,7 +137,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options, STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zgram.c b/runtime/starpu/codelets/codelet_zgram.c index 83643fc4f..ff38f7308 100644 --- a/runtime/starpu/codelets/codelet_zgram.c +++ b/runtime/starpu/codelets/codelet_zgram.c @@ -51,21 +51,21 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options, const CHAM_desc_t *Di, int Dim, int Din, const CHAM_desc_t *Dj, int Djm, int Djn, const CHAM_desc_t *D, int Dm, int Dn, - CHAM_desc_t *A, int Am, int An) + CHAM_desc_t *A, int Am, int An ) { - struct starpu_codelet *codelet = &cl_zgram; - void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL; + struct starpu_codelet *codelet = &cl_zgram; + void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? 
-1 : schedopt->workerid; - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(Di, Dim, Din); - CHAMELEON_ACCESS_R(Dj, Djm, Djn); - CHAMELEON_ACCESS_R(D, Dm, Dn); - CHAMELEON_ACCESS_RW(A, Am, An); - CHAMELEON_END_ACCESS_DECLARATION; + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(Di, Dim, Din); + CHAMELEON_ACCESS_R(Dj, Djm, Djn); + CHAMELEON_ACCESS_R(D, Dm, Dn); + CHAMELEON_ACCESS_RW(A, Am, An); + CHAMELEON_END_ACCESS_DECLARATION; - starpu_insert_task( + starpu_insert_task( starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(int), STARPU_VALUE, &m, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c index c7a24022b..2f5fc6a58 100644 --- a/runtime/starpu/codelets/codelet_zhe2ge.c +++ b/runtime/starpu/codelets/codelet_zhe2ge.c @@ -49,11 +49,11 @@ CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func) * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options, - cham_uplo_t uplo, - int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options, + cham_uplo_t uplo, + int m, int n, int mb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { (void)mb; struct starpu_codelet *codelet = &cl_zhe2ge; diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index b6a827896..1d19bc6ae 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -107,11 +107,17 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zhemm; void (*callback)(void*) = options->profiling ? 
cl_zhemm_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -129,7 +135,7 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options, STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 291fef279..d8b8b11ba 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -104,11 +104,17 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, double beta, const CHAM_desc_t *C, int Cm, int Cn ) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zher2k; void (*callback)(void*) = options->profiling ? cl_zher2k_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? 
STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -126,7 +132,7 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options, STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_VALUE, &beta, sizeof(double), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 6a8b17cd6..a1e2adf8e 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -99,11 +99,17 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options, double alpha, const CHAM_desc_t *A, int Am, int An, double beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zherk; void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? 
STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -119,7 +125,7 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options, STARPU_VALUE, &alpha, sizeof(double), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_VALUE, &beta, sizeof(double), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index 0916e8aa5..129a8afec 100644 --- a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -45,12 +45,20 @@ static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) */ CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func) -void INSERT_TASK_zlascal(const RUNTIME_option_t *options, - cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *A, int Am, int An) +void INSERT_TASK_zlascal( const RUNTIME_option_t *options, + cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An) { + if ( alpha == 0. ) { + return INSERT_TASK_zlaset( options, uplo, m, n, + alpha, alpha, A, Am, An ); + } + else if ( alpha == 1. ) { + return; + } + (void)nb; struct starpu_codelet *codelet = &cl_zlascal; void (*callback)(void*) = options->profiling ? 
cl_zlascal_callback : NULL; @@ -67,7 +75,7 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 689be9b62..6bc2d770d 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -107,11 +107,17 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zsymm; void (*callback)(void*) = options->profiling ? cl_zsymm_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? 
STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -129,7 +135,7 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options, STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index 86037a377..d14520cbe 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -103,11 +103,17 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zsyr2k; void (*callback)(void*) = options->profiling ? cl_zsyr2k_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? 
STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -125,7 +131,7 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 66782975f..148c836e9 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -99,11 +99,17 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zsyrk; void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? 
STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -119,7 +125,7 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index e19dd2636..d4469556c 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -112,10 +112,16 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn ) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, uplo, m, n, nb, + beta, B, Bm, Bn ); + } + struct starpu_codelet *codelet = &cl_ztradd; void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessB = ( beta == 0. ) ? 
STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -131,7 +137,7 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index a1d24cf95..e8ed6d596 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -102,6 +102,11 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *B, int Bm, int Bn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlaset( options, ChamUpperLower, m, n, + alpha, alpha, B, Bm, Bn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_ztrmm; void (*callback)(void*) = options->profiling ? 
cl_ztrmm_callback : NULL; @@ -122,7 +127,7 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h index 9b9fdc5b1..8c66bf799 100644 --- a/runtime/starpu/include/runtime_codelets.h +++ b/runtime/starpu/include/runtime_codelets.h @@ -47,7 +47,7 @@ .cpu_func = ((cpu_func_name)), \ CODELET_CUDA_FLAGS(cuda_flags) \ .cuda_func = ((cuda_func_name)), \ - .nbuffers = ((_nbuffers)), \ + .nbuffers = STARPU_VARIABLE_NBUFFERS, \ .model = &cl_##cl_name##_model, \ .name = #cl_name \ }; \ -- GitLab