diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake index 657741dbab25d4008c4dfc2ebdf34a3f43bf00e6..be970c0169f847f9a61993d2e6a7cd49a409399e 160000 --- a/cmake_modules/morse_cmake +++ b/cmake_modules/morse_cmake @@ -1 +1 @@ -Subproject commit 657741dbab25d4008c4dfc2ebdf34a3f43bf00e6 +Subproject commit be970c0169f847f9a61993d2e6a7cd49a409399e diff --git a/runtime/starpu/codelets/codelet_dzasum.c b/runtime/starpu/codelets/codelet_dzasum.c index b88605ba14695571cee582d43bf67688bf8bc281..7ef0d27e3e2331832d9a1d430add8249f1c76b5b 100644 --- a/runtime/starpu/codelets/codelet_dzasum.c +++ b/runtime/starpu/codelets/codelet_dzasum.c @@ -44,7 +44,7 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func) +CODELETS_CPU(dzasum, cl_dzasum_cpu_func) void INSERT_TASK_dzasum( const RUNTIME_option_t *options, cham_store_t storev, cham_uplo_t uplo, int M, int N, diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c index 65dd72e33fe2328e32e252d88d2d3a0468a6b735..d56451136a102f292863887dd601969f0344d143 100644 --- a/runtime/starpu/codelets/codelet_map.c +++ b/runtime/starpu/codelets/codelet_map.c @@ -39,7 +39,7 @@ static void cl_map_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(map, 1, cl_map_cpu_func) +CODELETS_CPU(map, cl_map_cpu_func) void INSERT_TASK_map( const RUNTIME_option_t *options, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An, diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c index 88cf08e022e8781a1dcaeb534d6347a55b973541..a4c1df2ebdfe0bc7c66ea6b16159b35d98753731 100644 --- a/runtime/starpu/codelets/codelet_zaxpy.c +++ b/runtime/starpu/codelets/codelet_zaxpy.c @@ -40,7 +40,7 @@ static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func) +CODELETS_CPU(zaxpy, cl_zaxpy_cpu_func) void INSERT_TASK_zaxpy( const RUNTIME_option_t *options, int M, CHAMELEON_Complex64_t alpha, @@ -52,6 +52,10 @@ void INSERT_TASK_zaxpy( const RUNTIME_option_t *options, starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + if ( alpha == 0. ) { + return; + } + CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); CHAMELEON_ACCESS_RW(B, Bm, Bn); diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c index ad52e6452e2064d80fc9858aa6c7db600af65b4d..b8b14ae4a3a2531149d5524a8964949dcd53b752 100644 --- a/runtime/starpu/codelets/codelet_zbuild.c +++ b/runtime/starpu/codelets/codelet_zbuild.c @@ -52,7 +52,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func) +CODELETS_CPU(zbuild, cl_zbuild_cpu_func) void INSERT_TASK_zbuild( const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c index 27ce15a01640fd9c19121788b35781a45bf25125..bd027eff028f460333ab83651600783c3d085389 100644 --- a/runtime/starpu/codelets/codelet_zgeadd.c +++ b/runtime/starpu/codelets/codelet_zgeadd.c @@ -81,9 +81,9 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg) * Codelet definition */ #if defined(CHAMELEON_USE_CUBLAS_V2) -CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zgeadd, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC) #else -CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func) +CODELETS_CPU(zgeadd, cl_zgeadd_cpu_func) #endif /** @@ -144,10 +144,16 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn ) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, B, Bm, Bn ); + } + struct starpu_codelet *codelet = &cl_zgeadd; void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -162,7 +168,7 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c index 63d7b9ca3cecc93332114b9fac3754fc1da599a3..9a26068cae6b8a15b87190f515328a3a47867891 100644 --- a/runtime/starpu/codelets/codelet_zgelqt.c +++ b/runtime/starpu/codelets/codelet_zgelqt.c @@ -56,7 +56,7 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) +CODELETS_CPU(zgelqt, cl_zgelqt_cpu_func) void INSERT_TASK_zgelqt(const RUNTIME_option_t *options, int m, int n, int ib, int nb, diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 034136bed27c4c6ffb264463a44800d3641af842..54e6256b57d2b0447d579bfbaf59d075f4c33bc4 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -94,7 +94,7 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC) /** * @@ -108,11 +108,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zgemm; void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -131,7 +137,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options, STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c index 9f1f407d519485f26bb05fbd87b0a4c22f7fb6fb..ae8ad0d53459a717f5ac3b9153b4b1113bb8be93 100644 --- a/runtime/starpu/codelets/codelet_zgeqrt.c +++ b/runtime/starpu/codelets/codelet_zgeqrt.c @@ -57,7 +57,7 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func) +CODELETS_CPU(zgeqrt, cl_zgeqrt_cpu_func) void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options, int m, int n, int ib, int nb, diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c index 310e7e2b9f01c0c09147212457c338a0be0035fd..9e97aa44540d981c2ad8b16d5fb295d04ac8634e 100644 --- a/runtime/starpu/codelets/codelet_zgessm.c +++ b/runtime/starpu/codelets/codelet_zgessm.c @@ -50,7 +50,7 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func) +CODELETS_CPU(zgessm, cl_zgessm_cpu_func) void INSERT_TASK_zgessm( const RUNTIME_option_t *options, int m, int n, int k, int ib, int nb, diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c index f22e28a2416461272b6dc71ed88be065683efed1..7bcaabb15982ffabdd85a7377202927b1a4af4f7 100644 --- a/runtime/starpu/codelets/codelet_zgessq.c +++ b/runtime/starpu/codelets/codelet_zgessq.c @@ -43,7 +43,7 @@ static void cl_zgessq_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func) +CODELETS_CPU(zgessq, cl_zgessq_cpu_func) void INSERT_TASK_zgessq( const RUNTIME_option_t *options, cham_store_t storev, int m, int n, diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c index 23e40e8738d6bdb02742e16fa37474948f1843b4..947fb8d2b0dd2a243eb42fe6c768d233f3ffafb6 100644 --- a/runtime/starpu/codelets/codelet_zgetrf.c +++ b/runtime/starpu/codelets/codelet_zgetrf.c @@ -52,7 +52,7 @@ static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func) +CODELETS_CPU(zgetrf, cl_zgetrf_cpu_func) void INSERT_TASK_zgetrf( const RUNTIME_option_t *options, int m, int n, int nb, diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c index 4b2b788ba8ce1edb2aab076ea180bc38de3a1730..460a8e1a5b34b0eea3cccd55aae924ffcee3e033 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c @@ -56,7 +56,7 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func) +CODELETS_CPU(zgetrf_incpiv, cl_zgetrf_incpiv_cpu_func) void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options, int m, int n, int ib, int nb, diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c index 776415a755c28c0bf360439df68be75885edbea8..c84418518e9e4469360c9db00466f11450d2f337 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c @@ -53,7 +53,7 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func) +CODELETS_CPU(zgetrf_nopiv, cl_zgetrf_nopiv_cpu_func) void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options, int m, int n, int ib, int nb, diff --git a/runtime/starpu/codelets/codelet_zgram.c b/runtime/starpu/codelets/codelet_zgram.c index 83643fc4f9839c8dc091e4ad6fd4c53426f3e1b0..1a5e8e4e9825272ec9d158c21b2cb44dd86bc247 100644 --- a/runtime/starpu/codelets/codelet_zgram.c +++ b/runtime/starpu/codelets/codelet_zgram.c @@ -43,7 +43,7 @@ static void cl_zgram_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zgram, 4, cl_zgram_cpu_func) +CODELETS_CPU(zgram, cl_zgram_cpu_func) void INSERT_TASK_zgram( const RUNTIME_option_t *options, cham_uplo_t uplo, @@ -51,21 +51,21 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options, const CHAM_desc_t *Di, int Dim, int Din, const CHAM_desc_t *Dj, int Djm, int Djn, const CHAM_desc_t *D, int Dm, int Dn, - CHAM_desc_t *A, int Am, int An) + CHAM_desc_t *A, int Am, int An ) { - struct starpu_codelet *codelet = &cl_zgram; - void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL; + struct starpu_codelet *codelet = &cl_zgram; + void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; - CHAMELEON_BEGIN_ACCESS_DECLARATION; - CHAMELEON_ACCESS_R(Di, Dim, Din); - CHAMELEON_ACCESS_R(Dj, Djm, Djn); - CHAMELEON_ACCESS_R(D, Dm, Dn); - CHAMELEON_ACCESS_RW(A, Am, An); - CHAMELEON_END_ACCESS_DECLARATION; + CHAMELEON_BEGIN_ACCESS_DECLARATION; + CHAMELEON_ACCESS_R(Di, Dim, Din); + CHAMELEON_ACCESS_R(Dj, Djm, Djn); + CHAMELEON_ACCESS_R(D, Dm, Dn); + CHAMELEON_ACCESS_RW(A, Am, An); + CHAMELEON_END_ACCESS_DECLARATION; - starpu_insert_task( + starpu_insert_task( starpu_mpi_codelet(codelet), STARPU_VALUE, &uplo, sizeof(int), STARPU_VALUE, &m, sizeof(int), diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c index c7a24022b444d7f50fb8b91b74001ee6440899f0..fe1f9eb291209851ecfb84e1dd7039cfdd560d1a 100644 --- a/runtime/starpu/codelets/codelet_zhe2ge.c +++ b/runtime/starpu/codelets/codelet_zhe2ge.c @@ -42,18 +42,18 @@ static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func) +CODELETS_CPU(zhe2ge, cl_zhe2ge_cpu_func) /** * * @ingroup INSERT_TASK_Complex64_t * */ -void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options, - cham_uplo_t uplo, - int m, int n, int mb, - const CHAM_desc_t *A, int Am, int An, - const CHAM_desc_t *B, int Bm, int Bn) +void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options, + cham_uplo_t uplo, + int m, int n, int mb, + const CHAM_desc_t *A, int Am, int An, + const CHAM_desc_t *B, int Bm, int Bn ) { (void)mb; struct starpu_codelet *codelet = &cl_zhe2ge; diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index b6a827896636970bfe338cbeacaae776b3dbc401..5c90271ece923555a05f3c3d9d2374b9ba41b000 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -93,7 +93,7 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zhemm, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC) /** * @@ -107,11 +107,17 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zhemm; void (*callback)(void*) = options->profiling ? cl_zhemm_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -129,7 +135,7 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options, STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 291fef2794b146994577fc2ec83ff2704397a826..0e93a35c99f0b9469839b5c5e8e17d8428598bcd 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -89,7 +89,7 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zher2k, 3, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zher2k, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC) /** * @@ -104,11 +104,17 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, double beta, const CHAM_desc_t *C, int Cm, int Cn ) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zher2k; void (*callback)(void*) = options->profiling ? cl_zher2k_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -126,7 +132,7 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options, STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_VALUE, &beta, sizeof(double), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c index e967fdfd1051a69c6796c03515c3c6f1a4102c39..d92f922eb9abf2b21435254bd5a076896f91d02a 100644 --- a/runtime/starpu/codelets/codelet_zherfb.c +++ b/runtime/starpu/codelets/codelet_zherfb.c @@ -78,7 +78,7 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zherfb, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC) /** * diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 6a8b17cd68d47fad48596896f05c00daf37bd049..915cc9b77d4a13cdc43b4ba14c67c4871a49dd37 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -86,7 +86,7 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zherk, 2, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC) /** * @@ -99,11 +99,17 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options, double alpha, const CHAM_desc_t *A, int Am, int An, double beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zherk; void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -119,7 +125,7 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options, STARPU_VALUE, &alpha, sizeof(double), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_VALUE, &beta, sizeof(double), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index 5703507ed9baba655125f1168df80485cbd7202d..312555129509bfdcaaa79337609b2d053471f310 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ b/runtime/starpu/codelets/codelet_zlacpy.c @@ -57,7 +57,7 @@ static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func) +CODELETS_CPU(zlacpy, cl_zlacpy_cpu_func) void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, cham_uplo_t uplo, int m, int n, int nb, diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c index c3f44bc2a78d4b3916ece9f7f2e99a54f69ef4bb..b9eb9188913ca7e875116898fd8355fecfe4b9ea 100644 --- a/runtime/starpu/codelets/codelet_zlag2c.c +++ b/runtime/starpu/codelets/codelet_zlag2c.c @@ -45,7 +45,7 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlag2c, 1, cl_zlag2c_cpu_func) +CODELETS_CPU(zlag2c, cl_zlag2c_cpu_func) /** * @@ -103,7 +103,7 @@ static void cl_clag2z_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func) +CODELETS_CPU(clag2z, cl_clag2z_cpu_func) void INSERT_TASK_clag2z(const RUNTIME_option_t *options, int m, int n, int nb, diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c index 23ca2f7cc02283e521edf3c6995b9e5ee00cab55..35a7251a8f05216cda7598bcb6af9d3810644592 100644 --- a/runtime/starpu/codelets/codelet_zlange.c +++ b/runtime/starpu/codelets/codelet_zlange.c @@ -47,7 +47,7 @@ static void cl_zlange_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlange, 3, cl_zlange_cpu_func) +CODELETS_CPU(zlange, cl_zlange_cpu_func) void INSERT_TASK_zlange( const RUNTIME_option_t *options, cham_normtype_t norm, int M, int N, int NB, @@ -105,7 +105,7 @@ static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlange_max, 2, cl_zlange_max_cpu_func) +CODELETS_CPU(zlange_max, cl_zlange_max_cpu_func) void INSERT_TASK_zlange_max(const RUNTIME_option_t *options, const CHAM_desc_t *A, int Am, int An, diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c index f31fea4c4c82b3dff4cfc5c47f8d6713f2bd2da4..45123eeb74fd3a94b27719a7783c40cf5d325474 100644 --- a/runtime/starpu/codelets/codelet_zlanhe.c +++ b/runtime/starpu/codelets/codelet_zlanhe.c @@ -47,7 +47,7 @@ static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func) +CODELETS_CPU(zlanhe, cl_zlanhe_cpu_func) void INSERT_TASK_zlanhe(const RUNTIME_option_t *options, cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c index 0fb7e31ef66ea97877ad65d6d5ffa9fc93deb421..ef7eacbd92cdb511aecb209ed009a76371a507da 100644 --- a/runtime/starpu/codelets/codelet_zlansy.c +++ b/runtime/starpu/codelets/codelet_zlansy.c @@ -47,7 +47,7 @@ static void cl_zlansy_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func) +CODELETS_CPU(zlansy, cl_zlansy_cpu_func) void INSERT_TASK_zlansy( const RUNTIME_option_t *options, cham_normtype_t norm, cham_uplo_t uplo, int N, int NB, diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c index f13dd13bbe951e4474e41287fadc642af4b889c3..b763625dd770bfc0e29986975e72f8a3179699e9 100644 --- a/runtime/starpu/codelets/codelet_zlantr.c +++ b/runtime/starpu/codelets/codelet_zlantr.c @@ -43,7 +43,7 @@ static void cl_zlantr_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func) +CODELETS_CPU(zlantr, cl_zlantr_cpu_func) void INSERT_TASK_zlantr( const RUNTIME_option_t *options, cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index 0916e8aa5e73887ad67cda9db946c1a81ecbbc24..d1bfc3fd35a70e56451b16e70e714433a49dbd1d 100644 --- a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -43,14 +43,22 @@ static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func) +CODELETS_CPU(zlascal, cl_zlascal_cpu_func) -void INSERT_TASK_zlascal(const RUNTIME_option_t *options, - cham_uplo_t uplo, - int m, int n, int nb, - CHAMELEON_Complex64_t alpha, - const CHAM_desc_t *A, int Am, int An) +void INSERT_TASK_zlascal( const RUNTIME_option_t *options, + cham_uplo_t uplo, + int m, int n, int nb, + CHAMELEON_Complex64_t alpha, + const CHAM_desc_t *A, int Am, int An) { + if ( alpha == 0. ) { + return INSERT_TASK_zlaset( options, uplo, m, n, + alpha, alpha, A, Am, An ); + } + else if ( alpha == 1. ) { + return; + } + (void)nb; struct starpu_codelet *codelet = &cl_zlascal; void (*callback)(void*) = options->profiling ? cl_zlascal_callback : NULL; @@ -67,7 +75,7 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_RW, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c index df278acd98ab3af5b8d13c9bd16c8573c0cdad68..90d3ad925cafc15c1144eeb79106ec5a88783999 100644 --- a/runtime/starpu/codelets/codelet_zlaset.c +++ b/runtime/starpu/codelets/codelet_zlaset.c @@ -46,7 +46,7 @@ static void cl_zlaset_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func) +CODELETS_CPU(zlaset, cl_zlaset_cpu_func) void INSERT_TASK_zlaset(const RUNTIME_option_t *options, cham_uplo_t uplo, int M, int N, diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c index 61256da734846416a495753fc65fbffc92fab8d3..0f19d0e946115d6f5903d1b59dbe5b535aca7f37 100644 --- a/runtime/starpu/codelets/codelet_zlaset2.c +++ b/runtime/starpu/codelets/codelet_zlaset2.c @@ -44,7 +44,7 @@ static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func) +CODELETS_CPU(zlaset2, cl_zlaset2_cpu_func) void INSERT_TASK_zlaset2(const RUNTIME_option_t *options, cham_uplo_t uplo, int M, int N, diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c index 20ed9394cc2b0beca557a9535e359ff2836f6437..718fd3ce56bfa53c395962a009d9cf06a451ca0f 100644 --- a/runtime/starpu/codelets/codelet_zlatro.c +++ b/runtime/starpu/codelets/codelet_zlatro.c @@ -48,7 +48,7 @@ static void cl_zlatro_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func) +CODELETS_CPU(zlatro, cl_zlatro_cpu_func) /** * diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c index 59d67ebf5d984c0baf12f4a1b977441ff6858cd8..00dcda4754e77a37a3e3649dde9ca382dd733ba0 100644 --- a/runtime/starpu/codelets/codelet_zlauum.c +++ b/runtime/starpu/codelets/codelet_zlauum.c @@ -44,7 +44,7 @@ static void cl_zlauum_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func) +CODELETS_CPU(zlauum, cl_zlauum_cpu_func) /** * diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c index 361ebc17e91e164a7e6666522451883da86dafc2..23ccbc3dab28f52c8fe0b711f8f9c29892dc7533 100644 --- a/runtime/starpu/codelets/codelet_zplghe.c +++ b/runtime/starpu/codelets/codelet_zplghe.c @@ -51,7 +51,7 @@ static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func) +CODELETS_CPU(zplghe, cl_zplghe_cpu_func) void INSERT_TASK_zplghe( const RUNTIME_option_t *options, double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c index efc64b037946ed5bd5848342c8e373cd4aa66b5c..665f31002735de92d2855a9352eec5015fb90467 100644 --- a/runtime/starpu/codelets/codelet_zplgsy.c +++ b/runtime/starpu/codelets/codelet_zplgsy.c @@ -51,7 +51,7 @@ static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func) +CODELETS_CPU(zplgsy, cl_zplgsy_cpu_func) void INSERT_TASK_zplgsy( const RUNTIME_option_t *options, CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c index 0d86aeff00b89bbdda7a5d137eeff1b65b7b3926..4f2910f7d80c417b3c5310a53d641e626e9d0d8f 100644 --- a/runtime/starpu/codelets/codelet_zplrnt.c +++ b/runtime/starpu/codelets/codelet_zplrnt.c @@ -48,7 +48,7 @@ static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func) +CODELETS_CPU(zplrnt, cl_zplrnt_cpu_func) void INSERT_TASK_zplrnt( const RUNTIME_option_t *options, int m, int n, const CHAM_desc_t *A, int Am, int An, diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c index b29f1d74e8230e45876d2b960508a955a1ff59c7..6201abff7748ecfd5dd404bb2851365cda88f8fa 100644 --- a/runtime/starpu/codelets/codelet_zplssq.c +++ b/runtime/starpu/codelets/codelet_zplssq.c @@ -48,7 +48,7 @@ static void cl_zplssq_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func) +CODELETS_CPU(zplssq, cl_zplssq_cpu_func) void INSERT_TASK_zplssq( const RUNTIME_option_t *options, cham_store_t storev, int M, int N, @@ -101,7 +101,7 @@ static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zplssq2, 1, cl_zplssq2_cpu_func) +CODELETS_CPU(zplssq2, cl_zplssq2_cpu_func) void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, int N, const CHAM_desc_t *RESULT, int RESULTm, int RESULTn ) diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c index de6c1886b7e349411d89a34be3089b9e1264a902..35edf5234888074001c5ef18933b8259ae931219 100644 --- a/runtime/starpu/codelets/codelet_zpotrf.c +++ b/runtime/starpu/codelets/codelet_zpotrf.c @@ -52,7 +52,7 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func) +CODELETS_CPU(zpotrf, cl_zpotrf_cpu_func) /** * diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c index 29ef312bd6207d7d3c2476e5a764bcacf6a3f975..5c7125646a54e11ff6bde3f61f88ab940fbea7cf 100644 --- a/runtime/starpu/codelets/codelet_zssssm.c +++ b/runtime/starpu/codelets/codelet_zssssm.c @@ -54,7 +54,7 @@ static void cl_zssssm_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func) +CODELETS_CPU(zssssm, cl_zssssm_cpu_func) void INSERT_TASK_zssssm( const RUNTIME_option_t *options, int m1, int n1, int m2, int n2, int k, int ib, int nb, diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 689be9b62368127011176dca3a6507a429f37913..40ed44bcbb3e44904160bc28faa1d9d58dfccf32 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -93,7 +93,7 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zsymm, 3, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zsymm, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC) /** * @@ -107,11 +107,17 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zsymm; void (*callback)(void*) = options->profiling ? cl_zsymm_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -129,7 +135,7 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options, STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index 86037a3778b6511a54d7b2e2f023856cd9ee5f7a..51f013036ddde9370e06e62de325c32c259cb01a 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -89,7 +89,7 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zsyr2k, 3, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zsyr2k, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC) /** * @@ -103,11 +103,17 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, const CHAM_desc_t *B, int Bm, int Bn, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zsyr2k; void (*callback)(void*) = options->profiling ? cl_zsyr2k_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -125,7 +131,7 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options, STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 66782975f6bdf5a409f431117e2096028a3cf870..83c51f5997d6195cc5542ab63386e235b7241f21 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -86,7 +86,7 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zsyrk, 2, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC) /** * @@ -99,11 +99,17 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, uplo, n, n, nb, + beta, C, Cm, Cn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_zsyrk; void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -119,7 +125,7 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options, STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), + accessC, RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c index 951e60d1b6b10779ff3737f8915831232ee7f280..dd6300a18a18e3d89eb5b1540a9a1e4e8926c638 100644 --- a/runtime/starpu/codelets/codelet_zsyssq.c +++ b/runtime/starpu/codelets/codelet_zsyssq.c @@ -41,7 +41,7 @@ static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func) +CODELETS_CPU(zsyssq, cl_zsyssq_cpu_func) void INSERT_TASK_zsyssq( const RUNTIME_option_t *options, cham_store_t storev, cham_uplo_t uplo, int n, diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c index cbac3596ee2cab76abc4ed7c49b1bbff6f6f1527..2e92e4cfd195d95307cc753fc4c0e9f9bdc17e3a 100644 --- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c @@ -44,7 +44,7 @@ static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func) +CODELETS_CPU(zsytrf_nopiv, cl_zsytrf_nopiv_cpu_func) void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options, cham_uplo_t uplo, int n, int nb, diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c index 0754c207f01e2fa3463e309991f451f3b3be3510..708098635f6a8130390a2a902afe8a6a4a5df435 100644 --- a/runtime/starpu/codelets/codelet_ztplqt.c +++ b/runtime/starpu/codelets/codelet_ztplqt.c @@ -48,7 +48,7 @@ static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztplqt, 4, cl_ztplqt_cpu_func) +CODELETS_CPU(ztplqt, cl_ztplqt_cpu_func) void INSERT_TASK_ztplqt( const RUNTIME_option_t *options, int M, int N, int L, int ib, int nb, diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c index 92d9e3bac6644997c5fddf9f6e281cf8a5e721b1..1a250a195a1bcf1f1021012391cce8af499d679d 100644 --- a/runtime/starpu/codelets/codelet_ztpmlqt.c +++ b/runtime/starpu/codelets/codelet_ztpmlqt.c @@ -92,7 +92,7 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(ztpmlqt, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c index c0da2c794afedca487488388d42bd47204b22f3a..e36abb2cd24b0de669e2c9a3f13d773baf7618c2 100644 --- a/runtime/starpu/codelets/codelet_ztpmqrt.c +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -92,7 +92,7 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(ztpmqrt, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c index 596bb44a28ee49045c69a512c0dff598cc0a7f30..9e1de5db0547c4db4026c7e4f5a985ac4e544e9b 100644 --- a/runtime/starpu/codelets/codelet_ztpqrt.c +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -47,7 +47,7 @@ static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func) +CODELETS_CPU(ztpqrt, cl_ztpqrt_cpu_func) void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options, int M, int N, int L, int ib, int nb, diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index e19dd2636fce5ea0d9ffb13345cc37953eadd9a5..ac3dc8bfaecc14657dc48399b2c10a010babe83f 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -46,7 +46,7 @@ static void cl_ztradd_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func) +CODELETS_CPU(ztradd, cl_ztradd_cpu_func) /** ****************************************************************************** @@ -112,10 +112,16 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t beta, const CHAM_desc_t *B, int Bm, int Bn ) { + if ( alpha == 0. ) { + return INSERT_TASK_zlascal( options, uplo, m, n, nb, + beta, B, Bm, Bn ); + } + struct starpu_codelet *codelet = &cl_ztradd; void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL; starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt); int workerid = (schedopt == NULL) ? -1 : schedopt->workerid; + int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -131,7 +137,7 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options, STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_VALUE, &beta, sizeof(CHAMELEON_Complex64_t), - STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), + accessB, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, STARPU_EXECUTE_ON_WORKER, workerid, diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c index 527c7e44b2511285229ea05e9b90039d6bd23655..f870eb563ebd1e9ec2f970c79018007f2d2f7f40 100644 --- a/runtime/starpu/codelets/codelet_ztrasm.c +++ b/runtime/starpu/codelets/codelet_ztrasm.c @@ -44,7 +44,7 @@ static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func) +CODELETS_CPU(ztrasm, cl_ztrasm_cpu_func) void INSERT_TASK_ztrasm( const RUNTIME_option_t *options, cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index a1d24cf9542f5ac1e310511aa440f293fb35ab56..d1404ba960ac1991c06b84dca012c2e2b930e248 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -89,7 +89,7 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(ztrmm, 2, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC) /** * @@ -102,6 +102,11 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, const CHAM_desc_t *B, int Bm, int Bn) { + if ( alpha == 0. ) { + return INSERT_TASK_zlaset( options, ChamUpperLower, m, n, + alpha, alpha, B, Bm, Bn ); + } + (void)nb; struct starpu_codelet *codelet = &cl_ztrmm; void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL; @@ -122,7 +127,7 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options, STARPU_VALUE, &m, sizeof(int), STARPU_VALUE, &n, sizeof(int), STARPU_VALUE, &alpha, sizeof(CHAMELEON_Complex64_t), - STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), + STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), STARPU_RW, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index f4ab409209f9d82b333322ac88bf23ed2dfc060e..13fb16fad1e5aab314ab1b2906572702200b0d8e 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -87,7 +87,7 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(ztrsm, 2, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC) /** * diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c index d7ed201afc60538aebadeaa78294c38fc7f1c5c4..a453d7acafb964c49f641554099b2d0ddb521665 100644 --- a/runtime/starpu/codelets/codelet_ztrssq.c +++ b/runtime/starpu/codelets/codelet_ztrssq.c @@ -42,7 +42,7 @@ static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func) +CODELETS_CPU(ztrssq, cl_ztrssq_cpu_func) void INSERT_TASK_ztrssq( const RUNTIME_option_t *options, cham_uplo_t uplo, cham_diag_t diag, diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c index 758542d8895ab175550bdf3bb266d6ac171b8661..97d8c56dddb7fd85eeaa4f9ac269c824e4804d03 100644 --- a/runtime/starpu/codelets/codelet_ztrtri.c +++ b/runtime/starpu/codelets/codelet_ztrtri.c @@ -52,7 +52,7 @@ static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func) +CODELETS_CPU(ztrtri, cl_ztrtri_cpu_func) /** * diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c index 01704f33ac1fa6d97849d97aa73a6416e2dc208c..be215519fef4b2f0b1432af2f8fd2e46ede4b2e1 100644 --- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c @@ -56,7 +56,7 @@ static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func) +CODELETS_CPU(ztsmlq_hetra1, cl_ztsmlq_hetra1_cpu_func) /** * diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c index 4c6b5229e72eb9083db9ac2df481b4cb64e68c9a..7f8c0488de3ba7c8ba695c49da4a8976667f5668 100644 --- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c @@ -56,7 +56,7 @@ static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func) +CODELETS_CPU(ztsmqr_hetra1, cl_ztsmqr_hetra1_cpu_func) /** * diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c index 32b3fb5ad829a995f4db5a52c7c1fc7a8ceac4af..0e3f717f4a57c06e0b3e6345dfd87a2bb1b15744 100644 --- a/runtime/starpu/codelets/codelet_ztstrf.c +++ b/runtime/starpu/codelets/codelet_ztstrf.c @@ -67,7 +67,7 @@ static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func) +CODELETS_CPU(ztstrf, cl_ztstrf_cpu_func) void INSERT_TASK_ztstrf( const RUNTIME_option_t *options, int m, int n, int ib, int nb, diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c index 7f2b6b24f37bc06680bf8ae50fe66c40720a06ba..9f42aa67f7750644b864103fa31220b9e58e981b 100644 --- a/runtime/starpu/codelets/codelet_zunmlq.c +++ b/runtime/starpu/codelets/codelet_zunmlq.c @@ -95,7 +95,7 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zunmlq, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zunmlq( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c index de61f52c9e5bdf04bba2c231a3cc407b0330b06a..e44279dd25e8df1e2fad2e8b58fcbd20d420b590 100644 --- a/runtime/starpu/codelets/codelet_zunmqr.c +++ b/runtime/starpu/codelets/codelet_zunmqr.c @@ -94,7 +94,7 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) +CODELETS(zunmqr, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) void INSERT_TASK_zunmqr( const RUNTIME_option_t *options, cham_side_t side, cham_trans_t trans, diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index 8402ec55b4d1fe7610092916f550e97aa99cc631..a6d9f84c7f9923bb379707516c32f5d3ccf0056b 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -31,7 +31,7 @@ These values can be changed through the call CHAMELEON_user_tag_size(int tag_width, int tag_sep) */ #define TAG_WIDTH_MIN 20 static int tag_width = 64; -static int tag_sep = 50; +static int tag_sep = 40; static int _tag_mpi_initialized_ = 0; static inline int diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h index 9b9fdc5b1569f5611c07727c421a5815c225443d..98b0c7760885aa15633f71e484de2889a04147c2 100644 --- a/runtime/starpu/include/runtime_codelets.h +++ b/runtime/starpu/include/runtime_codelets.h @@ -31,7 +31,7 @@ #define CODELET_CUDA_FLAGS(flags) #endif -#define CODELETS_ALL(cl_name, _nbuffers, cpu_func_name, cuda_func_name, _original_location_, cuda_flags) \ +#define CODELETS_ALL(cl_name, cpu_func_name, cuda_func_name, _original_location_, cuda_flags) \ struct starpu_perfmodel cl_##cl_name##_fake = { \ .type = STARPU_HISTORY_BASED, \ .symbol = "fake_"#cl_name \ @@ -47,7 +47,7 @@ .cpu_func = ((cpu_func_name)), \ CODELET_CUDA_FLAGS(cuda_flags) \ .cuda_func = ((cuda_func_name)), \ - .nbuffers = ((_nbuffers)), \ + .nbuffers = STARPU_VARIABLE_NBUFFERS, \ .model = &cl_##cl_name##_model, \ .name = #cl_name \ }; \ @@ -69,15 +69,15 @@ } #if defined(CHAMELEON_SIMULATION) -#define CODELETS_CPU(name, _nbuffers, cpu_func_name) \ - CODELETS_ALL( name, _nbuffers, (starpu_cpu_func_t) 1, NULL, STARPU_CPU, 0 ) +#define CODELETS_CPU(name, cpu_func_name) \ + CODELETS_ALL( name, (starpu_cpu_func_t) 1, NULL, STARPU_CPU, 0 ) #else -#define CODELETS_CPU(name, _nbuffers, cpu_func_name) \ - CODELETS_ALL( name, _nbuffers, cpu_func_name, NULL, STARPU_CPU, 0 ) +#define CODELETS_CPU(name, cpu_func_name) \ + CODELETS_ALL( name, cpu_func_name, NULL, STARPU_CPU, 0 ) #endif -#define CODELETS_GPU(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \ - CODELETS_ALL( name, _nbuffers, cpu_func_name, cuda_func_name, STARPU_CPU | STARPU_CUDA, cuda_flags ) +#define CODELETS_GPU(name, cpu_func_name, cuda_func_name, cuda_flags) \ + CODELETS_ALL( name, cpu_func_name, cuda_func_name, STARPU_CPU | STARPU_CUDA, cuda_flags ) #define CODELETS_ALL_HEADER(name) \ CHAMELEON_CL_CB_HEADER(name); \ @@ -89,24 +89,24 @@ #if defined(CHAMELEON_SIMULATION) #if defined(CHAMELEON_USE_CUDA) -#define CODELETS(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \ - CODELETS_GPU(name, _nbuffers, (starpu_cpu_func_t) 1, (starpu_cuda_func_t) 1, cuda_flags) +#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \ + CODELETS_GPU(name, (starpu_cpu_func_t) 1, (starpu_cuda_func_t) 1, cuda_flags) #define CODELETS_HEADER(name) CODELETS_ALL_HEADER(name) #else -#define CODELETS(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \ - CODELETS_CPU(name, _nbuffers, (starpu_cpu_func_t) 1) +#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \ + CODELETS_CPU(name, (starpu_cpu_func_t) 1) #define CODELETS_HEADER(name) CODELETS_ALL_HEADER(name) #endif #elif defined(CHAMELEON_USE_CUDA) -#define CODELETS(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \ - CODELETS_GPU(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) +#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \ + CODELETS_GPU(name, cpu_func_name, cuda_func_name, cuda_flags) #define CODELETS_HEADER(name) CODELETS_ALL_HEADER(name) #else -#define CODELETS(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \ - CODELETS_CPU(name, _nbuffers, cpu_func_name) +#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \ + CODELETS_CPU(name, cpu_func_name) #define CODELETS_HEADER(name) CODELETS_ALL_HEADER(name) #endif diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c index e41178a64e4afad629fa148408fba776a5470def..0d6d6e5d93d4ff1c1251f1994ad07ffcb04e28d3 100644 --- a/testing/chameleon_ztesting.c +++ b/testing/chameleon_ztesting.c @@ -98,10 +98,11 @@ static parameter_t parameters[] = { { "||A||", "Norm of the matrix A", 1005, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, { "||B||", "Norm of the matrix B", 1006, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, { "||C||", "Norm of the matrix C", 1007, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, - { "||b||", "Norm of the vector b", 1008, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, - { "||x||", "Norm of the vector x", 1009, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, - { "||Ax-b||/N/eps/(||A||||x||+||b||", "", 1010, PARAM_OUTPUT, 2, 22, TestValDouble, {0}, NULL, pread_double, sprint_double }, - { "||I-QQ'||", "Orthonormality of Q", 1011, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||R||", "Residual norm", 1008, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||b||", "Norm of the vector b", 1009, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||x||", "Norm of the vector x", 1010, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||Ax-b||/N/eps/(||A||||x||+||b||", "", 1011, PARAM_OUTPUT, 2, 22, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||I-QQ'||", "Orthonormality of Q", 1012, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, }; #define STR_MAX_LENGTH 256 diff --git a/testing/input/geadd.in b/testing/input/geadd.in index 180fa9a87214d6b5bf1715c8ef341f39f76aa4d9..8e6fe98f11e7ec97f464df6a4cbb76f114f22daa 100644 --- a/testing/input/geadd.in +++ b/testing/input/geadd.in @@ -12,6 +12,8 @@ # alpha: Scalar alpha # beta: Scalar beta +alpha = 0., 3.45 +beta = 0., -4.86 op = geadd nb = 16, 17 ib = 8 diff --git a/testing/input/gemm.in b/testing/input/gemm.in index f4df5ce2ce5384429096b78aa37d291962c09c0b..ae01aee4dc82bcba9d6ebc60584d296dadb4a9e2 100644 --- a/testing/input/gemm.in +++ b/testing/input/gemm.in @@ -15,6 +15,8 @@ # alpha: Scalar alpha # beta: Scalar beta +alpha = 0., 3.45 +beta = 0., -4.86 op = gemm nb = 16, 17 ib = 8 diff --git a/testing/input/hemm.in b/testing/input/hemm.in index a571443045e1661b43a50adcd3bb9f96d5038362..047e5e9a6192508b071b714e77b6b6f80c79b508 100644 --- a/testing/input/hemm.in +++ b/testing/input/hemm.in @@ -15,6 +15,8 @@ # beta: Scalar beta # bump: bump value for Hermitian matrices +alpha = 0., 3.45 +beta = 0., -4.86 op = hemm nb = 16, 17 ib = 8 diff --git a/testing/input/her2k.in b/testing/input/her2k.in index 8b420b62aa61c76e1937b7adbef00a6453d04c7c..261c63cee3a189eb96d5616ff494db43dc3c67ae 100644 --- a/testing/input/her2k.in +++ b/testing/input/her2k.in @@ -15,6 +15,8 @@ # beta: Scalar beta # bump: Bump value for symmetric matrices +alpha = 0., 3.45 +beta = 0., -4.86 op = her2k nb = 16, 17 ib = 8 diff --git a/testing/input/herk.in b/testing/input/herk.in index c00df4c16f8fb4fa1f1c97e975a117d3de1b1974..eeec8f5201125c10128242fab32e7cced7250c83 100644 --- a/testing/input/herk.in +++ b/testing/input/herk.in @@ -14,6 +14,8 @@ # beta: Scalar beta # bump: Bump value for symmetric matrices +alpha = 0., 3.45 +beta = 0., -4.86 op = herk nb = 16, 17 ib = 8 diff --git a/testing/input/lascal.in b/testing/input/lascal.in index ba7171b1b6abf98c90a8ab333a937456f566b4a6..14bccea0cb61f9ee3c6d09557e42c7b3f4340351 100644 --- a/testing/input/lascal.in +++ b/testing/input/lascal.in @@ -10,6 +10,7 @@ # uplo: Part of the matrix to be copied (0 for Upper, 1 for Lower and 2 for UpperLower) # alpha: Scale to apply +alpha = 0., 3.45 op = lascal nb = 16, 17 ib = 8 diff --git a/testing/input/symm.in b/testing/input/symm.in index 17c7c3dacaa616116c1aa8fba89a6b5797034724..6981e403d4a56a29242983db24e61e02d3a7f799 100644 --- a/testing/input/symm.in +++ b/testing/input/symm.in @@ -15,6 +15,8 @@ # beta: Scalar beta # bump: bump value for Hermitian matrices +alpha = 0., 3.45 +beta = 0., -4.86 op = symm nb = 16, 17 ib = 8 diff --git a/testing/input/syr2k.in b/testing/input/syr2k.in index fe434797ba148ca5847228e1c08ea845dfb42af8..24ced3073bec2c06dee53f4ea44803791a5c779d 100644 --- a/testing/input/syr2k.in +++ b/testing/input/syr2k.in @@ -15,6 +15,8 @@ # beta: Scalar beta # bump: Bump value for symmetric matrices +alpha = 0., 3.45 +beta = 0., -4.86 op = syr2k nb = 16, 17 ib = 8 diff --git a/testing/input/syrk.in b/testing/input/syrk.in index 82daa929d76c69cfd164dceea7af1632d9c89fd4..b37b6a823d5db1bf788dd046190c39852219d6ae 100644 --- a/testing/input/syrk.in +++ b/testing/input/syrk.in @@ -14,6 +14,8 @@ # beta: Scalar beta # bump: Bump value for symmetric matrices +alpha = 0., 3.45 +beta = 0., -4.86 op = syrk nb = 16, 17 ib = 8 diff --git a/testing/input/tradd.in b/testing/input/tradd.in index b930006627c68b1e14a3ebe245fb418e53150bdb..a574f03519847cf4183b360ba06e743d2c243492 100644 --- a/testing/input/tradd.in +++ b/testing/input/tradd.in @@ -13,6 +13,8 @@ # alpha: Scalar alpha # beta: Scalar beta +alpha = 0., 3.45 +beta = 0., -4.86 op = tradd nb = 16, 17 ib = 8 diff --git a/testing/input/trmm.in b/testing/input/trmm.in index d570337401e27e1c6991f68b42cb09d5a363e0df..6c6228f1430fae996d28e3f4588d2cff4764f665 100644 --- a/testing/input/trmm.in +++ b/testing/input/trmm.in @@ -14,6 +14,7 @@ # diag: Whether or not A is unit triangular # alpha: Scalar alpha +alpha = 0., 3.45 op = trmm nb = 16, 17 ib = 8 diff --git a/testing/input/trsm.in b/testing/input/trsm.in index 2882e91427de91845e5a63c5a5188d393e1e505e..93098cc254da1411d468f01b564e5bb50dcb3ca9 100644 --- a/testing/input/trsm.in +++ b/testing/input/trsm.in @@ -15,6 +15,7 @@ # diag: Whether or not A is unit triangular # alpha: Scalar alpha +alpha = 0., 3.45 op = trsm nb = 16, 17 ib = 8 diff --git a/testing/testing_zcheck.c b/testing/testing_zcheck.c index 5a5f9f302e24a3c962aac79989f521f683e5feae..e0d4c1964c4ef857b78d3d79f1f9829a84ae039f 100644 --- a/testing/testing_zcheck.c +++ b/testing/testing_zcheck.c @@ -105,7 +105,12 @@ int check_zmatrices( run_arg_list_t *args, cham_uplo_t uplo, CHAM_desc_t *descA, Rnorm = LAPACKE_zlantr_work( LAPACK_COL_MAJOR, 'M', chameleon_lapack_const(uplo), 'N', M, N, B, LDA, work ); } - result = Rnorm / (Anorm * eps); + if ( Anorm != 0. ) { + result = Rnorm / (Anorm * eps); + } + else { + result = Rnorm; + } /* Verifies if the result is inside a threshold */ if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { @@ -537,7 +542,16 @@ int check_zgemm( run_arg_list_t *args, cham_trans_t transA, cham_trans_t transB, /* Calculates the norm with the core function's result */ Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL ); - result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * K * eps); + if ( ( alpha != 0. ) || (beta != 0. ) ) { + result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * K * eps); + } + else { + result = Rnorm; + } + run_arg_add_double( args, "||A||", Anorm ); + run_arg_add_double( args, "||B||", Bnorm ); + run_arg_add_double( args, "||C||", Crefnorm ); + run_arg_add_double( args, "||R||", Rnorm ); /* Verifies if the result is inside a threshold */ if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { @@ -685,7 +699,12 @@ int check_zsymm( run_arg_list_t *args, cham_mtxtype_t matrix_type, cham_side_t s Clapacknorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL ); Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL ); - result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * An * eps); + if ( ( alpha != 0. ) || (beta != 0. ) ) { + result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * An * eps); + } + else { + result = Rnorm; + } /* Verifies if the result is inside a threshold */ if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { diff --git a/testing/testing_zgemm.c b/testing/testing_zgemm.c index 17d39874d0d3799dbdb482a789f58e25393fd041..b3f2bea7cc7190abfa2487585a1efe09f912bede 100644 --- a/testing/testing_zgemm.c +++ b/testing/testing_zgemm.c @@ -114,7 +114,7 @@ testing_t test_zgemm; const char *zgemm_params[] = { "mtxfmt", "nb", "transA", "transB", "m", "n", "k", "lda", "ldb", "ldc", "alpha", "beta", "seedA", "seedB", "seedC", NULL }; const char *zgemm_output[] = { NULL }; -const char *zgemm_outchk[] = { "RETURN", NULL }; +const char *zgemm_outchk[] = { "||A||", "||B||", "||C||", "||R||", "RETURN", NULL }; /** * @brief Testing registration function