diff --git a/include/chameleon.h b/include/chameleon.h index 12c295a7732ef73f1a1fac421bb38be6f0cdd9ea..7aeaf7d331227c35cd34288db6a95b68fc50f2fc 100644 --- a/include/chameleon.h +++ b/include/chameleon.h @@ -30,7 +30,6 @@ * CHAMELEON types and constants */ #include "chameleon/config.h" -#define _GNU_SOURCE 1 #include <stdio.h> #include "chameleon/constants.h" #include "chameleon/types.h" diff --git a/runtime/starpu/codelets/codelet_convert.c b/runtime/starpu/codelets/codelet_convert.c index 8a8b34fccc628069e41d23ed3eb2ad11fa6942b7..2ce6ff8e888873ccfae5e1788f92b8f16118b0f4 100644 --- a/runtime/starpu/codelets/codelet_convert.c +++ b/runtime/starpu/codelets/codelet_convert.c @@ -16,7 +16,7 @@ * @date 2023-07-06 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelets.h" #define PRECISION_z #include "runtime_codelet_z.h" diff --git a/runtime/starpu/codelets/codelet_dlag2h.c b/runtime/starpu/codelets/codelet_dlag2h.c index f2106e3b6214d57db268a3edf4053693e3bd37b6..227d71ab21effd7ef098007d81aa75ef5d753d63 100644 --- a/runtime/starpu/codelets/codelet_dlag2h.c +++ b/runtime/starpu/codelets/codelet_dlag2h.c @@ -22,7 +22,7 @@ * @precisions normal d -> d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_d.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_dlag2z.c b/runtime/starpu/codelets/codelet_dlag2z.c index a3dc89cd5cb5f25183db641107d3a1092264d076..647b4f194b1d086712663a045e18fa17f2efe519 100644 --- a/runtime/starpu/codelets/codelet_dlag2z.c +++ b/runtime/starpu/codelets/codelet_dlag2z.c @@ -17,7 +17,7 @@ * @precisions normal z -> c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_dzasum.c b/runtime/starpu/codelets/codelet_dzasum.c index 3c4a6b4d80594b12fca50a9f938a2a49e10c25cf..cf0a81c2ffc95611a68bfda3092a8c11cc7048fb 100644 --- a/runtime/starpu/codelets/codelet_dzasum.c +++ b/runtime/starpu/codelets/codelet_dzasum.c @@ -22,7 +22,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_gemm.c b/runtime/starpu/codelets/codelet_gemm.c index 64cac09f4bea08a6eae21247394ade1f7bbe0fa4..718b191a525591112d5878ba9da706b79aa96d58 100644 --- a/runtime/starpu/codelets/codelet_gemm.c +++ b/runtime/starpu/codelets/codelet_gemm.c @@ -16,7 +16,7 @@ * @date 2024-03-11 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelets.h" #define PRECISION_z #include "runtime_codelet_z.h" diff --git a/runtime/starpu/codelets/codelet_gemmex.c b/runtime/starpu/codelets/codelet_gemmex.c index a7a76caaf5cbf003412b86e4f6ced8afbc37e084..68e8615a54c5c79909b900b4b6bd1baf14d9d78e 100644 --- a/runtime/starpu/codelets/codelet_gemmex.c +++ b/runtime/starpu/codelets/codelet_gemmex.c @@ -16,7 +16,7 @@ * @date 2023-07-06 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelets.h" CHAMELEON_CL_CB( gemmex, cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), cti_handle_get_n(task->handles[0]), 2. *M*N*K) /* If A^t, computation is wrong */ diff --git a/runtime/starpu/codelets/codelet_hgemm.c b/runtime/starpu/codelets/codelet_hgemm.c index 325dd3aa59c3fab93ae94028ba7b0900b226e0e5..4456b974e5e06cd56cbc8cbdf460fe87a9b898de 100644 --- a/runtime/starpu/codelets/codelet_hgemm.c +++ b/runtime/starpu/codelets/codelet_hgemm.c @@ -16,7 +16,7 @@ * @date 2024-03-11 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelets.h" CHAMELEON_CL_CB( hgemm, cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), cti_handle_get_n(task->handles[0]), 2. *M*N*K) /* If A^t, computation is wrong */ diff --git a/runtime/starpu/codelets/codelet_ipiv.c b/runtime/starpu/codelets/codelet_ipiv.c index e5dba252a6312d625a825485cc84d0657973f435..5290dbabfc474d270ca116f513a5b15463daab50 100644 --- a/runtime/starpu/codelets/codelet_ipiv.c +++ b/runtime/starpu/codelets/codelet_ipiv.c @@ -16,7 +16,7 @@ * @date 2024-03-16 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelets.h" static void cl_ipiv_init_cpu_func(void *descr[], void *cl_arg) diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c index 2ef297b0d71e0152a5e997be70bf0880e2260046..982942cb5c48a1578d95a58eedc03d3f07780be1 100644 --- a/runtime/starpu/codelets/codelet_map.c +++ b/runtime/starpu/codelets/codelet_map.c @@ -15,7 +15,7 @@ * @date 2024-03-11 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_map_args_s { diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c index 3aa9e8523a395a0fe63ab22bbfdd58dfb6f038d3..df99e1e1bc4ebd183edda804c9dc60779cff5e60 100644 --- a/runtime/starpu/codelets/codelet_zaxpy.c +++ b/runtime/starpu/codelets/codelet_zaxpy.c @@ -19,7 +19,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c index f96db26a88ad966fcf3d4c41d4cf6a9d021bbbdd..5d59377cad2788af7e5629b622a7aa701341fa2d 100644 --- a/runtime/starpu/codelets/codelet_zbuild.c +++ b/runtime/starpu/codelets/codelet_zbuild.c @@ -27,7 +27,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c index a5dca09b287bad72facf4e159a56e90b5d842f58..9f10f796c9c6ef2c57d9ce9d24d05503c8e2b131 100644 --- a/runtime/starpu/codelets/codelet_zcallback.c +++ b/runtime/starpu/codelets/codelet_zcallback.c @@ -20,7 +20,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if defined(PRECISION_z) || defined(PRECISION_c) diff --git a/runtime/starpu/codelets/codelet_zccallback.c b/runtime/starpu/codelets/codelet_zccallback.c index 534e4b047a179729299bbf031aa706322b93a33f..a1d3f893f6279d98045393bb59f5abced3126e29 100644 --- a/runtime/starpu/codelets/codelet_zccallback.c +++ b/runtime/starpu/codelets/codelet_zccallback.c @@ -19,7 +19,7 @@ * @precisions mixed zc -> ds * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_zc.h" CHAMELEON_CL_CB(zlag2c, cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0, M*N) diff --git a/runtime/starpu/codelets/codelet_zcesca.c b/runtime/starpu/codelets/codelet_zcesca.c index 25708af1c25b4673c6e127ac9c64fc0f8829f088..0549b5906ed0bccd17a0edefd2ed114aa1fff7b2 100644 --- a/runtime/starpu/codelets/codelet_zcesca.c +++ b/runtime/starpu/codelets/codelet_zcesca.c @@ -15,7 +15,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zcesca_args_s { diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c index 37c6a78790701f77ce2937cb8c61f2531af610c1..5bfaa5252e1c02ef4a8390c810fa9cc9cc885803 100644 --- a/runtime/starpu/codelets/codelet_zgeadd.c +++ b/runtime/starpu/codelets/codelet_zgeadd.c @@ -22,7 +22,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c index 3727a8f52e9fcb789dc5deb4ea89b398b732bc81..357d73ef179a4903c4731c7a48b9050a174ca792 100644 --- a/runtime/starpu/codelets/codelet_zgelqt.c +++ b/runtime/starpu/codelets/codelet_zgelqt.c @@ -26,7 +26,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c index 9a5dbcc326859044fbbeb7065d2e51c48e5c8f07..f1d684d1899d915ba40bee4c0e9f3e9cd91e86ac 100644 --- a/runtime/starpu/codelets/codelet_zgemm.c +++ b/runtime/starpu/codelets/codelet_zgemm.c @@ -28,7 +28,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zgemv.c b/runtime/starpu/codelets/codelet_zgemv.c index 35830c312217dab4f7b8ca1162f281cf4b7b0033..b951c8686ef5c952e4bbda3c863ca15a6fbef44e 100644 --- a/runtime/starpu/codelets/codelet_zgemv.c +++ b/runtime/starpu/codelets/codelet_zgemv.c @@ -17,7 +17,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c index bb350b8d4f953da08e8e4872231cc991214d41a6..efdd162a62d25d1d3a035c2bbdb0df805a839c34 100644 --- a/runtime/starpu/codelets/codelet_zgeqrt.c +++ b/runtime/starpu/codelets/codelet_zgeqrt.c @@ -26,7 +26,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zgered.c b/runtime/starpu/codelets/codelet_zgered.c index fe1c4927ef525aa24dd53a6b83f22d3c5e9959f4..d7a132200d603dca976f5c3d90ecf69cd2a33ab3 100644 --- a/runtime/starpu/codelets/codelet_zgered.c +++ b/runtime/starpu/codelets/codelet_zgered.c @@ -18,7 +18,7 @@ * @precisions normal z -> d * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include <coreblas/lapacke.h> #include "runtime_codelet_zc.h" #include "runtime_codelet_z.h" diff --git a/runtime/starpu/codelets/codelet_zgerst.c b/runtime/starpu/codelets/codelet_zgerst.c index 9a5c825f149c171dd2ad14f812d6bab7ed926546..f0fbdc1a40cffc2f88f46f8cb32807acb0cd5720 100644 --- a/runtime/starpu/codelets/codelet_zgerst.c +++ b/runtime/starpu/codelets/codelet_zgerst.c @@ -16,7 +16,7 @@ * @precisions normal z -> d * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include <coreblas/lapacke.h> #include "runtime_codelet_zc.h" #include "runtime_codelet_z.h" diff --git a/runtime/starpu/codelets/codelet_zgersum.c b/runtime/starpu/codelets/codelet_zgersum.c index dd44fb9f61150ebf4219af469b79bfd779cf8085..8e5223087f8ecc53bd2c06016521fc42a815618b 100644 --- a/runtime/starpu/codelets/codelet_zgersum.c +++ b/runtime/starpu/codelets/codelet_zgersum.c @@ -19,7 +19,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c index 4c2f1fb1b51671bce652acdd344ef11b4a2cb8ea..bca40f046c0ae633429ccd20aee1cb5ce1ec579d 100644 --- a/runtime/starpu/codelets/codelet_zgessm.c +++ b/runtime/starpu/codelets/codelet_zgessm.c @@ -26,7 +26,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c index a951ac6bc1248f74ed52bfc3c42cc3076614ecac..5973286920582f4cc16297670834ff5409494a98 100644 --- a/runtime/starpu/codelets/codelet_zgessq.c +++ b/runtime/starpu/codelets/codelet_zgessq.c @@ -22,7 +22,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zgesum.c b/runtime/starpu/codelets/codelet_zgesum.c index a301533d1de006ab7e2cb07eb03d57df60f9b952..13a07b920bf994327983e525e06320b705794cde 100644 --- a/runtime/starpu/codelets/codelet_zgesum.c +++ b/runtime/starpu/codelets/codelet_zgesum.c @@ -15,7 +15,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zgesum_args_s { diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c index 181f57df0a156220595ad64f7f657614e4294958..f55d866043bbc4c765318ccba6d832ad24bc4d59 100644 --- a/runtime/starpu/codelets/codelet_zgetrf.c +++ b/runtime/starpu/codelets/codelet_zgetrf.c @@ -24,7 +24,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zgetrf_batched.c b/runtime/starpu/codelets/codelet_zgetrf_batched.c index d9c55d76cd3fa290ab004ebc854e3d5f4638cf93..3a96d1e9a96059554fbaafeb7b7e74472d1d967c 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_batched.c +++ b/runtime/starpu/codelets/codelet_zgetrf_batched.c @@ -21,9 +21,8 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" -#include <coreblas/cblas_wrapper.h> struct cl_getrf_batched_args_t { const char *cl_name; diff --git a/runtime/starpu/codelets/codelet_zgetrf_blocked.c b/runtime/starpu/codelets/codelet_zgetrf_blocked.c index 8739f27deb22f8ba019fa85338c4fdcbc0a0d789..944759fa27f71a3482181be0580872d19d990dc4 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_blocked.c +++ b/runtime/starpu/codelets/codelet_zgetrf_blocked.c @@ -19,9 +19,8 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" -#include <coreblas/cblas_wrapper.h> CHAMELEON_CL_CB( zgetrf_blocked_diag, cti_handle_get_m(task->handles[0]), 0, 0, M ); CHAMELEON_CL_CB( zgetrf_blocked_offdiag, cti_handle_get_m(task->handles[0]), 0, 0, M ); diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c index 4ff218c4c0f8cd050cc63ac921e29d822255c594..899046a2441ddda529a173c39aee7099532019d0 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c @@ -26,7 +26,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c index 3c0bccc80158b41ef3bbc7a2b732f0dcc478bd64..842dcfae5ea18fc3a0e9d87ee566fd19dd8b23f1 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c @@ -23,7 +23,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" /* diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c index 5643f3e638c5c6f45adb57d6c4518ec5eabd6d75..d55e50e3beebc69ef2151b35e44acd80f72e8c70 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c +++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c @@ -17,9 +17,8 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" -#include <coreblas/cblas_wrapper.h> CHAMELEON_CL_CB( zgetrf_nopiv_percol_diag, cti_handle_get_m(task->handles[0]), 0, 0, M ); CHAMELEON_CL_CB( zgetrf_nopiv_percol_trsm, cti_handle_get_m(task->handles[0]), 0, 0, M ); diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c index 0b556f81605a9cc78faea6fa6e312ffc0e643631..df84a84260708598630f3ef00346c042e4af5f08 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_percol.c +++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c @@ -19,9 +19,8 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" -#include <coreblas/cblas_wrapper.h> CHAMELEON_CL_CB( zgetrf_percol_diag, cti_handle_get_m(task->handles[0]), 0, 0, M ); CHAMELEON_CL_CB( zgetrf_percol_offdiag, cti_handle_get_m(task->handles[0]), 0, 0, M ); diff --git a/runtime/starpu/codelets/codelet_zgram.c b/runtime/starpu/codelets/codelet_zgram.c index d43e7884f99e868600338a9c9f349ef8b0d97bf6..feb1af5dc295cd39c8cf2edced92a864a7f4c2a3 100644 --- a/runtime/starpu/codelets/codelet_zgram.c +++ b/runtime/starpu/codelets/codelet_zgram.c @@ -17,7 +17,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c index 19af61a01076047ff6d00aa28a66a985c6642754..83e945da165df5158336baa2b447afa2b30b6ab7 100644 --- a/runtime/starpu/codelets/codelet_zhe2ge.c +++ b/runtime/starpu/codelets/codelet_zhe2ge.c @@ -20,7 +20,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c index 7d882b0345224aff4a049e59e45b75328d9e3380..3766fdfbd40a71d9da11e3817d77a2995e2fcb64 100644 --- a/runtime/starpu/codelets/codelet_zhemm.c +++ b/runtime/starpu/codelets/codelet_zhemm.c @@ -24,7 +24,7 @@ * @precisions normal z -> c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zhemm_args_s { diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c index 400f14612eedc17699f313def07115ebd6452083..974ea61221403f3a5688b773d5701b6926ef6ea7 100644 --- a/runtime/starpu/codelets/codelet_zher2k.c +++ b/runtime/starpu/codelets/codelet_zher2k.c @@ -24,7 +24,7 @@ * @precisions normal z -> c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c index f7c168edda1839004c51689971eedf7c15b615b3..f59ec51641f1c0e4893664123823f84f7f37522e 100644 --- a/runtime/starpu/codelets/codelet_zherfb.c +++ b/runtime/starpu/codelets/codelet_zherfb.c @@ -20,7 +20,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c index 243d8346c148cd2af465479b36d96087911481eb..09395d98fb5f2a28260f74ed90aedcad17ccd962 100644 --- a/runtime/starpu/codelets/codelet_zherk.c +++ b/runtime/starpu/codelets/codelet_zherk.c @@ -25,7 +25,7 @@ * @precisions normal z -> c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zherk_args_s { diff --git a/runtime/starpu/codelets/codelet_zhessq.c b/runtime/starpu/codelets/codelet_zhessq.c index e256d530242f9b5a1ed0f2b0f6ac764d3403f641..1f951c8b61939f7bbd8456198c0c66ff91014d46 100644 --- a/runtime/starpu/codelets/codelet_zhessq.c +++ b/runtime/starpu/codelets/codelet_zhessq.c @@ -20,7 +20,7 @@ * @precisions normal z -> c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" void INSERT_TASK_zhessq( const RUNTIME_option_t *options, diff --git a/runtime/starpu/codelets/codelet_zipiv_allreduce.c b/runtime/starpu/codelets/codelet_zipiv_allreduce.c index 13a41ceb04be76b2f89419a20bd6209d3aebd6e3..f296bb3df7433bf6eb8d8c47e468b262ee741f16 100644 --- a/runtime/starpu/codelets/codelet_zipiv_allreduce.c +++ b/runtime/starpu/codelets/codelet_zipiv_allreduce.c @@ -15,9 +15,8 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" -#include <coreblas/cblas_wrapper.h> #if defined(CHAMELEON_USE_MPI) struct cl_redux_args_t { diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index 2a3d9781e676d85507d0135848b25211d04e263a..2d227b37b7167fa28f687652cdcc4b5e734b3299 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ b/runtime/starpu/codelets/codelet_zlacpy.c @@ -25,7 +25,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zlacpy_args_s { diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c index 1a56bc748671464a452e8dbcb29aff43630ce060..d95671b6b8db065d088eb975b89053d67cd630f3 100644 --- a/runtime/starpu/codelets/codelet_zlag2c.c +++ b/runtime/starpu/codelets/codelet_zlag2c.c @@ -22,7 +22,7 @@ * @precisions mixed zc -> ds * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_zc.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c index 01ac3d111d591531423bc495ba3e40e6ece46bdc..d6924c836b120eaaf80f7fe6a07f005593cb7894 100644 --- a/runtime/starpu/codelets/codelet_zlange.c +++ b/runtime/starpu/codelets/codelet_zlange.c @@ -24,7 +24,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c index 3a12a9befcd4c6e5fed4a6b26756e3d5081ebc76..1477286e6c00e963d00c618a83a374d8cfa18afe 100644 --- a/runtime/starpu/codelets/codelet_zlanhe.c +++ b/runtime/starpu/codelets/codelet_zlanhe.c @@ -24,7 +24,7 @@ * @precisions normal z -> c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c index d07dd10b7dcb480fb3e3def31c66600315553e7d..d602b34feb86b3e95755af04c91aa94854199ebf 100644 --- a/runtime/starpu/codelets/codelet_zlansy.c +++ b/runtime/starpu/codelets/codelet_zlansy.c @@ -24,7 +24,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c index cee8c0b7f7447dc95f320d2ec8bd59e629d4d9b0..565580d880bd3e8ac258c3dcde02a68612162a2f 100644 --- a/runtime/starpu/codelets/codelet_zlantr.c +++ b/runtime/starpu/codelets/codelet_zlantr.c @@ -22,7 +22,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c index 1df2a126731effd96cb59047bdedcb009b4b5f3e..ab76d808ee3b0c4c8c13d452b165eff718508a03 100644 --- a/runtime/starpu/codelets/codelet_zlascal.c +++ b/runtime/starpu/codelets/codelet_zlascal.c @@ -21,7 +21,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zlascal_args_s { diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c index 19f77a3520c70ab70e862e9da460e030c71ca8b9..e2c168968c3505e4d7ec7e2f069b7800e0f6d5d2 100644 --- a/runtime/starpu/codelets/codelet_zlaset.c +++ b/runtime/starpu/codelets/codelet_zlaset.c @@ -23,7 +23,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zlaset_args_s { diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c index deef53db08adf0af79411f1bd7f62aa5673f1d48..29967f340cd21f9cf89ef01174b3281de273a3d6 100644 --- a/runtime/starpu/codelets/codelet_zlaset2.c +++ b/runtime/starpu/codelets/codelet_zlaset2.c @@ -25,7 +25,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zlaswp.c b/runtime/starpu/codelets/codelet_zlaswp.c index 4ffa8384cdec2faab1f264571cc3757263cee138..ade365c68ff52757a11b9c8077d14ce28e7208d0 100644 --- a/runtime/starpu/codelets/codelet_zlaswp.c +++ b/runtime/starpu/codelets/codelet_zlaswp.c @@ -16,7 +16,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c index 9ce1578774c8a5581506978eb4947b71fa2536bf..e5a2c67d9b1b18684d7a7ffd5aa8d74f6eab6398 100644 --- a/runtime/starpu/codelets/codelet_zlatro.c +++ b/runtime/starpu/codelets/codelet_zlatro.c @@ -26,7 +26,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c index 34a6f3657e810520cae6cb276360a65f8c858c8c..8ed5a979ca52ca235ea7eb81f8bd0c5afcd409cf 100644 --- a/runtime/starpu/codelets/codelet_zlauum.c +++ b/runtime/starpu/codelets/codelet_zlauum.c @@ -24,7 +24,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zlauum_args_s { diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c index 82c3a6511f6cd5f7ef00470abb15992f065d87a9..b93fd808d01f79f7f301e8bb91ae2a315fdfb481 100644 --- a/runtime/starpu/codelets/codelet_zplghe.c +++ b/runtime/starpu/codelets/codelet_zplghe.c @@ -24,7 +24,7 @@ * @precisions normal z -> c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zplghe_args_s { diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c index dfb38163ce6769ecabca7dc1c6d0cf7ffe3fd8a0..36f42b355a1d5f7110cae098d23b95ba007f5aa7 100644 --- a/runtime/starpu/codelets/codelet_zplgsy.c +++ b/runtime/starpu/codelets/codelet_zplgsy.c @@ -24,7 +24,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zplgsy_args_s { diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c index 343793a086a351b5f36533e5527579d976605ca0..5b104fd45fd36f599432e432db55a6ba084d42c3 100644 --- a/runtime/starpu/codelets/codelet_zplrnt.c +++ b/runtime/starpu/codelets/codelet_zplrnt.c @@ -24,7 +24,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zplrnt_args_s { diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c index 15f3bb16e0f38a19189d8f3bcab4927c74201811..ab1457e5eaa41f065f69f4553543eb044db5aa40 100644 --- a/runtime/starpu/codelets/codelet_zplssq.c +++ b/runtime/starpu/codelets/codelet_zplssq.c @@ -21,7 +21,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c index 5c9e183b71d83663a522f27d3ccab6e3af7db5cf..929c9b75dfa172dea2eb1cb79fe25496adbcba26 100644 --- a/runtime/starpu/codelets/codelet_zpotrf.c +++ b/runtime/starpu/codelets/codelet_zpotrf.c @@ -25,7 +25,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zpotrf_args_s { diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c index 56b3fd54c253f04ea1037d319283ba71021841be..d7172e5254a8ba1a2e8fcabc75b2ee9fd3571ecb 100644 --- a/runtime/starpu/codelets/codelet_zssssm.c +++ b/runtime/starpu/codelets/codelet_zssssm.c @@ -26,7 +26,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c index 86dd2753c38fadec0dfe9f7135f02027040c0969..b5b09b1852aebdec4d13c1d09eeacea55a39b80b 100644 --- a/runtime/starpu/codelets/codelet_zsymm.c +++ b/runtime/starpu/codelets/codelet_zsymm.c @@ -25,7 +25,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zsymm_args_s { diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c index 2c2d35c2fb9a557719be7c6058f52e0fd1c14f1f..9428811f680debc4eef62616c036d57a03efcf5c 100644 --- a/runtime/starpu/codelets/codelet_zsyr2k.c +++ b/runtime/starpu/codelets/codelet_zsyr2k.c @@ -24,7 +24,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c index 0bd8f348f4b2e3bb2a73bc0b69b2f65326bfacee..f0005e36529aa851df7c6a84642b68319a8b4d40 100644 --- a/runtime/starpu/codelets/codelet_zsyrk.c +++ b/runtime/starpu/codelets/codelet_zsyrk.c @@ -26,7 +26,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_zsyrk_args_s { diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c index 53815341b125cbb97d51c47941ffa970b0d172f9..adcbc009b683c8e3b4070f15af7d4e5aee697029 100644 --- a/runtime/starpu/codelets/codelet_zsyssq.c +++ b/runtime/starpu/codelets/codelet_zsyssq.c @@ -21,7 +21,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c index 44d1bb95e8151135cb96824d9d12f137d5f12fd3..dd29deac7c4c1d126718ffe675907b08b178ca4a 100644 --- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c @@ -25,7 +25,7 @@ * @precisions normal z -> c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c index 7bef4104106c32c6bdda54573a5b9ac8539c9bf8..186665b03f0657cb1157e4560b0feb7b6c82693f 100644 --- a/runtime/starpu/codelets/codelet_ztplqt.c +++ b/runtime/starpu/codelets/codelet_ztplqt.c @@ -19,7 +19,7 @@ * @precisions normal z -> s d c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c index 62b0e5d208d6dc2f18e3ce00e274d7bde7cc0852..685605a0f991cc74428d29c698122da34b3624d0 100644 --- a/runtime/starpu/codelets/codelet_ztpmlqt.c +++ b/runtime/starpu/codelets/codelet_ztpmlqt.c @@ -17,7 +17,7 @@ * @precisions normal z -> s d c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c index e2e0349bf0f31bdef5fdd99209500de7d1a96f74..5d992f7a1c56e40ef5a809af830e9cdb678f050d 100644 --- a/runtime/starpu/codelets/codelet_ztpmqrt.c +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -17,7 +17,7 @@ * @precisions normal z -> s d c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c index df369b0b26ad82ec029094dd5401853109f8d1fa..65cd6d6d371f1fa2dd90eafc572c98ef856f7108 100644 --- a/runtime/starpu/codelets/codelet_ztpqrt.c +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -20,7 +20,7 @@ * @precisions normal z -> s d c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c index 62f1424d5d56b31fb07e8d2052ceffaa74ba9491..101bf72a38c61aa9989c39c9bb341c028ecb2323 100644 --- a/runtime/starpu/codelets/codelet_ztradd.c +++ b/runtime/starpu/codelets/codelet_ztradd.c @@ -20,7 +20,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_ztradd_args_s { diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c index c0f14b574f70f44dee0b4914074556dc795c3f0e..6c7ffd5c5c36b1cf1b0612aad43071177baf8b12 100644 --- a/runtime/starpu/codelets/codelet_ztrasm.c +++ b/runtime/starpu/codelets/codelet_ztrasm.c @@ -22,7 +22,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c index 13341ad0b51a5ceff2c3dcc8a5324b44d8964160..ced241e54df8bf789e8406d2ea4df4c22d5bde6f 100644 --- a/runtime/starpu/codelets/codelet_ztrmm.c +++ b/runtime/starpu/codelets/codelet_ztrmm.c @@ -24,7 +24,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_ztrmm_args_s { diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c index c6e92447583a3773c21ed16d0e7dfb6e6f67aef1..f20b7c5ec7b8469c4a9ac4d65b428da34134d9dc 100644 --- a/runtime/starpu/codelets/codelet_ztrsm.c +++ b/runtime/starpu/codelets/codelet_ztrsm.c @@ -26,7 +26,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_ztrsm_args_s { diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c index 23e1c525a031a58bce8f19cbde50c6c99e46d58c..186e34e24187ed24090b5e77ae99c3f45ce2ca30 100644 --- a/runtime/starpu/codelets/codelet_ztrssq.c +++ b/runtime/starpu/codelets/codelet_ztrssq.c @@ -21,7 +21,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c index e9e000753494781624bfb1036d84b61c0c8d6a1a..5582aac9fbce8872dafd5a92fd07304596fa4c41 100644 --- a/runtime/starpu/codelets/codelet_ztrtri.c +++ b/runtime/starpu/codelets/codelet_ztrtri.c @@ -24,7 +24,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" struct cl_ztrtri_args_s { diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c index 4aed2ed5a590fdfdc7c88f6e0005471e5735d4cb..1edd2d910a6b26e05ee37bbe0cb497053c7df265 100644 --- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c @@ -22,7 +22,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c index ca9798845c33444be582a9bdaac3fed9a2da254a..1b73994231e7217abc1b1d2b153d44008eb821cb 100644 --- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c +++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c @@ -22,7 +22,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c index e9743232bd8a658e0f0aef9c317f3e618a8377a9..9140671e52a93786da4a22bd6013345d5f1e39be 100644 --- a/runtime/starpu/codelets/codelet_ztstrf.c +++ b/runtime/starpu/codelets/codelet_ztstrf.c @@ -26,7 +26,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c index dd4251dcc5d1660fb314da81a35f66ee6b155ea9..f4f2cb09910d5c620ab7c58600ecaf2e2042c62c 100644 --- a/runtime/starpu/codelets/codelet_zunmlq.c +++ b/runtime/starpu/codelets/codelet_zunmlq.c @@ -27,7 +27,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c index 4640a678e21e58d3f30e7b1b4ba16bc29939fd1b..8f2b77e0596ce710cca31f210d97c7ea77e4dbfe 100644 --- a/runtime/starpu/codelets/codelet_zunmqr.c +++ b/runtime/starpu/codelets/codelet_zunmqr.c @@ -26,7 +26,7 @@ * @precisions normal z -> c d s * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if !defined(CHAMELEON_SIMULATION) diff --git a/runtime/starpu/control/runtime_async.c b/runtime/starpu/control/runtime_async.c index ea19203fcf95effe511255ff690ccdb7810f28eb..4133aeefd3088338862add96d3343773b20fef1e 100644 --- a/runtime/starpu/control/runtime_async.c +++ b/runtime/starpu/control/runtime_async.c @@ -19,7 +19,7 @@ * @date 2024-03-16 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" /** * Create a sequence @@ -93,9 +93,9 @@ int RUNTIME_request_create( CHAM_context_t *chamctxt, { (void)chamctxt; /* allocate schedopt */ - request->schedopt = (starpu_option_request_t*)malloc(sizeof(starpu_option_request_t)); + request->schedopt = (RUNTIME_request_starpu_t*)malloc(sizeof(RUNTIME_request_starpu_t)); /* initialize schedopt */ - starpu_option_request_t* schedopt = (starpu_option_request_t *)(request->schedopt); + RUNTIME_request_starpu_t* schedopt = (RUNTIME_request_starpu_t *)(request->schedopt); /* default is to not use "execute_on_a_specific_worker" i.e. -1 */ schedopt->workerid = -1; request->status = CHAMELEON_SUCCESS; @@ -124,7 +124,7 @@ int RUNTIME_request_set( CHAM_context_t *chamctxt, chameleon_error("RUNTIME_request_set", "request not initialized"); return CHAMELEON_ERR_NOT_INITIALIZED; } - starpu_option_request_t* schedopt = (starpu_option_request_t *)(request->schedopt); + RUNTIME_request_starpu_t* schedopt = (RUNTIME_request_starpu_t *)(request->schedopt); switch ( param ) { case CHAMELEON_REQUEST_WORKERID: diff --git a/runtime/starpu/control/runtime_context.c b/runtime/starpu/control/runtime_context.c index 56966d8cbfd34660b97b2237f59833e4ad901f25..65f7f663a90e3dc120d3a570eeaf0afdbfb966a2 100644 --- a/runtime/starpu/control/runtime_context.c +++ b/runtime/starpu/control/runtime_context.c @@ -21,7 +21,7 @@ * @date 2022-02-22 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" /** * @brief Store the status of some flags to knwo when enable/disable them @@ -47,7 +47,7 @@ void RUNTIME_context_create( CHAM_context_t *chamctxt ) chamctxt->scheduler = RUNTIME_SCHED_STARPU; if ( !starpu_is_initialized() ) { - starpu_sched_opt_t *sched_opt = malloc( sizeof(starpu_sched_opt_t) ); + CHAM_context_starpu_t *sched_opt = malloc( sizeof(CHAM_context_starpu_t) ); sched_opt->pw_config = NULL; starpu_conf_init( &(sched_opt->starpu_conf) ); diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c index 96e1c3ff7cbad2e0c5336a2804ab5d98e08ee56d..c2cb79397c0605f5e18acdd86ffba59bd90fe5b2 100644 --- a/runtime/starpu/control/runtime_control.c +++ b/runtime/starpu/control/runtime_control.c @@ -24,7 +24,7 @@ * @date 2024-03-16 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include <stdio.h> #include <stdlib.h> #if defined(STARPU_USE_FXT) @@ -34,7 +34,7 @@ static int starpu_initialized = 0; #if defined(STARPU_HAVE_HWLOC) && defined(HAVE_STARPU_PARALLEL_WORKER) -void chameleon_starpu_parallel_worker_init( starpu_sched_opt_t *sched_opt ) +void chameleon_starpu_parallel_worker_init( CHAM_context_starpu_t *sched_opt ) { char *env_pw_level = chameleon_getenv( "CHAMELEON_PARALLEL_WORKER_LEVEL" ); @@ -85,7 +85,7 @@ void chameleon_starpu_parallel_worker_init( starpu_sched_opt_t *sched_opt ) chameleon_cleanenv( env_pw_level ); } -void chameleon_starpu_parallel_worker_fini( starpu_sched_opt_t *sched_opt ) +void chameleon_starpu_parallel_worker_fini( CHAM_context_starpu_t *sched_opt ) { if ( sched_opt->pw_config != NULL ) { starpu_parallel_worker_shutdown( sched_opt->pw_config ); @@ -149,7 +149,7 @@ int RUNTIME_init( CHAM_context_t *chamctxt, int ncudas, int nthreads_per_worker ) { - starpu_sched_opt_t *sched_opt = (starpu_sched_opt_t*)(chamctxt->schedopt); + CHAM_context_starpu_t *sched_opt = (CHAM_context_starpu_t*)(chamctxt->schedopt); struct starpu_conf *conf = &sched_opt->starpu_conf; int hres = CHAMELEON_ERR_NOT_INITIALIZED; @@ -248,7 +248,7 @@ void RUNTIME_finalize( CHAM_context_t *chamctxt ) return; } - starpu_sched_opt_t *sched_opt = (starpu_sched_opt_t*)(chamctxt->schedopt); + CHAM_context_starpu_t *sched_opt = (CHAM_context_starpu_t*)(chamctxt->schedopt); chameleon_starpu_parallel_worker_fini( sched_opt ); starpu_cham_tile_interface_fini(); diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index e00b75badbc9a0108f55c4b5a54b6f7160e5b11c..2cb65a53232db5665a47838ba974def4b9de122c 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -23,7 +23,7 @@ * @date 2024-07-17 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" /** * Malloc/Free of the data diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c index 1ad0f7a142fd9272a3ffb445dd797db774959d60..e8cc1a1b3500ab1a742563bc9d3ad86a00929e12 100644 --- a/runtime/starpu/control/runtime_descriptor_ipiv.c +++ b/runtime/starpu/control/runtime_descriptor_ipiv.c @@ -17,7 +17,7 @@ * @date 2024-03-16 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" /** * Create ws_pivot runtime structures diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c index 444975b497849f9d9f1a4052ecfe5a0323d29198..8ec2551f59d59e94c0eb763196576911db49d0cc 100644 --- a/runtime/starpu/control/runtime_options.c +++ b/runtime/starpu/control/runtime_options.c @@ -19,12 +19,12 @@ * @date 2023-07-04 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { - starpu_option_request_t* schedopt = (starpu_option_request_t *)(request->schedopt); + RUNTIME_request_starpu_t* schedopt = (RUNTIME_request_starpu_t *)(request->schedopt); options->sequence = sequence; options->request = request; options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE; diff --git a/runtime/starpu/control/runtime_profiling.c b/runtime/starpu/control/runtime_profiling.c index 9ed7566df9d7f87281cf007285dcf7361248fb9d..1baed8e3d0390a7068fd6ad37d5a8bff0880f721 100644 --- a/runtime/starpu/control/runtime_profiling.c +++ b/runtime/starpu/control/runtime_profiling.c @@ -20,7 +20,7 @@ * @date 2022-02-22 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include <math.h> #if defined(STARPU_USE_FXT) #include <starpu_fxt.h> diff --git a/runtime/starpu/control/runtime_tags.c b/runtime/starpu/control/runtime_tags.c index 031a556641fd4a59b57f1dd78a548d84e0787628..cc05a73afcc3b07e24d63c77d4ecc88421292490 100644 --- a/runtime/starpu/control/runtime_tags.c +++ b/runtime/starpu/control/runtime_tags.c @@ -17,7 +17,7 @@ * @{ * **/ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #if defined(CHAMELEON_USE_MPI) diff --git a/runtime/starpu/control/runtime_workspace.c b/runtime/starpu/control/runtime_workspace.c index 5625bb876383a9b5f38d227ff9b39e99fe6f47a3..82bed164954cff9152d220298c0c4b163c795a09 100644 --- a/runtime/starpu/control/runtime_workspace.c +++ b/runtime/starpu/control/runtime_workspace.c @@ -19,7 +19,7 @@ * @date 2023-01-30 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #if defined(CHAMELEON_USE_CUDA) #define GPU_WORKER_TYPE STARPU_CUDA_WORKER diff --git a/runtime/starpu/control/runtime_zlocality.c b/runtime/starpu/control/runtime_zlocality.c index f0c9e375d26133a41e0c28e7df3f05587d338649..e906a874bd0d5aab61ed8e5c1a9b3cad84cbf3cd 100644 --- a/runtime/starpu/control/runtime_zlocality.c +++ b/runtime/starpu/control/runtime_zlocality.c @@ -20,7 +20,7 @@ * @precisions normal z -> s d c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" #if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP) diff --git a/runtime/starpu/control/runtime_zprofiling.c b/runtime/starpu/control/runtime_zprofiling.c index f771cce8a7f32816f4a07182a0501ca909de6c82..10ad6a2cf1debc2149b61c654277947f6cb674c4 100644 --- a/runtime/starpu/control/runtime_zprofiling.c +++ b/runtime/starpu/control/runtime_zprofiling.c @@ -20,7 +20,7 @@ * @precisions normal z -> s d c * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #include "runtime_codelet_z.h" void RUNTIME_zdisplay_allprofile() diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in index 6c4632da84520449a2e2c9f96fedef2209d196e9..44d03baa53c167871669b744dd6dac105b1810b1 100644 --- a/runtime/starpu/include/chameleon_starpu.h.in +++ b/runtime/starpu/include/chameleon_starpu.h.in @@ -26,7 +26,7 @@ #ifndef _chameleon_starpu_h_ #define _chameleon_starpu_h_ -#include "control/common.h" +#include <chameleon.h> /* StarPU options */ #cmakedefine HAVE_STARPU_IDLE_PREFETCH @@ -93,173 +93,4 @@ # endif #endif -#include "control/common.h" -#include "runtime_codelets.h" -#include "runtime_profiling.h" -#include "runtime_codelet_profile.h" -#include "runtime_workspace.h" -#include "cham_tile_interface.h" -#include "cppi_interface.h" - -typedef struct starpu_schedopt_s -{ - struct starpu_conf starpu_conf; /**< StarPU main configuration structure */ - struct starpu_parallel_worker_config *pw_config; /**< StarPU parallel workers configuration */ -} starpu_sched_opt_t; - -/* Structure used to give some options during one request (procedure) */ -typedef struct starpu_option_request_s { - int workerid; // to force task execution on a specific workerid -} starpu_option_request_t; - -/**/ - -static inline int cham_to_starpu_access( cham_access_t accessA ) { - assert( (enum starpu_data_access_mode)ChamR == STARPU_R ); - assert( (enum starpu_data_access_mode)ChamW == STARPU_W ); - assert( (enum starpu_data_access_mode)ChamRW == STARPU_RW ); - return accessA; -} - -void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options, - cham_access_t access, cham_flttype_t flttype, - const CHAM_desc_t *A, int m, int n ); - -/* - * MPI Redefinitions - */ -#if defined(CHAMELEON_USE_MPI) - -#if defined(CHAMELEON_RUNTIME_SYNC) -#define rt_starpu_insert_task( _codelet_, ... ) \ - starpu_mpi_insert_task( options->sequence->comm, (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ ) -#else -#define rt_starpu_insert_task( _codelet_, ... ) \ - starpu_mpi_insert_task( options->sequence->comm, (_codelet_), ##__VA_ARGS__ ) -#endif - -#else - -#if defined(CHAMELEON_RUNTIME_SYNC) -#define rt_starpu_insert_task( _codelet_, ... ) \ - starpu_insert_task( (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ ) -#else -#define rt_starpu_insert_task( _codelet_, ... ) \ - starpu_insert_task( (_codelet_), ##__VA_ARGS__ ) -#endif - -#endif - -#if defined(CHAMELEON_RUNTIME_SYNC) -#define rt_shm_starpu_insert_task( _codelet_, ... ) \ - starpu_insert_task( (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ ) -#else -#define rt_shm_starpu_insert_task( _codelet_, ... ) \ - starpu_insert_task( (_codelet_), ##__VA_ARGS__ ) -#endif - -/* - * Enable codelets names - */ -#if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION > 1)) -#define CHAMELEON_CODELETS_HAVE_NAME -#endif - -/** - * MPI tag management - */ -void chameleon_starpu_tag_init( ); -int64_t chameleon_starpu_tag_book( int64_t nbtags ); -void chameleon_starpu_tag_release( int64_t min ); - -/** - * Access to block pointer and leading dimension - */ -#define RTBLKADDR( desc, type, m, n ) ( (starpu_data_handle_t)RUNTIME_data_getaddr( desc, m, n ) ) - -void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, cham_flttype_t dtyp); - -#include "runtime_mpi.h" -#include "runtime_wontuse.h" - -#if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE) -static inline int -chameleon_starpu_data_iscached(const CHAM_desc_t *A, int m, int n) -{ - int64_t mm = m + (A->i / A->mb); - int64_t nn = n + (A->j / A->nb); - - starpu_data_handle_t *ptrtile = A->schedopt; - ptrtile += ((int64_t)A->lmt) * nn + mm; - - if (!(*ptrtile)) { - return 0; - } - - return starpu_mpi_cached_receive(*ptrtile); -} - -#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do { \ - if (chameleon_starpu_data_iscached(A, Am, An)) __chameleon_need_submit = 1; } while(0) - -#else - -#if defined(CHAMELEON_USE_MPI) -#warning "WAR dependencies need starpu_mpi_cached_receive support from StarPU 1.2.1 or greater" -#endif -#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do {} while (0) - -#endif - -#ifdef CHAMELEON_ENABLE_PRUNING_STATS - -#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION \ - int __chameleon_exec = 0; \ - int __chameleon_changed = 0; - -#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) \ - if (chameleon_desc_islocal(A, Am, An)) \ - __chameleon_exec = 1; - -#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION \ - RUNTIME_total_tasks++; \ - if (__chameleon_exec) \ - RUNTIME_exec_tasks++; \ - else if (__chameleon_need_submit) \ - RUNTIME_comm_tasks++; \ - else if (__chameleon_changed) \ - RUNTIME_changed_tasks++; - -#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) \ - int __chameleon_myrank; \ - RUNTIME_comm_rank(&__chameleon_myrank); \ - __chameleon_exec = (rank) == __chameleon_myrank; \ - __chameleon_changed = 1; \ - -#else -#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION -#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) -#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION -#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) -#endif - -#define RUNTIME_BEGIN_ACCESS_DECLARATION \ - RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION - -#define RUNTIME_ACCESS_R(A, Am, An) - -#define RUNTIME_ACCESS_W(A, Am, An) \ - RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An); \ - RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) - -#define RUNTIME_ACCESS_RW(A, Am, An) \ - RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An); \ - RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) - -#define RUNTIME_RANK_CHANGED(rank) \ - RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) - -#define RUNTIME_END_ACCESS_DECLARATION \ - RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION; - #endif /* _chameleon_starpu_h_ */ diff --git a/runtime/starpu/include/chameleon_starpu_internal.h b/runtime/starpu/include/chameleon_starpu_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..7ffa39bb2940f683cc75e2402753ae37abcde79b --- /dev/null +++ b/runtime/starpu/include/chameleon_starpu_internal.h @@ -0,0 +1,208 @@ +/** + * + * @file starpu/chameleon_starpu_internal.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon StarPU runtime header + * + * @version 1.3.0 + * @author Mathieu Faverge + * @author Cedric Castagnede + * @author Florent Pruvost + * @author Philippe Swartvagher + * @author Samuel Thibault + * @author Loris Lucido + * @author Terry Cojean + * @author Matthieu Kuhn + * @date 2024-03-16 + * + */ +#ifndef _chameleon_starpu_internal_h_ +#define _chameleon_starpu_internal_h_ + +#include "control/common.h" +#include "chameleon_starpu.h" + +/* Chameleon interfaces for StarPU */ +#include "cham_tile_interface.h" +#include "cppi_interface.h" + +/** + * @brief StarPU specific Chameleon structure stored in the schedopt field + */ +typedef struct CHAM_context_starpu_s +{ + struct starpu_conf starpu_conf; /**< StarPU main configuration structure */ + struct starpu_parallel_worker_config *pw_config; /**< StarPU parallel workers configuration */ +} CHAM_context_starpu_t; + +/** + * @brief StarPU specific request field stored in the schedopt field + */ +typedef struct RUNTIME_request_starpu_s { + int workerid; // to force task execution on a specific workerid +} RUNTIME_request_starpu_t; + +/** + * @brief Convert the Chameleon access enum to the StarPU one + */ +static inline int cham_to_starpu_access( cham_access_t accessA ) { + assert( (enum starpu_data_access_mode)ChamR == STARPU_R ); + assert( (enum starpu_data_access_mode)ChamW == STARPU_W ); + assert( (enum starpu_data_access_mode)ChamRW == STARPU_RW ); + return accessA; +} + +#include "runtime_codelets.h" +#include "runtime_profiling.h" +#include "runtime_codelet_profile.h" +#include "runtime_workspace.h" + +void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options, + cham_access_t access, cham_flttype_t flttype, + const CHAM_desc_t *A, int m, int n ); + +/* + * MPI Redefinitions + */ +#if defined(CHAMELEON_USE_MPI) + +#if defined(CHAMELEON_RUNTIME_SYNC) +#define rt_starpu_insert_task( _codelet_, ... ) \ + starpu_mpi_insert_task( options->sequence->comm, (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ ) +#else +#define rt_starpu_insert_task( _codelet_, ... ) \ + starpu_mpi_insert_task( options->sequence->comm, (_codelet_), ##__VA_ARGS__ ) +#endif + +#else + +#if defined(CHAMELEON_RUNTIME_SYNC) +#define rt_starpu_insert_task( _codelet_, ... ) \ + starpu_insert_task( (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ ) +#else +#define rt_starpu_insert_task( _codelet_, ... ) \ + starpu_insert_task( (_codelet_), ##__VA_ARGS__ ) +#endif + +#endif + +#if defined(CHAMELEON_RUNTIME_SYNC) +#define rt_shm_starpu_insert_task( _codelet_, ... ) \ + starpu_insert_task( (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ ) +#else +#define rt_shm_starpu_insert_task( _codelet_, ... ) \ + starpu_insert_task( (_codelet_), ##__VA_ARGS__ ) +#endif + +/* + * Enable codelets names + */ +#if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION > 1)) +#define CHAMELEON_CODELETS_HAVE_NAME +#endif + +/** + * MPI tag management + */ +void chameleon_starpu_tag_init( ); +int64_t chameleon_starpu_tag_book( int64_t nbtags ); +void chameleon_starpu_tag_release( int64_t min ); + +/** + * Access to block pointer and leading dimension + */ +#define RTBLKADDR( desc, type, m, n ) ( (starpu_data_handle_t)RUNTIME_data_getaddr( desc, m, n ) ) + +void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, cham_flttype_t dtyp); + +#include "runtime_mpi.h" +#include "runtime_wontuse.h" + +#if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE) +static inline int +chameleon_starpu_data_iscached(const CHAM_desc_t *A, int m, int n) +{ + int64_t mm = m + (A->i / A->mb); + int64_t nn = n + (A->j / A->nb); + + starpu_data_handle_t *ptrtile = A->schedopt; + ptrtile += ((int64_t)A->lmt) * nn + mm; + + if (!(*ptrtile)) { + return 0; + } + + return starpu_mpi_cached_receive(*ptrtile); +} + +#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do { \ + if (chameleon_starpu_data_iscached(A, Am, An)) __chameleon_need_submit = 1; } while(0) + +#else + +#if defined(CHAMELEON_USE_MPI) +#warning "WAR dependencies need starpu_mpi_cached_receive support from StarPU 1.2.1 or greater" +#endif +#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do {} while (0) + +#endif + +#ifdef CHAMELEON_ENABLE_PRUNING_STATS + +#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION \ + int __chameleon_exec = 0; \ + int __chameleon_changed = 0; + +#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) \ + if (chameleon_desc_islocal(A, Am, An)) \ + __chameleon_exec = 1; + +#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION \ + RUNTIME_total_tasks++; \ + if (__chameleon_exec) \ + RUNTIME_exec_tasks++; \ + else if (__chameleon_need_submit) \ + RUNTIME_comm_tasks++; \ + else if (__chameleon_changed) \ + RUNTIME_changed_tasks++; + +#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) \ + int __chameleon_myrank; \ + RUNTIME_comm_rank(&__chameleon_myrank); \ + __chameleon_exec = (rank) == __chameleon_myrank; \ + __chameleon_changed = 1; \ + +#else +#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION +#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) +#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION +#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) +#endif + +#define RUNTIME_BEGIN_ACCESS_DECLARATION \ + RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION + +#define RUNTIME_ACCESS_R(A, Am, An) + +#define RUNTIME_ACCESS_W(A, Am, An) \ + RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An); \ + RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) + +#define RUNTIME_ACCESS_RW(A, Am, An) \ + RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An); \ + RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) + +#define RUNTIME_RANK_CHANGED(rank) \ + RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) + +#define RUNTIME_END_ACCESS_DECLARATION \ + RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION; + +#endif /* _chameleon_starpu_internal_h_ */ diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c index ff25fde78dd7a49816f13813b1994920f822113e..c1bca1ec918d4b7ea3a4d61eba0c89062734ba83 100644 --- a/runtime/starpu/interface/cham_tile_interface.c +++ b/runtime/starpu/interface/cham_tile_interface.c @@ -18,7 +18,7 @@ * @date 2024-07-17 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #if defined(CHAMELEON_USE_HMATOSS) #include "coreblas/hmat.h" diff --git a/runtime/starpu/interface/cppi_interface.c b/runtime/starpu/interface/cppi_interface.c index 6b1f8063180e78dbebf1ea443ee28f98920a7723..862ea7545b4bdb629047628c49e23014ab362a50 100644 --- a/runtime/starpu/interface/cppi_interface.c +++ b/runtime/starpu/interface/cppi_interface.c @@ -16,7 +16,7 @@ * @date 2023-08-22 * */ -#include "chameleon_starpu.h" +#include "chameleon_starpu_internal.h" #undef HAVE_STARPU_REUSE_DATA_ON_NODE CHAM_pivot_t * diff --git a/tools/check_header.sh b/tools/check_header.sh index 2348bee3d7e780c075572cfb47e71813d9fa6de2..acae8a8476fdb528a8689f789666b5ecf7bac669 100755 --- a/tools/check_header.sh +++ b/tools/check_header.sh @@ -186,6 +186,7 @@ files=$( git ls-files | grep -v CTest | grep -v cblas.h | grep -v "lapacke.*\.h" | + grep -v "coreblas/lapack\.h" | grep -v ".*eztrace_module$" | grep -v "simucore/perfmodels/\.starpu" | grep -v "\.xml" |