diff --git a/include/chameleon.h b/include/chameleon.h
index 12c295a7732ef73f1a1fac421bb38be6f0cdd9ea..7aeaf7d331227c35cd34288db6a95b68fc50f2fc 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -30,7 +30,6 @@
  * CHAMELEON types and constants
  */
 #include "chameleon/config.h"
-#define _GNU_SOURCE 1
 #include <stdio.h>
 #include "chameleon/constants.h"
 #include "chameleon/types.h"
diff --git a/runtime/starpu/codelets/codelet_convert.c b/runtime/starpu/codelets/codelet_convert.c
index 8a8b34fccc628069e41d23ed3eb2ad11fa6942b7..2ce6ff8e888873ccfae5e1788f92b8f16118b0f4 100644
--- a/runtime/starpu/codelets/codelet_convert.c
+++ b/runtime/starpu/codelets/codelet_convert.c
@@ -16,7 +16,7 @@
  * @date 2023-07-06
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelets.h"
 #define PRECISION_z
 #include "runtime_codelet_z.h"
diff --git a/runtime/starpu/codelets/codelet_dlag2h.c b/runtime/starpu/codelets/codelet_dlag2h.c
index f2106e3b6214d57db268a3edf4053693e3bd37b6..227d71ab21effd7ef098007d81aa75ef5d753d63 100644
--- a/runtime/starpu/codelets/codelet_dlag2h.c
+++ b/runtime/starpu/codelets/codelet_dlag2h.c
@@ -22,7 +22,7 @@
  * @precisions normal d -> d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_d.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_dlag2z.c b/runtime/starpu/codelets/codelet_dlag2z.c
index a3dc89cd5cb5f25183db641107d3a1092264d076..647b4f194b1d086712663a045e18fa17f2efe519 100644
--- a/runtime/starpu/codelets/codelet_dlag2z.c
+++ b/runtime/starpu/codelets/codelet_dlag2z.c
@@ -17,7 +17,7 @@
  * @precisions normal z -> c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_dzasum.c b/runtime/starpu/codelets/codelet_dzasum.c
index 3c4a6b4d80594b12fca50a9f938a2a49e10c25cf..cf0a81c2ffc95611a68bfda3092a8c11cc7048fb 100644
--- a/runtime/starpu/codelets/codelet_dzasum.c
+++ b/runtime/starpu/codelets/codelet_dzasum.c
@@ -22,7 +22,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_gemm.c b/runtime/starpu/codelets/codelet_gemm.c
index 64cac09f4bea08a6eae21247394ade1f7bbe0fa4..718b191a525591112d5878ba9da706b79aa96d58 100644
--- a/runtime/starpu/codelets/codelet_gemm.c
+++ b/runtime/starpu/codelets/codelet_gemm.c
@@ -16,7 +16,7 @@
  * @date 2024-03-11
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelets.h"
 #define PRECISION_z
 #include "runtime_codelet_z.h"
diff --git a/runtime/starpu/codelets/codelet_gemmex.c b/runtime/starpu/codelets/codelet_gemmex.c
index a7a76caaf5cbf003412b86e4f6ced8afbc37e084..68e8615a54c5c79909b900b4b6bd1baf14d9d78e 100644
--- a/runtime/starpu/codelets/codelet_gemmex.c
+++ b/runtime/starpu/codelets/codelet_gemmex.c
@@ -16,7 +16,7 @@
  * @date 2023-07-06
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelets.h"
 
 CHAMELEON_CL_CB( gemmex, cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), cti_handle_get_n(task->handles[0]), 2. *M*N*K) /* If A^t, computation is wrong */
diff --git a/runtime/starpu/codelets/codelet_hgemm.c b/runtime/starpu/codelets/codelet_hgemm.c
index 325dd3aa59c3fab93ae94028ba7b0900b226e0e5..4456b974e5e06cd56cbc8cbdf460fe87a9b898de 100644
--- a/runtime/starpu/codelets/codelet_hgemm.c
+++ b/runtime/starpu/codelets/codelet_hgemm.c
@@ -16,7 +16,7 @@
  * @date 2024-03-11
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelets.h"
 
 CHAMELEON_CL_CB( hgemm, cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), cti_handle_get_n(task->handles[0]), 2. *M*N*K) /* If A^t, computation is wrong */
diff --git a/runtime/starpu/codelets/codelet_ipiv.c b/runtime/starpu/codelets/codelet_ipiv.c
index e5dba252a6312d625a825485cc84d0657973f435..5290dbabfc474d270ca116f513a5b15463daab50 100644
--- a/runtime/starpu/codelets/codelet_ipiv.c
+++ b/runtime/starpu/codelets/codelet_ipiv.c
@@ -16,7 +16,7 @@
  * @date 2024-03-16
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelets.h"
 
 static void cl_ipiv_init_cpu_func(void *descr[], void *cl_arg)
diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c
index 2ef297b0d71e0152a5e997be70bf0880e2260046..982942cb5c48a1578d95a58eedc03d3f07780be1 100644
--- a/runtime/starpu/codelets/codelet_map.c
+++ b/runtime/starpu/codelets/codelet_map.c
@@ -15,7 +15,7 @@
  * @date 2024-03-11
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_map_args_s {
diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c
index 3aa9e8523a395a0fe63ab22bbfdd58dfb6f038d3..df99e1e1bc4ebd183edda804c9dc60779cff5e60 100644
--- a/runtime/starpu/codelets/codelet_zaxpy.c
+++ b/runtime/starpu/codelets/codelet_zaxpy.c
@@ -19,7 +19,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c
index f96db26a88ad966fcf3d4c41d4cf6a9d021bbbdd..5d59377cad2788af7e5629b622a7aa701341fa2d 100644
--- a/runtime/starpu/codelets/codelet_zbuild.c
+++ b/runtime/starpu/codelets/codelet_zbuild.c
@@ -27,7 +27,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index a5dca09b287bad72facf4e159a56e90b5d842f58..9f10f796c9c6ef2c57d9ce9d24d05503c8e2b131 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -20,7 +20,7 @@
  *  @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if defined(PRECISION_z) || defined(PRECISION_c)
diff --git a/runtime/starpu/codelets/codelet_zccallback.c b/runtime/starpu/codelets/codelet_zccallback.c
index 534e4b047a179729299bbf031aa706322b93a33f..a1d3f893f6279d98045393bb59f5abced3126e29 100644
--- a/runtime/starpu/codelets/codelet_zccallback.c
+++ b/runtime/starpu/codelets/codelet_zccallback.c
@@ -19,7 +19,7 @@
  * @precisions mixed zc -> ds
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_zc.h"
 
 CHAMELEON_CL_CB(zlag2c,        cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0,                                      M*N)
diff --git a/runtime/starpu/codelets/codelet_zcesca.c b/runtime/starpu/codelets/codelet_zcesca.c
index 25708af1c25b4673c6e127ac9c64fc0f8829f088..0549b5906ed0bccd17a0edefd2ed114aa1fff7b2 100644
--- a/runtime/starpu/codelets/codelet_zcesca.c
+++ b/runtime/starpu/codelets/codelet_zcesca.c
@@ -15,7 +15,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zcesca_args_s {
diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c
index 37c6a78790701f77ce2937cb8c61f2531af610c1..5bfaa5252e1c02ef4a8390c810fa9cc9cc885803 100644
--- a/runtime/starpu/codelets/codelet_zgeadd.c
+++ b/runtime/starpu/codelets/codelet_zgeadd.c
@@ -22,7 +22,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c
index 3727a8f52e9fcb789dc5deb4ea89b398b732bc81..357d73ef179a4903c4731c7a48b9050a174ca792 100644
--- a/runtime/starpu/codelets/codelet_zgelqt.c
+++ b/runtime/starpu/codelets/codelet_zgelqt.c
@@ -26,7 +26,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 9a5dbcc326859044fbbeb7065d2e51c48e5c8f07..f1d684d1899d915ba40bee4c0e9f3e9cd91e86ac 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -28,7 +28,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zgemv.c b/runtime/starpu/codelets/codelet_zgemv.c
index 35830c312217dab4f7b8ca1162f281cf4b7b0033..b951c8686ef5c952e4bbda3c863ca15a6fbef44e 100644
--- a/runtime/starpu/codelets/codelet_zgemv.c
+++ b/runtime/starpu/codelets/codelet_zgemv.c
@@ -17,7 +17,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c
index bb350b8d4f953da08e8e4872231cc991214d41a6..efdd162a62d25d1d3a035c2bbdb0df805a839c34 100644
--- a/runtime/starpu/codelets/codelet_zgeqrt.c
+++ b/runtime/starpu/codelets/codelet_zgeqrt.c
@@ -26,7 +26,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zgered.c b/runtime/starpu/codelets/codelet_zgered.c
index fe1c4927ef525aa24dd53a6b83f22d3c5e9959f4..d7a132200d603dca976f5c3d90ecf69cd2a33ab3 100644
--- a/runtime/starpu/codelets/codelet_zgered.c
+++ b/runtime/starpu/codelets/codelet_zgered.c
@@ -18,7 +18,7 @@
  * @precisions normal z -> d
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include <coreblas/lapacke.h>
 #include "runtime_codelet_zc.h"
 #include "runtime_codelet_z.h"
diff --git a/runtime/starpu/codelets/codelet_zgerst.c b/runtime/starpu/codelets/codelet_zgerst.c
index 9a5c825f149c171dd2ad14f812d6bab7ed926546..f0fbdc1a40cffc2f88f46f8cb32807acb0cd5720 100644
--- a/runtime/starpu/codelets/codelet_zgerst.c
+++ b/runtime/starpu/codelets/codelet_zgerst.c
@@ -16,7 +16,7 @@
  * @precisions normal z -> d
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include <coreblas/lapacke.h>
 #include "runtime_codelet_zc.h"
 #include "runtime_codelet_z.h"
diff --git a/runtime/starpu/codelets/codelet_zgersum.c b/runtime/starpu/codelets/codelet_zgersum.c
index dd44fb9f61150ebf4219af469b79bfd779cf8085..8e5223087f8ecc53bd2c06016521fc42a815618b 100644
--- a/runtime/starpu/codelets/codelet_zgersum.c
+++ b/runtime/starpu/codelets/codelet_zgersum.c
@@ -19,7 +19,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c
index 4c2f1fb1b51671bce652acdd344ef11b4a2cb8ea..bca40f046c0ae633429ccd20aee1cb5ce1ec579d 100644
--- a/runtime/starpu/codelets/codelet_zgessm.c
+++ b/runtime/starpu/codelets/codelet_zgessm.c
@@ -26,7 +26,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c
index a951ac6bc1248f74ed52bfc3c42cc3076614ecac..5973286920582f4cc16297670834ff5409494a98 100644
--- a/runtime/starpu/codelets/codelet_zgessq.c
+++ b/runtime/starpu/codelets/codelet_zgessq.c
@@ -22,7 +22,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zgesum.c b/runtime/starpu/codelets/codelet_zgesum.c
index a301533d1de006ab7e2cb07eb03d57df60f9b952..13a07b920bf994327983e525e06320b705794cde 100644
--- a/runtime/starpu/codelets/codelet_zgesum.c
+++ b/runtime/starpu/codelets/codelet_zgesum.c
@@ -15,7 +15,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zgesum_args_s {
diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c
index 181f57df0a156220595ad64f7f657614e4294958..f55d866043bbc4c765318ccba6d832ad24bc4d59 100644
--- a/runtime/starpu/codelets/codelet_zgetrf.c
+++ b/runtime/starpu/codelets/codelet_zgetrf.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zgetrf_batched.c b/runtime/starpu/codelets/codelet_zgetrf_batched.c
index d9c55d76cd3fa290ab004ebc854e3d5f4638cf93..3a96d1e9a96059554fbaafeb7b7e74472d1d967c 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_batched.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_batched.c
@@ -21,9 +21,8 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
-#include <coreblas/cblas_wrapper.h>
 
 struct cl_getrf_batched_args_t {
     const char              *cl_name;
diff --git a/runtime/starpu/codelets/codelet_zgetrf_blocked.c b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
index 8739f27deb22f8ba019fa85338c4fdcbc0a0d789..944759fa27f71a3482181be0580872d19d990dc4 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_blocked.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_blocked.c
@@ -19,9 +19,8 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
-#include <coreblas/cblas_wrapper.h>
 
 CHAMELEON_CL_CB( zgetrf_blocked_diag,    cti_handle_get_m(task->handles[0]), 0, 0, M );
 CHAMELEON_CL_CB( zgetrf_blocked_offdiag, cti_handle_get_m(task->handles[0]), 0, 0, M );
diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
index 4ff218c4c0f8cd050cc63ac921e29d822255c594..899046a2441ddda529a173c39aee7099532019d0 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
@@ -26,7 +26,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
index 3c0bccc80158b41ef3bbc7a2b732f0dcc478bd64..842dcfae5ea18fc3a0e9d87ee566fd19dd8b23f1 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
@@ -23,7 +23,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 /*
diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c
index 5643f3e638c5c6f45adb57d6c4518ec5eabd6d75..d55e50e3beebc69ef2151b35e44acd80f72e8c70 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv_percol.c
@@ -17,9 +17,8 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
-#include <coreblas/cblas_wrapper.h>
 
 CHAMELEON_CL_CB( zgetrf_nopiv_percol_diag, cti_handle_get_m(task->handles[0]), 0, 0, M );
 CHAMELEON_CL_CB( zgetrf_nopiv_percol_trsm, cti_handle_get_m(task->handles[0]), 0, 0, M );
diff --git a/runtime/starpu/codelets/codelet_zgetrf_percol.c b/runtime/starpu/codelets/codelet_zgetrf_percol.c
index 0b556f81605a9cc78faea6fa6e312ffc0e643631..df84a84260708598630f3ef00346c042e4af5f08 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_percol.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_percol.c
@@ -19,9 +19,8 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
-#include <coreblas/cblas_wrapper.h>
 
 CHAMELEON_CL_CB( zgetrf_percol_diag,    cti_handle_get_m(task->handles[0]), 0, 0, M );
 CHAMELEON_CL_CB( zgetrf_percol_offdiag, cti_handle_get_m(task->handles[0]), 0, 0, M );
diff --git a/runtime/starpu/codelets/codelet_zgram.c b/runtime/starpu/codelets/codelet_zgram.c
index d43e7884f99e868600338a9c9f349ef8b0d97bf6..feb1af5dc295cd39c8cf2edced92a864a7f4c2a3 100644
--- a/runtime/starpu/codelets/codelet_zgram.c
+++ b/runtime/starpu/codelets/codelet_zgram.c
@@ -17,7 +17,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c
index 19af61a01076047ff6d00aa28a66a985c6642754..83e945da165df5158336baa2b447afa2b30b6ab7 100644
--- a/runtime/starpu/codelets/codelet_zhe2ge.c
+++ b/runtime/starpu/codelets/codelet_zhe2ge.c
@@ -20,7 +20,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index 7d882b0345224aff4a049e59e45b75328d9e3380..3766fdfbd40a71d9da11e3817d77a2995e2fcb64 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zhemm_args_s {
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index 400f14612eedc17699f313def07115ebd6452083..974ea61221403f3a5688b773d5701b6926ef6ea7 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c
index f7c168edda1839004c51689971eedf7c15b615b3..f59ec51641f1c0e4893664123823f84f7f37522e 100644
--- a/runtime/starpu/codelets/codelet_zherfb.c
+++ b/runtime/starpu/codelets/codelet_zherfb.c
@@ -20,7 +20,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index 243d8346c148cd2af465479b36d96087911481eb..09395d98fb5f2a28260f74ed90aedcad17ccd962 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -25,7 +25,7 @@
  * @precisions normal z -> c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zherk_args_s {
diff --git a/runtime/starpu/codelets/codelet_zhessq.c b/runtime/starpu/codelets/codelet_zhessq.c
index e256d530242f9b5a1ed0f2b0f6ac764d3403f641..1f951c8b61939f7bbd8456198c0c66ff91014d46 100644
--- a/runtime/starpu/codelets/codelet_zhessq.c
+++ b/runtime/starpu/codelets/codelet_zhessq.c
@@ -20,7 +20,7 @@
  * @precisions normal z -> c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
diff --git a/runtime/starpu/codelets/codelet_zipiv_allreduce.c b/runtime/starpu/codelets/codelet_zipiv_allreduce.c
index 13a41ceb04be76b2f89419a20bd6209d3aebd6e3..f296bb3df7433bf6eb8d8c47e468b262ee741f16 100644
--- a/runtime/starpu/codelets/codelet_zipiv_allreduce.c
+++ b/runtime/starpu/codelets/codelet_zipiv_allreduce.c
@@ -15,9 +15,8 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
-#include <coreblas/cblas_wrapper.h>
 
 #if defined(CHAMELEON_USE_MPI)
 struct cl_redux_args_t {
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index 2a3d9781e676d85507d0135848b25211d04e263a..2d227b37b7167fa28f687652cdcc4b5e734b3299 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -25,7 +25,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zlacpy_args_s {
diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c
index 1a56bc748671464a452e8dbcb29aff43630ce060..d95671b6b8db065d088eb975b89053d67cd630f3 100644
--- a/runtime/starpu/codelets/codelet_zlag2c.c
+++ b/runtime/starpu/codelets/codelet_zlag2c.c
@@ -22,7 +22,7 @@
  * @precisions mixed zc -> ds
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_zc.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c
index 01ac3d111d591531423bc495ba3e40e6ece46bdc..d6924c836b120eaaf80f7fe6a07f005593cb7894 100644
--- a/runtime/starpu/codelets/codelet_zlange.c
+++ b/runtime/starpu/codelets/codelet_zlange.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c
index 3a12a9befcd4c6e5fed4a6b26756e3d5081ebc76..1477286e6c00e963d00c618a83a374d8cfa18afe 100644
--- a/runtime/starpu/codelets/codelet_zlanhe.c
+++ b/runtime/starpu/codelets/codelet_zlanhe.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c
index d07dd10b7dcb480fb3e3def31c66600315553e7d..d602b34feb86b3e95755af04c91aa94854199ebf 100644
--- a/runtime/starpu/codelets/codelet_zlansy.c
+++ b/runtime/starpu/codelets/codelet_zlansy.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c
index cee8c0b7f7447dc95f320d2ec8bd59e629d4d9b0..565580d880bd3e8ac258c3dcde02a68612162a2f 100644
--- a/runtime/starpu/codelets/codelet_zlantr.c
+++ b/runtime/starpu/codelets/codelet_zlantr.c
@@ -22,7 +22,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c
index 1df2a126731effd96cb59047bdedcb009b4b5f3e..ab76d808ee3b0c4c8c13d452b165eff718508a03 100644
--- a/runtime/starpu/codelets/codelet_zlascal.c
+++ b/runtime/starpu/codelets/codelet_zlascal.c
@@ -21,7 +21,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zlascal_args_s {
diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c
index 19f77a3520c70ab70e862e9da460e030c71ca8b9..e2c168968c3505e4d7ec7e2f069b7800e0f6d5d2 100644
--- a/runtime/starpu/codelets/codelet_zlaset.c
+++ b/runtime/starpu/codelets/codelet_zlaset.c
@@ -23,7 +23,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zlaset_args_s {
diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c
index deef53db08adf0af79411f1bd7f62aa5673f1d48..29967f340cd21f9cf89ef01174b3281de273a3d6 100644
--- a/runtime/starpu/codelets/codelet_zlaset2.c
+++ b/runtime/starpu/codelets/codelet_zlaset2.c
@@ -25,7 +25,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zlaswp.c b/runtime/starpu/codelets/codelet_zlaswp.c
index 4ffa8384cdec2faab1f264571cc3757263cee138..ade365c68ff52757a11b9c8077d14ce28e7208d0 100644
--- a/runtime/starpu/codelets/codelet_zlaswp.c
+++ b/runtime/starpu/codelets/codelet_zlaswp.c
@@ -16,7 +16,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c
index 9ce1578774c8a5581506978eb4947b71fa2536bf..e5a2c67d9b1b18684d7a7ffd5aa8d74f6eab6398 100644
--- a/runtime/starpu/codelets/codelet_zlatro.c
+++ b/runtime/starpu/codelets/codelet_zlatro.c
@@ -26,7 +26,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c
index 34a6f3657e810520cae6cb276360a65f8c858c8c..8ed5a979ca52ca235ea7eb81f8bd0c5afcd409cf 100644
--- a/runtime/starpu/codelets/codelet_zlauum.c
+++ b/runtime/starpu/codelets/codelet_zlauum.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zlauum_args_s {
diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c
index 82c3a6511f6cd5f7ef00470abb15992f065d87a9..b93fd808d01f79f7f301e8bb91ae2a315fdfb481 100644
--- a/runtime/starpu/codelets/codelet_zplghe.c
+++ b/runtime/starpu/codelets/codelet_zplghe.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zplghe_args_s {
diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c
index dfb38163ce6769ecabca7dc1c6d0cf7ffe3fd8a0..36f42b355a1d5f7110cae098d23b95ba007f5aa7 100644
--- a/runtime/starpu/codelets/codelet_zplgsy.c
+++ b/runtime/starpu/codelets/codelet_zplgsy.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zplgsy_args_s {
diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c
index 343793a086a351b5f36533e5527579d976605ca0..5b104fd45fd36f599432e432db55a6ba084d42c3 100644
--- a/runtime/starpu/codelets/codelet_zplrnt.c
+++ b/runtime/starpu/codelets/codelet_zplrnt.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zplrnt_args_s {
diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c
index 15f3bb16e0f38a19189d8f3bcab4927c74201811..ab1457e5eaa41f065f69f4553543eb044db5aa40 100644
--- a/runtime/starpu/codelets/codelet_zplssq.c
+++ b/runtime/starpu/codelets/codelet_zplssq.c
@@ -21,7 +21,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c
index 5c9e183b71d83663a522f27d3ccab6e3af7db5cf..929c9b75dfa172dea2eb1cb79fe25496adbcba26 100644
--- a/runtime/starpu/codelets/codelet_zpotrf.c
+++ b/runtime/starpu/codelets/codelet_zpotrf.c
@@ -25,7 +25,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zpotrf_args_s {
diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c
index 56b3fd54c253f04ea1037d319283ba71021841be..d7172e5254a8ba1a2e8fcabc75b2ee9fd3571ecb 100644
--- a/runtime/starpu/codelets/codelet_zssssm.c
+++ b/runtime/starpu/codelets/codelet_zssssm.c
@@ -26,7 +26,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index 86dd2753c38fadec0dfe9f7135f02027040c0969..b5b09b1852aebdec4d13c1d09eeacea55a39b80b 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -25,7 +25,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zsymm_args_s {
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index 2c2d35c2fb9a557719be7c6058f52e0fd1c14f1f..9428811f680debc4eef62616c036d57a03efcf5c 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 0bd8f348f4b2e3bb2a73bc0b69b2f65326bfacee..f0005e36529aa851df7c6a84642b68319a8b4d40 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -26,7 +26,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_zsyrk_args_s {
diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c
index 53815341b125cbb97d51c47941ffa970b0d172f9..adcbc009b683c8e3b4070f15af7d4e5aee697029 100644
--- a/runtime/starpu/codelets/codelet_zsyssq.c
+++ b/runtime/starpu/codelets/codelet_zsyssq.c
@@ -21,7 +21,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
index 44d1bb95e8151135cb96824d9d12f137d5f12fd3..dd29deac7c4c1d126718ffe675907b08b178ca4a 100644
--- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
@@ -25,7 +25,7 @@
  * @precisions normal z -> c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c
index 7bef4104106c32c6bdda54573a5b9ac8539c9bf8..186665b03f0657cb1157e4560b0feb7b6c82693f 100644
--- a/runtime/starpu/codelets/codelet_ztplqt.c
+++ b/runtime/starpu/codelets/codelet_ztplqt.c
@@ -19,7 +19,7 @@
  * @precisions normal z -> s d c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c
index 62b0e5d208d6dc2f18e3ce00e274d7bde7cc0852..685605a0f991cc74428d29c698122da34b3624d0 100644
--- a/runtime/starpu/codelets/codelet_ztpmlqt.c
+++ b/runtime/starpu/codelets/codelet_ztpmlqt.c
@@ -17,7 +17,7 @@
  * @precisions normal z -> s d c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c
index e2e0349bf0f31bdef5fdd99209500de7d1a96f74..5d992f7a1c56e40ef5a809af830e9cdb678f050d 100644
--- a/runtime/starpu/codelets/codelet_ztpmqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpmqrt.c
@@ -17,7 +17,7 @@
  * @precisions normal z -> s d c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c
index df369b0b26ad82ec029094dd5401853109f8d1fa..65cd6d6d371f1fa2dd90eafc572c98ef856f7108 100644
--- a/runtime/starpu/codelets/codelet_ztpqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpqrt.c
@@ -20,7 +20,7 @@
  * @precisions normal z -> s d c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c
index 62f1424d5d56b31fb07e8d2052ceffaa74ba9491..101bf72a38c61aa9989c39c9bb341c028ecb2323 100644
--- a/runtime/starpu/codelets/codelet_ztradd.c
+++ b/runtime/starpu/codelets/codelet_ztradd.c
@@ -20,7 +20,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_ztradd_args_s {
diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c
index c0f14b574f70f44dee0b4914074556dc795c3f0e..6c7ffd5c5c36b1cf1b0612aad43071177baf8b12 100644
--- a/runtime/starpu/codelets/codelet_ztrasm.c
+++ b/runtime/starpu/codelets/codelet_ztrasm.c
@@ -22,7 +22,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index 13341ad0b51a5ceff2c3dcc8a5324b44d8964160..ced241e54df8bf789e8406d2ea4df4c22d5bde6f 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_ztrmm_args_s {
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index c6e92447583a3773c21ed16d0e7dfb6e6f67aef1..f20b7c5ec7b8469c4a9ac4d65b428da34134d9dc 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -26,7 +26,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_ztrsm_args_s {
diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c
index 23e1c525a031a58bce8f19cbde50c6c99e46d58c..186e34e24187ed24090b5e77ae99c3f45ce2ca30 100644
--- a/runtime/starpu/codelets/codelet_ztrssq.c
+++ b/runtime/starpu/codelets/codelet_ztrssq.c
@@ -21,7 +21,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c
index e9e000753494781624bfb1036d84b61c0c8d6a1a..5582aac9fbce8872dafd5a92fd07304596fa4c41 100644
--- a/runtime/starpu/codelets/codelet_ztrtri.c
+++ b/runtime/starpu/codelets/codelet_ztrtri.c
@@ -24,7 +24,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 struct cl_ztrtri_args_s {
diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
index 4aed2ed5a590fdfdc7c88f6e0005471e5735d4cb..1edd2d910a6b26e05ee37bbe0cb497053c7df265 100644
--- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
@@ -22,7 +22,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
index ca9798845c33444be582a9bdaac3fed9a2da254a..1b73994231e7217abc1b1d2b153d44008eb821cb 100644
--- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
@@ -22,7 +22,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c
index e9743232bd8a658e0f0aef9c317f3e618a8377a9..9140671e52a93786da4a22bd6013345d5f1e39be 100644
--- a/runtime/starpu/codelets/codelet_ztstrf.c
+++ b/runtime/starpu/codelets/codelet_ztstrf.c
@@ -26,7 +26,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c
index dd4251dcc5d1660fb314da81a35f66ee6b155ea9..f4f2cb09910d5c620ab7c58600ecaf2e2042c62c 100644
--- a/runtime/starpu/codelets/codelet_zunmlq.c
+++ b/runtime/starpu/codelets/codelet_zunmlq.c
@@ -27,7 +27,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c
index 4640a678e21e58d3f30e7b1b4ba16bc29939fd1b..8f2b77e0596ce710cca31f210d97c7ea77e4dbfe 100644
--- a/runtime/starpu/codelets/codelet_zunmqr.c
+++ b/runtime/starpu/codelets/codelet_zunmqr.c
@@ -26,7 +26,7 @@
  * @precisions normal z -> c d s
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if !defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/control/runtime_async.c b/runtime/starpu/control/runtime_async.c
index ea19203fcf95effe511255ff690ccdb7810f28eb..4133aeefd3088338862add96d3343773b20fef1e 100644
--- a/runtime/starpu/control/runtime_async.c
+++ b/runtime/starpu/control/runtime_async.c
@@ -19,7 +19,7 @@
  * @date 2024-03-16
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 
 /**
  *  Create a sequence
@@ -93,9 +93,9 @@ int RUNTIME_request_create( CHAM_context_t    *chamctxt,
 {
     (void)chamctxt;
     /* allocate schedopt */
-    request->schedopt = (starpu_option_request_t*)malloc(sizeof(starpu_option_request_t));
+    request->schedopt = (RUNTIME_request_starpu_t*)malloc(sizeof(RUNTIME_request_starpu_t));
     /* initialize schedopt */
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(request->schedopt);
+    RUNTIME_request_starpu_t* schedopt = (RUNTIME_request_starpu_t *)(request->schedopt);
     /* default is to not use "execute_on_a_specific_worker" i.e. -1 */
     schedopt->workerid = -1;
     request->status = CHAMELEON_SUCCESS;
@@ -124,7 +124,7 @@ int RUNTIME_request_set( CHAM_context_t  *chamctxt,
         chameleon_error("RUNTIME_request_set", "request not initialized");
         return CHAMELEON_ERR_NOT_INITIALIZED;
     }
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(request->schedopt);
+    RUNTIME_request_starpu_t* schedopt = (RUNTIME_request_starpu_t *)(request->schedopt);
 
     switch ( param ) {
         case CHAMELEON_REQUEST_WORKERID:
diff --git a/runtime/starpu/control/runtime_context.c b/runtime/starpu/control/runtime_context.c
index 56966d8cbfd34660b97b2237f59833e4ad901f25..65f7f663a90e3dc120d3a570eeaf0afdbfb966a2 100644
--- a/runtime/starpu/control/runtime_context.c
+++ b/runtime/starpu/control/runtime_context.c
@@ -21,7 +21,7 @@
  * @date 2022-02-22
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 
 /**
  * @brief Store the status of some flags to knwo when enable/disable them
@@ -47,7 +47,7 @@ void RUNTIME_context_create( CHAM_context_t *chamctxt )
     chamctxt->scheduler = RUNTIME_SCHED_STARPU;
 
     if ( !starpu_is_initialized() ) {
-        starpu_sched_opt_t *sched_opt = malloc( sizeof(starpu_sched_opt_t) );
+        CHAM_context_starpu_t *sched_opt = malloc( sizeof(CHAM_context_starpu_t) );
 
         sched_opt->pw_config = NULL;
         starpu_conf_init( &(sched_opt->starpu_conf) );
diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c
index 96e1c3ff7cbad2e0c5336a2804ab5d98e08ee56d..c2cb79397c0605f5e18acdd86ffba59bd90fe5b2 100644
--- a/runtime/starpu/control/runtime_control.c
+++ b/runtime/starpu/control/runtime_control.c
@@ -24,7 +24,7 @@
  * @date 2024-03-16
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include <stdio.h>
 #include <stdlib.h>
 #if defined(STARPU_USE_FXT)
@@ -34,7 +34,7 @@
 static int starpu_initialized = 0;
 
 #if defined(STARPU_HAVE_HWLOC) && defined(HAVE_STARPU_PARALLEL_WORKER)
-void chameleon_starpu_parallel_worker_init( starpu_sched_opt_t *sched_opt )
+void chameleon_starpu_parallel_worker_init( CHAM_context_starpu_t *sched_opt )
 {
     char *env_pw_level = chameleon_getenv( "CHAMELEON_PARALLEL_WORKER_LEVEL" );
 
@@ -85,7 +85,7 @@ void chameleon_starpu_parallel_worker_init( starpu_sched_opt_t *sched_opt )
     chameleon_cleanenv( env_pw_level );
 }
 
-void chameleon_starpu_parallel_worker_fini( starpu_sched_opt_t *sched_opt )
+void chameleon_starpu_parallel_worker_fini( CHAM_context_starpu_t *sched_opt )
 {
     if ( sched_opt->pw_config != NULL ) {
         starpu_parallel_worker_shutdown( sched_opt->pw_config );
@@ -149,7 +149,7 @@ int RUNTIME_init( CHAM_context_t *chamctxt,
                   int ncudas,
                   int nthreads_per_worker )
 {
-    starpu_sched_opt_t *sched_opt = (starpu_sched_opt_t*)(chamctxt->schedopt);
+    CHAM_context_starpu_t *sched_opt = (CHAM_context_starpu_t*)(chamctxt->schedopt);
     struct starpu_conf *conf = &sched_opt->starpu_conf;
     int hres = CHAMELEON_ERR_NOT_INITIALIZED;
 
@@ -248,7 +248,7 @@ void RUNTIME_finalize( CHAM_context_t *chamctxt )
         return;
     }
 
-    starpu_sched_opt_t *sched_opt = (starpu_sched_opt_t*)(chamctxt->schedopt);
+    CHAM_context_starpu_t *sched_opt = (CHAM_context_starpu_t*)(chamctxt->schedopt);
     chameleon_starpu_parallel_worker_fini( sched_opt );
 
     starpu_cham_tile_interface_fini();
diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c
index e00b75badbc9a0108f55c4b5a54b6f7160e5b11c..2cb65a53232db5665a47838ba974def4b9de122c 100644
--- a/runtime/starpu/control/runtime_descriptor.c
+++ b/runtime/starpu/control/runtime_descriptor.c
@@ -23,7 +23,7 @@
  * @date 2024-07-17
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 
 /**
  *  Malloc/Free of the data
diff --git a/runtime/starpu/control/runtime_descriptor_ipiv.c b/runtime/starpu/control/runtime_descriptor_ipiv.c
index 1ad0f7a142fd9272a3ffb445dd797db774959d60..e8cc1a1b3500ab1a742563bc9d3ad86a00929e12 100644
--- a/runtime/starpu/control/runtime_descriptor_ipiv.c
+++ b/runtime/starpu/control/runtime_descriptor_ipiv.c
@@ -17,7 +17,7 @@
  * @date 2024-03-16
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 
 /**
  *  Create ws_pivot runtime structures
diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c
index 444975b497849f9d9f1a4052ecfe5a0323d29198..8ec2551f59d59e94c0eb763196576911db49d0cc 100644
--- a/runtime/starpu/control/runtime_options.c
+++ b/runtime/starpu/control/runtime_options.c
@@ -19,12 +19,12 @@
  * @date 2023-07-04
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 
 void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt,
                            RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(request->schedopt);
+    RUNTIME_request_starpu_t* schedopt = (RUNTIME_request_starpu_t *)(request->schedopt);
     options->sequence  = sequence;
     options->request   = request;
     options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE;
diff --git a/runtime/starpu/control/runtime_profiling.c b/runtime/starpu/control/runtime_profiling.c
index 9ed7566df9d7f87281cf007285dcf7361248fb9d..1baed8e3d0390a7068fd6ad37d5a8bff0880f721 100644
--- a/runtime/starpu/control/runtime_profiling.c
+++ b/runtime/starpu/control/runtime_profiling.c
@@ -20,7 +20,7 @@
  * @date 2022-02-22
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include <math.h>
 #if defined(STARPU_USE_FXT)
 #include <starpu_fxt.h>
diff --git a/runtime/starpu/control/runtime_tags.c b/runtime/starpu/control/runtime_tags.c
index 031a556641fd4a59b57f1dd78a548d84e0787628..cc05a73afcc3b07e24d63c77d4ecc88421292490 100644
--- a/runtime/starpu/control/runtime_tags.c
+++ b/runtime/starpu/control/runtime_tags.c
@@ -17,7 +17,7 @@
  * @{
  *
  **/
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 
 #if defined(CHAMELEON_USE_MPI)
 
diff --git a/runtime/starpu/control/runtime_workspace.c b/runtime/starpu/control/runtime_workspace.c
index 5625bb876383a9b5f38d227ff9b39e99fe6f47a3..82bed164954cff9152d220298c0c4b163c795a09 100644
--- a/runtime/starpu/control/runtime_workspace.c
+++ b/runtime/starpu/control/runtime_workspace.c
@@ -19,7 +19,7 @@
  * @date 2023-01-30
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 
 #if defined(CHAMELEON_USE_CUDA)
 #define GPU_WORKER_TYPE STARPU_CUDA_WORKER
diff --git a/runtime/starpu/control/runtime_zlocality.c b/runtime/starpu/control/runtime_zlocality.c
index f0c9e375d26133a41e0c28e7df3f05587d338649..e906a874bd0d5aab61ed8e5c1a9b3cad84cbf3cd 100644
--- a/runtime/starpu/control/runtime_zlocality.c
+++ b/runtime/starpu/control/runtime_zlocality.c
@@ -20,7 +20,7 @@
  * @precisions normal z -> s d c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 #if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP)
diff --git a/runtime/starpu/control/runtime_zprofiling.c b/runtime/starpu/control/runtime_zprofiling.c
index f771cce8a7f32816f4a07182a0501ca909de6c82..10ad6a2cf1debc2149b61c654277947f6cb674c4 100644
--- a/runtime/starpu/control/runtime_zprofiling.c
+++ b/runtime/starpu/control/runtime_zprofiling.c
@@ -20,7 +20,7 @@
  * @precisions normal z -> s d c
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #include "runtime_codelet_z.h"
 
 void RUNTIME_zdisplay_allprofile()
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index 6c4632da84520449a2e2c9f96fedef2209d196e9..44d03baa53c167871669b744dd6dac105b1810b1 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -26,7 +26,7 @@
 #ifndef _chameleon_starpu_h_
 #define _chameleon_starpu_h_
 
-#include "control/common.h"
+#include <chameleon.h>
 
 /* StarPU options */
 #cmakedefine HAVE_STARPU_IDLE_PREFETCH
@@ -93,173 +93,4 @@
 # endif
 #endif
 
-#include "control/common.h"
-#include "runtime_codelets.h"
-#include "runtime_profiling.h"
-#include "runtime_codelet_profile.h"
-#include "runtime_workspace.h"
-#include "cham_tile_interface.h"
-#include "cppi_interface.h"
-
-typedef struct starpu_schedopt_s
-{
-    struct starpu_conf                    starpu_conf; /**< StarPU main configuration structure   */
-    struct starpu_parallel_worker_config *pw_config;   /**< StarPU parallel workers configuration */
-} starpu_sched_opt_t;
-
-/* Structure used to give some options during one request (procedure) */
-typedef struct starpu_option_request_s {
-    int workerid; // to force task execution on a specific workerid
-} starpu_option_request_t;
-
-/**/
-
-static inline int cham_to_starpu_access( cham_access_t accessA ) {
-    assert( (enum starpu_data_access_mode)ChamR  == STARPU_R  );
-    assert( (enum starpu_data_access_mode)ChamW  == STARPU_W  );
-    assert( (enum starpu_data_access_mode)ChamRW == STARPU_RW );
-    return accessA;
-}
-
-void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options,
-                                           cham_access_t access, cham_flttype_t flttype,
-                                           const CHAM_desc_t *A, int m, int n );
-
-/*
- * MPI Redefinitions
- */
-#if defined(CHAMELEON_USE_MPI)
-
-#if defined(CHAMELEON_RUNTIME_SYNC)
-#define rt_starpu_insert_task( _codelet_, ... )                         \
-    starpu_mpi_insert_task( options->sequence->comm, (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
-#else
-#define rt_starpu_insert_task( _codelet_, ... )                         \
-    starpu_mpi_insert_task( options->sequence->comm, (_codelet_), ##__VA_ARGS__ )
-#endif
-
-#else
-
-#if defined(CHAMELEON_RUNTIME_SYNC)
-#define rt_starpu_insert_task( _codelet_, ... )                         \
-    starpu_insert_task( (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
-#else
-#define rt_starpu_insert_task( _codelet_, ... )                         \
-    starpu_insert_task( (_codelet_), ##__VA_ARGS__ )
-#endif
-
-#endif
-
-#if defined(CHAMELEON_RUNTIME_SYNC)
-#define rt_shm_starpu_insert_task( _codelet_, ... )                         \
-    starpu_insert_task( (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
-#else
-#define rt_shm_starpu_insert_task( _codelet_, ... )                         \
-    starpu_insert_task( (_codelet_), ##__VA_ARGS__ )
-#endif
-
-/*
- * Enable codelets names
- */
-#if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION > 1))
-#define CHAMELEON_CODELETS_HAVE_NAME
-#endif
-
-/**
- * MPI tag management
- */
-void    chameleon_starpu_tag_init( );
-int64_t chameleon_starpu_tag_book( int64_t nbtags );
-void    chameleon_starpu_tag_release( int64_t min );
-
-/**
- * Access to block pointer and leading dimension
- */
-#define RTBLKADDR( desc, type, m, n ) ( (starpu_data_handle_t)RUNTIME_data_getaddr( desc, m, n ) )
-
-void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, cham_flttype_t dtyp);
-
-#include "runtime_mpi.h"
-#include "runtime_wontuse.h"
-
-#if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE)
-static inline int
-chameleon_starpu_data_iscached(const CHAM_desc_t *A, int m, int n)
-{
-    int64_t mm = m + (A->i / A->mb);
-    int64_t nn = n + (A->j / A->nb);
-
-    starpu_data_handle_t *ptrtile = A->schedopt;
-    ptrtile += ((int64_t)A->lmt) * nn + mm;
-
-    if (!(*ptrtile)) {
-        return 0;
-    }
-
-    return starpu_mpi_cached_receive(*ptrtile);
-}
-
-#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do {                 \
-        if (chameleon_starpu_data_iscached(A, Am, An)) __chameleon_need_submit = 1; } while(0)
-
-#else
-
-#if defined(CHAMELEON_USE_MPI)
-#warning "WAR dependencies need starpu_mpi_cached_receive support from StarPU 1.2.1 or greater"
-#endif
-#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do {} while (0)
-
-#endif
-
-#ifdef CHAMELEON_ENABLE_PRUNING_STATS
-
-#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION \
-    int __chameleon_exec = 0; \
-    int __chameleon_changed = 0;
-
-#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) \
-    if (chameleon_desc_islocal(A, Am, An)) \
-        __chameleon_exec = 1;
-
-#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION \
-    RUNTIME_total_tasks++; \
-    if (__chameleon_exec) \
-        RUNTIME_exec_tasks++; \
-    else if (__chameleon_need_submit) \
-        RUNTIME_comm_tasks++; \
-    else if (__chameleon_changed) \
-        RUNTIME_changed_tasks++;
-
-#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) \
-    int __chameleon_myrank; \
-    RUNTIME_comm_rank(&__chameleon_myrank); \
-    __chameleon_exec = (rank) == __chameleon_myrank; \
-    __chameleon_changed = 1; \
-
-#else
-#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
-#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
-#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION
-#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
-#endif
-
-#define RUNTIME_BEGIN_ACCESS_DECLARATION        \
-    RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
-
-#define RUNTIME_ACCESS_R(A, Am, An)
-
-#define RUNTIME_ACCESS_W(A, Am, An)             \
-    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An);  \
-    RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)
-
-#define RUNTIME_ACCESS_RW(A, Am, An)            \
-    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An);  \
-    RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)
-
-#define RUNTIME_RANK_CHANGED(rank)              \
-    RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
-
-#define RUNTIME_END_ACCESS_DECLARATION          \
-    RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION;
-
 #endif /* _chameleon_starpu_h_ */
diff --git a/runtime/starpu/include/chameleon_starpu_internal.h b/runtime/starpu/include/chameleon_starpu_internal.h
new file mode 100644
index 0000000000000000000000000000000000000000..7ffa39bb2940f683cc75e2402753ae37abcde79b
--- /dev/null
+++ b/runtime/starpu/include/chameleon_starpu_internal.h
@@ -0,0 +1,208 @@
+/**
+ *
+ * @file starpu/chameleon_starpu_internal.h
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon StarPU runtime internal header
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Cedric Castagnede
+ * @author Florent Pruvost
+ * @author Philippe Swartvagher
+ * @author Samuel Thibault
+ * @author Loris Lucido
+ * @author Terry Cojean
+ * @author Matthieu Kuhn
+ * @date 2024-03-16
+ *
+ */
+#ifndef _chameleon_starpu_internal_h_
+#define _chameleon_starpu_internal_h_
+
+#include "control/common.h"
+#include "chameleon_starpu.h"
+
+/* Chameleon interfaces for StarPU */
+#include "cham_tile_interface.h"
+#include "cppi_interface.h"
+
+/**
+ * @brief StarPU specific context data stored in the schedopt field of CHAM_context_t
+ */
+typedef struct CHAM_context_starpu_s
+{
+    struct starpu_conf                    starpu_conf; /**< StarPU main configuration structure   */
+    struct starpu_parallel_worker_config *pw_config;   /**< StarPU parallel workers configuration */
+} CHAM_context_starpu_t;
+
+/**
+ * @brief StarPU specific request data stored in the schedopt field of RUNTIME_request_t
+ */
+typedef struct RUNTIME_request_starpu_s {
+    int workerid; // worker on which task execution is forced; RUNTIME_request_create() defaults it to -1 (no specific worker)
+} RUNTIME_request_starpu_t;
+
+/**
+ * @brief Convert the Chameleon access enum to the StarPU one (value returned unchanged; the asserts check that the R, W and RW values of both enums are equal)
+ */
+static inline int cham_to_starpu_access( cham_access_t accessA ) {
+    assert( (enum starpu_data_access_mode)ChamR  == STARPU_R  );
+    assert( (enum starpu_data_access_mode)ChamW  == STARPU_W  );
+    assert( (enum starpu_data_access_mode)ChamRW == STARPU_RW );
+    return accessA;
+}
+
+#include "runtime_codelets.h"
+#include "runtime_profiling.h"
+#include "runtime_codelet_profile.h"
+#include "runtime_workspace.h"
+
+void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options,
+                                           cham_access_t access, cham_flttype_t flttype,
+                                           const CHAM_desc_t *A, int m, int n );
+
+/*
+ * MPI Redefinitions
+ */
+#if defined(CHAMELEON_USE_MPI)
+
+#if defined(CHAMELEON_RUNTIME_SYNC)
+#define rt_starpu_insert_task( _codelet_, ... )                         \
+    starpu_mpi_insert_task( options->sequence->comm, (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
+#else
+#define rt_starpu_insert_task( _codelet_, ... )                         \
+    starpu_mpi_insert_task( options->sequence->comm, (_codelet_), ##__VA_ARGS__ )
+#endif
+
+#else
+
+#if defined(CHAMELEON_RUNTIME_SYNC)
+#define rt_starpu_insert_task( _codelet_, ... )                         \
+    starpu_insert_task( (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
+#else
+#define rt_starpu_insert_task( _codelet_, ... )                         \
+    starpu_insert_task( (_codelet_), ##__VA_ARGS__ )
+#endif
+
+#endif
+
+#if defined(CHAMELEON_RUNTIME_SYNC)
+#define rt_shm_starpu_insert_task( _codelet_, ... )                         \
+    starpu_insert_task( (_codelet_), STARPU_TASK_SYNCHRONOUS, 1, ##__VA_ARGS__ )
+#else
+#define rt_shm_starpu_insert_task( _codelet_, ... )                         \
+    starpu_insert_task( (_codelet_), ##__VA_ARGS__ )
+#endif
+
+/*
+ * Enable codelets names
+ */
+#if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION > 1))
+#define CHAMELEON_CODELETS_HAVE_NAME
+#endif
+
+/**
+ * MPI tag management
+ */
+void    chameleon_starpu_tag_init( );
+int64_t chameleon_starpu_tag_book( int64_t nbtags );
+void    chameleon_starpu_tag_release( int64_t min );
+
+/**
+ * Cast the result of RUNTIME_data_getaddr( desc, m, n ) to a StarPU data handle (the type argument is unused)
+ */
+#define RTBLKADDR( desc, type, m, n ) ( (starpu_data_handle_t)RUNTIME_data_getaddr( desc, m, n ) )
+
+void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, cham_flttype_t dtyp);
+
+#include "runtime_mpi.h"
+#include "runtime_wontuse.h"
+
+#if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE)
+static inline int
+chameleon_starpu_data_iscached(const CHAM_desc_t *A, int m, int n)
+{
+    int64_t mm = m + (A->i / A->mb);
+    int64_t nn = n + (A->j / A->nb);
+
+    starpu_data_handle_t *ptrtile = A->schedopt;
+    ptrtile += ((int64_t)A->lmt) * nn + mm;
+
+    if (!(*ptrtile)) {
+        return 0;
+    }
+
+    return starpu_mpi_cached_receive(*ptrtile);
+}
+
+#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do {                 \
+        if (chameleon_starpu_data_iscached(A, Am, An)) __chameleon_need_submit = 1; } while(0)
+
+#else
+
+#if defined(CHAMELEON_USE_MPI)
+#warning "WAR dependencies need starpu_mpi_cached_receive support from StarPU 1.2.1 or greater"
+#endif
+#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do {} while (0)
+
+#endif
+
+#ifdef CHAMELEON_ENABLE_PRUNING_STATS
+
+#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION \
+    int __chameleon_exec = 0; \
+    int __chameleon_changed = 0;
+
+#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) \
+    if (chameleon_desc_islocal(A, Am, An)) \
+        __chameleon_exec = 1;
+
+#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION \
+    RUNTIME_total_tasks++; \
+    if (__chameleon_exec) \
+        RUNTIME_exec_tasks++; \
+    else if (__chameleon_need_submit) \
+        RUNTIME_comm_tasks++; \
+    else if (__chameleon_changed) \
+        RUNTIME_changed_tasks++;
+
+/* Flag the task as rank-changed; __chameleon_exec is set when rank equals the rank obtained from RUNTIME_comm_rank() */
+#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) \
+    int __chameleon_myrank; \
+    RUNTIME_comm_rank(&__chameleon_myrank); \
+    __chameleon_exec = (rank) == __chameleon_myrank; \
+    __chameleon_changed = 1;
+#else
+#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
+#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
+#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION
+#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
+#endif
+
+#define RUNTIME_BEGIN_ACCESS_DECLARATION        \
+    RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
+
+#define RUNTIME_ACCESS_R(A, Am, An)
+
+#define RUNTIME_ACCESS_W(A, Am, An)             \
+    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An);  \
+    RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)
+
+#define RUNTIME_ACCESS_RW(A, Am, An)            \
+    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An);  \
+    RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)
+
+#define RUNTIME_RANK_CHANGED(rank)              \
+    RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
+
+#define RUNTIME_END_ACCESS_DECLARATION          \
+    RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION;
+
+#endif /* _chameleon_starpu_internal_h_ */
diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c
index ff25fde78dd7a49816f13813b1994920f822113e..c1bca1ec918d4b7ea3a4d61eba0c89062734ba83 100644
--- a/runtime/starpu/interface/cham_tile_interface.c
+++ b/runtime/starpu/interface/cham_tile_interface.c
@@ -18,7 +18,7 @@
  * @date 2024-07-17
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #if defined(CHAMELEON_USE_HMATOSS)
 #include "coreblas/hmat.h"
 
diff --git a/runtime/starpu/interface/cppi_interface.c b/runtime/starpu/interface/cppi_interface.c
index 6b1f8063180e78dbebf1ea443ee28f98920a7723..862ea7545b4bdb629047628c49e23014ab362a50 100644
--- a/runtime/starpu/interface/cppi_interface.c
+++ b/runtime/starpu/interface/cppi_interface.c
@@ -16,7 +16,7 @@
  * @date 2023-08-22
  *
  */
-#include "chameleon_starpu.h"
+#include "chameleon_starpu_internal.h"
 #undef HAVE_STARPU_REUSE_DATA_ON_NODE
 
 CHAM_pivot_t *
diff --git a/tools/check_header.sh b/tools/check_header.sh
index 2348bee3d7e780c075572cfb47e71813d9fa6de2..acae8a8476fdb528a8689f789666b5ecf7bac669 100755
--- a/tools/check_header.sh
+++ b/tools/check_header.sh
@@ -186,6 +186,7 @@ files=$( git ls-files                     |
              grep -v CTest                |
              grep -v cblas.h              |
              grep -v "lapacke.*\.h"       |
+             grep -v "coreblas/lapack\.h"       |
              grep -v ".*eztrace_module$"  |
              grep -v "simucore/perfmodels/\.starpu" |
              grep -v "\.xml"              |