diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index 0752216668610bf1d0987c771e821fe372e01324..f4a1e230a900c6c2d471685dd0b26e779a0f4104 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -352,11 +352,14 @@ endif()
 set(LIBOMP_USE_THEQUEUE FALSE CACHE BOOL
   "libOMP based on T.H.E. work stealing protocol.")
 
+set(LIBOMP_USE_LINKED_DEQUEUE FALSE CACHE BOOL
+  "libOMP based on double linked queue.")
+
 set(LIBOMP_USE_THE_AGGREGATION FALSE CACHE BOOL
   "libOMP based on T.H.E. work stealing protocol extended by requests' aggregation.")
 
-if(LIBOMP_USE_THE_AGGREGATION AND (NOT LIBOMP_USE_THEQUEUE))
-  set(LIBOMP_USE_THEQUEUE TRUE)
+if(LIBOMP_USE_THE_AGGREGATION AND (NOT LIBOMP_USE_THEQUEUE AND NOT LIBOMP_USE_LINKED_DEQUEUE))
+  libomp_error_say("OpenMP aggregation protocol requires either LIBOMP_USE_THEQUEUE or LIBOMP_USE_LINKED_DEQUEUE but they are not defined")
 endif()
 
 set(LIBOMP_USE_CONCURRENT_WRITE FALSE CACHE BOOL
@@ -379,13 +382,13 @@ else()
 endif()
 
 set(LIBOMP_USE_EXTSCHED_MEM FALSE CACHE BOOL
-  "libOMP experimental memory scheduling.")
+  "libOMP memory scheduling - experimental feature.")
 
 if(LIBOMP_USE_EXTSCHED_MEM AND (NOT LIBOMP_USE_NUMA))
   libomp_error_say("OpenMP memory scheduling extension requires LIBOMP_USE_NUMA but it is not available")
 endif()
-if(LIBOMP_USE_EXTSCHED_MEM AND (NOT LIBOMP_USE_THEQUEUE))
-  libomp_error_say("OpenMP memory scheduling extension requires LIBOMP_USE_THEQUEUE but it is not available")
+if(LIBOMP_USE_EXTSCHED_MEM AND (NOT LIBOMP_USE_THEQUEUE AND NOT LIBOMP_USE_LINKED_DEQUEUE))
+  libomp_error_say("OpenMP memory scheduling extension requires either LIBOMP_USE_THEQUEUE or LIBOMP_USE_LINKED_DEQUEUE but they are not defined")
 endif()
 
 if(LIBOMP_USE_EXTSCHED_MEM)
@@ -394,6 +397,19 @@ else()
   set(OMP_EXTENSION_SCHED_MEMORY 0)
 endif()
 
+set(LIBOMP_USE_REORDER4LOCALITY FALSE CACHE BOOL
+  "libOMP reorder tasks' list for better locality - experimental feature.")
+
+if(LIBOMP_USE_REORDER4LOCALITY AND (NOT LIBOMP_USE_LINKED_DEQUEUE))
+  libomp_error_say("OpenMP locality scheduling extension requires LIBOMP_USE_LINKED_DEQUEUE but it is not defined")
+endif()
+
+if(LIBOMP_USE_REORDER4LOCALITY)
+  set(OMP_EXTENSION_SCHED_LOCALITY 1<<6)
+else()
+  set(OMP_EXTENSION_SCHED_LOCALITY 0)
+endif()
+
 set(LIBOMP_USE_PAPI FALSE CACHE BOOL
   "libOMP tracing based on PAPI")
 
@@ -469,8 +485,10 @@ if(${LIBOMP_STANDALONE_BUILD})
     libomp_say("Use OMPT-trace       -- ${LIBOMP_OMPT_TRACE}")
   endif()
   libomp_say("Use T.H.E. protocol  -- ${LIBOMP_USE_THEQUEUE}")
+  libomp_say("Use linked dequeue   -- ${LIBOMP_USE_LINKED_DEQUEUE}")
   libomp_say("Use request combining-- ${LIBOMP_USE_THE_AGGREGATION}")
   libomp_say("Use memory scheduler -- ${LIBOMP_USE_EXTSCHED_MEM}")
+  libomp_say("Use locality scheduler-- ${LIBOMP_USE_REORDER4LOCALITY}")
   libomp_say("Use concurrent write -- ${LIBOMP_USE_CONCURRENT_WRITE}")
   libomp_say("Use dyn. hashmap.    -- ${LIBOMP_USE_DYNHASH}")
   libomp_say("Use var. length dep. -- ${LIBOMP_USE_VARDEP}")
diff --git a/runtime/src/CMakeLists.txt b/runtime/src/CMakeLists.txt
index c2027c9d73e38a3c43418ca1a4223a3c4c90ad39..705568ec8fcfe13cb7b6b4a54d9db2ff7f90167d 100644
--- a/runtime/src/CMakeLists.txt
+++ b/runtime/src/CMakeLists.txt
@@ -90,6 +90,7 @@ else()
     kmp_wait_release.cpp
     kmp_affinity.cpp
     kmp_hws.cpp
+    kmp_taskreschedule.cpp
     kmp_queues.cpp
     kmp_dispatch.cpp
     kmp_lock.cpp
@@ -114,7 +115,7 @@ else()
     libomp_append(LIBOMP_CXXFILES kmp_taskdeps.cpp)
     libomp_append(LIBOMP_CXXFILES kmp_cancel.cpp)
   endif()
-  if (${LIBOMP_USE_THEQUEUE})
+  if (${LIBOMP_USE_THEQUEUE} OR ${LIBOMP_USE_LINKED_DEQUEUE})
     libomp_append(LIBOMP_CFILES kaapi_sched_ccsync.c)
     libomp_append(LIBOMP_CFILES kaapi_rt.c)
   endif()
diff --git a/runtime/src/dllexports b/runtime/src/dllexports
index 26452e71f18611e82dd6180ba6f45df6ec2c9428..31cb5bb1c22200d832ceacb1103c2478e1c387fe 100644
--- a/runtime/src/dllexports
+++ b/runtime/src/dllexports
@@ -302,6 +302,8 @@
     __kmpc_omp_info_free_memory             608
     __kmpc_omp_set_memory_limit             609
     __kmpc_omp_set_task_attr                610
+    __kmpc_omp_begin_sched_graph            611
+    __kmpc_omp_end_sched_graph              612
     __kmpc_omp_taskwait                     193
     __kmpc_omp_task_begin_if0               196
     __kmpc_omp_task_complete_if0            197
@@ -519,6 +521,8 @@ kmp_set_warnings_off                        780
     omp_task_declare_dependencies_array         4005
     omp_task_declare_dependencies_array_noalias 4006
     omp_set_task_attr                       4007
+    omp_begin_sched_graph                   4008
+    omp_end_sched_graph                     4009
 
     omp_get_num_teams                       865
     omp_get_team_num                        866
diff --git a/runtime/src/include/40/omp.h.var b/runtime/src/include/40/omp.h.var
index 451e6c52c4c24cdd9de948077495dbeefce4e2e4..a8257367bc14178af8e5aeed910f49c6155b2e0c 100644
--- a/runtime/src/include/40/omp.h.var
+++ b/runtime/src/include/40/omp.h.var
@@ -26,12 +26,14 @@
 #   define OMP_EXTENSION_CONCURRENT_WRITE  (@OMP_EXTENSION_CW@)
 #   define OMP_VARLEN_DEPENDENCIES         (@OMP_VARLEN_DEPENDENCIES@)
 #   define OMP_EXTENSION_SCHED_MEMORY      (@OMP_EXTENSION_SCHED_MEMORY@)
+#   define OMP_EXTENSION_SCHED_LOCALITY    (@OMP_EXTENSION_SCHED_LOCALITY@)
 
 #   define KMP_EXTENSION   ( OMP_EXTENSION_AFFINITY\
                              |OMP_EXTENSION_TASKNAME\
                              |OMP_EXTENSION_CONCURRENT_WRITE\
                              |OMP_VARLEN_DEPENDENCIES\
                              |OMP_EXTENSION_SCHED_MEMORY\
+                             |OMP_EXTENSION_SCHED_LOCALITY\
                              )
 #   define OMP_EXTENSION   KMP_EXTENSION
 
@@ -120,6 +122,10 @@
     extern void __KAI_KMPC_CONVENTION  omp_set_task_name (char *);
     extern void __KAI_KMPC_CONVENTION  omp_set_task_attr (char, long int);
 #endif
+#if OMP_EXTENSION_SCHED_LOCALITY
+    extern void* __KAI_KMPC_CONVENTION  omp_begin_sched_graph (int);
+    extern void __KAI_KMPC_CONVENTION  omp_end_sched_graph (void*, int);
+#endif
 #if OMP_EXTENSION_CONCURRENT_WRITE
     typedef enum omp_depend_info_t {
       omp_depend_info_none      = 0,
diff --git a/runtime/src/include/45/omp.h.var b/runtime/src/include/45/omp.h.var
index 0467d06a1366f864d3141601b2f88ef928c4eed2..8448821d9038fbae81d7557c74e3b657c4368483 100644
--- a/runtime/src/include/45/omp.h.var
+++ b/runtime/src/include/45/omp.h.var
@@ -26,12 +26,14 @@
 #   define OMP_EXTENSION_CONCURRENT_WRITE  (@OMP_EXTENSION_CW@)
 #   define OMP_VARLEN_DEPENDENCIES         (@OMP_VARLEN_DEPENDENCIES@)
 #   define OMP_EXTENSION_SCHED_MEMORY      (@OMP_EXTENSION_SCHED_MEMORY@)
+#   define OMP_EXTENSION_SCHED_LOCALITY    (@OMP_EXTENSION_SCHED_LOCALITY@)
 
 #   define KMP_EXTENSION   ( OMP_EXTENSION_AFFINITY\
                              |OMP_EXTENSION_TASKNAME\
                              |OMP_EXTENSION_CONCURRENT_WRITE\
                              |OMP_VARLEN_DEPENDENCIES\
                              |OMP_EXTENSION_SCHED_MEMORY\
+                             |OMP_EXTENSION_SCHED_LOCALITY\
                              )
 #   define OMP_EXTENSION   KMP_EXTENSION
 
@@ -138,6 +140,10 @@
     extern void __KAI_KMPC_CONVENTION  omp_set_task_name (char *);
     extern void __KAI_KMPC_CONVENTION  omp_set_task_attr (char, long int);
 #endif
+#if OMP_EXTENSION_SCHED_LOCALITY
+    extern void* __KAI_KMPC_CONVENTION  omp_begin_sched_graph (int);
+    extern void __KAI_KMPC_CONVENTION  omp_end_sched_graph (void*, int);
+#endif
 #if OMP_EXTENSION_CONCURRENT_WRITE
     typedef enum omp_depend_info_t {
       omp_depend_info_none      = 0,
diff --git a/runtime/src/include/50/omp.h.var b/runtime/src/include/50/omp.h.var
index 727a088c686a3ae1b3ff21a375d524f646f96665..9d9f0bb02a13a5a86654af8f864524e8269cc0e1 100644
--- a/runtime/src/include/50/omp.h.var
+++ b/runtime/src/include/50/omp.h.var
@@ -26,12 +26,14 @@
 #   define OMP_EXTENSION_CONCURRENT_WRITE  (@OMP_EXTENSION_CW@)
 #   define OMP_VARLEN_DEPENDENCIES         (@OMP_VARLEN_DEPENDENCIES@)
 #   define OMP_EXTENSION_SCHED_MEMORY      (@OMP_EXTENSION_SCHED_MEMORY@)
+#   define OMP_EXTENSION_SCHED_LOCALITY    (@OMP_EXTENSION_SCHED_LOCALITY@)
 
 #   define KMP_EXTENSION 	( OMP_EXTENSION_AFFINITY\
                              |OMP_EXTENSION_TASKNAME\
                              |OMP_EXTENSION_CONCURRENT_WRITE\
                              |OMP_VARLEN_DEPENDENCIES\
                              |OMP_EXTENSION_SCHED_MEMORY\
+                             |OMP_EXTENSION_SCHED_LOCALITY\
                              )
 #   define OMP_EXTENSION 	KMP_EXTENSION
 
@@ -138,6 +140,10 @@
     extern void __KAI_KMPC_CONVENTION  omp_set_task_name (char *);
     extern void __KAI_KMPC_CONVENTION  omp_set_task_attr (char, long int);
 #endif
+#if OMP_EXTENSION_SCHED_LOCALITY
+    extern void* __KAI_KMPC_CONVENTION  omp_begin_sched_graph (int);
+    extern void __KAI_KMPC_CONVENTION  omp_end_sched_graph (void*, int);
+#endif
 #if OMP_EXTENSION_CONCURRENT_WRITE
     typedef enum omp_depend_info_t {
       omp_depend_info_none      = 0,
diff --git a/runtime/src/kaapi_sched_ccsync.c b/runtime/src/kaapi_sched_ccsync.c
index 8024ef666f546fcb13871cb4dacece1014ab25e0..5790537d90d8666d832a74709bd4753af6806ca3 100644
--- a/runtime/src/kaapi_sched_ccsync.c
+++ b/runtime/src/kaapi_sched_ccsync.c
@@ -54,11 +54,6 @@
 #endif
 
 
-/* FWD
-*/
-static inline int __kaapi_wsqueue_push_task(kaapi_wsqueue_t* queue, int remote, kaapi_task_t* task );
-static inline kaapi_task_t* __kaapi_wsqueue_pop_task( kaapi_wsqueue_t* queue );
-static inline kaapi_task_t* __kaapi_wsqueue_steal_task( kaapi_wsqueue_t* queue );
 
 
 /* ============================= REQUEST ============================ */
@@ -156,6 +151,7 @@ typedef struct kaapi_push_request_t {
 } kaapi_push_request_t;
 
 
+#if LIBOMP_USE_LINKED_DEQUEUE
 /** \ingroup WS
     Arg for push request
 */
@@ -165,10 +161,10 @@ typedef struct kaapi_pushlist_request_t {
   int                           ident;          /* system wide id who is emetting the request */
   int                           mask_arch;      /* accepted arch */
   int                           status;         /* request status */
-  kaapi_task_t**                list;           /* to push */
+  kaapi_wsqueue_t*              list;           /* linked list to push */
   int                           size;           /* size of list */
 } kaapi_pushlist_request_t;
-
+#endif
 
 /** \ingroup WS
     Request emitted to get work.
@@ -180,7 +176,9 @@ typedef union kaapi_request_t {
     kaapi_steal_request_t    steal_a;
     kaapi_pop_request_t      pop_a;
     kaapi_push_request_t     push_a;
+#if LIBOMP_USE_LINKED_DEQUEUE
     kaapi_pushlist_request_t push_l;
+#endif
 } kaapi_request_t;
 
 
@@ -387,6 +385,15 @@ int kaapi_listrequest_ccsync_iterator_count(
 /* no concurrency here: always called before starting threads */
 int kaapi_wsqueue_init(kaapi_wsqueue_t* queue, size_t size, int numa_node)
 {
+#if LIBOMP_USE_LINKED_DEQUEUE
+#if !LIBOMP_USE_THE_AGGREGATION
+  __kmp_init_bootstrap_lock( &queue->deque_lock );
+#endif
+  queue->deque_H = 0;
+  queue->deque_T = 0;
+  queue->deque_size = 0;
+  queue->numa_node = numa_node;
+#else // LIBOMP_USE_LINKED_DEQUEUE
 #if !LIBOMP_USE_THE_AGGREGATION
   __kmp_init_bootstrap_lock( &queue->deque_lock_owner );
   __kmp_init_bootstrap_lock( &queue->deque_lock );
@@ -404,6 +411,7 @@ int kaapi_wsqueue_init(kaapi_wsqueue_t* queue, size_t size, int numa_node)
     __kmp_allocate( size * sizeof(kaapi_task_t *));
 #endif
   queue->deque_size = (uint32_t)size;
+#endif // LIBOMP_USE_LINKED_DEQUEUE
 
 #if LIBOMP_USE_THE_AGGREGATION
   /* first non blocked node ! */
@@ -425,6 +433,11 @@ int kaapi_wsqueue_init(kaapi_wsqueue_t* queue, size_t size, int numa_node)
 /* no concurrency here: always called before starting threads */
 int kaapi_wsqueue_realloc(kaapi_wsqueue_t* queue )
 {
+#if LIBOMP_USE_LINKED_DEQUEUE
+    // never realloc queue
+    return 0;
+#else // #if LIBOMP_USE_LINKED_DEQUEUE
+
     kmp_int32 size = queue->deque_size;
     kmp_int32 new_size = 2 * size;
     kaapi_task_t ** new_deque;
@@ -455,6 +468,7 @@ int kaapi_wsqueue_realloc(kaapi_wsqueue_t* queue )
 #endif
     queue->deque = new_deque;
     queue->deque_size = new_size;
+#endif
     return 0;
 }
 
@@ -462,6 +476,9 @@ int kaapi_wsqueue_realloc(kaapi_wsqueue_t* queue )
 /* */
 static int kaapi_wsqueue_realloc_push_remote(kaapi_wsqueue_t* queue )
 {
+#if LIBOMP_USE_LINKED_DEQUEUE
+    return 0;
+#else // #if LIBOMP_USE_LINKED_DEQUEUE
     kmp_int32 size = queue->deque_size;
     kmp_int32 new_size = 2 * size;
     kaapi_task_t ** new_deque;
@@ -488,23 +505,30 @@ static int kaapi_wsqueue_realloc_push_remote(kaapi_wsqueue_t* queue )
     queue->deque_T += shift;
     queue->deque = new_deque;
     queue->deque_size = new_size;
+#endif
     return 0;
 }
 
 
-/* no concurrency here: always called before starting threads 
+/*
 */
 int kaapi_wsqueue_fini(kaapi_wsqueue_t* queue)
 {
 #if !LIBOMP_USE_THE_AGGREGATION
   __kmp_acquire_bootstrap_lock( &queue->deque_lock );
 #endif
+  KMP_DEBUG_ASSERT(queue->deque_H == 0);
+  KMP_DEBUG_ASSERT(queue->deque_T == 0);
+
+#if LIBOMP_USE_LINKED_DEQUEUE
+#else // LIBOMP_USE_LINKED_DEQUEUE
 #if LIBOMP_USE_AFFINITY
   numa_free((void*)queue->deque, queue->deque_size );
 #else
   if (queue->deque) __kmp_free((void*)queue->deque);
 #endif
   queue->deque = 0;
+#endif
 #if LIBOMP_USE_THE_AGGREGATION
   if (queue->tail)
   {
@@ -515,8 +539,7 @@ int kaapi_wsqueue_fini(kaapi_wsqueue_t* queue)
 #endif
     queue->tail = 0;
   }
-#endif
-#if !LIBOMP_USE_THE_AGGREGATION
+#else // !LIBOMP_USE_THE_AGGREGATION
   __kmp_release_bootstrap_lock( &queue->deque_lock );
 #endif
   return 0;
@@ -643,10 +666,10 @@ int kaapi_sched_ccsync_commit_request( kaapi_wsqueue_t* queue, kaapi_request_t*
         tmpNode->wait = 0;
       } break;
 
-#if 0
+#if LIBOMP_USE_LINKED_DEQUEUE
       case KAAPI_REQUEST_OP_PUSHLIST:
       {
-        if (0 == ld->vtable->fs_pushlist(ld, req->push_l.list))
+        if (0 == __kaapi_wsqueue_push_tasklist(queue, req->push_l.list))
           req->header.status = KAAPI_REQUEST_S_OK;
         else
           req->header.status = KAAPI_REQUEST_S_NOK;
@@ -840,10 +863,38 @@ int kaapi_sched_ccsync_pgo_fini( kaapi_place_group_operation_t* kpgo )
 
 /* Assume 1 owner and multiple thieves
 */
-static inline int __kaapi_wsqueue_push_task(kaapi_wsqueue_t* queue, int remote, kaapi_task_t* task )
+int __kaapi_wsqueue_push_task(kaapi_wsqueue_t* queue, int remote, kaapi_task_t* task )
 {
   int err;
   KMP_DEBUG_ASSERT( task != 0);
+#if LIBOMP_USE_LINKED_DEQUEUE
+#if !LIBOMP_USE_THE_AGGREGATION
+  __kmp_acquire_bootstrap_lock( & queue->deque_lock );
+#endif
+  /* push to head */
+  task->prev = 0;
+  task->next = queue->deque_H;
+  if (task->next ==0)
+    queue->deque_T = task;
+  else
+    queue->deque_H->prev = task;
+  queue->deque_H = task;
+#if 0
+  /* push to tail */
+  task->next = 0;
+  task->prev = queue->deque_T;
+  if (task->prev ==0)
+    queue->deque_H = task;
+  else
+    queue->deque_T->next = task;
+  queue->deque_T = task;
+#endif
+  ++queue->deque_size;
+#if !LIBOMP_USE_THE_AGGREGATION
+  __kmp_release_bootstrap_lock( & queue->deque_lock );
+#endif
+  return 0;
+#else // LIBOMP_USE_LINKED_DEQUEUE
   if (!remote)
   {
 reread:
@@ -898,22 +949,52 @@ redo_read:
     __kmp_release_bootstrap_lock( & queue->deque_lock );
 #endif
   }
+#endif // LIBOMP_USE_LINKED_DEQUEUE
   return err;
 }
 
 
+#if LIBOMP_USE_LINKED_DEQUEUE
+int __kaapi_wsqueue_push_tasklist(kaapi_wsqueue_t* queue, kaapi_wsqueue_t* list )
+{
+  KMP_DEBUG_ASSERT( list != 0);
+  if (kaapi_wsqueue_empty( list )) return 0;
+#if !LIBOMP_USE_THE_AGGREGATION
+  __kmp_acquire_bootstrap_lock( & queue->deque_lock );
+#endif
+  /* push to tail */
+  list->deque_T->next = 0;
+  list->deque_H->prev = queue->deque_T;
+  if (list->deque_H->prev ==0)
+    queue->deque_H = list->deque_H;
+  else
+    queue->deque_T->next = list->deque_H;
+  queue->deque_T = list->deque_T;
+  queue->deque_size += list->deque_size;
+
+  list->deque_H = list->deque_T = 0;
+#if !LIBOMP_USE_THE_AGGREGATION
+  __kmp_release_bootstrap_lock( & queue->deque_lock );
+#endif
+  return 0;
+}
+#endif // LIBOMP_USE_LINKED_DEQUEUE
+
 /* Assume N owners and multiple thieves
 */
 static inline int __kaapi_wsqueue_locked_push_task(kaapi_wsqueue_t* queue, int remote, kaapi_task_t* task )
 {
   int retval;
-
+#if LIBOMP_USE_LINKED_DEQUEUE // always locked
+  retval = __kaapi_wsqueue_push_task( queue, remote, task );
+#else
 #if !LIBOMP_USE_THE_AGGREGATION
   __kmp_acquire_bootstrap_lock( &queue->deque_lock_owner );
 #endif
   retval = __kaapi_wsqueue_push_task( queue, remote, task );
 #if !LIBOMP_USE_THE_AGGREGATION
   __kmp_release_bootstrap_lock( &queue->deque_lock_owner );
+#endif
 #endif
   return retval;
 }
@@ -922,9 +1003,32 @@ static inline int __kaapi_wsqueue_locked_push_task(kaapi_wsqueue_t* queue, int r
 
 /*
 */
-static inline kaapi_task_t* __kaapi_wsqueue_pop_task( kaapi_wsqueue_t* queue )
+kaapi_task_t* __kaapi_wsqueue_pop_task( kaapi_wsqueue_t* queue )
 {
-  kaapi_task_t* task;
+  kaapi_task_t* task =0;
+
+#if LIBOMP_USE_LINKED_DEQUEUE
+#if !LIBOMP_USE_THE_AGGREGATION
+  __kmp_acquire_bootstrap_lock( & queue->deque_lock );
+#endif
+  /* pop from Head */
+  if (queue->deque_H)
+  {
+    task = queue->deque_H;
+    queue->deque_H = task->next;
+    if (queue->deque_H ==0)
+      queue->deque_T = 0;
+    else
+      task->next->prev = 0;
+
+    task->prev = task->next = 0;
+    --queue->deque_size;
+  }
+#if !LIBOMP_USE_THE_AGGREGATION
+  __kmp_release_bootstrap_lock( &queue->deque_lock );
+#endif
+  return task;
+#else // LIBOMP_USE_LINKED_DEQUEUE
 
   uint32_t deque_tail = queue->deque_T;
   if (queue->deque_H >= deque_tail)
@@ -960,6 +1064,7 @@ static inline kaapi_task_t* __kaapi_wsqueue_pop_task( kaapi_wsqueue_t* queue )
     return task;
   }
   return queue->deque[ new_tail ];
+#endif
 }
 
 
@@ -968,13 +1073,16 @@ static inline kaapi_task_t* __kaapi_wsqueue_pop_task( kaapi_wsqueue_t* queue )
 static inline kaapi_task_t* __kaapi_wsqueue_locked_pop_task( kaapi_wsqueue_t* queue )
 {
   kaapi_task_t* task;
-
+#if LIBOMP_USE_LINKED_DEQUEUE
+  task = __kaapi_wsqueue_pop_task( queue );
+#else
 #if !LIBOMP_USE_THE_AGGREGATION
   __kmp_acquire_bootstrap_lock( &queue->deque_lock_owner );
 #endif
   task = __kaapi_wsqueue_pop_task( queue );
 #if !LIBOMP_USE_THE_AGGREGATION
   __kmp_release_bootstrap_lock( &queue->deque_lock_owner );
+#endif
 #endif
   return task;
 }
@@ -982,21 +1090,36 @@ static inline kaapi_task_t* __kaapi_wsqueue_locked_pop_task( kaapi_wsqueue_t* qu
 
 /*
 */
-static inline kaapi_task_t* __kaapi_wsqueue_steal_task( kaapi_wsqueue_t* queue )
+kaapi_task_t* __kaapi_wsqueue_steal_task( kaapi_wsqueue_t* queue )
 {
-  kaapi_task_t* task;
+  kaapi_task_t* task =0;
+#if LIBOMP_USE_LINKED_DEQUEUE
+  return __kaapi_wsqueue_pop_task(queue);
+#if !LIBOMP_USE_THE_AGGREGATION
+  __kmp_acquire_bootstrap_lock( & queue->deque_lock );
+#endif
+  /* steal from tail */
+  if (queue->deque_T)
+  {
+    task = queue->deque_T;
+    queue->deque_T = task->prev;
+    if (queue->deque_T ==0)
+      queue->deque_H = 0;
+    else
+      task->prev->next = 0;
 
-#if 0
-  uint32_t deque_head = queue->deque_H;
-  uint32_t deque_tail = queue->deque_T;
-  if (deque_head >= deque_tail)
-    return 0;
-#else
-  uint32_t deque_head;
-  uint32_t deque_tail;
+    task->prev = task->next = 0;
+    --queue->deque_size;
+  }
+#if !LIBOMP_USE_THE_AGGREGATION
+  __kmp_release_bootstrap_lock( & queue->deque_lock );
 #endif
+  return task;
 
-  task = 0;
+#else // LIBOMP_USE_LINKED_DEQUEUE
+
+  uint32_t deque_head;
+  uint32_t deque_tail;
 
   /* Thief lock */
 #if !LIBOMP_USE_THE_AGGREGATION
@@ -1018,6 +1141,7 @@ static inline kaapi_task_t* __kaapi_wsqueue_steal_task( kaapi_wsqueue_t* queue )
   /* Thief unlock */
   __kmp_release_bootstrap_lock( & queue->deque_lock );
 #endif
+#endif // LIBOMP_USE_LINKED_DEQUEUE
   return task;
 }
 
@@ -1044,6 +1168,23 @@ int kaapi_wsqueue_push_task(kaapi_wsqueue_t* queue, int remote, kaapi_task_t* ta
 #endif
 }
 
+#if LIBOMP_USE_LINKED_DEQUEUE
+extern int kaapi_wsqueue_push_tasklist(kaapi_wsqueue_t* queue, kaapi_wsqueue_t* list )
+{
+#if LIBOMP_USE_THE_AGGREGATION
+  kaapi_request_t* request = kaapi_sched_ccsync_post_request(queue);
+  request->header.ident = 0; /* queue->kid; */
+  request->header.op  = KAAPI_REQUEST_OP_PUSHLIST;
+  request->push_l.list  = list;
+
+  kaapi_sched_ccsync_commit_request(queue, request);
+
+  return kaapi_request_get_status(request) == KAAPI_REQUEST_S_OK ? 0 : EINVAL;
+#else
+  return __kaapi_wsqueue_push_tasklist( queue, list );
+#endif
+}
+#endif
 
 /*  Client
 */
@@ -1125,3 +1266,49 @@ kaapi_task_t* kaapi_wsqueue_steal_task( kaapi_wsqueue_t* queue )
   return __kaapi_wsqueue_steal_task(queue);
 #endif
 }
+
+
+// Insert the sublist [itBegin .. itEnd] after itInsert in queue
+void kaapi_wsqueue_splice(
+      kaapi_wsqueue_t* queue,
+      kaapi_task_t* itInsert,
+      kaapi_task_t* itBegin,
+      kaapi_task_t* itEnd )
+{
+#if LIBOMP_USE_LINKED_DEQUEUE
+  kaapi_task_t* nextItinsert = itInsert->next;
+  kaapi_task_t* prevItBegin = itBegin->prev;
+  kaapi_task_t* nextItEnd = itEnd->next;
+
+#ifndef NDEBUG
+  // Note: itBegin == itEnd is supported
+  KMP_DEBUG_ASSERT(itInsert != itBegin); // Not supported yet
+  KMP_DEBUG_ASSERT(itInsert != itEnd); // Not supported yet
+  for(kaapi_task_t* it=itBegin ; it!=itEnd ; it=it->next)
+  {
+    KMP_DEBUG_ASSERT(it != NULL); // Corruption: NULL reached before itEnd
+    KMP_DEBUG_ASSERT(it != itInsert); // Corruption: insert detected
+  }
+#endif
+
+  // slice removal
+  if(prevItBegin != 0)
+    prevItBegin->next = nextItEnd;
+  else
+    queue->deque_H = nextItEnd;
+  if(nextItEnd != 0)
+    nextItEnd->prev = prevItBegin;
+  else
+    queue->deque_T = prevItBegin;
+
+  // slice insertion
+  itInsert->next = itBegin;
+  itBegin->prev = itInsert;
+  if(nextItinsert != 0)
+    nextItinsert->prev = itEnd;
+  else
+    queue->deque_T = itEnd;
+
+  itEnd->next = nextItinsert;
+#endif
+}
diff --git a/runtime/src/kaapi_wsprotocol.h b/runtime/src/kaapi_wsprotocol.h
index 1c248c66faf2f19e76c08a9494b16ca5159bd1c3..13b06980651d9a544f354d8d23702f698e1b5c33 100644
--- a/runtime/src/kaapi_wsprotocol.h
+++ b/runtime/src/kaapi_wsprotocol.h
@@ -46,8 +46,6 @@
 #ifndef _KAAPI_WSPROTOCOL_H
 #define _KAAPI_WSPROTOCOL_H 1
 
-#include "kmp.h"
-
 /* FWD
 */
 struct kaapi_request_node_t;
@@ -63,6 +61,17 @@ typedef struct kmp_taskdata kaapi_task_t;
    classical T.H.E algorithm
 */
 typedef struct kaapi_wsqueue_t {
+#if LIBOMP_USE_LINKED_DEQUEUE
+#if !LIBOMP_USE_THE_AGGREGATION
+    kmp_bootstrap_lock_t    deque_lock;                 // Lock for accessing deque
+#endif
+    kaapi_task_t *          deque_H;
+    kaapi_task_t *          deque_T;
+    kmp_int32               deque_size;                 // Number of tasks in deque
+    int                     numa_node;                  // Prefered numa node or -1 if unspecified
+
+#else // !LIBOMP_USE_LINKED_DEQUEUE
+
     volatile unsigned int   deque_H;                    // Head of deque: steal by the thief
 #if !LIBOMP_USE_THE_AGGREGATION
     kmp_bootstrap_lock_t    deque_lock;                 // Lock for accessing deque: always taken by the thieves
@@ -74,6 +83,7 @@ typedef struct kaapi_wsqueue_t {
     int                     numa_node;                  // Prefered numa node or -1 if unspecified
     kaapi_task_t**          deque;                      // Deque of tasks encountered by td_thr, dynamically allocated
     unsigned int            deque_size;                 // Size of deck
+#endif
 #if LIBOMP_USE_THE_AGGREGATION
     struct kaapi_request_node_t* tail;
 #endif
@@ -83,19 +93,40 @@ typedef struct kaapi_wsqueue_t {
 /*
 */
 static inline int kaapi_wsqueue_empty( kaapi_wsqueue_t* queue )
-{ return queue->deque_H >= queue->deque_T; }
+{
+#if LIBOMP_USE_LINKED_DEQUEUE
+  return queue->deque_H == 0;
+#else
+  return queue->deque_H >= queue->deque_T;
+#endif
+}
+
 extern int kaapi_wsqueue_init(kaapi_wsqueue_t* queue, size_t size, int numa_node );
 extern int kaapi_wsqueue_fini(kaapi_wsqueue_t* queue);
 extern int kaapi_wsqueue_realloc(kaapi_wsqueue_t* queue );
 
 /* push
    If remote = 0, must be called by the owner of the queue only (one thread)
+   If remote = 1, the task is pushed such that the next steal request will get it.
    Return 0 in case of success else return ENOMEM if it cannot push
 */
 extern int kaapi_wsqueue_push_task(kaapi_wsqueue_t* queue, int remote, kaapi_task_t* task );
 
+/* same as kaapi_wsqueue_push_task - assume no concurrency
+*/
+extern int __kaapi_wsqueue_push_task(kaapi_wsqueue_t* queue, int remote, kaapi_task_t* task );
+
+/* push list of task
+   Return 0 in case of success else return ENOMEM if it cannot push
+*/
+#if LIBOMP_USE_LINKED_DEQUEUE
+extern int kaapi_wsqueue_push_tasklist(kaapi_wsqueue_t* dest, kaapi_wsqueue_t* src );
+extern int __kaapi_wsqueue_push_tasklist(kaapi_wsqueue_t* queue, kaapi_wsqueue_t* list );
+#endif
+
 /* push
-   Serialize owner before pushing task.
+   Serialize owner(s) before pushing task.
+   If remote = 1, the task is pushed such that the next steal request will get it.
    Return 0 in case of success else return ENOMEM if it cannot push
 */
 extern int kaapi_wsqueue_locked_push_task(kaapi_wsqueue_t* queue, int remote, kaapi_task_t* task );
@@ -106,8 +137,13 @@ extern int kaapi_wsqueue_locked_push_task(kaapi_wsqueue_t* queue, int remote, ka
 */
 extern kaapi_task_t* kaapi_wsqueue_pop_task( kaapi_wsqueue_t* queue );
 
+/* same as kaapi_wsqueue_pop_task - assume no concurrency
+*/
+extern kaapi_task_t* __kaapi_wsqueue_pop_task( kaapi_wsqueue_t* queue );
+
+
 /* pop
-   May be called by any threads that consider to be owner of the queue.
+   May be called by owner threads of the queue.
    Return 0 in case of failure
 */
 extern kaapi_task_t* kaapi_wsqueue_locked_pop_task( kaapi_wsqueue_t* queue );
@@ -118,5 +154,21 @@ extern kaapi_task_t* kaapi_wsqueue_locked_pop_task( kaapi_wsqueue_t* queue );
 */
 extern kaapi_task_t* kaapi_wsqueue_steal_task( kaapi_wsqueue_t* queue );
 
+/* same as kaapi_wsqueue_steal_task - assume no concurrency
+*/
+extern kaapi_task_t* __kaapi_wsqueue_steal_task( kaapi_wsqueue_t* queue );
+
+
+/* Similar to std::list::splice
+   Move tasks between [itBegin,itEnd] just after the position itInsert.
+   Currently not yet accessible through the aggregation protocol and could only be used on
+   non sharing queue.
+*/
+extern void kaapi_wsqueue_splice(
+      kaapi_wsqueue_t* ready_queue,
+      kaapi_task_t* itInsert,
+      kaapi_task_t* itBegin,
+      kaapi_task_t* itEnd );
+
 
 #endif
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index 1b00c0bfd20d3712371f148cb3c85480f1316d94..259bfe68d616f0bd27be99e54c242441db8b9f8f 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -136,7 +136,7 @@ class kmp_stats_list;
 #include "ompt-internal.h"
 #endif
 
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE || LIBOMP_USE_LINKED_DEQUEUE
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -2397,6 +2397,17 @@ struct kmp_taskdata { /* aligned during dynamic allocation       */
   kmp_task_team_t *td_task_team;
   kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
 #endif
+#if LIBOMP_USE_LINKED_DEQUEUE
+  struct kmp_taskdata* next;
+  struct kmp_taskdata* prev;
+#endif
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+  // Only useful during the construction of the graph
+  // Invariant: the range contains only predecessor of this task (todo: check)
+  struct kmp_taskdata* groupBegin; // first element of the range
+  struct kmp_taskdata* groupEnd; // last element of the range
+#endif
+
 #if OMPT_SUPPORT
     ompt_task_info_t        ompt_task_info;
 #endif
@@ -2407,9 +2418,9 @@ KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
 
 // Data for a hierarchy entity
 typedef struct kmp_base_queue_data {
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE || LIBOMP_USE_LINKED_DEQUEUE
     kaapi_wsqueue_t         td_wsdeque;            // From Kaapi workqueue
-#else
+#else /* original libOMP queue */
     kmp_bootstrap_lock_t    td_deque_lock;         // Lock for accessing deque
     kmp_taskdata_t **       td_deque;              // Deque of tasks encountered by td_thr, dynamically allocated
     kmp_int32               td_deque_size;         // Size of deck
@@ -2644,6 +2655,11 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
   kmp_uint32 th_reap_state; // Non-zero indicates thread is not
   // tasking, thus safe to reap
 
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+  // private list of tasks used between call to begin_graph - end_graph to store pushed & ready task
+  // the queue is made visible a end_graph and is not stolen until the next call to this function
+  kaapi_wsqueue_t*  th_tasklist;
+#endif
 #if LIBOMP_USE_VARDEP && OMP_40_ENABLED
   kmp_uint32              th_edps_size[2];        // cummulative count in th_edeps
   kmp_extra_depinfo_th_t  th_edeps[2];          // [alias/noalias]
@@ -3870,6 +3886,8 @@ KMP_EXPORT void __kmpc_omp_set_task_affinity(kmp_uint32 kind, kmp_uint64 affinit
 
 KMP_EXPORT void __kmpc_omp_set_task_name(char *name);
 KMP_EXPORT void __kmpc_omp_set_task_attr(char key, long int value);
+KMP_EXPORT void* __kmpc_omp_begin_sched_graph(int flag);
+KMP_EXPORT void __kmpc_omp_end_sched_graph(void* handle, int flag );
 
 KMP_EXPORT void __kmpc_omp_set_task_alloc_size( int rsrc, unsigned long size, void* data );
 KMP_EXPORT void __kmpc_omp_set_task_free_size( int rsrc, unsigned long size, void* data );
diff --git a/runtime/src/kmp_config.h.cmake b/runtime/src/kmp_config.h.cmake
index e20e43319ee90e1f73378880138353b120ee5888..c56daa369959970a65b4bca497b4e10827b6122b 100644
--- a/runtime/src/kmp_config.h.cmake
+++ b/runtime/src/kmp_config.h.cmake
@@ -53,7 +53,9 @@
 #cmakedefine01 LIBOMP_USE_AFFINITY
 #cmakedefine01 LIBOMP_USE_AFFINITY
 #cmakedefine01 LIBOMP_USE_EXTSCHED_MEM
+#cmakedefine01 LIBOMP_USE_REORDER4LOCALITY
 #cmakedefine01 LIBOMP_USE_THEQUEUE
+#cmakedefine01 LIBOMP_USE_LINKED_DEQUEUE
 #cmakedefine01 LIBOMP_USE_THE_AGGREGATION
 #cmakedefine01 LIBOMP_USE_CONCURRENT_WRITE
 #cmakedefine01 LIBOMP_USE_DYNHASH
diff --git a/runtime/src/kmp_ftn_entry.h b/runtime/src/kmp_ftn_entry.h
index 3da40d790e60c35e1b602f69093382a019523b5f..47f65c69902b3569219b8c4457d5a2e922a404e2 100644
--- a/runtime/src/kmp_ftn_entry.h
+++ b/runtime/src/kmp_ftn_entry.h
@@ -659,6 +659,26 @@ FTN_SET_DEPEND_INFO(void * addr, int flag)
 
 #if OMP_40_ENABLED
 
+void* FTN_STDCALL
+FTN_BEGIN_SCHEDGRAPH(int flag )
+{
+    #ifdef KMP_STUB
+        // nothing
+    #else
+        return __kmpc_omp_begin_sched_graph(flag);
+    #endif
+}
+
+void FTN_STDCALL
+FTN_END_SCHEDGRAPH(void* handle, int flag )
+{
+    #ifdef KMP_STUB
+        // nothing
+    #else
+        __kmpc_omp_end_sched_graph(handle, flag);
+    #endif
+}
+
 #if LIBOMP_USE_VARDEP
 int FTN_STDCALL
 FN_TASK_DECLDEPS( int mode, int count, void** array)
diff --git a/runtime/src/kmp_ftn_os.h b/runtime/src/kmp_ftn_os.h
index e7ed60b51c8eed76ba77ad34768b416651a212b5..0457f3209656e368e64fb242a81ef6c25c2f3503 100644
--- a/runtime/src/kmp_ftn_os.h
+++ b/runtime/src/kmp_ftn_os.h
@@ -92,6 +92,8 @@
 #define FN_TASK_DECLDEPS_ARRAY omp_task_declare_dependencies_array
 #define FN_TASK_DECLDEPS_ARRAY_NOALIAS omp_task_declare_dependencies_array_noalias
 #endif
+#define FTN_BEGIN_SCHEDGRAPH omp_begin_sched_graph
+#define FTN_END_SCHEDGRAPH omp_end_sched_graph
 #define FTN_GET_NUM_TEAMS omp_get_num_teams
 #define FTN_GET_TEAM_NUM omp_get_team_num
 #endif
@@ -225,6 +227,8 @@
 #define FN_TASK_DECLDEPS_ARRAY omp_task_declare_dependencies_array_
 #define FN_TASK_DECLDEPS_ARRAY_NOALIAS omp_task_declare_dependencies_array_noalias_
 #endif
+#define FTN_BEGIN_SCHEDGRAPH omp_begin_sched_graph_
+#define FTN_END_SCHEDGRAPH omp_end_sched_graph_
 #define FTN_GET_NUM_TEAMS omp_get_num_teams_
 #define FTN_GET_TEAM_NUM omp_get_team_num_
 #endif
@@ -359,6 +363,8 @@
 #define FN_TASK_DECLDEPS_ARRAY OMP_TASK_DECLARE_DEPENDENCIES_ARRAY
 #define FN_TASK_DECLDEPS_ARRAY_NOALIAS OMP_TASK_DECLARE_DEPENDENCIES_ARRAY_NOALIAS
 #endif
+#define FTN_BEGIN_SCHEDGRAPH OMP_BEGIN_SCHED_GRAPH
+#define FTN_END_SCHEDGRAPH OMP_END_SCHED_GRAPH
 #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS
 #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM
 #endif
@@ -485,7 +491,7 @@
 #define FTN_SET_TASK_FREE_MEM OMP_SET_TASK_FREE_SIZE_
 #define FTN_INFO_ALLOC_SIZE OMP_INFO_ALLOC_MEMORY_
 #define FTN_INFO_FREE_SIZE OMP_INFO_FREE_MEMORY_
-#define FTN_SET_MEMORY_LIMIT OMP_SET_MEMORY_LIMIT
+#define FTN_SET_MEMORY_LIMIT OMP_SET_MEMORY_LIMIT_
 #if OMP_40_ENABLED
 #if LIBOMP_USE_VARDEP
 #define FN_TASK_DECLDEPS OMP_TASK_DECLARE_DEPENDENCIES_
@@ -493,6 +499,8 @@
 #define FN_TASK_DECLDEPS_ARRAY OMP_TASK_DECLARE_DEPENDENCIES_ARRAY_
 #define FN_TASK_DECLDEPS_ARRAY_NOALIAS OMP_TASK_DECLARE_DEPENDENCIES_ARRAY_NOALIAS_
 #endif
+#define FTN_BEGIN_SCHEDGRAPH OMP_BEGIN_SCHED_GRAPH_
+#define FTN_END_SCHEDGRAPH OMP_END_SCHED_GRAPH_
 #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS_
 #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM_
 #endif
diff --git a/runtime/src/kmp_queues.cpp b/runtime/src/kmp_queues.cpp
index 38e766db716d6a49b1e488527f0e058a77d02024..28df51a7f100d347b767c3885290d7cf702217cc 100644
--- a/runtime/src/kmp_queues.cpp
+++ b/runtime/src/kmp_queues.cpp
@@ -15,7 +15,7 @@ int __kmp_cpu2node(int cpu) { return machine_info.cpu2node[cpu]; }
 void __kmp_init_task_deque(kmp_queue_data_t *q, int level, int level_id, int node)
 {
     kmp_base_queue_data_t *queue = &q->qd;
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE || LIBOMP_USE_LINKED_DEQUEUE
     kaapi_wsqueue_init( &queue->td_wsdeque, INITIAL_TASK_DEQUE_SIZE, node);
 #else
     __kmp_alloc_task_deque(queue, node);
@@ -36,7 +36,7 @@ void __kmp_init_task_deque(kmp_queue_data_t *q, int level, int level_id, int nod
 void
 __kmp_alloc_task_deque(kmp_base_queue_data_t *queue, int node)
 {
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE || LIBOMP_USE_LINKED_DEQUEUE
     kaapi_wsqueue_init( &queue->td_wsdeque, INITIAL_TASK_DEQUE_SIZE, node);
 #else
     /*TODO PV something? I need to allocate node queue on nodes...*/
@@ -74,7 +74,7 @@ __kmp_alloc_task_deque(kmp_base_queue_data_t *queue, int node)
 
 void __kmp_realloc_task_deque(kmp_base_queue_data_t *queue)
 {
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE || LIBOMP_USE_LINKED_DEQUEUE
     kaapi_wsqueue_realloc( &queue->td_wsdeque );
 #else
     kmp_int32 size = TASK_DEQUE_SIZE(queue);
@@ -115,7 +115,9 @@ void __kmp_realloc_task_deque(kmp_base_queue_data_t *queue)
 void
 __kmp_free_task_deque( kmp_base_queue_data_t *own_queue )
 {
-#if !LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE || LIBOMP_USE_LINKED_DEQUEUE
+    kaapi_wsqueue_fini( &own_queue->td_wsdeque );
+#else
     __kmp_acquire_bootstrap_lock( &own_queue->td_deque_lock );
 
     if ( own_queue->td_deque != NULL ) {
@@ -128,7 +130,5 @@ __kmp_free_task_deque( kmp_base_queue_data_t *own_queue )
         own_queue->td_deque = NULL;
     }
     __kmp_release_bootstrap_lock( &own_queue->td_deque_lock );
-#else
-    kaapi_wsqueue_fini( &own_queue->td_wsdeque );
 #endif
 }
diff --git a/runtime/src/kmp_queues.h b/runtime/src/kmp_queues.h
index 24c014327b8786f0b2e987aaa6faef36778f2d95..7972428295d897ac04a94b859197c612b5a06d83 100644
--- a/runtime/src/kmp_queues.h
+++ b/runtime/src/kmp_queues.h
@@ -9,7 +9,7 @@
 
 static inline int __kmp_queue_empty(kmp_queue_data_t *queue)
 {
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE || LIBOMP_USE_LINKED_DEQUEUE
     return kaapi_wsqueue_empty(&queue->qd.td_wsdeque);
 #else
     return TCR_4(queue ->qd. td_deque_ntasks) == 0;
diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp
index 265d5749914313404c59df4d6afe5e4cea5f687f..67318b5b89791043f9e4b88cab3d1dea9c3ba7e7 100644
--- a/runtime/src/kmp_runtime.cpp
+++ b/runtime/src/kmp_runtime.cpp
@@ -4092,6 +4092,9 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
   this_thr->th.th_task_key[0]  = 0;
   this_thr->th.th_task_key[1]  = 0;
 #endif
+#if LIBOMP_USE_REORDER4LOCALITY
+  this_thr->th.th_tasklist     = 0;
+#endif
 #if LIBOMP_USE_VARDEP && OMP_40_ENABLED
   this_thr->th.th_edps_size[0] = 0;
   this_thr->th.th_edps_size[1] = 0;
diff --git a/runtime/src/kmp_settings.cpp b/runtime/src/kmp_settings.cpp
index aca5b89b06a11225d272f862465f06bc1874bc6d..3e65def03d6070d7cce94cb5c02ae3b76ffd0dfa 100644
--- a/runtime/src/kmp_settings.cpp
+++ b/runtime/src/kmp_settings.cpp
@@ -5380,6 +5380,11 @@ void __kmp_env_print_2() {
 #else
 #  define K_QUEUE ""
 #endif
+#if LIBOMP_USE_LINKED_DEQUEUE
+#  define K_LQUEUE "+linked dequeue"
+#else
+#  define K_LQUEUE ""
+#endif
 #if LIBOMP_USE_CONCURRENT_WRITE && OMP_40_ENABLED
 #  define K_CW "+Concurrent write"
 #else
@@ -5396,9 +5401,9 @@ void __kmp_env_print_2() {
 #  define K_DH ""
 #endif
 
-#if LIBOMP_USE_AFFINITY || LIBOMP_USE_THEQUEUE || LIBOMP_USE_THE_AGGREGATION  \
+#if LIBOMP_USE_AFFINITY || LIBOMP_USE_THEQUEUE || LIBOMP_USE_LINKED_DEQUEUE || LIBOMP_USE_THE_AGGREGATION  \
  || LIBOMP_USE_CONCURRENT_WRITE || LIBOMP_USE_VARDEP || LIBOMP_USE_DYNHASH 
-    __kmp_str_buf_print( & buffer, "  OMP version enabled: " K_30  K_40 K_45 K_50 ", extended with: " K_QUEUE K_AFFINITY K_AGGREGATION K_CW K_DH K_VD"\n", 0 );
+    __kmp_str_buf_print( & buffer, "  OMP version enabled: " K_30  K_40 K_45 K_50 ", extended with: " K_QUEUE K_LQUEUE K_AFFINITY K_AGGREGATION K_CW K_DH K_VD"\n", 0 );
 #endif
 
   __kmp_str_buf_print(&buffer, "%s\n", KMP_I18N_STR(DisplayEnvEnd));
diff --git a/runtime/src/kmp_taskdeps.cpp b/runtime/src/kmp_taskdeps.cpp
index f6f5665ba456030bcd6d118834ec704fc3a40743..59aa5e9c99c48151dc002adfe0f9e2204a55b009 100644
--- a/runtime/src/kmp_taskdeps.cpp
+++ b/runtime/src/kmp_taskdeps.cpp
@@ -24,6 +24,11 @@
 #include "kmp_atomic.h"
 #endif
 
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+#include "kmp_taskreschedule.h"
+#endif
+
+
 #if OMP_40_ENABLED
 
 // TODO: Improve memory allocation? keep a list of pre-allocated structures?
@@ -546,6 +551,14 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
             __kmp_track_dependence(gtid, indep, outdep, task);
             indep->dn.successors =
                 __kmp_add_node(thread, indep->dn.successors, outdep);
+#if LIBOMP_USE_REORDER4LOCALITY
+            kaapi_reorder4locality_addDependency(
+              thread->th.th_tasklist,
+              KMP_TASK_TO_TASKDATA(indep->dn.task),
+              KMP_TASK_TO_TASKDATA(task),
+              dep
+            );
+#endif
             KA_TRACE(40, ("__kmp_process_deps<%d>: T#%d adding dependence from "
                           "%p to %p\n",
                           filter, gtid, KMP_TASK_TO_TASKDATA(indep->dn.task),
@@ -598,6 +611,14 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
                 __kmp_track_dependence(gtid, outdep, node, task);
                 outdep->dn.successors =
                     __kmp_add_node(thread, outdep->dn.successors, node);
+#if LIBOMP_USE_REORDER4LOCALITY
+                kaapi_reorder4locality_addDependency(
+                  thread->th.th_tasklist,
+                  KMP_TASK_TO_TASKDATA(outdep->dn.task),
+                  KMP_TASK_TO_TASKDATA(task),
+                  dep
+                );
+#endif
                 KA_TRACE(
                     40,
                     ("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
@@ -1293,7 +1314,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
 //TG: do not call the c entry point that may check vardep to call kmpc_omp_task_with_deps
 // generating infinite recursive function call.
 //    return __kmpc_omp_task(loc_ref, gtid, new_task);
-  return __kmp_omp_task(gtid, new_task, true);
+  return __kmp_omp_task(gtid, new_task, false); // TG: previously 'true'; changed to 'false' to skip serialized execution here — TODO confirm intended behavior
 }
 
 /*!
diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp
index 498c408cc4ba65ccd44b836e46bfe2e7ea602a11..e40af23e6bddafe5b6747218fe203037811d03f6 100644
--- a/runtime/src/kmp_tasking.cpp
+++ b/runtime/src/kmp_tasking.cpp
@@ -27,6 +27,13 @@
 
 #include "tsan_annotations.h"
 
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+// Reorder-for-locality task rescheduling API (kaapi_push_task, kaapi_begin/end_graph).
+#include "kmp_taskreschedule.h"
+#endif
+
+
+
 #if LIBOMP_USE_AFFINITY
 #include <numaif.h>
 #ifndef _GNU_SOURCE
@@ -34,7 +41,6 @@
 #endif
 #include <sched.h> /* sched_getcpu */
 
-
 /*
 */
 static int kaapi_numa_getpage_id(const void* addr)
@@ -326,6 +332,14 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
   KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
   KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);
 
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+  if (thread->th.th_tasklist)
+  {
+    kaapi_push_task( thread, taskdata );
+    return TASK_SUCCESSFULLY_PUSHED;
+  }
+#endif
+
   // Find tasking deque specific to encountering thread
   thread_data = & task_team -> tt.tt_threads_data[ tid ];
 
@@ -335,7 +349,7 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
   int selected_nodeid = -1;
 #endif
 
-#if LIBOMP_USE_THEQUEUE||LIBOMP_USE_EXTSCHED_MEM||LIBOMP_USE_AFFINITY
+#if LIBOMP_USE_THEQUEUE || LIBOMP_USE_LINKED_DEQUEUE || LIBOMP_USE_AFFINITY || LIBOMP_USE_EXTSCHED_MEM
   int isremote = 1;
 #endif
 
@@ -359,7 +373,10 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
         selected_queue = &task_team->tt.tt_task_queues_mem[KMP_LEVEL_MACHINE][0].qd;
       else
 #endif
-        selected_queue = &task_team->tt.tt_task_queues[KMP_LEVEL_THREAD][tid].qd;
+        if ( kaapi_wsqueue_empty(&task_team->tt.tt_task_private_queues[KMP_LEVEL_THREAD][tid].qd.td_wsdeque)) // .qd is a struct member (see &...[tid].qd below), not a pointer: use '.', not '->'
+          selected_queue = &task_team->tt.tt_task_private_queues[KMP_LEVEL_THREAD][tid].qd;
+        else
+          selected_queue = &task_team->tt.tt_task_queues[KMP_LEVEL_THREAD][tid].qd;
       isremote = 0;
       KA_TRACE(10, ( "__kmp_push_task: pushing to own private queue, no affinity\n" ) );
     } break;
@@ -519,7 +536,7 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
   KMP_DEBUG_ASSERT(selected_queue);
 
   // No lock needed since only owner can allocate
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE||LIBOMP_USE_LINKED_DEQUEUE
   kaapi_wsqueue_t* pqueue = &(selected_queue->td_wsdeque);
   kmp_int32 err = 0;
 #if LIBOMP_USE_AFFINITY||LIBOMP_USE_EXTSCHED_MEM
@@ -530,19 +547,28 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
         taskdata
     );
   else
+#if LIBOMP_USE_AFFINITY
+    if (selected_queue == &task_team->tt.tt_task_private_queues[KMP_LEVEL_THREAD][tid].qd) // fix: was 'select_queue' (undeclared identifier); the local is 'selected_queue'
+      err = kaapi_wsqueue_push_task(
+          pqueue,
+          isremote,
+          taskdata
+      );
+    else
+#endif
 #endif /* no affinity: 1 queue own by the current thread only, no concurrent "owners" */
-    err = kaapi_wsqueue_push_task(
-        pqueue,
-        isremote, 
-        taskdata
-    );
+      err = kaapi_wsqueue_push_task(
+          pqueue,
+          isremote,
+          taskdata
+      );
   if (err) {
     //printf("Task not pushed, remote:%i !\n", isremote);
     return TASK_NOT_PUSHED;
   }
   return TASK_SUCCESSFULLY_PUSHED;
 
-#else /* LIBOMP_USE_THEQUEUE */
+#else /* LIBOMP_USE_THEQUEUE||LIBOMP_USE_LINKED_DEQUEUE */
 
   if (selected_queue -> td_deque == NULL ) {
     __kmp_alloc_task_deque( selected_queue, -1 );
@@ -1366,6 +1392,10 @@ kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flag
   } else {
     task->shareds = NULL;
   }
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+  taskdata->groupBegin = 0;
+  taskdata->groupEnd = 0;
+#endif
 #if OMP_40_ENABLED
   taskdata->td_flags.depsfill= 0;
   if (ndeps && (flags->depsinalloc))
@@ -2225,6 +2255,29 @@ __kmpc_omp_set_task_attr(char key, long int value)
 #endif
 }
 
+
+//-------------------------------------------------------------------------------------
+// __kmpc_omp_begin_sched_graph: begin scheduling-graph construction; returns an opaque queue handle (NULL when the feature is disabled)
+void* __kmpc_omp_begin_sched_graph(int flag)
+{
+#if LIBOMP_USE_REORDER4LOCALITY
+  kmp_info_t* thread = __kmp_threads[ __kmp_entry_gtid() ];
+  kaapi_wsqueue_t* queue = kaapi_begin_graph(thread, (uint32_t)flag);
+  return queue;
+#else
+  return 0;
+#endif
+}
+
+void __kmpc_omp_end_sched_graph(void* handle, int flag )
+{
+#if LIBOMP_USE_REORDER4LOCALITY
+  kmp_info_t* thread = __kmp_threads[ __kmp_entry_gtid() ];
+  kaapi_end_graph(thread, (uint32_t)flag);
+#endif
+}
+
+
 //-------------------------------------------------------------------------------------
 /*!
 @ingroup TASKING
@@ -2499,6 +2552,8 @@ static kmp_queue_data_t *__kmp_select_queue(
   {
     if (isapop) /* means pop operation */
     {
+      *islocal = 1;
+      CHECK_RETURN(&current_team->tt.tt_task_private_queues[KMP_LEVEL_THREAD][tid]); // cpu
       for (int i=0; i<2; ++i)
       {
 #if LIBOMP_USE_EXTSCHED_MEM
@@ -2614,7 +2669,7 @@ __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 cpu, kmp_int32 node, kmp_in
     return NULL;
   }
 
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE||LIBOMP_USE_LINKED_DEQUEUE
   KMP_DEBUG_ASSERT( gtid == __kmp_get_gtid() );
 #if LIBOMP_USE_EXTSCHED_MEM
 //printf("%i::Islocal:%i\n", gtid, islocal);
@@ -2645,8 +2700,15 @@ __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 cpu, kmp_int32 node, kmp_in
   else
 #endif
   if (islocal ==1) /* == own queue or locked pop on constraints */
-    taskdata = kaapi_wsqueue_pop_task( &(selected_struct_queue->qd.td_wsdeque) );
-  else
+  {
+#if LIBOMP_USE_AFFINITY
+    kmp_int32 tid = thread->th.th_info.ds.ds_tid;
+    if (selected_struct_queue == &task_team->tt.tt_task_private_queues[KMP_LEVEL_THREAD][tid].qd)
+      taskdata = __kaapi_wsqueue_pop_task( &(selected_struct_queue->qd.td_wsdeque) );
+    else
+#endif
+      taskdata = kaapi_wsqueue_pop_task( &(selected_struct_queue->qd.td_wsdeque) );
+  } else
     taskdata = kaapi_wsqueue_locked_pop_task( &(selected_struct_queue->qd.td_wsdeque) );
   if (taskdata ==0) return 0;
 #if OMPT_SUPPORT
@@ -2755,7 +2817,7 @@ __kmp_steal_task( kmp_info_t *victim, kmp_int32 cpu, kmp_int32 node,
 
   KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
 
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE||LIBOMP_USE_LINKED_DEQUEUE
   if (TCR_PTR(victim->th.th_task_team) != task_team) {
       // GEH: why would this happen?
     return 0;
@@ -2782,7 +2844,7 @@ __kmp_steal_task( kmp_info_t *victim, kmp_int32 cpu, kmp_int32 node,
   if (__kmp_queue_empty(victim_struct_queue))
     return NULL;
 
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE||LIBOMP_USE_LINKED_DEQUEUE
   KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
               "task_team=%p queue=%p\n",
               gtid, __kmp_gtid_from_thread( victim ), task_team,
@@ -2845,7 +2907,7 @@ __kmp_steal_task( kmp_info_t *victim, kmp_int32 cpu, kmp_int32 node,
 
   KMP_COUNT_BLOCK(TASK_stolen);
   return task;
-#else //LIBOMP_USE_THEQUEUE
+#else //LIBOMP_USE_THEQUEUE||LIBOMP_USE_LINKED_DEQUEUE
 
   KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
                 "task_team=%p ntasks=%d "
@@ -3035,7 +3097,7 @@ static inline int __kmp_execute_tasks_template(
         state = IDLE_STATE;
       }
 #endif
-#if !LIBOMP_USE_AFFINITY && !LIBOMP_USE_THEQUEUE
+#if !LIBOMP_USE_AFFINITY && !LIBOMP_USE_THEQUEUE && !LIBOMP_USE_LINKED_DEQUEUE
       if (use_own_tasks) // check on own queue first
 #else
         /*  loop to increase local execution */
@@ -3255,7 +3317,7 @@ static inline int __kmp_execute_tasks_template(
       use_own_tasks = 1;
     else
 #endif
-#if LIBOMP_USE_AFFINITY || LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_AFFINITY || LIBOMP_USE_THEQUEUE ||LIBOMP_USE_LINKED_DEQUEUE
     //TG: avoid to return if flag condition is not check - WARN: threads are higly active
     if (flag != NULL && flag->done_check())
 #else
@@ -3273,7 +3335,7 @@ static inline int __kmp_execute_tasks_template(
       return FALSE;
     }
 
-#if LIBOMP_USE_AFFINITY || LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_AFFINITY || LIBOMP_USE_THEQUEUE || LIBOMP_USE_LINKED_DEQUEUE
     if (final_spin)
       return FALSE;
 #else
@@ -4057,7 +4119,7 @@ static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
   kmp_base_queue_data_t *victim_queue = &task_team->tt.tt_task_queues[KMP_LEVEL_THREAD][cpu].qd;
   KMP_DEBUG_ASSERT(victim_queue);
 
-#if !LIBOMP_USE_THEQUEUE
+#if !LIBOMP_USE_THEQUEUE && !LIBOMP_USE_LINKED_DEQUEUE
   if (victim_queue->td_deque == NULL ) {
     // There's no queue in this thread, go find another one
     // We're guaranteed that at least one thread has a queue
@@ -4068,7 +4130,7 @@ static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
   }
 #endif
 
-#if LIBOMP_USE_THEQUEUE
+#if LIBOMP_USE_THEQUEUE||LIBOMP_USE_LINKED_DEQUEUE
   kmp_int32 gtid = thread->th.th_info.ds.ds_gtid;
   if (__kmp_push_task(gtid, task) == TASK_NOT_PUSHED)
     result = false;
diff --git a/runtime/src/kmp_taskreschedule.cpp b/runtime/src/kmp_taskreschedule.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a7305fa3411c73edd9271e8a07f0ba03e0963e26
--- /dev/null
+++ b/runtime/src/kmp_taskreschedule.cpp
@@ -0,0 +1,148 @@
+//
+//  kmp_taskreschedule.cpp
+//  libomp
+//
+//  Copyright © 2018 Jérôme Richard - Gautier Thierry. All rights reserved.
+//
+#include "kmp_config.h"
+#include "kmp.h"
+#include "kmp_i18n.h"
+
+
+#include "kmp_taskreschedule.h"
+
+/* Allocate a per-thread ready-task list recording tasks until kaapi_end_graph(); returns NULL if no task team.
+*/
+kaapi_wsqueue_t* kaapi_begin_graph(kmp_info_t *thread, uint32_t flag)
+{
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+  kmp_task_team_t*       task_team = thread->th.th_task_team;
+  if (task_team ==0) return 0;
+  kaapi_wsqueue_t* ws;
+#if USE_FAST_MEMORY
+  ws = (kaapi_wsqueue_t *)__kmp_fast_allocate(thread, sizeof(kaapi_wsqueue_t));
+#else
+  ws = (kaapi_wsqueue_t *)__kmp_thread_malloc(thread, sizeof(kaapi_wsqueue_t));
+#endif
+  kaapi_wsqueue_init( ws, INITIAL_TASK_DEQUE_SIZE, -1 );
+  thread->th.th_tasklist = ws;
+  return ws;
+#else
+  return 0;
+#endif
+}
+
+
+/* Append 'task' at the tail of the thread's doubly-linked task list and make it its own group.
+*/
+void kaapi_push_task( kmp_info_t* thread, kaapi_task_t* task )
+{
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+  kaapi_wsqueue_t* queue = thread->th.th_tasklist;
+  task->next = 0;
+  task->prev = queue->deque_T;
+  if (task->prev ==0)
+    queue->deque_H = task;
+  else
+    queue->deque_T->next = task;
+  queue->deque_T = task;
+  ++queue->deque_size;
+
+  task->groupBegin = task;
+  task->groupEnd = task;
+#endif
+}
+
+
+/* Flush the thread's private task list into the selected ready queue, then free the list.
+*/
+void kaapi_end_graph(kmp_info_t *thread, uint32_t flag)
+{
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+  if (thread->th.th_tasklist ==0) return;
+  /* select the queue */
+  kmp_task_team_t*       task_team = thread->th.th_task_team;
+  kmp_int32              tid = thread->th.th_info.ds.ds_tid;
+  kmp_base_queue_data_t *selected_queue = NULL;
+
+#if LIBOMP_USE_AFFINITY
+  int cpu = sched_getcpu();
+  int selected_nodeid = __kmp_cpu2node(cpu);
+
+  selected_queue = &task_team->tt.tt_task_private_queues[KMP_LEVEL_NUMA][selected_nodeid].qd;
+#else
+  selected_queue = &task_team->tt.tt_task_queues[KMP_LEVEL_THREAD][tid].qd;
+#endif
+  kaapi_wsqueue_t* queue = &(selected_queue->td_wsdeque);
+  kaapi_wsqueue_t* tasklist = thread->th.th_tasklist;
+  thread->th.th_tasklist = 0;
+  kaapi_wsqueue_push_tasklist( queue, tasklist );
+  KMP_ASSERT( kaapi_wsqueue_empty(tasklist) );
+#if USE_FAST_MEMORY
+  __kmp_fast_free(thread, tasklist);
+#else /* ! USE_FAST_MEMORY */
+  __kmp_thread_free(thread, tasklist);
+#endif
+#endif
+}
+
+
+/* Record a dependency and, when safe, splice the predecessor's task group next to the current group to improve locality in the ready queue.
+*/
+void kaapi_reorder4locality_addDependency(
+    kaapi_wsqueue_t* ready_queue,
+    kaapi_task_t* predTask,
+    kaapi_task_t* currtask,
+    const kmp_depend_info_t* depinfo)
+{
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+  // First dependency
+  if(currtask->groupBegin == 0)
+  {
+    currtask->groupBegin = predTask->groupBegin;
+    currtask->groupEnd = predTask->groupEnd;
+  }
+  else
+  {
+    // Group of predecessor are not contiguous
+    // Assumption: submitted tasks are appended at the end of the ready list and dependencies are sorted
+    // Assumption: groups are not aliasing (to check: not always true but seems to be ok)
+    if(currtask->groupEnd->next != predTask->groupBegin)
+    {
+      // FAIL: group aliasing (with 3 levels) or replicated dependency
+
+      // Hack to avoid handling the same task again when it comes to handle multiple dependencies
+      if(currtask->groupBegin != predTask->groupBegin)
+      {
+        // Avoid partially the reordering of predTask antecedents if they are shared with those of currtask
+        // TODO: support the general case
+        if(currtask->groupEnd != predTask->groupEnd)
+        {
+          kaapi_task_t* itInsert = currtask->groupEnd;
+          kaapi_task_t* itBegin = predTask->groupBegin;
+          kaapi_task_t* itEnd = predTask->groupEnd;
+          bool reorder = true;
+
+          // In some case the algorithm fail to keep the coherence of data structure such as group markers
+          // Iterating over the whole ready list is sufficient to check if something go wrong
+          for(kaapi_task_t* it=itBegin ; it!=itEnd ; it=it->next)
+          {
+            if(it == NULL || it == itInsert)
+            {
+              reorder = false;
+              break;
+            }
+          }
+
+          // Improve locality regarding submission order by moving the predecessor group near the last one
+          if(reorder)
+            kaapi_wsqueue_splice( ready_queue, itInsert, itBegin, itEnd);
+        }
+      }
+    }
+
+    currtask->groupEnd = predTask->groupEnd;
+  }
+#endif
+}
+
diff --git a/runtime/src/kmp_taskreschedule.h b/runtime/src/kmp_taskreschedule.h
new file mode 100644
index 0000000000000000000000000000000000000000..8bf117ad3ffdc41ad18bc5a457bd4fd74885b7f0
--- /dev/null
+++ b/runtime/src/kmp_taskreschedule.h
@@ -0,0 +1,41 @@
+//
+//  kmp_taskreschedule.h
+//  libomp
+//
+//  Copyright © 2018 Jérôme Richard - Gautier Thierry. All rights reserved.
+//
+
+#ifndef kmp_taskreschedule_h
+#define kmp_taskreschedule_h
+
+#include "kaapi_wsprotocol.h"
+
+/* Mark the start of scheduling-graph construction.
+   'flag' selects the scheduler heuristic.
+   Every task generated by the current thread before the matching kaapi_end_graph() call is
+   stored either in the returned queue (if ready) or in the dependency lists of its predecessors.
+   The returned queue holds the ready tasks submitted to the workers by kaapi_end_graph().
+*/
+KMP_EXPORT kaapi_wsqueue_t* kaapi_begin_graph(kmp_info_t *thread, uint32_t flag);
+
+#if OMP_40_ENABLED && LIBOMP_USE_REORDER4LOCALITY
+/*
+*/
+KMP_EXPORT void kaapi_push_task( kmp_info_t* thread, kaapi_task_t* task );
+
+/* Add dependency between 2 tasks.
+   taskPred has successor task by the dependency depinfo.
+   ready_queue is the queue to reorder.
+*/
+KMP_EXPORT void kaapi_reorder4locality_addDependency(
+    kaapi_wsqueue_t* ready_queue,
+    kaapi_task_t* predTask,
+    kaapi_task_t* task,
+    const kmp_depend_info_t* depinfo);
+#endif
+
+/* Submit the queue of ready task to the workers.
+*/
+KMP_EXPORT void kaapi_end_graph(kmp_info_t *thread, uint32_t flag);
+
+#endif /* kmp_taskreschedule_h */