diff --git a/runtime/src/kmp_gsupport.cpp b/runtime/src/kmp_gsupport.cpp
index dc52957018b282d8e9e3e0dcca3d2b5a9dd308eb..218957f8823b4521769135adf244503f4f298175 100644
--- a/runtime/src/kmp_gsupport.cpp
+++ b/runtime/src/kmp_gsupport.cpp
@@ -891,15 +891,12 @@ xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_fu
     ndeps = 0;
 
   /* force allocation of all dependencies in task */
+  /* extra deps are also allocated in the taskdata struct if depsinalloc is set and VARDEP is configured */
   input_flags->depsinalloc = 1;
   kmp_task_t *task = __kmp_task_alloc(&loc, gtid, input_flags,
     sizeof(kmp_task_t), arg_size ? arg_size + arg_align - 1 : 0,
     (kmp_routine_entry_t)func,
-#if LIBOMP_USE_VARDEP
-    ndeps+thread->th.th_edps_size[0], thread->th.th_edps_size[1]
-#else
     ndeps, 0
-#endif
   );
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   ndeps = taskdata->td_ndeps + taskdata->td_ndeps_noalias;
@@ -966,21 +963,17 @@ xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_fu
         dep_list[i].flags.alias = 0;
       }
     }
-    /* consider that gcc does not fill ALL dependencies fields */
-/* BUG HERE: gcc move extra dep to taskdata->deps and omp_task_with_deps recompute the total
-   number of deps using also extra deps stored in threaddata.
-*/
-#error "STOP"
-    taskdata->td_flags.depsfill= 0;
+    /* consider that gcc has filled ALL dependencies fields (excluding extra deps) */
+    taskdata->td_flags.depsfill= 1;
     if (if_cond)
       __kmpc_omp_task_with_deps( &loc, gtid, task,
-          taskdata->td_ndeps, taskdata->td_deps,
-          taskdata->td_ndeps_noalias, taskdata->td_deps_noalias
+          depend == 0 ? 0 : (kmp_int32)(uintptr_t)depend[0], taskdata->td_deps,
+          0, taskdata->td_deps_noalias
       );
     else {
       __kmpc_omp_wait_deps( &loc, gtid,
-          taskdata->td_ndeps, taskdata->td_deps,
-          taskdata->td_ndeps_noalias, taskdata->td_deps_noalias
+          depend == 0 ? 0 : (kmp_int32)(uintptr_t)depend[0], taskdata->td_deps,
+          0, taskdata->td_deps_noalias
       );
 #if OMPT_SUPPORT
       ompt_thread_info_t oldInfo;
diff --git a/runtime/src/kmp_taskdeps.cpp b/runtime/src/kmp_taskdeps.cpp
index a6ad799cfa4c7ee66ae636ac940e802906f8e60b..95c0c38c309dd10ccb4766053653ee2d9476fa49 100644
--- a/runtime/src/kmp_taskdeps.cpp
+++ b/runtime/src/kmp_taskdeps.cpp
@@ -19,6 +19,7 @@
 #include "kmp_io.h"
 #include "kmp_wait_release.h"
 
+
 #if LIBOMP_USE_PARALLEL_SPAWN
 #include "kmp_atomic.h"
 #endif
@@ -489,7 +490,7 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
     KMP_DEBUG_ASSERT(dep->flags.in);
 
     if (filter &&
-#if LIBOMP_USE_VARDEP
+#if LIBOMP_USE_VARDEP && OMP_40_ENABLED
         dep->flags.alias == 1
 #else
         dep->base_addr == 0
@@ -668,7 +669,7 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
   // Filter deps in dep_list
   // TODO: Different algorithm for large dep_list ( > 10 ? )
   for ( i = 0; i < ndeps; i ++ ) {
-#if LIBOMP_USE_VARDEP
+#if LIBOMP_USE_VARDEP && OMP_40_ENABLED
     if (dep_list[i].flags.alias == 0)
     {
 #if LIBOMP_USE_CONCURRENT_WRITE
@@ -705,7 +706,7 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
           dep_list[j].base_addr = 0; // Mark j element as void
         }
     }
-#endif // #if LIBOMP_USE_VARDEP
+#endif // #if LIBOMP_USE_VARDEP && OMP_40_ENABLED
   }
 #if LIBOMP_USE_CONCURRENT_WRITE
   thread->th.th_commute_addr = 0;
@@ -1016,6 +1017,9 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
 //    );
 
 #if LIBOMP_USE_VARDEP
+  /* - all deps, including extra deps, are allocated in the taskdata struct if depsinalloc is set */
+  /* - ndeps and ndeps_noalias do not take extra deps into account */
+  /* - if depsfill == 1 (i.e. gcc), deps are already copied into the taskdata->td_deps array */
   kmp_int32 ndeps_extra = thread->th.th_edps_size[0];
   kmp_int32 ndeps_extra_noalias = thread->th.th_edps_size[1];
   kmp_int32 total_ndeps = ndeps + ndeps_extra;
@@ -1024,24 +1028,27 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
   kmp_int32 total_ndeps = ndeps;
   kmp_int32 total_ndeps_noalias = ndeps_noalias;
 #endif
-  if (new_taskdata->td_flags.depsfill ==0) /* gcc already have stored dependencies */
+  /* allocate here if not done or not enough (+extra deps) in task_alloc */
+  if (new_taskdata->td_ndeps+new_taskdata->td_ndeps_noalias < total_ndeps+total_ndeps_noalias)
   {
-    /* allocate here if not done or not enough (+extra deps) in task_alloc */
-    if (new_taskdata->td_ndeps+new_taskdata->td_ndeps_noalias < total_ndeps+total_ndeps_noalias)
-    {
-      kmp_depend_info_t* td_deps = (kmp_depend_info_t*)__kmp_fast_allocate( thread,
-          (total_ndeps+total_ndeps_noalias)*sizeof(kmp_depend_info_t)
-      );
-      new_taskdata->td_flags.depsinalloc = 0;
-      if (new_taskdata->td_deps)
-        __kmp_fast_free(thread, new_taskdata->td_deps);
-      // do not free td_deps_noalias
-      new_taskdata->td_deps = td_deps;
-    }
-    new_taskdata->td_deps_noalias = new_taskdata->td_deps+total_ndeps;
+    kmp_depend_info_t* td_deps = (kmp_depend_info_t*)__kmp_fast_allocate( thread,
+        (total_ndeps + total_ndeps_noalias)*sizeof(kmp_depend_info_t)
+    );
+    new_taskdata->td_flags.depsinalloc = 0;
+    if (new_taskdata->td_flags.depsfill)
+      KMP_MEMCPY( td_deps, new_taskdata->td_deps, ndeps * sizeof(kmp_depend_info_t) );
+    if (new_taskdata->td_deps)
+      __kmp_fast_free(thread, new_taskdata->td_deps);
+    // do not free td_deps_noalias: it always lives in the same allocated block as td_deps
+    new_taskdata->td_deps = td_deps;
+    new_taskdata->td_deps_noalias = td_deps+total_ndeps;
+  }
 
-    kmp_int32 i;
-    kmp_depend_info_t* deps = new_taskdata->td_deps;
+  kmp_int32 i;
+  kmp_depend_info_t* deps = new_taskdata->td_deps;
+  if (new_taskdata->td_flags.depsfill ==0) /* gcc already have stored dependencies */
+  {
+    /* dep_list may already be the task's own td_deps array; in that case just step past it */
     if (deps != dep_list)
       for (i=0; i<ndeps; ++i) {
         dep_list[i].flags.alias = 0;
@@ -1049,62 +1056,66 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
       }
     else
       deps += ndeps;
+  }
 #if LIBOMP_USE_VARDEP
-    kmp_extra_depinfo_th_t* bloc  = &thread->th.th_edeps[0];
-    do {
-      if (bloc->ed_size) {
-        kmp_depend_info_flags_t flags;
-        flags.in = 1; //(bloc->ed_mode & OMPEXT_MODE_READ ? 1: 0);
-        flags.out= (bloc->ed_mode & OMPEXT_MODE_WRITE ? 1: 0);
-        flags.cw = (bloc->ed_mode & OMPEXT_MODE_CONCURRENT ? 1: 0);
-        flags.commute = 0;
-        flags.alias = 0;
-        for (i=0; i<bloc->ed_size; ++i, ++deps)
-        {
-          deps->base_addr = bloc->ed_deps[i];
-          deps->len       = 1;
-          deps->flags     = flags;
-        }
+  /* extra deps that may alias */
+  kmp_extra_depinfo_th_t* bloc  = &thread->th.th_edeps[0];
+  do {
+    if (bloc->ed_size) {
+      kmp_depend_info_flags_t flags;
+      flags.in = 1; //(bloc->ed_mode & OMPEXT_MODE_READ ? 1: 0);
+      flags.out= (bloc->ed_mode & OMPEXT_MODE_WRITE ? 1: 0);
+      flags.cw = (bloc->ed_mode & OMPEXT_MODE_CONCURRENT ? 1: 0);
+      flags.commute = 0;
+      flags.alias = 0;
+      for (i=0; i<bloc->ed_size; ++i, ++deps)
+      {
+        deps->base_addr = bloc->ed_deps[i];
+        deps->len       = 1;
+        deps->flags     = flags;
       }
-      kmp_extra_depinfo_th_t* blocn = bloc->ed_next;
-      if (bloc != &thread->th.th_edeps[0])
-        __kmp_fast_free(thread, bloc );
-      bloc = blocn;
-    } while (bloc != 0);
-#endif
-    KMP_ASSERT( deps == new_taskdata->td_deps_noalias );
-    for (i=0; i<ndeps_noalias; ++i) {
-      noalias_dep_list[i].flags.alias = 0;
-      *deps++ = noalias_dep_list[i];
     }
+    kmp_extra_depinfo_th_t* blocn = bloc->ed_next;
+    if (bloc != &thread->th.th_edeps[0])
+      __kmp_fast_free(thread, bloc );
+    bloc = blocn;
+  } while (bloc != 0);
+#endif
+  KMP_ASSERT( deps == new_taskdata->td_deps_noalias || (total_ndeps_noalias==0) );
+  for (i=0; i<ndeps_noalias; ++i) {
+    noalias_dep_list[i].flags.alias = 0;
+    *deps++ = noalias_dep_list[i];
+  }
 #if LIBOMP_USE_VARDEP
-    bloc  = &thread->th.th_edeps[1];
-    do {
-      if (bloc->ed_size) {
-        kmp_depend_info_flags_t flags;
-        flags.in = 1; //(bloc->ed_mode & OMPEXT_MODE_READ ? 1: 0);
-        flags.out= (bloc->ed_mode & OMPEXT_MODE_WRITE ? 1: 0);
-        flags.cw = (bloc->ed_mode & OMPEXT_MODE_CONCURRENT ? 1: 0);
-        flags.commute = 0;
-        flags.alias = 0;
-        for (i=0; i<bloc->ed_size; ++i, ++deps)
-        {
-          deps->base_addr = bloc->ed_deps[i];
-          deps->len       = 1;
-          deps->flags     = flags;
-        }
+  bloc  = &thread->th.th_edeps[1];
+  do {
+    if (bloc->ed_size) {
+      kmp_depend_info_flags_t flags;
+      flags.in = 1; //(bloc->ed_mode & OMPEXT_MODE_READ ? 1: 0);
+      flags.out= (bloc->ed_mode & OMPEXT_MODE_WRITE ? 1: 0);
+      flags.cw = (bloc->ed_mode & OMPEXT_MODE_CONCURRENT ? 1: 0);
+      flags.commute = 0;
+      flags.alias = 0;
+      for (i=0; i<bloc->ed_size; ++i, ++deps)
+      {
+        deps->base_addr = bloc->ed_deps[i];
+        deps->len       = 1;
+        deps->flags     = flags;
       }
-      kmp_extra_depinfo_th_t* blocn = bloc->ed_next;
-      if (bloc != &thread->th.th_edeps[1])
-        __kmp_fast_free(thread, bloc );
-      bloc = blocn;
-    } while (bloc != 0);
-    thread->th.th_edps_size[0] =0;
-    thread->th.th_edps_size[1] =0;
-    thread->th.th_edeps_tail[0] =0;
-    thread->th.th_edeps_tail[1] =0;
-#endif
-  }
+    }
+    kmp_extra_depinfo_th_t* blocn = bloc->ed_next;
+    if (bloc != &thread->th.th_edeps[1])
+      __kmp_fast_free(thread, bloc );
+    bloc = blocn;
+  } while (bloc != 0);
+  /* reset thread state about extra deps */
+  thread->th.th_edps_size[0] =0;
+  thread->th.th_edps_size[1] =0;
+  thread->th.th_edeps_tail[0] =0;
+  thread->th.th_edeps_tail[1] =0;
+#endif
+
+//kmp_depend_info_t *save_dep_list = dep_list;
   dep_list = new_taskdata->td_deps;
   ndeps    = new_taskdata->td_ndeps = total_ndeps;
   noalias_dep_list = new_taskdata->td_deps_noalias;
@@ -1170,8 +1181,22 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                     "dependencies: "
                     "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                     gtid, loc_ref, new_taskdata));
+#if 0
+      printf("%i:: Task %p #deps=%i @dep=%p/%p @=%p/%p has dependencies.\n", gtid, new_task, ndeps, dep_list, save_dep_list, 
+                   (ndeps >0 ? dep_list[0].base_addr : 0), 
+                   (ndeps >0 ? save_dep_list[0].base_addr : 0) 
+      );
+#endif
       return TASK_CURRENT_NOT_QUEUED;
     }
+    else {
+#if 0
+      printf("%i:: Task %p #deps=%i @dep=%p/%p @=%p/%p is independent.\n", gtid, new_task, ndeps, dep_list, save_dep_list, 
+                   (ndeps >0 ? dep_list[0].base_addr : 0), 
+                   (ndeps >0 ? save_dep_list[0].base_addr : 0) 
+      );
+#endif
+    }
 #if LIBOMP_USE_CONCURRENT_WRITE
     /* task ready: check to acquired commute ressources */
     if ((node->dn./*list_*/cw) && __kmp_acquired_or_failed( gtid, node, node->dn./*list_*/cw) )
diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp
index 804c19d6dc8ecaeb9a3bdf483d2fcdce758f0116..8d9a588d0cf9b8fc347549288170e8ccbdf27f8d 100644
--- a/runtime/src/kmp_tasking.cpp
+++ b/runtime/src/kmp_tasking.cpp
@@ -1115,10 +1115,13 @@ static size_t __kmp_round_up_to_val(size_t size, size_t val) {
 // sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed
 // in task.
 // task_entry: Pointer to task code entry point generated by compiler.
+// ndeps, ndeps_noalias: number of deps and of no-alias deps to be stored in the taskdata struct;
+// only considered if the depsinalloc flag is set.
 // returns: a pointer to the allocated kmp_task_t structure (task).
+// Note that variable-length deps are allocated by task_alloc if the depsinalloc flag is set.
 kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                   size_t sizeof_kmp_task_t, size_t sizeof_shareds,
-                  kmp_routine_entry_t task_entry, kmp_int32 deps, kmp_int32 deps_noalias )
+                  kmp_routine_entry_t task_entry, kmp_int32 ndeps, kmp_int32 ndeps_noalias )
 {
   kmp_task_t *task;
   kmp_taskdata_t *taskdata;
@@ -1128,9 +1131,9 @@ kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flag
   size_t shareds_offset;
 
   KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
-                "sizeof_task=%ld sizeof_shared=%ld entry=%p, deps=%d deps_noalias=%d\n",
+                "sizeof_task=%ld sizeof_shared=%ld entry=%p, ndeps=%d ndeps_noalias=%d\n",
                 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
-                sizeof_shareds, task_entry, deps, deps_noalias) );
+                sizeof_shareds, task_entry, ndeps, ndeps_noalias) );
 
   if ( parent_task->td_flags.final ) {
     if (flags->merged_if0) {
@@ -1179,9 +1182,15 @@ kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flag
 #endif
 
 #if OMP_40_ENABLED
-  int ndeps;
   if (flags->depsinalloc)
-    ndeps = deps + deps_noalias;
+  {
+    ndeps = ndeps + ndeps_noalias;
+#if LIBOMP_USE_VARDEP
+    kmp_int32 ndeps_extra = thread->th.th_edps_size[0];
+    kmp_int32 ndeps_extra_noalias = thread->th.th_edps_size[1];
+    ndeps += ndeps_extra + ndeps_extra_noalias;
+#endif
+  }
   else
 #endif
     ndeps = 0;
@@ -1230,11 +1239,11 @@ kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flag
   {
     //taskdata->td_deps              = (kmp_depend_info_t*)& ((char *) taskdata)[ shareds_offset - ndeps*sizeof(kmp_depend_info_t) ];
     taskdata->td_deps              = (kmp_depend_info_t*)(task+1);
-    taskdata->td_ndeps             = deps;
-    taskdata->td_deps_noalias      = taskdata->td_deps+deps;
-    taskdata->td_ndeps_noalias     = deps_noalias;
+    taskdata->td_ndeps             = ndeps;
+    taskdata->td_deps_noalias      = taskdata->td_deps+ndeps;
+    taskdata->td_ndeps_noalias     = ndeps_noalias;
     taskdata->td_flags.depsinalloc = 1;
-    KMP_DEBUG_ASSERT( (void*)(taskdata->td_deps_noalias+deps_noalias) <= (void*)task->shareds || task->shareds == NULL);
+    KMP_DEBUG_ASSERT( (void*)(taskdata->td_deps_noalias+ndeps_noalias) <= (void*)task->shareds || task->shareds == NULL);
   }
   else {
     taskdata->td_deps              = 0;