diff --git a/CMakeLists.txt b/CMakeLists.txt
index b29e59e05f108e1f153926085803f320dd6da729..acb986ac4b329748c60187fbeced9fc5888267e9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,7 +12,7 @@ set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
 # Project Declaration
 #===========================================================================
 project(SCALFMM C CXX)
-INCLUDE( CMakeDependentOption )
+
 # check if compiling into source directories
 string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" insource)
 if(insource)
@@ -21,12 +21,12 @@ endif(insource)
 
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules/)
 set(SCALFMM_CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules)
-
-include(GetCpuInfos)
-GetCpuInfos()
-
+#
 # Adds the CMAKE_DEPENDENT_OPTION command
 INCLUDE(CMakeDependentOption)
+#  Add to check CPU info
+include(GetCpuInfos)
+GetCpuInfos()
 
 #===========================================================================
 # Version Number
@@ -45,15 +45,14 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
   # Add extra cmake module path and initialize morse cmake modules
   # --------------------------------------------------------------
   if(MORSE_DISTRIB_DIR)
-      list(APPEND CMAKE_MODULE_PATH ${MORSE_DISTRIB_DIR}/cmake_modules)
-      list(APPEND CMAKE_MODULE_PATH "${MORSE_DISTRIB_DIR}/cmake_modules/morse")
-      set(MORSE_CMAKE_MODULE_PATH ${MORSE_DISTRIB_DIR}/cmake_modules/morse )
+    list(APPEND CMAKE_MODULE_PATH ${MORSE_DISTRIB_DIR}/cmake_modules)
+    list(APPEND CMAKE_MODULE_PATH "${MORSE_DISTRIB_DIR}/cmake_modules/morse")
+    set(MORSE_CMAKE_MODULE_PATH ${MORSE_DISTRIB_DIR}/cmake_modules/morse )
   elseif(EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
-      list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules/morse/)
-      set(MORSE_CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules/morse )
+    list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules/morse/)
+    set(MORSE_CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules/morse )
   endif()
   include(MorseInit)
-
   #
   # Options
   option( SCALFMM_USE_MPI              "Set to ON to build ScaFMM with MPI"          OFF )
@@ -70,11 +69,11 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
   option( SCALFMM_ATTACHE_SOURCE       "Set to ON to compile with -g"                OFF )
   option( SCALFMM_USE_ADDONS           "Set to ON to compile add ons"                OFF )
   if( APPLE ) # to fix problem with  GCC and avx
-	CMAKE_DEPENDENT_OPTION( SCALFMM_USE_SSE              "Set to ON to compile with SSE support (and use intrinsec SSE P2P)" ON "CPUOPTION_SSE3;NOT CPUOPTION_AVX2" OFF  )
-        CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX              "Set to ON to compile with AVX support (and use intrinsec AVX P2P)" OFF "CPUOPTION_AVX; NOT CPUOPTION_AVX2" OFF  )
+    CMAKE_DEPENDENT_OPTION( SCALFMM_USE_SSE              "Set to ON to compile with SSE support (and use intrinsec SSE P2P)" ON "CPUOPTION_SSE3;NOT CPUOPTION_AVX2" OFF  )
+    CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX              "Set to ON to compile with AVX support (and use intrinsec AVX P2P)" OFF "CPUOPTION_AVX; NOT CPUOPTION_AVX2" OFF  )
   else(APPLE)
-        CMAKE_DEPENDENT_OPTION( SCALFMM_USE_SSE              "Set to ON to compile with SSE support (and use intrinsec SSE P2P)" ON "CPUOPTION_SSE3;NOT CPUOPTION_AVX;NOT CPUOPTION_AVX2" OFF  )
-        CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX              "Set to ON to compile with AVX support (and use intrinsec AVX P2P)" ON "CPUOPTION_AVX; NOT CPUOPTION_AVX2" OFF  )
+    CMAKE_DEPENDENT_OPTION( SCALFMM_USE_SSE              "Set to ON to compile with SSE support (and use intrinsec SSE P2P)" ON "CPUOPTION_SSE3;NOT CPUOPTION_AVX;NOT CPUOPTION_AVX2" OFF  )
+    CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX              "Set to ON to compile with AVX support (and use intrinsec AVX P2P)" ON "CPUOPTION_AVX; NOT CPUOPTION_AVX2" OFF  )
   endif(APPLE)
   CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX2             "Set to ON to compile with AVX support (and use intrinsec AVXZ P2P)" ON "CPUOPTION_AVX2" OFF )
   option( SCALFMM_USE_ASSERT           "Set to ON to enable safe tests during execution" ON  )
@@ -89,10 +88,11 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
     option( SCALFMM_DISABLE_NATIVE_OMP4 "Set to ON to disable the gcc/intel omp4"    OFF )
     option( SCALFMM_TIME_OMPTASKS "Set to ON to time omp4 tasks and generate output file"    OFF )
   endif()
+  message(STATUS "AVANT  ${CMAKE_CXX_COMPILER_ID}" )
   if( SCALFMM_USE_MPI )
     try_compile(COMPILE_INTEL ${CMAKE_CURRENT_BINARY_DIR}
-                ${SCALFMM_CMAKE_MODULE_PATH}/compileTestIntel.cpp
-                COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS}")
+      ${SCALFMM_CMAKE_MODULE_PATH}/compileTestIntel.cpp
+      COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS}")
     if (COMPILE_INTEL)
       set(CMAKE_CXX_COMPILER_ID  "Intel")
     endif()
@@ -101,6 +101,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
   # Set scalfmm to default libraries
   set(SCALFMM_LIBRARIES "")
   set(SCALFMM_CXX_FLAGS "-std=c++11 -fpic -Wall")
+  MESSAGE(STATUS "FLAGS =${SCALFMM_CXX_FLAGS}") # report the default flags assigned just above
   #
   #
   # Test if openmp is here
@@ -127,7 +128,11 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
   if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
     # INTEL
     IF (APPLE)
-      set(SSE_FLAGS  "-msse4  -mfpmath=sse")   # -mtune=native -march=native
+      IF( CPUOPTION_SSE42 ) 
+        set(SSE_FLAGS  "-msse4  -mfpmath=sse")   # -mtune=native -march=native
+      ELSEIF (CPUOPTION_SSE3)
+        set(SSE_FLAGS  "-msse3  -mfpmath=sse")   # -mtune=native -march=native
+      ENDIF (CPUOPTION_SSE42)  
     else(APPLE)
       set(AVX_FLAGS  "-march=native -axCORE-AVX2,CORE-AVX-I,AVX") #-mavx
       set(AVX2_FLAGS  "-march=native  -axCORE-AVX2,CORE-AVX-I") #-march=core-avx2
@@ -150,8 +155,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
       endif()
     endif()
     IF (APPLE)
-      set(SSE_FLAGS  "-msse4  -mfpmath=sse")   # -mtune=native -march=native
-      set(SSE_FLAGS  "-msse3  -mfpmath=sse") 
+      #      set(SSE_FLAGS  "-msse4  -mfpmath=sse")   # -mtune=native -march=native
+      IF( CPUOPTION_SSE42 ) 
+        set(SSE_FLAGS  "-msse4  -mfpmath=sse")   # -mtune=native -march=native
+      ELSEIF (CPUOPTION_SSE3)
+        set(SSE_FLAGS  "-msse3  -mfpmath=sse")   # -mtune=native -march=native
+      ENDIF (CPUOPTION_SSE42)
       set(AVX_FLAGS "-mtune=native -march=avx")
       set(AVX2_FLAGS "-mtune=native -march=native -mmic")
     else(APPLE)
@@ -212,12 +221,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
         "Set your optimization flags for release mode.")
     else(APPLE)
       # Not apple system - Check the compiler flags
-     if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
-         #set(SCALFMM_FLAGS_OPTI_RELEASE "-fp-model precise -fp-model source -fimf-precision=low  -funroll-loops -ftree-vectorize"
-         set(SCALFMM_FLAGS_OPTI_RELEASE "-funroll-loops -ftree-vectorize"
-           CACHE STRING "Set your optimization flags for release mode.")
-       # set(SCALFMM_FLAGS_OPTI_RELEASE "-funroll-loops -ftree-vectorize" CACHE STRING
-       #   "Set your optimization flags for release mode.")
+      if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+        #set(SCALFMM_FLAGS_OPTI_RELEASE "-fp-model precise -fp-model source -fimf-precision=low  -funroll-loops -ftree-vectorize"
+        set(SCALFMM_FLAGS_OPTI_RELEASE "-funroll-loops -ftree-vectorize"
+          CACHE STRING "Set your optimization flags for release mode.")
+	# set(SCALFMM_FLAGS_OPTI_RELEASE "-funroll-loops -ftree-vectorize" CACHE STRING
+	#   "Set your optimization flags for release mode.")
       else()
         set(SCALFMM_FLAGS_OPTI_RELEASE "-ffast-math -funroll-loops -ftree-vectorize" CACHE STRING
           "Set your optimization flags for release mode.")
@@ -226,8 +235,8 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
     #
     set(SCALFMM_CXX_FLAGS  "${SCALFMM_CXX_FLAGS} ${SCALFMM_FLAGS_OPTI_RELEASE}")
   endif()
-    MESSAGE(STATUS   " %%%%%%%%%% SCALFMM_CXX_FLAGS  ${SCALFMM_CXX_FLAGS} %%%%%%%%%%%%%")
-    MESSAGE(STATUS   " %%%%%%%%%% CMAKE_CXX_FLAGS_RELEASE  ${CMAKE_CXX_FLAGS_RELEASE} %%%%%%%%%%%%%")
+  MESSAGE(STATUS   " %%%%%%%%%% SCALFMM_CXX_FLAGS  ${SCALFMM_CXX_FLAGS} %%%%%%%%%%%%%")
+  MESSAGE(STATUS   " %%%%%%%%%% CMAKE_CXX_FLAGS_RELEASE  ${CMAKE_CXX_FLAGS_RELEASE} %%%%%%%%%%%%%")
 
   #
   ##############################################################################
@@ -249,28 +258,28 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
     # -DMPI_C_COMPILER=path/to/mpicc -DMPI_CXX_COMPILER=path/to/mpicxx
     # at cmake configure
     if(NOT MPI_C_COMPILER)
-        set(MPI_C_COMPILER mpicc)
+      set(MPI_C_COMPILER mpicc)
     endif()
     if(NOT MPI_CXX_COMPILER)
-        set(MPI_CXX_COMPILER mpicxx)
+      set(MPI_CXX_COMPILER mpicxx)
     endif()
 
     find_package(MPI REQUIRED)
 
     if (MPI_CXX_INCLUDE_PATH)
-        include_directories( ${MPI_CXX_INCLUDE_PATH} )
+      include_directories( ${MPI_CXX_INCLUDE_PATH} )
     endif()
     if (MPI_CXX_COMPILE_FLAGS)
-        set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${MPI_CXX_COMPILE_FLAGS}")
+      set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${MPI_CXX_COMPILE_FLAGS}")
     endif()
     if (MPI_CXX_INCLUDE_PATH)
-        set(SCALFMM_INCLUDES  "${SCALFMM_INCLUDES}; ${MPI_CXX_INCLUDE_PATH}")
+      set(SCALFMM_INCLUDES  "${SCALFMM_INCLUDES}; ${MPI_CXX_INCLUDE_PATH}")
     endif()
     if (MPI_CXX_LINK_FLAGS)
-        set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LINK_FLAGS}")
+      set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LINK_FLAGS}")
     endif()
     if (MPI_CXX_LIBRARIES)
-        set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LIBRARIES}")
+      set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LIBRARIES}")
     endif()
 
   endif()
@@ -286,7 +295,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
 
     if( SCALFMM_USE_MKL_AS_BLAS )
       set(BLA_VENDOR "Intel10_64lp_seq")
-      find_package(BLASEXT) # not REQUIRED
+      find_package(BLASEXT QUIET) # not REQUIRED
 
       if(BLAS_LIBRARY_DIRS)
         # the RPATH to be used when installing
@@ -311,12 +320,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
     endif()
 
     if(BLAS_FOUND)
-        set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${BLASLAPACK_LIBRARIES}")
-        #message(STATUS "SCALFMM_LIBRARIES          = ${SCALFMM_LIBRARIES}")
+      set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${BLASLAPACK_LIBRARIES}")
+      #message(STATUS "SCALFMM_LIBRARIES          = ${SCALFMM_LIBRARIES}")
     else()
-        message(WARNING "BLAS has not been found, SCALFMM will continue to compile but some applications will be disabled.")
-        message(WARNING "If you have BLAS set BLAS_LIBDIR, BLAS_INCDIR or BLAS_DIR (CMake variables using -D or environment variables).")        
-        set(SCALFMM_USE_BLAS OFF)
+      message(WARNING "BLAS has not been found, SCALFMM will continue to compile but some applications will be disabled.")
+      message(WARNING "If you have BLAS set BLAS_LIBDIR, BLAS_INCDIR or BLAS_DIR (CMake variables using -D or environment variables).")        
+      set(SCALFMM_USE_BLAS OFF)
     endif()
 
   endif(SCALFMM_USE_BLAS)
@@ -338,7 +347,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
         message(STATUS "         SCALFMM USE MKL already defined")
         set(FFT_INCLUDES "$ENV{MKLROOT}/include/fftw" CACHE STRING "Set your MKL flags")
         if (BLAS_FOUND)
-            set(FFTW_FOUND ON)
+          set(FFTW_FOUND ON)
         endif()
 
       else(SCALFMM_USE_MKL_AS_BLAS)
@@ -348,16 +357,16 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
         # Default is DOUBLE and without THREADS|OMP
         find_package(FFTW  COMPONENTS MKL) # not REQUIRED
         if (FFTW_LIBRARY_DIRS_DEP)
-            set(FFT_LIBRARIES "-L${FFTW_LIBRARY_DIRS_DEP};" CACHE STRING "Set your MKL flags")
+          set(FFT_LIBRARIES "-L${FFTW_LIBRARY_DIRS_DEP};" CACHE STRING "Set your MKL flags")
         endif()
         if (FFTW_LIBRARIES_DEP)
-            foreach (fft_lib ${FFTW_LIBRARIES_DEP})
-              set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};")
-            endforeach()
+          foreach (fft_lib ${FFTW_LIBRARIES_DEP})
+            set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};")
+          endforeach()
         endif()
         set(FFT_INCLUDES "${FFTW_INCLUDE_DIRS_DEP}" )
         if (FFT_LIBRARIES)
-            set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${FFT_LIBRARIES}")
+          set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${FFT_LIBRARIES}")
         endif()
 
       endif(SCALFMM_USE_MKL_AS_BLAS)
@@ -385,21 +394,21 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
     endif(SCALFMM_USE_MKL_AS_FFTW)
 
     if (FFT_INCLUDES)
-        set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${FFT_INCLUDES}")
+      set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${FFT_INCLUDES}")
     endif()
 
     if(FFTW_FOUND)
-        message(STATUS "         SCALFMM_LIBRARIES          = ${SCALFMM_LIBRARIES}")
-        message(STATUS "         SCALFMM_INCLUDES          = ${SCALFMM_INCLUDES}")
+      message(STATUS "         SCALFMM_LIBRARIES          = ${SCALFMM_LIBRARIES}")
+      message(STATUS "         SCALFMM_INCLUDES          = ${SCALFMM_INCLUDES}")
     else()
-        message(WARNING "FFTW has not been found, SCALFMM will continue to compile but some applications will be disabled.")
-        message(WARNING "If you have FFTW set FFTW_LIBDIR, FFTW_INCDIR or FFTW_DIR (CMake variables using -D or environment variables).")        
-        set(SCALFMM_USE_FFT OFF)
+      message(WARNING "FFTW has not been found, SCALFMM will continue to compile but some applications will be disabled.")
+      message(WARNING "If you have FFTW set FFTW_LIBDIR, FFTW_INCDIR or FFTW_DIR (CMake variables using -D or environment variables).")        
+      set(SCALFMM_USE_FFT OFF)
     endif()
   endif(SCALFMM_USE_FFT)
   list(APPEND FUSE_LIST "FFT")
-         message(STATUS "         SCALFMM_LIBRARIES          = ${SCALFMM_LIBRARIES}")
-        message(STATUS "         SCALFMM_INCLUDES          = ${SCALFMM_INCLUDES}")
+  message(STATUS "         SCALFMM_LIBRARIES          = ${SCALFMM_LIBRARIES}")
+  message(STATUS "         SCALFMM_INCLUDES          = ${SCALFMM_INCLUDES}")
   
   message(STATUS "SCALFMM_USE_FFT       =  ${SCALFMM_USE_FFT}")
 
@@ -423,38 +432,38 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
   if( SCALFMM_USE_STARPU )
     # No fast math with starpu
     if(SCALFMM_CXX_FLAGS)
-        string(REPLACE "-ffast-math" " " SCALFMM_CXX_FLAGS ${SCALFMM_CXX_FLAGS})
+      string(REPLACE "-ffast-math" " " SCALFMM_CXX_FLAGS ${SCALFMM_CXX_FLAGS})
     endif()
     if(SCALFMM_FLAGS_OPTI_RELEASE)
-        string(REPLACE "-ffast-math" " " SCALFMM_FLAGS_OPTI_RELEASE ${SCALFMM_FLAGS_OPTI_RELEASE})     
+      string(REPLACE "-ffast-math" " " SCALFMM_FLAGS_OPTI_RELEASE ${SCALFMM_FLAGS_OPTI_RELEASE})     
     endif()
 
     # CUDA could be used with StarPU enabled
     option( SCALFMM_USE_CUDA "Set to ON to use CUDA with StarPU" OFF )
     message( STATUS "SCALFMM_USE_CUDA             = ${SCALFMM_USE_CUDA}" )
     if(SCALFMM_USE_CUDA)
-        execute_process(COMMAND nvcc --version ERROR_VARIABLE cuda_error_output OUTPUT_QUIET)
-        if(cuda_error_output)
-            message( FATAL_ERROR "nvcc is needed with CUDA." )
-        endif()
-        if(NOT DEFINED CUSTOM_CUDA_FLAGS)
-            set( CUSTOM_CUDA_FLAGS "-std=c++11;-arch=sm_20" CACHE
-            STRING "Set your CUDA flags, for example : -arch=sm_20;-ptxas-options=-v;-use_fast_math")
-        endif()
-        # This is needed to remove backslash after space in ADD_CUSTOM_COMMAND
-        separate_arguments(CUSTOM_CUDA_FLAGS)
-        message( STATUS "CUSTOM_CUDA_FLAGS             = ${CUSTOM_CUDA_FLAGS}" )
+      execute_process(COMMAND nvcc --version ERROR_VARIABLE cuda_error_output OUTPUT_QUIET)
+      if(cuda_error_output)
+        message( FATAL_ERROR "nvcc is needed with CUDA." )
+      endif()
+      if(NOT DEFINED CUSTOM_CUDA_FLAGS)
+        set( CUSTOM_CUDA_FLAGS "-std=c++11;-arch=sm_20" CACHE
+          STRING "Set your CUDA flags, for example : -arch=sm_20;-ptxas-options=-v;-use_fast_math")
+      endif()
+      # This is needed to remove backslash after space in ADD_CUSTOM_COMMAND
+      separate_arguments(CUSTOM_CUDA_FLAGS)
+      message( STATUS "CUSTOM_CUDA_FLAGS             = ${CUSTOM_CUDA_FLAGS}" )
 
-        find_package(CUDA REQUIRED)
+      find_package(CUDA REQUIRED)
 
-        if (CUDA_INCLUDE_DIRS)
-            include_directories(${CUDA_INCLUDE_DIRS})
-        endif()
-        if (CUDA_LIBRARIES)
-            set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}")
-        endif()
-        
-        set(CUDA_NEEDED_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/Src)
+      if (CUDA_INCLUDE_DIRS)
+        include_directories(${CUDA_INCLUDE_DIRS})
+      endif()
+      if (CUDA_LIBRARIES)
+        set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}")
+      endif()
+      
+      set(CUDA_NEEDED_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/Src)
     endif()
 
     # Find StarPU with a list of optional components
@@ -462,50 +471,50 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
     # create list of components in order to make a single call to find_package(starpu...)
     set(STARPU_COMPONENT_LIST "HWLOC")
     if(SCALFMM_USE_MPI)
-        list(APPEND STARPU_COMPONENT_LIST "MPI")
+      list(APPEND STARPU_COMPONENT_LIST "MPI")
     endif()
     if(SCALFMM_USE_CUDA)
-        list(APPEND STARPU_COMPONENT_LIST "CUDA")
+      list(APPEND STARPU_COMPONENT_LIST "CUDA")
     endif()
 
     find_package(STARPU ${SCALFMM_STARPU_VERSION} REQUIRED
-                 COMPONENTS ${STARPU_COMPONENT_LIST})
+      COMPONENTS ${STARPU_COMPONENT_LIST})
 
     # Append list of libraries and include dirs
     include_directories(${STARPU_INCLUDE_DIRS_DEP})
     foreach (starpu_libdir ${STARPU_LIBRARY_DIRS_DEP})
       if (${starpu_libdir} MATCHES "^ *-L")
-          set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_libdir}")
+        set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_libdir}")
       else()
-          set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L${starpu_libdir}")
+        set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L${starpu_libdir}")
       endif()
     endforeach()
     foreach (starpu_lib ${STARPU_LIBRARIES_DEP})
       if (EXISTS ${starpu_lib} OR ${starpu_lib} MATCHES "^ *-")
-          set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_lib}")
+        set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_lib}")
       else()
-          set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-l${starpu_lib}")
+        set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-l${starpu_lib}")
       endif()
     endforeach()
 
     # TODO: is this very useful? CUDA is already a component of find starpu
     if (CUDA_LIBRARIES)
-        set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}")
+      set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}")
     endif()
 
     # Message
     message(STATUS "         STARPU_LIBRARIES          = ${STARPU_LIBRARIES}")
     if (STARPU_INCLUDE_DIRS)
-        message(STATUS "         STARPU_INCLUDES           = ${STARPU_INCLUDE_DIRS}")
-        set(SCALFMM_INCLUDES  "${SCALFMM_INCLUDES}; ${STARPU_INCLUDE_DIRS}")
+      message(STATUS "         STARPU_INCLUDES           = ${STARPU_INCLUDE_DIRS}")
+      set(SCALFMM_INCLUDES  "${SCALFMM_INCLUDES}; ${STARPU_INCLUDE_DIRS}")
     endif()
 
     # TODO: replace this by a component of find starpu
     OPTION( SCALFMM_USE_OPENCL      "Set to ON to use OPENCL with StarPU" OFF )
     MESSAGE( STATUS "SCALFMM_USE_OPENCL             = ${SCALFMM_USE_OPENCL}" )
     if(SCALFMM_USE_OPENCL)
-        include_directories($ENV{OPENCL_INC})
-        SET(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L$ENV{OPENCL_LIB};-lOpenCL")
+      include_directories($ENV{OPENCL_INC})
+      SET(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L$ENV{OPENCL_LIB};-lOpenCL")
     endif()
   endif(SCALFMM_USE_STARPU)
   list(APPEND FUSE_LIST "STARPU")
@@ -523,16 +532,16 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
     endif()
     message( STATUS "SSE_FLAGS ${SSE_FLAGS}  -- ${CMAKE_CXX_FLAGS}  ")
     try_compile(COMPILE_SSE  ${CMAKE_CURRENT_BINARY_DIR}
-                ${SCALFMM_CMAKE_MODULE_PATH}/compileTestSse.cpp
-                COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${SSE_FLAGS}"
-                OUTPUT_VARIABLE COMPILE_SSE_OUTPUT)
+      ${SCALFMM_CMAKE_MODULE_PATH}/compileTestSse.cpp
+      COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${SSE_FLAGS}"
+      OUTPUT_VARIABLE COMPILE_SSE_OUTPUT)
 
     if(${COMPILE_SSE})
       set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${SSE_FLAGS}")
 
       try_compile(COMPILE_RESULT_VAR ${CMAKE_CURRENT_BINARY_DIR}
-                  ${SCALFMM_CMAKE_MODULE_PATH}/checkSSEpe.cpp
-                  COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${SSE_FLAGS}")
+        ${SCALFMM_CMAKE_MODULE_PATH}/checkSSEpe.cpp
+        COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${SSE_FLAGS}")
       if( NOT ${COMPILE_RESULT_VAR})
         set(__SSEPE_INTEL_COMPILER ON)
       endif()
@@ -554,23 +563,23 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
     endif()
 
     try_compile(COMPILE_AVX ${CMAKE_CURRENT_BINARY_DIR}
-                ${SCALFMM_CMAKE_MODULE_PATH}/compileTestAvx.cpp
-                COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}"
-                OUTPUT_VARIABLE COMPILE_AVX_OUTPUT)
-     if(${COMPILE_AVX})
-  message(STATUS "%%%%%%%%%%%% COMPILE_AVX               = ${COMPILE_AVX}  %%%%<    ${AVX_FLAGS}")
-		
+      ${SCALFMM_CMAKE_MODULE_PATH}/compileTestAvx.cpp
+      COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}"
+      OUTPUT_VARIABLE COMPILE_AVX_OUTPUT)
+    if(${COMPILE_AVX})
+      message(STATUS "%%%%%%%%%%%% COMPILE_AVX               = ${COMPILE_AVX}  %%%%<    ${AVX_FLAGS}")
+      
       set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS}   ${AVX_FLAGS}")
-    message(STATUS "%%%%%%%%%%%% SCALFMM_CXX_FLAGS               = ${SCALFMM_CXX_FLAGS}")
-    #set( SCALFMM_USE_SSE   OFF   FORCE) # ne marche pas
+      message(STATUS "%%%%%%%%%%%% SCALFMM_CXX_FLAGS               = ${SCALFMM_CXX_FLAGS}")
+      #set( SCALFMM_USE_SSE   OFF   FORCE) # ne marche pas
       try_compile(COMPILE_RESULT_AVSPE ${CMAKE_CURRENT_BINARY_DIR}
-                  ${SCALFMM_CMAKE_MODULE_PATH}/checkAVXpe.cpp
-                  COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
-		 if( NOT ${COMPILE_RESULT_AVSPE})
+        ${SCALFMM_CMAKE_MODULE_PATH}/checkAVXpe.cpp
+        COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+      if( NOT ${COMPILE_RESULT_AVSPE})
 
 
         set(__AVXPE_INTEL_COMPILER ON)
-   endif()
+      endif()
 
       message(STATUS ${CMAKE_CXX_FLAGS} )
     else(${COMPILE_AVX})
@@ -595,15 +604,15 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
     endif()
 
     try_compile(COMPILE_AVX2 ${CMAKE_CURRENT_BINARY_DIR}
-                ${SCALFMM_CMAKE_MODULE_PATH}/compileTestAvx2.cpp
-                COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX2_FLAGS}"
-                OUTPUT_VARIABLE COMPILE_AVX2_OUTPUT)
+      ${SCALFMM_CMAKE_MODULE_PATH}/compileTestAvx2.cpp
+      COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX2_FLAGS}"
+      OUTPUT_VARIABLE COMPILE_AVX2_OUTPUT)
     if(${COMPILE_AVX2})
       set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS}   ${AVX2_FLAGS}")
       #set( SCALFMM_USE_SSE   OFF   FORCE) # ne marche pas
       try_compile(COMPILE_RESULT_AVSPE ${CMAKE_CURRENT_BINARY_DIR}
-                  ${SCALFMM_CMAKE_MODULE_PATH}/checkAVX2pe.cpp
-                  COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX2_FLAGS}")
+        ${SCALFMM_CMAKE_MODULE_PATH}/checkAVX2pe.cpp
+        COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX2_FLAGS}")
       if( NOT ${COMPILE_RESULT_AVSPE})
         set(__AVX2PE_INTEL_COMPILER ON)
       endif()
@@ -643,15 +652,27 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
     find_package (PkgConfig)
     if(PKG_CONFIG_FOUND)
       set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH "ON")
-     pkg_search_module( EZTrace REQUIRED eztrace)
-      link_directories(${EZTrace_LIBRARY_DIRS})
-      link_libraries( ${EZTrace_LIBRARIES} -leztrace-memory)
-      include_directories(${EZTrace_INCLUDE_DIRS})
-      MESSAGE(STATUS "EZTRACE:  ${EZTrace_INCLUDE_DIRS}   ${EZTrace_LIBRARY_DIRS}  ${EZTrace_LIBRARIES}")
-      CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_M2L "Set to ON to trace M2L operator" ON  "SCALFMM_USE_EZTRACE" OFF )
-
+      pkg_search_module( EZTrace REQUIRED eztrace)
+      if(EZTrace_FOUND) # <prefix>_FOUND result of pkg_search_module(EZTrace ...) above
+        link_directories(${EZTrace_LIBRARY_DIRS})
+        link_libraries( ${EZTrace_LIBRARIES})
+        IF( SCALFMM_USE_MPI ) # the MPI build additionally links the eztrace MPI module
+          link_libraries(-leztrace-mpi)
+        ENDIF(SCALFMM_USE_MPI)
+        include_directories(${EZTrace_INCLUDE_DIRS})
+        MESSAGE(STATUS "EZTRACE:  ${EZTrace_INCLUDE_DIRS}   ${EZTrace_LIBRARY_DIRS}  ${EZTrace_LIBRARIES}")
+        CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_ALGO "Set to ON to trace the full algorithm (all operators)" ON  "SCALFMM_USE_EZTRACE" OFF )
+        CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_P2M   "Set to ON to trace P2M operator" OFF  "SCALFMM_USE_EZTRACE" OFF )
+        CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_M2M   "Set to ON to trace M2M operator" OFF  "SCALFMM_USE_EZTRACE" OFF )
+        CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_M2L   "Set to ON to trace M2L operator" OFF  "SCALFMM_USE_EZTRACE" OFF )
+        CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_L2L   "Set to ON to trace L2L operator" OFF  "SCALFMM_USE_EZTRACE" OFF )
+        CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_P2P   "Set to ON to trace P2P operator" OFF  "SCALFMM_USE_EZTRACE" OFF )
+      else(EZTrace_FOUND) # package lookup failed: disable tracing instead of linking nothing
+        MESSAGE(WARNING "Eztrace not found - EZTRACE Is set to OFF")
+        set(SCALFMM_USE_EZTRACE OFF)
+      endif(EZTrace_FOUND)
     else(PKG_CONFIG_FOUND)
-      MESSAGE(WARNING "PKG-CONFIG not found- EZTRACE Is set to NONE")
+      MESSAGE(WARNING "PKG-CONFIG not found - EZTRACE Is set to OFF")
       set(SCALFMM_USE_EZTRACE OFF)
     endif(PKG_CONFIG_FOUND)
 
@@ -669,11 +690,11 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
   # We need the libraries without spaces (inside the config file)
   set(SCALFMM_COMPILE_LIBS   "")
   foreach(lib_var ${SCALFMM_LIBRARIES})
-      string(STRIP ${lib_var} lib_var)
-      LIST(APPEND SCALFMM_COMPILE_LIBS   ${lib_var})        
+    string(STRIP ${lib_var} lib_var)
+    LIST(APPEND SCALFMM_COMPILE_LIBS   ${lib_var})        
   endforeach()
   configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Src/ScalFmmConfig.h.cmake
-                  ${CMAKE_BINARY_DIR}/Src/ScalFmmConfig.h  )
+    ${CMAKE_BINARY_DIR}/Src/ScalFmmConfig.h  )
   #
   ##################################################################
   #                      Build - lib                               #
@@ -817,7 +838,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
   message(STATUS "SCALFMM_USE_BLAS     =  ${SCALFMM_USE_BLAS}")
   message(STATUS "SCALFMM_USE_FFT       =  ${SCALFMM_USE_FFT}")
   message(STATUS "SCALFMM_USE_MKL      =   ${SCALFMM_USE_MKL}")
-# 
+  # 
   message(STATUS "CMAKE_CXX_FLAGS     =  ${CMAKE_CXX_FLAGS}")
   message(STATUS "SCALFMM_CXX_FLAGS   =  ${SCALFMM_CXX_FLAGS}")
   message(STATUS "SCALFMM_LIBRARIES   =  ${SCALFMM_LIBRARIES}")
diff --git a/CMakeModules/morse/Ressources.cmake b/CMakeModules/morse/Ressources.cmake
index 3a5cf6cdf0fb7ba0418925acb2252dccc0a8224d..15b7a35d11cd2cd2ddb5ff358bb3139bbe92cfa0 100644
--- a/CMakeModules/morse/Ressources.cmake
+++ b/CMakeModules/morse/Ressources.cmake
@@ -43,9 +43,9 @@ if(NOT DEFINED PROCESSOR_COUNT)
     if(APPLE)
         find_program(cmd_sys_pro "system_profiler")
         if(cmd_sys_pro)
-            execute_process(COMMAND ${cmd_sys_pro} OUTPUT_VARIABLE info)
-            string(REGEX REPLACE "^.*Total Number Of Cores: ([0-9]+).*$" "\\1"
-                NUMBER_OF_CPU "${info}")
+            execute_process(COMMAND ${cmd_sys_pro} SPHardwareDataType OUTPUT_VARIABLE info) # query only the hardware section
+            string(REGEX REPLACE "^.*Total Number of Cores: ([0-9]+).*$" "\\1"
+                NUMBER_OF_CPU "${info}") # keep only the captured core count
         endif()
     endif()
 
diff --git a/CMakeModules/morse/find/FindPTSCOTCH.cmake b/CMakeModules/morse/find/FindPTSCOTCH.cmake
index 88c22713ca2a07d36b301e5f9f5b90f678426e75..e457dcac9a49f9f226d0e5f4f2531768ac9ddfc4 100644
--- a/CMakeModules/morse/find/FindPTSCOTCH.cmake
+++ b/CMakeModules/morse/find/FindPTSCOTCH.cmake
@@ -264,10 +264,21 @@ if(PTSCOTCH_LIBRARIES)
     if(CMAKE_THREAD_LIBS_INIT)
         list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}")
     endif()
-    if(UNIX OR WIN32)
+    set(Z_LIBRARY "Z_LIBRARY-NOTFOUND")
+    find_library(Z_LIBRARY NAMES z)
+    if(Z_LIBRARY)
+        list(APPEND REQUIRED_LIBS "-lz")
+    endif()
+    set(M_LIBRARY "M_LIBRARY-NOTFOUND")
+    find_library(M_LIBRARY NAMES m)
+    if(M_LIBRARY)
         list(APPEND REQUIRED_LIBS "-lm")
     endif()
-    list(APPEND REQUIRED_LIBS "-lz -lrt")
+    set(RT_LIBRARY "RT_LIBRARY-NOTFOUND")
+    find_library(RT_LIBRARY NAMES rt)
+    if(RT_LIBRARY)
+        list(APPEND REQUIRED_LIBS "-lrt")
+    endif()
 
     # set required libraries for link
     set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}")
diff --git a/CMakeModules/morse/find/FindSCOTCH.cmake b/CMakeModules/morse/find/FindSCOTCH.cmake
index 1f4dc25afe832307068f54453e7f05b566ba2919..c24242ce6fbd44465962f1521a0068fb50ff3a87 100644
--- a/CMakeModules/morse/find/FindSCOTCH.cmake
+++ b/CMakeModules/morse/find/FindSCOTCH.cmake
@@ -233,10 +233,21 @@ if(SCOTCH_LIBRARIES)
     if(CMAKE_THREAD_LIBS_INIT)
         list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}")
     endif()
-    if(UNIX OR WIN32)
+    set(Z_LIBRARY "Z_LIBRARY-NOTFOUND")
+    find_library(Z_LIBRARY NAMES z)
+    if(Z_LIBRARY)
+        list(APPEND REQUIRED_LIBS "-lz")
+    endif()
+    set(M_LIBRARY "M_LIBRARY-NOTFOUND")
+    find_library(M_LIBRARY NAMES m)
+    if(M_LIBRARY)
         list(APPEND REQUIRED_LIBS "-lm")
     endif()
-    list(APPEND REQUIRED_LIBS "-lz -lrt")
+    set(RT_LIBRARY "RT_LIBRARY-NOTFOUND")
+    find_library(RT_LIBRARY NAMES rt)
+    if(RT_LIBRARY)
+        list(APPEND REQUIRED_LIBS "-lrt")
+    endif()
 
     # set required libraries for link
     set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}")
diff --git a/Src/Core/FFmmAlgorithm.hpp b/Src/Core/FFmmAlgorithm.hpp
index 3a8fffc259d63a41508f197cc35a60a9599b38a5..49010c41abdbad8185ac66bcc86c7ac80cfd5314 100644
--- a/Src/Core/FFmmAlgorithm.hpp
+++ b/Src/Core/FFmmAlgorithm.hpp
@@ -62,6 +62,7 @@ public:
 
         FAssertLF(tree, "tree cannot be null");
         FAssertLF(kernels, "kernels cannot be null");
+        FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
 
         FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
 
diff --git a/Src/Core/FFmmAlgorithmPeriodic.hpp b/Src/Core/FFmmAlgorithmPeriodic.hpp
index bdd5f8cca3a34931a991c33d788369fbcb957947..d8107679a4ecde0b4e8c01e838b7c07aedf089e0 100644
--- a/Src/Core/FFmmAlgorithmPeriodic.hpp
+++ b/Src/Core/FFmmAlgorithmPeriodic.hpp
@@ -67,6 +67,7 @@ public:
 
         FAssertLF(tree, "tree cannot be null");
         FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1");
+        FAssertLF(leafLevelSeperationCriteria < 3, "Separation criteria should be < 3");
 
         FAbstractAlgorithm::setNbLevelsInTree(extendedTreeHeight());
 
diff --git a/Src/Core/FFmmAlgorithmSectionTask.hpp b/Src/Core/FFmmAlgorithmSectionTask.hpp
index ebafb2e9c9fffbe1a12282bc4eb192ffaa04daaf..658b8889a7404dc1c2beb0c0f29761571e4d171a 100644
--- a/Src/Core/FFmmAlgorithmSectionTask.hpp
+++ b/Src/Core/FFmmAlgorithmSectionTask.hpp
@@ -27,6 +27,7 @@
 #include "../Containers/FVector.hpp"
 
 #include "FCoreCommon.hpp"
+#include "FP2PExclusion.hpp"
 
 /**
  * @author Berenger Bramas (berenger.bramas@inria.fr)
@@ -45,7 +46,7 @@
  *
  * Upon destruction, this class does not deallocate pointers given to its constructor.
  */
-template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
+template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
 class FFmmAlgorithmSectionTask : public FAbstractAlgorithm, public FAlgorithmTimers {
     
     OctreeClass* const tree;  ///< The octree to work on
@@ -74,13 +75,14 @@ public:
 
         FAssertLF(tree, "tree cannot be null");
         FAssertLF(inKernels, "kernels cannot be null");
+        FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
 
         this->kernels = new KernelClass*[MaxThreads];
-        #pragma omp parallel for schedule(static)
-        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
+        #pragma omp parallel num_threads(MaxThreads)
+        {
             #pragma omp critical (InitFFmmAlgorithmSectionTask)
             {
-                this->kernels[idxThread] = new KernelClass(*inKernels);
+                this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
             }
         }
 
@@ -327,7 +329,7 @@ protected:
         // There is a maximum of 26 neighbors
         ContainerClass* neighbors[27];
 
-        const int SizeShape = 3*3*3;
+        const int SizeShape = P2PExclusionClass::SizeShape;
         FVector<typename OctreeClass::Iterator> shapes[SizeShape];
 
         typename OctreeClass::Iterator octreeIterator(tree);
@@ -337,7 +339,7 @@ protected:
         // Coloring all the cells
         do{
             const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
-            const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
+            const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
 
             shapes[shapePosition].push(octreeIterator);
 
diff --git a/Src/Core/FFmmAlgorithmTask.hpp b/Src/Core/FFmmAlgorithmTask.hpp
index 8d87deaf43ced395b8f61475df0fa23ec4d9ef36..2856035ac93f0e2fd823298d880f6ea625012bf9 100644
--- a/Src/Core/FFmmAlgorithmTask.hpp
+++ b/Src/Core/FFmmAlgorithmTask.hpp
@@ -27,6 +27,7 @@
 #include "../Containers/FVector.hpp"
 
 #include "FCoreCommon.hpp"
+#include "FP2PExclusion.hpp"
 
 /**
  * @author Berenger Bramas (berenger.bramas@inria.fr)
@@ -39,7 +40,7 @@
  *
  * Of course this class does not deallocate pointer given in arguements.
  */
-template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
+template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
 class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers {
 
 	OctreeClass* const tree;       //< The octree to work on
@@ -49,7 +50,7 @@ class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers {
 
 	const int OctreeHeight;
 
-	const int leafLevelSeperationCriteria;
+    const int leafLevelSeparationCriteria;
 public:
 	/** The constructor need the octree and the kernels used for computation
 	 * @param inTree the octree to work on
@@ -58,20 +59,21 @@ public:
 	 */
 	FFmmAlgorithmTask(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1)
 : tree(inTree) , kernels(nullptr),
-  MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria)
+  MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria)
 {
 
 		FAssertLF(tree, "tree cannot be null");
 		FAssertLF(inKernels, "kernels cannot be null");
+        FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
 
 		this->kernels = new KernelClass*[MaxThreads];
-#pragma omp parallel for schedule(static)
-		for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
-#pragma omp critical (InitFFmmAlgorithmTask)
-			{
-				this->kernels[idxThread] = new KernelClass(*inKernels);
-			}
-		}
+        #pragma omp parallel num_threads(MaxThreads)
+        {
+            #pragma omp critical (InitFFmmAlgorithmTask)
+            {
+                this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
+            }
+        }
 
 		FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
 
@@ -239,7 +241,7 @@ protected:
 				// for each levels
 				for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
 					FLOG(FTic counterTimeLevel);
-					const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
+                    const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
 					// for each cell we apply the M2L with all cells in the implicit interaction list
 					do{
 #pragma omp task firstprivate(octreeIterator) private(neighbors) shared(idxLevel)
@@ -286,7 +288,7 @@ protected:
 				// for each levels
 				for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
 					FLOG(FTic counterTimeLevel);
-					const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
+                    const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
 					// for each cells
 					do{
 //#pragma omp task default(none) firstprivate(octreeIterator,separationCriteria)  private( neighbors) shared(idxLevel)
@@ -388,7 +390,7 @@ protected:
 				// There is a maximum of 26 neighbors
 				ContainerClass* neighbors[27];
 
-				const int SizeShape = 3*3*3;
+                const int SizeShape = P2PExclusionClass::SizeShape;
 				FVector<typename OctreeClass::Iterator> shapes[SizeShape];
 
 				typename OctreeClass::Iterator octreeIterator(tree);
@@ -397,7 +399,7 @@ protected:
 				// for each leafs
 				do{
 					const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
-					const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
+                    const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
 
 					shapes[shapePosition].push(octreeIterator);
 
diff --git a/Src/Core/FFmmAlgorithmThread.hpp b/Src/Core/FFmmAlgorithmThread.hpp
index daf2303b06ed3b5b9692c444e924efc084d11035..85c7ffbe83577d2d984d372e39d1c8cf124ac652 100644
--- a/Src/Core/FFmmAlgorithmThread.hpp
+++ b/Src/Core/FFmmAlgorithmThread.hpp
@@ -27,6 +27,7 @@
 #include "../Containers/FOctree.hpp"
 
 #include "FCoreCommon.hpp"
+#include "FP2PExclusion.hpp"
 
 #include <omp.h>
 
@@ -45,7 +46,7 @@
 *
 * This class does not deallocate pointers given to its constructor.
 */
-template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
+template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
 class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
     OctreeClass* const tree;                  ///< The octree to work on.
     KernelClass** kernels;                    ///< The kernels.
@@ -53,7 +54,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
     typename OctreeClass::Iterator* iterArray;
     int leafsNumber;
 
-    static const int SizeShape = 3*3*3;
+    static const int SizeShape = P2PExclusionClass::SizeShape;
     int shapeLeaf[SizeShape];
 
     const int MaxThreads;                     ///< The maximum number of threads.
@@ -62,7 +63,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
 
     int userChunkSize;
 
-    const int leafLevelSeperationCriteria;
+    const int leafLevelSeparationCriteria;
 
 public:
     /** Class constructor
@@ -79,15 +80,17 @@ public:
                         const int inUserChunkSize = 10, const int inLeafLevelSeperationCriteria = 1)
         : tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0),
           MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()),
-          userChunkSize(inUserChunkSize), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) {
+          userChunkSize(inUserChunkSize), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) {
         FAssertLF(tree, "tree cannot be null");
+        FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
+        FAssertLF(0 < userChunkSize, "Chunk size should be > 0");
         
         this->kernels = new KernelClass*[MaxThreads];
-        #pragma omp parallel for schedule(static)
-        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
-            #pragma omp critical (InitFFmmAlgorithmThread) 
+        #pragma omp parallel num_threads(MaxThreads)
+        {
+            #pragma omp critical (InitFFmmAlgorithmThread)
             {
-                this->kernels[idxThread] = new KernelClass(*inKernels);
+                this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
             }
         }
         
@@ -138,7 +141,7 @@ protected:
         do{
             ++leafsNumber;
             const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
-            ++this->shapeLeaf[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)];
+            ++this->shapeLeaf[P2PExclusionClass::GetShapeIdx(coord)];
 
         } while(octreeIterator.moveRight());
         iterArray = new typename OctreeClass::Iterator[leafsNumber];
@@ -296,7 +299,7 @@ protected:
         // for each levels
         for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
             FLOG(FTic counterTimeLevel);
-            const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
+            const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
             int numberOfCells = 0;
             // for each cells
             do{
@@ -439,7 +442,7 @@ protected:
                 //iterArray[leafs] = octreeIterator;
                 //++leafs;
                 const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
-                const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
+                const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
 
                 omp_set_lock(&lockShape[shapePosition]);
                 const int positionToWork = startPosAtShape[shapePosition]++;
diff --git a/Src/Core/FFmmAlgorithmThreadBalance.hpp b/Src/Core/FFmmAlgorithmThreadBalance.hpp
index 3d3afb85923b99bd4bf0a0f1d2162d6d2fb34ad7..002aa4e309f04da1b8c2596c0e18b8925e749b46 100644
--- a/Src/Core/FFmmAlgorithmThreadBalance.hpp
+++ b/Src/Core/FFmmAlgorithmThreadBalance.hpp
@@ -11,6 +11,7 @@
 #include "../Containers/FOctree.hpp"
 
 #include "FCoreCommon.hpp"
+#include "FP2PExclusion.hpp"
 
 #include <omp.h>
 #include <vector>
@@ -29,18 +30,18 @@
 *
 * This class does not deallocate pointers given to its constructor.
 */
-template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
+template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
 class FFmmAlgorithmThreadBalance : public FAbstractAlgorithm, public FAlgorithmTimers{
     OctreeClass* const tree;                  ///< The octree to work on.
     KernelClass** kernels;                    ///< The kernels.
 
-    static const int SizeShape = 3*3*3;
+    static const int SizeShape = P2PExclusionClass::SizeShape;
 
     const int MaxThreads;                     ///< The maximum number of threads.
 
     const int OctreeHeight;                   ///< The height of the given tree.
 
-    const int leafLevelSeperationCriteria;
+    const int leafLevelSeparationCriteria;
 
 public:
     /** Class constructor
@@ -57,15 +58,16 @@ public:
                                const int inLeafLevelSeperationCriteria = 1)
         : tree(inTree) , kernels(nullptr),
           MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()),
-          leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) {
+          leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) {
         FAssertLF(tree, "tree cannot be null");
+        FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
 
         this->kernels = new KernelClass*[MaxThreads];
-#pragma omp parallel for schedule(static)
-        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
-#pragma omp critical (InitFFmmAlgorithmThreadBalance)
+        #pragma omp parallel num_threads(MaxThreads)
+        {
+            #pragma omp critical (InitFFmmAlgorithmThreadBalance)
             {
-                this->kernels[idxThread] = new KernelClass(*inKernels);
+                this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
             }
         }
 
@@ -205,7 +207,7 @@ protected:
             do{
                 ++leafsNumber;
                 const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
-                ++shapeLeaves[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)];
+                ++shapeLeaves[P2PExclusionClass::GetShapeIdx(coord)];
             } while(octreeIterator.moveRight());
         }
 
@@ -346,6 +348,7 @@ protected:
                         workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
                     }
                     WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
+                    memset(workloadBuffer, 0, sizeof(struct WorkloadTemp)*leafsNumber);
                     // Prepare the P2P
                     const int LeafIndex = OctreeHeight - 1;
                     leafsDataArray.reset(new LeafData[leafsNumber]);
@@ -365,7 +368,7 @@ protected:
                     // for each leafs
                     for(int idxLeaf = 0 ; idxLeaf < leafsNumber ; ++idxLeaf){
                         const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
-                        const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
+                        const int shapePosition = P2PExclusionClass::GetShapeIdx(coord);
 
                         const int positionToWork = startPosAtShape[shapePosition]++;
 
@@ -542,7 +545,7 @@ protected:
 
         // for each levels
         for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
-            const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
+            const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
             FLOG(FTic counterTimeLevel);
             FLOG(computationCounter.tic());
             #pragma omp parallel
diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp
index 649f1a21be80d7a424e6ad58e57f3fca1b6ad765..2218a1282aab1b6bee527817ef498115316b5583 100644
--- a/Src/Core/FFmmAlgorithmThreadProc.hpp
+++ b/Src/Core/FFmmAlgorithmThreadProc.hpp
@@ -40,6 +40,7 @@
 #include <sys/time.h>
 
 #include "FCoreCommon.hpp"
+#include "FP2PExclusion.hpp"
 
 #include <memory>
 
@@ -63,7 +64,7 @@
  *        --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20
  *        --track-fds=yes ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
  */
-template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
+template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
 class FFmmAlgorithmThreadProc : public FAbstractAlgorithm, public FAlgorithmTimers {
 private:
     OctreeClass* const tree;     ///< The octree to work on
@@ -82,7 +83,7 @@ private:
     const int idProcess;         ///< Current process id
     const int OctreeHeight;      ///< Tree height
 
-    const int leafLevelSeperationCriteria;
+    const int leafLevelSeparationCriteria;
 
     /** An interval is the morton index interval
      * that a proc uses (i.e. it holds data in this interval) */
@@ -150,17 +151,18 @@ public:
         nbProcess(inComm.processCount()),
         idProcess(inComm.processId()),
         OctreeHeight(tree->getHeight()),
-        leafLevelSeperationCriteria(inLeafLevelSeperationCriteria),
+        leafLevelSeparationCriteria(inLeafLevelSeperationCriteria),
         intervals(new Interval[inComm.processCount()]),
         workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) {
         FAssertLF(tree, "tree cannot be null");
+        FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
 
         this->kernels = new KernelClass*[MaxThreads];
-        #pragma omp parallel for schedule(static)
-        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
+        #pragma omp parallel num_threads(MaxThreads)
+        {
             #pragma omp critical (InitFFmmAlgorithmThreadProc)
             {
-                this->kernels[idxThread] = new KernelClass(*inKernels);
+                this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
             }
         }
 
@@ -188,7 +190,10 @@ protected:
      */
     void executeCore(const unsigned operationsToProceed) override {
         // Count leaf
+#ifdef SCALFMM_TRACE_ALGO
+        eztrace_start();
+#endif
         this->numberOfLeafs = 0;
         {
             Interval myFullInterval;
             {//Building the interval with the first and last leaves (and count the number of leaves)
@@ -260,31 +265,61 @@ protected:
                                             workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()),  __LINE__ );
         }
 
+#ifdef SCALFMM_TRACE_ALGO
+        eztrace_enter_event("P2M", EZTRACE_YELLOW);
+#endif
         Timers[P2MTimer].tic();
         if(operationsToProceed & FFmmP2M) bottomPass();
         Timers[P2MTimer].tac();
 
+#ifdef SCALFMM_TRACE_ALGO
+        eztrace_leave_event();
+        eztrace_enter_event("M2M", EZTRACE_PINK);
+#endif
+
         Timers[M2MTimer].tic();
         if(operationsToProceed & FFmmM2M) upwardPass();
         Timers[M2MTimer].tac();
 
+#ifdef SCALFMM_TRACE_ALGO
+        eztrace_leave_event();
+        eztrace_enter_event("M2L", EZTRACE_GREEN);
+#endif
+
         Timers[M2LTimer].tic();
         if(operationsToProceed & FFmmM2L) transferPass();
         Timers[M2LTimer].tac();
 
+#ifdef SCALFMM_TRACE_ALGO
+        eztrace_leave_event();
+        eztrace_enter_event("L2L", EZTRACE_PINK);
+#endif
+
         Timers[L2LTimer].tic();
         if(operationsToProceed & FFmmL2L) downardPass();
         Timers[L2LTimer].tac();
 
+#ifdef SCALFMM_TRACE_ALGO
+        eztrace_leave_event();
+        eztrace_enter_event("L2P+P2P", EZTRACE_BLUE);
+#endif
+
         Timers[NearTimer].tic();
         if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P));
         Timers[NearTimer].tac();
 
+#ifdef SCALFMM_TRACE_ALGO
+        eztrace_leave_event();
+        // Tracing is stopped exactly once, after the iterator arrays are released below.
+#endif
         // delete array
         delete []     iterArray;
         delete [] iterArrayComm;
         iterArray     = nullptr;
         iterArrayComm = nullptr;
+#ifdef SCALFMM_TRACE_ALGO
+        eztrace_stop();
+#endif
     }
 
     /////////////////////////////////////////////////////////////////////////////
@@ -626,7 +661,7 @@ protected:
                     // for each levels
                     for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
 
-                        const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
+                        const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
 
                         if(!procHasWorkAtLevel(idxLevel, idProcess)){
                             avoidGotoLeftIterator.moveDown();
@@ -784,7 +819,7 @@ protected:
                 // Now we can compute all the data
                 // for each levels
                 for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
-                    const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
+                    const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
 
                     if(!procHasWorkAtLevel(idxLevel, idProcess)){
                         avoidGotoLeftIterator.moveDown();
@@ -851,7 +886,7 @@ protected:
             // compute the second time
             // for each levels
             for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
-                const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
+                const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
 
                 if(!procHasWorkAtLevel(idxLevel, idProcess)){
                     avoidGotoLeftIterator.moveDown();
@@ -1199,7 +1234,7 @@ protected:
 
         // init
         const int LeafIndex = OctreeHeight - 1;
-        const int SizeShape = 3*3*3;
+        const int SizeShape = P2PExclusionClass::SizeShape;
 
         int shapeLeaf[SizeShape];
         memset(shapeLeaf,0,SizeShape*sizeof(int));
@@ -1360,7 +1395,7 @@ protected:
                     myLeafs[idxLeaf] = octreeIterator;
 
                     const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
-                    const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
+                    const int shape = P2PExclusionClass::GetShapeIdx(coord);
                     shapeType[idxLeaf] = shape;
 
                     ++shapeLeaf[shape];
diff --git a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp
index e2a0ab116633fcc4e5cb503c9ba97e0f52735eb2..ce9690d551b9e3190969d266ebea14e41e2a2377 100644
--- a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp
+++ b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp
@@ -38,6 +38,7 @@
 #include <omp.h>
 
 #include "FCoreCommon.hpp"
+#include "FP2PExclusion.hpp"
 
 #include <memory>
 
@@ -61,7 +62,7 @@
  * --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes
  * ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
  */
-template<class FReal, class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
+template<class FReal, class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion>
 class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm {
     OctreeClass* const tree;                 //< The octree to work on
     KernelClass** kernels;                   //< The kernels
@@ -83,7 +84,7 @@ class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm {
 
     const int OctreeHeight;
 
-    const int leafLevelSeperationCriteria;
+    const int leafLevelSeparationCriteria;
 
 public:
     struct Interval{
@@ -117,11 +118,11 @@ public:
 
     void setKernel(KernelClass*const inKernels){
         this->kernels = new KernelClass*[MaxThreads];
-        #pragma omp parallel for schedule(static)
-        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
+        #pragma omp parallel num_threads(MaxThreads)
+        {
             #pragma omp critical (InitFFmmAlgorithmThreadProcPeriodic)
             {
-                this->kernels[idxThread] = new KernelClass(*inKernels);
+                this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
             }
         }
     }
@@ -146,12 +147,13 @@ public:
           numberOfLeafs(0),
           MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()),
           OctreeHeight(tree->getHeight()),
-          leafLevelSeperationCriteria(inLeafLevelSeperationCriteria),
+          leafLevelSeparationCriteria(inLeafLevelSeperationCriteria),
           intervals(new Interval[inComm.processCount()]),
           workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) {
 
         FAssertLF(tree, "tree cannot be null");
         FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1");
+        FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
 
         FAbstractAlgorithm::setNbLevelsInTree(extendedTreeHeight());
 
@@ -787,7 +789,7 @@ protected:
                             // Find the M2L neigbors of a cell
                             const int counter = getPeriodicInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(),
                                                                                idxLevel,
-                                                                               neighborsIndexes, neighborsPosition, AllDirs, leafLevelSeperationCriteria);
+                                                                               neighborsIndexes, neighborsPosition, AllDirs, leafLevelSeparationCriteria);
 
                             memset(alreadySent, false, sizeof(bool) * nbProcess);
                             bool needOther = false;
@@ -913,7 +915,7 @@ protected:
                 for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){
                     const int fackLevel = idxLevel + offsetRealTree;
 
-                    const int separationCriteria = (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria);
+                    const int separationCriteria = (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeparationCriteria);
 
                     if(!procHasWorkAtLevel(idxLevel, idProcess)){
                         avoidGotoLeftIterator.moveDown();
@@ -981,7 +983,7 @@ protected:
             for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){
                 const int fackLevel = idxLevel + offsetRealTree;
 
-                const int separationCriteria = (fackLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria);
+                const int separationCriteria = (fackLevel != OctreeHeight-1 ? 1 : leafLevelSeparationCriteria);
 
                 if(!procHasWorkAtLevel(idxLevel, idProcess)){
                     avoidGotoLeftIterator.moveDown();
@@ -1347,7 +1349,7 @@ protected:
 
         // init
         const int LeafIndex = OctreeHeight - 1;
-        const int SizeShape = 3*3*3;
+        const int SizeShape = P2PExclusionClass::SizeShape;
 
         int shapeLeaf[SizeShape];
         memset(shapeLeaf,0,SizeShape*sizeof(int));
@@ -1510,7 +1512,7 @@ protected:
                     myLeafs[idxLeaf] = octreeIterator;
 
                     const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
-                    const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
+                    const int shape = P2PExclusionClass::GetShapeIdx(coord);
                     shapeType[idxLeaf] = shape;
 
                     ++shapeLeaf[shape];
diff --git a/Src/Core/FFmmAlgorithmThreadTsm.hpp b/Src/Core/FFmmAlgorithmThreadTsm.hpp
index 6bb6a268a25c1f58c012b7909d14d61b0121b213..761bb345a4c5e5886e7c3e463b659fdc60b816e9 100644
--- a/Src/Core/FFmmAlgorithmThreadTsm.hpp
+++ b/Src/Core/FFmmAlgorithmThreadTsm.hpp
@@ -55,7 +55,7 @@ class FFmmAlgorithmThreadTsm : public FAbstractAlgorithm, public FAlgorithmTimer
 
     const int OctreeHeight;
 
-    const int leafLevelSeperationCriteria;
+    const int leafLevelSeparationCriteria;
 
 public:
     /** The constructor need the octree and the kernels used for computation
@@ -65,16 +65,17 @@ public:
       */
     FFmmAlgorithmThreadTsm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1)
                       : tree(inTree) , kernels(nullptr), iterArray(nullptr),
-                      MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) {
+                      MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) {
 
         FAssertLF(tree, "tree cannot be null");
+        FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
 
         this->kernels = new KernelClass*[MaxThreads];
-        #pragma omp parallel for schedule(static)
-        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
+        #pragma omp parallel num_threads(MaxThreads)
+        {
             #pragma omp critical (InitFFmmAlgorithmThreadTsm)
             {
-                this->kernels[idxThread] = new KernelClass(*inKernels);
+                this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
             }
         }
 
@@ -250,7 +251,7 @@ protected:
             // for each levels
             for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
                 FLOG(FTic counterTimeLevel);
-                const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
+                const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
 
                 int numberOfCells = 0;
                 // for each cells
diff --git a/Src/Core/FFmmAlgorithmTsm.hpp b/Src/Core/FFmmAlgorithmTsm.hpp
index 80f238d68465a3ec2905c8892ce30797e6ea322e..62f0c8c3be55c45279f247e5ecc8f06e2745ca7f 100644
--- a/Src/Core/FFmmAlgorithmTsm.hpp
+++ b/Src/Core/FFmmAlgorithmTsm.hpp
@@ -46,7 +46,7 @@ class FFmmAlgorithmTsm : public FAbstractAlgorithm{
 
     const int OctreeHeight;
 
-    const int leafLevelSeperationCriteria;
+    const int leafLevelSeparationCriteria;
 
     FLOG(FTic counterTime);                                               //< In case of debug: to count the elapsed time
     FLOG(FTic computationCounter);                                        //< In case of debug: to  count computation time
@@ -58,10 +58,11 @@ public:
       * An assert is launched if one of the arguments is null
       */
     FFmmAlgorithmTsm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1)
-        : tree(inTree) , kernels(inKernels) , OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria){
+        : tree(inTree) , kernels(inKernels) , OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria){
 
         FAssertLF(tree, "tree cannot be null");
         FAssertLF(kernels, "kernels cannot be null");
+        FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
 
         FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
 
@@ -200,7 +201,7 @@ protected:
         // for each levels
         for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
             FLOG(FTic counterTimeLevel);
-            const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
+            const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
             // for each cells
             do{
                 FLOG(computationCounter.tic());
diff --git a/Src/Core/FP2PExclusion.hpp b/Src/Core/FP2PExclusion.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..329f1c42c802b688d179acfe4118dcb71af8e029
--- /dev/null
+++ b/Src/Core/FP2PExclusion.hpp
@@ -0,0 +1,50 @@
+#ifndef FP2PEXCLUSION_HPP
+#define FP2PEXCLUSION_HPP
+
+#include "../Containers/FTreeCoordinate.hpp"
+
+/**
+ * This class is responsible for the separation of the leaves
+ * using the coloring algorithm.
+ * For a classic P2P with mutual interactions, BoxSeparations = 2 should be used.
+ * Our current mutual P2P is a little more complicated because it needs
+ * 2 boxes of separation, but only in some directions.
+ */
+template <int BoxSeparations = 2>
+class FP2PExclusion{
+public:
+    static const int BoxesPerDim = (1+BoxSeparations);                 //< Period of the coloring along each axis
+    static const int SizeShape = BoxesPerDim*BoxesPerDim*BoxesPerDim;  //< Number of distinct shape indexes
+
+    static int GetShapeIdx(const int inX, const int inY, const int inZ){ // x is the most significant digit, z the least
+        return ((inX%BoxesPerDim)*BoxesPerDim + (inY%BoxesPerDim))*BoxesPerDim + (inZ%BoxesPerDim);
+    }
+
+    static int GetShapeIdx(const FTreeCoordinate& coord){ // same index, taken from the x/y/z of a tree coordinate
+        return GetShapeIdx(coord.getX(), coord.getY(), coord.getZ());
+    }
+};
+
+/**
+ * Here the formula is related to the octree construction of the neighbors list:
+ * const int index = (((idxX + 1) * 3) + (idxY +1)) * 3 + idxZ + 1;
+ * A complete neighbors list holds 27 entries.
+ * If we only loop over the first 14 entries, then we need "x" in [0;2[
+ * and "y" "z" in [0;3[
+ */
+class FP2PMiddleExclusion{
+public:
+    static const int SizeShape = 2*3*3; //< Number of distinct shape indexes (2 along x, 3 along y and z)
+
+    static int GetShapeIdx(const int inX, const int inY, const int inZ){ // x is the most significant digit, z the least
+        return ((inX%2)*3 + (inY%3))*3 + (inZ%3);
+    }
+
+    static int GetShapeIdx(const FTreeCoordinate& coord){ // same index, taken from the x/y/z of a tree coordinate
+        return GetShapeIdx(coord.getX(), coord.getY(), coord.getZ());
+    }
+};
+
+
+#endif // FP2PEXCLUSION_HPP
+
diff --git a/Src/ScalFmmConfig.h.cmake b/Src/ScalFmmConfig.h.cmake
index d23bddf1d47a78a930b93869698b9e901987c0ae..3450820790230429c52ec9ee75aa9c58225b1072 100644
--- a/Src/ScalFmmConfig.h.cmake
+++ b/Src/ScalFmmConfig.h.cmake
@@ -86,7 +86,12 @@
 ///////////////////////////////////////////////////////
 
 #cmakedefine SCALFMM_USE_EZTRACE
+#cmakedefine SCALFMM_TRACE_ALGO
+#cmakedefine SCALFMM_TRACE_P2P
+#cmakedefine SCALFMM_TRACE_P2M
 #cmakedefine SCALFMM_TRACE_M2L
+#cmakedefine SCALFMM_TRACE_L2L
+#cmakedefine SCALFMM_TRACE_L2P
 
 
 ///////////////////////////////////////////////////////
diff --git a/UTests/utestP2PExclusion.cpp b/UTests/utestP2PExclusion.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ecf03d65206dfc51801c350f83ad46ed882417b7
--- /dev/null
+++ b/UTests/utestP2PExclusion.cpp
@@ -0,0 +1,126 @@
+
+// ===================================================================================
+// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
+// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
+// This software is a computer program whose purpose is to compute the FMM.
+//
+// This software is governed by the CeCILL-C and LGPL licenses and
+// abiding by the rules of distribution of free software.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public and CeCILL-C Licenses for more details.
+// "http://www.cecill.info".
+// "http://www.gnu.org/licenses".
+// ===================================================================================
+#include "FUTester.hpp"
+
+#include "Core/FP2PExclusion.hpp"
+#include "Utils/FMath.hpp"
+
+#include <memory>
+
+/**
+* This file is a unit test for the FP2PExclusion and FP2PMiddleExclusion classes
+*/
+
+
+/** Checks that, for every shape index, two leaves of that shape never mark the same grid cell */
+class TestExclusion : public FUTester<TestExclusion> {
+    const int Size = 100; //< Edge length of the cubic grid of leaves
+    /** FP2PExclusion<2>: every leaf marks its whole +/-1 neighbourhood (clamped to the grid) */
+    void Exclusion2(){
+        const int Width = 2;
+        std::unique_ptr<int[]> grid(new int[Size*Size*Size]);
+        for(int idxShape = 0 ; idxShape < FP2PExclusion<Width>::SizeShape ; ++idxShape){
+            memset(grid.get(), 0, sizeof(int)*Size*Size*Size); // fresh grid for each shape
+
+            for(int idxX = 0 ; idxX < Size ; ++idxX){
+                for(int idxY = 0 ; idxY < Size ; ++idxY){
+                    for(int idxZ = 0 ; idxZ < Size ; ++idxZ){
+                        if(FP2PExclusion<Width>::GetShapeIdx(idxX,idxY,idxZ) == idxShape){
+                            for(int idxX_neig = FMath::Max(0,idxX-1) ; idxX_neig <= FMath::Min(Size-1,idxX+1) ; ++idxX_neig){
+                                for(int idxY_neig = FMath::Max(0,idxY-1) ; idxY_neig <= FMath::Min(Size-1,idxY+1) ; ++idxY_neig){
+                                    for(int idxZ_neig = FMath::Max(0,idxZ-1) ; idxZ_neig <= FMath::Min(Size-1,idxZ+1) ; ++idxZ_neig){
+                                        uassert(grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] == 0);
+                                        grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] = 1; // mark the visited cell itself
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    /** FP2PExclusion<1>: only the idx-1 neighbours are marked - NOTE(review): confirm the leaf itself is meant to be left out */
+    void Exclusion1(){
+        const int Width = 1;
+        std::unique_ptr<int[]> grid(new int[Size*Size*Size]);
+        for(int idxShape = 0 ; idxShape < FP2PExclusion<Width>::SizeShape ; ++idxShape){
+            memset(grid.get(), 0, sizeof(int)*Size*Size*Size); // fresh grid for each shape
+
+            for(int idxX = 0 ; idxX < Size ; ++idxX){
+                for(int idxY = 0 ; idxY < Size ; ++idxY){
+                    for(int idxZ = 0 ; idxZ < Size ; ++idxZ){
+                        if(FP2PExclusion<Width>::GetShapeIdx(idxX,idxY,idxZ) == idxShape){
+                            for(int idxX_neig = FMath::Max(0,idxX-1) ; idxX_neig < idxX ; ++idxX_neig){
+                                for(int idxY_neig = FMath::Max(0,idxY-1) ; idxY_neig < idxY ; ++idxY_neig){
+                                    for(int idxZ_neig = FMath::Max(0,idxZ-1) ; idxZ_neig < idxZ ; ++idxZ_neig){
+                                        uassert(grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] == 0);
+                                        grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] = 1; // mark the visited cell itself
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    /** FP2PMiddleExclusion: only the neighbours whose list index is below 14 are marked (index 13 is the leaf itself) */
+    void Middle(){
+        std::unique_ptr<int[]> grid(new int[Size*Size*Size]);
+        for(int idxShape = 0 ; idxShape < FP2PMiddleExclusion::SizeShape ; ++idxShape){
+            memset(grid.get(), 0, sizeof(int)*Size*Size*Size); // fresh grid for each shape
+
+            for(int idxX = 0 ; idxX < Size ; ++idxX){
+                for(int idxY = 0 ; idxY < Size ; ++idxY){
+                    for(int idxZ = 0 ; idxZ < Size ; ++idxZ){
+                        if(FP2PMiddleExclusion::GetShapeIdx(idxX,idxY,idxZ) == idxShape){
+                            for(int idxX_neig = FMath::Max(0,idxX-1) ; idxX_neig <= FMath::Min(Size-1,idxX+1) ; ++idxX_neig){
+                                for(int idxY_neig = FMath::Max(0,idxY-1) ; idxY_neig <= FMath::Min(Size-1,idxY+1) ; ++idxY_neig){
+                                    for(int idxZ_neig = FMath::Max(0,idxZ-1) ; idxZ_neig <= FMath::Min(Size-1,idxZ+1) ; ++idxZ_neig){
+                                        const int diffx = idxX_neig-idxX;
+                                        const int diffy = idxY_neig-idxY;
+                                        const int diffz = idxZ_neig-idxZ;
+                                        const int idx = (diffx+1)*9 + (diffy+1)*3 + (diffz+1); // position in the 27-entry neighbours list
+                                        if(idx < 14){
+                                            uassert(grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] == 0);
+                                            grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] = 1; // mark the visited cell itself
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+
+    /** Registers the three test methods through AddTest */
+    void SetTests(){
+        AddTest(&TestExclusion::Exclusion2,"Test 2 exclustion");
+        AddTest(&TestExclusion::Exclusion1,"Test 1 exclustion");
+        AddTest(&TestExclusion::Middle,"Test middle exclustion");
+    }
+};
+
+// You must do this
+TestClass(TestExclusion)
+
+
+
diff --git a/Utils/noDist/ChebyshevInterpolationCmpAlgo.cpp b/Utils/noDist/ChebyshevInterpolationCmpAlgo.cpp
index c6f1c667cdd7418d648f366629d05c3d48cbf5ec..5a039bd88a6574fbfe743805ceec35cd5b9746bc 100644
--- a/Utils/noDist/ChebyshevInterpolationCmpAlgo.cpp
+++ b/Utils/noDist/ChebyshevInterpolationCmpAlgo.cpp
@@ -177,7 +177,7 @@ int main(int argc, char* argv[])
 		std::string  algoStr  = FParameters::getStr(argc,argv,"-algo",  "basic");
 
 		ForFmmClass              algo1(&tree, &kernels, inUserChunckSize);
-		ForBalFmmClass          algo4(&tree, &kernels, inUserChunckSize);
+        ForBalFmmClass          algo4(&tree, &kernels);
 		TaskFmmClass            algo2(&tree, &kernels );
 		SectionTaskFmmClass algo3(&tree, &kernels );
 
@@ -203,8 +203,8 @@ int main(int argc, char* argv[])
 		time.tic();
 		//  ---------------------------------------------
 //		algo->execute(FFmmNearField);   // Here the call of the FMM algorithm
-		algo->execute(FFmmFarField);   // Here the call of the FMM algorithm
-//		algo->execute();   // Here the call of the FMM algorithm
+//		algo->execute(FFmmFarField);   // Here the call of the FMM algorithm
+		algo->execute();   // Here the call of the FMM algorithm
 		//  ---------------------------------------------
 		time.tac();
 		std::cout << "Timers Far Field \n"