diff --git a/CMakeLists.txt b/CMakeLists.txt
index c465ff5bca5d9278b7ac81749c8030c94e557765..77671fd8deb239d452a94890c457b179e8989608 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,6 +50,7 @@ OPTION( ScalFMM_USE_DOUBLE_PRECISION "Set to ON to compile in double precision"
 OPTION( ScalFMM_ATTACHE_SOURCE       "Set to ON to compile with -g"                OFF )
 OPTION( ScalFMM_USE_ADDONS           "Set to ON to compile add ons"                OFF )
 OPTION( ScalFMM_USE_SSE              "Set to ON to compile with sse support"       ON  )
+OPTION( ScalFMM_USE_ASSERT           "Set to ON to enable safe tests during execution" ON  )
 # Set scalfmm to default libraries
 SET(SCALFMM_LIBRARIES "")
 #
@@ -90,9 +91,9 @@ else()
 # Compile Release flags
 #
   SET(CMAKE_BUILD_TYPE Release)
-  # force -O2 in release
-  SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2")
-  STRING(REPLACE "-O3" "" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} )
+  # Force -O3 in release: append it, then strip any -O2 so only one level remains. The expansion is quoted so the flags reach STRING(REPLACE) as one input.
+  SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
+  STRING(REPLACE "-O2" "" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
   # if compiler is intel add -ip
   IF(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
     SET(ScaLFMM_CXX_FLAGS  "${ScaLFMM_CXX_FLAGS} -ip")
@@ -104,9 +105,9 @@ else()
   ELSE(APPLE)
     # Test if not apple and 64bits
     if(CMAKE_SIZEOF_VOID_P EQUAL 8)
-       SET(SCALFMM_FLAGS_OPTI_RELEASE "-m64 -ffast-math -flto -march=native -funroll-loops" CACHE STRING "Set your optimization flags for release mode.")
+       SET(SCALFMM_FLAGS_OPTI_RELEASE "-m64 -ffast-math -flto -march=native -funroll-loops -ftree-vectorize" CACHE STRING "Set your optimization flags for release mode.")
     else()
-       SET(SCALFMM_FLAGS_OPTI_RELEASE "-ffast-math -flto -march=native -funroll-loops" CACHE STRING "Set your optimization flags for release mode.")
+       SET(SCALFMM_FLAGS_OPTI_RELEASE "-ffast-math -flto -march=native -funroll-loops -ftree-vectorize" CACHE STRING "Set your optimization flags for release mode.")
     endif()
   ENDIF(APPLE)
 #  ADD_DEFINITIONS(${SCALFMM_FLAGS_OPTI_RELEASE})
@@ -175,6 +176,12 @@ endif()
 # Use Mem stats
 MESSAGE( STATUS "ScalFMM_USE_MEM_STATS        = ${ScalFMM_USE_MEM_STATS}" )
 
+# Use Log
+MESSAGE( STATUS "ScalFMM_USE_LOG              = ${ScalFMM_USE_LOG}" )
+
+# Use Assert
+MESSAGE( STATUS "ScalFMM_USE_ASSERT           = ${ScalFMM_USE_ASSERT}" )
+
 # Add CBLAS
 set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}  ${CBLAS_LIBRARIES}")
 
diff --git a/Doc/Doxyfile.in b/Doc/Doxyfile.in
old mode 100755
new mode 100644
index 7edeabb6663348aff29f99558e4763e9e4411ec0..b775b47755fd8a67d99dd40a3d1baf77d5dc8ee9
--- a/Doc/Doxyfile.in
+++ b/Doc/Doxyfile.in
@@ -1,103 +1,105 @@
-# Doxyfile 1.8.1.2
+# Doxyfile 1.8.4
 
 # This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project
+# doxygen (www.doxygen.org) for a project.
 #
-# All text after a hash (#) is considered a comment and will be ignored
+# All text after a double hash (##) is considered a comment and is placed
+# in front of the TAG it is preceding.
+# All text after a hash (#) is considered a comment and will be ignored.
 # The format is:
 #       TAG = value [value, ...]
 # For lists items can also be appended using:
 #       TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (" ")
+# Values that contain spaces should be placed between quotes (" ").
 
 #---------------------------------------------------------------------------
 # Project related configuration options
 #---------------------------------------------------------------------------
 
-# This tag specifies the encoding used for all characters in the config file 
-# that follow. The default is UTF-8 which is also the encoding used for all 
-# text before the first occurrence of this tag. Doxygen uses libiconv (or the 
-# iconv built into libc) for the transcoding. See 
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
 # http://www.gnu.org/software/libiconv for the list of possible encodings.
 
 DOXYFILE_ENCODING      = UTF-8
 
-# The PROJECT_NAME tag is a single word (or sequence of words) that should 
-# identify the project. Note that if you do not use Doxywizard you need 
+# The PROJECT_NAME tag is a single word (or sequence of words) that should
+# identify the project. Note that if you do not use Doxywizard you need
 # to put quotes around the project name if it contains spaces.
 
-PROJECT_NAME           = "Inria - ScalFmm"
+PROJECT_NAME           = "ScalFmm"
 
-# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
-# This could be handy for archiving the generated documentation or 
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
 # if some version control system is used.
 
-PROJECT_NUMBER         = 
+PROJECT_NUMBER         =
 
-# Using the PROJECT_BRIEF tag one can provide an optional one line description 
-# for a project that appears at the top of each page and should give viewer 
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer
 # a quick idea about the purpose of the project. Keep the description short.
 
-PROJECT_BRIEF          = 
+PROJECT_BRIEF          =
 
-# With the PROJECT_LOGO tag one can specify an logo or icon that is 
-# included in the documentation. The maximum height of the logo should not 
-# exceed 55 pixels and the maximum width should not exceed 200 pixels. 
+# With the PROJECT_LOGO tag one can specify a logo or icon that is
+# included in the documentation. The maximum height of the logo should not
+# exceed 55 pixels and the maximum width should not exceed 200 pixels.
 # Doxygen will copy the logo to the output directory.
 
-PROJECT_LOGO           = @CMAKE_CURRENT_SOURCE_DIR@/../Doc/scalfmm.png
+PROJECT_LOGO           = @CMAKE_CURRENT_SOURCE_DIR@/Doc/Image_dox/scalfmm.png
 
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 
-# base path where the generated documentation will be put. 
-# If a relative path is entered, it will be relative to the location 
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
 # where doxygen was started. If left blank the current directory will be used.
 
 OUTPUT_DIRECTORY       = ../Doc
 
-# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 
-# 4096 sub-directories (in 2 levels) under the output directory of each output 
-# format and will distribute the generated files over these directories. 
-# Enabling this option can be useful when feeding doxygen a huge amount of 
-# source files, where putting all generated files in the same directory would 
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
 # otherwise cause performance problems for the file system.
 
 CREATE_SUBDIRS         = NO
 
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all 
-# documentation generated by doxygen is written. Doxygen will use this 
-# information to generate all constant output in the proper language. 
-# The default language is English, other supported languages are: 
-# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, 
-# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, 
-# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English 
-# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, 
-# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, 
-# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Latvian, Lithuanian, Norwegian, Macedonian,
+# Persian, Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic,
+# Slovak, Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
 
 OUTPUT_LANGUAGE        = English
 
-# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 
-# include brief member descriptions after the members that are listed in 
-# the file and class documentation (similar to JavaDoc). 
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
 # Set to NO to disable this.
 
 BRIEF_MEMBER_DESC      = YES
 
-# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 
-# the brief description of a member or function before the detailed description. 
-# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
 # brief descriptions will be completely suppressed.
 
 REPEAT_BRIEF           = YES
 
-# This tag implements a quasi-intelligent brief description abbreviator 
-# that is used to form the text in various listings. Each string 
-# in this list, if found as the leading text of the brief description, will be 
-# stripped from the text and the result after processing the whole list, is 
-# used as the annotated text. Otherwise, the brief description is used as-is. 
-# If left blank, the following values are used ("$name" is automatically 
-# replaced with the name of the entity): "The $name class" "The $name widget" 
-# "The $name file" "is" "provides" "specifies" "contains" 
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
 # "represents" "a" "an" "the"
 
 ABBREVIATE_BRIEF       = "The $name class" \
@@ -112,247 +114,241 @@ ABBREVIATE_BRIEF       = "The $name class" \
                          an \
                          the
 
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 
-# Doxygen will generate a detailed section even if there is only a brief 
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
 # description.
 
 ALWAYS_DETAILED_SEC    = YES
 
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all 
-# inherited members of a class in the documentation of that class as if those 
-# members were ordinary class members. Constructors, destructors and assignment 
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
 # operators of the base classes will not be shown.
 
 INLINE_INHERITED_MEMB  = YES
 
-# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full 
-# path before files name in the file list and in the header files. If set 
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
 # to NO the shortest path that makes the file name unique will be used.
 
 FULL_PATH_NAMES        = YES
 
-# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 
-# can be used to strip a user-defined part of the path. Stripping is 
-# only done if one of the specified strings matches the left-hand part of 
-# the path. The tag can be used to show relative paths in the file list. 
-# If left blank the directory from which doxygen is run is used as the 
-# path to strip.
-
-STRIP_FROM_PATH        = 
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of 
-# the path mentioned in the documentation of a class, which tells 
-# the reader which header file to include in order to use a class. 
-# If left blank only the name of the header file containing the class 
-# definition is used. Otherwise one should specify the include paths that 
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip. Note that you specify absolute paths here, but also
+# relative paths, which will be relative from the directory where doxygen is
+# started.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
 # are normally passed to the compiler using the -I flag.
 
-STRIP_FROM_INC_PATH    = 
+STRIP_FROM_INC_PATH    =
 
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter 
-# (but less readable) file names. This can be useful if your file system 
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
 # doesn't support long names like on DOS, Mac, or CD-ROM.
 
 SHORT_NAMES            = NO
 
-# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen 
-# will interpret the first line (until the first dot) of a JavaDoc-style 
-# comment as the brief description. If set to NO, the JavaDoc 
-# comments will behave just like regular Qt-style comments 
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
 # (thus requiring an explicit @brief command for a brief description.)
 
 JAVADOC_AUTOBRIEF      = NO
 
-# If the QT_AUTOBRIEF tag is set to YES then Doxygen will 
-# interpret the first line (until the first dot) of a Qt-style 
-# comment as the brief description. If set to NO, the comments 
-# will behave just like regular Qt-style comments (thus requiring 
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
 # an explicit \brief command for a brief description.)
 
 QT_AUTOBRIEF           = NO
 
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen 
-# treat a multi-line C++ special comment block (i.e. a block of //! or /// 
-# comments) as a brief description. This used to be the default behaviour. 
-# The new default is to treat a multi-line C++ comment block as a detailed 
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
 # description. Set this tag to YES if you prefer the old behaviour instead.
 
 MULTILINE_CPP_IS_BRIEF = NO
 
-# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented 
-# member inherits the documentation from any documented member that it 
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
 # re-implements.
 
 INHERIT_DOCS           = YES
 
-# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce 
-# a new page for each member. If set to NO, the documentation of a member will 
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
 # be part of the file/class/namespace that contains it.
 
 SEPARATE_MEMBER_PAGES  = NO
 
-# The TAB_SIZE tag can be used to set the number of spaces in a tab. 
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
 # Doxygen uses this value to replace tabs by spaces in code fragments.
 
 TAB_SIZE               = 8
 
-# This tag can be used to specify a number of aliases that acts 
-# as commands in the documentation. An alias has the form "name=value". 
-# For example adding "sideeffect=\par Side Effects:\n" will allow you to 
-# put the command \sideeffect (or @sideeffect) in the documentation, which 
-# will result in a user-defined paragraph with heading "Side Effects:". 
+# This tag can be used to specify a number of aliases that act
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
 # You can put \n's in the value part of an alias to insert newlines.
 
-ALIASES                = 
+ALIASES                =
 
-# This tag can be used to specify a number of word-keyword mappings (TCL only). 
-# A mapping has the form "name=value". For example adding 
-# "class=itcl::class" will allow you to use the command class in the 
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding
+# "class=itcl::class" will allow you to use the command class in the
 # itcl::class meaning.
 
-TCL_SUBST              = 
+TCL_SUBST              =
 
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C 
-# sources only. Doxygen will then generate output that is more tailored for C. 
-# For instance, some of the names that are used will be different. The list 
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
 # of all members will be omitted, etc.
 
 OPTIMIZE_OUTPUT_FOR_C  = NO
 
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java 
-# sources only. Doxygen will then generate output that is more tailored for 
-# Java. For instance, namespaces will be presented as packages, qualified 
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
 # scopes will look different, etc.
 
 OPTIMIZE_OUTPUT_JAVA   = NO
 
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran 
-# sources only. Doxygen will then generate output that is more tailored for 
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
 # Fortran.
 
 OPTIMIZE_FOR_FORTRAN   = NO
 
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL 
-# sources. Doxygen will then generate output that is tailored for 
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
 # VHDL.
 
 OPTIMIZE_OUTPUT_VHDL   = NO
 
-# Doxygen selects the parser to use depending on the extension of the files it 
-# parses. With this tag you can assign which parser to use for a given extension. 
-# Doxygen has a built-in mapping, but you can override or extend it using this 
-# tag. The format is ext=language, where ext is a file extension, and language 
-# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, 
-# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make 
-# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C 
-# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions 
-# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
-
-EXTENSION_MAPPING      = 
-
-# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all 
-# comments according to the Markdown format, which allows for more readable 
-# documentation. See http://daringfireball.net/projects/markdown/ for details. 
-# The output of markdown processing is further processed by doxygen, so you 
-# can mix doxygen, HTML, and XML commands with Markdown formatting. 
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension,
+# and language is one of the parsers supported by doxygen: IDL, Java,
+# Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C,
+# C++. For instance to make doxygen treat .inc files as Fortran files (default
+# is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note
+# that for custom extensions you also need to set FILE_PATTERNS otherwise the
+# files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all
+# comments according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you
+# can mix doxygen, HTML, and XML commands with Markdown formatting.
 # Disable only in case of backward compatibilities issues.
 
 MARKDOWN_SUPPORT       = YES
 
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want 
-# to include (a tag file for) the STL sources as input, then you should 
-# set this tag to YES in order to let doxygen match functions declarations and 
-# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. 
-# func(std::string) {}). This also makes the inheritance and collaboration 
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word
+# or globally by setting AUTOLINK_SUPPORT to NO.
+
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also makes the inheritance and collaboration
 # diagrams that involve STL classes more complete and accurate.
 
 BUILTIN_STL_SUPPORT    = NO
 
-# If you use Microsoft's C++/CLI language, you should set this option to YES to 
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
 # enable parsing support.
 
 CPP_CLI_SUPPORT        = NO
 
-# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. 
-# Doxygen will parse them like normal C++ but will assume all classes use public 
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
 # instead of private inheritance when no explicit protection keyword is present.
 
 SIP_SUPPORT            = NO
 
-# For Microsoft's IDL there are propget and propput attributes to indicate getter 
-# and setter methods for a property. Setting this option to YES (the default) 
-# will make doxygen replace the get and set methods by a property in the 
-# documentation. This will only work if the methods are indeed getting or 
-# setting a simple type. If this is not the case, or you want to show the 
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES (the
+# default) will make doxygen replace the get and set methods by a property in
+# the documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
 # methods anyway, you should set this option to NO.
 
 IDL_PROPERTY_SUPPORT   = YES
 
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC 
-# tag is set to YES, then doxygen will reuse the documentation of the first 
-# member in the group (if any) for the other members of the group. By default 
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
 # all members of a group must be documented explicitly.
 
 DISTRIBUTE_GROUP_DOC   = NO
 
-# Set the SUBGROUPING tag to YES (the default) to allow class member groups of 
-# the same type (for instance a group of public functions) to be put as a 
-# subgroup of that type (e.g. under the Public Functions section). Set it to 
-# NO to prevent subgrouping. Alternatively, this can be done per class using 
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
 # the \nosubgrouping command.
 
 SUBGROUPING            = YES
 
-# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and 
-# unions are shown inside the group in which they are included (e.g. using 
-# @ingroup) instead of on a separate page (for HTML and Man pages) or 
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
+# unions are shown inside the group in which they are included (e.g. using
+# @ingroup) instead of on a separate page (for HTML and Man pages) or
 # section (for LaTeX and RTF).
 
 INLINE_GROUPED_CLASSES = NO
 
-# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and 
-# unions with only public data fields will be shown inline in the documentation 
-# of the scope in which they are defined (i.e. file, namespace, or group 
-# documentation), provided this scope is documented. If set to NO (the default), 
-# structs, classes, and unions are shown on a separate page (for HTML and Man 
-# pages) or section (for LaTeX and RTF).
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
+# unions with only public data fields or simple typedef fields will be shown
+# inline in the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO (the default), structs, classes, and unions are shown on a separate
+# page (for HTML and Man pages) or section (for LaTeX and RTF).
 
 INLINE_SIMPLE_STRUCTS  = NO
 
-# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum 
-# is documented as struct, union, or enum with the name of the typedef. So 
-# typedef struct TypeS {} TypeT, will appear in the documentation as a struct 
-# with name TypeT. When disabled the typedef will appear as a member of a file, 
-# namespace, or class. And the struct will be named TypeS. This can typically 
-# be useful for C code in case the coding convention dictates that all compound 
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
 # types are typedef'ed and only the typedef is referenced, never the tag name.
 
 TYPEDEF_HIDES_STRUCT   = NO
 
-# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to 
-# determine which symbols to keep in memory and which to flush to disk. 
-# When the cache is full, less often used symbols will be written to disk. 
-# For small to medium size projects (<1000 input files) the default value is 
-# probably good enough. For larger projects a too small cache size can cause 
-# doxygen to be busy swapping symbols to and from disk most of the time 
-# causing a significant performance penalty. 
-# If the system has enough physical memory increasing the cache will improve the 
-# performance by keeping more symbols in memory. Note that the value works on 
-# a logarithmic scale so increasing the size by one will roughly double the 
-# memory usage. The cache size is given by this formula: 
-# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, 
-# corresponding to a cache size of 2^16 = 65536 symbols.
-
-SYMBOL_CACHE_SIZE      = 0
-
-# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be 
-# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given 
-# their name and scope. Since this can be an expensive process and often the 
-# same symbol appear multiple times in the code, doxygen keeps a cache of 
-# pre-resolved symbols. If the cache is too small doxygen will become slower. 
-# If the cache is too large, memory is wasted. The cache size is given by this 
-# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, 
-# corresponding to a cache size of 2^16 = 65536 symbols.
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can
+# be an expensive process and often the same symbol appears multiple times in
+# the code, doxygen keeps a cache of pre-resolved symbols. If the cache is too
+# small doxygen will become slower. If the cache is too large, memory is wasted.
+# The cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid
+# range is 0..9, the default is 0, corresponding to a cache size of 2^16 = 65536
+# symbols.
 
 LOOKUP_CACHE_SIZE      = 0
 
@@ -360,14 +356,14 @@ LOOKUP_CACHE_SIZE      = 0
 # Build related configuration options
 #---------------------------------------------------------------------------
 
-# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in 
-# documentation are documented, even if no documentation was available. 
-# Private class members and static file members will be hidden unless 
-# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags, respectively, are set to YES.
 
 EXTRACT_ALL            = YES
 
-# If the EXTRACT_PRIVATE tag is set to YES all private members of a class 
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
 # will be included in the documentation.
 
 EXTRACT_PRIVATE        = YES
@@ -377,310 +373,314 @@ EXTRACT_PRIVATE        = YES
 
 EXTRACT_PACKAGE        = NO
 
-# If the EXTRACT_STATIC tag is set to YES all static members of a file 
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
 # will be included in the documentation.
 
 EXTRACT_STATIC         = YES
 
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) 
-# defined locally in source files will be included in the documentation. 
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
 # If set to NO only classes defined in header files are included.
 
 EXTRACT_LOCAL_CLASSES  = YES
 
-# This flag is only useful for Objective-C code. When set to YES local 
-# methods, which are defined in the implementation section but not in 
-# the interface are included in the documentation. 
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
 # If set to NO (the default) only methods in the interface are included.
 
 EXTRACT_LOCAL_METHODS  = NO
 
-# If this flag is set to YES, the members of anonymous namespaces will be 
-# extracted and appear in the documentation as a namespace called 
-# 'anonymous_namespace{file}', where file will be replaced with the base 
-# name of the file that contains the anonymous namespace. By default 
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
 # anonymous namespaces are hidden.
 
 EXTRACT_ANON_NSPACES   = NO
 
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all 
-# undocumented members of documented classes, files or namespaces. 
-# If set to NO (the default) these members will be included in the 
-# various overviews, but no documentation section is generated. 
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
 # This option has no effect if EXTRACT_ALL is enabled.
 
 HIDE_UNDOC_MEMBERS     = NO
 
-# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all 
-# undocumented classes that are normally visible in the class hierarchy. 
-# If set to NO (the default) these classes will be included in the various 
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
 # overviews. This option has no effect if EXTRACT_ALL is enabled.
 
 HIDE_UNDOC_CLASSES     = NO
 
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all 
-# friend (class|struct|union) declarations. 
-# If set to NO (the default) these declarations will be included in the 
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
 # documentation.
 
 HIDE_FRIEND_COMPOUNDS  = NO
 
-# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any 
-# documentation blocks found inside the body of a function. 
-# If set to NO (the default) these blocks will be appended to the 
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
 # function's detailed documentation block.
 
 HIDE_IN_BODY_DOCS      = NO
 
-# The INTERNAL_DOCS tag determines if documentation 
-# that is typed after a \internal command is included. If the tag is set 
-# to NO (the default) then the documentation will be excluded. 
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
 # Set it to YES to include the internal documentation.
 
 INTERNAL_DOCS          = YES
 
-# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate 
-# file names in lower-case letters. If set to YES upper-case letters are also 
-# allowed. This is useful if you have classes or files whose names only differ 
-# in case and if your file system supports case sensitive file names. Windows 
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
 # and Mac users are advised to set this option to NO.
 
 CASE_SENSE_NAMES       = NO
 
-# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen 
-# will show members with their full class and namespace scopes in the 
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
 # documentation. If set to YES the scope will be hidden.
 
 HIDE_SCOPE_NAMES       = NO
 
-# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen 
-# will put a list of the files that are included by a file in the documentation 
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
 # of that file.
 
 SHOW_INCLUDE_FILES     = YES
 
-# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen 
-# will list include files with double quotes in the documentation 
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
 # rather than with sharp brackets.
 
 FORCE_LOCAL_INCLUDES   = NO
 
-# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] 
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
 # is inserted in the documentation for inline members.
 
 INLINE_INFO            = YES
 
-# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen 
-# will sort the (detailed) documentation of file and class members 
-# alphabetically by member name. If set to NO the members will appear in 
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
 # declaration order.
 
 SORT_MEMBER_DOCS       = YES
 
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the 
-# brief documentation of file, namespace and class members alphabetically 
-# by member name. If set to NO (the default) the members will appear in 
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
 # declaration order.
 
 SORT_BRIEF_DOCS        = NO
 
-# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen 
-# will sort the (brief and detailed) documentation of class members so that 
-# constructors and destructors are listed first. If set to NO (the default) 
-# the constructors will appear in the respective orders defined by 
-# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. 
-# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO 
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
 # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
 
 SORT_MEMBERS_CTORS_1ST = NO
 
-# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the 
-# hierarchy of group names into alphabetical order. If set to NO (the default) 
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
 # the group names will appear in their defined order.
 
 SORT_GROUP_NAMES       = NO
 
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be 
-# sorted by fully-qualified names, including namespaces. If set to 
-# NO (the default), the class list will be sorted only by class name, 
-# not including the namespace part. 
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. 
-# Note: This option applies only to the class list, not to the 
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
 # alphabetical list.
 
 SORT_BY_SCOPE_NAME     = NO
 
-# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to 
-# do proper type resolution of all parameters of a function it will reject a 
-# match between the prototype and the implementation of a member function even 
-# if there is only one candidate or it is obvious which candidate to choose 
-# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen 
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
+# do proper type resolution of all parameters of a function it will reject a
+# match between the prototype and the implementation of a member function even
+# if there is only one candidate or it is obvious which candidate to choose
+# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
 # will still accept a match between prototype and implementation in such cases.
 
 STRICT_PROTO_MATCHING  = NO
 
-# The GENERATE_TODOLIST tag can be used to enable (YES) or 
-# disable (NO) the todo list. This list is created by putting \todo 
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
 # commands in the documentation.
 
 GENERATE_TODOLIST      = NO
 
-# The GENERATE_TESTLIST tag can be used to enable (YES) or 
-# disable (NO) the test list. This list is created by putting \test 
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
 # commands in the documentation.
 
 GENERATE_TESTLIST      = YES
 
-# The GENERATE_BUGLIST tag can be used to enable (YES) or 
-# disable (NO) the bug list. This list is created by putting \bug 
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
 # commands in the documentation.
 
 GENERATE_BUGLIST       = YES
 
-# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or 
-# disable (NO) the deprecated list. This list is created by putting 
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
 # \deprecated commands in the documentation.
 
 GENERATE_DEPRECATEDLIST= YES
 
-# The ENABLED_SECTIONS tag can be used to enable conditional 
-# documentation sections, marked by \if sectionname ... \endif.
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if section-label ... \endif
+# and \cond section-label ... \endcond blocks.
 
-ENABLED_SECTIONS       = 
+ENABLED_SECTIONS       =
 
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines 
-# the initial value of a variable or macro consists of for it to appear in 
-# the documentation. If the initializer consists of more lines than specified 
-# here it will be hidden. Use a value of 0 to hide initializers completely. 
-# The appearance of the initializer of individual variables and macros in the 
-# documentation can be controlled using \showinitializer or \hideinitializer 
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or macro consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and macros in the
+# documentation can be controlled using \showinitializer or \hideinitializer
 # command in the documentation regardless of this setting.
 
 MAX_INITIALIZER_LINES  = 30
 
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated 
-# at the bottom of the documentation of classes and structs. If set to YES the 
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
 # list will mention the files that were used to generate the documentation.
 
 SHOW_USED_FILES        = YES
 
-# Set the SHOW_FILES tag to NO to disable the generation of the Files page. 
-# This will remove the Files entry from the Quick Index and from the 
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
 # Folder Tree View (if specified). The default is YES.
 
 SHOW_FILES             = YES
 
-# Set the SHOW_NAMESPACES tag to NO to disable the generation of the 
-# Namespaces page.  This will remove the Namespaces entry from the Quick Index 
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
 # and from the Folder Tree View (if specified). The default is YES.
 
 SHOW_NAMESPACES        = YES
 
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that 
-# doxygen should invoke to get the current version for each file (typically from 
-# the version control system). Doxygen will invoke the program by executing (via 
-# popen()) the command <command> <input-file>, where <command> is the value of 
-# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file 
-# provided by doxygen. Whatever the program writes to standard output 
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
 # is used as the file version. See the manual for examples.
 
-FILE_VERSION_FILTER    = 
+FILE_VERSION_FILTER    =
 
-# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed 
-# by doxygen. The layout file controls the global structure of the generated 
-# output files in an output format independent way. To create the layout file 
-# that represents doxygen's defaults, run doxygen with the -l option. 
-# You can optionally specify a file name after the option, if omitted 
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option.
+# You can optionally specify a file name after the option, if omitted
 # DoxygenLayout.xml will be used as the name of the layout file.
 
-LAYOUT_FILE            = 
+LAYOUT_FILE            =
 
-# The CITE_BIB_FILES tag can be used to specify one or more bib files 
-# containing the references data. This must be a list of .bib files. The 
-# .bib extension is automatically appended if omitted. Using this command 
-# requires the bibtex tool to be installed. See also 
-# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style 
-# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this 
-# feature you need bibtex and perl available in the search path.
+# The CITE_BIB_FILES tag can be used to specify one or more bib files
+# containing the references data. This must be a list of .bib files. The
+# .bib extension is automatically appended if omitted. Using this command
+# requires the bibtex tool to be installed. See also
+# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
+# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
+# feature you need bibtex and perl available in the search path. Do not use
+# file names with spaces, bibtex cannot handle them.
 
-CITE_BIB_FILES         = 
+CITE_BIB_FILES         =
 
 #---------------------------------------------------------------------------
 # configuration options related to warning and progress messages
 #---------------------------------------------------------------------------
 
-# The QUIET tag can be used to turn on/off the messages that are generated 
+# The QUIET tag can be used to turn on/off the messages that are generated
 # by doxygen. Possible values are YES and NO. If left blank NO is used.
 
 QUIET                  = NO
 
-# The WARNINGS tag can be used to turn on/off the warning messages that are 
-# generated by doxygen. Possible values are YES and NO. If left blank 
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
 # NO is used.
 
 WARNINGS               = YES
 
-# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings 
-# for undocumented members. If EXTRACT_ALL is set to YES then this flag will 
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
 # automatically be disabled.
 
 WARN_IF_UNDOCUMENTED   = YES
 
-# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for 
-# potential errors in the documentation, such as not documenting some 
-# parameters in a documented function, or documenting parameters that 
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
 # don't exist or using markup commands wrongly.
 
 WARN_IF_DOC_ERROR      = YES
 
-# The WARN_NO_PARAMDOC option can be enabled to get warnings for 
-# functions that are documented, but have no documentation for their parameters 
-# or return value. If set to NO (the default) doxygen will only warn about 
-# wrong or incomplete parameter documentation, but not about the absence of 
+# The WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
 # documentation.
 
 WARN_NO_PARAMDOC       = NO
 
-# The WARN_FORMAT tag determines the format of the warning messages that 
-# doxygen can produce. The string should contain the $file, $line, and $text 
-# tags, which will be replaced by the file and line number from which the 
-# warning originated and the warning text. Optionally the format may contain 
-# $version, which will be replaced by the version of the file (if it could 
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
 # be obtained via FILE_VERSION_FILTER)
 
 WARN_FORMAT            = "$file:$line: $text"
 
-# The WARN_LOGFILE tag can be used to specify a file to which warning 
-# and error messages should be written. If left blank the output is written 
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
 # to stderr.
 
-WARN_LOGFILE           = 
+WARN_LOGFILE           =
 
 #---------------------------------------------------------------------------
 # configuration options related to the input files
 #---------------------------------------------------------------------------
 
-# The INPUT tag can be used to specify the files and/or directories that contain 
-# documented source files. You may enter file names like "myfile.cpp" or 
-# directories like "/usr/src/myproject". Separate the files or directories 
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
 # with spaces.
 
-INPUT                  = @CMAKE_CURRENT_SOURCE_DIR@/../Src
+INPUT                  = @CMAKE_CURRENT_SOURCE_DIR@/../Doc/Site_dox \
+                         @CMAKE_CURRENT_SOURCE_DIR@/../Src/
 
-# This tag can be used to specify the character encoding of the source files 
-# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is 
-# also the default input encoding. Doxygen uses libiconv (or the iconv built 
-# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for 
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
 # the list of possible encodings.
 
 INPUT_ENCODING         = UTF-8
 
-# If the value of the INPUT tag contains directories, you can use the 
-# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
-# and *.h) to filter out the source-files in the directories. If left 
-# blank the following patterns are tested: 
-# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh 
-# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py 
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
+# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
 # *.f90 *.f *.for *.vhd *.vhdl
 
 FILE_PATTERNS          = *.c \
@@ -715,153 +715,166 @@ FILE_PATTERNS          = *.c \
                          *.vhd \
                          *.vhdl
 
-# The RECURSIVE tag can be used to turn specify whether or not subdirectories 
-# should be searched for input files as well. Possible values are YES and NO. 
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
 # If left blank NO is used.
 
 RECURSIVE              = YES
 
-# The EXCLUDE tag can be used to specify files and/or directories that should be 
-# excluded from the INPUT source files. This way you can easily exclude a 
-# subdirectory from a directory tree whose root is specified with the INPUT tag. 
-# Note that relative paths are relative to the directory from which doxygen is 
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+# Note that relative paths are relative to the directory from which doxygen is
 # run.
 
-EXCLUDE                = 
+EXCLUDE                =
 
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or 
-# directories that are symbolic links (a Unix file system feature) are excluded 
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
 # from the input.
 
 EXCLUDE_SYMLINKS       = YES
 
-# If the value of the INPUT tag contains directories, you can use the 
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude 
-# certain files from those directories. Note that the wildcards are matched 
-# against the file with absolute path, so to exclude all test directories 
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
 # for example use the pattern */test/*
 
-EXCLUDE_PATTERNS       = 
+EXCLUDE_PATTERNS       =
 
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names 
-# (namespaces, classes, functions, etc.) that should be excluded from the 
-# output. The symbol name can be a fully qualified name, a word, or if the 
-# wildcard * is used, a substring. Examples: ANamespace, AClass, 
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
 # AClass::ANamespace, ANamespace::*Test
 
-EXCLUDE_SYMBOLS        = 
+EXCLUDE_SYMBOLS        =
 
-# The EXAMPLE_PATH tag can be used to specify one or more files or 
-# directories that contain example code fragments that are included (see 
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
 # the \include command).
 
 EXAMPLE_PATH           = @CMAKE_CURRENT_SOURCE_DIR@/../UTests \
                          @CMAKE_CURRENT_SOURCE_DIR@/../Tests
 
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the 
-# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
-# and *.h) to filter out the source-files in the directories. If left 
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
 # blank all files are included.
 
 EXAMPLE_PATTERNS       = *
 
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be 
-# searched for input files to be used with the \include or \dontinclude 
-# commands irrespective of the value of the RECURSIVE tag. 
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
 # Possible values are YES and NO. If left blank NO is used.
 
 EXAMPLE_RECURSIVE      = YES
 
-# The IMAGE_PATH tag can be used to specify one or more files or 
-# directories that contain image that are included in the documentation (see 
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
 # the \image command).
 
-IMAGE_PATH             = 
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should 
-# invoke to filter for each input file. Doxygen will invoke the filter program 
-# by executing (via popen()) the command <filter> <input-file>, where <filter> 
-# is the value of the INPUT_FILTER tag, and <input-file> is the name of an 
-# input file. Doxygen will then use the output that the filter program writes 
-# to standard output.  If FILTER_PATTERNS is specified, this tag will be 
-# ignored.
-
-INPUT_FILTER           = 
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern 
-# basis.  Doxygen will compare the file name with each pattern and apply the 
-# filter if there is a match.  The filters are a list of the form: 
-# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further 
-# info on how filters are used. If FILTER_PATTERNS is empty or if 
+IMAGE_PATH             = @CMAKE_CURRENT_SOURCE_DIR@/../Doc/Image_dox/.
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be ignored.
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty or if
 # non of the patterns match the file name, INPUT_FILTER is applied.
 
-FILTER_PATTERNS        = 
+FILTER_PATTERNS        =
 
-# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using 
-# INPUT_FILTER) will be used to filter the input files when producing source 
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
 # files to browse (i.e. when SOURCE_BROWSER is set to YES).
 
 FILTER_SOURCE_FILES    = NO
 
-# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file 
-# pattern. A pattern will override the setting for FILTER_PATTERN (if any) 
-# and it is also possible to disable source filtering for a specific pattern 
-# using *.ext= (so without naming a filter). This option only has effect when 
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any)
+# and it is also possible to disable source filtering for a specific pattern
+# using *.ext= (so without naming a filter). This option only has effect when
 # FILTER_SOURCE_FILES is enabled.
 
-FILTER_SOURCE_PATTERNS = 
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
 
 #---------------------------------------------------------------------------
 # configuration options related to source browsing
 #---------------------------------------------------------------------------
 
-# If the SOURCE_BROWSER tag is set to YES then a list of source files will 
-# be generated. Documented entities will be cross-referenced with these sources. 
-# Note: To get rid of all source code in the generated output, make sure also 
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
 # VERBATIM_HEADERS is set to NO.
 
 SOURCE_BROWSER         = NO
 
-# Setting the INLINE_SOURCES tag to YES will include the body 
+# Setting the INLINE_SOURCES tag to YES will include the body
 # of functions and classes directly in the documentation.
 
 INLINE_SOURCES         = NO
 
-# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct 
-# doxygen to hide any special comment blocks from generated source code 
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
 # fragments. Normal C, C++ and Fortran comments will always remain visible.
 
 STRIP_CODE_COMMENTS    = NO
 
-# If the REFERENCED_BY_RELATION tag is set to YES 
-# then for each documented function all documented 
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
 # functions referencing it will be listed.
 
 REFERENCED_BY_RELATION = NO
 
-# If the REFERENCES_RELATION tag is set to YES 
-# then for each documented function all documented entities 
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
 # called/used by that function will be listed.
 
 REFERENCES_RELATION    = NO
 
-# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) 
-# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from 
-# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will 
-# link to the source code.  Otherwise they will link to the documentation.
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.
+# Otherwise they will link to the documentation.
 
 REFERENCES_LINK_SOURCE = NO
 
-# If the USE_HTAGS tag is set to YES then the references to source code 
-# will point to the HTML generated by the htags(1) tool instead of doxygen 
-# built-in source browser. The htags tool is part of GNU's global source 
-# tagging system (see http://www.gnu.org/software/global/global.html). You 
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
 # will need version 4.8.6 or higher.
 
 USE_HTAGS              = NO
 
-# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen 
-# will generate a verbatim copy of the header file for each class for 
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
 # which an include is specified. Set to NO to disable this.
 
 VERBATIM_HEADERS       = NO
@@ -870,161 +883,171 @@ VERBATIM_HEADERS       = NO
 # configuration options related to the alphabetical class index
 #---------------------------------------------------------------------------
 
-# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index 
-# of all compounds will be generated. Enable this if the project 
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
 # contains a lot of classes, structs, unions or interfaces.
 
 ALPHABETICAL_INDEX     = YES
 
-# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then 
-# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns 
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
 # in which this list will be split (can be a number in the range [1..20])
 
 COLS_IN_ALPHA_INDEX    = 5
 
-# In case all classes in a project start with a common prefix, all 
-# classes will be put under the same header in the alphabetical index. 
-# The IGNORE_PREFIX tag can be used to specify one or more prefixes that 
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
 # should be ignored while generating the index headers.
 
-IGNORE_PREFIX          = 
+IGNORE_PREFIX          =
 
 #---------------------------------------------------------------------------
 # configuration options related to the HTML output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_HTML tag is set to YES (the default) Doxygen will 
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
 # generate HTML output.
 
 GENERATE_HTML          = YES
 
-# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. 
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
 # put in front of it. If left blank `html' will be used as the default path.
 
 HTML_OUTPUT            = html
 
-# The HTML_FILE_EXTENSION tag can be used to specify the file extension for 
-# each generated HTML page (for example: .htm,.php,.asp). If it is left blank 
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
 # doxygen will generate files with .html extension.
 
 HTML_FILE_EXTENSION    = .html
 
-# The HTML_HEADER tag can be used to specify a personal HTML header for 
-# each generated HTML page. If it is left blank doxygen will generate a 
-# standard header. Note that when using a custom header you are responsible  
-# for the proper inclusion of any scripts and style sheets that doxygen 
-# needs, which is dependent on the configuration options used. 
-# It is advised to generate a default header using "doxygen -w html 
-# header.html footer.html stylesheet.css YourConfigFile" and then modify 
-# that header. Note that the header is subject to change so you typically 
-# have to redo this when upgrading to a newer version of doxygen or when 
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header. Note that when using a custom header you are responsible
+# for the proper inclusion of any scripts and style sheets that doxygen
+# needs, which is dependent on the configuration options used.
+# It is advised to generate a default header using "doxygen -w html
+# header.html footer.html stylesheet.css YourConfigFile" and then modify
+# that header. Note that the header is subject to change so you typically
+# have to redo this when upgrading to a newer version of doxygen or when
 # changing the value of configuration settings such as GENERATE_TREEVIEW!
 
-HTML_HEADER            = 
+HTML_HEADER            =
 
-# The HTML_FOOTER tag can be used to specify a personal HTML footer for 
-# each generated HTML page. If it is left blank doxygen will generate a 
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
 # standard footer.
 
-HTML_FOOTER            = 
+HTML_FOOTER            =
 
-# The HTML_STYLESHEET tag can be used to specify a user-defined cascading 
-# style sheet that is used by each HTML page. It can be used to 
-# fine-tune the look of the HTML output. If the tag is left blank doxygen 
-# will generate a default style sheet. Note that doxygen will try to copy 
-# the style sheet file to the HTML output directory, so don't put your own 
-# style sheet in the HTML output directory as well, or it will be erased!
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If left blank doxygen will
+# generate a default style sheet. Note that it is recommended to use
+# HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this
+# tag will in the future become obsolete.
 
-HTML_STYLESHEET        = 
+HTML_STYLESHEET        =
 
-# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or 
-# other source files which should be copied to the HTML output directory. Note 
-# that these files will be copied to the base HTML output directory. Use the 
-# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these 
-# files. In the HTML_STYLESHEET file, use the file name only. Also note that 
+# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional
+# user-defined cascading style sheet that is included after the standard
+# style sheets created by doxygen. Using this option one can overrule
+# certain style aspects. This is preferred over using HTML_STYLESHEET
+# since it does not replace the standard style sheet and is therefore more
+# robust against future updates. Doxygen will copy the style sheet file to
+# the output directory.
+
+HTML_EXTRA_STYLESHEET  =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that
 # the files will be copied as-is; there are no commands or markers available.
 
-HTML_EXTRA_FILES       = 
+HTML_EXTRA_FILES       =
 
-# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. 
-# Doxygen will adjust the colors in the style sheet and background images 
-# according to this color. Hue is specified as an angle on a colorwheel, 
-# see http://en.wikipedia.org/wiki/Hue for more information. 
-# For instance the value 0 represents red, 60 is yellow, 120 is green, 
-# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. 
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
+# Doxygen will adjust the colors in the style sheet and background images
+# according to this color. Hue is specified as an angle on a colorwheel,
+# see http://en.wikipedia.org/wiki/Hue for more information.
+# For instance the value 0 represents red, 60 is yellow, 120 is green,
+# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
 # The allowed range is 0 to 359.
 
 HTML_COLORSTYLE_HUE    = 220
 
-# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of 
-# the colors in the HTML output. For a value of 0 the output will use 
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
+# the colors in the HTML output. For a value of 0 the output will use
 # grayscales only. A value of 255 will produce the most vivid colors.
 
 HTML_COLORSTYLE_SAT    = 100
 
-# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to 
-# the luminance component of the colors in the HTML output. Values below 
-# 100 gradually make the output lighter, whereas values above 100 make 
-# the output darker. The value divided by 100 is the actual gamma applied, 
-# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, 
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
+# the luminance component of the colors in the HTML output. Values below
+# 100 gradually make the output lighter, whereas values above 100 make
+# the output darker. The value divided by 100 is the actual gamma applied,
+# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
 # and 100 does not change the gamma.
 
 HTML_COLORSTYLE_GAMMA  = 80
 
-# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML 
-# page will contain the date and time when the page was generated. Setting 
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
 # this to NO can help when comparing the output of multiple runs.
 
 HTML_TIMESTAMP         = YES
 
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML 
-# documentation will contain sections that can be hidden and shown after the 
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
 # page has loaded.
 
 HTML_DYNAMIC_SECTIONS  = NO
 
-# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of 
-# entries shown in the various tree structured indices initially; the user 
-# can expand and collapse entries dynamically later on. Doxygen will expand 
-# the tree to such a level that at most the specified number of entries are 
-# visible (unless a fully collapsed tree already exceeds this amount). 
-# So setting the number of entries 1 will produce a full collapsed tree by 
-# default. 0 is a special value representing an infinite number of entries 
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of
+# entries shown in the various tree structured indices initially; the user
+# can expand and collapse entries dynamically later on. Doxygen will expand
+# the tree to such a level that at most the specified number of entries are
+# visible (unless a fully collapsed tree already exceeds this amount).
+# So setting the number of entries 1 will produce a full collapsed tree by
+# default. 0 is a special value representing an infinite number of entries
 # and will result in a full expanded tree by default.
 
 HTML_INDEX_NUM_ENTRIES = 100
 
-# If the GENERATE_DOCSET tag is set to YES, additional index files 
-# will be generated that can be used as input for Apple's Xcode 3 
-# integrated development environment, introduced with OSX 10.5 (Leopard). 
-# To create a documentation set, doxygen will generate a Makefile in the 
-# HTML output directory. Running make will produce the docset in that 
-# directory and running "make install" will install the docset in 
-# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find 
-# it at startup. 
-# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html 
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
 # for more information.
 
 GENERATE_DOCSET        = NO
 
-# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the 
-# feed. A documentation feed provides an umbrella under which multiple 
-# documentation sets from a single provider (such as a company or product suite) 
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
 # can be grouped.
 
 DOCSET_FEEDNAME        = "Doxygen generated docs"
 
-# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that 
-# should uniquely identify the documentation set bundle. This should be a 
-# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen 
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
 # will append .docset to the name.
 
 DOCSET_BUNDLE_ID       = org.doxygen.Project
 
-# When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify 
-# the documentation publisher. This should be a reverse domain-name style 
-# string, e.g. com.mycompany.MyDocSet.documentation.
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely
+# identify the documentation publisher. This should be a reverse domain-name
+# style string, e.g. com.mycompany.MyDocSet.documentation.
 
 DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
 
@@ -1032,314 +1055,374 @@ DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
 
 DOCSET_PUBLISHER_NAME  = Publisher
 
-# If the GENERATE_HTMLHELP tag is set to YES, additional index files 
-# will be generated that can be used as input for tools like the 
-# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) 
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
 # of the generated HTML documentation.
 
 GENERATE_HTMLHELP      = NO
 
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can 
-# be used to specify the file name of the resulting .chm file. You 
-# can add a path in front of the file if the result should not be 
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
 # written to the html output directory.
 
-CHM_FILE               = 
+CHM_FILE               =
 
-# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can 
-# be used to specify the location (absolute path including file name) of 
-# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run 
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
 # the HTML help compiler on the generated index.hhp.
 
-HHC_LOCATION           = 
+HHC_LOCATION           =
 
-# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag 
-# controls if a separate .chi index file is generated (YES) or that 
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
 # it should be included in the master .chm file (NO).
 
 GENERATE_CHI           = NO
 
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING 
-# is used to encode HtmlHelp index (hhk), content (hhc) and project file 
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
 # content.
 
-CHM_INDEX_ENCODING     = 
+CHM_INDEX_ENCODING     =
 
-# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag 
-# controls whether a binary table of contents is generated (YES) or a 
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
 # normal table of contents (NO) in the .chm file.
 
 BINARY_TOC             = NO
 
-# The TOC_EXPAND flag can be set to YES to add extra items for group members 
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
 # to the contents of the HTML help documentation and to the tree view.
 
 TOC_EXPAND             = NO
 
-# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and 
-# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated 
-# that can be used as input for Qt's qhelpgenerator to generate a 
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
+# that can be used as input for Qt's qhelpgenerator to generate a
 # Qt Compressed Help (.qch) of the generated HTML documentation.
 
 GENERATE_QHP           = NO
 
-# If the QHG_LOCATION tag is specified, the QCH_FILE tag can 
-# be used to specify the file name of the resulting .qch file. 
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
 # The path specified is relative to the HTML output folder.
 
-QCH_FILE               = 
+QCH_FILE               =
 
-# The QHP_NAMESPACE tag specifies the namespace to use when generating 
-# Qt Help Project output. For more information please see 
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
 # http://doc.trolltech.com/qthelpproject.html#namespace
 
 QHP_NAMESPACE          = org.doxygen.Project
 
-# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating 
-# Qt Help Project output. For more information please see 
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
 # http://doc.trolltech.com/qthelpproject.html#virtual-folders
 
 QHP_VIRTUAL_FOLDER     = doc
 
-# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to 
-# add. For more information please see 
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
+# add. For more information please see
 # http://doc.trolltech.com/qthelpproject.html#custom-filters
 
-QHP_CUST_FILTER_NAME   = 
+QHP_CUST_FILTER_NAME   =
 
-# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the 
-# custom filter to add. For more information please see 
-# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters"> 
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
 # Qt Help Project / Custom Filters</a>.
 
-QHP_CUST_FILTER_ATTRS  = 
+QHP_CUST_FILTER_ATTRS  =
 
-# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this 
-# project's 
-# filter section matches. 
-# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes"> 
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
 # Qt Help Project / Filter Attributes</a>.
 
-QHP_SECT_FILTER_ATTRS  = 
+QHP_SECT_FILTER_ATTRS  =
 
-# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can 
-# be used to specify the location of Qt's qhelpgenerator. 
-# If non-empty doxygen will try to run qhelpgenerator on the generated 
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
 # .qhp file.
 
-QHG_LOCATION           = 
+QHG_LOCATION           =
 
-# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files  
-# will be generated, which together with the HTML files, form an Eclipse help 
-# plugin. To install this plugin and make it available under the help contents 
-# menu in Eclipse, the contents of the directory containing the HTML and XML 
-# files needs to be copied into the plugins directory of eclipse. The name of 
-# the directory within the plugins directory should be the same as 
-# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before 
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+# will be generated, which together with the HTML files, form an Eclipse help
+# plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files needs to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
 # the help appears.
 
 GENERATE_ECLIPSEHELP   = NO
 
-# A unique identifier for the eclipse help plugin. When installing the plugin 
-# the directory name containing the HTML and XML files should also have 
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
 # this name.
 
 ECLIPSE_DOC_ID         = org.doxygen.Project
 
-# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) 
-# at top of each HTML page. The value NO (the default) enables the index and 
-# the value YES disables it. Since the tabs have the same information as the 
-# navigation tree you can set this option to NO if you already set 
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
+# at top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it. Since the tabs have the same information as the
+# navigation tree you can set this option to YES if you already set
 # GENERATE_TREEVIEW to YES.
 
 DISABLE_INDEX          = NO
 
-# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index 
-# structure should be generated to display hierarchical information. 
-# If the tag value is set to YES, a side panel will be generated 
-# containing a tree-like index structure (just like the one that 
-# is generated for HTML Help). For this to work a browser that supports 
-# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). 
-# Windows users are probably better off using the HTML help feature. 
-# Since the tree basically has the same information as the tab index you 
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+# Since the tree basically has the same information as the tab index you
 # could consider to set DISABLE_INDEX to NO when enabling this option.
 
 GENERATE_TREEVIEW      = YES
 
-# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values 
-# (range [0,1..20]) that doxygen will group on one line in the generated HTML 
-# documentation. Note that a value of 0 will completely suppress the enum 
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
+# (range [0,1..20]) that doxygen will group on one line in the generated HTML
+# documentation. Note that a value of 0 will completely suppress the enum
 # values from appearing in the overview section.
 
 ENUM_VALUES_PER_LINE   = 4
 
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be 
-# used to set the initial width (in pixels) of the frame in which the tree 
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
 # is shown.
 
 TREEVIEW_WIDTH         = 250
 
-# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open 
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
 # links to external symbols imported via tag files in a separate window.
 
 EXT_LINKS_IN_WINDOW    = NO
 
-# Use this tag to change the font size of Latex formulas included 
-# as images in the HTML documentation. The default is 10. Note that 
-# when you change the font size after a successful doxygen run you need 
-# to manually remove any form_*.png images from the HTML output directory 
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
 # to force them to be regenerated.
 
 FORMULA_FONTSIZE       = 10
 
-# Use the FORMULA_TRANPARENT tag to determine whether or not the images 
-# generated for formulas are transparent PNGs. Transparent PNGs are 
-# not supported properly for IE 6.0, but are supported on all modern browsers. 
-# Note that when changing this option you need to delete any form_*.png files 
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are
+# not supported properly for IE 6.0, but are supported on all modern browsers.
+# Note that when changing this option you need to delete any form_*.png files
 # in the HTML output before the changes have effect.
 
 FORMULA_TRANSPARENT    = YES
 
-# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax 
-# (see http://www.mathjax.org) which uses client side Javascript for the 
-# rendering instead of using prerendered bitmaps. Use this if you do not 
-# have LaTeX installed or if you want to formulas look prettier in the HTML 
-# output. When enabled you may also need to install MathJax separately and 
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
+# (see http://www.mathjax.org) which uses client side Javascript for the
+# rendering instead of using prerendered bitmaps. Use this if you do not
+# have LaTeX installed or if you want the formulas to look prettier in the HTML
+# output. When enabled you may also need to install MathJax separately and
 # configure the path to it using the MATHJAX_RELPATH option.
 
 USE_MATHJAX            = NO
 
-# When MathJax is enabled you need to specify the location relative to the 
-# HTML output directory using the MATHJAX_RELPATH option. The destination 
-# directory should contain the MathJax.js script. For instance, if the mathjax 
-# directory is located at the same level as the HTML output directory, then 
-# MATHJAX_RELPATH should be ../mathjax. The default value points to 
-# the MathJax Content Delivery Network so you can quickly see the result without 
-# installing MathJax.  However, it is strongly recommended to install a local 
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and
+# SVG. The default value is HTML-CSS, which is slower, but has the best
+# compatibility.
+
+MATHJAX_FORMAT         = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the
+# HTML output directory using the MATHJAX_RELPATH option. The destination
+# directory should contain the MathJax.js script. For instance, if the mathjax
+# directory is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to
+# the MathJax Content Delivery Network so you can quickly see the result without
+# installing MathJax.
+# However, it is strongly recommended to install a local
 # copy of MathJax from http://www.mathjax.org before deployment.
 
 MATHJAX_RELPATH        = http://www.mathjax.org/mathjax
 
-# The MATHJAX_EXTENSIONS tag can be used to specify one or MathJax extension 
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension
 # names that should be enabled during MathJax rendering.
 
-MATHJAX_EXTENSIONS     = 
+MATHJAX_EXTENSIONS     =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript
+# pieces of code that will be used on startup of the MathJax code.
 
-# When the SEARCHENGINE tag is enabled doxygen will generate a search box 
-# for the HTML output. The underlying search engine uses javascript 
-# and DHTML and should work on any modern browser. Note that when using 
-# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets 
-# (GENERATE_DOCSET) there is already a search function so this one should 
-# typically be disabled. For large projects the javascript based search engine 
+MATHJAX_CODEFILE       =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
+# (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
 # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
 
 SEARCHENGINE           = NO
 
-# When the SERVER_BASED_SEARCH tag is enabled the search engine will be 
-# implemented using a PHP enabled web server instead of at the web client 
-# using Javascript. Doxygen will generate the search PHP script and index 
-# file to put on the web server. The advantage of the server 
-# based approach is that it scales better to large projects and allows 
-# full text search. The disadvantages are that it is more difficult to setup 
-# and does not have live searching capabilities.
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using Javascript.
+# There are two flavours of web server based search depending on the
+# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for
+# searching and an index file used by the script. When EXTERNAL_SEARCH is
+# enabled the indexing and searching needs to be provided by external tools.
+# See the manual for details.
 
 SERVER_BASED_SEARCH    = NO
 
+# When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain
+# the search results. Doxygen ships with an example indexer (doxyindexer) and
+# search engine (doxysearch.cgi) which are based on the open source search
+# engine library Xapian. See the manual for configuration details.
+
+EXTERNAL_SEARCH        = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+# Doxygen ships with an example search engine (doxysearch) which is based on
+# the open source search engine library Xapian. See the manual for configuration
+# details.
+
+SEARCHENGINE_URL       =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+
+SEARCHDATA_FILE        = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+
+EXTERNAL_SEARCH_ID     =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
+# to a relative location where the documentation can be found.
+# The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ...
+
+EXTRA_SEARCH_MAPPINGS  =
+
 #---------------------------------------------------------------------------
 # configuration options related to the LaTeX output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will 
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
 # generate Latex output.
 
 GENERATE_LATEX         = NO
 
-# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
 # put in front of it. If left blank `latex' will be used as the default path.
 
 LATEX_OUTPUT           = latex
 
-# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be 
-# invoked. If left blank `latex' will be used as the default command name. 
-# Note that when enabling USE_PDFLATEX this option is only used for 
-# generating bitmaps for formulas in the HTML output, but not in the 
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
 # Makefile that is written to the output directory.
 
 LATEX_CMD_NAME         = latex
 
-# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to 
-# generate index for LaTeX. If left blank `makeindex' will be used as the 
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
 # default command name.
 
 MAKEINDEX_CMD_NAME     = makeindex
 
-# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact 
-# LaTeX documents. This may be useful for small projects and may help to 
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
 # save some trees in general.
 
 COMPACT_LATEX          = NO
 
-# The PAPER_TYPE tag can be used to set the paper type that is used 
-# by the printer. Possible values are: a4, letter, legal and 
-# executive. If left blank a4wide will be used.
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, letter, legal and
+# executive. If left blank a4 will be used.
 
 PAPER_TYPE             = a4wide
 
-# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX 
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
 # packages that should be included in the LaTeX output.
 
-EXTRA_PACKAGES         = 
+EXTRA_PACKAGES         =
 
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for 
-# the generated latex document. The header should contain everything until 
-# the first chapter. If it is left blank doxygen will generate a 
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
 # standard header. Notice: only use this tag if you know what you are doing!
 
-LATEX_HEADER           = 
+LATEX_HEADER           =
 
-# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for 
-# the generated latex document. The footer should contain everything after 
-# the last chapter. If it is left blank doxygen will generate a 
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
+# the generated latex document. The footer should contain everything after
+# the last chapter. If it is left blank doxygen will generate a
 # standard footer. Notice: only use this tag if you know what you are doing!
 
-LATEX_FOOTER           = 
+LATEX_FOOTER           =
 
-# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated 
-# is prepared for conversion to pdf (using ps2pdf). The pdf file will 
-# contain links (just like the HTML output) instead of page references 
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images
+# or other source files which should be copied to the LaTeX output directory.
+# Note that the files will be copied as-is; there are no commands or markers
+# available.
+
+LATEX_EXTRA_FILES      =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
 # This makes the output suitable for online browsing using a pdf viewer.
 
 PDF_HYPERLINKS         = YES
 
-# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of 
-# plain latex in the generated Makefile. Set this option to YES to get a 
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
 # higher quality PDF documentation.
 
 USE_PDFLATEX           = YES
 
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. 
-# command to the generated LaTeX files. This will instruct LaTeX to keep 
-# running if errors occur, instead of asking the user for help. 
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
 # This option is also used when generating formulas in HTML.
 
 LATEX_BATCHMODE        = NO
 
-# If LATEX_HIDE_INDICES is set to YES then doxygen will not 
-# include the index chapters (such as File Index, Compound Index, etc.) 
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
 # in the output.
 
 LATEX_HIDE_INDICES     = NO
 
-# If LATEX_SOURCE_CODE is set to YES then doxygen will include 
-# source code with syntax highlighting in the LaTeX output. 
-# Note that which sources are shown also depends on other settings 
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
 # such as SOURCE_BROWSER.
 
 LATEX_SOURCE_CODE      = NO
 
-# The LATEX_BIB_STYLE tag can be used to specify the style to use for the 
-# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See 
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
 # http://en.wikipedia.org/wiki/BibTeX for more info.
 
 LATEX_BIB_STYLE        = plain
@@ -1348,68 +1431,68 @@ LATEX_BIB_STYLE        = plain
 # configuration options related to the RTF output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output 
-# The RTF output is optimized for Word 97 and may not look very pretty with 
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
+# The RTF output is optimized for Word 97 and may not look very pretty with
 # other RTF readers or editors.
 
 GENERATE_RTF           = NO
 
-# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. 
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
 # put in front of it. If left blank `rtf' will be used as the default path.
 
 RTF_OUTPUT             = rtf
 
-# If the COMPACT_RTF tag is set to YES Doxygen generates more compact 
-# RTF documents. This may be useful for small projects and may help to 
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
 # save some trees in general.
 
 COMPACT_RTF            = NO
 
-# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated 
-# will contain hyperlink fields. The RTF file will 
-# contain links (just like the HTML output) instead of page references. 
-# This makes the output suitable for online browsing using WORD or other 
-# programs which support those fields. 
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
 # Note: wordpad (write) and others do not support links.
 
 RTF_HYPERLINKS         = NO
 
-# Load style sheet definitions from file. Syntax is similar to doxygen's 
-# config file, i.e. a series of assignments. You only have to provide 
+# Load style sheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
 # replacements, missing definitions are set to their default value.
 
-RTF_STYLESHEET_FILE    = 
+RTF_STYLESHEET_FILE    =
 
-# Set optional variables used in the generation of an rtf document. 
+# Set optional variables used in the generation of an rtf document.
 # Syntax is similar to doxygen's config file.
 
-RTF_EXTENSIONS_FILE    = 
+RTF_EXTENSIONS_FILE    =
 
 #---------------------------------------------------------------------------
 # configuration options related to the man page output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_MAN tag is set to YES (the default) Doxygen will 
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
 # generate man pages
 
 GENERATE_MAN           = NO
 
-# The MAN_OUTPUT tag is used to specify where the man pages will be put. 
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
 # put in front of it. If left blank `man' will be used as the default path.
 
 MAN_OUTPUT             = man
 
-# The MAN_EXTENSION tag determines the extension that is added to 
+# The MAN_EXTENSION tag determines the extension that is added to
 # the generated man pages (default is the subroutine's section .3)
 
 MAN_EXTENSION          = .3
 
-# If the MAN_LINKS tag is set to YES and Doxygen generates man output, 
-# then it will generate one additional man file for each entity 
-# documented in the real man page(s). These additional files 
-# only source the real man page, but without them the man command 
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
 # would be unable to find the correct page. The default is NO.
 
 MAN_LINKS              = NO
@@ -1418,45 +1501,60 @@ MAN_LINKS              = NO
 # configuration options related to the XML output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_XML tag is set to YES Doxygen will 
-# generate an XML file that captures the structure of 
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
 # the code including all documentation.
 
 GENERATE_XML           = NO
 
-# The XML_OUTPUT tag is used to specify where the XML pages will be put. 
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
 # put in front of it. If left blank `xml' will be used as the default path.
 
 XML_OUTPUT             = xml
 
-# The XML_SCHEMA tag can be used to specify an XML schema, 
-# which can be used by a validating XML parser to check the 
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
 # syntax of the XML files.
 
-XML_SCHEMA             = 
+XML_SCHEMA             =
 
-# The XML_DTD tag can be used to specify an XML DTD, 
-# which can be used by a validating XML parser to check the 
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
 # syntax of the XML files.
 
-XML_DTD                = 
+XML_DTD                =
 
-# If the XML_PROGRAMLISTING tag is set to YES Doxygen will 
-# dump the program listings (including syntax highlighting 
-# and cross-referencing information) to the XML output. Note that 
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
 # enabling this will significantly increase the size of the XML output.
 
 XML_PROGRAMLISTING     = YES
 
+#---------------------------------------------------------------------------
+# configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES Doxygen will generate DOCBOOK files
+# that can be used to generate PDF.
+
+GENERATE_DOCBOOK       = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the DOCBOOK pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it. If left blank docbook will be used as the default path.
+
+DOCBOOK_OUTPUT         = docbook
+
 #---------------------------------------------------------------------------
 # configuration options for the AutoGen Definitions output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will 
-# generate an AutoGen Definitions (see autogen.sf.net) file 
-# that captures the structure of the code including all 
-# documentation. Note that this feature is still experimental 
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
 # and incomplete at the moment.
 
 GENERATE_AUTOGEN_DEF   = NO
@@ -1465,97 +1563,99 @@ GENERATE_AUTOGEN_DEF   = NO
 # configuration options related to the Perl module output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_PERLMOD tag is set to YES Doxygen will 
-# generate a Perl module file that captures the structure of 
-# the code including all documentation. Note that this 
-# feature is still experimental and incomplete at the 
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
 # moment.
 
 GENERATE_PERLMOD       = NO
 
-# If the PERLMOD_LATEX tag is set to YES Doxygen will generate 
-# the necessary Makefile rules, Perl scripts and LaTeX code to be able 
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
 # to generate PDF and DVI output from the Perl module output.
 
 PERLMOD_LATEX          = NO
 
-# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be 
-# nicely formatted so it can be parsed by a human reader.  This is useful 
-# if you want to understand what is going on.  On the other hand, if this 
-# tag is set to NO the size of the Perl module output will be much smaller 
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.
+# This is useful
+# if you want to understand what is going on.
+# On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
 # and Perl will parse it just the same.
 
 PERLMOD_PRETTY         = YES
 
-# The names of the make variables in the generated doxyrules.make file 
-# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. 
-# This is useful so different doxyrules.make files included by the same 
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
 # Makefile don't overwrite each other's variables.
 
-PERLMOD_MAKEVAR_PREFIX = 
+PERLMOD_MAKEVAR_PREFIX =
 
 #---------------------------------------------------------------------------
 # Configuration options related to the preprocessor
 #---------------------------------------------------------------------------
 
-# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will 
-# evaluate all C-preprocessor directives found in the sources and include 
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
 # files.
 
 ENABLE_PREPROCESSING   = YES
 
-# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro 
-# names in the source code. If set to NO (the default) only conditional 
-# compilation will be performed. Macro expansion can be done in a controlled 
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
 # way by setting EXPAND_ONLY_PREDEF to YES.
 
 MACRO_EXPANSION        = NO
 
-# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES 
-# then the macro expansion is limited to the macros specified with the 
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
 # PREDEFINED and EXPAND_AS_DEFINED tags.
 
 EXPAND_ONLY_PREDEF     = NO
 
-# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files 
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
 # pointed to by INCLUDE_PATH will be searched when a #include is found.
 
 SEARCH_INCLUDES        = YES
 
-# The INCLUDE_PATH tag can be used to specify one or more directories that 
-# contain include files that are not input files but should be processed by 
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
 # the preprocessor.
 
-INCLUDE_PATH           = 
+INCLUDE_PATH           =
 
-# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard 
-# patterns (like *.h and *.hpp) to filter out the header-files in the 
-# directories. If left blank, the patterns specified with FILE_PATTERNS will 
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
 # be used.
 
-INCLUDE_FILE_PATTERNS  = 
+INCLUDE_FILE_PATTERNS  =
 
-# The PREDEFINED tag can be used to specify one or more macro names that 
-# are defined before the preprocessor is started (similar to the -D option of 
-# gcc). The argument of the tag is a list of macros of the form: name 
-# or name=definition (no spaces). If the definition and the = are 
-# omitted =1 is assumed. To prevent a macro definition from being 
-# undefined via #undef or recursively expanded use the := operator 
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
 # instead of the = operator.
 
-PREDEFINED             = 
+PREDEFINED             =
 
-# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then 
-# this tag can be used to specify a list of macro names that should be expanded. 
-# The macro definition that is found in the sources will be used. 
-# Use the PREDEFINED tag if you want to use a different macro definition that 
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition that
 # overrules the definition found in the source code.
 
-EXPAND_AS_DEFINED      = 
+EXPAND_AS_DEFINED      =
 
-# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then 
-# doxygen's preprocessor will remove all references to function-like macros 
-# that are alone on a line, have an all uppercase name, and do not end with a 
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all references to function-like macros
+# that are alone on a line, have an all uppercase name, and do not end with a
 # semicolon, because these will confuse the parser if not removed.
 
 SKIP_FUNCTION_MACROS   = YES
@@ -1564,37 +1664,45 @@ SKIP_FUNCTION_MACROS   = YES
 # Configuration::additions related to external references
 #---------------------------------------------------------------------------
 
-# The TAGFILES option can be used to specify one or more tagfiles. For each 
-# tag file the location of the external documentation should be added. The 
-# format of a tag file without this location is as follows: 
-#   TAGFILES = file1 file2 ... 
-# Adding location for the tag files is done as follows: 
-#   TAGFILES = file1=loc1 "file2 = loc2" ... 
-# where "loc1" and "loc2" can be relative or absolute paths 
-# or URLs. Note that each tag file must have a unique name (where the name does 
-# NOT include the path). If a tag file is not located in the directory in which 
+# The TAGFILES option can be used to specify one or more tagfiles. For each
+# tag file the location of the external documentation should be added. The
+# format of a tag file without this location is as follows:
+#
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths
+# or URLs. Note that each tag file must have a unique name (where the name does
+# NOT include the path). If a tag file is not located in the directory in which
 # doxygen is run, you must also specify the path to the tagfile here.
 
-TAGFILES               = 
+TAGFILES               =
 
-# When a file name is specified after GENERATE_TAGFILE, doxygen will create 
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
 # a tag file that is based on the input files it reads.
 
 GENERATE_TAGFILE       = scalfmm.tag
 
-# If the ALLEXTERNALS tag is set to YES all external classes will be listed 
-# in the class index. If set to NO only the inherited external classes 
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
 # will be listed.
 
 ALLEXTERNALS           = NO
 
-# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed 
-# in the modules index. If set to NO, only the current project's groups will 
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
 # be listed.
 
 EXTERNAL_GROUPS        = YES
 
-# The PERL_PATH should be the absolute path and name of the perl script 
+# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed
+# in the related pages index. If set to NO, only the current project's
+# pages will be listed.
+
+EXTERNAL_PAGES         = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
 # interpreter (i.e. the result of `which perl').
 
 PERL_PATH              = /usr/bin/perl
@@ -1603,222 +1711,222 @@ PERL_PATH              = /usr/bin/perl
 # Configuration options related to the dot tool
 #---------------------------------------------------------------------------
 
-# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will 
-# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base 
-# or super classes. Setting the tag to NO turns the diagrams off. Note that 
-# this option also works with HAVE_DOT disabled, but it is recommended to 
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option also works with HAVE_DOT disabled, but it is recommended to
 # install and use dot, since it yields more powerful graphs.
 
 CLASS_DIAGRAMS         = YES
 
-# You can define message sequence charts within doxygen comments using the \msc 
-# command. Doxygen will then run the mscgen tool (see 
-# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the 
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where 
-# the mscgen tool resides. If left empty the tool is assumed to be found in the 
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
 # default search path.
 
-MSCGEN_PATH            = 
+MSCGEN_PATH            =
 
-# If set to YES, the inheritance and collaboration graphs will hide 
-# inheritance and usage relations if the target is undocumented 
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
 # or is not a class.
 
 HIDE_UNDOC_RELATIONS   = YES
 
-# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is 
-# available from the path. This tool is part of Graphviz, a graph visualization 
-# toolkit from AT&T and Lucent Bell Labs. The other options in this section 
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
 # have no effect if this option is set to NO (the default)
 
 HAVE_DOT               = YES
 
-# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is 
-# allowed to run in parallel. When set to 0 (the default) doxygen will 
-# base this on the number of processors available in the system. You can set it 
-# explicitly to a value larger than 0 to get control over the balance 
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
+# allowed to run in parallel. When set to 0 (the default) doxygen will
+# base this on the number of processors available in the system. You can set it
+# explicitly to a value larger than 0 to get control over the balance
 # between CPU load and processing speed.
 
 DOT_NUM_THREADS        = 0
 
-# By default doxygen will use the Helvetica font for all dot files that 
-# doxygen generates. When you want a differently looking font you can specify 
-# the font name using DOT_FONTNAME. You need to make sure dot is able to find 
-# the font, which can be done by putting it in a standard location or by setting 
-# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the 
+# By default doxygen will use the Helvetica font for all dot files that
+# doxygen generates. When you want a differently looking font you can specify
+# the font name using DOT_FONTNAME. You need to make sure dot is able to find
+# the font, which can be done by putting it in a standard location or by setting
+# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
 # directory containing the font.
 
 DOT_FONTNAME           = FreeSans.ttf
 
-# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. 
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
 # The default size is 10pt.
 
 DOT_FONTSIZE           = 10
 
-# By default doxygen will tell dot to use the Helvetica font. 
-# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to 
+# By default doxygen will tell dot to use the Helvetica font.
+# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
 # set the path where dot can find it.
 
-DOT_FONTPATH           = 
+DOT_FONTPATH           =
 
-# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen 
-# will generate a graph for each documented class showing the direct and 
-# indirect inheritance relations. Setting this tag to YES will force the 
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
 # CLASS_DIAGRAMS tag to NO.
 
 CLASS_GRAPH            = YES
 
-# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen 
-# will generate a graph for each documented class showing the direct and 
-# indirect implementation dependencies (inheritance, containment, and 
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
 # class references variables) of the class with other documented classes.
 
 COLLABORATION_GRAPH    = YES
 
-# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen 
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
 # will generate a graph for groups, showing the direct groups dependencies
 
 GROUP_GRAPHS           = YES
 
-# If the UML_LOOK tag is set to YES doxygen will generate inheritance and 
-# collaboration diagrams in a style similar to the OMG's Unified Modeling 
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
 # Language.
 
 UML_LOOK               = YES
 
-# If the UML_LOOK tag is enabled, the fields and methods are shown inside 
-# the class node. If there are many fields or methods and many nodes the 
-# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS 
-# threshold limits the number of items for each type to make the size more 
-# managable. Set this to 0 for no limit. Note that the threshold may be 
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside
+# the class node. If there are many fields or methods and many nodes the
+# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS
+# threshold limits the number of items for each type to make the size more
+# manageable. Set this to 0 for no limit. Note that the threshold may be
 # exceeded by 50% before the limit is enforced.
 
 UML_LIMIT_NUM_FIELDS   = 10
 
-# If set to YES, the inheritance and collaboration graphs will show the 
+# If set to YES, the inheritance and collaboration graphs will show the
 # relations between templates and their instances.
 
 TEMPLATE_RELATIONS     = YES
 
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT 
-# tags are set to YES then doxygen will generate a graph for each documented 
-# file showing the direct and indirect include dependencies of the file with 
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
 # other documented files.
 
 INCLUDE_GRAPH          = YES
 
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and 
-# HAVE_DOT tags are set to YES then doxygen will generate a graph for each 
-# documented header file showing the documented files that directly or 
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
 # indirectly include this file.
 
 INCLUDED_BY_GRAPH      = NO
 
-# If the CALL_GRAPH and HAVE_DOT options are set to YES then 
-# doxygen will generate a call dependency graph for every global function 
-# or class method. Note that enabling this option will significantly increase 
-# the time of a run. So in most cases it will be better to enable call graphs 
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
 # for selected functions only using the \callgraph command.
 
 CALL_GRAPH             = NO
 
-# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then 
-# doxygen will generate a caller dependency graph for every global function 
-# or class method. Note that enabling this option will significantly increase 
-# the time of a run. So in most cases it will be better to enable caller 
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
 # graphs for selected functions only using the \callergraph command.
 
 CALLER_GRAPH           = NO
 
-# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen 
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
 # will generate a graphical hierarchy of all classes instead of a textual one.
 
 GRAPHICAL_HIERARCHY    = YES
 
-# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES 
-# then doxygen will show the dependencies a directory has on other directories 
-# in a graphical way. The dependency relations are determined by the #include 
+# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
 # relations between the files in the directories.
 
 DIRECTORY_GRAPH        = YES
 
-# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images 
-# generated by dot. Possible values are svg, png, jpg, or gif. 
-# If left blank png will be used. If you choose svg you need to set 
-# HTML_FILE_EXTENSION to xhtml in order to make the SVG files 
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are svg, png, jpg, or gif.
+# If left blank png will be used. If you choose svg you need to set
+# HTML_FILE_EXTENSION to xhtml in order to make the SVG files
 # visible in IE 9+ (other browsers do not have this requirement).
 
 DOT_IMAGE_FORMAT       = png
 
-# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to 
-# enable generation of interactive SVG images that allow zooming and panning. 
-# Note that this requires a modern browser other than Internet Explorer. 
-# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you 
-# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files 
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+# Note that this requires a modern browser other than Internet Explorer.
+# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
+# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
 # visible. Older versions of IE do not have SVG support.
 
 INTERACTIVE_SVG        = NO
 
-# The tag DOT_PATH can be used to specify the path where the dot tool can be 
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
 # found. If left blank, it is assumed the dot tool can be found in the path.
 
-DOT_PATH               = 
+DOT_PATH               =
 
-# The DOTFILE_DIRS tag can be used to specify one or more directories that 
-# contain dot files that are included in the documentation (see the 
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
 # \dotfile command).
 
-DOTFILE_DIRS           = 
+DOTFILE_DIRS           =
 
-# The MSCFILE_DIRS tag can be used to specify one or more directories that 
-# contain msc files that are included in the documentation (see the 
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the
 # \mscfile command).
 
-MSCFILE_DIRS           = 
+MSCFILE_DIRS           =
 
-# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of 
-# nodes that will be shown in the graph. If the number of nodes in a graph 
-# becomes larger than this value, doxygen will truncate the graph, which is 
-# visualized by representing a node as a red box. Note that doxygen if the 
-# number of direct children of the root node in a graph is already larger than 
-# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note 
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
 # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
 
 DOT_GRAPH_MAX_NODES    = 50
 
-# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the 
-# graphs generated by dot. A depth value of 3 means that only nodes reachable 
-# from the root by following a path via at most 3 edges will be shown. Nodes 
-# that lay further from the root node will be omitted. Note that setting this 
-# option to 1 or 2 may greatly reduce the computation time needed for large 
-# code bases. Also note that the size of a graph can be further restricted by 
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
 # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
 
 MAX_DOT_GRAPH_DEPTH    = 0
 
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent 
-# background. This is disabled by default, because dot on Windows does not 
-# seem to support this out of the box. Warning: Depending on the platform used, 
-# enabling this option may lead to badly anti-aliased labels on the edges of 
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
 # a graph (i.e. they become hard to read).
 
 DOT_TRANSPARENT        = NO
 
-# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output 
-# files in one run (i.e. multiple -o and -T options on the command line). This 
-# makes dot run faster, but since only newer versions of dot (>1.8.10) 
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
 # support this, this feature is disabled by default.
 
 DOT_MULTI_TARGETS      = NO
 
-# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will 
-# generate a legend page explaining the meaning of the various boxes and 
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
 # arrows in the dot generated graphs.
 
 GENERATE_LEGEND        = YES
 
-# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will 
-# remove the intermediate dot files that are used to generate 
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
 # the various graphs.
 
 DOT_CLEANUP            = YES
diff --git a/Doc/Image_dox/Classes.png b/Doc/Image_dox/Classes.png
new file mode 100755
index 0000000000000000000000000000000000000000..fdb09b88e3f853c4a7a8000558b7d03af043bead
Binary files /dev/null and b/Doc/Image_dox/Classes.png differ
diff --git a/Doc/Image_dox/interactionList.png b/Doc/Image_dox/interactionList.png
new file mode 100644
index 0000000000000000000000000000000000000000..d2f8b99cee7401dfa3b94acbea2280b27f6fa0d5
Binary files /dev/null and b/Doc/Image_dox/interactionList.png differ
diff --git a/Doc/ParallelDetails.pdf b/Doc/ParallelDetails.pdf
index 8455adc002e7acbf5d5a3a872e97d37a020cbdde..e5fc94b6da71a4270db3eb89b806a3fccdb45759 100755
Binary files a/Doc/ParallelDetails.pdf and b/Doc/ParallelDetails.pdf differ
diff --git a/Doc/ParallelDetails.tex b/Doc/ParallelDetails.tex
deleted file mode 100755
index 415c8dbeb422c4821c2fe2b2c08e81a9a13ba6b7..0000000000000000000000000000000000000000
--- a/Doc/ParallelDetails.tex
+++ /dev/null
@@ -1,383 +0,0 @@
-\documentclass[12pt,letterpaper,titlepage]{report}
-\usepackage{algorithm2e}
-\usepackage{listings}
-\usepackage{geometry}
-\usepackage{graphicx}
-\usepackage[hypertexnames=false, pdftex]{hyperref}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% use:$ pdflatex ParallelDetails.tex
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\author{Berenger Bramas}
-\title{ScalFmm - Parallel Algorithms (Draft)}
-\date{August 11, 2011}
-
-%% Package config
-\lstset{language=c++, frame=lines}
-\restylealgo{boxed}
-\geometry{scale=0.8, nohead}
-\hypersetup{ colorlinks = true, linkcolor = black, urlcolor = blue, citecolor = blue }
-%% Remove introduction numbering
-\setcounter{secnumdepth}{-1}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{document}
-\maketitle{}
-\newpage
-\tableofcontents
-\newpage
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Introduction}
-In this document we introduce the principles and the algorithms used in our library to run in a distributed environment using MPI.
-The algorithms in this document may not be up to date comparing to those used in the code.
-We advise to check the version of this document and the code to have the latest available.
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\chapter{Building the tree in Parallel}
-\section{Description}
-The main motivation to create a distributed version of the FMM is to run large simulations.
-These ones contain more particles than a computer can host which involves using several computers.
-Moreover, it is not reasonable to ask a master process to load an entire file and to dispatch the data to others processes. Without being able to know the entire tree it may send randomly the data to the slaves.
-To override this situation, our solution can be viewed as a two steps process.
-First, each node loads a part of the file to possess several particles.
-After this task, each node can compute the Morton index for the particles he had loaded.
-The Morton index of a particle depends of the system properties but also of the tree height.
-If we want to choose the tree height and the number of nodes at run time then we cannot pre-process the file.
-The second step is a parallel sort based on the Morton index between all nodes with a balancing operation at the end.
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Load a file in parallel}
-We use the MPI $I/O$ functions to split a file between all the mpi processes.
-The prerequisite to make the splitting easier is to have a binary file.
-Thereby, using a very basic formula each node knows which part of the file it needs to load.
-\begin{equation}
-size per proc \leftarrow \left (file size - header size \right ) / nbprocs
-\end{equation}
-\begin{equation}
-offset \leftarrow header size + size per proc .\left ( rank - 1 \right )
-\end{equation}
-\newline
-We do not use the view system to read that data as it is used to write. The MPI\_File\_read is called as described in the fallowing $C++$ code.
-\begin{lstlisting}
-// From FMpiFmaLoader
-MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal),
-                 particles, int(bufsize), MPI_FLOAT, &status);
-\end{lstlisting}
-Our files are composed by a header fallowing by all the particles.
-The header enables to check several properties as the precision of the file.
-Finally, a particle is represented by four decimal values: a position and a physical value.
-\newline
-\underline{Remark:} The MPI IO function do not work if we use a MPI\_Initthread(MPI\_THREAD\_MULTIPLE) and a version above 1.5.1.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Sorting the particles}
-Once each node has a set of particles we need to sort them.
-This problem boils down to a simple parallel sort where Morton index are used to compare particles.
-We use two different approaches to sort the data.
-In the next version of scalfmm the less efficient method should be deleted.
-
-\subsection{Using QuickSort}
-A first approach is to use a famous sorting algorithm.
-We choose to use the quick sort algorithm because the distributed and the shared memory approaches are mostly similar.
-Our implementation is based on the algorithm described in \cite{itpc03}.
-The efficiency of this algorithm depends roughly of the pivot choice.
-In fact, a wrong idea of the parallel quick sort is to think that each process first sort their particles using quick sort and then use a merge sort to share their results.
-Instead, the nodes choose a common pivot and progress for one quick sort iteration together.
-From that point all process has an array with a left part where all values are lower than the pivot and a right part where all values are upper or equal than the pivot.
-Then, the nodes exchange data and some of them will work on the lower part and the other on the upper parts until there is one process for a part.
-At this point, the process performs a shared memory quick sort.
-To choose the pivot we tried to use an average of all the data hosted by the nodes:
-\newline
-\begin{algorithm}[H]
-\linesnumbered
-\SetLine
-\KwResult{A Morton index as next iteration pivot}
-\BlankLine
-myFirstIndex $\leftarrow$ particles$[0]$.index\;
-allFirstIndexes = MortonIndex$[nbprocs]$\;
-allGather(myFirstIndex, allFirstIndexes)\;
-pivot $\leftarrow$ Sum(allFirstIndexes(:) / nbprocs)\;
-\BlankLine
-\caption{Choosing the QS pivot}
-\end{algorithm}
-\newline
-A bug was made when at the beginning, we did an average by summing all the values first and dividing after. But the Morton index may be extremly high, so we need to to divide all the value before performing the sum.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{Using a Sorting Network}
-In \cite{ptttplwaefmm11}, a proposition has been made to sort the data using a sorting network.
-We implemented a such sorting algorithm but the result were not extremly efficient.
-Contrary to Quick sort, a sorting network is extremly stable and all the nodes performs similar work.
-The quick sort is pivot dependant and some nodes may work much more than other.
-But, the average case the quick sort enable higher efficiency.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{Using an intermediate Octree}
-The second approach uses an octree to sort the particles in each process instead of a sorting algorithm.
-The time complexity is equivalent but it needs more memory since it is not done in place.
-After inserting the particles in the tree, we can iterate at the leaves level and access to the particles in an ordered way.
-Then, the processes are doing a minimum and a maximum reduction to know the real Morton interval of the system.
-By building the system interval in term of Morton index, the nodes cannot know the data scattering.
-Finally, the processes split the interval in a uniform manner and exchange data with $P^{2}$ communication in the worst case.
-\newline
-\newline
-In both approaches the data may not be balanced at the end.
-In fact, the first method is pivot dependent and the second consider that the data are uniformly distributed.
-That is the reason why we need to balance the data among nodes.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Balancing the leaves}
-After sorting, each process has potentially several leaves.
-If we have two processes $P_{i}$ and $P_{j}$ with $i < j$ the sort guarantees that all leaves from node i are inferior than the leaves on the node j in a Morton indexing way.
-But the leaves are randomly distributed among the nodes and we need to balance them.
-It is a simple reordoring of the data, but the data has to stayed sorted.
-
-\begin{enumerate}
-\item Each process informs other to tell how many leaves it holds.
-\item Each process compute how many leaves it has to send or to receive from left or right.
-\end{enumerate}
-At the end of the algorithm our system is completely balanced with the same number of leaves on each process.
-
-\begin{figure}[h!]
-\begin{center}
-\includegraphics[width=15cm, height=15cm, keepaspectratio=true]{Balance.png}
-\caption{Balancing Example}
-\end{center}
-\end{figure}
-
-A process has to send data to the left if its current left limit is upper than its objective limit.
-Same in the other side, and we can reverse the calculs to know if a process has to received data.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\chapter{Simple operators: P2M, M2M, L2L}
-We present the different FMM operators in two separated parts depending on their parallel complexity.
-In this first part, we present the three simplest operators P2M, M2M and L2L.
-Their simplicity is explained by the possible prediction to know which node hosts a cell and how to organize the communication.
-
-\section{P2M}
-The P2M still unchanged from the sequential approach to the distributed memory algorithm.
-In fact, in the sequential model we compute a P2M between all particles of a leaf and this leaf which is also a cell.
-Although, a leaf and the particles it hosts belong to only one node so doing the P2M operator do not require any information from another node.
-From that point, using the shared memory operator makes sense.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{M2M}
-During the upward pass information moves from a level to the upper one.
-The problem in a distributed memory model is that one cell can exist in several trees i.e. in several nodes.
-Because the M2M operator computes the relation between a cell and its child, the nodes which have a cell in common need to share information.
-Moreover, we have to decide which process will be responsible of the computation if the cell is present on more than one node.
-We have decided that the node with the smallest rank has the responsibility to compute the M2M and propagate the value for the future operations.
-Despite the fact that others processes are not computing this cell, they have to send the child of this shared cell to the responsible node.
-We can establish some rules and some properties of the communication during this operation.
-In fact, at each iteration a process never needs to send more than 7 cells, also a process never needs to receive more than 7 cells.
-The shared cells are always at extremities and one process cannot be designed to be the responsible of more than one shared cell at a level.
-
-\begin{figure}[h!]
-\begin{center}
-\includegraphics[width=14cm, height=7cm, keepaspectratio=true]{ruleillu.jpg}
-\caption{Potential Conflicts}
-\end{center}
-\end{figure}
-
-\begin{algorithm}[H]
-\restylealgo{boxed}
-\linesnumbered
-\SetLine
-\KwData{none}
-\KwResult{none}
-\BlankLine
-\For{idxLevel $\leftarrow$ $Height - 2$ \KwTo 1}{
-        \ForAll{Cell c at level idxLevel}{
-                M2M(c, c.child)\;
-        }
-}
-\BlankLine
-\caption{Traditional M2M}
-\end{algorithm}
-\begin{algorithm}[H]
-\restylealgo{boxed}
-\linesnumbered
-\SetLine
-\KwData{none}
-\KwResult{none}
-\BlankLine
-\For{idxLevel $\leftarrow$ $Height - 2$ \KwTo 1}{
-        \uIf{$cells[0]$ not in my working interval}{
-                isend($cells[0].child$)\;
-                hasSend $\leftarrow$ true\;
-        }
-        \uIf{$cells[end]$ in another working interval}{
-                irecv(recvBuffer)\;
-                hasRecv $\leftarrow$ true\;
-        }
-        \ForAll{Cell c at level idxLevel in working interval}{
-                M2M(c, c.child)\;
-        }
-        \emph{Wait send and recv if needed}\;
-        \uIf{hasRecv is true}{
-                M2M($cells[end]$, recvBuffer)\;
-        }
-}
-\BlankLine
-\caption{Distributed M2M}
-\end{algorithm}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{L2L}
-The L2L operator is very similar to the M2M.
-It is just the contrary, a result hosted by only one node needs to be shared with every others nodes that are responsible of at least one child of this node.
-\BlankLine
-\begin{algorithm}[H]
-\restylealgo{boxed}
-\linesnumbered
-\SetLine
-\KwData{none}
-\KwResult{none}
-\BlankLine
-\For{idxLevel $\leftarrow$ 2 \KwTo $Height - 2$ }{
-        \uIf{$cells[0]$ not in my working interval}{
-                irecv($cells[0]$)\;
-                hasRecv $\leftarrow$ true\;
-        }
-        \uIf{$cells[end]$ in another working interval}{
-                isend($cells[end]$)\;
-                hasSend $\leftarrow$ true\;
-        }
-        \ForAll{Cell c at level idxLevel in working interval}{
-                M2M(c, c.child)\;
-        }
-        \emph{Wait send and recv if needed}\;
-        \uIf{hasRecv is true}{
-                M2M($cells[0]$, $cells[0].child$)\;
-        }
-}
-\BlankLine
-\caption{Distributed L2L}
-\end{algorithm}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\chapter{Complex operators: P2P, M2L}
-These two operators are more complex than the ones presented in the previous chapter.
-In fact, it is very difficult to predict the communication between nodes.
-Each step requires pre-processing to know what are the potential communications and a gather to inform other about the needs.
-\section{P2P}
-To compute the P2P a leaf need to know all its direct neighbors.
-Even if the Morton indexing maximizes the locality, the neighbors of a leaf can be on any node.
-Also, the tree used in our library is an indirection tree.
-It means that only the leaves that contain particles are created.
-That is the reason why when we know that a leaf needs another one on a different node, this other node may not realize this relation if this neighbor leaf do not exist on its own tree.
-At the contrary, if this neighbor leaf exists then the node wills require the first leaf to compute the P2P too.
-In our current version we are first processing each potential needs to know the communication we should need.
-Then the nodes do an all gather to inform each other how many communication they are going to send.
-Finally they send and receive data in an asynchronous way and cover it by the P2P they can do.
-\BlankLine
-\begin{algorithm}[H]
-\restylealgo{boxed}
-\linesnumbered
-\SetLine
-\KwData{none}
-\KwResult{none}
-\BlankLine
-\ForAll{Leaf lf}{
-        neighborsIndexes $\leftarrow$ $lf.potentialNeighbors()$\;
-        \ForAll{index in neighborsIndexes}{
-                \uIf{index belong to another proc}{
-                        isend(lf)\;
-                        \emph{Mark lf as a leaf that is linked to another proc}\;
-                }
-        }
-}
-\emph{all gather how many particles to send to who}\;
-\emph{prepare the buffer to receive data}\;
-\ForAll{Leaf lf}{
-        \uIf{lf is not linked to another proc}{
-                neighbors $\leftarrow$ $tree.getNeighbors(lf)$\;
-                P2P(lf, neighbors)\;
-        }
-}
-\While{We do not have receive/send everything}{
-	\emph{Wait some send and recv}\;
-	\emph{Put received particles in a fake tree}\;
-}
-\ForAll{Leaf lf}{
-	\uIf{lf is linked to another proc}{
-	        neighbors $\leftarrow$ $tree.getNeighbors(lf)$\;
-	        otherNeighbors $\leftarrow$ $fakeTree.getNeighbors(lf)$\;
-	        P2P(lf, neighbors + otherNeighbors)\;
-	}
-}
-\BlankLine
-\caption{Distributed P2P}
-\end{algorithm}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{M2L}
-The M2L operator is relatively similar to the P2P.
-Hence P2P is done at the leaves level, M2L is done on several levels from Height - 2 to 2.
-At each level, a node needs to have access to all the distant neighbors of the cells it is the proprietary and those ones can be hosted by any other node.
-Anyway, each node can compute a part of the M2L with the data it has.
-The algorithm can be viewed as several tasks:
-\begin{enumerate}
-\item Compute to know what data has to be sent
-\item All gather to know what data has to be received
-\item Do all the computation we can without the data from other nodes
-\item Wait $send/receive$
-\item Compute M2L with the data we received
-\end{enumerate}
-\BlankLine
-\begin{algorithm}[H]
-\restylealgo{boxed}
-\linesnumbered
-\SetLine
-\KwData{none}
-\KwResult{none}
-\BlankLine
-\ForAll{Level idxLeve from 2 to Height - 2}{
-        \ForAll{Cell c at level idxLevel}{
-                neighborsIndexes $\leftarrow$ $c.potentialDistantNeighbors()$\;
-                \ForAll{index in neighborsIndexes}{
-                        \uIf{index belong to another proc}{
-                                isend(c)\;
-                                \emph{Mark c as a cell that is linked to another proc}\;
-                        }
-                }
-        }
-}
-\emph{Normal M2L}\;
-\emph{Wait send and recv if needed}\;
-\ForAll{Cell c received}{
-        $lightOctree.insert( c )$\;
-}
-\ForAll{Level idxLeve from 2 to Height - 1}{
-        \ForAll{Cell c at level idxLevel that are marked}{
-                neighborsIndexes $\leftarrow$ $c.potentialDistantNeighbors()$\;
-                neighbors $\leftarrow$ lightOctree.get(neighborsIndexes)\;
-                M2L( c, neighbors)\;
-        }
-}
-\BlankLine
-\caption{Distributed M2L}
-\end{algorithm}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{thebibliography}{9}
-\bibitem{itpc03}
-   Ananth Grama, George Karypis, Vipin Kumar, Anshul Gupta,
-   \emph{Introduction to Parallel Computing}.
-   Addison Wesley, Massachusetts,
-   2nd Edition,
-   2003.
-\bibitem{ptttplwaefmm11}
-   I. Kabadshow, H. Dachsel,
-   \emph{Passing The Three Trillion Particle Limit With An Error-Controlled Fast Multipole Method}.
-   2011.
-\end{thebibliography}
-\end{document}
-
-
diff --git a/Doc/ProgrammingRules.txt b/Doc/ProgrammingRules.txt
index cb4c87833688b6951ed18b1ff11ceccbcf686818..da8609acd111ed35ba41ee8449ea28fe44769923 100644
--- a/Doc/ProgrammingRules.txt
+++ b/Doc/ProgrammingRules.txt
@@ -1,19 +1,21 @@
 
-Please follow those rules when developing:
-==========================================
-Static (variables, members, attributes) should start with a capital letter
-Class name should start with a capital letter
-Mix capital and normal letter to write names (IAmAName) no underscore (but if you have several variables that looks the same be careful: thisIsAXVariable thisIsAYVariable)
-Put const every where it is possible (method, value, pointer etc.)
-Declare variables usually as late as possible (we are not in C so do not declare every thing at the beginning of a function)
-Declare index in the for loop where they are used
-Use pre-inc if possible
-If an parameter is changed by a function use pointer else use const ref
-Please do not name variables with one letter (except it is related to a mathematical formula but then this one should be in the comment in latex format)
-Try not to have function bigger than 100 lines
-Be consistent
-Methods should start with a verb or an action (get, compute, print, etc...)
-
+Please follow those rules when developing in scalfmm:
+=====================================================
+1 - Static (variables, members, attributes) should start with a capital letter
+2 - Class name should start with a capital letter
+3 - Mix capital and normal letters to write names (IAmAStaticName, iAmAName), no underscore (but if you have several variables that look the same be careful: thisIsAXVariable thisIsAYVariable)
+4 - Put const every where it is possible (method, value, pointer etc.)
+5 - Usually declare variables as late as possible (we are not in old C, so do not declare everything at the beginning of a function). Of course, variables that are used in a loop but always hold the same value can be declared at the beginning of the function.
+6 - Declare index in the for loop where they are used (and their names should start with "idx")
+7 - Use pre-inc if possible (even if the compiler will optimize post-inc for native types)
+8 - If a parameter is changed by a function use a pointer, else use a const ref (if the size of the object is less than 20 bytes you can pass it by value, especially if it is intensively used!)
+9 - Please do not name variables with one letter (except if it is related to a mathematical formula but then this one should be in the comment in latex format)
+10 - Try not to have function bigger than 100 lines
+11 - Be consistent
+12 - Methods should start with a verb or an action (get, compute, print, etc...)
+13 - If some code is here temporarily (for testing, assertion, etc.), put it in a "{}" section and add a comment that explicitly says that it can be removed, "todo remove this section" for example.
+14 - Sometimes no comment is better than an outdated (or wrongly copy-pasted) comment.
+15 - Plain-data structs can be used if it seems natural to use a container without methods.
 
 
 Why everything is inside the HPP!? A discussion about why scalfmm should stay like this for now
diff --git a/Doc/Site_dox/FContact.dox b/Doc/Site_dox/FContact.dox
new file mode 100644
index 0000000000000000000000000000000000000000..9abd5065ad638512ad58f956001ade4be178bd36
--- /dev/null
+++ b/Doc/Site_dox/FContact.dox
@@ -0,0 +1,27 @@
+/*! \page contacts Contacts
+ *
+ * \section authors Authors
+
+ * ScalFmm is a library for the Fast Multipole Method (FMM). It is
+ * written in C++ and uses OpenMP and MPI to support parallel
+ * execution. It is developed by the HiePACS team at INRIA.
+
+ * You can contact the development team for any questions at
+ * <ul>
+ * <li> scalfmm-public-support@lists.gforge.inria.fr</li>
+ * </ul>
+
+ * If you want to see other projects of the HiePACS Inria team, please
+ * see: https://team.inria.fr/hiepacs/ .
+
+ * \section contributors Contributors
+ *
+ * <ul>
+ * <li> Olivier Coulaud </li>
+ * <li> Bérenger Bramas </li>
+ * <li> Cyrille Piacibello </li>
+ * </ul>
+  
+
+ */
+ 
diff --git a/Doc/Site_dox/FDownload.dox b/Doc/Site_dox/FDownload.dox
new file mode 100644
index 0000000000000000000000000000000000000000..1c701b5590ff5b66e17de516941324f0447f5eca
--- /dev/null
+++ b/Doc/Site_dox/FDownload.dox
@@ -0,0 +1,72 @@
+/*! \page install Downloading, Building and Installing ScalFMM
+*
+* \section download Download ScalFMM
+*
+* To download ScalFmm go to http://scalfmm-public.gforge.inria.fr/download.html.
+*
+* You can stay informed about updates and new versions by subscribing
+* to the users mailing list
+* (scalfmm-public-users@lists.gforge.inria.fr), which has very low
+* traffic (one email per month), at:
+* http://lists.gforge.inria.fr/cgi-bin/mailman/listinfo/scalfmm-public-users.
+*
+* \section secNecessary Requirements
+*
+* <ul>
+  * <li> CMake for compiling, ccmake for configuring the build </li>
+  * <li> Open MP </li>
+  * <li> MPI, if you want to use distributed version of the algorithm </li>
+  * <li> A compiler supporting C++ 2011 </li>
+  * </ul>
+*
+* \section build Build
+* How to build ScalFMM
+* <ul>
+  * <li> Go to scalfmm/Build directory </li>
+  * <li> type <b> $> cmake .. </b> 
+    * <ul>
+      * <li> If you want to enable MPI, use <b> $> cmake .. -DSCALFMM_USE_MPI=ON </b> </li>
+      * </ul>
+    * </li>
+  * <li> then configure your build with <b> $> ccmake .. </b> if needed </li>
+  * <li> then type <b> $> make name_of_exec </b> </li>
+  * </ul>
+*
+* \subsection conf Configuration of the build
+* 
+* These are all the build options you can modify.
+*
+* <ul>
+  * <li> CMAKE_INSTALL_PREFIX : to choose where to install ScalFmm </li>
+  * <li> SCALFMM_USE_MPI : to use and enable MPI. Warning: this parameter must be given on the first cmake command you run. </li>
+  * <li> SCALFMM_ATTACHE_SOURCE : to build with -g </li>
+  * <li> SCALFMM_BUILD_DEBUG : to build in debug mode of cmake (with -O0) </li>
+  * <li> SCALFMM_BUILD_TESTS : to build the tests and the examples </li>
+  * <li> SCALFMM_BUILD_UTESTS : to build the unit tests </li>
+  * <li> SCALFMM_USE_ADDONS : to activate add ons </li>
+  * <ul>
+    * <li> SCALFMM_ADDON_FMMAPI : to build Fmm Api </li>
+    * </ul>
+  * <li> SCALFMM_USE_DOUBLE_PRECISION : to build in double precision </li>
+  * <li> SCALFMM_USE_MEM_STATS : to use memory stats (which count any new/delete done during a simulation) </li>
+  * <li> SCALFMM_USE_BLAS : to enable blas (needed by most of the kernel) </li>
+  * <ul>
+    * <li> SCALFMM_USE_MKL_AS_BLAS : to use MKL as blas </li>
+    * </ul>
+  * <li> SCALFMM_USE_TRACE : to create trace </li>
+  * <ul>
+    * <li> SCALFMM_USE_ITAC : to use Intel ITAC tool as trace generator </li>
+    * </ul>
+  * </ul>
+*
+
+* Once the library is built, you may want to install it: <b> $> make
+* install </b>. Note that you are not required to install ScalFmm
+* to use it. You can build it and use it from the Build directory.
+
+* The whole project is documented using Doxygen. You can build the doc
+* by typing <b> $> make doc </b> in the Build directory.
+
+*
+*
+*/
diff --git a/Doc/Site_dox/FLicense.dox b/Doc/Site_dox/FLicense.dox
new file mode 100644
index 0000000000000000000000000000000000000000..bfb8a53e7256b2782ffb1825edb6b501a4fda04c
--- /dev/null
+++ b/Doc/Site_dox/FLicense.dox
@@ -0,0 +1,20 @@
+/*! \page License License information
+ *
+ *
+ * The library is under LGPL + CeCILL-C licenses. In case of conflict
+ * the more restrictive has to be used. We encourage users to have a
+ * look at the official license websites in case of doubt.
+ *
+ * See the GNU General Public and CeCILL-C Licenses for more details.
+ * "http://www.cecill.info".  "http://www.gnu.org/licenses".
+ *
+ * ScalFmm is under software patent number
+ * IDDN.FR.001.100030.000.S.P.2012.000.31235. If you need a more
+ * flexible license, please do not hesitate to contact us.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
+ *
+ *
+ */
diff --git a/Doc/Site_dox/FQuick.dox b/Doc/Site_dox/FQuick.dox
new file mode 100644
index 0000000000000000000000000000000000000000..09920f1f3b8a4d41db62dcd92c6cb7384f8dd47a
--- /dev/null
+++ b/Doc/Site_dox/FQuick.dox
@@ -0,0 +1,320 @@
+/*! \page quick Quick Start
+
+ * In this section, we present the data structure organization and the
+ * class design needed to fully understand ScalFmm.
+ 
+ * \tableofcontents 
+ 
+ * \section prerequisite Prerequisite 
+ 
+ * It is better to have built the library, or at least to have
+ * downloaded the sources. The user needs to be comfortable with the 'C++'
+ * language and, if possible, templates.
+
+ * \section classes Overview of general architecture
+ *
+ * \image html Classes.png "General architecture"
+ 
+ * \section data What Data 
+ 
+ * ScalFmm implements the Fast Multipole Method. New users should see
+ * this process as a way to estimate far interactions and to compute
+ * accurately the close interactions in a group of particles. We then
+ * have some particles that we insert in an octree. The octree stores the
+ * particles in its leaves. From the root to the leaves there are the
+ * cells. At this point we have only described primitive classes, which
+ * hold data or other primitive classes.
+ 
+ * Then, we need a kernel, which is the computational part of the FMM. It is a
+ * class that is able to compute the interactions between particles or
+ * cells, etc. There are several possible kernels depending on what we
+ * want to compute.
+ 
+ * Finally, the FMM Core algorithm is a class that takes the primitive
+ * classes and calls the kernel with the correct arguments. In our
+ * implementation, the user has to choose between the sequential FMM and
+ * the OpenMP FMM.
+ 
+ * \section primitivesclasses Primitives Classes
+ 
+ * \subsection particles Particles
+ 
+ * To be stored in the octree, a particle must inherit from
+ * FAbstractParticleContainer. This is the required class:
+
+
+ * <pre class='brush: cpp'>
+ * class FAbstractParticleContainer{
+ 
+ * template<typename... Args>
+ * void push(const FPoint& , Args ... ){
+    
+ * };
+ * </pre>
+
+ * One class implements these minimum required methods:
+ * FBasicParticleContainer. This is what a particle class MUST
+ * provide to be able to be inserted in the tree. Then, the user can add
+ * other methods to match the kernel requirements. For example, some
+ * kernels may need a particle to hold a physical value, a force
+ * vector and a potential. See FRotationParticleContainer if you want
+ * an example of a Particle class.
+
+ * \subsection cells Cells
+
+ * The same principle applies to cells. There is a minimum set of
+ * methods that a cell class must provide to be able to be used in the
+ * octree. And then, there are some other methods that you can add to
+ * make it usable by the kernel.
+
+ * The class Src/Components/FAbstractCell.hpp shows what a cell
+ * should implement:
+
+ * <pre class='brush: cpp'> 
+  * class FAbstractCell{ 
+  *  public: 
+  *  virtual ~FAbstractCell(){ 
+  *  } 
+  *  virtual MortonIndex getMortonIndex() const = 0; 
+  *  virtual void setMortonIndex(const MortonIndex inIndex) = 0; 
+  *  virtual void setPosition(const FPoint& inPosition) = 0; 
+  *  virtual const FTreeCoordinate& getCoordinate() const = 0; 
+  *  virtual void setCoordinate(const long inX, const long inY, const long inZ) = 0; 
+  *  virtual bool hasSrcChild() const = 0; 
+  *  virtual bool hasTargetsChild() const = 0; 
+  *  virtual void setSrcChildTrue() = 0; 
+  *  virtual void setTargetsChildTrue() = 0; 
+  *  }; 
+
+ * </pre>
+ 
+ * The FBasicCell class provides an implementation of all these
+ * methods.
+
+ * \subsection leaves Leaves 
+
+ * The leaf is the class responsible for hosting the particles. The
+ * octree uses this class in order to store a particle. Behind the
+ * scenes, the leaf does what it wants. But, the octree also needs a way
+ * to get the particles it has inserted, which can be targets or
+ * sources.
+
+ * In the following class, FAbstractLeaf, one can see what is required
+ * by the algorithm :
+
+ * <pre>
+ * template< class ParticleClass, class ContainerClass > 
+ *   class FAbstractLeaf { 
+ *   public: 
+ *   // Default destructor
+ *   virtual ~FAbstractLeaf(){ 
+ *   } 
+ *   virtual void push(const ParticleClass& particle) = 0; 
+ *   virtual ContainerClass* getSrc() = 0; 
+ *   virtual ContainerClass* getTargets() = 0; 
+ *   }; 
+ * </pre>
+
+ * The FSimpleLeaf class provides an implementation of all these
+ * methods.
+
+ * \section octree Octree
+ 
+ * The octree is templatized and can therefore host particles, cells and
+ * leaves. It also needs some information about the simulation, like the
+ * size and the center of the box. Moreover, the user has to specify
+ * the height of the octree. The root is level 0, so giving a
+ * height of 3 creates the root level, one level of cells and the leaf
+ * level. The usual way of declaring the octree, taken from
+ * Tests/Utils/testOctree.cpp, is as follows:
+
+ * <pre>
+ * typedef FVector<FBasicParticle>                                        ContainerClass;
+ * typedef FSimpleLeaf<FBasicParticle, ContainerClass >                        LeafClass;
+ * typedef FOctree<FBasicParticle, FBasicCell, ContainerClass , LeafClass >  OctreeClass;
+ * OctreeClass tree(HEIGHT, SUBHEIGHT, BoxWidth, CenterOfBox);
+ * </pre>
+ 
+ * \subsection loading Loading Particle
+
+ * Once the octree is created, we need to put some particles in
+ * it. This is performed using classes called 'loaders'.
+
+ * A loader should provide these methods:
+
+ * <pre>
+ * template <class ParticleClass> 
+ *   class FAbstractLoader { 
+ *   public:	 
+ *   // Default destructor 
+ *   virtual ~FAbstractLoader(){ 
+ *   } 
+ *   virtual FSize getNumberOfParticles() const = 0; 
+ *   virtual FPoint getCenterOfBox() const = 0; 
+ *   virtual FReal getBoxWidth() const = 0; 
+ *   virtual bool isOpen() const = 0; 
+ *   virtual void fillParticle(ParticleClass& inParticle) = 0; 
+ *   template <class OctreeClass> 
+ *   void fillTree(OctreeClass& tree){ 
+ *       ParticleClass particleToFill; 
+ *       for(int idxPart = 0 ; idxPart < getNumberOfParticles() ; ++idxPart){ 
+ *           fillParticle(particleToFill); 
+ *           tree.insert(particleToFill); 
+ *       } 
+ *   } 
+ *  }; 
+ * </pre>
+
+ * There exist several loaders; one per file format. Depending on the
+ * loader, the particle class should implement special methods. For
+ * example, the basic loader only fills the position of the
+ * particles, whereas the FMA loader also fills the physical value of
+ * the particles.
+
+ * The usual way of loading the particles is as follows:
+
+ * <pre>
+ * FRandomLoader<ParticleClass> loader(NbPart, 1, FPoint(0.5,0.5,0.5), 1);
+ * OctreeClass tree(10, 3, loader.getBoxWidth(), loader.getCenterOfBox());
+ * loader.fillTree(tree);
+ * </pre>
+
+ * \subsection octreeIterator Iterating on an Octree
+ 
+ * The user may want to iterate on the tree and access the particles or
+ * the cells. To do so, he needs to declare an iterator and use it to
+ * move from top to bottom and from left to right. It is critical that
+ * the octree is not empty!
+
+ * This next sample is taken from Tests/Utils/testOctreeIter.cpp and
+ * counts the leaves:
+ 
+ * <pre>
+ * OctreeClass::Iterator octreeIterator(&tree);
+ *     octreeIterator.gotoBottomLeft();
+ *     int counter = 0;
+ *     do{
+ *             ++counter;
+ *     } while(octreeIterator.moveRight());
+ * </pre>
+
+ * To iterate on the cells we can proceed as follows:
+ * <pre>
+ * OctreeClass::Iterator octreeIterator(&tree);
+ * octreeIterator.gotoBottomLeft();
+ * for(int idxLevel = NbLevels - 1 ; idxLevel >= 1 ; --idxLevel ){
+ *    int counter = 0;
+ *    do{
+ *       ++counter;
+ *    } while(octreeIterator.moveRight());
+ *    octreeIterator.moveUp();
+ *    octreeIterator.gotoLeft();
+ *    std::cout << "Cells at level " << idxLevel << " = " << counter << " ...\n";
+ * }
+ * </pre>
+
+ * \section kernel The kernel
+ 
+ * The kernel is a class that should perform the usual FMM
+ * operators. Each kind of kernel may require special methods and
+ * have special requirements on the particles and the cells.
+
+ * An empty kernel can be found in Src/Components/FBasicKernels.hpp;
+ * it implements the class definition FAbstractKernels:
+
+ * <pre>
+ * template< class ParticleClass, class CellClass, class ContainerClass> class FBasicKernels : public FAbstractKernels<ParticleClass,CellClass,ContainerClass> { 
+ * public: 
+ *
+ * // Default destructor
+ * virtual ~FBasicKernels(){}
+ * virtual void P2M(CellClass* const , const ContainerClass* const ) {}
+ * virtual void M2M(CellClass* const FRestrict , const CellClass*const FRestrict *const FRestrict , const int ) {} 
+ * virtual void M2L(CellClass* const FRestrict , const CellClass* [], const int , const int ) {}
+ * virtual void L2L(const CellClass* const FRestrict , CellClass* FRestrict *const FRestrict  , const int ) {}
+ * virtual void L2P(const CellClass* const , ContainerClass* const ){}
+ * virtual void P2P(const FTreeCoordinate& , 
+ *                  ContainerClass* const FRestrict , const ContainerClass* const FRestrict , 
+ *                  ContainerClass* const [27], const int ){}
+ * virtual void P2PRemote(const FTreeCoordinate& , 
+ *                  ContainerClass* const FRestrict , const ContainerClass* const FRestrict , 
+ *                  ContainerClass* const [27], const int ){}
+ * </pre>
+
+ * One example of a kernel is the 'test' kernel called
+ * FTestKernels. This kernel simply sums the particles (one particle
+ * weight = 1) so at the end of the simulation each particle should
+ * have a weight of N. We just declare this kernel based on the
+ * component types but usually do not call any method manually since
+ * this is performed by the FMM core.
+
+ * <pre>
+ * typedef FTestKernels<ParticleClass, CellClass, ContainerClass >         KernelClass;
+ * KernelClass kernels;
+ * </pre>
+
+ * \section coreFMM The FMM Core
+ 
+ * We showed how to have an octree and a kernel. Now, we show how to use
+ * an FMM algorithm on the data. Remember, the FMM algorithm simply
+ * takes the data from the octree and calls the methods of the
+ * kernel. The goal is to have an FMM independent from the data.
+
+ * The next sample is taken from Tests/Utils/testFmmAlgorithm.cpp and
+ * uses the basic sequential FMM:
+ 
+ * <pre>
+ * typedef FFmmAlgorithm<OctreeClass, ParticleClass, CellClass, ContainerClass, KernelClass, LeafClass >     FmmClass;
+ * FmmClass algo(&tree,&kernels);
+ * algo.execute();
+ * </pre>
+
+ * To move to the OpenMP threaded FMM we can use the following code,
+ * replacing 'FFmmAlgorithm' with 'FFmmAlgorithmThread':
+
+ * <pre>
+ * typedef FFmmAlgorithmThread<OctreeClass, ParticleClass, CellClass, ContainerClass, KernelClass, LeafClass >     FmmClass;
+ * FmmClass algo(&tree,&kernels);
+ * algo.execute();
+ * </pre>
+
+ \section reasons The reasons why ...
+ 
+ * Of course the library is changing and is refactored regularly, but
+ * let's discuss 'The reasons why':
+ 
+ * <ul>
+ * <li> Everything is templatized:
+ * <blockquote>
+ * The reason is to avoid the use of virtual and abstract classes. In
+ * this page we present some abstract classes, but they are not really
+ * used. They only define the need, the minimum required to implement a
+ * particle or a cell. But the kernels should not work on an abstract
+ * type but on the real data. This enables lots of compiler
+ * optimizations and avoids the use of V-Tables.
+ * </blockquote>
+ * </li>
+ 
+ * <li>
+ * Some destructors are not virtual :
+ * <blockquote>
+ * As we said, the classes are not meant to be inherited from. So
+ * a virtual destructor is not needed.
+ * </blockquote>
+ * </li>
+
+ * <li>
+ * Typedef is used like this : 
+ * <blockquote>
+ * It can take some time to understand how it works. But all our users
+ * finally like the way of using typedef and template. As you will see,
+ * in most of the examples the structure is the same and you will not be
+ * lost since in every example 'ParticleClass' is used for the particle
+ * type and so on.
+ * </blockquote>
+ * </li>
+
+ 
+ * </ul>
+
+*/
diff --git a/Doc/Site_dox/MainPage.dox b/Doc/Site_dox/MainPage.dox
new file mode 100644
index 0000000000000000000000000000000000000000..9e0f58b53acc1c20ab0e631fee199af97ddaa418
--- /dev/null
+++ b/Doc/Site_dox/MainPage.dox
@@ -0,0 +1,53 @@
+// This page contains the special doxygen pages and mainpage.
+
+/*!
+ * @mainpage ScalFmm
+ *
+ * \section overV Overview
+ *
+ * ScalFMM is a software library to simulate N-body interactions using
+ * the Fast Multipole Method.
+ * 
+ * The library offers two methods to compute interactions between
+ * bodies when the potential decays like 1/r. The first method is the
+ * classical FMM based on spherical harmonic expansions and the second is
+ * the Black-Box method which is a kernel-independent formulation
+ * (introduced by E. Darve @ Stanford). With this method, we can now
+ * easily add new non oscillatory kernels in our library. For the
+ * classical method, two approaches are used to decrease the complexity
+ * of the operators. We consider either matrix formulation that allows us
+ * to use BLAS routines or rotation matrices to speed up the M2L
+ * operator.
+ *
+ * ScalFMM intends to offer all the functionalities needed to perform
+ * large parallel simulations while enabling an easy customization of
+ * the simulation components: kernels, particles and cells. It works
+ * in parallel in a shared/distributed memory model using OpenMP and
+ * MPI. The software architecture has been designed with two major
+ * objectives: being easy to maintain and easy to understand. There are
+ * two main parts:
+ * <ul>
+ * <li> the management of the octree and the
+ * parallelization of the method; </li>
+ * <li> The kernels. This new
+ * architecture allows us to easily add new FMM algorithms or kernels
+ * and new paradigms of parallelization. </li>
+ * </ul>
+ *
+ *
+ * To download, build and install the application, please see \ref
+ * install.
+ *
+ * The \ref quick can help you to understand the architecture of the
+ * library.
+ *
+ * This software is distributed under a specific License. For more
+ * information, see \ref License.
+ *
+ * If you want to cite the project and/or the team, please contact
+ * us. See \ref contacts page.
+ *
+ */
+
+
+
diff --git a/Doc/Site_dox/scalfmm.png b/Doc/Site_dox/scalfmm.png
new file mode 100644
index 0000000000000000000000000000000000000000..766406d32e2b10c1ae68fc6e4c6f05651cd6126d
Binary files /dev/null and b/Doc/Site_dox/scalfmm.png differ
diff --git a/Doc/Src_tex/ParallelDetails.tex b/Doc/Src_tex/ParallelDetails.tex
new file mode 100755
index 0000000000000000000000000000000000000000..d04c0b88fef566fc6dbec873f00169ad28ad5487
--- /dev/null
+++ b/Doc/Src_tex/ParallelDetails.tex
@@ -0,0 +1,480 @@
+\documentclass[12pt,letterpaper,titlepage]{report}
+\usepackage{algorithm2e}
+\usepackage{listings}
+\usepackage{geometry}
+\usepackage{graphicx}
+\usepackage[hypertexnames=false, pdftex]{hyperref}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% use:$ pdflatex ParallelDetails.tex
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\author{Berenger Bramas}
+\title{ScalFmm - Parallel Algorithms (Draft)}
+\date{August 11, 2011}
+
+%% Package config
+\lstset{language=c++, frame=lines}
+\RestyleAlgo{boxed}
+\geometry{scale=0.8, nohead}
+\hypersetup{ colorlinks = true, linkcolor = black, urlcolor = blue, citecolor = blue }
+%% Remove introduction numbering
+\setcounter{secnumdepth}{-1}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{document}
+\maketitle{}
+\newpage
+\tableofcontents
+\newpage
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Introduction}
+In this document we introduce the principles and the algorithms used
+in our library to run in a distributed environment using MPI.  The
+algorithms in this document may not be up to date compared to those
+used in the code.  We advise checking the versions of this document and
+the code to make sure you have the latest available.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\chapter{Building the tree in Parallel}
+\section{Description}
+The main motivation to create a distributed version of the FMM is to
+run large simulations.  These ones contain more particles than a
+computer can host which involves using several computers.  Moreover,
+it is not reasonable to ask a master process to load an entire file
+and to dispatch the data to other processes. Without being able to
+know the entire tree it may only send the data randomly to the slaves.  To
+overcome this situation, our solution can be viewed as a two-step
+process.  First, each node loads a part of the file to possess several
+particles.  After this task, each node can compute the Morton index
+for the particles it has loaded.  The Morton index of a particle
+depends on the system properties but also on the tree height.  If we
+want to choose the tree height and the number of nodes at run time
+then we cannot pre-process the file.  The second step is a parallel
+sort based on the Morton index between all nodes with a balancing
+operation at the end.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Load a file in parallel}
+We use the MPI $I/O$ functions to split a file between all the mpi
+processes.  The prerequisite to make the splitting easier is to have a
+binary file.  Thereby, using a very basic formula each node knows
+which part of the file it needs to load.
+\begin{equation}
+  size per proc \leftarrow \left (file size - header size \right ) / nbprocs
+\end{equation}
+\begin{equation}
+  offset \leftarrow header size + size per proc .\left ( rank - 1 \right )
+\end{equation}
+\newline
+We do not use the view system to read that data as it is used to
+write. MPI\_File\_read\_at is called as shown in the following
+$C++$ code.
+\begin{lstlisting}
+  // From FMpiFmaLoader
+  MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal),
+  particles, int(bufsize), MPI_FLOAT, &status);
+\end{lstlisting}
+Our files are composed of a header followed by all the particles.
+The header makes it possible to check several properties, such as the precision of the file.
+Finally, a particle is represented by four real values: a three-component position and a physical value.
+\newline
+\underline{Remark:} The MPI IO functions do not work if we use MPI\_Init\_thread(MPI\_THREAD\_MULTIPLE) and a version above 1.5.1.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Sorting the particles}
+Once each node has a set of particles we need to sort them.  This
+problem boils down to a simple parallel sort where Morton indexes are
+used to compare particles.  We use two different approaches to sort
+the data.  In the next version of scalfmm the less efficient method
+should be deleted.
+
+\subsection{Using QuickSort}
+A first approach is to use a famous sorting algorithm.  We choose to
+use the quick sort algorithm because the distributed and the shared
+memory approaches are mostly similar.  Our implementation is based on
+the algorithm described in \cite{itpc03}.  The efficiency of this
+algorithm depends largely on the pivot choice.  In fact, a common misconception
+about the parallel quick sort is to think that each process first sorts
+its particles using quick sort and then uses a merge sort to share
+its results.  Instead, the nodes choose a common pivot and progress
+for one quick sort iteration together.  From that point each process
+has an array with a left part where all values are lower than the
+pivot and a right part where all values are greater than or equal to the
+pivot.  Then, the nodes exchange data and some of them will work on
+the lower part and the other on the upper parts until there is one
+process for a part.  At this point, the process performs a shared
+memory quick sort.  To choose the pivot we tried to use an average of
+all the data hosted by the nodes:
+\newline
+\begin{algorithm}[H]
+  \LinesNumbered
+  \SetAlgoLined
+  \KwResult{A Morton index as next iteration pivot}
+  \BlankLine
+  myFirstIndex $\leftarrow$ particles$[0]$.index\;
+  allFirstIndexes = MortonIndex$[nbprocs]$\;
+  allGather(myFirstIndex, allFirstIndexes)\;
+  pivot $\leftarrow$ Sum(allFirstIndexes(:) / nbprocs)\;
+  \BlankLine
+  \caption{Choosing the QS pivot}
+\end{algorithm}
+
+Our first implementation had a bug: it computed the average by summing all
+the values first and dividing afterwards. But the Morton indexes may be
+extremely high and their sum can overflow, so we need to divide each value before performing
+the sum.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Using a Sorting Network}
+In \cite{ptttplwaefmm11}, a proposition has been made to sort the data using a sorting network.
+We implemented such a sorting algorithm but the results were not extremely efficient.
+Contrary to quick sort, a sorting network is extremely stable and all the nodes perform similar work.
+The quick sort is pivot dependent and some nodes may work much more than others.
+But in the average case, the quick sort enables higher efficiency.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Using an intermediate Octree}
+The second approach uses an octree to sort the particles in each process instead of a sorting algorithm.
+The time complexity is equivalent but it needs more memory since it is not done in place.
+After inserting the particles in the tree, we can iterate at the leaf level and access the particles in an ordered way.
+Then, the processes are doing a minimum and a maximum reduction to know the real Morton interval of the system.
+By building the system interval in term of Morton index, the nodes cannot know the data scattering.
+Finally, the processes split the interval in a uniform manner and exchange data with $P^{2}$ communication in the worst case.
+\newline
+\newline
+In both approaches the data may not be balanced at the end.
+In fact, the first method is pivot dependent and the second considers that the data are uniformly distributed.
+That is the reason why we need to balance the data among nodes.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Balancing the leaves}
+After sorting, each process has potentially several leaves.
+If we have two processes $P_{i}$ and $P_{j}$ with $i < j$, the sort guarantees that all leaves on node $i$ have lower Morton indexes than the leaves on node $j$.
+But the leaves are randomly distributed among the nodes and we need to balance them.
+It is a simple reordering of the data, but the data has to stay sorted.
+
+\begin{enumerate}
+\item Each process tells the others how many leaves it holds.
+\item Each process computes how many leaves it has to send to or receive from the left or the right.
+\end{enumerate}
+At the end of the algorithm our system is completely balanced with the same number of leaves on each process.
+
+\begin{figure}[h!]
+  \begin{center}
+    \includegraphics[width=15cm, height=15cm, keepaspectratio=true]{Balance.png}
+    \caption{Balancing Example}
+  \end{center}
+\end{figure}
+
+A process has to send data to the left if its current left limit is higher than its objective limit.
+The same holds on the other side, and we can reverse the calculations to know if a process has to receive data.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\chapter{Simple operators: P2M, M2M, L2L}
+We present the different FMM operators in two separated parts
+depending on their parallel complexity.  In this first part, we
+present the three simplest operators P2M, M2M and L2L.  Their
+simplicity is explained by the possible prediction to know which node
+hosts a cell and how to organize the communication.
+
+We will first present how the different processes can know which cell
+or leaf belongs to which process.
+
+\section{Morton Index Intervals}
+A Morton Index Interval is a simple structure with two Morton indexes
+inside, referencing the first and last leaf of a process.  Each
+process first computes its Morton Index Interval by scanning all
+its leaves.
+
+Once each process has computed its interval, there is a global
+communication for the processes to know the intervals of the others,
+and the result is stored in an array of interval structures.
+
+
+\section{P2M}
+The P2M remains unchanged from the sequential approach to the
+distributed memory algorithm.  In fact, in the sequential model we
+compute a P2M between all particles of a leaf and this leaf which is
+also a cell.  Since a leaf and the particles it hosts belong to
+only one node, applying the P2M operator does not require any information
+from another node.  From that point, using the shared memory operator
+makes sense.
+
+\clearpage
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{M2M}
+During the upward pass information moves from a level to the upper
+one.  The problem in a distributed memory model is that one cell can
+exist in several trees i.e. in several nodes.  Because the M2M
+operator computes the relation between a cell and its child, the nodes
+which have a cell in common need to share information.
+
+Moreover, we have to decide which process will be responsible for the
+computation if the cell is present on more than one node.  We have
+decided that the node with the smallest rank has the responsibility to
+compute the M2M and propagate the value for the future operations.
+
+Despite the fact that other processes are not computing this cell,
+they have to send the child of this shared cell to the responsible
+node.
+
+We can establish some rules and some properties of the communication
+during this operation.  In fact, at each iteration a process never
+needs to send more than 7 cells, also a process never needs to receive
+more than 7 cells.  The shared cells are always at extremities and one
+process cannot be designated as responsible for more than one
+shared cell at a level. 
+
+There are two cases:
+\begin{itemize}
+  \item If my first cell is shared, I need to send the children I have of
+    this cell to the process on my left.
+  \item If my last cell is shared, I need to receive some
+    children from the process on my right.
+\end{itemize}
+
+
+\begin{figure}[h!]
+  \begin{center}
+    \includegraphics[width=14cm, height=7cm, keepaspectratio=true]{ruleillu.jpg}
+    \caption{Potential Conflicts}
+  \end{center}
+\end{figure}
+
+\begin{algorithm}[H]
+  \RestyleAlgo{boxed}
+  \LinesNumbered
+  \SetAlgoLined
+  \KwData{none}
+  \KwResult{none}
+  \BlankLine
+  \For{idxLevel $\leftarrow$ $Height - 2$ \KwTo 1}{
+    \ForAll{Cell c at level idxLevel}{
+      M2M(c, c.child)\;
+    }
+  }
+  \BlankLine
+  \caption{Traditional M2M}
+\end{algorithm}
+\begin{algorithm}[H]
+  \RestyleAlgo{boxed}
+  \LinesNumbered
+  \SetAlgoLined
+  \KwData{none}
+  \KwResult{none}
+  \BlankLine
+  \For{idxLevel $\leftarrow$ $Height - 2$ \KwTo 1}{
+    \uIf{$cells[0]$ not in my working interval}{
+      isend($cells[0].child$)\;
+      hasSend $\leftarrow$ true\;
+    }
+    \uIf{$cells[end]$ in another working interval}{
+      irecv(recvBuffer)\;
+      hasRecv $\leftarrow$ true\;
+    }
+    \ForAll{Cell c at level idxLevel in working interval}{
+      M2M(c, c.child)\;
+    }
+    \emph{Wait send and recv if needed}\;
+    \uIf{hasRecv is true}{
+      M2M($cells[end]$, recvBuffer)\;
+    }
+  }
+  \BlankLine
+  \caption{Distributed M2M}
+\end{algorithm}
+
+In the oct-tree, a cell or a leaf only exists if it has some children
+or particles in it. When a process receives some cells, it needs to
+know their positions in the tree, because some of the cells may
+not have been sent since they did not exist.
+
+The first thing to read from the received buffer is the header, which
+is a bit vector of length 8 (practically a char), indexing each cell
+sent.
+
+
+Example :
+\begin{tabular}{| c || c | c | c |}
+  \hline
+  Header & Data & ... & Data \\
+  \hline
+  00001011 & Data of cell 5 & Data of cell 7 & Data of cell 8 \\
+  \hline
+\end{tabular}
+
+
+\clearpage
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{L2L}
+The L2L operator is very similar to the M2M.  It is just the opposite:
+a result hosted by only one node needs to be shared with every other
+node that is responsible for at least one child of this node.
+
+The L2L operator fills the child local arrays from the parent local array, so
+there is no need to specify which cell is sent, since it is the parent
+cell that is sent. Consequently, there is no need for a header.
+
+\BlankLine
+\begin{algorithm}[H]
+  \RestyleAlgo{boxed}
+  \LinesNumbered
+  \SetAlgoLined
+  \KwData{none}
+  \KwResult{none}
+  \BlankLine
+  \For{idxLevel $\leftarrow$ 2 \KwTo $Height - 2$ }{
+    \uIf{$cells[0]$ not in my working interval}{
+      irecv($cells[0]$)\;
+      hasRecv $\leftarrow$ true\;
+    }
+    \uIf{$cells[end]$ in another working interval}{
+      isend($cells[end]$)\;
+      hasSend $\leftarrow$ true\;
+    }
+    \ForAll{Cell c at level idxLevel in working interval}{
+      L2L(c, c.child)\;
+    }
+    \emph{Wait send and recv if needed}\;
+    \uIf{hasRecv is true}{
+      L2L($cells[0]$, $cells[0].child$)\;
+    }
+  }
+  \BlankLine
+  \caption{Distributed L2L}
+\end{algorithm}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\chapter{Complex operators: P2P, M2L}
+These two operators are more complex than the ones presented in the previous chapter.
+In fact, it is very difficult to predict the communication between nodes.
+Each step requires pre-processing to know what the potential communications are and a gather to inform the others about the needs.
+\section{P2P}
+To compute the P2P a leaf needs to know all its direct neighbors.
+Even if the Morton indexing maximizes the locality, the neighbors of a leaf can be on any node.
+Also, the tree used in our library is an indirection tree.
+It means that only the leaves that contain particles are created.
+
+That is the reason why when we know that a leaf needs another one on a
+different node, this other node may not realize this relation if this
+neighbor leaf does not exist in its own tree.
+
+On the contrary, if this neighbor leaf exists then the node will require the first leaf to compute the P2P too.
+In our current version we first process each potential need to know which communications will be required.
+Then the nodes do an all gather to inform each other how many communications they are going to send.
+Finally they send and receive data asynchronously and overlap the communication with the P2P they can already compute.
+\BlankLine
+\begin{algorithm}[H]
+  \RestyleAlgo{boxed}
+  \LinesNumbered
+  \SetAlgoLined
+  \KwData{none}
+  \KwResult{none}
+  \BlankLine
+  \ForAll{Leaf lf}{
+    neighborsIndexes $\leftarrow$ $lf.potentialNeighbors()$\;
+    \ForAll{index in neighborsIndexes}{
+      \uIf{index belongs to another proc}{
+        isend(lf)\;
+        \emph{Mark lf as a leaf that is linked to another proc}\;
+      }
+    }
+  }
+  \emph{all gather how many particles to send to who}\;
+  \emph{prepare the buffer to receive data}\;
+  \ForAll{Leaf lf}{
+    \uIf{lf is not linked to another proc}{
+      neighbors $\leftarrow$ $tree.getNeighbors(lf)$\;
+      P2P(lf, neighbors)\;
+    }
+  }
+  \While{We have not received/sent everything}{
+    \emph{Wait some send and recv}\;
+    \emph{Put received particles in a fake tree}\;
+  }
+  \ForAll{Leaf lf}{
+    \uIf{lf is linked to another proc}{
+      neighbors $\leftarrow$ $tree.getNeighbors(lf)$\;
+      otherNeighbors $\leftarrow$ $fakeTree.getNeighbors(lf)$\;
+      P2P(lf, neighbors + otherNeighbors)\;
+    }
+  }
+  \BlankLine
+  \caption{Distributed P2P}
+\end{algorithm}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{M2L}
+The M2L operator is relatively similar to the P2P.
+Whereas P2P is done at the leaf level, M2L is done on several levels from Height - 2 to 2.
+At each level, a node needs to have access to all the distant neighbors of the cells it owns, and those can be hosted by any other node.
+Anyway, each node can compute a part of the M2L with the data it has.
+The algorithm can be viewed as several tasks:
+\begin{enumerate}
+\item Compute to know what data has to be sent
+\item All gather to know what data has to be received
+\item Do all the computation we can without the data from other nodes
+\item Wait $send/receive$
+\item Compute M2L with the data we received
+\end{enumerate}
+\BlankLine
+\begin{algorithm}[H]
+  \RestyleAlgo{boxed}
+  \LinesNumbered
+  \SetAlgoLined
+  \KwData{none}
+  \KwResult{none}
+  \BlankLine
+  \ForAll{Level idxLevel from 2 to Height - 2}{
+    \ForAll{Cell c at level idxLevel}{
+      neighborsIndexes $\leftarrow$ $c.potentialDistantNeighbors()$\;
+      \ForAll{index in neighborsIndexes}{
+        \uIf{index belongs to another proc}{
+          isend(c)\;
+          \emph{Mark c as a cell that is linked to another proc}\;
+        }
+      }
+    }
+  }
+  \emph{Normal M2L}\;
+  \emph{Wait send and recv if needed}\;
+  \ForAll{Cell c received}{
+    $lightOctree.insert( c )$\;
+  }
+  \ForAll{Level idxLevel from 2 to Height - 1}{
+    \ForAll{Cell c at level idxLevel that are marked}{
+      neighborsIndexes $\leftarrow$ $c.potentialDistantNeighbors()$\;
+      neighbors $\leftarrow$ lightOctree.get(neighborsIndexes)\;
+      M2L( c, neighbors)\;
+    }
+  }
+  \BlankLine
+  \caption{Distributed M2L}
+\end{algorithm}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{thebibliography}{9}
+\bibitem{itpc03}
+  Ananth Grama, George Karypis, Vipin Kumar, Anshul Gupta,
+  \emph{Introduction to Parallel Computing}.
+  Addison Wesley, Massachusetts,
+  2nd Edition,
+  2003.
+\bibitem{ptttplwaefmm11}
+  I. Kabadshow, H. Dachsel,
+  \emph{Passing The Three Trillion Particle Limit With An Error-Controlled Fast Multipole Method}.
+  2011.
+\end{thebibliography}
+\end{document}
+
+
diff --git a/README.txt b/README.txt
index a504379e4d47900d09e3da4a724b69b43b1b0814..5e398ae57d8c9081bf4a4c6ae593cafb6d5db3af 100644
--- a/README.txt
+++ b/README.txt
@@ -10,6 +10,7 @@ cmake ..
 # Or if you want to use MPI
 cmake .. -DScalFMM_USE_MPI=ON
 
+
 # Configure, for example with:
 ccmake ..
 # turn on/off the options you want
@@ -29,6 +30,3 @@ cd scalfmm/Build
 make doc
 # This will create a Html dir
 browser scalfmm/Build/Doc/html/index.html
-
-
-
diff --git a/Src/Arranger/FOctreeArranger.hpp b/Src/Arranger/FOctreeArranger.hpp
index 79a66e5b27ae201a7bc16caa26206494e06f7041..39d88146ba18665f7f79fb8b56f6fa05212c866a 100755
--- a/Src/Arranger/FOctreeArranger.hpp
+++ b/Src/Arranger/FOctreeArranger.hpp
@@ -19,7 +19,7 @@
 #include "../Utils/FGlobal.hpp"
 #include "../Utils/FPoint.hpp"
 #include "../Containers/FVector.hpp"
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 
 #include "../Utils/FGlobalPeriodic.hpp"
 
@@ -37,13 +37,13 @@
 * tree.
 */
 template <class OctreeClass, class ContainerClass, class ParticleClass, class ConverterClass >
-class FOctreeArranger : FAssertable {
+class FOctreeArranger {
     OctreeClass* const tree; //< The tree to work on
 
 public:
     /** Basic constructor */
     FOctreeArranger(OctreeClass* const inTree) : tree(inTree) {
-        fassert(tree, "Tree cannot be null", __LINE__ , __FILE__ );
+        FAssertLF(tree, "Tree cannot be null" );
     }
 
     /** Arrange */
diff --git a/Src/Arranger/FOctreeArrangerProc.hpp b/Src/Arranger/FOctreeArrangerProc.hpp
index c6b751672795d3384b58934ebdd551fa3e8f532c..cdbc0710d7b65b1793b919960b61cea16a1066dc 100755
--- a/Src/Arranger/FOctreeArrangerProc.hpp
+++ b/Src/Arranger/FOctreeArrangerProc.hpp
@@ -18,7 +18,7 @@
 
 #include "../Utils/FGlobal.hpp"
 #include "../Containers/FVector.hpp"
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FMpi.hpp"
 
 #include "../Utils/FGlobalPeriodic.hpp"
@@ -36,7 +36,7 @@
   * tree.
   */
 template <class OctreeClass, class ContainerClass, class ParticleClass, class ConverterClass >
-class FOctreeArrangerProc : FAssertable {
+class FOctreeArrangerProc  {
     /** Interval is the min/max morton index
       * for a proc
       */
@@ -64,7 +64,7 @@ class FOctreeArrangerProc : FAssertable {
 public:
     /** Basic constructor */
     FOctreeArrangerProc(OctreeClass* const inTree) : tree(inTree) {
-        fassert(tree, "Tree cannot be null", __LINE__ , __FILE__ );
+        FAssertLF(tree, "Tree cannot be null");
     }
 
     /** return false if the tree is empty after processing */
diff --git a/Src/Components/FAbstractSendable.hpp b/Src/Components/FAbstractSendable.hpp
index 01bb3c1bee5be948e15a419e045d127c9ee5e2bb..b3c37eaa50d9214a533d109264b50b640cb38050 100755
--- a/Src/Components/FAbstractSendable.hpp
+++ b/Src/Components/FAbstractSendable.hpp
@@ -16,9 +16,6 @@
 #ifndef FABSTRACTSENDABLE_HPP
 #define FABSTRACTSENDABLE_HPP
 
-class FBufferReader;
-class FBufferWriter;
-
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
 * @class FAbstractSendable
@@ -36,18 +33,30 @@ protected:
     ///////////////////////////////////////////////
 
     /** Save your data */
-    virtual void serializeUp(FBufferWriter&) const  = 0;
+    template <class BufferWriterClass>
+    void serializeUp(BufferWriterClass&) const{
+        static_assert(sizeof(BufferWriterClass) == 0 , "Your class should implement serializeUp");
+    }
     /** Retrieve your data */
-    virtual void deserializeUp(FBufferReader&) = 0;
+    template <class BufferReaderClass>
+    void deserializeUp(BufferReaderClass&){
+        static_assert(sizeof(BufferReaderClass) == 0 , "Your class should implement deserializeUp"); // condition depends on the template parameter, so it is only evaluated on instantiation
+    }
 
     ///////////////////////////////////////////////
     // For Downward pass
     ///////////////////////////////////////////////
 
     /** Save your data */
-    virtual void serializeDown(FBufferWriter&) const = 0;
+    template <class BufferWriterClass>
+    void serializeDown(BufferWriterClass&) const{
+        static_assert(sizeof(BufferWriterClass) == 0 , "Your class should implement serializeDown");
+    }
     /** Retrieve your data */
-    virtual void deserializeDown(FBufferReader&) = 0;
+    template <class BufferReaderClass>
+    void deserializeDown(BufferReaderClass&){
+        static_assert(sizeof(BufferReaderClass) == 0 , "Your class should implement deserializeDown"); // condition depends on the template parameter, so it is only evaluated on instantiation
+    }
 };
 
 
diff --git a/Src/Components/FAbstractSerializable.hpp b/Src/Components/FAbstractSerializable.hpp
index 5a31dc72448157b61aeeae88ecf6cc78deaa6dbd..58d4732c8b6426193eeb7fa399d68a605365b67f 100755
--- a/Src/Components/FAbstractSerializable.hpp
+++ b/Src/Components/FAbstractSerializable.hpp
@@ -16,8 +16,6 @@
 #ifndef FABSTRACTSERIALIZABLE_HPP
 #define FABSTRACTSERIALIZABLE_HPP
 
-class FBufferReader;
-class FBufferWriter;
 
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
@@ -29,8 +27,14 @@ class FBufferWriter;
 */
 class FAbstractSerializable {
 protected:
-    virtual void save(FBufferWriter&) const  = 0;
-    virtual void restore(FBufferReader&) = 0;
+    template <class BufferWriterClass>
+    void save(BufferWriterClass&) const{
+        static_assert(sizeof(BufferWriterClass) == 0 , "Your class should implement save");
+    }
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass&){
+        static_assert(sizeof(BufferReaderClass) == 0 , "Your class should implement restore");
+    }
 };
 
 #endif // FABSTRACTSERIALIZABLE_HPP
diff --git a/Src/Components/FBasicCell.hpp b/Src/Components/FBasicCell.hpp
index 4c25249b4d763b7e1dc7cfb7e38286e5a4491184..e5abafe9534f4effe144cfbb51900bb849389577 100755
--- a/Src/Components/FBasicCell.hpp
+++ b/Src/Components/FBasicCell.hpp
@@ -16,7 +16,7 @@
 #ifndef FBASICCELL_HPP
 #define FBASICCELL_HPP
 
-
+#include "FAbstractSerializable.hpp"
 #include "../Extensions/FExtendMortonIndex.hpp"
 #include "../Extensions/FExtendCoordinate.hpp"
 
@@ -34,19 +34,21 @@
 *
 *
 */
-class FBasicCell : public FExtendMortonIndex, public FExtendCoordinate {
+class FBasicCell : public FExtendMortonIndex, public FExtendCoordinate, public FAbstractSerializable {
 public:
     /** Default destructor */
     virtual ~FBasicCell(){
     }
 
     /** Save the current cell in a buffer */
-    void save(FBufferWriter& buffer) const{
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const{
         FExtendMortonIndex::save(buffer);
         FExtendCoordinate::save(buffer);
     }
     /** Restore the current cell from a buffer */
-    void restore(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer){
         FExtendMortonIndex::restore(buffer);
         FExtendCoordinate::restore(buffer);
     }
diff --git a/Src/Components/FBasicParticleContainer.hpp b/Src/Components/FBasicParticleContainer.hpp
index 1c09c2609da6c5f16d5102cb017409c5c0ccfce0..1509eaa9cc28758331485e9b6333c87f9892273f 100755
--- a/Src/Components/FBasicParticleContainer.hpp
+++ b/Src/Components/FBasicParticleContainer.hpp
@@ -17,6 +17,7 @@
 #define FBASICPARTICLECONTAINER_HPP
 
 #include "FAbstractParticleContainer.hpp"
+#include "FAbstractSerializable.hpp"
 
 #include "../Utils/FAlignedMemory.hpp"
 #include "../Utils/FMath.hpp"
@@ -42,7 +43,7 @@
 * @code AStruct* strucs = container.getAttributes<0>();
 */
 template <unsigned NbAttributesPerParticle, class AttributeClass = FReal >
-class FBasicParticleContainer : public FAbstractParticleContainer {
+class FBasicParticleContainer : public FAbstractParticleContainer, public FAbstractSerializable {
 protected:
     /** The number of particles in the container */
     int nbParticles;
@@ -248,11 +249,12 @@ public:
 
     /** The size to send a leaf */
     int getSavedSize() const{
-        return int(nbParticles * (3 * sizeof(FReal) + NbAttributesPerParticle * sizeof(AttributeClass)));
+      return int(sizeof(nbParticles) + nbParticles * (3 * sizeof(FReal) + NbAttributesPerParticle * sizeof(AttributeClass)));
     }
 
     /** Save the current cell in a buffer */
-    void save(FBufferWriter& buffer) const{
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const{
         buffer << nbParticles;
         for(int idx = 0 ; idx < 3 ; ++idx){
             buffer.write(positions[idx], nbParticles);
@@ -262,7 +264,8 @@ public:
         }
     }
     /** Restore the current cell from a buffer */
-    void restore(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer){
         buffer >> nbParticles;
         if( nbParticles >= allocatedParticles ){
             // allocate memory
diff --git a/Src/Components/FTestCell.hpp b/Src/Components/FTestCell.hpp
index 1acc12264dd9533d0ad37a99d6328f89a51c983f..1ff98841728900c852390535c673ce1a8f8e7320 100755
--- a/Src/Components/FTestCell.hpp
+++ b/Src/Components/FTestCell.hpp
@@ -16,7 +16,6 @@
 #ifndef FTESTCELL_HPP
 #define FTESTCELL_HPP
 
-
 #include "FBasicCell.hpp"
 
 /**
@@ -63,13 +62,15 @@ public:
     /////////////////////////////////////////////////
 
     /** Save the current cell in a buffer */
-    void save(FBufferWriter& buffer) const{
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const{
         FBasicCell::save(buffer);
         buffer << dataDown << dataUp;
     }
 
     /** Restore the current cell from a buffer */
-    void restore(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer){
         FBasicCell::restore(buffer);
         buffer >> dataDown >> dataUp;
     }
@@ -77,22 +78,27 @@ public:
     /////////////////////////////////////////////////
 
     /** Serialize only up data in a buffer */
-    void serializeUp(FBufferWriter& buffer) const {
+    template <class BufferWriterClass>
+    void serializeUp(BufferWriterClass& buffer) const {
         buffer << this->dataUp;
     }
     /** Deserialize only up data in a buffer */
-    void deserializeUp(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void deserializeUp(BufferReaderClass& buffer){
         buffer >> this->dataUp;
     }
 
     /** Serialize only down data in a buffer */
-    void serializeDown(FBufferWriter& buffer) const {
+    template <class BufferWriterClass>
+    void serializeDown(BufferWriterClass& buffer) const {
         buffer << this->dataDown;
     }
     /** Deserialize only up data in a buffer */
-    void deserializeDown(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void deserializeDown(BufferReaderClass& buffer){
         buffer >> this->dataDown;
     }
+
 };
 
 
diff --git a/Src/Components/FTestKernels.hpp b/Src/Components/FTestKernels.hpp
index e2aeacc72fd5f94e754e4e3f622bf1a9c6df7b95..bed6b0486612faa04644daf2649a3ff42480858b 100755
--- a/Src/Components/FTestKernels.hpp
+++ b/Src/Components/FTestKernels.hpp
@@ -186,8 +186,9 @@ void ValidateFMMAlgo(OctreeClass* const tree){
             for(int idxPart = 0 ; idxPart < octreeIterator.getCurrentListTargets()->getNbParticles() ; ++idxPart){
                 if( (!isUsingTsm && dataDown[idxPart] != NbPart - 1) ||
                     (isUsingTsm && dataDown[idxPart] != NbPart) ){
-                    std::cout << "Problem L2P + P2P : " << dataDown[idxPart] <<
-                                 "(" << octreeIterator.getCurrentGlobalIndex() << ")\n";
+                    std::cout << "Problem L2P + P2P : " << dataDown[idxPart] << ", " <<
+                                 " NbPart : " << NbPart << ", " <<
+                                 " ( Index " << octreeIterator.getCurrentGlobalIndex() << ")\n";
                 }
             }
         } while(octreeIterator.moveRight());
diff --git a/Src/Components/FTypedLeaf.hpp b/Src/Components/FTypedLeaf.hpp
index 1fc1a65d14f5bbc30a8a2ab1ea2ef59cd0d780b2..ad4694424963c82ecb98d875d48165652e7ba6a0 100755
--- a/Src/Components/FTypedLeaf.hpp
+++ b/Src/Components/FTypedLeaf.hpp
@@ -17,7 +17,7 @@
 #define FTYPEDLEAF_HPP
 
 
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "FAbstractLeaf.hpp"
 #include "FParticleType.hpp"
 
@@ -33,7 +33,7 @@
 * Particles should be typed to enable targets/sources difference.
 */
 template< class ContainerClass>
-class FTypedLeaf  : public FAbstractLeaf<ContainerClass>, public FAssertable {
+class FTypedLeaf  : public FAbstractLeaf<ContainerClass> {
     ContainerClass sources; //< The sources containers
     ContainerClass targets; //< The targets containers
 
diff --git a/Src/Containers/FAbstractBuffer.hpp b/Src/Containers/FAbstractBuffer.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..853de59f045116bca5fd725f6471a135761f8d83
--- /dev/null
+++ b/Src/Containers/FAbstractBuffer.hpp
@@ -0,0 +1,99 @@
+#ifndef FABSTRACTBUFFER_HPP
+#define FABSTRACTBUFFER_HPP
+
+/**
+ * Common interface of the buffer readers.
+ *
+ * The virtual members expose the raw memory and the read position.
+ * The template members cannot be virtual, so a concrete reader has to
+ * redeclare them; the versions below only exist to stop the compilation
+ * with a readable message (static_assert) if they are ever instantiated.
+ */
+class FAbstractBufferReader {
+public:
+    /** Virtual destructor, readers may be destroyed through this interface */
+    virtual ~FAbstractBufferReader(){
+    }
+
+    /** Get the underlying memory */
+    virtual char* data() = 0;
+    /** Get the underlying memory (read only) */
+    virtual const char* data() const  = 0;
+    /** Get the size reported by the concrete reader */
+    virtual int getSize() const = 0;
+    /** Move the read position to inIndex */
+    virtual void seek(const int inIndex) = 0;
+    /** Get the current read position */
+    virtual int tell() const  = 0;
+
+    /** To be redeclared by the concrete reader: read and return one value */
+    template <class ClassType>
+    ClassType getValue(){
+        static_assert(sizeof(ClassType) == 0, "Your Buffer should implement getValue.");
+        return ClassType();
+    }
+    /** To be redeclared by the concrete reader: read one value into the given address */
+    template <class ClassType>
+    void fillValue(ClassType* const){
+        static_assert(sizeof(ClassType) == 0, "Your Buffer should implement fillValue.");
+    }
+    /** To be redeclared by the concrete reader: read several values into the given array */
+    template <class ClassType>
+    void fillArray(ClassType* const , const int ){
+        static_assert(sizeof(ClassType) == 0, "Your Buffer should implement fillArray.");
+    }
+    /** To be redeclared by the concrete reader: stream-like read */
+    template <class ClassType>
+    FAbstractBufferReader& operator>>(ClassType& ){
+        static_assert(sizeof(ClassType) == 0, "Your Buffer should implement operator>>.");
+        return *this;
+    }
+};
+
+
+
+/**
+ * Common interface of the buffer writers.
+ *
+ * Same design as FAbstractBufferReader: virtual access to the memory,
+ * and template members that a concrete writer has to redeclare.
+ */
+class FAbstractBufferWriter {
+public:
+    /** Virtual destructor, writers may be destroyed through this interface */
+    virtual ~FAbstractBufferWriter(){
+    }
+
+    /** Get the underlying memory */
+    virtual char* data() = 0;
+    /** Get the underlying memory (read only) */
+    virtual const char* data()  const = 0;
+    /** Get the size reported by the concrete writer */
+    virtual int getSize() const = 0;
+    /** Reset the writer */
+    virtual void reset() = 0;
+
+    /** To be redeclared by the concrete writer: append one object */
+    template <class ClassType>
+    void write(const ClassType& ){
+        static_assert(sizeof(ClassType) == 0, "Your Buffer should implement write.");
+    }
+    /** To be redeclared by the concrete writer: write one object at a given position */
+    template <class ClassType>
+    void writeAt(const int , const ClassType& ){
+        static_assert(sizeof(ClassType) == 0, "Your Buffer should implement writeAt.");
+    }
+    /** To be redeclared by the concrete writer: append an array of objects */
+    template <class ClassType>
+    void write(const ClassType* const , const int ){
+        static_assert(sizeof(ClassType) == 0, "Your Buffer should implement write.");
+    }
+    /** To be redeclared by the concrete writer: stream-like write */
+    template <class ClassType>
+    FAbstractBufferWriter& operator<<(const ClassType& ){
+        static_assert(sizeof(ClassType) == 0, "Your Buffer should implement operator<<.");
+        return *this;
+    }
+};
+
+#endif // FABSTRACTBUFFER_HPP
diff --git a/Src/Containers/FBufferReader.hpp b/Src/Containers/FBufferReader.hpp
index 2355c08ab95049fccfb3b12c2827cc81a83876bf..15041f27417808efce12d18b3c23f8633ca20e9f 100755
--- a/Src/Containers/FBufferReader.hpp
+++ b/Src/Containers/FBufferReader.hpp
@@ -17,7 +17,7 @@
 #define FBUFFERREADER_HPP
 
 #include "FVector.hpp"
-
+#include "FAbstractBuffer.hpp"
 
 /** @author Berenger Bramas
   * This class provide a fast way to manage a memory and convert
@@ -26,7 +26,7 @@
   * Specifie the needed space with reserve, then fill it with data
   * finaly read and convert.
   */
-class FBufferReader {
+class FBufferReader : public FAbstractBufferReader {
     FVector<char> buffer;   //< The memory buffer
     int index;              //< The current index reading position
 
diff --git a/Src/Containers/FBufferWriter.hpp b/Src/Containers/FBufferWriter.hpp
index 7111f6a2708ad3c1cc04ba304896a480c2990868..17fb48e57da44a82f166f2562c92e6ee3bb9eb13 100755
--- a/Src/Containers/FBufferWriter.hpp
+++ b/Src/Containers/FBufferWriter.hpp
@@ -17,6 +17,7 @@
 #define FBUFFERWRITER_HPP
 
 #include "FVector.hpp"
+#include "FAbstractBuffer.hpp"
 
 /** @author Berenger Bramas
   * This class provide a fast way to manage a memory and fill it
@@ -25,7 +26,7 @@
   * then insert back if needed
   * finaly use data pointer as you like
   */
-class FBufferWriter {
+class FBufferWriter : public FAbstractBufferWriter {
 private:
     FVector<char> buffer; //< The buffer
 
diff --git a/Src/Containers/FMpiBufferReader.hpp b/Src/Containers/FMpiBufferReader.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e99c06e70bad178314c8963dbab3f25f6b910c6f
--- /dev/null
+++ b/Src/Containers/FMpiBufferReader.hpp
@@ -0,0 +1,143 @@
+// ===================================================================================
+// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
+// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
+// This software is a computer program whose purpose is to compute the FMM.
+//
+// This software is governed by the CeCILL-C and LGPL licenses and
+// abiding by the rules of distribution of free software.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public and CeCILL-C Licenses for more details.
+// "http://www.cecill.info".
+// "http://www.gnu.org/licenses".
+// ===================================================================================
+#ifndef FMPIBUFFERREADER_HPP
+#define FMPIBUFFERREADER_HPP
+
+#include <cstdio>
+#include <cstdlib>
+#include <memory>
+#include "../Utils/FMpi.hpp"
+#include "FAbstractBuffer.hpp"
+
+
+/** @author Cyrille Piacibello
+ * This class provides the same features as FBufferReader using the MPI_Unpack system
+ *
+ * Fill the memory returned by data() (its size is getCapacity()),
+ * then read the values back in the order they were packed.
+ *
+ * NOTE(review): the read position is moved by sizeof(ClassType) before each
+ * MPI_Unpack, so the packed size of a value is assumed to be equal to its
+ * in-memory size - to be confirmed for the targeted MPI implementations.
+ */
+class FMpiBufferReader : public FAbstractBufferReader {
+  const MPI_Comm comm;            //< Communicator needed by MPI_Unpack functions
+  const int arrayCapacity;        //< Allocated space
+  std::unique_ptr<char[]> array;  //< Allocated Array
+  int currentIndex;               //< Current read position
+
+  /** Test and exit if not enough space */
+  void assertRemainingSpace(const size_t requestedSpace) const {
+    if(int(currentIndex + requestedSpace) > arrayCapacity){
+      printf("Error FMpiBufferReader has not enough space\n");
+      exit(EXIT_FAILURE);
+    }
+  }
+
+public :
+  /** Constructor with a default arrayCapacity of 512 bytes */
+  FMpiBufferReader(const MPI_Comm inComm, const int inCapacity = 512):
+    comm(inComm),
+    arrayCapacity(inCapacity),
+    array(new char[inCapacity]),
+    currentIndex(0)
+  {}
+
+  /** Destructor
+   */
+  virtual ~FMpiBufferReader(){
+  }
+
+  /** Get allocated memory pointer */
+  char* data(){
+    return array.get();
+  }
+
+  /** Get allocated memory pointer */
+  const char* data() const {
+    return array.get();
+  }
+
+  /** Get the current read position (same value as tell()) */
+  int getSize() const{
+    return currentIndex;
+  }
+
+  /** Size of the memory initialized */
+  int getCapacity() const{
+    return arrayCapacity;
+  }
+
+  /** Move the read index to a position, exit with an error if it is out of the buffer */
+  void seek(const int inIndex){
+    if(inIndex < 0 || inIndex > arrayCapacity){
+      printf("FMpiBufferReader :: Aborting :: Can't move index to %d, buffer capacity is %d\n", inIndex, arrayCapacity);
+      exit(EXIT_FAILURE);
+    }
+    currentIndex = inIndex;
+  }
+
+  /** Get the read position */
+  int tell() const {
+    return currentIndex;
+  }
+
+  /** Unpack one value at the read position and move the read position after it */
+  template <class ClassType>
+  ClassType getValue(){
+    ClassType value;
+    int previousIndex = currentIndex;
+    seek(int(sizeof(value)) + previousIndex);
+    MPI_Unpack(array.get(),arrayCapacity,&previousIndex,&value,1,FMpi::GetType(value),comm);
+    return value;
+  }
+
+  /** Unpack one value stored at index ind, the read position is moved after it */
+  template <class ClassType>
+  ClassType getValue(const int ind){
+    ClassType value;
+    int previousIndex = ind;
+    seek(int(sizeof(value)) + ind);
+    MPI_Unpack(array.get(),arrayCapacity,&previousIndex,&value,1,FMpi::GetType(value),comm);
+    return value;
+  }
+
+  /** Unpack one value at the read position into inValue */
+  template <class ClassType>
+  void fillValue(ClassType* const inValue){
+    int previousIndex = currentIndex;
+    seek(int(sizeof(ClassType)) + previousIndex);
+    MPI_Unpack(array.get(),arrayCapacity,&previousIndex,inValue,1,FMpi::GetType(*inValue),comm);
+  }
+
+  /** Unpack inSize values (a number of objects, not a size in bytes) into inArray */
+  template <class ClassType>
+  void fillArray(ClassType* const inArray, const int inSize){
+    int previousIndex = currentIndex;
+    seek(int(sizeof(ClassType) * inSize) + previousIndex);
+    MPI_Unpack(array.get(),arrayCapacity,&previousIndex,inArray,inSize,FMpi::GetType(*inArray),comm);
+  }
+
+  /** Same as fillValue */
+  template <class ClassType>
+  FMpiBufferReader& operator>>(ClassType& object){
+    fillValue(&object);
+    return *this;
+  }
+
+};
+#endif
+
diff --git a/Src/Containers/FMpiBufferWriter.hpp b/Src/Containers/FMpiBufferWriter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..081bb4f82aeacb711d10d24cb7e0ea33f7928e78
--- /dev/null
+++ b/Src/Containers/FMpiBufferWriter.hpp
@@ -0,0 +1,133 @@
+// ===================================================================================
+// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
+// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
+// This software is a computer program whose purpose is to compute the FMM.
+//
+// This software is governed by the CeCILL-C and LGPL licenses and
+// abiding by the rules of distribution of free software.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public and CeCILL-C Licenses for more details.
+// "http://www.cecill.info".
+// "http://www.gnu.org/licenses".
+// ===================================================================================
+#ifndef FMPIBUFFERWRITER_HPP
+#define FMPIBUFFERWRITER_HPP
+
+#include <cstdio>
+#include <cstdlib>
+#include <memory>
+#include "../Utils/FMpi.hpp"
+#include "FAbstractBuffer.hpp"
+
+/** @author Cyrille Piacibello
+ * This class provides the same features as FBufferWriter using the MPI_Pack system
+ *
+ * Put some data
+ * then insert back if needed
+ * finally use data pointer as you like
+ *
+ * NOTE(review): the space checks are based on sizeof(ClassType), so the packed
+ * size of an object is assumed to be equal to its in-memory size - to be
+ * confirmed for the targeted MPI implementations.
+ */
+class FMpiBufferWriter : public FAbstractBufferWriter {
+  const MPI_Comm mpiComm;         //< Communicator needed by MPI_Pack functions
+  const int arrayCapacity;        //< Allocated Space
+  std::unique_ptr<char[]> array;  //< Allocated Array
+  int currentIndex;               //< Currently filled space
+
+  /** Test and exit if not enough space */
+  void assertRemainingSpace(const size_t requestedSpace) const {
+    if(int(currentIndex + requestedSpace) > arrayCapacity){
+      printf("Error FMpiBufferWriter has not enough space\n");
+      exit(EXIT_FAILURE);
+    }
+  }
+
+public:
+  /** Constructor with a default arrayCapacity of 1024 bytes */
+  FMpiBufferWriter(const MPI_Comm inComm, const int inCapacity = 1024):
+    mpiComm(inComm),
+    arrayCapacity(inCapacity),
+    array(new char[inCapacity]),
+    currentIndex(0)
+  {}
+
+  /** Destructor */
+  virtual ~FMpiBufferWriter(){
+  }
+
+  /** Get allocated memory pointer */
+  char* data(){
+    return array.get();
+  }
+
+  /** Get allocated memory pointer */
+  const char* data() const {
+    return array.get();
+  }
+
+  /** Get the filled space */
+  int getSize() const {
+    return currentIndex;
+  }
+
+  /** Get the allocated space */
+  int getCapacity() const {
+    return arrayCapacity;
+  }
+
+  /** Write data by packing cpy */
+  template <class ClassType>
+  void write(const ClassType& object){
+    assertRemainingSpace(sizeof(ClassType));
+    MPI_Pack(const_cast<ClassType*>(&object), 1, FMpi::GetType(object), array.get(), arrayCapacity, &currentIndex, mpiComm);
+  }
+
+  /**
+   * Allow to pass rvalue to write
+   */
+  template <class ClassType>
+  void write(const ClassType&& object){
+    assertRemainingSpace(sizeof(ClassType));
+    MPI_Pack(const_cast<ClassType*>(&object), 1, FMpi::GetType(object), array.get(), arrayCapacity, &currentIndex, mpiComm);
+  }
+
+  /** Write back, position has to be >= 0 and position + sizeof(object) has to be <= size */
+  template <class ClassType>
+  void writeAt(const int position, const ClassType& object){
+    if(position < 0 || position + (int) sizeof(ClassType) > currentIndex){
+      printf("Not enought space\n");
+      exit(EXIT_FAILURE);
+    }
+    int noConstPosition = position;
+    MPI_Pack(const_cast<ClassType*>(&object), 1, FMpi::GetType(object), array.get(), arrayCapacity, &noConstPosition, mpiComm);
+  }
+
+  /** Write an array
+   * Warning : inSize is a number of ClassType object to write, not a size in bytes
+   */
+  template <class ClassType>
+  void write(const ClassType* const objects, const int inSize){
+    assertRemainingSpace(sizeof(ClassType) * inSize);
+    MPI_Pack( const_cast<ClassType*>(objects), inSize, FMpi::GetType(*objects), array.get(), arrayCapacity, &currentIndex, mpiComm);
+  }
+
+  /** Equivalent to write */
+  template <class ClassType>
+  FMpiBufferWriter& operator<<(const ClassType& object){
+    write(object);
+    return *this;
+  }
+
+  /** Reset the writing index, but do not change the arrayCapacity */
+  void reset(){
+    currentIndex = 0;
+  }
+};
+
+
+#endif // FMPIBUFFERWRITER_HPP
diff --git a/Src/Containers/FOctree.hpp b/Src/Containers/FOctree.hpp
index 928d591f450b6c87b0cb07be0e4702ef23b8d95a..e79e267f84f5f47aa3d17c882ed4744cef74f2d1 100755
--- a/Src/Containers/FOctree.hpp
+++ b/Src/Containers/FOctree.hpp
@@ -28,7 +28,7 @@
 #include "../Utils/FPoint.hpp"
 #include "../Utils/FMath.hpp"
 #include "../Utils/FNoCopyable.hpp"
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 
 
 /**
@@ -53,7 +53,7 @@
  * CellAllocator can be FListBlockAllocator<CellClass, 10> or FBasicBlockAllocator<CellClass>
  */
 template< class CellClass, class ContainerClass, class LeafClass, class CellAllocatorClass = FListBlockAllocator<CellClass, 15> >
-class FOctree : protected FAssertable, public FNoCopyable {
+class FOctree : public FNoCopyable {
     typedef FSubOctreeWithLeafs< CellClass , ContainerClass, LeafClass, CellAllocatorClass> SubOctreeWithLeaves;
     typedef FSubOctree< CellClass , ContainerClass, LeafClass, CellAllocatorClass> SubOctree;
 
@@ -93,14 +93,10 @@ class FOctree : protected FAssertable, public FNoCopyable {
         * @return the box num at the leaf level that contains inRelativePosition
         */
     int getTreeCoordinate(const FReal inRelativePosition) const {
-        FLOG( fassert(inRelativePosition >= 0 && inRelativePosition < this->boxWidth, "Particle out of box", __LINE__, __FILE__) );
-        const FReal indexFReal = inRelativePosition / this->boxWidthAtLevel[this->leafIndex];
-        /*const int index = int(FMath::dfloor(indexFReal));
-        if( index && FMath::LookEqual(inRelativePosition, this->boxWidthAtLevel[this->leafIndex] * FReal(index) ) ){
-            return index - 1;
-        }
-        return index;*/
-        return static_cast<int>(indexFReal);
+        FAssertLF( inRelativePosition >= 0 && inRelativePosition < this->boxWidth,
+                   "Particle out of box, inRelativePosition : ", inRelativePosition );
+        const FReal indexFReal = inRelativePosition / this->boxWidthAtLevel[this->leafIndex];
+        return static_cast<int>(indexFReal);
     }
 
 public:
@@ -117,7 +113,7 @@ public:
           height(inHeight) , subHeight(inSubHeight), leafIndex(this->height-1),
           boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth)
     {
-        fassert(subHeight <= height - 1, "Subheight cannot be greater than height", __LINE__, __FILE__ );
+        FAssertLF(subHeight <= height - 1, "Subheight cannot be greater than height");
         // Does we only need one suboctree?
         if(subHeight == height - 1){
             root = new FSubOctreeWithLeafs< CellClass , ContainerClass, LeafClass,CellAllocatorClass>(0, 0, this->subHeight, 1);
@@ -271,7 +267,7 @@ public:
           * It uses the left right limit on each suboctree and their morton index.
           * Please have a look to the move functions to understand how the system is working.
           */
-    class Iterator : protected FAssertable {
+    class Iterator  {
         SubOctreeTypes current; //< Current suboctree
 
         int currentLocalIndex;  //< Current index (array position) in the current_suboctree.cells[ currentLocalLevel ]
@@ -295,8 +291,8 @@ public:
             */
         explicit Iterator(FOctree* const inTarget)
             : currentLocalIndex(0) , currentLocalLevel(0) {
-            fassert(inTarget, "Target for Octree::Iterator cannot be null", __LINE__, __FILE__);
-            fassert(inTarget->root->getRightLeafIndex() >= 0, "Octree seems to be empty, getRightLeafIndex == 0", __LINE__, __FILE__);
+            FAssertLF(inTarget, "Target for Octree::Iterator cannot be null");
+            FAssertLF(inTarget->root->getRightLeafIndex() >= 0, "Octree seems to be empty, getRightLeafIndex == 0");
 
             // Start by the root
             this->current.tree = inTarget->root;
diff --git a/Src/Containers/FSubOctree.hpp b/Src/Containers/FSubOctree.hpp
index 5a5258217f35d7036f61798b9fadc5dc7fb4c711..491c8c298482b2f11d41bc4b17928691d24f3efa 100755
--- a/Src/Containers/FSubOctree.hpp
+++ b/Src/Containers/FSubOctree.hpp
@@ -19,7 +19,7 @@
 
 #include "../Utils/FGlobal.hpp"
 #include "../Utils/FPoint.hpp"
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FMath.hpp"
 
 #include "FTreeCoordinate.hpp"
@@ -48,7 +48,7 @@
  * @warning Give the particleClass & cellClass
  */
 template< class CellClass , class ContainerClass, class LeafClass, class CellAllocatorClass>
-class FAbstractSubOctree : protected FAssertable{
+class FAbstractSubOctree {
 protected:
 
     CellClass*** cells;		            //< Potential cells, cells are allocated only if needed
@@ -187,7 +187,7 @@ public:
                         subOctreeHeight( inSubOctreeHeight ), subOctreePosition( inSubOctreePosition ), isLeafSubtree(inIsLeafSubtree) {
 
         this->cells = new CellClass**[this->subOctreeHeight];
-        fassert(this->cells, "Allocation failled", __LINE__, __FILE__);
+        FAssertLF(this->cells, "Allocation failled");
 
         memset(this->cells, 0, sizeof(CellClass**) * subOctreeHeight);
 
@@ -195,7 +195,7 @@ public:
         int cellsAtlevel = 8;
         for( int indexLevel = 0 ; indexLevel < this->subOctreeHeight ; ++indexLevel ){
             this->cells[indexLevel] = new CellClass*[cellsAtlevel];
-            fassert(this->cells[indexLevel], "Allocation failled", __LINE__, __FILE__);
+            FAssertLF(this->cells[indexLevel], "Allocation failled");
 
             memset(this->cells[indexLevel], 0, sizeof(CellClass*) * cellsAtlevel);
 
@@ -278,7 +278,7 @@ public:
       * @param level the level to access cells array (must be < subOctreeHeight)
       * @return cells[level] */
     CellClass** cellsAt(const int level) const{
-        fassert(level < subOctreeHeight, "Level out of memory", __LINE__, __FILE__);
+        FAssertLF(level < subOctreeHeight, "Level out of memory");
         return cells[level];
     }
 
@@ -360,7 +360,7 @@ public:
         const int cellsAtLeafLevel = 1 << (3 * inSubOctreeHeight);
 
         this->leafs = new LeafClass*[cellsAtLeafLevel];
-        Parent::fassert(this->leafs, "Allocation failled", __LINE__, __FILE__);
+        FAssertLF(this->leafs, "Allocation failled");
 
         memset(leafs, 0, sizeof(LeafClass*) * cellsAtLeafLevel);
     }
@@ -507,7 +507,7 @@ public:
         const int cellsAtLeafLevel = 1 << (3 * inSubOctreeHeight);
 
         this->subleafs = new Parent*[cellsAtLeafLevel];
-        Parent::fassert(this->subleafs, "Allocation failled", __LINE__, __FILE__);
+        FAssertLF(this->subleafs, "Allocation failled");
 
         memset(subleafs, 0, sizeof(Parent**) * cellsAtLeafLevel);
     }
diff --git a/Src/Containers/FTreeCoordinate.hpp b/Src/Containers/FTreeCoordinate.hpp
index 09625b8b3fd606fa81165ea6b381433b0b99db76..ec7b92f7c6a01ee144cbe9f63c38f3f972c34d5b 100755
--- a/Src/Containers/FTreeCoordinate.hpp
+++ b/Src/Containers/FTreeCoordinate.hpp
@@ -19,9 +19,9 @@
 #include <string>
 
 #include "../Utils/FGlobal.hpp"
-#include "../Containers/FBufferReader.hpp"
-#include "../Containers/FBufferWriter.hpp"
+#include "../Utils/FMath.hpp"
 
+#include "../Components/FAbstractSerializable.hpp"
 
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
@@ -33,7 +33,7 @@
 * It is directly related to morton index, as interleaves
 * bits from this coordinate make the morton index
 */
-class FTreeCoordinate{
+class FTreeCoordinate : public FAbstractSerializable {
 private:
     int data[3];	//< all box-th position
 
@@ -248,13 +248,16 @@ public:
     }
 
     /** Save current object */
-    void save(FBufferWriter& buffer) const {
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const {
         buffer << data[0] << data[1] << data[2];
     }
     /** Retrieve current object */
-    void restore(FBufferReader& buffer) {
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer) {
         buffer >> data[0] >> data[1] >> data[2];
     }
+  
 
     static std::string MortonToBinary(MortonIndex index, int level){
         std::string str;
@@ -272,6 +275,115 @@ public:
         }
         return str;
     }
+
+    /** Compute the Morton indexes of the cells adjacent to this cell
+      * @param OctreeHeight height of the octree, neighbors are computed at level OctreeHeight - 1
+      * @param indexes array filled with the Morton index of each neighbor found
+      * @param indexInArray array filled, for each neighbor found, with its position in the
+      *        3x3x3 grid centered on this cell: ((dx+1)*3 + (dy+1)) * 3 + (dz+1)
+      * @return the number of neighbors stored (at most 26)
+      */
+    int getNeighborsIndexes(const int OctreeHeight, MortonIndex indexes[26], int indexInArray[26]) const{
+        int idxNeig = 0;
+        const int limite = 1 << (OctreeHeight - 1);
+        // We test all cells around, skipping those rejected by FMath::Between
+        for(int idxX = -1 ; idxX <= 1 ; ++idxX){
+            if(!FMath::Between(this->getX() + idxX,0, limite)) continue;
+
+            for(int idxY = -1 ; idxY <= 1 ; ++idxY){
+                if(!FMath::Between(this->getY() + idxY,0, limite)) continue;
+
+                for(int idxZ = -1 ; idxZ <= 1 ; ++idxZ){
+                    if(!FMath::Between(this->getZ() + idxZ,0, limite)) continue;
+
+                    // if we are not on the current cell
+                    if( idxX || idxY || idxZ ){
+                        const FTreeCoordinate other(this->getX() + idxX, this->getY() + idxY, this->getZ() + idxZ);
+                        indexes[ idxNeig ] = other.getMortonIndex(OctreeHeight - 1);
+                        indexInArray[ idxNeig ] = ((idxX+1)*3 + (idxY+1)) * 3 + (idxZ+1);
+                        ++idxNeig;
+                    }
+                }
+            }
+        }
+        return idxNeig;
+    }
+
+    /** Compute the Morton indexes of the cells adjacent to this cell
+      * Same as the three-argument overload, without the positions in the grid
+      * @param OctreeHeight height of the octree, neighbors are computed at level OctreeHeight - 1
+      * @param indexes array filled with the Morton index of each neighbor found
+      * @return the number of neighbors stored (at most 26)
+      */
+    int getNeighborsIndexes(const int OctreeHeight, MortonIndex indexes[26]) const{
+        int unusedPositions[26]; // scratch, the positions are not returned
+        return getNeighborsIndexes(OctreeHeight, indexes, unusedPositions);
+    }
+
+    /** Compute the interaction list of this cell: the children of the neighbors
+      * of the parent of this cell that are not adjacent to this cell
+      * @param inLevel level of this cell (the parent cells are taken at inLevel - 1)
+      * @param inNeighbors array filled with the Morton index of each cell found
+      * @param inNeighborsPosition array filled, for each cell found, with its position in the
+      *        7x7x7 grid centered on this cell: ((dx+3)*7 + (dy+3)) * 7 + (dz+3)
+      * @return the number of cells stored (at most 189)
+      */
+    int getInteractionNeighbors(const int inLevel, MortonIndex inNeighbors[189], int inNeighborsPosition[189]) const{
+        // Then take each child of the parent's neighbors if not in directNeighbors
+        // Father coordinate
+        const FTreeCoordinate parentCell(this->getX()>>1,this->getY()>>1,this->getZ()>>1);
+
+        // Limite at parent level number of box (split by 2 by level)
+        const int limite = FMath::pow2(inLevel-1);
+
+        int idxNeighbors = 0;
+        // We test all cells around
+        for(int idxX = -1 ; idxX <= 1 ; ++idxX){
+            if(!FMath::Between(parentCell.getX() + idxX,0,limite)) continue;
+
+            for(int idxY = -1 ; idxY <= 1 ; ++idxY){
+                if(!FMath::Between(parentCell.getY() + idxY,0,limite)) continue;
+
+                for(int idxZ = -1 ; idxZ <= 1 ; ++idxZ){
+                    if(!FMath::Between(parentCell.getZ() + idxZ,0,limite)) continue;
+
+                    // if we are not on the current cell
+                    if( idxX || idxY || idxZ ){
+                        const FTreeCoordinate otherParent(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ);
+                        const MortonIndex mortonOther = otherParent.getMortonIndex(inLevel-1);
+
+                        // For each child
+                        for(int idxCousin = 0 ; idxCousin < 8 ; ++idxCousin){
+                            const int xdiff  = ((otherParent.getX()<<1) | ( (idxCousin>>2) & 1)) - this->getX();
+                            const int ydiff  = ((otherParent.getY()<<1) | ( (idxCousin>>1) & 1)) - this->getY();
+                            const int zdiff  = ((otherParent.getZ()<<1) | (idxCousin&1)) - this->getZ();
+
+                            // Keep it only if it is not a direct neighbor
+                            if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){
+                                // add to neighbors
+                                inNeighborsPosition[idxNeighbors] = ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3;
+                                inNeighbors[idxNeighbors++] = (mortonOther << 3) | idxCousin;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return idxNeighbors;
+    }
+
+    /** Compute the interaction list of this cell
+      * Same as the three-argument overload, without the positions in the grid
+      * @param inLevel level of this cell (the parent cells are taken at inLevel - 1)
+      * @param inNeighbors array filled with the Morton index of each cell found
+      * @return the number of cells stored (at most 189)
+      */
+    int getInteractionNeighbors(const int inLevel, MortonIndex inNeighbors[189]) const{
+        int unusedPositions[189]; // scratch, the positions are not returned
+        return getInteractionNeighbors(inLevel, inNeighbors, unusedPositions);
+    }
+
 };
 
 
diff --git a/Src/Core/FFmmAlgorithm.hpp b/Src/Core/FFmmAlgorithm.hpp
index 17ed2c7b88295227f0de8cba9b9748df24b8839b..f180110d035d78a7cc86ac79024fb7deb45c0440 100755
--- a/Src/Core/FFmmAlgorithm.hpp
+++ b/Src/Core/FFmmAlgorithm.hpp
@@ -18,7 +18,7 @@
 
 
 #include "../Utils/FGlobal.hpp"
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FLog.hpp"
 #include "../Utils/FTrace.hpp"
 #include "../Utils/FTic.hpp"
@@ -40,7 +40,7 @@
 * Of course this class does not deallocate pointer given in arguements.
 */
 template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
-class FFmmAlgorithm : protected FAssertable, public FAbstractAlgorithm {
+class FFmmAlgorithm :  public FAbstractAlgorithm {
 
     OctreeClass* const tree;       //< The octree to work on
     KernelClass* const kernels;    //< The kernels
@@ -56,8 +56,8 @@ public:
     FFmmAlgorithm(OctreeClass* const inTree, KernelClass* const inKernels)
         : tree(inTree) , kernels(inKernels), OctreeHeight(tree->getHeight()) {
 
-        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
-        fassert(kernels, "kernels cannot be null", __LINE__, __FILE__);
+        FAssertLF(tree, "tree cannot be null");
+        FAssertLF(kernels, "kernels cannot be null");
 
         FLOG(FLog::Controller << "FFmmAlgorithm\n");
     }
diff --git a/Src/Core/FFmmAlgorithmPeriodic.hpp b/Src/Core/FFmmAlgorithmPeriodic.hpp
index 0cdfbfb19c96c09cf18df3468b641cce2d240627..127117ec5f7c65c177c70504b9bfff49e8640b7d 100755
--- a/Src/Core/FFmmAlgorithmPeriodic.hpp
+++ b/Src/Core/FFmmAlgorithmPeriodic.hpp
@@ -19,7 +19,7 @@
 
 #include "../Utils/FGlobal.hpp"
 #include "../Utils/FGlobalPeriodic.hpp"
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FLog.hpp"
 #include "../Utils/FTrace.hpp"
 #include "../Utils/FTic.hpp"
@@ -42,7 +42,7 @@
 * Of course this class does not deallocate pointer given in arguments.
 */
 template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
-class FFmmAlgorithmPeriodic : protected FAssertable, public FAbstractAlgorithm{
+class FFmmAlgorithmPeriodic : public FAbstractAlgorithm{
 
     OctreeClass* const tree;        //< The octree to work on
     KernelClass* kernels;           //< The kernels
@@ -66,8 +66,8 @@ public:
           nbLevelsAboveRoot(inUpperLevel), offsetRealTree(inUpperLevel + 3),
           periodicDirections(inPeriodicDirections) {
 
-        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
-        fassert(-1 <= inUpperLevel, "inUpperLevel cannot be < -1", __LINE__, __FILE__);
+        FAssertLF(tree, "tree cannot be null");
+        FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1");
 
         FLOG(FLog::Controller << "FFmmAlgorithmPeriodic\n");
     }
@@ -85,7 +85,7 @@ public:
       * Call this function to run the complete algorithm
       */
     void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){
-        fassert(kernels, "kernels cannot be null", __LINE__, __FILE__);
+        FAssertLF(kernels, "kernels cannot be null");
         FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );       
 
         if(operationsToProceed & FFmmP2M) bottomPass();
diff --git a/Src/Core/FFmmAlgorithmSectionTask.hpp b/Src/Core/FFmmAlgorithmSectionTask.hpp
index 03ce8718e787921e9c34fe8f5e422916c5be68ad..e044bb8fef67b672b08f9c9494677e57f720a553 100755
--- a/Src/Core/FFmmAlgorithmSectionTask.hpp
+++ b/Src/Core/FFmmAlgorithmSectionTask.hpp
@@ -18,7 +18,7 @@
 
 
 #include "../Utils/FGlobal.hpp"
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FLog.hpp"
 #include "../Utils/FTrace.hpp"
 #include "../Utils/FTic.hpp"
@@ -40,7 +40,7 @@
 * Of course this class does not deallocate pointer given in arguements.
 */
 template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
-class FFmmAlgorithmSectionTask : protected FAssertable, public FAbstractAlgorithm{
+class FFmmAlgorithmSectionTask : public FAbstractAlgorithm{
 
     OctreeClass* const tree;       //< The octree to work on
     KernelClass** kernels;    //< The kernels
@@ -60,8 +60,8 @@ public:
           MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight())
     {
 
-        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
-        fassert(inKernels, "kernels cannot be null", __LINE__, __FILE__);
+        FAssertLF(tree, "tree cannot be null");
+        FAssertLF(inKernels, "kernels cannot be null");
 
         this->kernels = new KernelClass*[MaxThreads];
         for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
diff --git a/Src/Core/FFmmAlgorithmTask.hpp b/Src/Core/FFmmAlgorithmTask.hpp
index 82068dae05b0f83d681660c6ddff71739b9cdcd9..0c21ff31528f8a9785224bc779b6166aa50b9d80 100755
--- a/Src/Core/FFmmAlgorithmTask.hpp
+++ b/Src/Core/FFmmAlgorithmTask.hpp
@@ -18,7 +18,7 @@
 
 
 #include "../Utils/FGlobal.hpp"
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FLog.hpp"
 #include "../Utils/FTrace.hpp"
 #include "../Utils/FTic.hpp"
@@ -40,7 +40,7 @@
 * Of course this class does not deallocate pointer given in arguements.
 */
 template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
-class FFmmAlgorithmTask : protected FAssertable, public FAbstractAlgorithm{
+class FFmmAlgorithmTask : public FAbstractAlgorithm{
 
     OctreeClass* const tree;       //< The octree to work on
     KernelClass** kernels;    //< The kernels
@@ -60,8 +60,8 @@ public:
           MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight())
     {
 
-        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
-        fassert(inKernels, "kernels cannot be null", __LINE__, __FILE__);
+        FAssertLF(tree, "tree cannot be null");
+        FAssertLF(inKernels, "kernels cannot be null");
 
         this->kernels = new KernelClass*[MaxThreads];
         for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
diff --git a/Src/Core/FFmmAlgorithmThread.hpp b/Src/Core/FFmmAlgorithmThread.hpp
index 97cc18dec909fd4abab4e956a81409cbd75e4455..09dcb9100dfc57fe1dfd63931dc1c26bfac7a085 100755
--- a/Src/Core/FFmmAlgorithmThread.hpp
+++ b/Src/Core/FFmmAlgorithmThread.hpp
@@ -17,7 +17,7 @@
 #define FFMMALGORITHMTHREAD_HPP
 
 
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FLog.hpp"
 #include "../Utils/FTrace.hpp"
 #include "../Utils/FTic.hpp"
@@ -45,7 +45,7 @@
 * When using this algorithm the P2P is thread safe.
 */
 template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
-class FFmmAlgorithmThread : protected FAssertable, public FAbstractAlgorithm{
+class FFmmAlgorithmThread : public FAbstractAlgorithm{
     OctreeClass* const tree;                  //< The octree to work on
     KernelClass** kernels;                    //< The kernels
 
@@ -69,7 +69,7 @@ public:
         : tree(inTree) , kernels(0), iterArray(0), leafsNumber(0),
           MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()) {
 
-        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
+        FAssertLF(tree, "tree cannot be null");
 
         this->kernels = new KernelClass*[MaxThreads];
         for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
@@ -109,7 +109,7 @@ public:
 
         } while(octreeIterator.moveRight());
         iterArray = new typename OctreeClass::Iterator[leafsNumber];
-        fassert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
+        FAssertLF(iterArray, "iterArray bad alloc");
 
         if(operationsToProceed & FFmmP2M) bottomPass();
 
diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp
index e9918901cd73db40da90cee54c595586c38511dd..5b9171413d39bc0659704231177441164f1b588b 100755
--- a/Src/Core/FFmmAlgorithmThreadProc.hpp
+++ b/Src/Core/FFmmAlgorithmThreadProc.hpp
@@ -19,7 +19,7 @@
 #include <omp.h>
 
 //
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FLog.hpp"
 #include "../Utils/FTrace.hpp"
 #include "../Utils/FTic.hpp"
@@ -31,6 +31,8 @@
 
 #include "../Containers/FBufferWriter.hpp"
 #include "../Containers/FBufferReader.hpp"
+#include "../Containers/FMpiBufferWriter.hpp"
+#include "../Containers/FMpiBufferReader.hpp"
 
 #include "../Utils/FMpi.hpp"
 
@@ -38,1314 +40,1263 @@
 #include "FCoreCommon.hpp"
 
 /**
-* @author Berenger Bramas (berenger.bramas@inria.fr)
-* @class FFmmAlgorithmThreadProc
-* @brief
-* Please read the license
-*
-* This class is a threaded FMM algorithm with mpi.
-* It just iterates on a tree and call the kernels with good arguments.
-* It used the inspector-executor model :
-* iterates on the tree and builds an array to work in parallel on this array
-*
-* Of course this class does not deallocate pointer given in arguements.
-*
-* Threaded & based on the inspector-executor model
-* schedule(runtime) export OMP_NUM_THREADS=2
-* export OMPI_CXX=`which g++-4.4`
-* mpirun -np 2 valgrind --suppressions=/usr/share/openmpi/openmpi-valgrind.supp
-* --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes
-* ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
-*/
+ * @author Berenger Bramas (berenger.bramas@inria.fr)
+ * @class FFmmAlgorithmThreadProc
+ * @brief
+ * Please read the license
+ *
+ * This class is a threaded FMM algorithm with MPI.
+ * It iterates over the tree and calls the kernels with the appropriate arguments.
+ * It uses the inspector-executor model:
+ * it iterates over the tree and builds an array, then works in parallel on this array.
+ *
+ * This class does not deallocate the pointers given as arguments.
+ *
+ * Threaded & based on the inspector-executor model
+ * schedule(runtime) export OMP_NUM_THREADS=2
+ * export OMPI_CXX=`which g++-4.4`
+ * mpirun -np 2 valgrind --suppressions=/usr/share/openmpi/openmpi-valgrind.supp
+ * --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes
+ * ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
+ */
 template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
-class FFmmAlgorithmThreadProc : protected FAssertable , public FAbstractAlgorithm {
+class FFmmAlgorithmThreadProc : public FAbstractAlgorithm {
+
+  static const int MaxSizePerCell = 1024;   //< Per-cell size budget used to dimension the MPI send/receive buffers
+  
+  OctreeClass* const tree;                 //< The octree to work on (not deleted by this class)
+  KernelClass** kernels;                   //< One kernel copy per OpenMP thread (MaxThreads entries)
+  
+  const FMpi::FComm& comm;                 //< MPI communicator given to the constructor
+  
+  typename OctreeClass::Iterator* iterArray;  //< Scratch iterator array: allocated in execute(), refilled by each pass
+  int numberOfLeafs;                          //< Number of leaves counted by execute(); also the size of iterArray
+
+  const int MaxThreads;               //< Value of omp_get_max_threads() at construction time
+  
+  const int nbProcess;                //< Number of processes in comm
+  const int idProcess;                //< Id of the current process in comm
+  
+  const int OctreeHeight;            //< Height of the tree
+  
+  /** An interval is the Morton index interval
+   * that a process uses (it holds data in this interval)
+   */
+  struct Interval{
+    MortonIndex min;
+    MortonIndex max;
+  };
+  /** Leaf-level interval of every process, indexed by process id (gathered in execute()) */
+  Interval*const intervals;
+  /** Interval of every process at every level, stored at [OctreeHeight * proc + level] */
+  Interval*const workingIntervalsPerLevel;
+
+  /** Interval of process `proc` at tree level `level` (the table holds OctreeHeight consecutive entries per process) */
+  Interval& getWorkingInterval( int level,  int proc){
+    return *(workingIntervalsPerLevel + (OctreeHeight * proc + level));
+  }
 
-    static const int MaxSizePerCell = 1024;
 
-    OctreeClass* const tree;                 //< The octree to work on
-    KernelClass** kernels;                   //< The kernels
+public:
+  /** Interval of the calling process (idProcess) at tree level `level` */
+  Interval& getWorkingInterval( int level){
+    return workingIntervalsPerLevel[OctreeHeight * idProcess + level];
+  }
+
+  /** True when the calling process has work at `level`: rank 0 always, other ranks only if their max index exceeds their predecessor's */
+  bool hasWorkAtLevel( int level){
+    return !(idProcess != 0 && getWorkingInterval(level).max <= getWorkingInterval(level, idProcess - 1).max);
+  }
+
+  /** Build the algorithm and give every OpenMP thread its own copy of the kernels
+   * @param inComm the MPI communicator; the process count and the process id are read from it
+   * @param inTree the octree to work on (never deallocated by this class)
+   * @param inKernels the kernels to call; copy-constructed once per thread
+   * An assert is launched if inTree or inKernels is null */
+  FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels)
+    : tree(inTree) , kernels(0), comm(inComm), iterArray(nullptr),numberOfLeafs(0),
+      MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()),
+      OctreeHeight(tree->getHeight()),intervals(new Interval[inComm.processCount()]),
+      workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]){
+
+    FAssertLF(tree, "tree cannot be null"); // NOTE(review): the initializer list above has already called tree->getHeight()
+    FAssertLF(inKernels, "kernels cannot be null"); // must hold before *inKernels is copied below
+    this->kernels = new KernelClass*[MaxThreads];
+    for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
+      this->kernels[idxThread] = new KernelClass(*inKernels);
+    }
 
-    const FMpi::FComm& comm;                 //< MPI comm
+    FLOG(FLog::Controller << "FFmmAlgorithmThreadProc\n");
+    FLOG(FLog::Controller << "Max threads = "  << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n");
+  }
+  /** Destructor: frees the per-thread kernel copies and the two interval tables (the octree is left untouched) */
+  virtual ~FFmmAlgorithmThreadProc(){
+    for(KernelClass** kernelSlot = kernels ; kernelSlot != kernels + MaxThreads ; ++kernelSlot){
+      delete *kernelSlot;
+    }
+    delete [] kernels;
+
+    delete [] intervals;
+    delete [] workingIntervalsPerLevel;
+  }
+
+  /**
+   * Run the FMM passes selected by the operationsToProceed bit mask (default: FFmmNearAndFarFields).
+   * It first counts the leaves and exchanges the Morton intervals of every process, then runs the passes.
+   */
+  void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){
+    FTRACE( FTrace::FFunction functionTrace( __FUNCTION__, "Fmm" , __FILE__ , __LINE__ ) );
+
+    // Count the leaves (the count also sizes iterArray below)
+    this->numberOfLeafs = 0;
+    {
+      FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
+
+      Interval myLastInterval;
+      {
+	typename OctreeClass::Iterator octreeIterator(tree);
+	octreeIterator.gotoBottomLeft();
+	myLastInterval.min = octreeIterator.getCurrentGlobalIndex();
+	do{
+	  ++this->numberOfLeafs;
+	} while(octreeIterator.moveRight());
+	myLastInterval.max = octreeIterator.getCurrentGlobalIndex(); // the iterator stopped on the right-most leaf
+      }
+      iterArray = new typename OctreeClass::Iterator[numberOfLeafs];
+      FAssertLF(iterArray, "iterArray bad alloc");
+
+      // Gather the leaf-level [min,max] interval of every process into intervals
+      FMpi::MpiAssert( MPI_Allgather( &myLastInterval, sizeof(Interval), MPI_BYTE, intervals, sizeof(Interval), MPI_BYTE, comm.getComm()),  __LINE__ );
+
+      Interval*const myIntervals = new Interval[OctreeHeight];
+      myIntervals[OctreeHeight - 1] = myLastInterval;
+      for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 0 ; --idxLevel){ // the index one level up is the child index >> 3
+	myIntervals[idxLevel].min = myIntervals[idxLevel+1].min >> 3;
+	myIntervals[idxLevel].max = myIntervals[idxLevel+1].max >> 3;
+      }
+      if(idProcess != 0){
+	typename OctreeClass::Iterator octreeIterator(tree);
+	octreeIterator.gotoBottomLeft();
+	octreeIterator.moveUp();
+
+	// Parent index of the previous process' last leaf: at each level, cells up to this limit are skipped to find my first cell
+	MortonIndex currentLimit = intervals[idProcess-1].max >> 3;
+
+	for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 1 ; --idxLevel){
+	  while(octreeIterator.getCurrentGlobalIndex() <= currentLimit){
+	    if( !octreeIterator.moveRight() ) break; 
+	  }
+	  myIntervals[idxLevel].min = octreeIterator.getCurrentGlobalIndex();
+	  octreeIterator.moveUp();
+	  currentLimit >>= 3;
+	}
+      }
+
+      // Gather the per-level intervals of every process into workingIntervalsPerLevel
+      FMpi::MpiAssert( MPI_Allgather( myIntervals, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE,
+				      workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()),  __LINE__ );
+      delete[] myIntervals;
+    }
 
-    typename OctreeClass::Iterator* iterArray;
-    int numberOfLeafs;                          //< To store the size at the previous level
+    // Run the requested passes
+    if(operationsToProceed & FFmmP2M) bottomPass();
 
-    const int MaxThreads;               //< the max number of thread allowed by openmp
+    if(operationsToProceed & FFmmM2M) upwardPass();
 
-    const int nbProcess;                //< Number of process
-    const int idProcess;                //< Id of current process
+    if(operationsToProceed & FFmmM2L) transferPass();
 
-    const int OctreeHeight;
+    if(operationsToProceed & FFmmL2L) downardPass();
 
-    /** An interval is the morton index interval
-      * that a proc use (it holds data in this interval)
-      */
-    struct Interval{
-        MortonIndex min;
-        MortonIndex max;
-    };
-    /** My interval */
-    Interval*const intervals;
-    /** All process intervals */
-    Interval*const workingIntervalsPerLevel;
+    if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPass();
+
+    // Release the iterator array allocated above
+    delete [] iterArray;
+    iterArray = 0;
+  }
+
+private:
 
-    /** Get an interval from proc id and level */
-    Interval& getWorkingInterval( int level,  int proc){
-        return workingIntervalsPerLevel[OctreeHeight * proc + level];
+  /////////////////////////////////////////////////////////////////////////////
+  // P2M
+  /////////////////////////////////////////////////////////////////////////////
+
+  /** P2M bottom pass: calls P2M on every local leaf (cell + source list), leaves being shared among the OpenMP threads */
+  void bottomPass(){
+    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
+    FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) );
+    FLOG(FTic counterTime);
+
+    // Store one iterator per leaf, from left to right, so that the leaves can be indexed
+    typename OctreeClass::Iterator leafWalker(tree);
+    leafWalker.gotoBottomLeft();
+    int leafCount = 0;
+    do{
+      iterArray[leafCount] = leafWalker;
+      ++leafCount;
+    } while(leafWalker.moveRight());
+
+    FLOG(FTic computationCounter);
+#pragma omp parallel
+    {
+      KernelClass& threadKernel = *kernels[omp_get_thread_num()];
+#pragma omp for nowait
+      for(int idxLeaf = 0 ; idxLeaf < leafCount ; ++idxLeaf){
+	typename OctreeClass::Iterator& leaf = iterArray[idxLeaf];
+	threadKernel.P2M( leaf.getCurrentCell() , leaf.getCurrentListSrc());
+      }
+    }
+    FLOG(computationCounter.tac());
+
+    FLOG( FLog::Controller << "\tFinished (@Bottom Pass (P2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
+    FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
+  }
+
+  /////////////////////////////////////////////////////////////////////////////
+  // Upward
+  /////////////////////////////////////////////////////////////////////////////
+
+  /** M2M: from the level above the leaves up to level 2; cells shared with neighbouring processes are exchanged through MPI */
+  void upwardPass(){
+    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
+    FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); );
+    FLOG(FTic counterTime);
+    FLOG(FTic computationCounter);
+    FLOG(FTic prepareCounter);
+    FLOG(FTic waitCounter);
+
+    // Start from leaf level - 1
+    typename OctreeClass::Iterator octreeIterator(tree);
+    octreeIterator.gotoBottomLeft();
+    octreeIterator.moveUp();
+    typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
+
+    // Rank that receives my part of a shared cell
+    // (moved towards rank 0 inside the level loop)
+    int sendToProc = idProcess;
+
+    // There are a maximum of 8-1 sends and 8-1 receptions
+    MPI_Request requests[14];
+    MPI_Status status[14];
+
+    // Buffers: up to 7 cells per send; on receive, one slot of (8 cells + 1 state char) per process
+    FMpiBufferWriter sendBuffer(comm.getComm(),7*MaxSizePerCell);
+    const int recvBufferOffset = (8 * MaxSizePerCell + 1);
+    FMpiBufferReader recvBuffer(comm.getComm(), nbProcess*recvBufferOffset);
+    CellClass recvBufferCells[8];
+    
+    int firstProcThatSend = idProcess + 1;
+
+    // for each level
+    for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
+      // No more work for me
+      if(idProcess != 0
+	 && getWorkingInterval((idxLevel+1), idProcess).max <= getWorkingInterval((idxLevel+1), idProcess - 1).max){
+	break;
+      }
+
+      // copy cells to work with
+      int numberOfCells = 0;
+      // for each cell
+      do{
+	iterArray[numberOfCells++] = octreeIterator;
+      } while(octreeIterator.moveRight());
+      avoidGotoLeftIterator.moveUp();
+      octreeIterator = avoidGotoLeftIterator;
+
+      // We may need to send something
+      int iterRequests = 0;
+      int cellsToSend = -1;
+
+      while(iterArray[cellsToSend+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).min){
+	++cellsToSend;
+      }
+
+      FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
+
+      FLOG(prepareCounter.tic());
+      if(idProcess != 0
+	 && (getWorkingInterval((idxLevel+1), idProcess).min >>3) <= (getWorkingInterval((idxLevel+1), idProcess - 1).max >>3)){
+	
+	char state = 0;
+	sendBuffer.write(state);
+	
+	const CellClass* const* const child = iterArray[cellsToSend].getCurrentChild();
+	for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
+	  if( child[idxChild] && getWorkingInterval((idxLevel+1), idProcess).min <= child[idxChild]->getMortonIndex() ){
+	    child[idxChild]->serializeUp(sendBuffer);
+	    state = char(state | (0x1 << idxChild));
+	  }
+	}
+	sendBuffer.writeAt(0,state);
+	
+	while( sendToProc && iterArray[cellsToSend].getCurrentGlobalIndex() == getWorkingInterval(idxLevel , sendToProc - 1).max){
+	  --sendToProc;
+	}
+
+	FMpi::MpiAssert( MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, sendToProc,
+				   FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]), __LINE__ );
+      }
+
+      // We may need to receive something
+      bool hasToReceive = false;
+      int endProcThatSend = firstProcThatSend;
+
+      if(idProcess != nbProcess - 1){ // if I'm the last one (idProcess == nbProcess-1), I shall not receive anything in a M2M
+	while(firstProcThatSend < nbProcess
+	      && (getWorkingInterval((idxLevel+1), firstProcThatSend).max) < (getWorkingInterval((idxLevel+1), idProcess).max)){
+	  // Second condition :: while firstProcThatSend max morton index is < to myself max interval
+	  ++firstProcThatSend;
+	}
+
+	if(firstProcThatSend < nbProcess &&
+	   (getWorkingInterval((idxLevel+1), firstProcThatSend).min >>3) == (getWorkingInterval((idxLevel+1) , idProcess).max>>3) ){
+
+	  endProcThatSend = firstProcThatSend;
+
+	  while( endProcThatSend < nbProcess &&
+		 (getWorkingInterval((idxLevel+1) ,endProcThatSend).min >>3) == (getWorkingInterval((idxLevel+1) , idProcess).max>>3)){
+	    ++endProcThatSend;
+	  }
+
+
+	  if(firstProcThatSend != endProcThatSend){
+	    hasToReceive = true;
+
+	    for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc ){
+	      FMpi::MpiAssert( MPI_Irecv(&recvBuffer.data()[idxProc * recvBufferOffset], recvBufferOffset, MPI_PACKED,
+					 idxProc, FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]), __LINE__ );
+	    }
+	  }
+	}
+      }
+      FLOG(prepareCounter.tac());
+      FTRACE( regionTrace.end() );
+
+      // Compute
+      const int endIndex = (hasToReceive?numberOfCells-1:numberOfCells);
+      FLOG(computationCounter.tic());
+#pragma omp parallel
+      {
+	KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
+#pragma omp for nowait
+	for( int idxCell = cellsToSend + 1 ; idxCell < endIndex ; ++idxCell){
+	  myThreadkernels.M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
+	}
+      }
+      FLOG(computationCounter.tac());
+
+      // Are we sending or waiting anything?
+      if(iterRequests){
+	FLOG(waitCounter.tic());
+	MPI_Waitall( iterRequests, requests, status);
+	FLOG(waitCounter.tac());
+
+	// we were receiving data
+	if( hasToReceive ){
+	  CellClass* currentChild[8];
+	  memcpy(currentChild, iterArray[numberOfCells - 1].getCurrentChild(), 8 * sizeof(CellClass*));
+
+	  // retrieve data and merge my children with the children received from the others
+	  for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc){
+	    recvBuffer.seek(idxProc * recvBufferOffset);
+	    int state = int(recvBuffer.getValue<char>());
+	    
+	    int position = 0;
+	    while( state && position < 8){
+	      while(!(state & 0x1)){
+		state >>= 1;
+		++position;
+	      }
+
+          FAssertLF(!currentChild[position], "Already has a cell here");
+	      
+	      recvBufferCells[position].deserializeUp(recvBuffer);
+	      currentChild[position] = (CellClass*) &recvBufferCells[position];
+
+	      state >>= 1;
+	      ++position;
+	    }
+	  }
+
+	  // Finally compute
+	  FLOG(computationCounter.tic());
+	  (*kernels[0]).M2M( iterArray[numberOfCells - 1].getCurrentCell() , currentChild, idxLevel);
+	  FLOG(computationCounter.tac());
+
+
+	  firstProcThatSend = endProcThatSend - 1;
+	}
+      }
+      sendBuffer.reset();
+      recvBuffer.seek(0);
     }
 
 
-public:
+    FLOG( FLog::Controller << "\tFinished (@Upward Pass (M2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
+    FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Prepare : " << prepareCounter.cumulated() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Wait : " << waitCounter.cumulated() << " s\n" );
+  }
+
+  /////////////////////////////////////////////////////////////////////////////
+  // Downward
+  /////////////////////////////////////////////////////////////////////////////
+
+  /** M2L  */
+  void transferPass(){
+    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
+
+    FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); );
+    FLOG(FTic counterTime);
+    FLOG(FTic computationCounter);
+    FLOG(FTic sendCounter);
+    FLOG(FTic receiveCounter);
+    FLOG(FTic prepareCounter);
+    FLOG(FTic gatherCounter);
+
+    //////////////////////////////////////////////////////////////////
+    // First know what to send to who
+    //////////////////////////////////////////////////////////////////
 
-    /** Get current proc interval at level */
-    Interval& getWorkingInterval( int level){
-        return getWorkingInterval(level, idProcess);
+    // pointer to send
+    FVector<typename OctreeClass::Iterator> toSend[nbProcess * OctreeHeight];
+    // index
+    int*const indexToSend = new int[nbProcess * OctreeHeight];
+    memset(indexToSend, 0, sizeof(int) * nbProcess * OctreeHeight);
+    // To know which one has need someone
+    FBoolArray** const leafsNeedOther = new FBoolArray*[OctreeHeight];
+    memset(leafsNeedOther, 0, sizeof(FBoolArray*) * OctreeHeight);
+
+    {
+      FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
+      FLOG(prepareCounter.tic());
+
+      // To know if a leaf has been already sent to a proc
+      bool*const alreadySent = new bool[nbProcess];
+      memset(alreadySent, 0, sizeof(bool) * nbProcess);
+
+      typename OctreeClass::Iterator octreeIterator(tree);
+      octreeIterator.moveDown();
+      typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
+      // for each levels
+      for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
+	if(idProcess != 0
+	   && getWorkingInterval(idxLevel, idProcess).max <= getWorkingInterval(idxLevel, idProcess - 1).max){
+	  avoidGotoLeftIterator.moveDown();
+	  octreeIterator = avoidGotoLeftIterator;
+
+	  continue;
+	}
+
+	int numberOfCells = 0;
+
+	while(octreeIterator.getCurrentGlobalIndex() <  getWorkingInterval(idxLevel , idProcess).min){
+	  octreeIterator.moveRight();
+	}
+
+	// for each cells
+	do{
+	  iterArray[numberOfCells] = octreeIterator;
+	  ++numberOfCells;
+	} while(octreeIterator.moveRight());
+	avoidGotoLeftIterator.moveDown();
+	octreeIterator = avoidGotoLeftIterator;
+
+	leafsNeedOther[idxLevel] = new FBoolArray(numberOfCells);
+
+
+	// Which cell potentialy needs other data and in the same time
+	// are potentialy needed by other
+	MortonIndex neighborsIndexes[189];
+	for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
+	  // Find the M2L neigbors of a cell
+	  const int counter = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndexes);
+
+	  memset(alreadySent, false, sizeof(bool) * nbProcess);
+	  bool needOther = false;
+	  // Test each negibors to know which one do not belong to us
+	  for(int idxNeigh = 0 ; idxNeigh < counter ; ++idxNeigh){
+	    if(neighborsIndexes[idxNeigh] < getWorkingInterval(idxLevel , idProcess).min
+	       || (getWorkingInterval(idxLevel , idProcess).max) < neighborsIndexes[idxNeigh]){
+	      int procToReceive = idProcess;
+	      while( 0 != procToReceive && neighborsIndexes[idxNeigh] < getWorkingInterval(idxLevel , procToReceive).min ){
+		--procToReceive;
+	      }
+	      while( procToReceive != nbProcess -1 && (getWorkingInterval(idxLevel , procToReceive).max) < neighborsIndexes[idxNeigh]){
+		++procToReceive;
+	      }
+	      // Maybe already sent to that proc?
+	      if( !alreadySent[procToReceive]
+		  && getWorkingInterval(idxLevel , procToReceive).min <= neighborsIndexes[idxNeigh]
+		  && neighborsIndexes[idxNeigh] <= getWorkingInterval(idxLevel , procToReceive).max){
+
+		alreadySent[procToReceive] = true;
+
+		needOther = true;
+
+		toSend[idxLevel * nbProcess + procToReceive].push(iterArray[idxCell]);
+		++indexToSend[idxLevel * nbProcess + procToReceive];
+	      }
+	    }
+	  }
+	  if(needOther){
+	    leafsNeedOther[idxLevel]->set(idxCell,true);
+	  }
+
+	}
+
+      }
+      FLOG(prepareCounter.tac());
+
+      delete[] alreadySent;
     }
 
-    /** Does the current proc has some work at this level */
-    bool hasWorkAtLevel( int level){
-        return idProcess == 0 || (getWorkingInterval(level, idProcess - 1).max) < (getWorkingInterval(level, idProcess).max);
+    //////////////////////////////////////////////////////////////////
+    // Gather this information
+    //////////////////////////////////////////////////////////////////
+
+    FLOG(gatherCounter.tic());
+    // All process say to each others
+    // what the will send to who
+    int*const globalReceiveMap = new int[nbProcess * nbProcess * OctreeHeight];
+    memset(globalReceiveMap, 0, sizeof(int) * nbProcess * nbProcess * OctreeHeight);
+    FMpi::MpiAssert( MPI_Allgather( indexToSend, nbProcess * OctreeHeight, MPI_INT, globalReceiveMap, nbProcess * OctreeHeight, MPI_INT, comm.getComm()),  __LINE__ );
+    FLOG(gatherCounter.tac());
+
+
+    //////////////////////////////////////////////////////////////////
+    // Send and receive for real
+    //////////////////////////////////////////////////////////////////
+
+    FLOG(sendCounter.tic());
+    // Then they can send and receive (because they know what they will receive)
+    // To send in asynchrone way
+    MPI_Request*const requests = new MPI_Request[2 * nbProcess * OctreeHeight];
+    MPI_Status*const status = new MPI_Status[2 * nbProcess * OctreeHeight];
+    int iterRequest = 0;
+
+    const int SizeOfCellToSend = sizeof(MortonIndex) + sizeof(int) + MaxSizePerCell;
+
+    FMpiBufferWriter**const sendBuffer = new FMpiBufferWriter*[nbProcess * OctreeHeight];
+    memset(sendBuffer, 0, sizeof(FMpiBufferWriter*) * nbProcess * OctreeHeight);
+
+    FMpiBufferReader**const recvBuffer = new FMpiBufferReader*[nbProcess * OctreeHeight];
+    memset(recvBuffer, 0, sizeof(FMpiBufferReader*) * nbProcess * OctreeHeight);
+
+
+    for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
+      for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
+	const int toSendAtProcAtLevel = indexToSend[idxLevel * nbProcess + idxProc];
+	if(toSendAtProcAtLevel != 0){
+	  sendBuffer[idxLevel * nbProcess + idxProc] = new FMpiBufferWriter(comm.getComm(),toSendAtProcAtLevel * SizeOfCellToSend);
+
+	  for(int idxLeaf = 0 ; idxLeaf < toSendAtProcAtLevel; ++idxLeaf){
+	    const MortonIndex cellIndex = toSend[idxLevel * nbProcess + idxProc][idxLeaf].getCurrentGlobalIndex();
+	    sendBuffer[idxLevel * nbProcess + idxProc]->write(cellIndex);
+	    toSend[idxLevel * nbProcess + idxProc][idxLeaf].getCurrentCell()->serializeUp(*sendBuffer[idxLevel * nbProcess + idxProc]);
+	  }
+	  
+	  FMpi::MpiAssert( MPI_Isend( sendBuffer[idxLevel * nbProcess + idxProc]->data(), 
+				      sendBuffer[idxLevel * nbProcess + idxProc]->getSize(),MPI_PACKED, idxProc, 
+				      FMpi::TagLast + idxLevel, comm.getComm(), &requests[iterRequest++]) , __LINE__ );
+	}
+
+	const int toReceiveFromProcAtLevel = globalReceiveMap[(idxProc * nbProcess * OctreeHeight) + idxLevel * nbProcess + idProcess];
+	if(toReceiveFromProcAtLevel){
+	  recvBuffer[idxLevel * nbProcess + idxProc] = new FMpiBufferReader(comm.getComm(),toReceiveFromProcAtLevel * SizeOfCellToSend);
+
+	  FMpi::MpiAssert( MPI_Irecv(recvBuffer[idxLevel * nbProcess + idxProc]->data(),
+				     recvBuffer[idxLevel * nbProcess + idxProc]->getCapacity(), MPI_PACKED,idxProc, 
+				     FMpi::TagLast + idxLevel, comm.getComm(), &requests[iterRequest++]) , __LINE__ );
+	}
+      }
     }
+    FLOG(sendCounter.tac());
 
-    /** The constructor need the octree and the kernels used for computation
-      * @param inTree the octree to work on
-      * @param inKernels the kernels to call
-      * An assert is launched if one of the arguments is null
-      */
-    FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels)
-        : tree(inTree) , kernels(0), comm(inComm), iterArray(nullptr),numberOfLeafs(0),
-          MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()),
-          OctreeHeight(tree->getHeight()),intervals(new Interval[inComm.processCount()]),
-          workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]){
-
-        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
-
-        this->kernels = new KernelClass*[MaxThreads];
-        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
-            this->kernels[idxThread] = new KernelClass(*inKernels);
-        }
-
-        FLOG(FLog::Controller << "FFmmAlgorithmThreadProc\n");
-        FLOG(FLog::Controller << "Max threads = "  << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n");
+    //////////////////////////////////////////////////////////////////
+    // Do M2L
+    //////////////////////////////////////////////////////////////////
+
+    {
+      FTRACE( FTrace::FRegion regionTrace("Compute", __FUNCTION__ , __FILE__ , __LINE__) );
+      typename OctreeClass::Iterator octreeIterator(tree);
+      octreeIterator.moveDown();
+      typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
+      // Now we can compute all the data
+      // for each levels
+      for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
+	if(idProcess != 0
+	   && getWorkingInterval(idxLevel, idProcess).max <= getWorkingInterval(idxLevel, idProcess - 1).max){
+
+	  avoidGotoLeftIterator.moveDown();
+	  octreeIterator = avoidGotoLeftIterator;
+
+	  continue;
+	}
+
+	int numberOfCells = 0;
+	while(octreeIterator.getCurrentGlobalIndex() <  getWorkingInterval(idxLevel , idProcess).min){
+	  octreeIterator.moveRight();
+	}
+	// for each cells
+	do{
+	  iterArray[numberOfCells] = octreeIterator;
+	  ++numberOfCells;
+	} while(octreeIterator.moveRight());
+	avoidGotoLeftIterator.moveDown();
+	octreeIterator = avoidGotoLeftIterator;
+
+	FLOG(computationCounter.tic());
+#pragma omp parallel
+	{
+	  KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
+	  const CellClass* neighbors[343];
+
+#pragma omp for  schedule(dynamic) nowait
+	  for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
+	    const int counter = tree->getInteractionNeighbors(neighbors,  iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel);
+	    if(counter) myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
+	  }
+
+	  myThreadkernels->finishedLevelM2L(idxLevel);
+	}
+	FLOG(computationCounter.tac());
+      }
     }
-    /** Default destructor */
-    virtual ~FFmmAlgorithmThreadProc(){
-        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
-            delete this->kernels[idxThread];
-        }
-        delete [] this->kernels;
-
-        delete [] intervals;
-        delete [] workingIntervalsPerLevel;
+
+    //////////////////////////////////////////////////////////////////
+    // Wait received data and compute
+    //////////////////////////////////////////////////////////////////
+
+    // Wait until every posted receive (and send) has completed
+    MPI_Waitall(iterRequest, requests, status);
+
+    {
+      FTRACE( FTrace::FRegion regionTrace("Compute Received data", __FUNCTION__ , __FILE__ , __LINE__) );
+      FLOG(receiveCounter.tic());
+      typename OctreeClass::Iterator octreeIterator(tree);
+      octreeIterator.moveDown();
+      typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
+      // compute the second time
+      // for each levels
+      for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
+	if(idProcess != 0
+	   && getWorkingInterval(idxLevel, idProcess).max <= getWorkingInterval(idxLevel, idProcess - 1).max){
+
+	  avoidGotoLeftIterator.moveDown();
+	  octreeIterator = avoidGotoLeftIterator;
+
+	  continue;
+	}
+
+	// put the received data into a temporary tree
+	FLightOctree<CellClass> tempTree;
+	for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
+	  const int toReceiveFromProcAtLevel = globalReceiveMap[(idxProc * nbProcess * OctreeHeight) + idxLevel * nbProcess + idProcess];
+
+	  for(int idxCell = 0 ; idxCell < toReceiveFromProcAtLevel ; ++idxCell){
+	    const MortonIndex cellIndex = recvBuffer[idxLevel * nbProcess + idxProc]->FMpiBufferReader::getValue<MortonIndex>();
+
+	    CellClass* const newCell = new CellClass;
+	    newCell->setMortonIndex(cellIndex);
+	    newCell->deserializeUp(*recvBuffer[idxLevel * nbProcess + idxProc]);
+	    
+	    tempTree.insertCell(cellIndex, idxLevel, newCell);
+	  }
+	}
+
+
+	// take cells from our octree only if they are
+	// linked to received data
+	int numberOfCells = 0;
+	int realCellId = 0;
+
+	while(octreeIterator.getCurrentGlobalIndex() <  getWorkingInterval(idxLevel , idProcess).min){
+	  octreeIterator.moveRight();
+	}
+	// for each cells
+	do{
+	  // copy cells that need data from others
+	  if(leafsNeedOther[idxLevel]->get(realCellId++)){
+	    iterArray[numberOfCells++] = octreeIterator;
+	  }
+	} while(octreeIterator.moveRight());
+	avoidGotoLeftIterator.moveDown();
+	octreeIterator = avoidGotoLeftIterator;
+
+	delete leafsNeedOther[idxLevel];
+	leafsNeedOther[idxLevel] = 0;
+
+	// Compute this cells
+	FLOG(computationCounter.tic());
+#pragma omp parallel
+	{
+	  KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
+	  MortonIndex neighborsIndex[189];
+	  int neighborsPosition[189];
+	  const CellClass* neighbors[343];
+
+#pragma omp for schedule(dynamic) nowait
+	  for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
+	    // compute indexes
+	    memset(neighbors, 0, 343 * sizeof(CellClass*));
+	    const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition);
+
+	    int counter = 0;
+	    // did we receive this index from someone?
+	    for(int idxNeig = 0 ;idxNeig < counterNeighbors ; ++idxNeig){
+	      if(neighborsIndex[idxNeig] < (getWorkingInterval(idxLevel , idProcess).min)
+		 || (getWorkingInterval(idxLevel , idProcess).max) < neighborsIndex[idxNeig]){
+
+		CellClass*const otherCell = tempTree.getCell(neighborsIndex[idxNeig], idxLevel);
+
+		if(otherCell){
+		  //otherCell->setMortonIndex(neighborsIndex[idxNeig]);
+		  neighbors[ neighborsPosition[idxNeig] ] = otherCell;
+		  ++counter;
+		}
+	      }
+	    }
+	    // need to compute
+	    if(counter){
+	      myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
+	    }
+	  }
+
+	  myThreadkernels->finishedLevelM2L(idxLevel);
+	}
+	FLOG(computationCounter.tac());
+      }
+      FLOG(receiveCounter.tac());
     }
 
-    /**
-      * To execute the fmm algorithm
-      * Call this function to run the complete algorithm
-      */
-    void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){
-        FTRACE( FTrace::FFunction functionTrace( __FUNCTION__, "Fmm" , __FILE__ , __LINE__ ) );
-
-        // Count leaf
-        this->numberOfLeafs = 0;
-        {
-            FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
-
-            Interval myLastInterval;
-            {
-                typename OctreeClass::Iterator octreeIterator(tree);
-                octreeIterator.gotoBottomLeft();
-                myLastInterval.min = octreeIterator.getCurrentGlobalIndex();
-                do{
-                    ++this->numberOfLeafs;
-                } while(octreeIterator.moveRight());
-                myLastInterval.max = octreeIterator.getCurrentGlobalIndex();
-            }
-            iterArray = new typename OctreeClass::Iterator[numberOfLeafs];
-            fassert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
-
-            // We get the min/max indexes from each procs
-            FMpi::MpiAssert( MPI_Allgather( &myLastInterval, sizeof(Interval), MPI_BYTE, intervals, sizeof(Interval), MPI_BYTE, comm.getComm()),  __LINE__ );
-
-            Interval*const myIntervals = new Interval[OctreeHeight];
-            myIntervals[OctreeHeight - 1] = myLastInterval;
-            for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 0 ; --idxLevel){
-                myIntervals[idxLevel].min = myIntervals[idxLevel+1].min >> 3;
-                myIntervals[idxLevel].max = myIntervals[idxLevel+1].max >> 3;
-            }
-            if(idProcess != 0){
-                typename OctreeClass::Iterator octreeIterator(tree);
-                octreeIterator.gotoBottomLeft();
-                octreeIterator.moveUp();
-
-                MortonIndex currentLimit = intervals[idProcess-1].max >> 3;
-
-                for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 1 ; --idxLevel){
-                    while(octreeIterator.getCurrentGlobalIndex() <= currentLimit){
-                        if( !octreeIterator.moveRight() ) break;
-                    }
-                    myIntervals[idxLevel].min = octreeIterator.getCurrentGlobalIndex();
-                    octreeIterator.moveUp();
-                    currentLimit >>= 3;
-                }
-            }
-
-            // We get the min/max indexes from each procs
-            FMpi::MpiAssert( MPI_Allgather( myIntervals, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE,
-                                            workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()),  __LINE__ );
-            delete[] myIntervals;
-        }
-
-        // run;
-        if(operationsToProceed & FFmmP2M) bottomPass();
-
-        if(operationsToProceed & FFmmM2M) upwardPass();
-
-        if(operationsToProceed & FFmmM2L) transferPass();
-
-        if(operationsToProceed & FFmmL2L) downardPass();
-
-        if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPass();
-
-        // delete array
-        delete [] iterArray;
-        iterArray = 0;
+    for(int idxComm = 0 ; idxComm < nbProcess * OctreeHeight; ++idxComm){
+      delete sendBuffer[idxComm];
+      delete recvBuffer[idxComm];
+    }
+    for(int idxComm = 0 ; idxComm < OctreeHeight; ++idxComm){
+      delete leafsNeedOther[idxComm];
+    }
+    delete[] sendBuffer;
+    delete[] recvBuffer;
+    delete[] indexToSend;
+    delete[] leafsNeedOther;
+    delete[] globalReceiveMap;
+    delete[] requests;
+    delete[] status;
+
+    FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
+    FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Receive : " << receiveCounter.cumulated() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Gather : " << gatherCounter.cumulated() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Prepare : " << prepareCounter.cumulated() << " s\n" );
+  }
+
+  //////////////////////////////////////////////////////////////////
+  // ---------------- L2L ---------------
+  //////////////////////////////////////////////////////////////////
+
+  void downardPass(){ // L2L pass: runs the kernels' L2L on every cell of levels 2 .. OctreeHeight-2, exchanging one boundary cell per level with other MPI processes when needed
+    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
+    FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); );
+    FLOG(FTic counterTime);
+    FLOG(FTic computationCounter);
+    FLOG(FTic prepareCounter);
+    FLOG(FTic waitCounter);
+
+    // Move one level down from the iterator's initial position; the level loop below starts at idxLevel = 2
+    typename OctreeClass::Iterator octreeIterator(tree);
+    octreeIterator.moveDown();
+    typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
+
+    MPI_Request*const requests = new MPI_Request[nbProcess]; // per level: at most one receive plus one send to each following process
+    MPI_Status*const status = new MPI_Status[nbProcess];
+
+    const int heightMinusOne = OctreeHeight - 1;
+
+    FMpiBufferWriter sendBuffer(comm.getComm());
+    FMpiBufferReader recvBuffer(comm.getComm(),MaxSizePerCell); // at most one receive per level is posted into this buffer
+
+    // for each level except the leaf level
+    for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
+      if(idProcess != 0
+	 && getWorkingInterval((idxLevel+1) , idProcess).max <= getWorkingInterval((idxLevel+1) , idProcess - 1).max){ // presumably: nothing of our own at level idxLevel+1 — skip this level but keep the iterators in step (confirm)
+
+	avoidGotoLeftIterator.moveDown();
+	octreeIterator = avoidGotoLeftIterator;
+
+	continue;
+      }
+
+      // copy cells to work with
+      int numberOfCells = 0;
+      // for each cell of the current level
+      do{
+	iterArray[numberOfCells++] = octreeIterator;
+      } while(octreeIterator.moveRight());
+      avoidGotoLeftIterator.moveDown();
+      octreeIterator = avoidGotoLeftIterator;
+
+      int firstCellWork = -1; // index of the last cell lying before this process's working interval at idxLevel (-1 if none)
+      while(iterArray[firstCellWork+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel , idProcess).min){
+	++firstCellWork;
+      }
+
+      bool needToRecv = false;
+      int iterRequests = 0;
+
+      FLOG(prepareCounter.tic());
+
+      // do we need to receive zero or one cell? (one when the parent index, i.e. index >> 3, of our first index at idxLevel+1 is not greater than that of the previous process's last index)
+      if(idProcess != 0
+	 && (getWorkingInterval((idxLevel + 1) , idProcess).min >> 3 ) <= (getWorkingInterval((idxLevel+1) , idProcess - 1).max >> 3 ) ){
+	needToRecv = true;
+
+
+	MPI_Irecv( recvBuffer.data(), recvBuffer.getCapacity(), MPI_PACKED, MPI_ANY_SOURCE,
+		   FMpi::TagFmmL2L, comm.getComm(), &requests[iterRequests++]);
+      }
+
+
+      if(idProcess != nbProcess - 1){
+	int firstProcThatRecv = idProcess + 1;
+	while( firstProcThatRecv < nbProcess &&
+	       getWorkingInterval((idxLevel + 1) , firstProcThatRecv).max <= getWorkingInterval((idxLevel+1) , idProcess).max){
+	  ++firstProcThatRecv;
+	}
+
+	int endProcThatRecv = firstProcThatRecv;
+	while( endProcThatRecv < nbProcess &&
+	       (getWorkingInterval((idxLevel + 1) , endProcThatRecv).min >> 3) <= (getWorkingInterval((idxLevel+1) , idProcess).max >> 3) ){
+	  ++endProcThatRecv;
+	}
+
+	if(firstProcThatRecv != endProcThatRecv){
+	  iterArray[numberOfCells - 1].getCurrentCell()->serializeDown(sendBuffer); // serialize this level's last cell once; the same buffer is sent to every receiver
+
+	  for(int idxProc = firstProcThatRecv ; idxProc < endProcThatRecv ; ++idxProc ){
+
+	    MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, idxProc,
+		      FMpi::TagFmmL2L, comm.getComm(), &requests[iterRequests++]);
+	  }
+
+	}
+      }
+      FLOG(prepareCounter.tac());
+
+      FLOG(computationCounter.tic());
+#pragma omp parallel
+      {
+	KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
+#pragma omp for nowait
+	for(int idxCell = firstCellWork + 1 ; idxCell < numberOfCells ; ++idxCell){
+	  myThreadkernels.L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
+	}
+      }
+      FLOG(computationCounter.tac());
+
+      // are we sending or receiving?
+      if(iterRequests){
+
+	// wait for every send/receive posted for this level
+	FLOG(waitCounter.tic());
+	MPI_Waitall( iterRequests, requests, status);
+	FLOG(waitCounter.tac());
+
+	if(needToRecv){
+	  // Load the received data into the cell just before our working interval, then run its L2L; NOTE(review): assumes firstCellWork >= 0 here — confirm
+	  FLOG(computationCounter.tic());
+	  iterArray[firstCellWork].getCurrentCell()->deserializeDown(recvBuffer);
+
+	  kernels[0]->L2L( iterArray[firstCellWork].getCurrentCell() , iterArray[firstCellWork].getCurrentChild(), idxLevel);
+	  FLOG(computationCounter.tac());
+	}
+      }
+
+      sendBuffer.reset(); // both buffers are reused at the next level
+      recvBuffer.seek(0);
     }
 
-private:
+    delete[] requests;
+    delete[] status;
+
+    FLOG( FLog::Controller << "\tFinished (@Downward Pass (L2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
+    FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Prepare : " << prepareCounter.cumulated() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Wait : " << waitCounter.cumulated() << " s\n" );
+  }
+
+
+  /////////////////////////////////////////////////////////////////////////////
+  // Direct
+  /////////////////////////////////////////////////////////////////////////////
+  struct LeafData{ // per-leaf record; directPass allocates an array and an FVector of these
+    FTreeCoordinate coord; // presumably the leaf's coordinate in the tree — TODO confirm
+    CellClass* cell; // presumably the cell attached to the leaf — TODO confirm ownership
+    ContainerClass* targets; // presumably the leaf's target particle container — TODO confirm
+    ContainerClass* sources; // presumably the leaf's source particle container — TODO confirm
+  };
+  /** P2P */
+  void directPass(){
+    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
+    FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); );
+    FLOG( FTic counterTime);
+    FLOG( FTic prepareCounter);
+    FLOG( FTic gatherCounter);
+    FLOG( FTic waitCounter);
+
+    ///////////////////////////////////////////////////
+    // Prepare data to send receive
+    ///////////////////////////////////////////////////
+    FLOG(prepareCounter.tic());
+
+    // To send in asynchrone way
+    MPI_Request requests[2 * nbProcess];
+    MPI_Status status[2 * nbProcess];
+    int iterRequest = 0;
+    int nbMessagesToRecv = 0;
+
+    FMpiBufferWriter**const sendBuffer = new FMpiBufferWriter*[nbProcess];
+    memset(sendBuffer, 0, sizeof(FMpiBufferWriter*) * nbProcess);
+
+    FMpiBufferReader**const recvBuffer = new FMpiBufferReader*[nbProcess];
+    memset(recvBuffer, 0, sizeof(FMpiBufferReader*) * nbProcess);
+
+    /* This is an nbProcess x nbProcess matrix of integers.
+     * Let U and V be process ids:
+     * globalReceiveMap[U*nbProcess + V] == size of the data owned by U and needed by V
+     */
+    int*const globalReceiveMap = new int[nbProcess * nbProcess];
+    memset(globalReceiveMap, 0, sizeof(int) * nbProcess * nbProcess);
+
+    FBoolArray leafsNeedOther(this->numberOfLeafs);
+    int countNeedOther = 0;
+
+    {
+      FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
+      // Copy leafs
+      {
+	typename OctreeClass::Iterator octreeIterator(tree);
+	octreeIterator.gotoBottomLeft();
+	int idxLeaf = 0;
+	do{
+	  this->iterArray[idxLeaf++] = octreeIterator;
+	} while(octreeIterator.moveRight());
+      }
+
+      // Number of cells max
+      //const int limite = 1 << (this->OctreeHeight - 1);
+      // pointer to send
+      FVector<typename OctreeClass::Iterator>*const toSend = new FVector<typename OctreeClass::Iterator>[nbProcess];
+
+      // array that will be send to other processus for them to build the globalReceiveMap
+      int partsToSend[nbProcess];
+      memset(partsToSend, 0, sizeof(int) * nbProcess);
+
+      // To know if a leaf has been already sent to a proc
+      int alreadySent[nbProcess];
+
+      //Will store the indexes of the neighbors of current cell
+      MortonIndex indexesNeighbors[26];
+      //Obviously unused
+      //int uselessIndexArray[26];
+
+      for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf){
+	memset(alreadySent, 0, sizeof(int) * nbProcess);
+	bool needOther = false;
+	//Get the neighbors of current cell in indexesNeighbors, and their number in neighCount
+	const int neighCount = (iterArray[idxLeaf].getCurrentGlobalCoordinate()).getNeighborsIndexes(OctreeHeight,indexesNeighbors);
+	//Loop over the neighbor leafs
+	for(int idxNeigh = 0 ; idxNeigh < neighCount ; ++idxNeigh){
+	  //Test if leaf belongs to someone else (false if it's mine)
+	  if(indexesNeighbors[idxNeigh] < (intervals[idProcess].min) || (intervals[idProcess].max) < indexesNeighbors[idxNeigh]){
+	    needOther = true;
+	    
+	    // find the proc that will need current leaf
+	    int procToReceive = idProcess;
+	    while( procToReceive != 0 && indexesNeighbors[idxNeigh] < intervals[procToReceive].min){
+	      --procToReceive; //scroll process "before" current process
+	    }
+	    
+	    while( procToReceive != nbProcess - 1 && (intervals[procToReceive].max) < indexesNeighbors[idxNeigh]){
+	      ++procToReceive;//scroll process "after" current process
+	    }
+	    //  Test : Not Already Send && USELESS TEST ?
+	    if( !alreadySent[procToReceive] && intervals[procToReceive].min <= indexesNeighbors[idxNeigh] && indexesNeighbors[idxNeigh] <= intervals[procToReceive].max){
+
+	      alreadySent[procToReceive] = 1;
+	      toSend[procToReceive].push( iterArray[idxLeaf] );
+	      partsToSend[procToReceive] += iterArray[idxLeaf].getCurrentListSrc()->getSavedSize();
+	      partsToSend[procToReceive] += int(sizeof(MortonIndex));
+	    }
+	  }
+	}
+
+	if(needOther){ //means that something need to be sent (or received)
+	  leafsNeedOther.set(idxLeaf,true);
+	  ++countNeedOther;
+	}
+      }
+
+      // Every non-empty message also starts with its leaf count (written first in the send loop below), so add room for it; presumably that count is an int — confirm
+      for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
+	if(partsToSend[idxProc]){
+	  partsToSend[idxProc] += int(sizeof(int));
+	}
+      }
+      
+      //Share to all processus globalReceiveMap
+      FLOG(gatherCounter.tic());
+      FMpi::MpiAssert( MPI_Allgather( partsToSend, nbProcess, MPI_INT, globalReceiveMap, nbProcess, MPI_INT, comm.getComm()),  __LINE__ );
+      FLOG(gatherCounter.tac());
+      
+      {//TODO : remove 
+	//Print the globalReceiveMap for Process 0
+	// if(idProcess == 0)
+	//   {
+	//     printf("\n Proc 0 :: \n");
+	//     for(int u = 0 ; u < nbProcess ; ++u){
+	//       for(int v = 0 ; v < nbProcess ; ++v){
+	// 	printf("\t %d",globalReceiveMap[u*nbProcess+v]);
+	//       }
+	//       printf("\n");
+	//     }
+	//   }
+      }
+      
+
+      //Prepare receive
+      for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
+      	if(globalReceiveMap[idxProc * nbProcess + idProcess]){ //if idxProc has sth for me.
+      	  //allocate buffer of right size
+      	  recvBuffer[idxProc] = new FMpiBufferReader(comm.getComm(),globalReceiveMap[idxProc * nbProcess + idProcess]);
+      	  FMpi::MpiAssert( MPI_Irecv(recvBuffer[idxProc]->data(), recvBuffer[idxProc]->getCapacity(), MPI_PACKED,
+      				     idxProc, FMpi::TagFmmP2P, comm.getComm(), &requests[iterRequest++]) , __LINE__ );
+      	}
+      }
+      
+      nbMessagesToRecv = iterRequest;
+      // Prepare send
+      for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
+	if(toSend[idxProc].getSize() != 0){
+	  sendBuffer[idxProc] = new FMpiBufferWriter(comm.getComm(),globalReceiveMap[idProcess*nbProcess+idxProc]); 
+	  // << is equivalent to write().
+	  (*sendBuffer[idxProc]) << toSend[idxProc].getSize();
+	  for(int idxLeaf = 0 ; idxLeaf < toSend[idxProc].getSize() ; ++idxLeaf){
+	    (*sendBuffer[idxProc]) << toSend[idxProc][idxLeaf].getCurrentGlobalIndex();
+	    toSend[idxProc][idxLeaf].getCurrentListSrc()->save(*sendBuffer[idxProc]);
+	  }
+	  
+	  //TEST BERENGER
+	  //if(sendBuffer[idxProc]->getSize() != partsToSend[idxProc]){
+	  FMpi::MpiAssert( MPI_Isend( sendBuffer[idxProc]->data(), sendBuffer[idxProc]->getSize() , MPI_PACKED ,
+				      idxProc, FMpi::TagFmmP2P, comm.getComm(), &requests[iterRequest++]) , __LINE__ );
+	  
+	}
+      }
+      
+      delete[] toSend;
+    }
+    FLOG(prepareCounter.tac());
 
-    /////////////////////////////////////////////////////////////////////////////
-    // P2M
-    /////////////////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////
+    // Prepare data for thread P2P
+    ///////////////////////////////////////////////////
 
-    /** P2M Bottom Pass */
-    void bottomPass(){
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
-        FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) );
-        FLOG(FTic counterTime);
+    // init
+    const int LeafIndex = OctreeHeight - 1;
+    const int SizeShape = 3*3*3;
 
-        typename OctreeClass::Iterator octreeIterator(tree);
+    int shapeLeaf[SizeShape];
+    memset(shapeLeaf,0,SizeShape*sizeof(int));
 
-        // Iterate on leafs
-        octreeIterator.gotoBottomLeft();
-        int leafs = 0;
-        do{
-            iterArray[leafs++] = octreeIterator;
-        } while(octreeIterator.moveRight());
+    LeafData* const leafsDataArray = new LeafData[this->numberOfLeafs];
 
-        FLOG(FTic computationCounter);
-        #pragma omp parallel
-        {
-            KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
-            #pragma omp for nowait
-            for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
-                myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc());
-            }
-        }
-        FLOG(computationCounter.tac());
+    FVector<LeafData> leafsNeedOtherData(countNeedOther);
 
+    // split data
+    {
+      FTRACE( FTrace::FRegion regionTrace( "Split" , __FUNCTION__ , __FILE__ , __LINE__) );
 
-        FLOG( FLog::Controller << "\tFinished (@Bottom Pass (P2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
-        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
+      typename OctreeClass::Iterator octreeIterator(tree);
+      octreeIterator.gotoBottomLeft();
 
-    }
+      // to store which shape for each leaf
+      typename OctreeClass::Iterator* const myLeafs = new typename OctreeClass::Iterator[this->numberOfLeafs];
+      int*const shapeType = new int[this->numberOfLeafs];
 
-    /////////////////////////////////////////////////////////////////////////////
-    // Upward
-    /////////////////////////////////////////////////////////////////////////////
-
-    /** M2M */
-    void upwardPass(){
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
-        FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); );
-        FLOG(FTic counterTime);
-        FLOG(FTic computationCounter);
-        FLOG(FTic prepareCounter);
-        FLOG(FTic waitCounter);
-
-        // Start from leal level - 1
-        typename OctreeClass::Iterator octreeIterator(tree);
-        octreeIterator.gotoBottomLeft();
-        octreeIterator.moveUp();
-        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
-
-        // This variable is the proc responsible
-        // of the shared cells
-        int sendToProc = idProcess;
-
-        // There are a maximum of 8-1 sends and 8-1 receptions
-        MPI_Request requests[14];
-        MPI_Status status[14];
-
-        // Maximum data per message is:
-        FBufferWriter sendBuffer;
-        const int recvBufferOffset = 8 * MaxSizePerCell + 1;
-        FBufferReader recvBuffer(nbProcess * recvBufferOffset);
-        CellClass recvBufferCells[8];
-
-        int firstProcThatSend = idProcess + 1;
-
-        // for each levels
-        for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
-            // No more work for me
-            if(idProcess != 0
-                    && getWorkingInterval((idxLevel+1), idProcess).max <= getWorkingInterval((idxLevel+1), idProcess - 1).max){
-                break;
-            }
-
-            // copy cells to work with
-            int numberOfCells = 0;
-            // for each cells
-            do{
-                iterArray[numberOfCells++] = octreeIterator;
-            } while(octreeIterator.moveRight());
-            avoidGotoLeftIterator.moveUp();
-            octreeIterator = avoidGotoLeftIterator;
-
-            // We may need to send something
-            int iterRequests = 0;
-            int cellsToSend = -1;
-
-            while(iterArray[cellsToSend+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel, idProcess).min){
-                ++cellsToSend;
-            }
-
-            FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
-
-            FLOG(prepareCounter.tic());
-            if(idProcess != 0
-                    && (getWorkingInterval((idxLevel+1), idProcess).min >>3) <= (getWorkingInterval((idxLevel+1), idProcess - 1).max >>3)){
-
-                char state = 0;
-                sendBuffer.write(char(0));
-
-                const CellClass* const* const child = iterArray[cellsToSend].getCurrentChild();
-                for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
-                    if( child[idxChild] && getWorkingInterval((idxLevel+1), idProcess).min <= child[idxChild]->getMortonIndex() ){
-                        child[idxChild]->serializeUp(sendBuffer);
-
-                        state = char(state | (0x1 << idxChild));
-                    }
-                }
-                sendBuffer.writeAt(0,state);
-
-                while( sendToProc && iterArray[cellsToSend].getCurrentGlobalIndex() == getWorkingInterval(idxLevel , sendToProc - 1).max){
-                    --sendToProc;
-                }
-
-                MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_BYTE, sendToProc, FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
-            }
-
-            // We may need to receive something
-            bool hasToReceive = false;
-            int endProcThatSend = firstProcThatSend;
-
-            if(idProcess != nbProcess - 1){
-                while(firstProcThatSend < nbProcess
-                      && (getWorkingInterval((idxLevel+1), firstProcThatSend).max) < (getWorkingInterval((idxLevel+1), idProcess).max)){
-                    ++firstProcThatSend;
-                }
-
-                if(firstProcThatSend < nbProcess &&
-                        (getWorkingInterval((idxLevel+1), firstProcThatSend).min >>3) <= (getWorkingInterval((idxLevel+1) , idProcess).max>>3) ){
-
-                    endProcThatSend = firstProcThatSend;
-
-                    while( endProcThatSend < nbProcess &&
-                           (getWorkingInterval((idxLevel+1) ,endProcThatSend).min >>3) <= (getWorkingInterval((idxLevel+1) , idProcess).max>>3)){
-                        ++endProcThatSend;
-                    }
-
-
-                    if(firstProcThatSend != endProcThatSend){
-                        hasToReceive = true;
-
-                        for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc ){
-                            MPI_Irecv(&recvBuffer.data()[idxProc * recvBufferOffset], recvBufferOffset, MPI_BYTE,
-                                      idxProc, FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
-                        }
-                    }
-                }
-            }
-            FLOG(prepareCounter.tac());
-            FTRACE( regionTrace.end() );
-
-            // Compute
-            const int endIndex = (hasToReceive?numberOfCells-1:numberOfCells);
-            FLOG(computationCounter.tic());
-            #pragma omp parallel
-            {
-                KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
-                #pragma omp for nowait
-                for( int idxCell = cellsToSend + 1 ; idxCell < endIndex ; ++idxCell){
-                    myThreadkernels.M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
-                }
-            }
-            FLOG(computationCounter.tac());
-
-            // Are we sending or waiting anything?
-            if(iterRequests){
-                FLOG(waitCounter.tic());
-                MPI_Waitall( iterRequests, requests, status);
-                FLOG(waitCounter.tac());
-
-                // we were receiving data
-                if( hasToReceive ){
-                    CellClass* currentChild[8];
-                    memcpy(currentChild, iterArray[numberOfCells - 1].getCurrentChild(), 8 * sizeof(CellClass*));
-
-                    // retreive data and merge my child and the child from others
-                    for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc){
-                        recvBuffer.seek(idxProc * recvBufferOffset);
-                        int state = int(recvBuffer.getValue<char>());
-
-                        int position = 0;
-                        while( state && position < 8){
-                            while(!(state & 0x1)){
-                                state >>= 1;
-                                ++position;
-                            }
-
-                            fassert(!currentChild[position], "Already has a cell here", __LINE__, __FILE__);
-
-                            recvBufferCells[position].deserializeUp(recvBuffer);
-                            currentChild[position] = (CellClass*) &recvBufferCells[position];
-
-                            state >>= 1;
-                            ++position;
-                        }
-                    }
-
-                    // Finally compute
-                    FLOG(computationCounter.tic());
-                    (*kernels[0]).M2M( iterArray[numberOfCells - 1].getCurrentCell() , currentChild, idxLevel);
-                    FLOG(computationCounter.tac());
-
-
-                    firstProcThatSend = endProcThatSend - 1;
-                }
-            }
-
-            sendBuffer.reset();
-            recvBuffer.seek(0);
-        }
-
-
-        FLOG( FLog::Controller << "\tFinished (@Upward Pass (M2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
-        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Prepare : " << prepareCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Wait : " << waitCounter.cumulated() << " s\n" );
+      for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf){
+	myLeafs[idxLeaf] = octreeIterator;
 
-    }
+	const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
+	const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
+	shapeType[idxLeaf] = shape;
 
-    /////////////////////////////////////////////////////////////////////////////
-    // Downard
-    /////////////////////////////////////////////////////////////////////////////
-
-    /** M2L  */
-    void transferPass(){
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
-
-        FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); );
-        FLOG(FTic counterTime);
-        FLOG(FTic computationCounter);
-        FLOG(FTic sendCounter);
-        FLOG(FTic receiveCounter);
-        FLOG(FTic prepareCounter);
-        FLOG(FTic gatherCounter);
-
-        //////////////////////////////////////////////////////////////////
-        // First know what to send to who
-        //////////////////////////////////////////////////////////////////
-
-        // pointer to send
-        FVector<typename OctreeClass::Iterator> toSend[nbProcess * OctreeHeight];
-        // index
-        int*const indexToSend = new int[nbProcess * OctreeHeight];
-        memset(indexToSend, 0, sizeof(int) * nbProcess * OctreeHeight);
-        // To know which one has need someone
-        FBoolArray** const leafsNeedOther = new FBoolArray*[OctreeHeight];
-        memset(leafsNeedOther, 0, sizeof(FBoolArray*) * OctreeHeight);
-
-        {
-            FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
-            FLOG(prepareCounter.tic());
-
-            // To know if a leaf has been already sent to a proc
-            bool*const alreadySent = new bool[nbProcess];
-            memset(alreadySent, 0, sizeof(bool) * nbProcess);
-
-            typename OctreeClass::Iterator octreeIterator(tree);
-            octreeIterator.moveDown();
-            typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
-            // for each levels
-            for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-                if(idProcess != 0
-                        && getWorkingInterval(idxLevel, idProcess).max <= getWorkingInterval(idxLevel, idProcess - 1).max){
-                    avoidGotoLeftIterator.moveDown();
-                    octreeIterator = avoidGotoLeftIterator;
-
-                    continue;
-                }
-
-                int numberOfCells = 0;
-
-                while(octreeIterator.getCurrentGlobalIndex() <  getWorkingInterval(idxLevel , idProcess).min){
-                    octreeIterator.moveRight();
-                }
-
-                // for each cells
-                do{
-                    iterArray[numberOfCells] = octreeIterator;
-                    ++numberOfCells;
-                } while(octreeIterator.moveRight());
-                avoidGotoLeftIterator.moveDown();
-                octreeIterator = avoidGotoLeftIterator;
-
-                leafsNeedOther[idxLevel] = new FBoolArray(numberOfCells);
-
-
-                // Which cell potentialy needs other data and in the same time
-                // are potentialy needed by other
-                int neighborsPosition[189];
-                MortonIndex neighborsIndexes[189];
-                for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
-                    // Find the M2L neigbors of a cell
-                    const int counter = getInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, neighborsIndexes, neighborsPosition);
-
-                    memset(alreadySent, false, sizeof(bool) * nbProcess);
-                    bool needOther = false;
-                    // Test each negibors to know which one do not belong to us
-                    for(int idxNeigh = 0 ; idxNeigh < counter ; ++idxNeigh){
-                        if(neighborsIndexes[idxNeigh] < getWorkingInterval(idxLevel , idProcess).min
-                                || (getWorkingInterval(idxLevel , idProcess).max) < neighborsIndexes[idxNeigh]){
-                            int procToReceive = idProcess;
-                            while( 0 != procToReceive && neighborsIndexes[idxNeigh] < getWorkingInterval(idxLevel , procToReceive).min ){
-                                --procToReceive;
-                            }
-                            while( procToReceive != nbProcess -1 && (getWorkingInterval(idxLevel , procToReceive).max) < neighborsIndexes[idxNeigh]){
-                                ++procToReceive;
-                            }
-                            // Maybe already sent to that proc?
-                            if( !alreadySent[procToReceive]
-                                    && getWorkingInterval(idxLevel , procToReceive).min <= neighborsIndexes[idxNeigh]
-                                    && neighborsIndexes[idxNeigh] <= getWorkingInterval(idxLevel , procToReceive).max){
-
-                                alreadySent[procToReceive] = true;
-
-                                needOther = true;
-
-                                toSend[idxLevel * nbProcess + procToReceive].push(iterArray[idxCell]);
-                                ++indexToSend[idxLevel * nbProcess + procToReceive];
-                            }
-                        }
-                    }
-                    if(needOther){
-                        leafsNeedOther[idxLevel]->set(idxCell,true);
-                    }
-
-                }
-
-            }
-            FLOG(prepareCounter.tac());
-
-            delete[] alreadySent;
-        }
-
-        //////////////////////////////////////////////////////////////////
-        // Gather this information
-        //////////////////////////////////////////////////////////////////
-
-        FLOG(gatherCounter.tic());
-        // All process say to each others
-        // what the will send to who
-        int*const globalReceiveMap = new int[nbProcess * nbProcess * OctreeHeight];
-        memset(globalReceiveMap, 0, sizeof(int) * nbProcess * nbProcess * OctreeHeight);
-        FMpi::MpiAssert( MPI_Allgather( indexToSend, nbProcess * OctreeHeight, MPI_INT, globalReceiveMap, nbProcess * OctreeHeight, MPI_INT, comm.getComm()),  __LINE__ );
-        FLOG(gatherCounter.tac());
-
-
-        //////////////////////////////////////////////////////////////////
-        // Send and receive for real
-        //////////////////////////////////////////////////////////////////
-
-        FLOG(sendCounter.tic());
-        // Then they can send and receive (because they know what they will receive)
-        // To send in asynchrone way
-        MPI_Request*const requests = new MPI_Request[2 * nbProcess * OctreeHeight];
-        MPI_Status*const status = new MPI_Status[2 * nbProcess * OctreeHeight];
-        int iterRequest = 0;
-
-        const int SizeOfCellToSend = sizeof(MortonIndex) + sizeof(int) + MaxSizePerCell;
-
-        FBufferWriter**const sendBuffer = new FBufferWriter*[nbProcess * OctreeHeight];
-        memset(sendBuffer, 0, sizeof(FBufferWriter*) * nbProcess * OctreeHeight);
-
-        FBufferReader**const recvBuffer = new FBufferReader*[nbProcess * OctreeHeight];
-        memset(recvBuffer, 0, sizeof(FBufferReader*) * nbProcess * OctreeHeight);
-
-
-        for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-            for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
-                const int toSendAtProcAtLevel = indexToSend[idxLevel * nbProcess + idxProc];
-                if(toSendAtProcAtLevel != 0){
-                    sendBuffer[idxLevel * nbProcess + idxProc] = new FBufferWriter(toSendAtProcAtLevel * SizeOfCellToSend);
-
-                    for(int idxLeaf = 0 ; idxLeaf < toSendAtProcAtLevel; ++idxLeaf){
-                        const MortonIndex cellIndex = toSend[idxLevel * nbProcess + idxProc][idxLeaf].getCurrentGlobalIndex();
-                        sendBuffer[idxLevel * nbProcess + idxProc]->write(cellIndex);
-                        toSend[idxLevel * nbProcess + idxProc][idxLeaf].getCurrentCell()->serializeUp(*sendBuffer[idxLevel * nbProcess + idxProc]);
-                    }
-
-                    FMpi::MpiAssert( MPI_Isend( sendBuffer[idxLevel * nbProcess + idxProc]->data(), sendBuffer[idxLevel * nbProcess + idxProc]->getSize()
-                                                , MPI_BYTE , idxProc, FMpi::TagLast + idxLevel, comm.getComm(), &requests[iterRequest++]) , __LINE__ );
-                }
-
-                const int toReceiveFromProcAtLevel = globalReceiveMap[(idxProc * nbProcess * OctreeHeight) + idxLevel * nbProcess + idProcess];
-                if(toReceiveFromProcAtLevel){
-                    recvBuffer[idxLevel * nbProcess + idxProc] = new FBufferReader(toReceiveFromProcAtLevel * SizeOfCellToSend);
-
-                    FMpi::MpiAssert( MPI_Irecv(recvBuffer[idxLevel * nbProcess + idxProc]->data(), recvBuffer[idxLevel * nbProcess + idxProc]->getSize(), MPI_BYTE,
-                                               idxProc, FMpi::TagLast + idxLevel, comm.getComm(), &requests[iterRequest++]) , __LINE__ );
-                }
-            }
-        }
-        FLOG(sendCounter.tac());
-
-        //////////////////////////////////////////////////////////////////
-        // Do M2L
-        //////////////////////////////////////////////////////////////////
-
-        {
-            FTRACE( FTrace::FRegion regionTrace("Compute", __FUNCTION__ , __FILE__ , __LINE__) );
-            typename OctreeClass::Iterator octreeIterator(tree);
-            octreeIterator.moveDown();
-            typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
-            // Now we can compute all the data
-            // for each levels
-            for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-                if(idProcess != 0
-                        && getWorkingInterval(idxLevel, idProcess).max <= getWorkingInterval(idxLevel, idProcess - 1).max){
-
-                    avoidGotoLeftIterator.moveDown();
-                    octreeIterator = avoidGotoLeftIterator;
-
-                    continue;
-                }
-
-                int numberOfCells = 0;
-                while(octreeIterator.getCurrentGlobalIndex() <  getWorkingInterval(idxLevel , idProcess).min){
-                    octreeIterator.moveRight();
-                }
-                // for each cells
-                do{
-                    iterArray[numberOfCells] = octreeIterator;
-                    ++numberOfCells;
-                } while(octreeIterator.moveRight());
-                avoidGotoLeftIterator.moveDown();
-                octreeIterator = avoidGotoLeftIterator;
-
-                FLOG(computationCounter.tic());
-                #pragma omp parallel
-                {
-                    KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
-                    const CellClass* neighbors[343];
-
-                    #pragma omp for  schedule(dynamic) nowait
-                    for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
-                        const int counter = tree->getInteractionNeighbors(neighbors,  iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel);
-                        if(counter) myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
-                    }
-
-                    myThreadkernels->finishedLevelM2L(idxLevel);
-                }
-                FLOG(computationCounter.tac());
-            }
-        }
-
-        //////////////////////////////////////////////////////////////////
-        // Wait received data and compute
-        //////////////////////////////////////////////////////////////////
-
-        // Wait to receive every things (and send every things)
-        MPI_Waitall(iterRequest, requests, status);
-
-        {
-            FTRACE( FTrace::FRegion regionTrace("Compute Received data", __FUNCTION__ , __FILE__ , __LINE__) );
-            FLOG(receiveCounter.tic());
-            typename OctreeClass::Iterator octreeIterator(tree);
-            octreeIterator.moveDown();
-            typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
-            // compute the second time
-            // for each levels
-            for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-                if(idProcess != 0
-                        && getWorkingInterval(idxLevel, idProcess).max <= getWorkingInterval(idxLevel, idProcess - 1).max){
-
-                    avoidGotoLeftIterator.moveDown();
-                    octreeIterator = avoidGotoLeftIterator;
-
-                    continue;
-                }
-
-                // put the received data into a temporary tree
-                FLightOctree<CellClass> tempTree;
-                for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
-                    const int toReceiveFromProcAtLevel = globalReceiveMap[(idxProc * nbProcess * OctreeHeight) + idxLevel * nbProcess + idProcess];
-
-                    for(int idxCell = 0 ; idxCell < toReceiveFromProcAtLevel ; ++idxCell){
-                        const MortonIndex cellIndex = recvBuffer[idxLevel * nbProcess + idxProc]->FBufferReader::getValue<MortonIndex>();
-
-                        CellClass* const newCell = new CellClass;
-                        newCell->setMortonIndex(cellIndex);
-                        newCell->deserializeUp(*recvBuffer[idxLevel * nbProcess + idxProc]);
-
-                        tempTree.insertCell(cellIndex, idxLevel, newCell);
-                    }
-                }
-
-
-                // take cells from our octree only if they are
-                // linked to received data
-                int numberOfCells = 0;
-                int realCellId = 0;
-
-                while(octreeIterator.getCurrentGlobalIndex() <  getWorkingInterval(idxLevel , idProcess).min){
-                    octreeIterator.moveRight();
-                }
-                // for each cells
-                do{
-                    // copy cells that need data from others
-                    if(leafsNeedOther[idxLevel]->get(realCellId++)){
-                        iterArray[numberOfCells++] = octreeIterator;
-                    }
-                } while(octreeIterator.moveRight());
-                avoidGotoLeftIterator.moveDown();
-                octreeIterator = avoidGotoLeftIterator;
-
-                delete leafsNeedOther[idxLevel];
-                leafsNeedOther[idxLevel] = 0;
-
-                // Compute this cells
-                FLOG(computationCounter.tic());
-                #pragma omp parallel
-                {
-                    KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
-                    MortonIndex neighborsIndex[189];
-                    int neighborsPosition[189];
-                    const CellClass* neighbors[343];
-
-                    #pragma omp for schedule(dynamic) nowait
-                    for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
-                        // compute indexes
-                        memset(neighbors, 0, 343 * sizeof(CellClass*));
-                        const int counterNeighbors = getInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, neighborsIndex, neighborsPosition);
-
-                        int counter = 0;
-                        // does we receive this index from someone?
-                        for(int idxNeig = 0 ;idxNeig < counterNeighbors ; ++idxNeig){
-                            if(neighborsIndex[idxNeig] < (getWorkingInterval(idxLevel , idProcess).min)
-                                    || (getWorkingInterval(idxLevel , idProcess).max) < neighborsIndex[idxNeig]){
-
-                                CellClass*const otherCell = tempTree.getCell(neighborsIndex[idxNeig], idxLevel);
-
-                                if(otherCell){
-                                    //otherCell->setMortonIndex(neighborsIndex[idxNeig]);
-                                    neighbors[ neighborsPosition[idxNeig] ] = otherCell;
-                                    ++counter;
-                                }
-                            }
-                        }
-                        // need to compute
-                        if(counter){
-                            myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
-                        }
-                    }
-
-                    myThreadkernels->finishedLevelM2L(idxLevel);
-                }
-                FLOG(computationCounter.tac());
-            }
-            FLOG(receiveCounter.tac());
-        }
-
-        for(int idxComm = 0 ; idxComm < nbProcess * OctreeHeight; ++idxComm){
-            delete sendBuffer[idxComm];
-            delete recvBuffer[idxComm];
-        }
-        for(int idxComm = 0 ; idxComm < OctreeHeight; ++idxComm){
-            delete leafsNeedOther[idxComm];
-        }
-        delete[] sendBuffer;
-        delete[] recvBuffer;
-        delete[] indexToSend;
-        delete[] leafsNeedOther;
-        delete[] globalReceiveMap;
-        delete[] requests;
-        delete[] status;
-
-        FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
-        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Receive : " << receiveCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Gather : " << gatherCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Prepare : " << prepareCounter.cumulated() << " s\n" );
-    }
+	++shapeLeaf[shape];
 
-    //////////////////////////////////////////////////////////////////
-    // ---------------- L2L ---------------
-    //////////////////////////////////////////////////////////////////
+	octreeIterator.moveRight();
+      }
 
-    void downardPass(){ // second L2L
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
-        FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); );
-        FLOG(FTic counterTime);
-        FLOG(FTic computationCounter);
-        FLOG(FTic prepareCounter);
-        FLOG(FTic waitCounter);
-
-        // Start from leal level - 1
-        typename OctreeClass::Iterator octreeIterator(tree);
-        octreeIterator.moveDown();
-        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
-
-        MPI_Request*const requests = new MPI_Request[nbProcess];
-        MPI_Status*const status = new MPI_Status[nbProcess];
-
-        const int heightMinusOne = OctreeHeight - 1;
-
-        FBufferWriter sendBuffer;
-        FBufferReader recvBuffer(MaxSizePerCell);
-
-        // for each levels exepted leaf level
-        for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
-            if(idProcess != 0
-                    && getWorkingInterval((idxLevel+1) , idProcess).max <= getWorkingInterval((idxLevel+1) , idProcess - 1).max){
-
-                avoidGotoLeftIterator.moveDown();
-                octreeIterator = avoidGotoLeftIterator;
-
-                continue;
-            }
-
-            // copy cells to work with
-            int numberOfCells = 0;
-            // for each cells
-            do{
-                iterArray[numberOfCells++] = octreeIterator;
-            } while(octreeIterator.moveRight());
-            avoidGotoLeftIterator.moveDown();
-            octreeIterator = avoidGotoLeftIterator;
-
-            int firstCellWork = -1;
-            while(iterArray[firstCellWork+1].getCurrentGlobalIndex() < getWorkingInterval(idxLevel , idProcess).min){
-                ++firstCellWork;
-            }
-
-            bool needToRecv = false;
-            int iterRequests = 0;
-
-            FLOG(prepareCounter.tic());
-
-            // do we need to receive one or zeros cell
-            if(idProcess != 0
-                    && (getWorkingInterval((idxLevel + 1) , idProcess).min >> 3 ) <= (getWorkingInterval((idxLevel+1) , idProcess - 1).max >> 3 ) ){
-                needToRecv = true;
-
-
-                MPI_Irecv( recvBuffer.data(), recvBuffer.getSize(), MPI_BYTE, MPI_ANY_SOURCE,
-                           FMpi::TagFmmL2L, comm.getComm(), &requests[iterRequests++]);
-            }
-
-
-            if(idProcess != nbProcess - 1){
-                int firstProcThatRecv = idProcess + 1;
-                while( firstProcThatRecv < nbProcess &&
-                       getWorkingInterval((idxLevel + 1) , firstProcThatRecv).max <= getWorkingInterval((idxLevel+1) , idProcess).max){
-                    ++firstProcThatRecv;
-                }
-
-                int endProcThatRecv = firstProcThatRecv;
-                while( endProcThatRecv < nbProcess &&
-                       (getWorkingInterval((idxLevel + 1) , endProcThatRecv).min >> 3) <= (getWorkingInterval((idxLevel+1) , idProcess).max >> 3) ){
-                    ++endProcThatRecv;
-                }
-
-                if(firstProcThatRecv != endProcThatRecv){
-                    iterArray[numberOfCells - 1].getCurrentCell()->serializeDown(sendBuffer);
-
-                    for(int idxProc = firstProcThatRecv ; idxProc < endProcThatRecv ; ++idxProc ){
-
-                        MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_BYTE, idxProc,
-                                  FMpi::TagFmmL2L, comm.getComm(), &requests[iterRequests++]);
-                    }
-
-                }
-            }
-            FLOG(prepareCounter.tac());
-
-            FLOG(computationCounter.tic());
-            #pragma omp parallel
-            {
-                KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
-                #pragma omp for nowait
-                for(int idxCell = firstCellWork + 1 ; idxCell < numberOfCells ; ++idxCell){
-                    myThreadkernels.L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
-                }
-            }
-            FLOG(computationCounter.tac());
-
-            // are we sending or receiving?
-            if(iterRequests){
-
-                // process
-                FLOG(waitCounter.tic());
-                MPI_Waitall( iterRequests, requests, status);
-                FLOG(waitCounter.tac());
-
-                if(needToRecv){
-                    // Need to compute
-                    FLOG(computationCounter.tic());
-                    iterArray[firstCellWork].getCurrentCell()->deserializeDown(recvBuffer);
-
-                    kernels[0]->L2L( iterArray[firstCellWork].getCurrentCell() , iterArray[firstCellWork].getCurrentChild(), idxLevel);
-                    FLOG(computationCounter.tac());
-                }
-            }
-
-            sendBuffer.reset();
-            recvBuffer.seek(0);
-        }
-
-        delete[] requests;
-        delete[] status;
-
-        FLOG( FLog::Controller << "\tFinished (@Downward Pass (L2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
-        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Prepare : " << prepareCounter.cumulated() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Wait : " << waitCounter.cumulated() << " s\n" );
-    }
+      int startPosAtShape[SizeShape];
+      startPosAtShape[0] = 0;
+      for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){
+	startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + shapeLeaf[idxShape-1];
+      }
+
+      int idxInArray = 0;
+      for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf, ++idxInArray){
+	const int shapePosition = shapeType[idxInArray];
 
+	leafsDataArray[startPosAtShape[shapePosition]].coord = myLeafs[idxInArray].getCurrentGlobalCoordinate();
+	leafsDataArray[startPosAtShape[shapePosition]].cell = myLeafs[idxInArray].getCurrentCell();
+	leafsDataArray[startPosAtShape[shapePosition]].targets = myLeafs[idxInArray].getCurrentListTargets();
+	leafsDataArray[startPosAtShape[shapePosition]].sources = myLeafs[idxInArray].getCurrentListSrc();
+	if( leafsNeedOther.get(idxLeaf) ) leafsNeedOtherData.push(leafsDataArray[startPosAtShape[shapePosition]]);
 
-    /////////////////////////////////////////////////////////////////////////////
-    // Direct
-    /////////////////////////////////////////////////////////////////////////////
-    struct LeafData{
-        FTreeCoordinate coord;
-        CellClass* cell;
-        ContainerClass* targets;
-        ContainerClass* sources;
-    };
-    /** P2P */
-    void directPass(){
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
-        FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); );
-        FLOG( FTic counterTime);
-        FLOG( FTic prepareCounter);
-        FLOG( FTic gatherCounter);
-        FLOG( FTic waitCounter);
-
-        ///////////////////////////////////////////////////
-        // Prepare data to send receive
-        ///////////////////////////////////////////////////
-        FLOG(prepareCounter.tic());
-
-        // To send in asynchrone way
-        MPI_Request requests[2 * nbProcess];
-        MPI_Status status[2 * nbProcess];
-        int iterRequest = 0;
-        int nbMessagesToRecv = 0;
-
-        FBufferWriter**const sendBuffer = new FBufferWriter*[nbProcess];
-        memset(sendBuffer, 0, sizeof(FBufferWriter*) * nbProcess);
-
-        FBufferReader**const recvBuffer = new FBufferReader*[nbProcess];
-        memset(recvBuffer, 0, sizeof(FBufferReader*) * nbProcess);
-
-        int*const globalReceiveMap = new int[nbProcess * nbProcess];
-        memset(globalReceiveMap, 0, sizeof(int) * nbProcess * nbProcess);
-
-        FBoolArray leafsNeedOther(this->numberOfLeafs);
-        int countNeedOther = 0;
-
-        {
-            FTRACE( FTrace::FRegion regionTrace( "Preprocess" , __FUNCTION__ , __FILE__ , __LINE__) );
-            // Copy leafs
-            {
-                typename OctreeClass::Iterator octreeIterator(tree);
-                octreeIterator.gotoBottomLeft();
-                int idxLeaf = 0;
-                do{
-                    this->iterArray[idxLeaf++] = octreeIterator;
-                } while(octreeIterator.moveRight());
-            }
-
-            // Box limite
-            const int limite = 1 << (this->OctreeHeight - 1);
-            // pointer to send
-            FVector<typename OctreeClass::Iterator>*const toSend = new FVector<typename OctreeClass::Iterator>[nbProcess];
-
-            // index
-            int partsToSend[nbProcess];
-            memset(partsToSend, 0, sizeof(int) * nbProcess);
-
-            // To know if a leaf has been already sent to a proc
-            int alreadySent[nbProcess];
-
-            MortonIndex indexesNeighbors[26];
-            int uselessIndexArray[26];
-
-            for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf){
-                memset(alreadySent, 0, sizeof(int) * nbProcess);
-                bool needOther = false;
-
-                const int neighCount = getNeighborsIndexes(iterArray[idxLeaf].getCurrentGlobalCoordinate(), limite, indexesNeighbors, uselessIndexArray);
-
-                for(int idxNeigh = 0 ; idxNeigh < neighCount ; ++idxNeigh){
-                    if(indexesNeighbors[idxNeigh] < (intervals[idProcess].min) || (intervals[idProcess].max) < indexesNeighbors[idxNeigh]){
-                        needOther = true;
-
-                        // find the proc that need this information
-                        int procToReceive = idProcess;
-                        while( procToReceive != 0 && indexesNeighbors[idxNeigh] < intervals[procToReceive].min){
-                            --procToReceive;
-                        }
-
-                        while( procToReceive != nbProcess - 1 && (intervals[procToReceive].max) < indexesNeighbors[idxNeigh]){
-                            ++procToReceive;
-                        }
-
-                        if( !alreadySent[procToReceive] && intervals[procToReceive].min <= indexesNeighbors[idxNeigh] && indexesNeighbors[idxNeigh] <= intervals[procToReceive].max){
-
-                            alreadySent[procToReceive] = 1;
-                            toSend[procToReceive].push( iterArray[idxLeaf] );
-                            partsToSend[procToReceive] += iterArray[idxLeaf].getCurrentListSrc()->getSavedSize();
-                            partsToSend[procToReceive] += int(sizeof(MortonIndex));
-                        }
-                    }
-                }
-
-                if(needOther){
-                    leafsNeedOther.set(idxLeaf,true);
-                    ++countNeedOther;
-                }
-            }
-
-            for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
-                if(partsToSend[idxProc]){
-                    partsToSend[idxProc] += int(sizeof(int));
-                }
-            }
-
-            FLOG(gatherCounter.tic());
-            FMpi::MpiAssert( MPI_Allgather( partsToSend, nbProcess, MPI_INT, globalReceiveMap, nbProcess, MPI_INT, comm.getComm()),  __LINE__ );
-            FLOG(gatherCounter.tac());
-
-            // Prepare receive
-            for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
-                if(globalReceiveMap[idxProc * nbProcess + idProcess]){
-                    recvBuffer[idxProc] = new FBufferReader(globalReceiveMap[idxProc * nbProcess + idProcess]);
-                    FMpi::MpiAssert( MPI_Irecv(recvBuffer[idxProc]->data(), recvBuffer[idxProc]->getSize(), MPI_BYTE,
-                                               idxProc, FMpi::TagFmmP2P, comm.getComm(), &requests[iterRequest++]) , __LINE__ );
-                }
-            }
-
-            nbMessagesToRecv = iterRequest;
-            // Prepare send
-            for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
-                if(toSend[idxProc].getSize() != 0){
-                    sendBuffer[idxProc] = new FBufferWriter(partsToSend[idxProc]);
-
-                    (*sendBuffer[idxProc]) << toSend[idxProc].getSize();
-
-                    for(int idxLeaf = 0 ; idxLeaf < toSend[idxProc].getSize() ; ++idxLeaf){
-                        (*sendBuffer[idxProc]) << toSend[idxProc][idxLeaf].getCurrentGlobalIndex();
-                        toSend[idxProc][idxLeaf].getCurrentListSrc()->save(*sendBuffer[idxProc]);
-                    }
-#ifdef FUSE_DEBUG
-                    // TODO clean test
-                    if(sendBuffer[idxProc]->getSize() != partsToSend[idxProc]){
-                        printf("Error 1056 fmm algo proc\n");
-                    }
-#endif
-                    FMpi::MpiAssert( MPI_Isend( sendBuffer[idxProc]->data(), sendBuffer[idxProc]->getSize() , MPI_BYTE ,
-                                                idxProc, FMpi::TagFmmP2P, comm.getComm(), &requests[iterRequest++]) , __LINE__ );
-
-                }
-            }
-
-            delete[] toSend;
-        }
-        FLOG(prepareCounter.tac());
-
-        ///////////////////////////////////////////////////
-        // Prepare data for thread P2P
-        ///////////////////////////////////////////////////
-
-        // init
-        const int LeafIndex = OctreeHeight - 1;
-        const int SizeShape = 3*3*3;
-
-        int shapeLeaf[SizeShape];
-        memset(shapeLeaf,0,SizeShape*sizeof(int));
-
-        LeafData* const leafsDataArray = new LeafData[this->numberOfLeafs];
-
-        FVector<LeafData> leafsNeedOtherData(countNeedOther);
-
-        // split data
-        {
-            FTRACE( FTrace::FRegion regionTrace( "Split" , __FUNCTION__ , __FILE__ , __LINE__) );
-
-            typename OctreeClass::Iterator octreeIterator(tree);
-            octreeIterator.gotoBottomLeft();
-
-            // to store which shape for each leaf
-            typename OctreeClass::Iterator* const myLeafs = new typename OctreeClass::Iterator[this->numberOfLeafs];
-            int*const shapeType = new int[this->numberOfLeafs];
-
-            for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf){
-                myLeafs[idxLeaf] = octreeIterator;
-
-                const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
-                const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
-                shapeType[idxLeaf] = shape;
-
-                ++shapeLeaf[shape];
-
-                octreeIterator.moveRight();
-            }
-
-            int startPosAtShape[SizeShape];
-            startPosAtShape[0] = 0;
-            for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){
-                startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + shapeLeaf[idxShape-1];
-            }
-
-            int idxInArray = 0;
-            for(int idxLeaf = 0 ; idxLeaf < this->numberOfLeafs ; ++idxLeaf, ++idxInArray){
-                const int shapePosition = shapeType[idxInArray];
-
-                leafsDataArray[startPosAtShape[shapePosition]].coord = myLeafs[idxInArray].getCurrentGlobalCoordinate();
-                leafsDataArray[startPosAtShape[shapePosition]].cell = myLeafs[idxInArray].getCurrentCell();
-                leafsDataArray[startPosAtShape[shapePosition]].targets = myLeafs[idxInArray].getCurrentListTargets();
-                leafsDataArray[startPosAtShape[shapePosition]].sources = myLeafs[idxInArray].getCurrentListSrc();
-                if( leafsNeedOther.get(idxLeaf) ) leafsNeedOtherData.push(leafsDataArray[startPosAtShape[shapePosition]]);
-
-                ++startPosAtShape[shapePosition];
-            }
-
-            delete[] shapeType;
-            delete[] myLeafs;
-        }
-
-
-        //////////////////////////////////////////////////////////
-        // Computation P2P that DO NOT need others data
-        //////////////////////////////////////////////////////////
-        FTRACE( FTrace::FRegion regionP2PTrace("Compute P2P", __FUNCTION__ , __FILE__ , __LINE__) );
-
-        FLOG(FTic computationCounter);
-
-        #pragma omp parallel
-        {
-            KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
-            // There is a maximum of 26 neighbors
-            ContainerClass* neighbors[27];
-            int previous = 0;
-
-            for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
-                const int endAtThisShape = shapeLeaf[idxShape] + previous;
-
-                #pragma omp for
-                for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; ++idxLeafs){
-                    LeafData& currentIter = leafsDataArray[idxLeafs];
-                    myThreadkernels.L2P(currentIter.cell, currentIter.targets);
-
-                    // need the current particles and neighbors particles
-                    const int counter = tree->getLeafsNeighbors(neighbors, currentIter.coord, LeafIndex);
-                    myThreadkernels.P2P( currentIter.coord,currentIter.targets,
-                                         currentIter.sources, neighbors, counter);
-                }
-
-                previous = endAtThisShape;
-            }
-        }
-        FLOG(computationCounter.tac());
-        FTRACE( regionP2PTrace.end() );
-
-        //////////////////////////////////////////////////////////
-        // Wait send receive
-        //////////////////////////////////////////////////////////
-
-        FLOG(FTic computation2Counter);
-
-        // Create an octree with leaves from others
-        OctreeClass otherP2Ptree( tree->getHeight(), tree->getSubHeight(), tree->getBoxWidth(), tree->getBoxCenter() );
-        int complete = 0;
-        int*const indexMessage = new int[nbProcess * 2];
-        while( complete != iterRequest){
-            memset(indexMessage, 0, sizeof(int) * nbProcess * 2);
-            int countMessages = 0;
-            // Wait data
-            FLOG(waitCounter.tic());
-            MPI_Waitsome(iterRequest, requests, &countMessages, indexMessage, status);
-            FLOG(waitCounter.tac());
-            complete += countMessages;
-
-
-            for(int idxRcv = 0 ; idxRcv < countMessages ; ++idxRcv){
-                if( indexMessage[idxRcv] < nbMessagesToRecv ){
-                    const int idxProc = status[idxRcv].MPI_SOURCE;
-                    int nbLeaves;
-                    (*recvBuffer[idxProc]) >> nbLeaves;
-                    for(int idxLeaf = 0 ; idxLeaf < nbLeaves ; ++idxLeaf){
-                        MortonIndex leafIndex;
-                        (*recvBuffer[idxProc]) >> leafIndex;
-                        otherP2Ptree.createLeaf(leafIndex)->getSrc()->restore((*recvBuffer[idxProc]));
-                    }
-                    delete recvBuffer[idxProc];
-                    recvBuffer[idxProc] = 0;
-                }
-            }
-        }
-        delete[] indexMessage;
-
-        //////////////////////////////////////////////////////////
-        // Computation P2P that need others data
-        //////////////////////////////////////////////////////////
-
-        FTRACE( FTrace::FRegion regionOtherTrace("Compute P2P Other", __FUNCTION__ , __FILE__ , __LINE__) );
-        FLOG( computation2Counter.tic() );
-
-        #pragma omp parallel
-        {
-            KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
-            // There is a maximum of 26 neighbors
-            ContainerClass* neighbors[27];
-            MortonIndex indexesNeighbors[27];
-            int indexArray[26];
-            // Box limite
-            const int limite = 1 << (this->OctreeHeight - 1);
-            const int nbLeafToProceed = leafsNeedOtherData.getSize();
-
-            #pragma omp for
-            for(int idxLeafs = 0 ; idxLeafs < nbLeafToProceed ; ++idxLeafs){
-                LeafData currentIter = leafsNeedOtherData[idxLeafs];
-
-                // need the current particles and neighbors particles
-                int counter = 0;
-                memset( neighbors, 0, sizeof(ContainerClass*) * 27);
-
-                // Take possible data
-                const int nbNeigh = getNeighborsIndexes(currentIter.coord, limite, indexesNeighbors, indexArray);
-
-                for(int idxNeigh = 0 ; idxNeigh < nbNeigh ; ++idxNeigh){
-                    if(indexesNeighbors[idxNeigh] < (intervals[idProcess].min) || (intervals[idProcess].max) < indexesNeighbors[idxNeigh]){
-                        ContainerClass*const hypotheticNeighbor = otherP2Ptree.getLeafSrc(indexesNeighbors[idxNeigh]);
-                        if(hypotheticNeighbor){
-                            neighbors[ indexArray[idxNeigh] ] = hypotheticNeighbor;
-                            ++counter;
-                        }
-                    }
-                }
-
-                myThreadkernels.P2PRemote( currentIter.cell->getCoordinate(), currentIter.targets,
-                                     currentIter.sources, neighbors, counter);
-            }
-
-        }
-
-        for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
-            delete sendBuffer[idxProc];
-            delete recvBuffer[idxProc];
-        }
-        delete[] globalReceiveMap;
-        delete[] leafsDataArray;
-
-        FLOG(computation2Counter.tac());
-
-
-        FLOG( FLog::Controller << "\tFinished (@Direct Pass (L2P + P2P) = "  << counterTime.tacAndElapsed() << "s)\n" );
-        FLOG( FLog::Controller << "\t\t Computation L2P + P2P : " << computationCounter.elapsed() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Computation P2P 2 : " << computation2Counter.elapsed() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Prepare P2P : " << prepareCounter.elapsed() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Gather P2P : " << gatherCounter.elapsed() << " s\n" );
-        FLOG( FLog::Controller << "\t\t Wait : " << waitCounter.elapsed() << " s\n" );
+	++startPosAtShape[shapePosition];
+      }
 
+      delete[] shapeType;
+      delete[] myLeafs;
     }
 
 
-    int getNeighborsIndexes(const FTreeCoordinate& center, const int limite, MortonIndex indexes[26], int indexInArray[26]) const{
-        int idxNeig = 0;
-        // We test all cells around
-        for(int idxX = -1 ; idxX <= 1 ; ++idxX){
-            if(!FMath::Between(center.getX() + idxX,0, limite)) continue;
-
-            for(int idxY = -1 ; idxY <= 1 ; ++idxY){
-                if(!FMath::Between(center.getY() + idxY,0, limite)) continue;
-
-                for(int idxZ = -1 ; idxZ <= 1 ; ++idxZ){
-                    if(!FMath::Between(center.getZ() + idxZ,0, limite)) continue;
-
-                    // if we are not on the current cell
-                    if( idxX || idxY || idxZ ){
-                        const FTreeCoordinate other(center.getX() + idxX,center.getY() + idxY,center.getZ() + idxZ);
-                        indexes[ idxNeig ] = other.getMortonIndex(this->OctreeHeight - 1);
-                        indexInArray[ idxNeig ] = ((idxX+1)*3 + (idxY+1)) * 3 + (idxZ+1);
-                        ++idxNeig;
-                    }
-                }
-            }
-        }
-        return idxNeig;
+    //////////////////////////////////////////////////////////
+    // Computation P2P that DO NOT need others data
+    //////////////////////////////////////////////////////////
+    FTRACE( FTrace::FRegion regionP2PTrace("Compute P2P", __FUNCTION__ , __FILE__ , __LINE__) );
+
+    FLOG(FTic computationCounter);
+
+#pragma omp parallel
+    {
+      KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
+      // There is a maximum of 26 neighbors
+      ContainerClass* neighbors[27];
+      int previous = 0;
+
+      for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
+	const int endAtThisShape = shapeLeaf[idxShape] + previous;
+
+#pragma omp for
+	for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; ++idxLeafs){
+	  LeafData& currentIter = leafsDataArray[idxLeafs];
+	  myThreadkernels.L2P(currentIter.cell, currentIter.targets);
+
+	  // need the current particles and neighbors particles
+	  const int counter = tree->getLeafsNeighbors(neighbors, currentIter.coord, LeafIndex);
+	  myThreadkernels.P2P( currentIter.coord,currentIter.targets,
+			       currentIter.sources, neighbors, counter);
+	}
+
+	previous = endAtThisShape;
+      }
+    }
+    FLOG(computationCounter.tac());
+    FTRACE( regionP2PTrace.end() );
+
+    //////////////////////////////////////////////////////////
+    // Wait send receive
+    //////////////////////////////////////////////////////////
+
+    FLOG(FTic computation2Counter);
+
+    // Create an octree with leaves from others
+    OctreeClass otherP2Ptree( tree->getHeight(), tree->getSubHeight(), tree->getBoxWidth(), tree->getBoxCenter() );
+    int complete = 0;
+    int*const indexMessage = new int[nbProcess * 2];
+    while( complete != iterRequest){
+      memset(indexMessage, 0, sizeof(int) * nbProcess * 2);
+      int countMessages = 0;
+      // Wait data
+      FLOG(waitCounter.tic());
+      MPI_Waitsome(iterRequest, requests, &countMessages, indexMessage, status);
+      
+      FLOG(waitCounter.tac());
+      complete += countMessages;
+
+
+      for(int idxRcv = 0 ; idxRcv < countMessages ; ++idxRcv){
+	if( indexMessage[idxRcv] < nbMessagesToRecv ){
+	  const int idxProc = status[idxRcv].MPI_SOURCE;
+	  int nbLeaves;
+	  (*recvBuffer[idxProc]) >> nbLeaves;
+	  for(int idxLeaf = 0 ; idxLeaf < nbLeaves ; ++idxLeaf){
+	    MortonIndex leafIndex;
+	    (*recvBuffer[idxProc]) >> leafIndex;
+	    otherP2Ptree.createLeaf(leafIndex)->getSrc()->restore((*recvBuffer[idxProc]));
+	  }
+	  delete recvBuffer[idxProc];
+	  recvBuffer[idxProc] = 0;
+	}
+      }
     }
+    delete[] indexMessage;
+
+    //////////////////////////////////////////////////////////
+    // Computation P2P that need others data
+    //////////////////////////////////////////////////////////
+
+    FTRACE( FTrace::FRegion regionOtherTrace("Compute P2P Other", __FUNCTION__ , __FILE__ , __LINE__) );
+    FLOG( computation2Counter.tic() );
+
+#pragma omp parallel
+    {
+      KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
+      // There is a maximum of 26 neighbors
+      ContainerClass* neighbors[27];
+      MortonIndex indexesNeighbors[27];
+      int indexArray[26];
+      // Number of leaves whose P2P needs data received from other processes
+      const int nbLeafToProceed = leafsNeedOtherData.getSize();
+
+#pragma omp for
+      for(int idxLeafs = 0 ; idxLeafs < nbLeafToProceed ; ++idxLeafs){
+	LeafData currentIter = leafsNeedOtherData[idxLeafs];
+
+	// need the current particles and neighbors particles
+	int counter = 0;
+	memset( neighbors, 0, sizeof(ContainerClass*) * 27);
+
+	// Take possible data
+	const int nbNeigh = currentIter.coord.getNeighborsIndexes(OctreeHeight, indexesNeighbors, indexArray);
+
+	for(int idxNeigh = 0 ; idxNeigh < nbNeigh ; ++idxNeigh){
+	  if(indexesNeighbors[idxNeigh] < (intervals[idProcess].min) || (intervals[idProcess].max) < indexesNeighbors[idxNeigh]){
+	    ContainerClass*const hypotheticNeighbor = otherP2Ptree.getLeafSrc(indexesNeighbors[idxNeigh]);
+	    if(hypotheticNeighbor){
+	      neighbors[ indexArray[idxNeigh] ] = hypotheticNeighbor;
+	      ++counter;
+	    }
+	  }
+	}
+
+	myThreadkernels.P2PRemote( currentIter.cell->getCoordinate(), currentIter.targets,
+				   currentIter.sources, neighbors, counter);
+      }
 
-    int getInteractionNeighbors(const FTreeCoordinate& workingCell,const int inLevel, MortonIndex inNeighbors[189], int inNeighborsPosition[189]) const{
-
-        // Then take each child of the parent's neighbors if not in directNeighbors
-        // Father coordinate
-        const FTreeCoordinate parentCell(workingCell.getX()>>1,workingCell.getY()>>1,workingCell.getZ()>>1);
-
-        // Limite at parent level number of box (split by 2 by level)
-        const int limite = FMath::pow2(inLevel-1);
-
-        int idxNeighbors = 0;
-        // We test all cells around
-        for(int idxX = -1 ; idxX <= 1 ; ++idxX){
-            if(!FMath::Between(parentCell.getX() + idxX,0,limite)) continue;
-
-            for(int idxY = -1 ; idxY <= 1 ; ++idxY){
-                if(!FMath::Between(parentCell.getY() + idxY,0,limite)) continue;
-
-                for(int idxZ = -1 ; idxZ <= 1 ; ++idxZ){
-                    if(!FMath::Between(parentCell.getZ() + idxZ,0,limite)) continue;
-
-                    // if we are not on the current cell
-                    if( idxX || idxY || idxZ ){
-                        const FTreeCoordinate otherParent(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ);
-                        const MortonIndex mortonOther = otherParent.getMortonIndex(inLevel-1);
-
-                        // For each child
-                        for(int idxCousin = 0 ; idxCousin < 8 ; ++idxCousin){
-                            const int xdiff  = ((otherParent.getX()<<1) | ( (idxCousin>>2) & 1)) - workingCell.getX();
-                            const int ydiff  = ((otherParent.getY()<<1) | ( (idxCousin>>1) & 1)) - workingCell.getY();
-                            const int zdiff  = ((otherParent.getZ()<<1) | (idxCousin&1)) - workingCell.getZ();
-
-                            // Test if it is a direct neighbor
-                            if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){
-                                // add to neighbors
-                                inNeighborsPosition[idxNeighbors] = ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3;
-                                inNeighbors[idxNeighbors++] = (mortonOther << 3) | idxCousin;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        return idxNeighbors;
     }
+
+    for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
+      delete sendBuffer[idxProc];
+      delete recvBuffer[idxProc];
+    }
+    delete[] globalReceiveMap;
+    delete[] leafsDataArray;
+
+    FLOG(computation2Counter.tac());
+
+
+    FLOG( FLog::Controller << "\tFinished (@Direct Pass (L2P + P2P) = "  << counterTime.tacAndElapsed() << "s)\n" );
+    FLOG( FLog::Controller << "\t\t Computation L2P + P2P : " << computationCounter.elapsed() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Computation P2P 2 : " << computation2Counter.elapsed() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Prepare P2P : " << prepareCounter.elapsed() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Gather P2P : " << gatherCounter.elapsed() << " s\n" );
+    FLOG( FLog::Controller << "\t\t Wait : " << waitCounter.elapsed() << " s\n" );
+
+  }
+
 };
 
 
diff --git a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp
index 7f35ac76f90094c3d403b55240788329cfa67f6e..4459907b18d9c9f10ba0aaf46a838776f5a56c98 100755
--- a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp
+++ b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp
@@ -17,7 +17,7 @@
 #define FFMMALGORITHMTHREADPROCPPERIODIC_HPP
 
 
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FLog.hpp"
 #include "../Utils/FTrace.hpp"
 #include "../Utils/FTic.hpp"
@@ -58,7 +58,7 @@
 * ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
 */
 template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
-class FFmmAlgorithmThreadProcPeriodic : protected FAssertable, public FAbstractAlgorithm {
+class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm {
 
     static const int MaxSizePerCell = 2048;
 
@@ -132,8 +132,8 @@ public:
           OctreeHeight(tree->getHeight()),intervals(new Interval[inComm.processCount()]),
           workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) {
 
-        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
-        fassert(-1 <= inUpperLevel, "inUpperLevel cannot be < -1", __LINE__, __FILE__);
+        FAssertLF(tree, "tree cannot be null");
+        FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1");
 
         FLOG(FLog::Controller << "FFmmAlgorithmThreadProcPeriodic\n");
         FLOG(FLog::Controller << "Max threads = "  << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n");
@@ -173,7 +173,7 @@ public:
                 myLastInterval.max = octreeIterator.getCurrentGlobalIndex();
             }
             iterArray = new typename OctreeClass::Iterator[numberOfLeafs];
-            fassert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
+            FAssertLF(iterArray, "iterArray bad alloc");
 
             // We get the min/max indexes from each procs
             FMpi::MpiAssert( MPI_Allgather( &myLastInterval, sizeof(Interval), MPI_BYTE, intervals, sizeof(Interval), MPI_BYTE, comm.getComm()),  __LINE__ );
@@ -418,7 +418,7 @@ private:
                                 ++position;
                             }
 
-                            fassert(!currentChild[position], "Already has a cell here", __LINE__, __FILE__);
+                            FAssertLF(!currentChild[position], "Already has a cell here");
 
                             recvBufferCells[position].deserializeUp(recvBuffer);
                             currentChild[position] = (CellClass*) &recvBufferCells[position];
@@ -481,7 +481,7 @@ private:
                             state >>= 1;
                             ++position;
                         }
-                        fassert(!currentChild[position], "Already has a cell here", __LINE__, __FILE__);
+                        FAssertLF(!currentChild[position], "Already has a cell here");
 
                         recvBufferCells[position].deserializeUp(recvBuffer);
 
diff --git a/Src/Core/FFmmAlgorithmThreadTsm.hpp b/Src/Core/FFmmAlgorithmThreadTsm.hpp
index 6340a2bbc4a430e77b5c629fdf37705d78766cc4..e42e8f3be508842c2f651e38ac60c2b1834e51ef 100755
--- a/Src/Core/FFmmAlgorithmThreadTsm.hpp
+++ b/Src/Core/FFmmAlgorithmThreadTsm.hpp
@@ -17,7 +17,7 @@
 #define FFMMALGORITHMTHREADTSM_HPP
 
 
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FLog.hpp"
 #include "../Utils/FTrace.hpp"
 #include "../Utils/FTic.hpp"
@@ -45,7 +45,7 @@
 * You should not write on sources in the P2P method!
 */
 template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
-class FFmmAlgorithmThreadTsm : protected FAssertable, public FAbstractAlgorithm{
+class FFmmAlgorithmThreadTsm : public FAbstractAlgorithm{
     OctreeClass* const tree;                  //< The octree to work on
     KernelClass** kernels;                    //< The kernels
 
@@ -65,7 +65,7 @@ public:
                       : tree(inTree) , kernels(0), iterArray(0),
                       MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()) {
 
-        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
+        FAssertLF(tree, "tree cannot be null");
 
         this->kernels = new KernelClass*[MaxThreads];
         for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
@@ -98,7 +98,7 @@ public:
             ++numberOfLeafs;
         } while(octreeIterator.moveRight());
         iterArray = new typename OctreeClass::Iterator[numberOfLeafs];
-        fassert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
+        FAssertLF(iterArray, "iterArray bad alloc");
 
         if(operationsToProceed & FFmmP2M) bottomPass();
 
@@ -313,18 +313,20 @@ public:
                     KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
                     #pragma omp for nowait
                     for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
-                        CellClass* potentialChild[8];
-                        CellClass** const realChild = iterArray[idxCell].getCurrentChild();
-                        CellClass* const currentCell = iterArray[idxCell].getCurrentCell();
-                        for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
-                            if(realChild[idxChild] && realChild[idxChild]->hasTargetsChild()){
-                                potentialChild[idxChild] = realChild[idxChild];
-                            }
-                            else{
-                                potentialChild[idxChild] = 0;
+                        if( iterArray[idxCell].getCurrentCell()->hasTargetsChild() ){
+                            CellClass* potentialChild[8];
+                            CellClass** const realChild = iterArray[idxCell].getCurrentChild();
+                            CellClass* const currentCell = iterArray[idxCell].getCurrentCell();
+                            for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
+                                if(realChild[idxChild] && realChild[idxChild]->hasTargetsChild()){
+                                    potentialChild[idxChild] = realChild[idxChild];
+                                }
+                                else{
+                                    potentialChild[idxChild] = 0;
+                                }
                             }
+                            myThreadkernels->L2L( currentCell , potentialChild, idxLevel);
                         }
-                        myThreadkernels->L2L( currentCell , potentialChild, idxLevel);
                     }
                 }
                 FLOG(computationCounter.tac());
@@ -362,12 +364,14 @@ public:
 
             #pragma omp for schedule(dynamic) nowait
             for(int idxLeafs = 0 ; idxLeafs < numberOfLeafs ; ++idxLeafs){
-                myThreadkernels->L2P(iterArray[idxLeafs].getCurrentCell(), iterArray[idxLeafs].getCurrentListTargets());
-                // need the current particles and neighbors particles
-                const int counter = tree->getLeafsNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalCoordinate(),heightMinusOne);
-                neighbors[13] = iterArray[idxLeafs].getCurrentListSrc();
-                myThreadkernels->P2PRemote( iterArray[idxLeafs].getCurrentGlobalCoordinate(), iterArray[idxLeafs].getCurrentListTargets(),
-                                      iterArray[idxLeafs].getCurrentListSrc() , neighbors, counter);
+                if( iterArray[idxLeafs].getCurrentCell()->hasTargetsChild() ){
+                    myThreadkernels->L2P(iterArray[idxLeafs].getCurrentCell(), iterArray[idxLeafs].getCurrentListTargets());
+                    // need the current particles and neighbors particles
+                    const int counter = tree->getLeafsNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalCoordinate(),heightMinusOne);
+                    neighbors[13] = iterArray[idxLeafs].getCurrentListSrc();
+                    myThreadkernels->P2PRemote( iterArray[idxLeafs].getCurrentGlobalCoordinate(), iterArray[idxLeafs].getCurrentListTargets(),
+                                      iterArray[idxLeafs].getCurrentListSrc() , neighbors, counter + 1);
+                }
             }
         }
         FLOG(computationCounter.tac());
diff --git a/Src/Core/FFmmAlgorithmTsm.hpp b/Src/Core/FFmmAlgorithmTsm.hpp
index e8c573082efb2ff0495fb4567e674a8cb266abb4..22a745b2fb71b8fe741a7c60d74956db68cf1844 100755
--- a/Src/Core/FFmmAlgorithmTsm.hpp
+++ b/Src/Core/FFmmAlgorithmTsm.hpp
@@ -17,7 +17,7 @@
 #define FFMMALGORITHMTSM_HPP
 
 
-#include "../Utils/FAssertable.hpp"
+#include "../Utils/FAssert.hpp"
 #include "../Utils/FLog.hpp"
 #include "../Utils/FTrace.hpp"
 #include "../Utils/FTic.hpp"
@@ -39,7 +39,7 @@
 * The differences with FmmAlgorithm is that it used target source model.
 */
 template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
-class FFmmAlgorithmTsm : protected FAssertable, public FAbstractAlgorithm{
+class FFmmAlgorithmTsm : public FAbstractAlgorithm{
 
     OctreeClass* const tree;                                                     //< The octree to work on
     KernelClass* const kernels;    //< The kernels
@@ -58,8 +58,8 @@ public:
     FFmmAlgorithmTsm(OctreeClass* const inTree, KernelClass* const inKernels)
         : tree(inTree) , kernels(inKernels) , OctreeHeight(tree->getHeight()){
 
-        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
-        fassert(kernels, "kernels cannot be null", __LINE__, __FILE__);
+        FAssertLF(tree, "tree cannot be null");
+        FAssertLF(kernels, "kernels cannot be null");
 
         FLOG(FLog::Controller << "FFmmAlgorithmTsm\n");
     }
@@ -195,6 +195,7 @@ public:
             do{
                 FLOG(computationCounter.tic());
                 CellClass* const currentCell = octreeIterator.getCurrentCell();
+
                 if(currentCell->hasTargetsChild()){
                     const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(),idxLevel);
                     if( counter ){
@@ -247,21 +248,23 @@ public:
             FLOG(FTic counterTimeLevel);
             // for each cells
             do{
-                FLOG(computationCounter.tic());
-                CellClass* potentialChild[8];
-                CellClass** const realChild = octreeIterator.getCurrentChild();
-                CellClass* const currentCell = octreeIterator.getCurrentCell();
-                for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
-                    if(realChild[idxChild] && realChild[idxChild]->hasTargetsChild()){
-                        potentialChild[idxChild] = realChild[idxChild];
-                    }
-                    else{
-                        potentialChild[idxChild] = 0;
+                if( octreeIterator.getCurrentCell()->hasTargetsChild() ){
+                    FLOG(computationCounter.tic());
+                    CellClass* potentialChild[8];
+                    CellClass** const realChild = octreeIterator.getCurrentChild();
+                    CellClass* const currentCell = octreeIterator.getCurrentCell();
+                    for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
+                        if(realChild[idxChild] && realChild[idxChild]->hasTargetsChild()){
+                            potentialChild[idxChild] = realChild[idxChild];
+                        }
+                        else{
+                            potentialChild[idxChild] = 0;
+                        }
                     }
+                    kernels->L2L( currentCell , potentialChild, idxLevel);
+                    FLOG(computationCounter.tac());
+                    FLOG(totalComputation += computationCounter.elapsed());
                 }
-                kernels->L2L( currentCell , potentialChild, idxLevel);
-                FLOG(computationCounter.tac());
-                FLOG(totalComputation += computationCounter.elapsed());
             } while(octreeIterator.moveRight());
 
             avoidGotoLeftIterator.moveDown();
@@ -291,15 +294,17 @@ public:
         ContainerClass* neighbors[27];
         // for each leafs
         do{
-            FLOG(computationCounter.tic());
-            kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets());
-            // need the current particles and neighbors particles
-            const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), heightMinusOne);
-            neighbors[13] = octreeIterator.getCurrentListSrc();
-            kernels->P2PRemote( octreeIterator.getCurrentGlobalCoordinate(), octreeIterator.getCurrentListTargets(),
-                          octreeIterator.getCurrentListSrc() , neighbors, counter);
-            FLOG(computationCounter.tac());
-            FLOG(totalComputation += computationCounter.elapsed());
+            if( octreeIterator.getCurrentCell()->hasTargetsChild() ){
+                FLOG(computationCounter.tic());
+                kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets());
+                // need the current particles and neighbors particles
+                const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), heightMinusOne);
+                neighbors[13] = octreeIterator.getCurrentListSrc();
+                kernels->P2PRemote( octreeIterator.getCurrentGlobalCoordinate(), octreeIterator.getCurrentListTargets(),
+                              octreeIterator.getCurrentListSrc() , neighbors, counter + 1);
+                FLOG(computationCounter.tac());
+                FLOG(totalComputation += computationCounter.elapsed());
+            }
         } while(octreeIterator.moveRight());
 
         FLOG( counterTime.tac() );
diff --git a/Src/Extensions/FExtendCellType.hpp b/Src/Extensions/FExtendCellType.hpp
index c795a7e9df245ec055d6314da0d030cdb567fd49..2f427496394e5640a8ab785cee5d7bb043ed5739 100755
--- a/Src/Extensions/FExtendCellType.hpp
+++ b/Src/Extensions/FExtendCellType.hpp
@@ -16,9 +16,6 @@
 #ifndef FEXTENDCELLTYPE_HPP
 #define FEXTENDCELLTYPE_HPP
 
-#include "../Containers/FBufferReader.hpp"
-#include "../Containers/FBufferWriter.hpp"
-
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
 * @class FExtendCellType
@@ -30,57 +27,64 @@
 */
 class FExtendCellType {
 protected:
-    /** Particle potential type */
-    static const int Neither = 0;
-    static const int ContainsSrc = 1;
-    static const int ContainsTargets = 2;
-
     /** Current type */
-    int type;
+    bool containsTargets; // set by setTargetsChildTrue() or restore(), cleared by resetToInitialState()
+    bool containsSources; // set by setSrcChildTrue() or restore(), cleared by resetToInitialState()
 
 public:
     /** Default constructor */
-    FExtendCellType() : type(Neither) {
+    FExtendCellType() : containsTargets(false), containsSources(false) {
     }
 
     /** Copy constructor */
-    FExtendCellType(const FExtendCellType& other) : type(other.type) {
+    FExtendCellType(const FExtendCellType& other) : containsTargets(other.containsTargets),
+            containsSources(other.containsSources){
     }
 
     /** Copy operator */
     FExtendCellType& operator=(const FExtendCellType& other) {
-        this->type = other.type;
+        this->containsTargets = other.containsTargets;
+        this->containsSources = other.containsSources;
         return *this;
     }
 
     /** To know if a cell has sources */
     bool hasSrcChild() const {
-        return this->type & ContainsSrc;
+        return containsSources;
     }
 
     /** To know if a cell has targets */
     bool hasTargetsChild() const {
-        return this->type & ContainsTargets;
+        return containsTargets;
     }
 
     /** To set cell as sources container */
     void setSrcChildTrue() {
-        this->type |= ContainsSrc;
+        containsSources = true;
     }
 
     /** To set cell as targets container */
     void setTargetsChildTrue() {
-        this->type |= ContainsTargets;
+        containsTargets = true;
     }
 
 public:
     /** Save current object */
-    void save(FBufferWriter& buffer) const {
-        buffer << type;
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const {
+        buffer << containsTargets;
+        buffer << containsSources;
     }
     /** Retrieve current object */
-    void restore(FBufferReader& buffer) {
-        buffer >> type;
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer) {
+        buffer >> containsTargets;
+        buffer >> containsSources;
+    }
+    /** Clear both flags: the cell is marked as containing neither targets nor sources */
+    void resetToInitialState(){
+        containsTargets = false;
+        containsSources = false;
     }
 };
 
diff --git a/Src/Extensions/FExtendCoordinate.hpp b/Src/Extensions/FExtendCoordinate.hpp
index 3ab18a2559126e0d39519761e1cd77b89bd2fbcd..882eb83323225e6099d67e5cc7dcb49f539380d0 100755
--- a/Src/Extensions/FExtendCoordinate.hpp
+++ b/Src/Extensions/FExtendCoordinate.hpp
@@ -19,8 +19,6 @@
 
 #include "../Utils/FGlobal.hpp"
 #include "../Containers/FTreeCoordinate.hpp"
-#include "../Containers/FBufferReader.hpp"
-#include "../Containers/FBufferWriter.hpp"
 
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
@@ -67,13 +65,16 @@ public:
 
 
     /** Save current object */
-    void save(FBufferWriter& buffer) const {
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const {
         coordinate.save(buffer);
     }
     /** Retrieve current object */
-    void restore(FBufferReader& buffer) {
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer) {
         coordinate.restore(buffer);
     }
+  
 };
 
 
diff --git a/Src/Extensions/FExtendMortonIndex.hpp b/Src/Extensions/FExtendMortonIndex.hpp
index 0738b675d2807b99c1bb52b8c224cc2760f56306..45676acd84e8280002e4b8befc46937230f34ab9 100755
--- a/Src/Extensions/FExtendMortonIndex.hpp
+++ b/Src/Extensions/FExtendMortonIndex.hpp
@@ -19,8 +19,6 @@
 
 #include "../Utils/FGlobal.hpp"
 #include "../Containers/FTreeCoordinate.hpp"
-#include "../Containers/FBufferReader.hpp"
-#include "../Containers/FBufferWriter.hpp"
 
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
@@ -59,11 +57,13 @@ public:
     }
 
     /** Save current object */
-    void save(FBufferWriter& buffer) const {
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const {
         buffer << mortonIndex;
     }
     /** Retrieve current object */
-    void restore(FBufferReader& buffer) {
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer) {
         buffer >> mortonIndex;
     }
 };
diff --git a/Src/Files/FMpiFmaLoader.hpp b/Src/Files/FMpiFmaLoader.hpp
index 50878a4353a254ae616c6e82374dc09929197c0f..ec1184e26f160d9d278e6f14d5b9194f0b6e5c79 100755
--- a/Src/Files/FMpiFmaLoader.hpp
+++ b/Src/Files/FMpiFmaLoader.hpp
@@ -81,7 +81,7 @@ public:
                 MPI_Status status;
                 if( MPI_File_read(file, &sizeOfElement, 1, MPI_INT, &status) == MPI_SUCCESS
                     && MPI_File_read(file, &this->totalNbParticles, 1, MPI_LONG_LONG, &status) == MPI_SUCCESS
-                    && MPI_File_read(file, xyzBoxWidth, 4, MPI_FLOAT, &status) == MPI_SUCCESS ){
+                        && MPI_File_read(file, xyzBoxWidth, 4, FMpi::GetType(xyzBoxWidth[0]), &status) == MPI_SUCCESS ){
 
                     FLOG(if(sizeOfElement != sizeof(FReal)){)
                         FLOG( FLog::Controller.writeFromLine("Warning type size between file and FReal are differents\n", __LINE__, __FILE__); )
@@ -110,13 +110,7 @@ public:
                     // local number to read
                     particles = new FReal[bufsize];
 
-                    if( sizeof(FReal) == sizeof(float) ){
-                        MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), MPI_FLOAT, &status);
-                    }
-                    else{
-                        MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), MPI_DOUBLE, &status);
-                    }
-
+                    MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), FMpi::GetType(xyzBoxWidth[0]), &status);
 
                     // check if needed
                     int count(0);
@@ -142,7 +136,6 @@ public:
                     FLOG( FLog::Controller.writeFromLine("Warning type size between file and FReal are differents\n", __LINE__, __FILE__); )
                 FLOG(})
                 removeWarning += fread(&this->totalNbParticles, sizeof(FSize), 1, file);
-
                 removeWarning += fread(&this->boxWidth, sizeof(FReal), 1, file);
                 this->boxWidth *= 2;
 
@@ -160,6 +153,7 @@ public:
 
                 if(filesize/4 != this->totalNbParticles){
                     printf("Error fileSize %ld, nbPart %lld\n", filesize/4, this->totalNbParticles);
+                    exit(EXIT_FAILURE); // size mismatch is an error: report a failing status, not 0
                 }
 
                 // in number of floats
@@ -172,7 +166,10 @@ public:
 
                 fseek(file, long(headDataOffSet + startPart * 4 * sizeof(FReal)), SEEK_SET);
 
-                removeWarning += fread(particles, sizeof(FReal), int(bufsize), file);
+                if( fread(particles, sizeof(FReal), size_t(bufsize), file) != size_t(bufsize)){ // compare full element counts, no int/unsigned truncation
+                    printf("Error when reading file.\n");
+                    exit(EXIT_FAILURE); // a short read is a failure, so do not report status 0
+                }
 
                 fclose(file);
             }
@@ -226,6 +223,10 @@ public:
       * @param the particle to fill
       */
     void fillParticle(FPoint*const inParticlePositions, FReal*const inPhysicalValue){
+        if(nbParticles*4 <= idxParticles){ // each particle uses 4 values, so this index is already past the loaded buffer
+            printf("Error you're loading too much particles.\n");
+            exit(EXIT_FAILURE); // reading past the buffer is an error: report a failing status, not 0
+        }
         inParticlePositions->setPosition(particles[idxParticles],particles[idxParticles+1],particles[idxParticles+2]);
         (*inPhysicalValue) = (particles[idxParticles+3]);
         idxParticles += 4;
diff --git a/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp b/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp
index 09b051eff9345100f40e4887aeff5a67b5d1caae..c65e4a598d7fdde90ad9a09456c723d7fd587ea9 100755
--- a/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp
+++ b/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp
@@ -22,12 +22,10 @@
 
 #include "../../Components/FAbstractKernels.hpp"
 
-#include "../P2P/FP2P.hpp"
-
+#include "FChebP2PKernels.hpp"
 #include "./FChebInterpolator.hpp"
 
-class FTreeCoordinate;
-template <KERNEL_FUNCCTION_IDENTIFIER Identifier, int NVALS> struct DirectInteactionComputer;
+#include "../../Containers/FTreeCoordinate.hpp"
 
 /**
  * @author Matthias Messner(matthias.messner@inria.fr)
@@ -126,113 +124,20 @@ public:
 	
 	
 
-	void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions
+    virtual void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions
 					 ContainerClass* const FRestrict TargetParticles,
                      const ContainerClass* const FRestrict /*SourceParticles*/,
 					 ContainerClass* const NeighborSourceParticles[27],
-					 const int /* size */)
-	{
-        DirectInteactionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles);
-	}
+                     const int /* size */) = 0;
 
 
-    void P2PRemote(const FTreeCoordinate& /*inPosition*/,
+    virtual void P2PRemote(const FTreeCoordinate& /*inPosition*/,
                    ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
-                   ContainerClass* const inNeighbors[27], const int /*inSize*/){
-        DirectInteactionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27);
-    }
-
-};
-
-
-///////////////////////////////////////////////////////
-// P2P Wrappers
-///////////////////////////////////////////////////////
+                   ContainerClass* const inNeighbors[27], const int /*inSize*/) = 0;
 
-/*! Specialization for Laplace potential */
-template <>
-struct DirectInteactionComputer<ONE_OVER_R, 1>
-{
-    template <typename ContainerClass>
-    static void P2P(		 ContainerClass* const FRestrict TargetParticles,
-                     ContainerClass* const NeighborSourceParticles[27]){
-        FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14);
-    }
-
-    template <typename ContainerClass>
-    static void P2PRemote( ContainerClass* const FRestrict inTargets,
-                           ContainerClass* const inNeighbors[27],
-                           const int inSize){
-        FP2P::FullRemote(inTargets,inNeighbors,inSize);
-    }
 };
 
 
-/*! Specialization for Leonard-Jones potential */
-template <>
-struct DirectInteactionComputer<LEONARD_JONES_POTENTIAL, 1>
-{
-    template <typename ContainerClass>
-    static void P2P(		 ContainerClass* const FRestrict TargetParticles,
-                     ContainerClass* const NeighborSourceParticles[27]){
-        FP2P::FullMutualLJ(TargetParticles,NeighborSourceParticles,14);
-    }
-
-    template <typename ContainerClass>
-    static void P2PRemote( ContainerClass* const FRestrict inTargets,
-                           ContainerClass* const inNeighbors[27],
-                           const int inSize){
-        FP2P::FullRemoteLJ(inTargets,inNeighbors,inSize);
-    }
-};
-
-///////////////////////////////////////////////////////
-// In case of multi right hand side
-///////////////////////////////////////////////////////
-
-template <int NVALS>
-struct DirectInteactionComputer<ONE_OVER_R, NVALS>
-{
-    template <typename ContainerClass>
-    static void P2P(		 ContainerClass* const FRestrict TargetParticles,
-                     ContainerClass* const NeighborSourceParticles[27]){
-        for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
-            FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14);
-        }
-    }
-
-    template <typename ContainerClass>
-    static void P2PRemote( ContainerClass* const FRestrict inTargets,
-                           ContainerClass* const inNeighbors[27],
-                           const int inSize){
-        for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
-            FP2P::FullRemote(inTargets,inNeighbors,inSize);
-        }
-    }
-};
-
-
-/*! Specialization for Leonard-Jones potential */
-template <int NVALS>
-struct DirectInteactionComputer<LEONARD_JONES_POTENTIAL, NVALS>
-{
-    template <typename ContainerClass>
-    static void P2P(		 ContainerClass* const FRestrict TargetParticles,
-                     ContainerClass* const NeighborSourceParticles[27]){
-        for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
-            FP2P::FullMutualLJ(TargetParticles,NeighborSourceParticles,14);
-        }
-    }
-
-    template <typename ContainerClass>
-    static void P2PRemote( ContainerClass* const FRestrict inTargets,
-                           ContainerClass* const inNeighbors[27],
-                           const int inSize){
-        for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
-            FP2P::FullRemoteLJ(inTargets,inNeighbors,inSize);
-        }
-    }
-};
 
 
 
diff --git a/Src/Kernels/Chebyshev/FChebCell.hpp b/Src/Kernels/Chebyshev/FChebCell.hpp
index 5bce71e4c7ba0a88e1a4b502ec2c07bdcd50bd08..4a4e53e9561075f372ac72685827d0fce29c84e9 100755
--- a/Src/Kernels/Chebyshev/FChebCell.hpp
+++ b/Src/Kernels/Chebyshev/FChebCell.hpp
@@ -21,6 +21,7 @@
 #include "../../Extensions/FExtendCoordinate.hpp"
 
 #include "./FChebTensor.hpp"
+#include "../../Extensions/FExtendCellType.hpp"
 
 /**
 * @author Matthias Messner (matthias.messner@inria.fr)
@@ -76,6 +77,24 @@ public:
     }
 };
 
+/** Chebyshev cell that also carries the target/source flags of FExtendCellType */
+template <int ORDER, int NVALS = 1>
+class FTypedChebCell : public FChebCell<ORDER,NVALS>, public FExtendCellType {
+    typedef FChebCell<ORDER,NVALS> ParentCell;
+public:
+    /** Write the FChebCell part first, then the type flags */
+    template <class BufferWriterClass> void save(BufferWriterClass& buffer) const{
+        ParentCell::save(buffer);
+        FExtendCellType::save(buffer);
+    }
+    /** Read back in the same order as save(): FChebCell part, then the type flags */
+    template <class BufferReaderClass> void restore(BufferReaderClass& buffer){
+        ParentCell::restore(buffer);
+        FExtendCellType::restore(buffer);
+    }
+    /** Reset the FChebCell part, then clear the type flags */
+    void resetToInitialState(){ ParentCell::resetToInitialState(); FExtendCellType::resetToInitialState(); }
+};
 
 #endif //FCHEBCELL_HPP
 
diff --git a/Src/Kernels/Chebyshev/FChebKernel.hpp b/Src/Kernels/Chebyshev/FChebKernel.hpp
index 579abcfe99ddeda01f288197ee265b58973b6242..c064d568d882628154dd3862d7327b5e386abc15 100755
--- a/Src/Kernels/Chebyshev/FChebKernel.hpp
+++ b/Src/Kernels/Chebyshev/FChebKernel.hpp
@@ -203,6 +203,22 @@ public:
         }
 	}
 
+    /** Direct interactions of a leaf; forwards to the DirectInteactionComputer selected by the matrix kernel and NVALS */
+    void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions
+             ContainerClass* const FRestrict TargetParticles, const ContainerClass* const FRestrict /*SourceParticles*/,
+             ContainerClass* const NeighborSourceParticles[27], const int /* size */)
+    {
+        typedef DirectInteactionComputer<MatrixKernelClass::Identifier, NVALS> DirectComputer;
+        DirectComputer::P2P(TargetParticles,NeighborSourceParticles);
+    }
+
+
+    /** Remote direct interactions. NOTE(review): 27 is forwarded instead of inSize - presumably every neighbor slot is scanned; confirm */
+    void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets,
+                   const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){
+        DirectInteactionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27);
+    }
+
 };
 
 
diff --git a/Src/Kernels/Chebyshev/FChebP2PKernels.hpp b/Src/Kernels/Chebyshev/FChebP2PKernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f54b41c669e098477460e2388b83d6ad6a47c340
--- /dev/null
+++ b/Src/Kernels/Chebyshev/FChebP2PKernels.hpp
@@ -0,0 +1,101 @@
+#ifndef FCHEBP2PKERNELS_HPP
+#define FCHEBP2PKERNELS_HPP
+
+
+#include "../P2P/FP2P.hpp"
+
+template <KERNEL_FUNCCTION_IDENTIFIER Identifier, int NVALS>
+struct DirectInteactionComputer;
+
+///////////////////////////////////////////////////////
+// P2P Wrappers
+///////////////////////////////////////////////////////
+
+/*! Full specialization: Laplace potential (ONE_OVER_R) with a single right hand side */
+template <>
+struct DirectInteactionComputer<ONE_OVER_R, 1>
+{
+    /*! Forwards the targets and the neighbor array to FP2P::FullMutual, with 14 as third argument */
+    template <typename ContainerClass>
+    static void P2P(ContainerClass* const FRestrict TargetParticles,
+                    ContainerClass* const NeighborSourceParticles[27]){
+        FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14);
+    }
+    /*! Forwards the targets, the neighbor array and inSize unchanged to FP2P::FullRemote */
+    template <typename ContainerClass>
+    static void P2PRemote(ContainerClass* const FRestrict inTargets,
+                          ContainerClass* const inNeighbors[27], const int inSize){
+        FP2P::FullRemote(inTargets,inNeighbors,inSize);
+    }
+};
+
+
+/*! Full specialization: Lennard-Jones potential (LEONARD_JONES_POTENTIAL) with a single right hand side */
+template <>
+struct DirectInteactionComputer<LEONARD_JONES_POTENTIAL, 1>
+{
+    /*! Forwards the targets and the neighbor array to FP2P::FullMutualLJ, with 14 as third argument */
+    template <typename ContainerClass>
+    static void P2P(ContainerClass* const FRestrict TargetParticles,
+                    ContainerClass* const NeighborSourceParticles[27]){
+        FP2P::FullMutualLJ(TargetParticles,NeighborSourceParticles,14);
+    }
+    /*! Forwards the targets, the neighbor array and inSize unchanged to FP2P::FullRemoteLJ */
+    template <typename ContainerClass>
+    static void P2PRemote(ContainerClass* const FRestrict inTargets,
+                          ContainerClass* const inNeighbors[27], const int inSize){
+        FP2P::FullRemoteLJ(inTargets,inNeighbors,inSize);
+    }
+};
+
+///////////////////////////////////////////////////////
+// In case of multi right hand side
+///////////////////////////////////////////////////////
+
+
+
+/*! Laplace potential, NVALS right hand sides. NOTE(review): the same call is issued NVALS times with identical arguments - confirm intended */
+template <int NVALS>
+struct DirectInteactionComputer<ONE_OVER_R, NVALS>
+{
+    template <typename ContainerClass>
+    static void P2P(ContainerClass* const FRestrict TargetParticles,
+                    ContainerClass* const NeighborSourceParticles[27]){
+        for(int rhs = 0 ; rhs != NVALS ; ++rhs){
+            FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14);
+        }
+    }
+    /*! Calls FP2P::FullRemote once per right hand side, forwarding inSize unchanged */
+    template <typename ContainerClass>
+    static void P2PRemote(ContainerClass* const FRestrict inTargets,
+                          ContainerClass* const inNeighbors[27], const int inSize){
+        for(int rhs = 0 ; rhs != NVALS ; ++rhs){
+            FP2P::FullRemote(inTargets,inNeighbors,inSize);
+        }
+    }
+};
+
+
+/*! Partial specialization: Lennard-Jones potential (LEONARD_JONES_POTENTIAL) with NVALS right hand sides */
+template <int NVALS>
+struct DirectInteactionComputer<LEONARD_JONES_POTENTIAL, NVALS>
+{
+    /*! Calls FP2P::FullMutualLJ once per right hand side. NOTE(review): every call receives identical arguments - confirm intended */
+    template <typename ContainerClass>
+    static void P2P(ContainerClass* const FRestrict TargetParticles,
+                    ContainerClass* const NeighborSourceParticles[27]){
+        for(int rhs = 0 ; rhs != NVALS ; ++rhs){
+            FP2P::FullMutualLJ(TargetParticles,NeighborSourceParticles,14);
+        }
+    }
+    /*! Calls FP2P::FullRemoteLJ once per right hand side, forwarding inSize unchanged */
+    template <typename ContainerClass>
+    static void P2PRemote(ContainerClass* const FRestrict inTargets,
+                          ContainerClass* const inNeighbors[27], const int inSize){
+        for(int rhs = 0 ; rhs != NVALS ; ++rhs){
+            FP2P::FullRemoteLJ(inTargets,inNeighbors,inSize);
+        }
+    }
+};
+
+#endif // FCHEBP2PKERNELS_HPP
diff --git a/Src/Kernels/Chebyshev/FChebRoots.cpp b/Src/Kernels/Chebyshev/FChebRoots.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..134ad00780bd30be11e922b7dc3db112ad126875
--- /dev/null
+++ b/Src/Kernels/Chebyshev/FChebRoots.cpp
@@ -0,0 +1,135 @@
+#include "FChebInterpolator.hpp"
+
+// The roots are defined in this single source file so that including FChebInterpolator.hpp from several files does not define them more than once.
+
+// order 2
+template<> const double FChebRoots<2>::roots[] = {-0.707106781186548,
+                                                                                                    0.707106781186547};
+
+// order 3
+template<> const double FChebRoots<3>::roots[] = {-8.66025403784439e-01,
+                                                                                                    0.0,
+                                                                                                    8.66025403784438e-01};
+
+// order 4
+template<> const double FChebRoots<4>::roots[] = {-0.923879532511287,
+                                                                                                    -0.382683432365090,
+                                                                                                    0.382683432365090,
+                                                                                                    0.923879532511287};
+
+// order 5
+template<> const double FChebRoots<5>::roots[] = {-9.51056516295154e-01,
+                                                                                                 -5.87785252292473e-01,
+                                                                                                    0.0,
+                                                                                                    5.87785252292473e-01,
+                                                                                                    9.51056516295154e-01};
+
+// order 6
+template<> const double FChebRoots<6>::roots[] = {-0.965925826289068,
+                                                                                                    -0.707106781186548,
+                                                                                                    -0.258819045102521,
+                                                                                                    0.258819045102521,
+                                                                                                    0.707106781186547,
+                                                                                                    0.965925826289068};
+
+// order 7
+template<> const double FChebRoots<7>::roots[] = {-9.74927912181824e-01,
+                                                                                                    -7.81831482468030e-01,
+                                                                                                    -4.33883739117558e-01,
+                                                                                                    0.0,
+                                                                                                    4.33883739117558e-01,
+                                                                                                    7.81831482468030e-01,
+                                                                                                    9.74927912181824e-01};
+
+// order 8
+template<> const double FChebRoots<8>::roots[] = {-0.980785280403230,
+                                                                                                    -0.831469612302545,
+                                                                                                    -0.555570233019602,
+                                                                                                    -0.195090322016128,
+                                                                                                    0.195090322016128,
+                                                                                                    0.555570233019602,
+                                                                                                    0.831469612302545,
+                                                                                                    0.980785280403230};
+
+// order 9
+template<> const double FChebRoots<9>::roots[] = {-9.84807753012208e-01,
+                                                                                                    -8.66025403784439e-01,
+                                                                                                    -6.42787609686539e-01,
+                                                                                                    -3.42020143325669e-01,
+                                                                                                    0.0,
+                                                                                                    3.42020143325669e-01,
+                                                                                                    6.42787609686539e-01,
+                                                                                                    8.66025403784438e-01,
+                                                                                                    9.84807753012208e-01,};
+
+// order 10
+template<> const double FChebRoots<10>::roots[] = {-0.987688340595138,
+                                                                                                     -0.891006524188368,
+                                                                                                     -0.707106781186548,
+                                                                                                     -0.453990499739547,
+                                                                                                     -0.156434465040231,
+                                                                                                     0.156434465040231,
+                                                                                                     0.453990499739547,
+                                                                                                     0.707106781186547,
+                                                                                                     0.891006524188368,
+                                                                                                     0.987688340595138};
+
+// order 11
+template<> const double FChebRoots<11>::roots[] = {-9.89821441880933e-01,
+                                                                                                     -9.09631995354518e-01,
+                                                                                                     -7.55749574354258e-01,
+                                                                                                     -5.40640817455598e-01,
+                                                                                                     -2.81732556841430e-01,
+                                                                                                     0.0,
+                                                                                                     2.81732556841430e-01,
+                                                                                                     5.40640817455597e-01,
+                                                                                                     7.55749574354258e-01,
+                                                                                                     9.09631995354518e-01,
+                                                                                                     9.89821441880933e-01};
+
+// order 12
+template<> const double FChebRoots<12>::roots[] = {-0.991444861373810,
+                                                                                                     -0.923879532511287,
+                                                                                                     -0.793353340291235,
+                                                                                                     -0.608761429008721,
+                                                                                                     -0.382683432365090,
+                                                                                                     -0.130526192220052,
+                                                                                                     0.130526192220052,
+                                                                                                     0.382683432365090,
+                                                                                                     0.608761429008721,
+                                                                                                     0.793353340291235,
+                                                                                                     0.923879532511287,
+                                                                                                     0.991444861373810};
+
+
+// order 13
+template<> const double FChebRoots<13>::roots[] = {-9.92708874098054e-01,
+                                                                                                     -9.35016242685415e-01,
+                                                                                                     -8.22983865893656e-01,
+                                                                                                     -6.63122658240795e-01,
+                                                                                                     -4.64723172043769e-01,
+                                                                                                     -2.39315664287558e-01,
+                                                                                                     0.0,
+                                                                                                     2.39315664287557e-01,
+                                                                                                     4.64723172043769e-01,
+                                                                                                     6.63122658240795e-01,
+                                                                                                     8.22983865893656e-01,
+                                                                                                     9.35016242685415e-01,
+                                                                                                     9.92708874098054e-01};
+
+// order 14
+template<> const double FChebRoots<14>::roots[] = {
+                    -0.99371220989324258353,
+                    -0.94388333030836756290,
+                    -0.84672419922828416835,
+                    -0.70710678118654752440,
+                    -0.53203207651533656356,
+                    -0.33027906195516708177,
+                    -0.11196447610330785847,
+                     0.11196447610330785847,
+                     0.33027906195516708177,
+                     0.53203207651533656356,
+                     0.70710678118654752440,
+                     0.84672419922828416835,
+                     0.94388333030836756290,
+            0.99371220989324258353};
diff --git a/Src/Kernels/Chebyshev/FChebRoots.hpp b/Src/Kernels/Chebyshev/FChebRoots.hpp
index 7b4dc5995f9af2dd2458e5f16bef8983b1e32850..062a52936cbd24d1ca78ba6e18404c54ca4f62e3 100755
--- a/Src/Kernels/Chebyshev/FChebRoots.hpp
+++ b/Src/Kernels/Chebyshev/FChebRoots.hpp
@@ -91,141 +91,46 @@ struct FChebRoots : FNoCopyable
   }
 };
 
-
-
-
+// The roots are only declared here; see the .cpp file for their definitions.
 
 // order 2
-template<> const double FChebRoots<2>::roots[] = {-0.707106781186548,
-																									0.707106781186547};
+template<> const double FChebRoots<2>::roots[];
 
 // order 3
-template<> const double FChebRoots<3>::roots[] = {-8.66025403784439e-01,
-																									0.0,
-																									8.66025403784438e-01};
+template<> const double FChebRoots<3>::roots[];
 
 // order 4
-template<> const double FChebRoots<4>::roots[] = {-0.923879532511287,
-																									-0.382683432365090,
-																									0.382683432365090,
-																									0.923879532511287};
+template<> const double FChebRoots<4>::roots[];
 
 // order 5
-template<> const double FChebRoots<5>::roots[] = {-9.51056516295154e-01,
-																								 -5.87785252292473e-01,
-																									0.0,
-																									5.87785252292473e-01,
-																									9.51056516295154e-01};
+template<> const double FChebRoots<5>::roots[];
 
 // order 6
-template<> const double FChebRoots<6>::roots[] = {-0.965925826289068,
-																									-0.707106781186548,
-																									-0.258819045102521,
-																									0.258819045102521,
-																									0.707106781186547,
-																									0.965925826289068};
+template<> const double FChebRoots<6>::roots[];
 
 // order 7
-template<> const double FChebRoots<7>::roots[] = {-9.74927912181824e-01,
-																									-7.81831482468030e-01,
-																									-4.33883739117558e-01,
-																									0.0,
-																									4.33883739117558e-01,
-																									7.81831482468030e-01,
-																									9.74927912181824e-01};
+template<> const double FChebRoots<7>::roots[];
 
 // order 8
-template<> const double FChebRoots<8>::roots[] = {-0.980785280403230,
-																									-0.831469612302545,
-																									-0.555570233019602,
-																									-0.195090322016128,
-																									0.195090322016128,
-																									0.555570233019602,
-																									0.831469612302545,
-																									0.980785280403230};
+template<> const double FChebRoots<8>::roots[];
 
 // order 9
-template<> const double FChebRoots<9>::roots[] = {-9.84807753012208e-01,
-																									-8.66025403784439e-01,
-																									-6.42787609686539e-01,
-																									-3.42020143325669e-01,
-																									0.0,
-																									3.42020143325669e-01,
-																									6.42787609686539e-01,
-																									8.66025403784438e-01,
-																									9.84807753012208e-01,};
+template<> const double FChebRoots<9>::roots[];
 
 // order 10
-template<> const double FChebRoots<10>::roots[] = {-0.987688340595138,
-																									 -0.891006524188368,
-																									 -0.707106781186548,
-																									 -0.453990499739547,
-																									 -0.156434465040231,
-																									 0.156434465040231,
-																									 0.453990499739547,
-																									 0.707106781186547,
-																									 0.891006524188368,
-																									 0.987688340595138};
+template<> const double FChebRoots<10>::roots[];
 
 // order 11
-template<> const double FChebRoots<11>::roots[] = {-9.89821441880933e-01,
-																									 -9.09631995354518e-01,
-																									 -7.55749574354258e-01,
-																									 -5.40640817455598e-01,
-																									 -2.81732556841430e-01,
-																									 0.0,
-																									 2.81732556841430e-01,
-																									 5.40640817455597e-01,
-																									 7.55749574354258e-01,
-																									 9.09631995354518e-01,
-																									 9.89821441880933e-01};
+template<> const double FChebRoots<11>::roots[];
 
 // order 12
-template<> const double FChebRoots<12>::roots[] = {-0.991444861373810,
-																									 -0.923879532511287,
-																									 -0.793353340291235,
-																									 -0.608761429008721,
-																									 -0.382683432365090,
-																									 -0.130526192220052,
-																									 0.130526192220052,
-																									 0.382683432365090,
-																									 0.608761429008721,
-																									 0.793353340291235,
-																									 0.923879532511287,
-																									 0.991444861373810};
-
+template<> const double FChebRoots<12>::roots[];
 
 // order 13
-template<> const double FChebRoots<13>::roots[] = {-9.92708874098054e-01,
-																									 -9.35016242685415e-01,
-																									 -8.22983865893656e-01,
-																									 -6.63122658240795e-01,
-																									 -4.64723172043769e-01,
-																									 -2.39315664287558e-01,
-																									 0.0,
-																									 2.39315664287557e-01,
-																									 4.64723172043769e-01,
-																									 6.63122658240795e-01,
-																									 8.22983865893656e-01,
-																									 9.35016242685415e-01,
-																									 9.92708874098054e-01};
+template<> const double FChebRoots<13>::roots[];
 
 // order 14
-template<> const double FChebRoots<14>::roots[] = {
-                    -0.99371220989324258353,
-                    -0.94388333030836756290,
-                    -0.84672419922828416835,
-                    -0.70710678118654752440,
-                    -0.53203207651533656356,
-                    -0.33027906195516708177,
-                    -0.11196447610330785847,
-                     0.11196447610330785847,
-                     0.33027906195516708177,
-                     0.53203207651533656356,
-                     0.70710678118654752440,
-                     0.84672419922828416835,
-                     0.94388333030836756290,
-		    0.99371220989324258353};
+template<> const double FChebRoots<14>::roots[];
 
 
 #endif
diff --git a/Src/Kernels/Chebyshev/FChebSymKernel.hpp b/Src/Kernels/Chebyshev/FChebSymKernel.hpp
index a9181c68f39baef449bf5d1a6fbc2add1a3c045c..b54556a86e095806d7084380a59a5e63671f0a24 100755
--- a/Src/Kernels/Chebyshev/FChebSymKernel.hpp
+++ b/Src/Kernels/Chebyshev/FChebSymKernel.hpp
@@ -448,6 +448,21 @@ public:
         }
 	}
 
+    void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions
+                     ContainerClass* const FRestrict TargetParticles,
+                     const ContainerClass* const FRestrict /*SourceParticles*/, // unused here
+                     ContainerClass* const NeighborSourceParticles[27],
+                     const int /* size */) // unused here
+    { // Direct interactions: forwards the targets and the neighbor array to DirectInteactionComputer::P2P
+        DirectInteactionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles);
+    }
+
+
+    void P2PRemote(const FTreeCoordinate& /*inPosition*/, // unused here
+                   ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+                   ContainerClass* const inNeighbors[27], const int /*inSize*/){ // forwards to DirectInteactionComputer::P2PRemote
+        DirectInteactionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27); // NOTE(review): always passes 27, inSize is ignored - confirm intended
+    }
 
 };
 
diff --git a/Src/Kernels/P2P/FP2P.hpp b/Src/Kernels/P2P/FP2P.hpp
index 37adf8ea5f3f43de8c8c69ea7a4645b65b2df9e2..e7d6776a9a22bd158321e2990cd81141e2675826 100644
--- a/Src/Kernels/P2P/FP2P.hpp
+++ b/Src/Kernels/P2P/FP2P.hpp
@@ -524,7 +524,7 @@ public:
 
         for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){
             if( inNeighbors[idxNeighbors] ){
-                const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles()/2;
+                const int nbParticlesSources = (inNeighbors[idxNeighbors]->getNbParticles()+1)/2;
                 const __m128d*const sourcesPhysicalValues = (const __m128d*)inNeighbors[idxNeighbors]->getPhysicalValues();
                 const __m128d*const sourcesX = (const __m128d*)inNeighbors[idxNeighbors]->getPositions()[0];
                 const __m128d*const sourcesY = (const __m128d*)inNeighbors[idxNeighbors]->getPositions()[1];
diff --git a/Src/Kernels/Rotation/FRotationCell.hpp b/Src/Kernels/Rotation/FRotationCell.hpp
index 1aee3cee5f012148e8994e873fccb1c32074ac18..404d925b36c2f9b44c0e79a27662cb0ff161c870 100755
--- a/Src/Kernels/Rotation/FRotationCell.hpp
+++ b/Src/Kernels/Rotation/FRotationCell.hpp
@@ -16,8 +16,6 @@
 #ifndef FROTATIONCELL_HPP
 #define FROTATIONCELL_HPP
 
-#include "../../Components/FAbstractSerializable.hpp"
-#include "../../Components/FAbstractSendable.hpp"
 #include "../../Utils/FComplexe.hpp"
 #include "../../Utils/FMemUtils.hpp"
 
@@ -25,8 +23,6 @@
 
 #include "../../Components/FBasicCell.hpp"
 
-#include "../../Containers/FBufferWriter.hpp"
-#include "../../Containers/FBufferReader.hpp"
 
 /** This class is a cell used for the rotation based kernel
   * The size of the multipole and local vector are based on a template
@@ -109,33 +105,58 @@ public:
     ///////////////////////////////////////////////////////
     // to extend FAbstractSendable
     ///////////////////////////////////////////////////////
-    void serializeUp(FBufferWriter& buffer) const{
+    template <class BufferWriterClass>
+    void serializeUp(BufferWriterClass& buffer) const{
         buffer.write(multipole_exp, MultipoleSize);
     }
-    void deserializeUp(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void deserializeUp(BufferReaderClass& buffer){
         buffer.fillArray(multipole_exp, MultipoleSize);
     }
 
-    void serializeDown(FBufferWriter& buffer) const{
+    template <class BufferWriterClass>
+    void serializeDown(BufferWriterClass& buffer) const{
         buffer.write(local_exp, LocalSize);
     }
-    void deserializeDown(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void deserializeDown(BufferReaderClass& buffer){
         buffer.fillArray(local_exp, LocalSize);
     }
 
     ///////////////////////////////////////////////////////
     // to extend Serializable
     ///////////////////////////////////////////////////////
-    void save(FBufferWriter& buffer) const{
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const{
         FBasicCell::save(buffer);
         buffer.write(multipole_exp, MultipoleSize);
         buffer.write(local_exp, LocalSize);
     }
-    void restore(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer){
         FBasicCell::restore(buffer);
         buffer.fillArray(multipole_exp, MultipoleSize);
         buffer.fillArray(local_exp, LocalSize);
     }
 };
 
+template <int P> // Rotation cell combined with the FExtendCellType extension
+class FTypedRotationCell : public FRotationCell<P>, public FExtendCellType {
+public:
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const{ // writes the FRotationCell<P> part first, then the FExtendCellType part
+        FRotationCell<P>::save(buffer);
+        FExtendCellType::save(buffer);
+    }
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer){ // reads the two parts in the same order save() writes them
+        FRotationCell<P>::restore(buffer);
+        FExtendCellType::restore(buffer);
+    }
+    void resetToInitialState(){ // resets both base sub-objects
+        FRotationCell<P>::resetToInitialState();
+        FExtendCellType::resetToInitialState();
+    }
+};
+
 #endif // FROTATIONCELL_HPP
diff --git a/Src/Kernels/Spherical/FSphericalCell.hpp b/Src/Kernels/Spherical/FSphericalCell.hpp
index e04564b1b6de21f6c7560e100b9f8ee9853ad56c..6064ba9c82c566fd2e9577dbc92878cb432c460b 100755
--- a/Src/Kernels/Spherical/FSphericalCell.hpp
+++ b/Src/Kernels/Spherical/FSphericalCell.hpp
@@ -16,9 +16,6 @@
 #ifndef FSPHERICALCELL_HPP
 #define FSPHERICALCELL_HPP
 
-
-#include "../../Components/FAbstractSerializable.hpp"
-#include "../../Components/FAbstractSendable.hpp"
 #include "../../Utils/FComplexe.hpp"
 #include "../../Utils/FMemUtils.hpp"
 
@@ -26,9 +23,6 @@
 
 #include "../../Components/FBasicCell.hpp"
 
-#include "../../Containers/FBufferWriter.hpp"
-#include "../../Containers/FBufferReader.hpp"
-
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
 */
@@ -124,29 +118,35 @@ public:
     ///////////////////////////////////////////////////////
     // to extend FAbstractSendable
     ///////////////////////////////////////////////////////
-    void serializeUp(FBufferWriter& buffer) const{
+    template <class BufferWriterClass>
+    void serializeUp(BufferWriterClass& buffer) const{
         buffer.write(multipole_exp, PoleSize);
     }
-    void deserializeUp(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void deserializeUp(BufferReaderClass& buffer){
         buffer.fillArray(multipole_exp, PoleSize);
     }
 
-    void serializeDown(FBufferWriter& buffer) const{
+    template <class BufferWriterClass>
+    void serializeDown(BufferWriterClass& buffer) const{
         buffer.write(local_exp, LocalSize);
     }
-    void deserializeDown(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void deserializeDown(BufferReaderClass& buffer){
         buffer.fillArray(local_exp, LocalSize);
     }
 
     ///////////////////////////////////////////////////////
     // to extend Serializable
     ///////////////////////////////////////////////////////
-    void save(FBufferWriter& buffer) const{
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const{
         FBasicCell::save(buffer);
         buffer.write(multipole_exp, PoleSize);
         buffer.write(local_exp, LocalSize);
     }
-    void restore(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer){
         FBasicCell::restore(buffer);
         buffer.fillArray(multipole_exp, PoleSize);
         buffer.fillArray(local_exp, LocalSize);
@@ -163,14 +163,20 @@ int FSphericalCell::PoleSize(-1);
 */
 class FTypedSphericalCell : public FSphericalCell, public FExtendCellType {
 public:
-    void save(FBufferWriter& buffer) const{
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const{
         FSphericalCell::save(buffer);
         FExtendCellType::save(buffer);
     }
-    void restore(FBufferReader& buffer){
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer){
         FSphericalCell::restore(buffer);
         FExtendCellType::restore(buffer);
     }
+    void resetToInitialState(){ // resets both base sub-objects
+        FSphericalCell::resetToInitialState();
+        FExtendCellType::resetToInitialState();
+    }
 };
 
 
diff --git a/Src/Kernels/Taylor/FTaylorCell.hpp b/Src/Kernels/Taylor/FTaylorCell.hpp
index f6c8b6bef8afd507fb37c37d0087abf6bf4dba59..e04eeda397419d530ddc667b90d130ce0a498ace 100644
--- a/Src/Kernels/Taylor/FTaylorCell.hpp
+++ b/Src/Kernels/Taylor/FTaylorCell.hpp
@@ -19,6 +19,7 @@
 #include "../../Components/FBasicCell.hpp"
 #include "../../Containers/FVector.hpp"
 #include "../../Utils/FMemUtils.hpp"
+#include "../../Extensions/FExtendCellType.hpp"
 
 /**
  *@author Cyrille Piacibello
@@ -82,4 +83,23 @@ public:
 
 };
 
+template <int P, int order> // Taylor cell combined with the FExtendCellType extension
+class FTypedTaylorCell : public FTaylorCell<P,order>, public FExtendCellType {
+public:
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const{ // writes the FTaylorCell<P,order> part first, then the FExtendCellType part
+        FTaylorCell<P,order>::save(buffer);
+        FExtendCellType::save(buffer);
+    }
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer){ // reads the two parts in the same order save() writes them
+        FTaylorCell<P,order>::restore(buffer);
+        FExtendCellType::restore(buffer);
+    }
+    void resetToInitialState(){ // resets both base sub-objects
+        FTaylorCell<P,order>::resetToInitialState();
+        FExtendCellType::resetToInitialState();
+    }
+};
+
 #endif
diff --git a/Src/ScalFmmConfig.h.cmake b/Src/ScalFmmConfig.h.cmake
index 0cdeca23f158cb2db3d7933c0b769d536d4ea0f0..70a99008be553ffa1b5ee216a00ddfebd048fdfc 100755
--- a/Src/ScalFmmConfig.h.cmake
+++ b/Src/ScalFmmConfig.h.cmake
@@ -68,4 +68,10 @@
 
 #cmakedefine ScalFMM_USE_SSE
 
+///////////////////////////////////////////////////////
+// Assert tests
+///////////////////////////////////////////////////////
+
+#cmakedefine ScalFMM_USE_ASSERT
+
 #endif // CONFIG_H
diff --git a/Src/Utils/FAssert.hpp b/Src/Utils/FAssert.hpp
new file mode 100755
index 0000000000000000000000000000000000000000..ee81244e78a77d190c357dcb45d5083120e9ef01
--- /dev/null
+++ b/Src/Utils/FAssert.hpp
@@ -0,0 +1,104 @@
+// ===================================================================================
+// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
+// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
+// This software is a computer program whose purpose is to compute the FMM.
+//
+// This software is governed by the CeCILL-C and LGPL licenses and
+// abiding by the rules of distribution of free software.  
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public and CeCILL-C Licenses for more details.
+// "http://www.cecill.info". 
+// "http://www.gnu.org/licenses".
+// ===================================================================================
+#ifndef FASSERT_HPP
+#define FASSERT_HPP
+
+#include <iostream>
+#include <cassert>
+#include <exception>
+#include "FGlobal.hpp"
+
+
+/**
+ * @brief The SpError class writes assertion-failure messages to std::cerr.
+ * It is recommended to use the macro:
+ * FAssertLF( aTest , "some data ", "to ", plot);
+ */
+class SpError {
+protected:
+    /**
+     * @brief ErrPrint protected helper (end of recursion): terminates the message with a newline
+     */
+    static void ErrPrint(){
+        std::cerr << '\n';
+    }
+
+    /**
+     * @brief ErrPrint protected helper: streams toPrint to std::cerr, then recurses on the remaining args
+     */
+    template<typename T, typename... Args>
+    static void ErrPrint(const T& toPrint, Args... args){
+        std::cerr << toPrint;
+        ErrPrint( args... );
+    }
+
+
+public:
+    //////////////////////////////////////////////////////////////
+    // Should not be called directly
+    //////////////////////////////////////////////////////////////
+
+    /** Nothing to print: produces no output at all (not even the "[ERROR] " prefix) */
+    static void Print(){
+    }
+
+    /** One or more things to print: "[ERROR] " prefix, each value in order, then a newline */
+    template<typename T, typename... Args>
+    static void Print(const T& toPrint, Args... args){
+        std::cerr << "[ERROR] ";
+        ErrPrint( toPrint, args... );
+    }
+};
+
+#if defined(ScalFMM_USE_ASSERT) || defined(SCALFMM_USE_ASSERT)
+// ScalFMM_USE_ASSERT is the spelling of the #cmakedefine in ScalFmmConfig.h.cmake; the upper-case form is kept for compatibility.
+//////////////////////////////////////////////////////////////
+// Sp error activated
+//////////////////////////////////////////////////////////////
+// When TEST is false: print the extra arguments through SpError::Print, then throw std::exception.
+#define SpErrorAssertExit(TEST, args...) \
+    if( !(TEST) ){ \
+        SpError::Print( args ); \
+        throw std::exception(); \
+    }
+
+
+#else
+
+//////////////////////////////////////////////////////////////
+// Sp error deactivated
+//////////////////////////////////////////////////////////////
+// TEST is still evaluated (its side effects are kept) but a failure is neither reported nor thrown.
+#define SpErrorAssertExit(TEST, args...) \
+    if( !(TEST) ){}
+
+
+#endif
+
+//////////////////////////////////////////////////////////////
+// Shortcut macro
+//////////////////////////////////////////////////////////////
+
+#define SPARSETD_ERROR_LINE " At line " , __LINE__ , "."
+#define SPARSETD_ERROR_FILE " In file " , __FILE__ , "."
+
+#define FAssert SpErrorAssertExit
+
+#define FAssertLF(args...) FAssert(args, SPARSETD_ERROR_LINE, SPARSETD_ERROR_FILE)
+
+#endif //FASSERT_HPP
+
+
diff --git a/Src/Utils/FAssertable.hpp b/Src/Utils/FAssertable.hpp
deleted file mode 100755
index b5af95c7b4035586885e1155b26b1054c076a489..0000000000000000000000000000000000000000
--- a/Src/Utils/FAssertable.hpp
+++ /dev/null
@@ -1,69 +0,0 @@
-// ===================================================================================
-// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
-// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
-// This software is a computer program whose purpose is to compute the FMM.
-//
-// This software is governed by the CeCILL-C and LGPL licenses and
-// abiding by the rules of distribution of free software.  
-// 
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public and CeCILL-C Licenses for more details.
-// "http://www.cecill.info". 
-// "http://www.gnu.org/licenses".
-// ===================================================================================
-#ifndef FASSERTABLE_HPP
-#define FASSERTABLE_HPP
-
-
-#include <cstdlib>
-#include <sstream>
-#include <iostream>
-
-/**
-* @author Berenger Bramas (berenger.bramas@inria.fr)
-* @class FAssertable
-* Please read the license
-*
-* This class is an interface for managing error.
-*
-* Please refere to testAssert.cpp to see an example
-* <code>
-* </code>
-*/
-class FAssertable {
-protected:
-	/** Empty Destructor */
-	virtual ~FAssertable(){}
-
-	/**
-	* to write debug data with line & file
-	* @param inTest if false, application will stop
-	* @param inMessage a message - from any type - to print
-	* @param inLinePosition line number
-	* @param inFilePosition file name
-	* @param inExitCode an exit code
-	*
-	* @code
-	* fassert(toto == titi, "problem : toto is not equal titi!", __LINE__, __FILE__);
-	* @endcode
-	* To prevent use from multiple thread we use a ostringstream before printing
-	*/
-	template <class Tmess, class Tline, class Tfile>
-        void fassert(const bool inTest, const Tmess& inMessage, const Tline& inLinePosition, const Tfile& inFilePosition, const int inExitCode = 1) const {
-		if(!inTest){
-                        std::ostringstream oss;
-			oss << "Error in " << inFilePosition << " at line " << inLinePosition <<" :\n";
-			oss << inMessage << "\n";
-		
-			std::cerr << oss.str();
-                        exit(inExitCode);
-		}
-	}
-
-};
-
-#endif //FASSERTABLE_HPP
-
-
diff --git a/Src/Utils/FComplexe.hpp b/Src/Utils/FComplexe.hpp
index d39880d862ec8033b7e173888d6e0a73e2935223..3b425a4b863b50afc95312af293e45780671f21c 100755
--- a/Src/Utils/FComplexe.hpp
+++ b/Src/Utils/FComplexe.hpp
@@ -56,8 +56,8 @@ public:
 
     /** Copy operator */
     FComplexe& operator=(const FComplexe& other){
-        this->complex[1] = other.complex[1];
         this->complex[0] = other.complex[0];
+        this->complex[1] = other.complex[1];
         return *this;
     }
 
diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp
index b7eff1bb5df30cc043189df65cae117abb999387..15c1b2dd4f93944c9ea4f09214eee7f6c77f77a6 100755
--- a/Src/Utils/FMpi.hpp
+++ b/Src/Utils/FMpi.hpp
@@ -23,6 +23,8 @@
 #include "FNoCopyable.hpp"
 #include "FMath.hpp"
 
+//Need that for converting datas
+#include "FComplexe.hpp"
 
 
 /////////////////////////////////////////////////////////////////////////////////////////
@@ -43,291 +45,301 @@
 
 
 /**
-* @author Berenger Bramas (berenger.bramas@inria.fr)
-* @class FMpi
-* Please read the license
-*
-*/
+ * @author Berenger Bramas (berenger.bramas@inria.fr)
+ * @class FMpi
+ * Please read the license
+ *
+ */
 
 class FMpi {
 public:
-////////////////////////////////////////////////////////
-// MPI Flag
-////////////////////////////////////////////////////////
-    enum FMpiTag {
-        // FMpiTreeBuilder
-        TagExchangeIndexs,
-        TagSplittedLeaf,
-        TagExchangeNbLeafs,
-        TagSandSettling,
-
-        // FQuickSort
-        TagQuickSort,
-
-        // FMM
-        TagFmmM2M,
-        TagFmmL2L,
-        TagFmmP2P,
-
-        // Bitonic,
-        TagBitonicMin,
-        TagBitonicMax,
-        TagBitonicMinMess,
-        TagBitonicMaxMess,
-
-        // Last defined tag
-        TagLast,
-    };
-
-////////////////////////////////////////////////////////
-// FComm to factorize MPI_Comm work
-////////////////////////////////////////////////////////
-
-    /** This class is used to put all the usual method
-      * related mpi comm
-      */
-    class FComm : public FNoCopyable {
-        int rank;   //< rank related to the comm
-        int nbProc; //< nb proc in this group
-
-        MPI_Comm communicator;  //< current mpi communicator
-        MPI_Group group;        //< current mpi group
-
-
-        // reset : get rank and nb proc from mpi
-        void reset(){
-            FMpi::Assert( MPI_Comm_rank(communicator,&rank),  __LINE__ );
-            FMpi::Assert( MPI_Comm_size(communicator,&nbProc),  __LINE__ );
-        }
-
-    public:
-        /** Constructor : dup the comm given in parameter */
-        explicit FComm(MPI_Comm inCommunicator ) {
-            FMpi::Assert( MPI_Comm_dup(inCommunicator, &communicator),  __LINE__ , "comm dup");
-            FMpi::Assert( MPI_Comm_group(communicator, &group),  __LINE__ , "comm group");
-
-            reset();
-        }
-
-        /** Free communicator and group */
-        virtual ~FComm(){
-            FMpi::Assert( MPI_Comm_free(&communicator),  __LINE__ );
-            FMpi::Assert( MPI_Group_free(&group),  __LINE__ );
-        }
-
-        /** To get the mpi comm needed for communication */
-        MPI_Comm getComm() const {
-            return communicator;
-        }
-
-        /** The current rank */
-        int processId() const {
-            return rank;
-        }
-
-        /** The current number of procs in the group */
-        int processCount() const {
-            return nbProc;
-        }
-
-        ////////////////////////////////////////////////////////////
-        // Split/Chunk functions
-        ////////////////////////////////////////////////////////////
-
-        /** Get a left index related to a size */
-        template< class T >
-        T getLeft(const T inSize)  const {
-            const double step = (double(inSize) / double(processCount()));
-            return T(FMath::Ceil(step * double(processId())));
-        }
-
-        /** Get a right index related to a size */
-        template< class T >
-        T getRight(const T inSize)  const {
-            const double step = (double(inSize) / double(processCount()));
-            const T res = T(FMath::Ceil(step * double(processId()+1)));
-            if(res > inSize) return inSize;
-            else return res;
-        }
-
-        /** Get a right index related to a size and another id */
-        template< class T >
-        T getOtherRight(const T inSize, const int other)  const {
-            const double step = (double(inSize) / double(processCount()));
-            const T res = T(FMath::Ceil(step * double(other+1)));
-            if(res > inSize) return inSize;
-            else return res;
-        }
-
-        /** Get a left index related to a size and another id */
-        template< class T >
-        T getOtherLeft(const T inSize, const int other) const {
-            const double step = (double(inSize) / double(processCount()));
-            return T(FMath::Ceil(step * double(other)));
-        }
-
-        /** Get a proc id from and index */
-        template< class T >
-        int getProc(const int position, const T inSize) const {
-            const double step = (double(inSize) / processCount());
-            return int(position/step);
-        }
-
-        ////////////////////////////////////////////////////////////
-        // Mpi interface functions
-        ////////////////////////////////////////////////////////////
-
-
-        /** Reduce a value for proc == 0 */
-        template< class T >
-        T reduceSum(T data) const {
-            T result(0);
-            FMpi::Assert( MPI_Reduce( &data, &result, 1, FMpi::GetType(data), MPI_SUM, 0, communicator ), __LINE__);
-            return result;
-        }
-
-        /** Reduce an average */
-        template< class T >
-        T reduceAverageAll(T data) const {
-            T result[processCount()];
-            FMpi::Assert( MPI_Allgather( &data, 1, FMpi::GetType(data), result, 1, FMpi::GetType(data), getComm()),  __LINE__ );
-
-            T average = 0;
-            for(int idxProc = 0 ; idxProc < processCount() ;++idxProc){
-                average += result[idxProc] / processCount();
-            }
-            return average;
-        }
-
-        /** Change the group size */
-        void groupReduce(const int from , const int to){
-            int * procsIdArray = new int [to - from + 1];
-            for(int idxProc = from ;idxProc <= to ; ++idxProc){
-                procsIdArray[idxProc - from] = idxProc;
-            }
-
-            MPI_Group previousGroup = group;
-            FMpi::Assert( MPI_Group_incl(previousGroup, to - from + 1 , procsIdArray, &group),  __LINE__ );
-
-            MPI_Comm previousComm = communicator;
-            FMpi::Assert( MPI_Comm_create(previousComm, group, &communicator),  __LINE__ );
-
-            MPI_Comm_free(&previousComm);
-            MPI_Group_free(&previousGroup);
-
-            reset();
-	    delete  procsIdArray ;
-        }
-    };
-
-////////////////////////////////////////////////////////
-// FMpi methods
-////////////////////////////////////////////////////////
-
-    /*
-    We use init with thread because of an openmpi error:
+  ////////////////////////////////////////////////////////
+  // MPI Flag
+  ////////////////////////////////////////////////////////
+  enum FMpiTag {
+    // FMpiTreeBuilder
+    TagExchangeIndexs,
+    TagSplittedLeaf,
+    TagExchangeNbLeafs,
+    TagSandSettling,
+
+    // FQuickSort
+    TagQuickSort,
+
+    // FMM
+    TagFmmM2M,
+    TagFmmL2L,
+    TagFmmP2P,
+
+    // Bitonic,
+    TagBitonicMin,
+    TagBitonicMax,
+    TagBitonicMinMess,
+    TagBitonicMaxMess,
+
+    // Last defined tag
+    TagLast,
+  };
+
+  ////////////////////////////////////////////////////////
+  // FComm to factorize MPI_Comm work
+  ////////////////////////////////////////////////////////
+
+  /** This class is used to put all the usual method
+   * related mpi comm
+   */
+  class FComm : public FNoCopyable {
+    int rank;   //< rank related to the comm
+    int nbProc; //< nb proc in this group
+
+    MPI_Comm communicator;  //< current mpi communicator
+    MPI_Group group;        //< current mpi group
+
+
+    // reset : get rank and nb proc from mpi
+    void reset(){
+      FMpi::Assert( MPI_Comm_rank(communicator,&rank),  __LINE__ );
+      FMpi::Assert( MPI_Comm_size(communicator,&nbProc),  __LINE__ );
+    }
 
-    [fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
-    [fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
-    [fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
+  public:
+    /** Constructor : dup the comm given in parameter */
+    explicit FComm(MPI_Comm inCommunicator ) {
+      FMpi::Assert( MPI_Comm_dup(inCommunicator, &communicator),  __LINE__ , "comm dup");
+      FMpi::Assert( MPI_Comm_group(communicator, &group),  __LINE__ , "comm group");
 
-    Erreur pour le proc1
-    [[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0  vendor error 105 qp_idx 3
-    Tous on la meme erreur le 2e 1 est remplacé par le rang.
-    */
-    FMpi(int inArgc, char **  inArgv ) : communicator(0) {
-        int provided = 0;
-        FMpi::Assert( MPI_Init_thread(&inArgc,&inArgv, MPI_THREAD_MULTIPLE, &provided), __LINE__);
-        communicator = new FComm(MPI_COMM_WORLD);
+      reset();
+    }
+
+    /** Free communicator and group */
+    virtual ~FComm(){
+      FMpi::Assert( MPI_Comm_free(&communicator),  __LINE__ );
+      FMpi::Assert( MPI_Group_free(&group),  __LINE__ );
     }
 
-    /** Delete the communicator and call mpi finalize */
-    ~FMpi(){
-        delete communicator;
-        MPI_Finalize();
+    /** To get the mpi comm needed for communication */
+    MPI_Comm getComm() const {
+      return communicator;
     }
 
-    /** Get the global communicator */
-    const FComm& global() {
-        return (*communicator);
+    /** The current rank */
+    int processId() const {
+      return rank;
+    }
+
+    /** The current number of procs in the group */
+    int processCount() const {
+      return nbProc;
     }
 
     ////////////////////////////////////////////////////////////
-    // Mpi Types meta function
+    // Split/Chunk functions
     ////////////////////////////////////////////////////////////
 
-    static MPI_Datatype GetType(long long&){
-        return MPI_LONG_LONG;
+    /** Get a left index related to a size */
+    template< class T >
+    T getLeft(const T inSize)  const {
+      const double step = (double(inSize) / double(processCount()));
+      return T(FMath::Ceil(step * double(processId())));
     }
 
-    static MPI_Datatype GetType(long int&){
-        return MPI_LONG;
+    /** Get a right index related to a size */
+    template< class T >
+    T getRight(const T inSize)  const {
+      const double step = (double(inSize) / double(processCount()));
+      const T res = T(FMath::Ceil(step * double(processId()+1)));
+      if(res > inSize) return inSize;
+      else return res;
     }
 
-    static MPI_Datatype GetType(double&){
-        return MPI_DOUBLE;
+    /** Get a right index related to a size and another id */
+    template< class T >
+    T getOtherRight(const T inSize, const int other)  const {
+      const double step = (double(inSize) / double(processCount()));
+      const T res = T(FMath::Ceil(step * double(other+1)));
+      if(res > inSize) return inSize;
+      else return res;
     }
 
-    static MPI_Datatype GetType(float&){
-        return MPI_FLOAT;
+    /** Get a left index related to a size and another id */
+    template< class T >
+    T getOtherLeft(const T inSize, const int other) const {
+      const double step = (double(inSize) / double(processCount()));
+      return T(FMath::Ceil(step * double(other)));
     }
 
-    static MPI_Datatype GetType(int&){
-        return MPI_INT;
+    /** Get a proc id from an index */
+    template< class T >
+    int getProc(const int position, const T inSize) const {
+      const double step = (double(inSize) / processCount());
+      return int(position/step);
     }
 
     ////////////////////////////////////////////////////////////
     // Mpi interface functions
     ////////////////////////////////////////////////////////////
 
-    /** generic mpi assert function */
-    static void Assert(const int test, const unsigned line, const char* const message = 0){
-        if(test != MPI_SUCCESS){
-            printf("[ERROR-QS] Test failled at line %d, result is %d", line, test);
-            if(message) printf(", message: %s",message);
-            printf("\n");
-            fflush(stdout);
-            MPI_Abort(MPI_COMM_WORLD, int(line) );
-        }
-    }
 
-    /** Compute a left index from data */
-    template <class T>
-    static T GetLeft(const T inSize, const int inIdProc, const int inNbProc) {
-        const double step = (double(inSize) / inNbProc);
-        return T(ceil(step * inIdProc));
+    /** Reduce a value for proc == 0 */
+    template< class T >
+    T reduceSum(T data) const {
+      T result(0);
+      FMpi::Assert( MPI_Reduce( &data, &result, 1, FMpi::GetType(data), MPI_SUM, 0, communicator ), __LINE__);
+      return result;
     }
 
-    /** Compute a right index from data */
-    template <class T>
-    static T GetRight(const T inSize, const int inIdProc, const int inNbProc) {
-        const double step = (double(inSize) / inNbProc);
-        const T res = T(ceil(step * (inIdProc+1)));
-        if(res > inSize) return inSize;
-        else return res;
+    /** Gather `data` from every process of the communicator and return the average on all of them */
+    template< class T >
+    T reduceAverageAll(T data) const {
+      T* const result = new T[processCount()]; // heap buffer: a variable length array is not standard C++
+      FMpi::Assert( MPI_Allgather( &data, 1, FMpi::GetType(data), result, 1, FMpi::GetType(data), getComm()),  __LINE__ );
+      T average = 0; // accumulated as sum(x_i / n): each term is divided before being added
+      for(int idxProc = 0 ; idxProc < processCount() ;++idxProc){
+        average += result[idxProc] / processCount();
+      }
+      delete[] result;
+      return average;
     }
 
-    /** Compute a proc id from index & data */
-    template <class T>
-    static int GetProc(const T position, const T inSize, const int inNbProc) {
-        const double step = double(inSize) / double(inNbProc);
-        return int(double(position)/step);
+    /** Restrict the group and the communicator to the ranks [from, to] (both bounds included) */
+    void groupReduce(const int from , const int to){
+      int * procsIdArray = new int [to - from + 1];
+      for(int idxProc = from ;idxProc <= to ; ++idxProc){
+        procsIdArray[idxProc - from] = idxProc;
+      }
+      // Build the sub-group from the current group
+      MPI_Group previousGroup = group;
+      FMpi::Assert( MPI_Group_incl(previousGroup, to - from + 1 , procsIdArray, &group),  __LINE__ );
+      // NOTE(review): ranks outside [from, to] presumably get MPI_COMM_NULL here, so reset() would fail for them - confirm with callers
+      MPI_Comm previousComm = communicator;
+      FMpi::Assert( MPI_Comm_create(previousComm, group, &communicator),  __LINE__ );
+      // Release the handles that have just been replaced, checking the result like every other MPI call
+      FMpi::Assert( MPI_Comm_free(&previousComm),  __LINE__ );
+      FMpi::Assert( MPI_Group_free(&previousGroup),  __LINE__ );
+
+      reset(); // refresh rank and nbProc from the new communicator
+      delete[] procsIdArray; // allocated with new[], so it has to be released with delete[]
     }
+  };
+
+  ////////////////////////////////////////////////////////
+  // FMpi methods
+  ////////////////////////////////////////////////////////
+
+  /*
+    We use init with thread because of an openmpi error:
+
+    [fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
+    [fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
+    [fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
 
-    /** assert if mpi error */
-    static void MpiAssert(const int test, const unsigned line, const char* const message = 0){
-        if(test != MPI_SUCCESS){
-            printf("[ERROR] Test failled at line %d, result is %d", line, test);
-            if(message) printf(", message: %s",message);
-            printf("\n");
-            fflush(stdout);
-            MPI_Abort(MPI_COMM_WORLD, int(line) );
-        }
+    Error for proc 1
+    [[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0  vendor error 105 qp_idx 3
+    All processes report the same error; the second 1 is replaced by their rank.
+  */
+  FMpi(int inArgc, char **  inArgv ) : communicator(0) {
+    int provided = 0;
+    FMpi::Assert( MPI_Init_thread(&inArgc,&inArgv, MPI_THREAD_MULTIPLE, &provided), __LINE__);
+    communicator = new FComm(MPI_COMM_WORLD);
+  }
+
+  /** Delete the communicator and call mpi finalize */
+  ~FMpi(){
+    delete communicator;
+    MPI_Finalize();
+  }
+
+  /** Get the global communicator */
+  const FComm& global() {
+    return (*communicator);
+  }
+
+  ////////////////////////////////////////////////////////////
+  // Mpi Types meta function
+  ////////////////////////////////////////////////////////////
+
+  static const MPI_Datatype GetType(const long long&){
+    return MPI_LONG_LONG;
+  }
+
+  static const MPI_Datatype GetType(const long int&){
+    return MPI_LONG;
+  }
+
+  static const MPI_Datatype GetType(const double&){
+    return MPI_DOUBLE;
+  }
+
+  static const MPI_Datatype GetType(const float&){
+    return MPI_FLOAT;
+  }
+
+  static const MPI_Datatype GetType(const int&){
+    return MPI_INT;
+  }
+
+  static const MPI_Datatype GetType(const char&){
+    return MPI_CHAR;
+  }
+
+  static const MPI_Datatype GetType(const FComplexe& a){
+    MPI_Datatype FMpiComplexe; // derived type: 2 contiguous elements of the type of a.getReal()
+    FMpi::Assert( MPI_Type_contiguous(2, GetType(a.getReal()) , &FMpiComplexe), __LINE__ );
+    FMpi::Assert( MPI_Type_commit(&FMpiComplexe), __LINE__ ); // a derived type must be committed before being used in a communication
+    return FMpiComplexe; // NOTE(review): a new type is created on each call and never freed here - confirm whether callers release it
+  }
+  ////////////////////////////////////////////////////////////
+  // Mpi interface functions
+  ////////////////////////////////////////////////////////////
+
+  /** Generic mpi assert: if test is not MPI_SUCCESS, print the line, the code and the optional message, then abort */
+  static void Assert(const int test, const unsigned line, const char* const message = 0){
+    if(test != MPI_SUCCESS){
+      printf("[ERROR-QS] Test failled at line %u, result is %d", line, test); // %u because line is unsigned
+      if(message) printf(", message: %s",message);
+      printf("\n");
+      fflush(stdout); // flush so the message is written before the abort
+      MPI_Abort(MPI_COMM_WORLD, int(line) ); // the line number is passed as the abort error code
+    }
+  }
+
+  /** Compute a left index from data */
+  template <class T>
+  static T GetLeft(const T inSize, const int inIdProc, const int inNbProc) {
+    const double step = (double(inSize) / inNbProc);
+    return T(ceil(step * inIdProc));
+  }
+
+  /** Compute a right index from data */
+  template <class T>
+  static T GetRight(const T inSize, const int inIdProc, const int inNbProc) {
+    const double step = (double(inSize) / inNbProc);
+    const T res = T(ceil(step * (inIdProc+1)));
+    if(res > inSize) return inSize;
+    else return res;
+  }
+
+  /** Compute a proc id from index & data */
+  template <class T>
+  static int GetProc(const T position, const T inSize, const int inNbProc) {
+    const double step = double(inSize) / double(inNbProc);
+    return int(double(position)/step);
+  }
+
+  /** Assert on an mpi result: if test is not MPI_SUCCESS, print the line, the code and the optional message, then abort */
+  static void MpiAssert(const int test, const unsigned line, const char* const message = 0){
+    if(test != MPI_SUCCESS){
+      printf("[ERROR] Test failled at line %u, result is %d", line, test); // %u because line is unsigned
+      if(message) printf(", message: %s",message);
+      printf("\n");
+      fflush(stdout); // flush so the message is written before the abort
+      MPI_Abort(MPI_COMM_WORLD, int(line) ); // the line number is passed as the abort error code
     }
+  }
 
 private:
-    /** The original communicator */
-    FComm* communicator;
+  /** The original communicator */
+  FComm* communicator;
 };
 
 
diff --git a/Src/Utils/FParameters.hpp b/Src/Utils/FParameters.hpp
index fb12c3bd002947bca12a5eca22e13bd9bad98c3b..b3c67e936988c6426d9056274db9f25ca02212a5 100755
--- a/Src/Utils/FParameters.hpp
+++ b/Src/Utils/FParameters.hpp
@@ -36,7 +36,7 @@ namespace FParameters{
 	 * <code> const int argInt = userParemetersAt<int>(1,-1); </code>
 	 */
 	template <class VariableType>
-	const VariableType StrToOther(const char* const str, const VariableType& defaultValue = VariableType()){
+    inline const VariableType StrToOther(const char* const str, const VariableType& defaultValue = VariableType()){
 		std::istringstream iss(str,std::istringstream::in);
 		VariableType value;
 		iss >> value;
@@ -47,21 +47,21 @@ namespace FParameters{
     /** To put a char into lower format
       *
       */
-    char toLower(const char c){
+    inline char toLower(const char c){
         return char('A' <= c && c <= 'Z' ? (c - 'A') + 'a' : c);
     }
 
     /** To know if two char are equals
       *
       */
-    bool areCharsEquals(const char c1, const char c2, const bool caseSensible = false){
+    inline bool areCharsEquals(const char c1, const char c2, const bool caseSensible = false){
         return (caseSensible && c1 == c2) || (!caseSensible && toLower(c1) == toLower(c2));
     }
 
     /** To know if two str are equals
       *
       */
-    bool areStrEquals(const char* const inStr1, const char* const inStr2, const bool caseSensible = false){
+    inline bool areStrEquals(const char* const inStr1, const char* const inStr2, const bool caseSensible = false){
         int idxStr = 0;
         while(inStr1[idxStr] != '\0' && inStr2[idxStr] != '\0'){
             if(!areCharsEquals(inStr1[idxStr] ,inStr2[idxStr],caseSensible)){
@@ -75,7 +75,7 @@ namespace FParameters{
     /** To find a parameters from user format char parameters
       *
       */
-    int findParameter(const int argc, const char* const * const argv, const char* const inName, const bool caseSensible = false){
+    inline int findParameter(const int argc, const char* const * const argv, const char* const inName, const bool caseSensible = false){
         for(int idxArg = 0; idxArg < argc ; ++idxArg){
             if(areStrEquals(inName, argv[idxArg], caseSensible)){
                 return idxArg;
@@ -87,7 +87,7 @@ namespace FParameters{
     /** To know if a parameter exist from user format char parameters
       *
       */
-    bool existParameter(const int argc, const char* const * const argv, const char* const inName, const bool caseSensible = false){
+    inline bool existParameter(const int argc, const char* const * const argv, const char* const inName, const bool caseSensible = false){
         return NotFound != findParameter( argc, argv, inName, caseSensible);
     }
 
@@ -97,7 +97,7 @@ namespace FParameters{
       * else 0
       */
     template <class VariableType>
-    const VariableType getValue(const int argc, const char* const * const argv, const char* const inName, const VariableType& defaultValue = VariableType(), const bool caseSensible = false){
+    inline const VariableType getValue(const int argc, const char* const * const argv, const char* const inName, const VariableType& defaultValue = VariableType(), const bool caseSensible = false){
         const int position = findParameter(argc,argv,inName,caseSensible);
         if(position == NotFound || position == argc - 1){
             return defaultValue;
@@ -107,7 +107,7 @@ namespace FParameters{
 
     /** Get a str from argv
       */
-    const char* getStr(const int argc, const char* const * const argv, const char* const inName, const char* const inDefault, const bool caseSensible = false){
+    inline const char* getStr(const int argc, const char* const * const argv, const char* const inName, const char* const inDefault, const bool caseSensible = false){
         const int position = findParameter(argc,argv,inName,caseSensible);
         if(position == NotFound || position == argc - 1){
             return inDefault;
diff --git a/Src/Utils/FPoint.hpp b/Src/Utils/FPoint.hpp
index 86bfc8ac1ba18ca13908eb9305601efe8e24a2e0..a999485f9584e4eb393aa4f902a0e6e0f75df93b 100755
--- a/Src/Utils/FPoint.hpp
+++ b/Src/Utils/FPoint.hpp
@@ -18,8 +18,6 @@
 
 #include "FMath.hpp"
 #include "FGlobal.hpp"
-#include "../Containers/FBufferReader.hpp"
-#include "../Containers/FBufferWriter.hpp"
 
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
@@ -300,12 +298,14 @@ public:
         return output;  // for multiple << operators.
     }
 
-    /** Save current object */
-    void save(FBufferWriter& buffer) const {
+    /** Save current object */    
+    template <class BufferWriterClass>
+    void save(BufferWriterClass& buffer) const {
         buffer << data[0] << data[1] << data[2];
     }
     /** Retrieve current object */
-    void restore(FBufferReader& buffer) {
+    template <class BufferReaderClass>
+    void restore(BufferReaderClass& buffer) {
         buffer >> data[0] >> data[1] >> data[2];
     }
 };
diff --git a/Src/Utils/FSse.hpp b/Src/Utils/FSse.hpp
index e63708781f946473f923134e13e936b81ad7838e..638731712df06ccee140cf90eab576f3eb37c7e4 100644
--- a/Src/Utils/FSse.hpp
+++ b/Src/Utils/FSse.hpp
@@ -5,7 +5,7 @@
 #include <emmintrin.h>  //SSE2
 #include <pmmintrin.h> //SSE3
 #include <tmmintrin.h>  //SSSE3
-#include <smmintrin.h> // SSE4
+//#include <smmintrin.h> // SSE4
 
 #ifdef __INTEL_COMPILER
 
diff --git a/Tests/Kernels/testSphericalProcAlgorithm.cpp b/Tests/Kernels/testSphericalProcAlgorithm.cpp
index 9f5c92ddbef0963939233b395dab9959cb1b86b8..5706acbe66f1c5552548148fd4035678d40fd663 100755
--- a/Tests/Kernels/testSphericalProcAlgorithm.cpp
+++ b/Tests/Kernels/testSphericalProcAlgorithm.cpp
@@ -29,6 +29,9 @@
 #include "../../Src/Kernels/Spherical/FSphericalKernel.hpp"
 #include "../../Src/Kernels/Spherical/FSphericalCell.hpp"
 
+#include "../../Src/Kernels/Rotation/FRotationKernel.hpp"
+#include "../../Src/Kernels/Rotation/FRotationCell.hpp"
+
 #include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
 #include "../../Src/Core/FFmmAlgorithmThread.hpp"
 
@@ -73,7 +76,7 @@ bool isEqualPole(const CellClass& me, const CellClass& other, FReal*const cumul)
 }
 
 /** To compare data */
-bool isEqualLocal(const FSphericalCell& me, const FSphericalCell& other, FReal*const cumul){
+bool isEqualLocal(const FSphericalCell& me, const FSphericalCell& other,FReal*const cumul){
     FMath::FAccurater accurate;
     for(int idx = 0; idx < FSphericalCell::GetLocalSize(); ++idx){
         accurate.add(me.getLocal()[idx].getImag(),other.getLocal()[idx].getImag());
@@ -210,202 +213,206 @@ void ValidateFMMAlgoProc(OctreeClass* const badTree,
 
 // Simply create particles and try the kernels
 int main(int argc, char ** argv){
-    typedef FSphericalCell         CellClass;
-    typedef FP2PParticleContainer         ContainerClass;
-
-    typedef FSimpleLeaf< ContainerClass >                     LeafClass;
-    typedef FOctree< CellClass, ContainerClass , LeafClass >  OctreeClass;
-    typedef FSphericalKernel< CellClass, ContainerClass >          KernelClass;
-
-    typedef FFmmAlgorithmThreadProc<OctreeClass,  CellClass, ContainerClass, KernelClass, LeafClass > FmmClass;
-    typedef FFmmAlgorithmThread<OctreeClass,  CellClass, ContainerClass, KernelClass, LeafClass > FmmClassNoProc;
-    ///////////////////////What we do/////////////////////////////
-    std::cout << ">> This executable has to be used to test Spherical algorithm.\n";
-    //////////////////////////////////////////////////////////////
-
-    FMpi app( argc, argv);
-
-    const int DevP = FParameters::getValue(argc,argv,"-p", 8);
-    const int NbLevels = FParameters::getValue(argc,argv,"-h", 5);
-    const int SizeSubLevels = FParameters::getValue(argc,argv,"-sh", 3);
-    FTic counter;
-    const char* const defaultFilename = (sizeof(FReal) == sizeof(float))?
-                                    "../../Data/test20k.bin.fma.single":
-                                    "../../Data/test20k.bin.fma.double";
-    const char* const filename = FParameters::getStr(argc,argv,"-f", defaultFilename);
-
-    std::cout << "Opening : " << filename << "\n";
-
-    FMpiFmaLoader loader(filename, app.global());
-    if(!loader.isOpen()){
-        std::cout << "Loader Error, " << filename << " is missing\n";
-        return 1;
-    }
+  typedef FSphericalCell         CellClass;
+  typedef FP2PParticleContainer         ContainerClass;
 
-    // -----------------------------------------------------
-    CellClass::Init(DevP);
-    OctreeClass tree(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
+  typedef FSimpleLeaf< ContainerClass >                     LeafClass;
+  typedef FOctree< CellClass, ContainerClass , LeafClass >  OctreeClass;
+  typedef FSphericalKernel< CellClass, ContainerClass >     KernelClass;
 
-    // -----------------------------------------------------
+  typedef FFmmAlgorithmThreadProc<OctreeClass,  CellClass, ContainerClass, KernelClass, LeafClass > FmmClass;
+  typedef FFmmAlgorithmThread<OctreeClass,  CellClass, ContainerClass, KernelClass, LeafClass > FmmClassNoProc;
 
-    std::cout << "Creating & Inserting " << loader.getNumberOfParticles() << " particles ..." << std::endl;
-    std::cout << "\tHeight : " << NbLevels << " \t sub-height : " << SizeSubLevels << std::endl;
-    counter.tic();
 
-    if( app.global().processCount() != 1){
-        //////////////////////////////////////////////////////////////////////////////////
-        // Build tree from mpi loader
-        //////////////////////////////////////////////////////////////////////////////////
-        std::cout << "Build Tree ..." << std::endl;
-        counter.tic();
-
-        struct TestParticle{
-            FPoint position;
-            FReal physicalValue;
-            const FPoint& getPosition(){
-                return position;
-            }
-        };
+  ///////////////////////What we do/////////////////////////////
+  std::cout << ">> This executable has to be used to test Spherical algorithm.\n";
+  //////////////////////////////////////////////////////////////
 
-        TestParticle* particles = new TestParticle[loader.getNumberOfParticles()];
-        memset(particles, 0, sizeof(TestParticle) * loader.getNumberOfParticles());
+  FMpi app( argc, argv);
 
-        for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
-            loader.fillParticle(&particles[idxPart].position,&particles[idxPart].physicalValue);
-        }
+  const int DevP = FParameters::getValue(argc,argv,"-p", 8);
+  const int NbLevels = FParameters::getValue(argc,argv,"-h", 5);
+  const int SizeSubLevels = FParameters::getValue(argc,argv,"-sh", 3);
+  FTic counter;
+  const char* const defaultFilename = (sizeof(FReal) == sizeof(float))?
+    "../Data/test20k.bin.fma.single":
+    "../Data/test20k.bin.fma.double";
+  const char* const filename = FParameters::getStr(argc,argv,"-f", defaultFilename);
 
-        FVector<TestParticle> finalParticles;
-        FMpiTreeBuilder< TestParticle >::ArrayToTree(app.global(), particles, loader.getNumberOfParticles(),
-                                                                           tree.getBoxCenter(),
-                                                                           tree.getBoxWidth(),
-                                                     tree.getHeight(), &finalParticles);
+  std::cout << "Opening : " << filename << "\n";
 
-        for(int idx = 0 ; idx < finalParticles.getSize(); ++idx){
-            tree.insert(finalParticles[idx].position);
-        }
+  FMpiFmaLoader loader(filename, app.global());
+  if(!loader.isOpen()){
+    std::cout << "Loader Error, " << filename << " is missing\n";
+    return 1;
+  }
+
+  CellClass::Init(DevP);
+  
+  
+  OctreeClass tree(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
 
-        delete[] particles;
+  // -----------------------------------------------------
 
-        counter.tac();
-        std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
+  std::cout << "Creating & Inserting " << loader.getNumberOfParticles() << " particles ..." << std::endl;
+  std::cout << "\tHeight : " << NbLevels << " \t sub-height : " << SizeSubLevels << std::endl;
+  counter.tic();
+
+  if( app.global().processCount() != 1){
+    //////////////////////////////////////////////////////////////////////////////////
+    // Build tree from mpi loader
+    //////////////////////////////////////////////////////////////////////////////////
+    std::cout << "Build Tree ..." << std::endl;
+    counter.tic();
 
-        //////////////////////////////////////////////////////////////////////////////////
+    struct TestParticle{
+      FPoint position;
+      FReal physicalValue;
+      const FPoint& getPosition(){
+	return position;
+      }
+    };
+
+    TestParticle* particles = new TestParticle[loader.getNumberOfParticles()];
+    memset(particles, 0, sizeof(TestParticle) * loader.getNumberOfParticles());
+
+    for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+      loader.fillParticle(&particles[idxPart].position,&particles[idxPart].physicalValue);
     }
-    else{
-        FPoint position;
-        FReal physicalValue;
-        for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
-            loader.fillParticle(&position,&physicalValue);
-            tree.insert(position, physicalValue);
-        }
+
+    FVector<TestParticle> finalParticles;
+    FMpiTreeBuilder< TestParticle >::ArrayToTree(app.global(), particles, loader.getNumberOfParticles(),
+						 tree.getBoxCenter(),
+						 tree.getBoxWidth(),
+						 tree.getHeight(), &finalParticles);
+
+    for(int idx = 0 ; idx < finalParticles.getSize(); ++idx){
+      tree.insert(finalParticles[idx].position,finalParticles[idx].physicalValue);
+
     }
 
+    delete[] particles;
+
     counter.tac();
-    std::cout << "Done  " << "(@Creating and Inserting Particles = " << counter.elapsed() << "s)." << std::endl;
+    std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
+
+    //////////////////////////////////////////////////////////////////////////////////
+  }
+  else{
+    FPoint position;
+    FReal physicalValue;
+    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+      loader.fillParticle(&position,&physicalValue);
+      tree.insert(position, physicalValue);
+    }
+  }
 
-    // -----------------------------------------------------
-    std::cout << "Create kernel..." << std::endl;
+  counter.tac();
+  std::cout << "Done  " << "(@Creating and Inserting Particles = " << counter.elapsed() << "s)." << std::endl;
 
-    KernelClass kernels(DevP, NbLevels,loader.getBoxWidth(), loader.getCenterOfBox());
+  // -----------------------------------------------------
+  std::cout << "Create kernel..." << std::endl;
 
-    std::cout << "Done  " << " in " << counter.elapsed() << "s)." << std::endl;
+  KernelClass kernels(DevP, NbLevels,loader.getBoxWidth(), loader.getCenterOfBox());
 
-    // -----------------------------------------------------
+  std::cout << "Done  " << " in " << counter.elapsed() << "s)." << std::endl;
 
-    std::cout << "Working on particles ..." << std::endl;
+  // -----------------------------------------------------
 
-    FmmClass algo(app.global(),&tree,&kernels);
+  std::cout << "Working on particles ..." << std::endl;
 
-    counter.tic();
-    algo.execute();
-    counter.tac();
+  FmmClass algo(app.global(),&tree,&kernels);
 
-    std::cout << "Done  " << "(@Algorithm = " << counter.elapsed() << "s)." << std::endl;
+  counter.tic();
+  algo.execute();
+  counter.tac();
 
-    { // get sum forces&potential
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Sum Result" , __FILE__ , __LINE__) );
+  std::cout << "Done  " << "(@Algorithm = " << counter.elapsed() << "s)." << std::endl;
 
-        FReal potential = 0;
-        FReal fx = 0.0, fy = 0.0, fz = 0.0;
+  { // get sum forces&potential
+    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Sum Result" , __FILE__ , __LINE__) );
 
-        tree.forEachLeaf([&](LeafClass* leaf){
-            const FReal*const potentials = leaf->getTargets()->getPotentials();
-            const FReal*const forcesX = leaf->getTargets()->getForcesX();
-            const FReal*const forcesY = leaf->getTargets()->getForcesY();
-            const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
-            const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
+    FReal potential = 0;
+    FReal fx = 0.0, fy = 0.0, fz = 0.0;
 
-            for(int idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
-                potential += potentials[idxPart];
-                fx += forcesX[idxPart];
-                fy += forcesY[idxPart];
-                fz += forcesZ[idxPart];
-            }
-        });
+    tree.forEachLeaf([&](LeafClass* leaf){
+	const FReal*const potentials = leaf->getTargets()->getPotentials();
+	const FReal*const forcesX = leaf->getTargets()->getForcesX();
+	const FReal*const forcesY = leaf->getTargets()->getForcesY();
+	const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
+	const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
 
-        std::cout << "My potential is " << potential << std::endl;
+	for(int idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
+	  potential += potentials[idxPart];
+	  fx += forcesX[idxPart];
+	  fy += forcesY[idxPart];
+	  fz += forcesZ[idxPart];
+	}
+      });
 
-        potential = app.global().reduceSum(potential);
-        fx = app.global().reduceSum(fx);
-        fy = app.global().reduceSum(fy);
-        fz = app.global().reduceSum(fz);
+    std::cout << "My potential is " << potential << std::endl;
 
+    potential = app.global().reduceSum(potential);
+    fx = app.global().reduceSum(fx);
+    fy = app.global().reduceSum(fy);
+    fz = app.global().reduceSum(fz);
 
-        if(app.global().processId() == 0){
-            std::cout << "Foces Sum  x = " << fx << " y = " << fy << " z = " << fz << std::endl;
-            std::cout << "Potential Sum = " << potential << std::endl;
-        }
+
+    if(app.global().processId() == 0){
+      std::cout << "Foces Sum  x = " << fx << " y = " << fy << " z = " << fz << std::endl;
+      std::cout << "Potential Sum = " << potential << std::endl;
     }
+  }
 
 #ifdef VALIDATE_FMM
+  {
+    OctreeClass treeValide(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
     {
-        OctreeClass treeValide(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
-        {
-            FFmaBinLoader loaderSeq(filename);
-            FPoint position;
-            FReal physicalValue;
-            for(FSize idxPart = 0 ; idxPart < loaderSeq.getNumberOfParticles() ; ++idxPart){
-                loaderSeq.fillParticle(&position,&physicalValue);
-                treeValide.insert(position,physicalValue);
-            }
-        }
-
-        std::cout << "Working on particles ..." << std::endl;
-        FmmClassNoProc algoValide(&treeValide,&kernels);
-        counter.tic();
-        algoValide.execute();
-        counter.tac();
-        std::cout << "Done  " << "(@Algorithm = " << counter.elapsed() << "s)." << std::endl;
-
-        FReal potential = 0;
-        FReal fx = 0.0, fy = 0.0, fz = 0.0;
-
-        tree.forEachLeaf([&](LeafClass* leaf){
-            const FReal*const potentials = leaf->getTargets()->getPotentials();
-            const FReal*const forcesX = leaf->getTargets()->getForcesX();
-            const FReal*const forcesY = leaf->getTargets()->getForcesY();
-            const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
-            const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
-
-            for(int idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
-                potential += potentials[idxPart];
-                fx += forcesX[idxPart];
-                fy += forcesY[idxPart];
-                fz += forcesZ[idxPart];
-            }
-        });
-
-        std::cout << "Foces Sum  x = " << fx << " y = " << fy << " z = " << fz << std::endl;
-        std::cout << "Potential = " << potential << std::endl;
-
-        ValidateFMMAlgoProc<OctreeClass,ContainerClass>(&tree,&treeValide);
+      FFmaBinLoader loaderSeq(filename);
+      FPoint position;
+      FReal physicalValue;
+      for(FSize idxPart = 0 ; idxPart < loaderSeq.getNumberOfParticles() ; ++idxPart){
+	loaderSeq.fillParticle(&position,&physicalValue);
+	treeValide.insert(position,physicalValue);
+      }
     }
+      
+    std::cout << "Working on particles ..." << std::endl;
+    FmmClassNoProc algoValide(&treeValide,&kernels);
+    counter.tic();
+    algoValide.execute();
+    counter.tac();
+    std::cout << "Done  " << "(@Algorithm = " << counter.elapsed() << "s)." << std::endl;
+      
+    FReal potential = 0;
+    FReal fx = 0.0, fy = 0.0, fz = 0.0;
+	
+    treeValide.forEachLeaf([&](LeafClass* leaf){ // sum over the sequential reference tree that algoValide has just computed
+        const FReal*const potentials = leaf->getTargets()->getPotentials();
+        const FReal*const forcesX = leaf->getTargets()->getForcesX();
+        const FReal*const forcesY = leaf->getTargets()->getForcesY();
+        const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
+        const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
+
+        for(int idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
+          potential += potentials[idxPart];
+          fx += forcesX[idxPart];
+          fy += forcesY[idxPart];
+          fz += forcesZ[idxPart];
+        }
+      });
+
+    std::cout << "Foces Sum  x = " << fx << " y = " << fy << " z = " << fz << std::endl;
+    std::cout << "Potential = " << potential << std::endl;
+
+    ValidateFMMAlgoProc<OctreeClass,ContainerClass>(&tree,&treeValide);
+  }
 #endif
 
 
-    // -----------------------------------------------------
+  // -----------------------------------------------------
 
-    return 0;
+  return 0;
 }
 
 
diff --git a/Tests/Kernels/testSphericalTsmAlgorithm.cpp b/Tests/Kernels/testTsmAlgorithm.cpp
similarity index 66%
rename from Tests/Kernels/testSphericalTsmAlgorithm.cpp
rename to Tests/Kernels/testTsmAlgorithm.cpp
index bda26e84d36074d8cbe85d2a42a6ad7db9e417c6..54ba515cec959786d8d2f96ec3b9ca71297c1f79 100755
--- a/Tests/Kernels/testSphericalTsmAlgorithm.cpp
+++ b/Tests/Kernels/testTsmAlgorithm.cpp
@@ -32,6 +32,9 @@
 #include "../../Src/Kernels/Spherical/FSphericalKernel.hpp"
 #include "../../Src/Kernels/Spherical/FSphericalCell.hpp"
 
+#include "../../Src/Kernels/Rotation/FRotationKernel.hpp"
+#include "../../Src/Kernels/Rotation/FRotationCell.hpp"
+
 #include "../../Src/Files/FFmaTsmLoader.hpp"
 
 #include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"
@@ -44,26 +47,17 @@
 
 
 // Simply create particles and try the kernels
-int main(int argc, char ** argv){
-    typedef FTypedSphericalCell            CellClass;
-    typedef FP2PParticleContainer         ContainerClass;
-
-    typedef FTypedLeaf< ContainerClass >                      LeafClass;
-    typedef FOctree< CellClass, ContainerClass , LeafClass >  OctreeClass;
-    typedef FSphericalKernel< CellClass, ContainerClass >          KernelClass;
-
-    typedef FFmmAlgorithmTsm<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass > FmmClass;
-    ///////////////////////What we do/////////////////////////////
-    std::cout << ">> This executable has to be used to test Spherical on a Tsm system.\n";
-    //////////////////////////////////////////////////////////////
-    const int DevP = FParameters::getValue(argc,argv,"-p", 8);
+template <class CellClass, class ContainerClass, class LeafClass, class OctreeClass,
+          class KernelClass, class FmmClass, typename... Args>
+int testFunction(int argc, char ** argv, Args... kernelPreArgs){
+    FTic counter;
+    // Retrieve parameters
     const int NbLevels = FParameters::getValue(argc,argv,"-h", 5);
     const int SizeSubLevels = FParameters::getValue(argc,argv,"-sh", 3);
-    FTic counter;
-
+    // Get working file
     const char* const filename = FParameters::getStr(argc,argv,"-f", "../Data/test20k.tsm.fma");
     std::cout << "Opening : " << filename << "\n";
-
+    // Create particles loader
     FFmaTsmLoader loader(filename);
     if(!loader.isOpen()){
         std::cout << "Loader Error, " << filename << " is missing\n";
@@ -71,7 +65,7 @@ int main(int argc, char ** argv){
     }
 
     // -----------------------------------------------------
-    CellClass::Init(DevP);
+    // Build the tree
     OctreeClass tree(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
 
     // -----------------------------------------------------
@@ -96,7 +90,7 @@ int main(int argc, char ** argv){
     std::cout << "Create kernel ..." << std::endl;
     counter.tic();
 
-    KernelClass kernels(DevP, NbLevels, loader.getBoxWidth(), loader.getCenterOfBox());
+    KernelClass kernels( kernelPreArgs... , NbLevels, loader.getBoxWidth(), loader.getCenterOfBox());
 
     counter.tac();
     std::cout << "Done  " << " in " << counter.elapsed() << "s)." << std::endl;
@@ -142,5 +136,45 @@ int main(int argc, char ** argv){
     return 0;
 }
 
+// This is the real main!
+int main(int argc, char ** argv){
+    std::cout << "[PARAM] Use Parameters -spherical -rotation -chebyshev\n";
+
+    if( FParameters::existParameter(argc,argv,"-spherical") ){
+        std::cout << "[INFO] -spherical is used\n";
+        // Define the type stack (cell, container, leaf, octree, kernel, algorithm) for the spherical kernel
+        typedef FTypedSphericalCell            CellClass;
+        typedef FP2PParticleContainer         ContainerClass;
+
+        typedef FTypedLeaf< ContainerClass >                      LeafClass;
+        typedef FOctree< CellClass, ContainerClass , LeafClass >  OctreeClass;
+        typedef FSphericalKernel< CellClass, ContainerClass >          KernelClass;
+
+        typedef FFmmAlgorithmTsm<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass > FmmClass;
 
+        const int DevP = FParameters::getValue(argc,argv,"-p", 8);
+        CellClass::Init(DevP);
 
+        // Call Main function
+        testFunction< CellClass, ContainerClass, LeafClass, OctreeClass, KernelClass, FmmClass>(argc, argv, DevP);
+    }
+
+    if( FParameters::existParameter(argc,argv,"-rotation") ){
+        std::cout << "[INFO] -rotation is used\n";
+        // Define the type stack (cell, container, leaf, octree, kernel, algorithm) for the rotation kernel
+        static const int P = 9;
+        typedef FTypedRotationCell<P>            CellClass;
+        typedef FP2PParticleContainer         ContainerClass;
+
+        typedef FTypedLeaf< ContainerClass >                      LeafClass;
+        typedef FOctree< CellClass, ContainerClass , LeafClass >  OctreeClass;
+        typedef FRotationKernel< CellClass, ContainerClass, P >          KernelClass;
+
+        typedef FFmmAlgorithmTsm<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass > FmmClass;
+
+        // Call Main function
+        testFunction< CellClass, ContainerClass, LeafClass, OctreeClass, KernelClass, FmmClass>(argc, argv);
+    }
+
+    return 0;
+}
diff --git a/Tests/Utils/testChebInterpolator.cpp b/Tests/Utils/testChebInterpolator.cpp
index 7366c97dacfb97eb846d71c5e126a188b06e0e1c..2a7b18582deb751b05c01a44f85f901df96ea32d 100755
--- a/Tests/Utils/testChebInterpolator.cpp
+++ b/Tests/Utils/testChebInterpolator.cpp
@@ -30,7 +30,7 @@
 
 #include "../../Src/Containers/FVector.hpp"
 
-#include "../../Src/Utils/FAssertable.hpp"
+#include "../../Src/Utils/FAssert.hpp"
 #include "../../Src/Utils/FPoint.hpp"
 
 
diff --git a/Tests/Utils/testChebOctree.cpp b/Tests/Utils/testChebOctree.cpp
index 5602bb9f31d284d25adf2ebf4daf5eb6fcf7a98f..5742d9d17d1d1814d713e37be3b9b06711874d62 100755
--- a/Tests/Utils/testChebOctree.cpp
+++ b/Tests/Utils/testChebOctree.cpp
@@ -29,7 +29,7 @@
 #include "../../Src/Containers/FOctree.hpp"
 #include "../../Src/Containers/FVector.hpp"
 
-#include "../../Src/Utils/FAssertable.hpp"
+#include "../../Src/Utils/FAssert.hpp"
 #include "../../Src/Utils/FPoint.hpp"
 
 #include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
diff --git a/Tests/Utils/testChebSxUCBSy.cpp b/Tests/Utils/testChebSxUCBSy.cpp
index 353594fdca6e3b0ddbb14ea36641f08cb162631f..8ad70da9a19a2e5b2c2fccd4d93583f66f0abca0 100755
--- a/Tests/Utils/testChebSxUCBSy.cpp
+++ b/Tests/Utils/testChebSxUCBSy.cpp
@@ -30,7 +30,7 @@
 
 #include "../../Src/Containers/FVector.hpp"
 
-#include "../../Src/Utils/FAssertable.hpp"
+#include "../../Src/Utils/FAssert.hpp"
 #include "../../Src/Utils/FPoint.hpp"
 
 
diff --git a/Tests/Utils/testChebTensorProduct.cpp b/Tests/Utils/testChebTensorProduct.cpp
index cd2b75dc8a08cf2776c0bba8af52e447b0d6ea65..38d57bb953f509b61effeafd88b0850b2e594889 100755
--- a/Tests/Utils/testChebTensorProduct.cpp
+++ b/Tests/Utils/testChebTensorProduct.cpp
@@ -31,7 +31,7 @@
 
 #include "../../Src/Containers/FVector.hpp"
 
-#include "../../Src/Utils/FAssertable.hpp"
+#include "../../Src/Utils/FAssert.hpp"
 #include "../../Src/Utils/FPoint.hpp"
 
 #include "../../Src/Kernels/Chebyshev/FChebTensor.hpp"
diff --git a/Tests/Utils/testFmmAlgorithmProc.cpp b/Tests/Utils/testFmmAlgorithmProc.cpp
index 476b124ef46d86144385525acca30e3f7e422056..c139d6ac27badc741e045736e0a3a5865cdecdfb 100755
--- a/Tests/Utils/testFmmAlgorithmProc.cpp
+++ b/Tests/Utils/testFmmAlgorithmProc.cpp
@@ -29,13 +29,12 @@
 #include "../../Src/Components/FSimpleLeaf.hpp"
 
 #include "../../Src/Utils/FPoint.hpp"
-#include "../../Src/Components/FAbstractSendable.hpp"
 
 #include "../../Src/Components/FTestCell.hpp"
 #include "../../Src/Components/FTestKernels.hpp"
 #include "../../Src/Components/FTestParticleContainer.hpp"
 
-
+//#include "../../Src/Core/FFmmAlgorithmProcMpi.hpp"
 #include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
 #include "../../Src/Core/FFmmAlgorithmThread.hpp"
 
@@ -51,8 +50,8 @@
 
 
 /** This program show an example of use of the fmm threaded + mpi algo
-  * it also check that each particles is impacted each other particles
-  */
+ * it also checks that each particle is impacted by every other particle
+ */
 
 /////////////////////////////////////////////////////////////////////////////
 // Test function
@@ -61,229 +60,229 @@
 // Check if tree is built correctly
 template<class OctreeClass>
 void ValidateTree(OctreeClass& realTree,
-                        OctreeClass& treeValide, const FMpi::FComm& comm){
-    FSize totalNbLeafs = 0;
-    {
+		  OctreeClass& treeValide, const FMpi::FComm& comm){
+  FSize totalNbLeafs = 0;
+  {
 
-        typename OctreeClass::Iterator octreeIterator(&treeValide);
-        octreeIterator.gotoBottomLeft();
-        do {
-            ++totalNbLeafs;
-        } while(octreeIterator.moveRight());
-    }
+    typename OctreeClass::Iterator octreeIterator(&treeValide);
+    octreeIterator.gotoBottomLeft();
+    do {
+      ++totalNbLeafs;
+    } while(octreeIterator.moveRight());
+  }
 
-    const FSize myLeftLeaf = comm.getLeft(totalNbLeafs);
-    const FSize myRightLeaf = comm.getRight(totalNbLeafs);
+  const FSize myLeftLeaf = comm.getLeft(totalNbLeafs);
+  const FSize myRightLeaf = comm.getRight(totalNbLeafs);
 
-    //printf("%d should go from %d to %d leaf (on %d total leafs)\n", comm.processId(), myLeftLeaf, myRightLeaf, totalNbLeafs);
+  //printf("%d should go from %d to %d leaf (on %d total leafs)\n", comm.processId(), myLeftLeaf, myRightLeaf, totalNbLeafs);
 
-    typename OctreeClass::Iterator octreeIteratorValide(&treeValide);
-    octreeIteratorValide.gotoBottomLeft();
-    for(FSize idxLeaf = 0 ; idxLeaf < myLeftLeaf ; ++idxLeaf){
-        if(!octreeIteratorValide.moveRight()){
-            printf("Error cannot access to the left leaf %lld in the valide tree\n", myLeftLeaf);
-        }
+  typename OctreeClass::Iterator octreeIteratorValide(&treeValide);
+  octreeIteratorValide.gotoBottomLeft();
+  for(FSize idxLeaf = 0 ; idxLeaf < myLeftLeaf ; ++idxLeaf){
+    if(!octreeIteratorValide.moveRight()){
+      printf("Error cannot access to the left leaf %lld in the valide tree\n", myLeftLeaf);
     }
+  }
 
-    typename OctreeClass::Iterator octreeIterator(&realTree);
-    octreeIterator.gotoBottomLeft();
+  typename OctreeClass::Iterator octreeIterator(&realTree);
+  octreeIterator.gotoBottomLeft();
+
+  for(FSize idxLeaf = myLeftLeaf ; idxLeaf < myRightLeaf ; ++idxLeaf){
+    if(octreeIteratorValide.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()){
+      printf("Error index are different, valide %lld invalid %lld\n",octreeIteratorValide.getCurrentGlobalIndex(),
+	     octreeIterator.getCurrentGlobalIndex());
+      break;
+    }
+    if(octreeIteratorValide.getCurrentListSrc()->getNbParticles() != octreeIterator.getCurrentListSrc()->getNbParticles()){
+      printf("Error leafs do not have the same number of particles, valide %d, invalide %d\n",
+	     octreeIteratorValide.getCurrentListSrc()->getNbParticles(), octreeIterator.getCurrentListSrc()->getNbParticles() );
+    }
 
-    for(FSize idxLeaf = myLeftLeaf ; idxLeaf < myRightLeaf ; ++idxLeaf){
-        if(octreeIteratorValide.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()){
-            printf("Error index are different, valide %lld invalid %lld\n",octreeIteratorValide.getCurrentGlobalIndex(),
-                   octreeIterator.getCurrentGlobalIndex());
-            break;
-        }
-        if(octreeIteratorValide.getCurrentListSrc()->getNbParticles() != octreeIterator.getCurrentListSrc()->getNbParticles()){
-            printf("Error leafs do not have the same number of particles, valide %d, invalide %d\n",
-                   octreeIteratorValide.getCurrentListSrc()->getNbParticles(), octreeIterator.getCurrentListSrc()->getNbParticles() );
-        }
-
-        //printf("index %lld with %d particles\n", octreeIteratorValide.getCurrentGlobalIndex(), octreeIteratorValide.getCurrentListSrc()->getSize());
-
-        if(!octreeIteratorValide.moveRight() && idxLeaf != myRightLeaf - 1){
-            printf("Error cannot valide tree end to early, idxLeaf %lld myRightLeaf %lld\n", idxLeaf, myRightLeaf);
-            break;
-        }
-
-        if(!octreeIterator.moveRight() && idxLeaf != myRightLeaf - 1){
-            printf("Error cannot test tree end to early, idxLeaf %lld myRightLeaf %lld\n", idxLeaf, myRightLeaf);
-            break;
-        }
+    //printf("index %lld with %d particles\n", octreeIteratorValide.getCurrentGlobalIndex(), octreeIteratorValide.getCurrentListSrc()->getSize());
+
+    if(!octreeIteratorValide.moveRight() && idxLeaf != myRightLeaf - 1){
+      printf("Error cannot valide tree end to early, idxLeaf %lld myRightLeaf %lld\n", idxLeaf, myRightLeaf);
+      break;
     }
 
+    if(!octreeIterator.moveRight() && idxLeaf != myRightLeaf - 1){
+      printf("Error cannot test tree end to early, idxLeaf %lld myRightLeaf %lld\n", idxLeaf, myRightLeaf);
+      break;
+    }
+  }
+
 }
 
 
 
 /** This function tests the octree to be sure that the fmm algorithm
-  * has worked completly.
-  */
+ * has worked completely.
+ */
 template<class OctreeClass, class ContainerClass, class FmmClassProc>
 void ValidateFMMAlgoProc(OctreeClass* const badTree,
-                         OctreeClass* const valideTree,
-                         FmmClassProc* const fmm){
-    const int OctreeHeight = badTree->getHeight();
-    {
-        typename OctreeClass::Iterator octreeIterator(badTree);
-        octreeIterator.gotoBottomLeft();
-
-        typename OctreeClass::Iterator octreeIteratorValide(valideTree);
-        octreeIteratorValide.gotoBottomLeft();
-
-        for(int level = OctreeHeight - 1 ; level > 0 && fmm->hasWorkAtLevel(level) ; --level){
-
-            while(octreeIteratorValide.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()) {
-                octreeIteratorValide.moveRight();
-            }
-
-            while(octreeIteratorValide.getCurrentGlobalIndex() != fmm->getWorkingInterval(level).min){
-                octreeIteratorValide.moveRight();
-                octreeIterator.moveRight();
-            }
-
-            FSize countCheck = 0;
-            do{
-                if(octreeIterator.getCurrentGlobalIndex() != octreeIteratorValide.getCurrentGlobalIndex()){
-                    std::cout << "Error index are not equal!" << std::endl;
-                }
-                else{
-                    if(octreeIterator.getCurrentCell()->getDataUp() != octreeIteratorValide.getCurrentCell()->getDataUp()){
-                        std::cout << "M2M error at level " << level << " up bad " << octreeIterator.getCurrentCell()->getDataUp()
-                                << " good " << octreeIteratorValide.getCurrentCell()->getDataUp() << " index " << octreeIterator.getCurrentGlobalIndex() << std::endl;
-                    }
-                    if(octreeIterator.getCurrentCell()->getDataDown() != octreeIteratorValide.getCurrentCell()->getDataDown()){
-                        std::cout << "L2L error at level " << level << " down bad " << octreeIterator.getCurrentCell()->getDataDown()
-                                << " good " << octreeIteratorValide.getCurrentCell()->getDataDown() << " index " << octreeIterator.getCurrentGlobalIndex() << std::endl;
-                    }
-                }
-                ++countCheck;
-            } while(octreeIteratorValide.moveRight() && octreeIterator.moveRight());
-
-            // Check that each particle has been summed with all other
-
-            octreeIterator.moveUp();
-            octreeIterator.gotoLeft();
-
-            octreeIteratorValide.moveUp();
-            octreeIteratorValide.gotoLeft();
-        }
-    }
+			 OctreeClass* const valideTree,
+			 FmmClassProc* const fmm){
+  const int OctreeHeight = badTree->getHeight();
+  {
+    typename OctreeClass::Iterator octreeIterator(badTree);
+    octreeIterator.gotoBottomLeft();
+
+    typename OctreeClass::Iterator octreeIteratorValide(valideTree);
+    octreeIteratorValide.gotoBottomLeft();
 
+    for(int level = OctreeHeight - 1 ; level > 0 && fmm->hasWorkAtLevel(level) ; --level){
+
+      while(octreeIteratorValide.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()) {
+	octreeIteratorValide.moveRight();
+      }
+
+      while(octreeIteratorValide.getCurrentGlobalIndex() != fmm->getWorkingInterval(level).min){
+	octreeIteratorValide.moveRight();
+	octreeIterator.moveRight();
+      }
+
+      FSize countCheck = 0;
+      do{
+	if(octreeIterator.getCurrentGlobalIndex() != octreeIteratorValide.getCurrentGlobalIndex()){
+	  std::cout << "Error index are not equal!" << std::endl;
+	}
+	else{
+	  if(octreeIterator.getCurrentCell()->getDataUp() != octreeIteratorValide.getCurrentCell()->getDataUp()){
+	    std::cout << "M2M error at level " << level << " up bad " << octreeIterator.getCurrentCell()->getDataUp()
+		      << " good " << octreeIteratorValide.getCurrentCell()->getDataUp() << " index " << octreeIterator.getCurrentGlobalIndex() << std::endl;
+	  }
+	  if(octreeIterator.getCurrentCell()->getDataDown() != octreeIteratorValide.getCurrentCell()->getDataDown()){
+	    std::cout << "L2L error at level " << level << " down bad " << octreeIterator.getCurrentCell()->getDataDown()
+		      << " good " << octreeIteratorValide.getCurrentCell()->getDataDown() << " index " << octreeIterator.getCurrentGlobalIndex() << std::endl;
+	  }
+	}
+	++countCheck;
+      } while(octreeIteratorValide.moveRight() && octreeIterator.moveRight());
+      
+      // Move both iterators up to the leftmost cell of the parent level
+      
+      octreeIterator.moveUp();
+      octreeIterator.gotoLeft();
+      
+      octreeIteratorValide.moveUp();
+      octreeIteratorValide.gotoLeft();
+    }
+  }
+
+  {
+    FSize NbPart = 0;
+    FSize NbLeafs = 0;
+    { // Check that each particle has been summed with all other
+      typename OctreeClass::Iterator octreeIterator(valideTree);
+      octreeIterator.gotoBottomLeft();
+      do{
+	NbPart += octreeIterator.getCurrentListSrc()->getNbParticles();
+	++NbLeafs;
+      } while(octreeIterator.moveRight());
+    }
     {
-        FSize NbPart = 0;
-        FSize NbLeafs = 0;
-        { // Check that each particle has been summed with all other
-            typename OctreeClass::Iterator octreeIterator(valideTree);
-            octreeIterator.gotoBottomLeft();
-            do{
-                NbPart += octreeIterator.getCurrentListSrc()->getNbParticles();
-                ++NbLeafs;
-            } while(octreeIterator.moveRight());
-        }
-        {
-            // Check that each particle has been summed with all other
-            typename OctreeClass::Iterator octreeIterator(badTree);
-            octreeIterator.gotoBottomLeft();
-
-            do {
-                const bool isUsingTsm = (octreeIterator.getCurrentListTargets() != octreeIterator.getCurrentListSrc());
-
-                ContainerClass* container = (octreeIterator.getCurrentListTargets());
-                const long long int*const dataDown = container->getDataDown();
-
-                for(FSize idxPart = 0 ; idxPart < container->getNbParticles() ; ++idxPart){
-                    // If a particles has been impacted by less than NbPart - 1 (the current particle)
-                    // there is a problem
-                    if( (!isUsingTsm && dataDown[idxPart] != NbPart - 1) ||
-                        (isUsingTsm && dataDown[idxPart] != NbPart) ){
-                        std::cout << "Problem L2P + P2P, value on particle is : " << dataDown[idxPart] <<
-                                     " at pos " << idxPart << " index is " << octreeIterator.getCurrentGlobalIndex() << "\n";
-                    }
-                }
-            } while( octreeIterator.moveRight());
-        }
+      // Check that each particle has been summed with all the others
+      typename OctreeClass::Iterator octreeIterator(badTree);
+      octreeIterator.gotoBottomLeft();
+
+      do {
+	const bool isUsingTsm = (octreeIterator.getCurrentListTargets() != octreeIterator.getCurrentListSrc());
+
+	ContainerClass* container = (octreeIterator.getCurrentListTargets());
+	const long long int*const dataDown = container->getDataDown();
+
+	for(FSize idxPart = 0 ; idxPart < container->getNbParticles() ; ++idxPart){
+	  // Each particle must have been impacted by every other particle: NbPart - 1
+	  // (the current particle excluded) without Tsm, NbPart with Tsm; otherwise there is a problem
+	  if( (!isUsingTsm && dataDown[idxPart] != NbPart - 1) ||
+	      (isUsingTsm && dataDown[idxPart] != NbPart) ){
+	    std::cout << "Problem L2P + P2P, value on particle is : " << dataDown[idxPart] <<
+	      " at pos " << idxPart << " index is " << octreeIterator.getCurrentGlobalIndex() << "\n";
+	  }
+	}
+      } while( octreeIterator.moveRight());
     }
+  }
+  {
     {
-        {
-            // Check that each particle has been summed with all other
-            typename OctreeClass::Iterator octreeIterator(badTree);
-            octreeIterator.gotoBottomLeft();
-
-            do {
-                if(octreeIterator.getCurrentListSrc()->getNbParticles() != octreeIterator.getCurrentCell()->getDataUp()){
-                    printf("P2M problem nb part %d data up %lld \n",
-                           octreeIterator.getCurrentListSrc()->getNbParticles(), octreeIterator.getCurrentCell()->getDataUp());
-                }
-            } while( octreeIterator.moveRight() );
-        }
+      // Check P2M: the data up of each leaf cell must equal its number of source particles
+      typename OctreeClass::Iterator octreeIterator(badTree);
+      octreeIterator.gotoBottomLeft();
+
+      do {
+	if(octreeIterator.getCurrentListSrc()->getNbParticles() != octreeIterator.getCurrentCell()->getDataUp()){
+	  printf("P2M problem nb part %d data up %lld \n",
+		 octreeIterator.getCurrentListSrc()->getNbParticles(), octreeIterator.getCurrentCell()->getDataUp());
+	}
+      } while( octreeIterator.moveRight() );
     }
+  }
 
-    {
-        // Check that each particle has been summed with all other
-        typename OctreeClass::Iterator octreeIterator(badTree);
-        octreeIterator.gotoBottomLeft();
-
-        typename OctreeClass::Iterator valideOctreeIterator(valideTree);
-        valideOctreeIterator.gotoBottomLeft();
-        while(valideOctreeIterator.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()){
-            valideOctreeIterator.moveRight();
-        }
-
-        do {
-            if(valideOctreeIterator.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()){
-                printf("Do not have the same index valide %lld invalide %lld \n",
-                       valideOctreeIterator.getCurrentGlobalIndex(), octreeIterator.getCurrentGlobalIndex());
-                break;
-            }
-
-            if(octreeIterator.getCurrentListTargets()->getNbParticles() != valideOctreeIterator.getCurrentListTargets()->getNbParticles()){
-                printf("Do not have the same number of particle at leaf id %lld, valide %d invalide %d \n",
-                       octreeIterator.getCurrentGlobalIndex(), valideOctreeIterator.getCurrentListTargets()->getNbParticles(),
-                       octreeIterator.getCurrentListTargets()->getNbParticles());
-            }
-            else {
-                ContainerClass* container = (octreeIterator.getCurrentListTargets());
-                const long long int*const dataDown = container->getDataDown();
-
-                ContainerClass* containerValide = (valideOctreeIterator.getCurrentListTargets());
-                const long long int*const dataDownValide = containerValide->getDataDown();
-
-                for(int idxPart = 0 ; idxPart < container->getNbParticles() ; ++idxPart){
-                    // If a particles has been impacted by less than NbPart - 1 (the current particle)
-                    // there is a problem
-                    if( dataDown[idxPart] != dataDownValide[idxPart]){
-                        std::cout << "Problem on leaf " << octreeIterator.getCurrentGlobalIndex() <<
-                                     " part " << idxPart << " valide data down " << dataDownValide[idxPart] <<
-                                     " invalide " << dataDown[idxPart] << "\n";
-                        std::cout << "Data down for leaf is: valide " << valideOctreeIterator.getCurrentCell()->getDataDown()
-                                  << " invalide " << octreeIterator.getCurrentCell()->getDataDown()
-                                  << " size is: valide " <<  valideOctreeIterator.getCurrentListTargets()->getNbParticles()
-                                  << " invalide " << octreeIterator.getCurrentListTargets()->getNbParticles() << std::endl;
-                    }
-                }
-            }
-
-        }while( octreeIterator.moveRight() && valideOctreeIterator.moveRight());
+  {
+    // Compare, leaf by leaf, the target particles of the tested tree with those of the valide tree
+    typename OctreeClass::Iterator octreeIterator(badTree);
+    octreeIterator.gotoBottomLeft();
+
+    typename OctreeClass::Iterator valideOctreeIterator(valideTree);
+    valideOctreeIterator.gotoBottomLeft();
+    while(valideOctreeIterator.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()){
+      valideOctreeIterator.moveRight();
     }
 
+    do {
+      if(valideOctreeIterator.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()){
+	printf("Do not have the same index valide %lld invalide %lld \n",
+	       valideOctreeIterator.getCurrentGlobalIndex(), octreeIterator.getCurrentGlobalIndex());
+	break;
+      }
+
+      if(octreeIterator.getCurrentListTargets()->getNbParticles() != valideOctreeIterator.getCurrentListTargets()->getNbParticles()){
+	printf("Do not have the same number of particle at leaf id %lld, valide %d invalide %d \n",
+	       octreeIterator.getCurrentGlobalIndex(), valideOctreeIterator.getCurrentListTargets()->getNbParticles(),
+	       octreeIterator.getCurrentListTargets()->getNbParticles());
+      }
+      else {
+	ContainerClass* container = (octreeIterator.getCurrentListTargets());
+	const long long int*const dataDown = container->getDataDown();
+
+	ContainerClass* containerValide = (valideOctreeIterator.getCurrentListTargets());
+	const long long int*const dataDownValide = containerValide->getDataDown();
+
+	for(int idxPart = 0 ; idxPart < container->getNbParticles() ; ++idxPart){
+	  // The data down of each particle must match the data down of the same
+	  // particle in the valide tree; otherwise there is a problem
+	  if( dataDown[idxPart] != dataDownValide[idxPart]){
+	    std::cout << "Problem on leaf " << octreeIterator.getCurrentGlobalIndex() <<
+	      " part " << idxPart << " valide data down " << dataDownValide[idxPart] <<
+	      " invalide " << dataDown[idxPart] << "\n";
+	    std::cout << "Data down for leaf is: valide " << valideOctreeIterator.getCurrentCell()->getDataDown()
+		      << " invalide " << octreeIterator.getCurrentCell()->getDataDown()
+		      << " size is: valide " <<  valideOctreeIterator.getCurrentListTargets()->getNbParticles()
+		      << " invalide " << octreeIterator.getCurrentListTargets()->getNbParticles() << std::endl;
+	  }
+	}
+      }
+
+    }while( octreeIterator.moveRight() && valideOctreeIterator.moveRight());
+  }
+
 }
 
 
 /** To print an octree
-  * used to debug and understand how the values were passed
-  */
+ * used to debug and understand how the values were passed
+ */
 template<class OctreeClass>
 void print(OctreeClass* const valideTree){
-    typename OctreeClass::Iterator octreeIterator(valideTree);
-    for(int idxLevel = valideTree->getHeight() - 1 ; idxLevel > 1 ; --idxLevel ){
-        do{
-            std::cout << "[" << octreeIterator.getCurrentGlobalIndex() << "] up:" << octreeIterator.getCurrentCell()->getDataUp() << " down:" << octreeIterator.getCurrentCell()->getDataDown() << "\t";
-        } while(octreeIterator.moveRight());
-        std::cout << "\n";
-        octreeIterator.gotoLeft();
-        octreeIterator.moveDown();
-    }
+  typename OctreeClass::Iterator octreeIterator(valideTree);
+  for(int idxLevel = valideTree->getHeight() - 1 ; idxLevel > 1 ; --idxLevel ){
+    do{
+      std::cout << "[" << octreeIterator.getCurrentGlobalIndex() << "] up:" << octreeIterator.getCurrentCell()->getDataUp() << " down:" << octreeIterator.getCurrentCell()->getDataDown() << "\t";
+    } while(octreeIterator.moveRight());
+    std::cout << "\n";
+    octreeIterator.gotoLeft();
+    octreeIterator.moveDown();
+  }
 }
 
 
@@ -307,142 +306,154 @@ typedef FFmmAlgorithmThreadProc<OctreeClass, CellClass, ContainerClass, KernelCl
 
 // Simply create particles and try the kernels
 int main(int argc, char ** argv){
-    ///////////////////////What we do/////////////////////////////
-    std::cout << ">> This executable has to be used to test the FMM algorithm.\n";
-    //////////////////////////////////////////////////////////////
-
-    FMpi app( argc, argv);
-
-    const int NbLevels = FParameters::getValue(argc,argv,"-h", 5);
-    const int SizeSubLevels = FParameters::getValue(argc,argv,"-sh", 3);
-    FTic counter;
-    const char* const defaultFilename = (sizeof(FReal) == sizeof(float))?
-                                    "../../Data/test20k.bin.fma.single":
-                                    "../../Data/test20k.bin.fma.double";
-    const char* const filename = FParameters::getStr(argc,argv,"-f", defaultFilename);
-    std::cout << "Opening : " << filename << "\n";
-
-    FMpiFmaLoader loader(filename,app.global());
-    if(!loader.isOpen()){
-        std::cout << "Loader Error, " << filename << " is missing\n";
-        return 1;
-    }
-
-    // The real tree to work on
-    OctreeClass realTree(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
-
-    if( app.global().processCount() != 1){
-        //////////////////////////////////////////////////////////////////////////////////
-        // Build tree from mpi loader
-        //////////////////////////////////////////////////////////////////////////////////
-        std::cout << "Build Tree ..." << std::endl;
-        counter.tic();
-
-        struct TestParticle{
-            FPoint position;
-            const FPoint& getPosition(){
-                return position;
-            }
-        };
-
-        TestParticle* particles = new TestParticle[loader.getNumberOfParticles()];
-        memset(particles, 0, sizeof(TestParticle) * loader.getNumberOfParticles());
-        FReal physicalValue; //unused
-        for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
-            loader.fillParticle(&particles[idxPart].position,&physicalValue);
-        }
-
-        FVector<TestParticle> finalParticles;
-        FMpiTreeBuilder< TestParticle >::ArrayToTree(app.global(), particles, loader.getNumberOfParticles(),
-                                                                           realTree.getBoxCenter(),
-                                                                           realTree.getBoxWidth(),
-                                                     realTree.getHeight(), &finalParticles);
-
-        for(int idx = 0 ; idx < finalParticles.getSize(); ++idx){
-            realTree.insert(finalParticles[idx].position);
-        }
-
-        delete[] particles;
-
-        counter.tac();
-        std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
-
-        //////////////////////////////////////////////////////////////////////////////////
-    }    
-    else{
-        FPoint position;
-        FReal physicalValue;
-        for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
-            loader.fillParticle(&position,&physicalValue);
-            realTree.insert(position);
-        }
-    }
-
+  ///////////////////////What we do/////////////////////////////
+  std::cout << ">> This executable has to be used to test the FMM algorithm.\n";
+  //////////////////////////////////////////////////////////////
+
+  FMpi app( argc, argv);
+
+  const int NbLevels = FParameters::getValue(argc,argv,"-h", 5);
+  const int SizeSubLevels = FParameters::getValue(argc,argv,"-sh", 3);
+  FTic counter;
+  const char* const defaultFilename = (sizeof(FReal) == sizeof(float))?
+    "../../Data/test20k.bin.fma.single":
+    "../../Data/test20k.bin.fma.double";
+  const char* const filename = FParameters::getStr(argc,argv,"-f", defaultFilename);
+  std::cout << "Opening : " << filename << "\n";
+
+  FMpiFmaLoader loader(filename,app.global());
+  if(!loader.isOpen()){
+    std::cout << "Loader Error, " << filename << " is missing\n";
+    return 1;
+  }
+
+  std::cout << "Simulation properties :\n"; // echo what the MPI loader reports about the input
+  std::cout << "Nb Particles " << loader.getNumberOfParticles() << "\n";
+  std::cout << "Box Width : " << loader.getBoxWidth() << "\n";
+  std::cout << "Box Center : " << loader.getCenterOfBox() << "\n";
+
+  // The real tree to work on
+  OctreeClass realTree(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
+
+  if( app.global().processCount() != 1){ // more than one MPI process: go through FMpiTreeBuilder
     //////////////////////////////////////////////////////////////////////////////////
-    // Create real tree
+    // Build tree from mpi loader
     //////////////////////////////////////////////////////////////////////////////////
+    std::cout << "Build Tree ..." << std::endl;
+    counter.tic();
 
-    OctreeClass treeValide(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
-    {
-        FFmaBinLoader loaderSeq(filename);
-        FPoint position;
-        FReal physicalValue;
-        for(FSize idxPart = 0 ; idxPart < loaderSeq.getNumberOfParticles() ; ++idxPart){
-            loader.fillParticle(&position,&physicalValue);
-            treeValide.insert(position);
-        }
+    struct TestParticle{
+      FPoint position;
+      const FPoint& getPosition(){
+	return position;
+      }
+    };
+
+    TestParticle* particles = new TestParticle[loader.getNumberOfParticles()]; // scratch array, released with delete[] once the tree is filled
+    memset(particles, 0, sizeof(TestParticle) * loader.getNumberOfParticles());
+    FReal physicalValue; //unused
+    for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+      loader.fillParticle(&particles[idxPart].position,&physicalValue);
     }
 
-    //////////////////////////////////////////////////////////////////////////////////
-    // Check particles in tree
-    //////////////////////////////////////////////////////////////////////////////////
-    std::cout << "Validate tree ..." << std::endl;
-    counter.tic();
+    FVector<TestParticle> finalParticles; // filled by ArrayToTree with the particles this process must insert
+    FMpiTreeBuilder< TestParticle >::ArrayToTree(app.global(), particles, loader.getNumberOfParticles(),
+						 realTree.getBoxCenter(),
+						 realTree.getBoxWidth(),
+						 realTree.getHeight(), &finalParticles);
 
-    ValidateTree(realTree, treeValide, app.global());
+    for(int idx = 0 ; idx < finalParticles.getSize(); ++idx){
+      realTree.insert(finalParticles[idx].position);
+    }
+
+    delete[] particles;
 
     counter.tac();
     std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
 
     //////////////////////////////////////////////////////////////////////////////////
+  }    
+  else{ // single process: insert every loaded particle directly
+    FPoint position;
+    FReal physicalValue;
+    const FSize nbParticles = loader.getNumberOfParticles(); // read the count once instead of on every iteration
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+      loader.fillParticle(&position,&physicalValue);
+      realTree.insert(position);
+    }
+  }
+
+  //////////////////////////////////////////////////////////////////////////////////
+  // Create the reference tree (treeValide), filled through a separate FFmaBinLoader
+  //////////////////////////////////////////////////////////////////////////////////
+
+  OctreeClass treeValide(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
+  {
+    FFmaBinLoader loaderSeq(filename); // second reader on the same file, built without the MPI communicator
+
+    std::cout << "Simulation properties :\n";
+    std::cout << "Nb Particles " << loaderSeq.getNumberOfParticles() << "\n";
+    std::cout << "Box Width : " << loaderSeq.getBoxWidth() << "\n";
+    std::cout << "Box Center : " << loaderSeq.getCenterOfBox() << "\n";
+
+    FPoint position;
+    FReal physicalValue;
+    for(FSize idxPart = 0 ; idxPart < loaderSeq.getNumberOfParticles() ; ++idxPart){
+      loaderSeq.fillParticle(&position,&physicalValue); // must read from loaderSeq, the loader whose count bounds this loop
+      treeValide.insert(position);
+    }
+  }
 
-    std::cout << "Working parallel particles ..." << std::endl;
-    counter.tic();
+  //////////////////////////////////////////////////////////////////////////////////
+  // Check particles in tree
+  //////////////////////////////////////////////////////////////////////////////////
+  std::cout << "Validate tree ..." << std::endl;
+  counter.tic();
 
-    KernelClass kernels;
+  ValidateTree(realTree, treeValide, app.global());
 
-    FmmClassProc algo(app.global(),&realTree,&kernels);
-    algo.execute();
+  counter.tac();
+  std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
 
-    counter.tac();
-    std::cout << "Done  " << "(@Algorithm Particles = " << counter.elapsed() << "s)." << std::endl;
+  //////////////////////////////////////////////////////////////////////////////////
 
-    //////////////////////////////////////////////////////////////////////////////////
+  std::cout << "Working parallel particles ..." << std::endl;
+  counter.tic();
 
-    std::cout << "Working sequential particles ..." << std::endl;
-    counter.tic();
+  KernelClass kernels; // one kernel instance, passed to both algorithms below
 
-    FmmClass algoValide(&treeValide,&kernels);
-    algoValide.execute();
+  FmmClassProc algo(app.global(),&realTree,&kernels); // run under test: works on realTree with the MPI communicator
+  algo.execute();
 
-    counter.tac();
-    std::cout << "Done  " << "(@Algorithm Particles = " << counter.elapsed() << "s)." << std::endl;
+  counter.tac();
+  std::cout << "Done  " << "(@Algorithm Particles = " << counter.elapsed() << "s)." << std::endl;
 
-    //////////////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////////
 
-    std::cout << "Checking data ..." << std::endl;
-    counter.tic();
+  std::cout << "Working sequential particles ..." << std::endl;
+  counter.tic();
 
-    ValidateFMMAlgoProc<OctreeClass,ContainerClass, FmmClassProc>(&realTree,&treeValide,&algo);
+  FmmClass algoValide(&treeValide,&kernels); // reference run on treeValide
+  algoValide.execute();
 
-    counter.tac();
-    std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
+  counter.tac();
+  std::cout << "Done  " << "(@Algorithm Particles = " << counter.elapsed() << "s)." << std::endl;
 
-    //////////////////////////////////////////////////////////////////////////////////
-    //////////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////////
+
+  std::cout << "Checking data ..." << std::endl;
+  counter.tic();
+
+  ValidateFMMAlgoProc<OctreeClass,ContainerClass, FmmClassProc>(&realTree,&treeValide,&algo); // compare realTree against treeValide
+
+  counter.tac();
+  std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
+
+  //////////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////////
 
-    return 0;
+  return 0;
 }
 
 
diff --git a/Tests/Utils/testFmmAlgorithmProcPeriodic.cpp b/Tests/Utils/testFmmAlgorithmProcPeriodic.cpp
index 605eb8ede9d02d5fe9a554bba04961dbfd6eecbd..8922047d51f967cf09a6692cbcd4d1b36d39732a 100755
--- a/Tests/Utils/testFmmAlgorithmProcPeriodic.cpp
+++ b/Tests/Utils/testFmmAlgorithmProcPeriodic.cpp
@@ -43,8 +43,6 @@
 
 #include "../../Src/Files/FMpiTreeBuilder.hpp"
 
-#include "../../Src/Components/FAbstractSendable.hpp"
-
 
 /** This program show an example of use of
   * the fmm basic algo
diff --git a/Tests/Utils/testFmmAlgorithmProcRotation.cpp b/Tests/Utils/testFmmAlgorithmProcRotation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e3d0108e5af30b257141f688c2276215cbe534d8
--- /dev/null
+++ b/Tests/Utils/testFmmAlgorithmProcRotation.cpp
@@ -0,0 +1,431 @@
+// ===================================================================================
+// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
+// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
+// This software is a computer program whose purpose is to compute the FMM.
+//
+// This software is governed by the CeCILL-C and LGPL licenses and
+// abiding by the rules of distribution of free software.  
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public and CeCILL-C Licenses for more details.
+// "http://www.cecill.info". 
+// "http://www.gnu.org/licenses".
+// ===================================================================================
+
+// ==== CMAKE =====
+// @FUSE_MPI
+// ================
+
+#include "../../Src/Utils/FTic.hpp"
+#include "../../Src/Utils/FMpi.hpp"
+#include "../../Src/Utils/FParameters.hpp"
+#include "../../Src/Utils/FMath.hpp"
+
+#include "../../Src/Containers/FOctree.hpp"
+#include "../../Src/Containers/FVector.hpp"
+
+#include "../../Src/Kernels/Spherical/FSphericalKernel.hpp"
+#include "../../Src/Kernels/Spherical/FSphericalCell.hpp"
+
+#include "../../Src/Kernels/Rotation/FRotationKernel.hpp"
+#include "../../Src/Kernels/Rotation/FRotationCell.hpp"
+
+#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
+#include "../../Src/Core/FFmmAlgorithmThread.hpp"
+
+#include "../../Src/Components/FSimpleLeaf.hpp"
+#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"
+
+#include "../../Src/Files/FMpiFmaLoader.hpp"
+#include "../../Src/Files/FMpiTreeBuilder.hpp"
+#include "../../Src/Files/FFmaBinLoader.hpp"
+
+#include <iostream>
+
+#include <cstdio>
+#include <cstdlib>
+
+// Comment out the define below to skip the FMM validation
+#define VALIDATE_FMM
+
+/** This program shows an example of use of
+  * the basic FMM algorithm; when VALIDATE_FMM is defined it also checks
+  * the cells and particles against a second, non-MPI run on the same file
+  */
+
+
+#ifdef VALIDATE_FMM
+
+static const FReal Epsilon = FReal(0.0005);
+
+///////////////////////////////////////////////////////
+// to test equality between the good and the potentially bad solution
+///////////////////////////////////////////////////////
+/** True when getInfNorm() and getL2Norm() over the first 36 multipole coefficients are both below Epsilon; *cumul gets their sum. */
+template <class CellClass>
+bool isEqualPole(const CellClass& me, const CellClass& other, FReal*const cumul){
+    FMath::FAccurater errorMeasure;
+    for(int idxCoef = 0; idxCoef < /*CellClass::GetPoleSize()*/ 36; ++idxCoef){
+        errorMeasure.add(me.getMultipole()[idxCoef].getImag(), other.getMultipole()[idxCoef].getImag());
+        errorMeasure.add(me.getMultipole()[idxCoef].getReal(), other.getMultipole()[idxCoef].getReal());
+    }
+    *cumul = errorMeasure.getInfNorm() + errorMeasure.getL2Norm();
+    return errorMeasure.getInfNorm() < Epsilon && errorMeasure.getL2Norm() < Epsilon;
+}
+
+/** True when getInfNorm() and getL2Norm() over the first 36 local coefficients are both below Epsilon; *cumul gets their sum. */
+bool isEqualLocal(const FRotationCell<7>& me, const FRotationCell<7>& other,
+                  FReal*const cumul){
+    FMath::FAccurater errorMeasure;
+    for(int idxCoef = 0; idxCoef < /*FSphericalCell::GetLocalSize()*/ 36; ++idxCoef){
+        errorMeasure.add(me.getLocal()[idxCoef].getImag(), other.getLocal()[idxCoef].getImag());
+        errorMeasure.add(me.getLocal()[idxCoef].getReal(), other.getLocal()[idxCoef].getReal());
+    }
+    *cumul = errorMeasure.getInfNorm() + errorMeasure.getL2Norm();
+    return errorMeasure.getInfNorm() < Epsilon && errorMeasure.getL2Norm() < Epsilon;
+}
+
+
+/** Compare the tree under test (badTree) with a reference tree (valideTree): first the multipole and
+  * local expansions of the cells from the leaf level up to level 2, then the potential and forces of
+  * every particle found in badTree's leaves. Each mismatch is reported on std::cout. */
+template<class OctreeClass, class ContainerClass>
+void ValidateFMMAlgoProc(OctreeClass* const badTree,
+                         OctreeClass* const valideTree){
+    std::cout << "Check Result\n";
+    {
+        const int OctreeHeight = valideTree->getHeight();
+        typename OctreeClass::Iterator octreeIterator(badTree);
+        octreeIterator.gotoBottomLeft();
+
+        typename OctreeClass::Iterator octreeIteratorValide(valideTree);
+        octreeIteratorValide.gotoBottomLeft();
+
+        for(int level = OctreeHeight - 1 ; level > 1 ; --level){
+            // Advance the reference iterator to badTree's first cell; give up when the level is exhausted.
+            while(octreeIteratorValide.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()
+                  && octreeIteratorValide.moveRight()){}
+
+            do {
+                if(octreeIterator.getCurrentGlobalIndex() != octreeIteratorValide.getCurrentGlobalIndex()){
+                    std::cout << "Error index are not equal!" << std::endl;
+                }
+                else{
+                    FReal cumul;
+                    if( !isEqualPole(*octreeIterator.getCurrentCell(),*octreeIteratorValide.getCurrentCell(),&cumul) ){
+                        std::cout << "Pole Data are different. Cumul " << cumul << " at level " << level << " index is " << octreeIterator.getCurrentGlobalIndex() << std::endl;
+                    }
+                    if( !isEqualLocal(*octreeIterator.getCurrentCell(),*octreeIteratorValide.getCurrentCell(),&cumul) ){
+                        std::cout << "Local Data are different. Cumul " << cumul << " at level " << level << " index is " << octreeIterator.getCurrentGlobalIndex() << std::endl;
+                    }
+                }
+            } while(octreeIterator.moveRight() && octreeIteratorValide.moveRight());
+
+            octreeIterator.moveUp();
+            octreeIterator.gotoLeft();
+
+            octreeIteratorValide.moveUp();
+            octreeIteratorValide.gotoLeft();
+        }
+    }
+    {
+        // Leaf level: compare the potential and forces of each particle with its reference counterpart
+        typename OctreeClass::Iterator octreeIterator(badTree);
+        octreeIterator.gotoBottomLeft();
+
+        typename OctreeClass::Iterator octreeIteratorValide(valideTree);
+        octreeIteratorValide.gotoBottomLeft();
+
+        // Advance the reference iterator to badTree's first leaf; give up when the leaves are exhausted.
+        while(octreeIteratorValide.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()
+              && octreeIteratorValide.moveRight()){}
+
+        do {
+            if( octreeIterator.getCurrentListSrc()->getNbParticles() != octreeIteratorValide.getCurrentListSrc()->getNbParticles()){
+                std::cout << " Particules numbers is different " << std::endl;
+            }
+            if( octreeIterator.getCurrentGlobalIndex() != octreeIteratorValide.getCurrentGlobalIndex()){
+                std::cout << " Index are differents " << std::endl;
+            }
+
+            ContainerClass* firstLeaf = octreeIterator.getCurrentListTargets();
+            ContainerClass* valideLeaf = octreeIteratorValide.getCurrentListTargets();
+
+            const FReal*const potentials = firstLeaf->getPotentials();
+            const FReal*const forcesX = firstLeaf->getForcesX();
+            const FReal*const forcesY = firstLeaf->getForcesY();
+            const FReal*const forcesZ = firstLeaf->getForcesZ();
+            const FReal*const positionX = firstLeaf->getPositions()[0];
+            const FReal*const positionY = firstLeaf->getPositions()[1];
+            const FReal*const positionZ = firstLeaf->getPositions()[2];
+            const FReal*const validePositionX = valideLeaf->getPositions()[0];
+            const FReal*const validePositionY = valideLeaf->getPositions()[1];
+            const FReal*const validePositionZ = valideLeaf->getPositions()[2];
+            const FReal*const validePotentials = valideLeaf->getPotentials();
+            const FReal*const valideForcesX = valideLeaf->getForcesX();
+            const FReal*const valideForcesY = valideLeaf->getForcesY();
+            const FReal*const valideForcesZ = valideLeaf->getForcesZ();
+
+            for(int idxLeaf = 0 ; idxLeaf < firstLeaf->getNbParticles() ; ++idxLeaf){
+                int idxValideLeaf = 0;
+                for(; idxValideLeaf < valideLeaf->getNbParticles() ; ++idxValideLeaf){
+                    if( FMath::LookEqual(validePositionX[idxValideLeaf],positionX[idxLeaf]) &&
+                        FMath::LookEqual(validePositionY[idxValideLeaf],positionY[idxLeaf]) &&
+                        FMath::LookEqual(validePositionZ[idxValideLeaf],positionZ[idxLeaf]) ){
+                        break;
+                    }
+                }
+
+                if( idxValideLeaf < valideLeaf->getNbParticles() ){
+                    // The particle was matched by position in the reference leaf: its potential and
+                    // forces must agree with the reference values within a relative error of Epsilon
+                    bool error = false;
+                    if( FMath::RelatifDiff(validePotentials[idxValideLeaf] , potentials[idxLeaf])  > Epsilon ){
+                        std::cout << " Potential error : " << validePotentials[idxValideLeaf] << " " << potentials[idxLeaf] << "\n";
+                        error = true;
+                    }
+                    if( FMath::RelatifDiff(valideForcesX[idxValideLeaf],forcesX[idxLeaf]) > Epsilon
+                            || FMath::RelatifDiff(valideForcesY[idxValideLeaf],forcesY[idxLeaf]) > Epsilon
+                            || FMath::RelatifDiff(valideForcesZ[idxValideLeaf],forcesZ[idxLeaf]) > Epsilon){
+                        std::cout << " Forces error : x " << valideForcesX[idxValideLeaf] << " " << forcesX[idxLeaf]
+                                  << " y " << valideForcesY[idxValideLeaf]  << " " << forcesY[idxLeaf]
+                                  << " z " << valideForcesZ[idxValideLeaf]  << " " << forcesZ[idxLeaf] << "\n";
+                        error = true;
+                    }
+                    if( error ){
+                        std::cout << "At position " << FPoint(validePositionX[idxValideLeaf],validePositionY[idxValideLeaf],validePositionZ[idxValideLeaf])
+                                  << " == " << FPoint(positionX[idxLeaf],positionY[idxLeaf],positionZ[idxLeaf]) << std::endl;
+                    }
+                }
+                else{
+                    std::cout << "Particle not found " << FPoint(positionX[idxLeaf],positionY[idxLeaf],positionZ[idxLeaf]) << std::endl;
+                }
+            }
+
+        } while(octreeIterator.moveRight() && octreeIteratorValide.moveRight());
+    }
+
+    std::cout << "Done\n";
+}
+#endif
+
+
+// Simply create particles and try the kernels
+int main(int argc, char ** argv){
+  // Builds an octree from an FMA particle file (going through FMpiTreeBuilder when more than one
+  // MPI process runs), executes the rotation-kernel FMM on it and prints the summed potential and
+  // forces. When VALIDATE_FMM is defined, a second tree is filled from the same file, solved with
+  // the non-MPI algorithm, and compared against the first one with ValidateFMMAlgoProc.
+  //
+  // Command line: -h <levels> (default 5), -sh <sub-levels size> (default 3), -f <particle file>.
+  // Returns 1 when the particle file cannot be opened, 0 otherwise.
+  //
+  // The expansion order (7) is fixed at compile time by the typedefs below.
+
+  // Types used by the rotation-kernel run
+  typedef FRotationCell<7>         CellClass;
+  typedef FP2PParticleContainer         ContainerClass;
+
+  typedef FSimpleLeaf< ContainerClass >                     LeafClass;
+  typedef FOctree< CellClass, ContainerClass , LeafClass >  OctreeClass;
+  typedef FRotationKernel< CellClass, ContainerClass,7 >     KernelClass;
+
+  typedef FFmmAlgorithmThreadProc<OctreeClass,  CellClass, ContainerClass, KernelClass, LeafClass > FmmClass;
+  typedef FFmmAlgorithmThread<OctreeClass,  CellClass, ContainerClass, KernelClass, LeafClass > FmmClassNoProc;
+
+
+  ///////////////////////What we do/////////////////////////////
+  std::cout << ">> This executable has to be used to test Rotation algorithm.\n";
+  //////////////////////////////////////////////////////////////
+
+  FMpi app( argc, argv);
+
+  // Tree shape and input file, overridable from the command line
+  const int NbLevels = FParameters::getValue(argc,argv,"-h", 5);
+  const int SizeSubLevels = FParameters::getValue(argc,argv,"-sh", 3);
+  FTic counter;
+  const char* const defaultFilename = (sizeof(FReal) == sizeof(float))?
+    "../Data/test20k.bin.fma.single":
+    "../Data/test20k.bin.fma.double";
+  const char* const filename = FParameters::getStr(argc,argv,"-f", defaultFilename);
+
+  std::cout << "Opening : " << filename << "\n";
+
+  FMpiFmaLoader loader(filename, app.global());
+  if(!loader.isOpen()){
+    std::cout << "Loader Error, " << filename << " is missing\n";
+    return 1;
+  }
+
+  // -----------------------------------------------------
+  // Tree the FMM is run on. With several processes, each one only inserts the particles that
+  // FMpiTreeBuilder::ArrayToTree hands back to it.
+
+  OctreeClass tree(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
+
+  // -----------------------------------------------------
+
+  std::cout << "Creating & Inserting " << loader.getNumberOfParticles() << " particles ..." << std::endl;
+  std::cout << "\tHeight : " << NbLevels << " \t sub-height : " << SizeSubLevels << std::endl;
+  counter.tic();
+
+  if( app.global().processCount() != 1){
+    //////////////////////////////////////////////////////////////////////////////////
+    // Build tree from mpi loader
+    //////////////////////////////////////////////////////////////////////////////////
+    std::cout << "Build Tree ..." << std::endl;
+    counter.tic();
+
+    struct TestParticle{
+      FPoint position;
+      FReal physicalValue;
+      const FPoint& getPosition(){
+        return position;
+      }
+    };
+
+    TestParticle* particles = new TestParticle[loader.getNumberOfParticles()];
+    memset(particles, 0, sizeof(TestParticle) * static_cast<size_t>(loader.getNumberOfParticles())); // full-width size, no 32-bit truncation
+
+    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+      loader.fillParticle(&particles[idxPart].position,&particles[idxPart].physicalValue);
+    }
+
+    FVector<TestParticle> finalParticles;
+    FMpiTreeBuilder< TestParticle >::ArrayToTree(app.global(), particles, loader.getNumberOfParticles(),
+                                                 tree.getBoxCenter(),
+                                                 tree.getBoxWidth(),
+                                                 tree.getHeight(), &finalParticles);
+
+    for(int idx = 0 ; idx < finalParticles.getSize(); ++idx){
+      tree.insert(finalParticles[idx].position,finalParticles[idx].physicalValue);
+    }
+
+    delete[] particles;
+
+    counter.tac();
+    std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
+
+    //////////////////////////////////////////////////////////////////////////////////
+  }
+  else{
+    FPoint position;
+    FReal physicalValue;
+    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+      loader.fillParticle(&position,&physicalValue);
+      tree.insert(position, physicalValue);
+    }
+  }
+
+  counter.tac();
+  std::cout << "Done  " << "(@Creating and Inserting Particles = " << counter.elapsed() << "s)." << std::endl;
+
+  // -----------------------------------------------------
+  std::cout << "Create kernel..." << std::endl;
+  counter.tic();
+  KernelClass kernels( NbLevels,loader.getBoxWidth(), loader.getCenterOfBox());
+  counter.tac();
+  std::cout << "Done  " << "(@Creating Kernel = " << counter.elapsed() << "s)." << std::endl;
+
+  // -----------------------------------------------------
+
+  std::cout << "Working on particles ..." << std::endl;
+
+  FmmClass algo(app.global(),&tree,&kernels);
+
+  counter.tic();
+  algo.execute();
+  counter.tac();
+
+  std::cout << "Done  " << "(@Algorithm = " << counter.elapsed() << "s)." << std::endl;
+
+  { // get sum forces&potential
+    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Sum Result" , __FILE__ , __LINE__) );
+
+    FReal potential = 0;
+    FReal fx = 0.0, fy = 0.0, fz = 0.0;
+
+    tree.forEachLeaf([&](LeafClass* leaf){
+        const FReal*const potentials = leaf->getTargets()->getPotentials();
+        const FReal*const forcesX = leaf->getTargets()->getForcesX();
+        const FReal*const forcesY = leaf->getTargets()->getForcesY();
+        const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
+        const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
+
+        for(int idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
+          potential += potentials[idxPart];
+          fx += forcesX[idxPart];
+          fy += forcesY[idxPart];
+          fz += forcesZ[idxPart];
+        }
+      });
+
+    std::cout << "My potential is " << potential << std::endl;
+
+    potential = app.global().reduceSum(potential);
+    fx = app.global().reduceSum(fx);
+    fy = app.global().reduceSum(fy);
+    fz = app.global().reduceSum(fz);
+
+
+    if(app.global().processId() == 0){
+      std::cout << "Foces Sum  x = " << fx << " y = " << fy << " z = " << fz << std::endl;
+      std::cout << "Potential Sum = " << potential << std::endl;
+    }
+  }
+
+#ifdef VALIDATE_FMM
+  {
+    OctreeClass treeValide(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
+    {
+      FFmaBinLoader loaderSeq(filename);
+      FPoint position;
+      FReal physicalValue;
+      for(FSize idxPart = 0 ; idxPart < loaderSeq.getNumberOfParticles() ; ++idxPart){
+        loaderSeq.fillParticle(&position,&physicalValue);
+        treeValide.insert(position,physicalValue);
+      }
+    }
+
+    std::cout << "Working on particles ..." << std::endl;
+    FmmClassNoProc algoValide(&treeValide,&kernels);
+    counter.tic();
+    algoValide.execute();
+    counter.tac();
+    std::cout << "Done  " << "(@Algorithm = " << counter.elapsed() << "s)." << std::endl;
+
+    FReal potential = 0;
+    FReal fx = 0.0, fy = 0.0, fz = 0.0;
+    // Sum over the reference tree just solved, so these totals can be compared with the ones printed above
+    treeValide.forEachLeaf([&](LeafClass* leaf){
+        const FReal*const potentials = leaf->getTargets()->getPotentials();
+        const FReal*const forcesX = leaf->getTargets()->getForcesX();
+        const FReal*const forcesY = leaf->getTargets()->getForcesY();
+        const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
+        const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
+
+        for(int idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
+          potential += potentials[idxPart];
+          fx += forcesX[idxPart];
+          fy += forcesY[idxPart];
+          fz += forcesZ[idxPart];
+        }
+      });
+
+    std::cout << "Foces Sum  x = " << fx << " y = " << fy << " z = " << fz << std::endl;
+    std::cout << "Potential = " << potential << std::endl;
+
+    ValidateFMMAlgoProc<OctreeClass,ContainerClass>(&tree,&treeValide);
+  }
+#endif
+
+
+  // -----------------------------------------------------
+
+  return 0;
+}
+
+
+
diff --git a/Tests/Utils/testLoader.cpp b/Tests/Utils/testLoader.cpp
index d80126edce13f3623cfa2d1cd885664a6fd92c3b..97a364ebf904787cbdb29540ada84fc4d942c8b4 100755
--- a/Tests/Utils/testLoader.cpp
+++ b/Tests/Utils/testLoader.cpp
@@ -26,7 +26,7 @@
 #include "../../Src/Containers/FOctree.hpp"
 #include "../../Src/Containers/FVector.hpp"
 
-#include "../../Src/Utils/FAssertable.hpp"
+#include "../../Src/Utils/FAssert.hpp"
 #include "../../Src/Utils/FPoint.hpp"
 
 #include "../../Src/Components/FBasicCell.hpp"
diff --git a/Tests/Utils/testLoaderFMA.cpp b/Tests/Utils/testLoaderFMA.cpp
index fad2028dff1c233252df963f7ea58b718c01b8aa..59819b689c5fa0fa03a6262c5accb374178f7247 100755
--- a/Tests/Utils/testLoaderFMA.cpp
+++ b/Tests/Utils/testLoaderFMA.cpp
@@ -26,7 +26,7 @@
 #include "../../Src/Containers/FOctree.hpp"
 #include "../../Src/Containers/FVector.hpp"
 
-#include "../../Src/Utils/FAssertable.hpp"
+#include "../../Src/Utils/FAssert.hpp"
 #include "../../Src/Utils/FPoint.hpp"
 
 #include "../../Src/Components/FBasicCell.hpp"
diff --git a/Tests/Utils/testLoaderFMATsm.cpp b/Tests/Utils/testLoaderFMATsm.cpp
index f1fcd19c9f862de835c764f114420247dfd56ea4..d2e4860123fdfa6c545aab85bf32834cb2880d02 100755
--- a/Tests/Utils/testLoaderFMATsm.cpp
+++ b/Tests/Utils/testLoaderFMATsm.cpp
@@ -25,7 +25,7 @@
 #include "../../Src/Containers/FOctree.hpp"
 #include "../../Src/Containers/FVector.hpp"
 
-#include "../../Src/Utils/FAssertable.hpp"
+#include "../../Src/Utils/FAssert.hpp"
 #include "../../Src/Utils/FPoint.hpp"
 
 #include "../../Src/Components/FBasicCell.hpp"
diff --git a/Tests/Utils/testOctree.cpp b/Tests/Utils/testOctree.cpp
index b2c6e5271610172a60ea4f7bd3ca02fcacb299b7..95df3c5293e4184ee74327641200f1590b6025b9 100755
--- a/Tests/Utils/testOctree.cpp
+++ b/Tests/Utils/testOctree.cpp
@@ -26,7 +26,7 @@
 #include "../../Src/Containers/FOctree.hpp"
 #include "../../Src/Containers/FVector.hpp"
 
-#include "../../Src/Utils/FAssertable.hpp"
+#include "../../Src/Utils/FAssert.hpp"
 #include "../../Src/Utils/FPoint.hpp"
 
 #include "../../Src/Components/FBasicParticleContainer.hpp"
diff --git a/Tests/Utils/testOctreeFuncteur.cpp b/Tests/Utils/testOctreeFuncteur.cpp
index 07f6909bc4eaaf2f3dc4dde41bc2d6b1d1e5cdce..a8851dd7ef6ed5774477057407998b804807fb37 100644
--- a/Tests/Utils/testOctreeFuncteur.cpp
+++ b/Tests/Utils/testOctreeFuncteur.cpp
@@ -26,7 +26,7 @@
 #include "../../Src/Containers/FOctree.hpp"
 #include "../../Src/Containers/FVector.hpp"
 
-#include "../../Src/Utils/FAssertable.hpp"
+#include "../../Src/Utils/FAssert.hpp"
 #include "../../Src/Utils/FPoint.hpp"
 
 #include "../../Src/Components/FBasicParticleContainer.hpp"
diff --git a/Tests/Utils/testOctreeIter.cpp b/Tests/Utils/testOctreeIter.cpp
index 6c6fc6f41bca9892a7f2ec73b57c429b99addf93..fe311cb5e8959bb8390b97fa6f744abf3bbfc964 100755
--- a/Tests/Utils/testOctreeIter.cpp
+++ b/Tests/Utils/testOctreeIter.cpp
@@ -27,7 +27,7 @@
 #include "../../Src/Containers/FVector.hpp"
 #include "../../Src/Components/FSimpleLeaf.hpp"
 
-#include "../../Src/Utils/FAssertable.hpp"
+#include "../../Src/Utils/FAssert.hpp"
 #include "../../Src/Utils/FPoint.hpp"
 
 #include "../../Src/Components/FBasicParticleContainer.hpp"
diff --git a/UTests/utestOctree.cpp b/UTests/utestOctree.cpp
index eeaba8af0ed4637be9e6942d927f9bcf2aeb0d8e..7992c608f33280f77a317d3e41bb6aa3b5474242 100755
--- a/UTests/utestOctree.cpp
+++ b/UTests/utestOctree.cpp
@@ -20,7 +20,7 @@
 #include "../Src/Containers/FVector.hpp"
 #include "../Src/Components/FSimpleLeaf.hpp"
 
-#include "../Src/Utils/FAssertable.hpp"
+#include "../Src/Utils/FAssert.hpp"
 #include "../Src/Utils/FPoint.hpp"
 
 #include "../Src/Components/FBasicParticleContainer.hpp"
diff --git a/UTests/utestRotationDirectTsm.cpp b/UTests/utestRotationDirectTsm.cpp
index 12152da326940a07d1e42ae174807fbd412250c9..6697c70a77ae60fbc37f957615a59e54129275b9 100644
--- a/UTests/utestRotationDirectTsm.cpp
+++ b/UTests/utestRotationDirectTsm.cpp
@@ -54,7 +54,7 @@ class TestRotationDirectTsm : public FUTester<TestRotationDirectTsm> {
         Print(loader.getNumberOfParticles());
 
         const int NbLevels      = 4;
-        const int SizeSubLevels = 2;
+        const int SizeSubLevels = 3;
 
 
         struct TestParticle{
@@ -64,14 +64,13 @@ class TestRotationDirectTsm : public FUTester<TestRotationDirectTsm> {
             FReal potential;
         };
 
-        TestParticle* const particlesSources = new TestParticle[nbSources];
-        TestParticle* const particlesTargets = new TestParticle[nbTargets];
 
         // Create octree
         OctreeClass tree(NbLevels, SizeSubLevels, loader.getBoxWidth(), loader.getCenterOfBox());
 
         const FReal physicalValue = 0.10;
 
+        TestParticle* const particlesTargets = new TestParticle[nbTargets];
         for(int idxPart = 0 ; idxPart < nbTargets ; ++idxPart){
             FPoint position;
             loader.fillParticle(&position);
@@ -86,6 +85,7 @@ class TestRotationDirectTsm : public FUTester<TestRotationDirectTsm> {
             particlesTargets[idxPart].forces[2] = 0.0;
         }
 
+        TestParticle* const particlesSources = new TestParticle[nbSources];
         for(int idxPart = 0 ; idxPart < nbSources ; ++idxPart){
             FPoint position;
             loader.fillParticle(&position);
@@ -188,13 +188,9 @@ class TestRotationDirectTsm : public FUTester<TestRotationDirectTsm> {
 
     static const int P = 9;
 
-    template <const int P>
-    class CustomTypedRotationCell : public FRotationCell<P>, public FExtendCellType{
-    };
-
     /** Rotation */
     void TestRotation(){
-        typedef CustomTypedRotationCell<P>    CellClass;
+        typedef FTypedRotationCell<P>    CellClass;
         typedef FP2PParticleContainerIndexed  ContainerClass;
 
         typedef FRotationKernel<CellClass, ContainerClass, P >          KernelClass;
@@ -208,7 +204,7 @@ class TestRotationDirectTsm : public FUTester<TestRotationDirectTsm> {
     }
 
     void TestRotationThread(){
-        typedef CustomTypedRotationCell<P>    CellClass;
+        typedef FTypedRotationCell<P>    CellClass;
         typedef FP2PParticleContainerIndexed  ContainerClass;
 
         typedef FRotationKernel<CellClass, ContainerClass, P >          KernelClass;
@@ -228,7 +224,7 @@ class TestRotationDirectTsm : public FUTester<TestRotationDirectTsm> {
     /** set test */
     void SetTests(){
         AddTest(&TestRotationDirectTsm::TestRotation,"Test Rotation Kernel TSM");
-        AddTest(&TestRotationDirectTsm::TestRotation,"Test Rotation Kernel TSM thread");
+        AddTest(&TestRotationDirectTsm::TestRotationThread,"Test Rotation Kernel TSM thread");
     }
 };