diff --git a/Addons/BenchEfficiency/bordeaux_0116.pdf b/Addons/BenchEfficiency/bordeaux_0116.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..c7785df796322f7b6779f10deff6bef7ef3bb436
Binary files /dev/null and b/Addons/BenchEfficiency/bordeaux_0116.pdf differ
diff --git a/Addons/BenchEfficiency/execAllHomogeneous.sh b/Addons/BenchEfficiency/execAllHomogeneous.sh
index 610108bcc606ba56d0ef2809d61772b1954f8785..0ec3591b1bb06c6a87975c2ef5eae956e9dc36c2 100644
--- a/Addons/BenchEfficiency/execAllHomogeneous.sh
+++ b/Addons/BenchEfficiency/execAllHomogeneous.sh
@@ -13,14 +13,25 @@ for (( cpu=1 ; cpu<=$SCALFMM_MAX_NB_CPU ; cpu++)) ; do
 
     STARPU_NCPUS=$cpu
     STARPU_NCUDA=0
-    ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_SEQ
-    rec_name="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$cpu.rec"
-    mv trace.rec output/$rec_name
-    python $SCALFMM_STARPU_DIR/bin/starpu_trace_state_stats.py -t output/$rec_name
+    # Run with the sequential block size; the log is only shown when VERBOSE is set.
+    logoutput=$(./Tests/Release/testBlockedUnifCudaBench -nb "$SCALFMM_NB" -h "$SCALFMM_H" -bs "$SCALFMM_BS_CPU_SEQ")
+    if [[ $VERBOSE ]] ; then
+        # Quoted so the multi-line benchmark log keeps its line breaks.
+        echo "$logoutput"
+    fi
+    # rec_name is the full destination path under $SCALFMM_RES_DIR (no extra "output/" prefix).
+    rec_name="$SCALFMM_RES_DIR/trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_BS_CPU_SEQ-CPU_$cpu.rec"
+    mv trace.rec "$rec_name"
+    # Store the statistics next to the trace as <trace>.time.
+    python "$SCALFMM_STARPU_DIR/bin/starpu_trace_state_stats.py" -t "$rec_name" > "$rec_name.time"
 
-    ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_PAR
-    rec_name="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_PAR-CPU_$cpu.rec"
-    mv trace.rec output/$rec_name
-    python $SCALFMM_STARPU_DIR/bin/starpu_trace_state_stats.py -t output/$rec_name
+    # Same sequence with the parallel block size.
+    logoutput=$(./Tests/Release/testBlockedUnifCudaBench -nb "$SCALFMM_NB" -h "$SCALFMM_H" -bs "$SCALFMM_BS_CPU_PAR")
+    if [[ $VERBOSE ]] ; then
+        echo "$logoutput"
+    fi
+    rec_name="$SCALFMM_RES_DIR/trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_BS_CPU_PAR-CPU_$cpu.rec"
+    mv trace.rec "$rec_name"
+    python "$SCALFMM_STARPU_DIR/bin/starpu_trace_state_stats.py" -t "$rec_name" > "$rec_name.time"
 done
 
diff --git a/Addons/BenchEfficiency/paintmodel.fmm.colors b/Addons/BenchEfficiency/paintmodel.fmm.colors
new file mode 100644
index 0000000000000000000000000000000000000000..8766ca2f30208b5be1c3bf08fa451bb1d9482cf6
--- /dev/null
+++ b/Addons/BenchEfficiency/paintmodel.fmm.colors
@@ -0,0 +1,9 @@
+#expression evaluation;replacement (if not empty);red;green;blue
+P2P;;1.0;1.0;1.0
+P2P-out;;0.7;0.7;0.7
+(M2L-level-)[0-9]*;;0.2;0.2;0.6
+(M2L-out-level-)[0-9]*;;0.1;0.1;0.6
+(L2L-level-)[0-9]*;;0.2;0.6;0.2
+(M2M-level-)[0-9]*;;0.1;0.6;0.6
+P2M;;0.2;0.2;0.2
+L2P;;0.6;0.8;0.2
diff --git a/Addons/BenchEfficiency/pajecolor b/Addons/BenchEfficiency/pajecolor
new file mode 100755
index 0000000000000000000000000000000000000000..281fe611c05b4238fd0af0e8098a4b2ff27391ab
Binary files /dev/null and b/Addons/BenchEfficiency/pajecolor differ
diff --git a/Addons/BenchEfficiency/par-bs-search.png b/Addons/BenchEfficiency/par-bs-search.png
new file mode 100644
index 0000000000000000000000000000000000000000..4243937f718415f59047a70a48cb51391dcd223e
Binary files /dev/null and b/Addons/BenchEfficiency/par-bs-search.png differ
diff --git a/Addons/BenchEfficiency/scalfmm.html b/Addons/BenchEfficiency/scalfmm.html
index 1c0c3211b6190fd2671baf7b08b2ed0242b7b2b9..85da3260555a959efc0d6cf222967ac0f683f4b5 100644
--- a/Addons/BenchEfficiency/scalfmm.html
+++ b/Addons/BenchEfficiency/scalfmm.html
@@ -13,6 +13,7 @@
 <li>BLAS/LAPACK (The configure of ScalFMM is different if the MKL is used or not, but with the MKL it is recommended to set environment variable <code>MKLROOT</code>)</li>
 <li>CUDA (&gt;= 7) and <code>CUDA_PATH</code> must be set. In our case, <code>CUDA_PATH=/usr/local/cuda-7.5/</code></li>
 <li><strong>Optional</strong> Vite (from <code>sudo apt-get install vite</code> or see <a href="http://vite.gforge.inria.fr/download.php" class="uri">http://vite.gforge.inria.fr/download.php</a>)</li>
+<li><strong>Optional</strong> Qt5 library to be able to change the colors of the execution traces in order to visualize the different FMM operators</li>
 </ul>
 <blockquote>
 <p>Some installations of CUDA does not have libcuda file. In this case, one needs to create a link : <code>sudo ln /usr/local/cuda-7.5/lib64/libcudart.so /usr/local/cuda-7.5/lib64/libcuda.so</code></p>
@@ -26,9 +27,14 @@
 <h3 id="working-directory">Working directory</h3>
 <p>The variable <code>SCALFMM_TEST_DIR</code> is used to specify the working directory:</p>
 <pre class="bash"><code>export SCALFMM_TEST_DIR=~/scalfmm_test
-mkdir $SCALFMM_TEST_DIR
+if [[ ! -d $SCALFMM_TEST_DIR ]] ; then
+	mkdir $SCALFMM_TEST_DIR
+fi    
 cd $SCALFMM_TEST_DIR</code></pre>
-<p><em>Output variables:</em> <code>$SCALFMM_TEST_DIR</code></p>
+<p>In order to be able to stop the tutorial in the middle and restart later, we will keep the registered variables in a file that should be sourced to restart.</p>
+<pre class="bash"><code>function scalfmmRegisterVariable() { echo &quot;export $1=${!1}&quot; &gt;&gt; &quot;$SCALFMM_TEST_DIR/environment.source&quot;; }
+echo &quot;function scalfmmRegisterVariable() { echo \&quot;export \$1=\${!1}\&quot; &gt;&gt; \&quot;$SCALFMM_TEST_DIR/environment.source\&quot;; }&quot; &gt; &quot;$SCALFMM_TEST_DIR/environment.source&quot;</code></pre>
+<p><em>Output variables:</em> <code>scalfmmRegisterVariable SCALFMM_TEST_DIR</code></p>
 <p>Valid-if</p>
 <pre class="bash"><code>if [[ -n $SCALFMM_TEST_DIR ]] &amp;&amp; [[ -d $SCALFMM_TEST_DIR ]] ; then
    echo “STEP-OK”
@@ -48,10 +54,10 @@ if [[ ! -f hwloc-1.11.2.tar.gz ]] ; then
 fi
 tar xvf hwloc-1.11.2.tar.gz
 cd hwloc-1.11.2/
-SCALFMM_HWLOC_DIR=$SCALFMM_TEST_DIR/hwlocinstall
+export SCALFMM_HWLOC_DIR=$SCALFMM_TEST_DIR/hwlocinstall
 ./configure --prefix=$SCALFMM_HWLOC_DIR
 make install</code></pre>
-<p><em>Output variables:</em> <code>$SCALFMM_HWLOC_DIR</code></p>
+<p><em>Output variables:</em> <code>scalfmmRegisterVariable SCALFMM_HWLOC_DIR</code></p>
 <p>Valid-if:</p>
 <pre class="bash"><code>if [[ -n $SCALFMM_HWLOC_DIR ]] &amp;&amp; [[ -d $SCALFMM_HWLOC_DIR/lib/ ]] &amp;&amp; [[ -f  $SCALFMM_HWLOC_DIR/lib/libhwloc.so ]]; then
    echo “OK”
@@ -63,10 +69,10 @@ if [[ ! -f fxt-0.2.11.tar.gz ]] ; then
 fi
 tar xvf fxt-0.2.11.tar.gz
 cd fxt-0.2.11/
-SCALFMM_FXT_DIR=$SCALFMM_TEST_DIR/fxtinstall
+export SCALFMM_FXT_DIR=$SCALFMM_TEST_DIR/fxtinstall
 ./configure --prefix=$SCALFMM_FXT_DIR
 make install</code></pre>
-<p><em>Output variables:</em> <code>$SCALFMM_FXT_DIR</code></p>
+<p><em>Output variables:</em> <code>scalfmmRegisterVariable SCALFMM_FXT_DIR</code></p>
 <p>Valid-if:</p>
 <pre class="bash"><code>if [[ -n $SCALFMM_FXT_DIR ]] &amp;&amp; [[ -d $SCALFMM_FXT_DIR/lib/ ]] &amp;&amp; [[ -f  $SCALFMM_FXT_DIR/lib/libfxt.so ]]; then
    echo “OK”
@@ -79,12 +85,12 @@ if [[ ! -f fftw-3.3.4.tar.gz ]] ; then
 fi    
 tar xvf fftw-3.3.4.tar.gz
 cd fftw-3.3.4/
-SCALFMM_FFTW_DIR=$SCALFMM_TEST_DIR/fftinstall
+export SCALFMM_FFTW_DIR=$SCALFMM_TEST_DIR/fftinstall
 ./configure --prefix=$SCALFMM_FFTW_DIR
 make install
 ./configure --prefix=$SCALFMM_FFTW_DIR --enable-float
 make install</code></pre>
-<p><em>Output variables:</em> <code>$SCALFMM_FFTW_DIR</code></p>
+<p><em>Output variables:</em> <code>scalfmmRegisterVariable SCALFMM_FFTW_DIR</code></p>
 <p>Valid-if:</p>
 <pre class="bash"><code>if [[ -n $SCALFMM_FFTW_DIR ]] &amp;&amp; [[ -d $SCALFMM_FFTW_DIR/lib/ ]] &amp;&amp; [[ -f  $SCALFMM_FFTW_DIR/lib/libfftw3.a ]] &amp;&amp; [[ -f  $SCALFMM_FFTW_DIR/lib/libfftw3f.a ]]; then
    echo “OK”
@@ -95,14 +101,14 @@ if [[ ! -d starpu ]] ; then
 	svn export svn://scm.gforge.inria.fr/svnroot/starpu/trunk starpu
 fi    
 cd starpu/
-SCALFMM_STARPU_DIR=$SCALFMM_TEST_DIR/starpuinstall
+export SCALFMM_STARPU_DIR=$SCALFMM_TEST_DIR/starpuinstall
 ./autogen.sh
 ./configure --prefix=$SCALFMM_STARPU_DIR --with-fxt=$SCALFMM_FXT_DIR --with-hwloc=$SCALFMM_HWLOC_DIR --with-cuda-dir=$CUDA_PATH --disable-opencl
 make install</code></pre>
 <blockquote>
 <p><strong>Optional</strong> In case you do not want to use trace (FXT) please remove the <code>--with-fxt=$SCALFMM_FXT_DIR</code> parameter from the command</p>
 </blockquote>
-<p><em>Output variables:</em> <code>$SCALFMM_STARPU_DIR</code></p>
+<p><em>Output variables:</em> <code>scalfmmRegisterVariable SCALFMM_STARPU_DIR</code></p>
 <p>Valid-if:</p>
 <pre class="bash"><code>if [[ -n $SCALFMM_STARPU_DIR ]] &amp;&amp; [[ -d $SCALFMM_STARPU_DIR/lib/ ]] &amp;&amp; [[ -f  $SCALFMM_STARPU_DIR/lib/libstarpu.so ]] ; then
    echo “OK”
@@ -117,10 +123,10 @@ git clone --depth=1 https://scm.gforge.inria.fr/anonscm/git/scalfmm-public/scalf
 fi    
 cd scalfmm-public/
 export SCALFMM_SOURCE_DIR=`pwd`
-Build/
+cd Build/
 export SCALFMM_BUILD_DIR=`pwd`</code></pre></li>
 </ul>
-<p><em>Output variables:</em> <code>SCALFMM_BUILD_DIR</code> <code>SCALFMM_SOURCE_DIR</code></p>
+<p><em>Output variables:</em> <code>scalfmmRegisterVariable SCALFMM_BUILD_DIR</code> <code>scalfmmRegisterVariable SCALFMM_SOURCE_DIR</code></p>
 <ul>
 <li><p>Configure (No MKL):</p>
 <pre class="bash"><code>cmake .. -DSCALFMM_BUILD_DEBUG=OFF -DSCALFMM_USE_MPI=OFF -DSCALFMM_BUILD_TESTS=ON -DSCALFMM_BUILD_UTESTS=OFF -DSCALFMM_USE_BLAS=ON -DSCALFMM_USE_MKL_AS_BLAS=OFF -DSCALFMM_USE_LOG=ON -DSCALFMM_USE_STARPU=ON -DSCALFMM_USE_CUDA=ON -DSCALFMM_USE_OPENCL=OFF -DHWLOC_DIR=$SCALFMM_HWLOC_DIR -DSTARPU_DIR=$SCALFMM_STARPU_DIR -DSCALFMM_USE_FFT=ON -DFFT_DIR=$SCALFMM_FFT_DIR</code></pre></li>
@@ -154,7 +160,27 @@ make testBlockedUnifCudaBench</code></pre>
 <li><code>-p2p-m2l-cuda-only</code> : to compute the P2P and the M2L only on GPU (the rest on the CPU)</li>
 </ul>
 <p>Examples:</p>
-<pre><code>STARPU_NCPUS=3 STARPU_NCUDA=1 ./Tests/Release/testBlockedUnifCudaBench -nb 10000 -h 3</code></pre>
+<pre><code>export STARPU_NCPUS=12
+export STARPU_NCUDA=2
+./Tests/Release/testBlockedUnifCudaBench -nb 30000000 -h 7 -bs 800</code></pre>
+<p>Last part of the output should be:</p>
+<pre class="bash"><code>	Start FGroupTaskStarPUAlgorithm
+		 directPass in 0.0406482s
+			 inblock  in 0.000780428s
+			 outblock in 0.0398674s
+		 bottomPass in 0.00586269s
+		 upwardPass in 0.00265723s
+		 transferPass in 0.00323571s
+			 inblock in  0.000124817s
+			 outblock in 0.00298331s
+		 downardPass in 0.00257975s
+		 transferPass in 0.0652285s
+			 inblock in  0.00164774s
+			 outblock in 0.0635799s
+		 L2P in 0.0115733s
+		 Submitting the tasks took 0.139101s
+		 Moving data to the host took 0.0578765s
+@EXEC TIME = 14.6321s</code></pre>
 <ul>
 <li>Visualize the execution trace (<strong>Optional</strong>)</li>
 </ul>
@@ -162,119 +188,147 @@ make testBlockedUnifCudaBench</code></pre>
 <pre class="bash"><code>$SCALFMM_STARPU_DIR/bin/starpu_fxt_tool -i &quot;/tmp/prof_file_&quot;$USER&quot;_0&quot;</code></pre>
 <p>Then visualize the output with vite</p>
 <pre class="bash"><code>vite ./paje.trace</code></pre>
+<p>Should be like: // IMAGE HERE</p>
+<p>We can convert the color of the trace by (it needs Qt5 library):</p>
+<pre class="bash"><code>$SCALFMM_SOURCE_DIR/Addons/BenchEfficiency/pajecolor paje.trace $SCALFMM_SOURCE_DIR/Addons/BenchEfficiency/paintmodel.fmm.colors
+vite ./paje.trace.painted</code></pre>
+<p>Should be like: // IMAGE HERE</p>
 <ul>
 <li>Get execution times</li>
 </ul>
 <pre class="bash"><code>python $SCALFMM_STARPU_DIR/bin/starpu_trace_state_stats.py -t trace.rec</code></pre>
 <p>Should give something like:</p>
 <pre><code>&quot;Name&quot;,&quot;Count&quot;,&quot;Type&quot;,&quot;Duration&quot;
-&quot;Initializing&quot;,3,&quot;Runtime&quot;,5.027746
-&quot;Overhead&quot;,37,&quot;Runtime&quot;,0.110073
-&quot;Idle&quot;,13,&quot;Other&quot;,0.03678
-&quot;Scheduling&quot;,24,&quot;Runtime&quot;,16.529527
-&quot;Sleeping&quot;,17,&quot;Other&quot;,2197.255516
-&quot;FetchingInput&quot;,10,&quot;Runtime&quot;,0.012637
-&quot;execute_on_all_wrapper&quot;,6,&quot;Task&quot;,8.431909
-&quot;PushingOutput&quot;,10,&quot;Runtime&quot;,16.505568
-&quot;P2P&quot;,1,&quot;Task&quot;,105.131112
-&quot;Callback&quot;,4,&quot;Runtime&quot;,0.001048
-&quot;Deinitializing&quot;,3,&quot;Runtime&quot;,0.014547
-&quot;P2M&quot;,1,&quot;Task&quot;,2.543303
-&quot;L2P&quot;,1,&quot;Task&quot;,5.649106
-&quot;M2L-level-2&quot;,1,&quot;Task&quot;,2.167273</code></pre>
+&quot;Initializing&quot;,14,&quot;Runtime&quot;,7153.096196
+&quot;Overhead&quot;,57010,&quot;Runtime&quot;,376.473463
+&quot;Idle&quot;,14355,&quot;Other&quot;,12.815899
+&quot;Scheduling&quot;,28441,&quot;Runtime&quot;,238.367394
+&quot;Sleeping&quot;,610,&quot;Other&quot;,13786.513208
+&quot;FetchingInput&quot;,14341,&quot;Runtime&quot;,13918.805814
+&quot;execute_on_all_wrapper&quot;,30,&quot;Task&quot;,21.288802
+&quot;Executing&quot;,414,&quot;Runtime&quot;,26852.864578
+&quot;PushingOutput&quot;,14341,&quot;Runtime&quot;,284.96123
+&quot;P2P-out&quot;,3846,&quot;Task&quot;,60378.266619
+&quot;Callback&quot;,13559,&quot;Runtime&quot;,4.210633
+&quot;P2P&quot;,328,&quot;Task&quot;,15383.426991
+&quot;M2L-level-5&quot;,41,&quot;Task&quot;,2354.702554
+&quot;M2L-level-6&quot;,328,&quot;Task&quot;,18349.915495
+&quot;Deinitializing&quot;,14,&quot;Runtime&quot;,109.87483
+&quot;M2L-level-4&quot;,6,&quot;Task&quot;,275.088295
+&quot;P2M&quot;,328,&quot;Task&quot;,11312.022842
+&quot;M2M-level-5&quot;,328,&quot;Task&quot;,829.9055
+&quot;M2M-level-4&quot;,41,&quot;Task&quot;,93.130498
+&quot;M2L-out-level-5&quot;,638,&quot;Task&quot;,1914.900053
+&quot;M2M-level-3&quot;,6,&quot;Task&quot;,11.053067
+&quot;M2M-level-2&quot;,1,&quot;Task&quot;,1.363157
+&quot;M2L-out-level-4&quot;,22,&quot;Task&quot;,159.580457
+&quot;L2L-level-4&quot;,41,&quot;Task&quot;,84.554065
+&quot;L2L-level-5&quot;,328,&quot;Task&quot;,1087.717767
+&quot;M2L-out-level-6&quot;,7692,&quot;Task&quot;,18322.518045
+&quot;L2P&quot;,328,&quot;Task&quot;,27146.256793
+&quot;M2L-level-2&quot;,1,&quot;Task&quot;,2.661235
+&quot;L2L-level-3&quot;,6,&quot;Task&quot;,11.346978
+&quot;M2L-level-3&quot;,1,&quot;Task&quot;,47.612555
+&quot;L2L-level-2&quot;,1,&quot;Task&quot;,1.471873</code></pre>
 <p>Most of the script are in the addon directories</p>
 <pre><code>export SCALFMM_AB=$SCALFMM_SOURCE_DIR/Addons/BenchEfficiency/</code></pre>
+<p><em>Output variable:</em> <code>scalfmmRegisterVariable SCALFMM_AB</code></p>
 <h2 id="homogeneous-efficiencies">Homogeneous Efficiencies</h2>
 <p>Here we compute the efficiencies for a given test case on CPU only.</p>
 <p>Go in the build dir and create output dir</p>
 <pre><code>cd $SCALFMM_BUILD_DIR
-mkdir homogeneous</code></pre>
+export SCALFMM_RES_DIR=$SCALFMM_BUILD_DIR/homogeneous
+mkdir $SCALFMM_RES_DIR</code></pre>
+<p><em>Output variable:</em> <code>scalfmmRegisterVariable SCALFMM_RES_DIR</code></p>
 <p>Set up the configuration variables:</p>
-<pre class="bash"><code>SCALFMM_NB=10000000
-SCALFMM_H=7
-SCALFMM_MIN_BS=100
-SCALFMM_MAX_BS=3000
-SCALFMM_MAX_NB_CPU=24</code></pre>
+<pre class="bash"><code>export SCALFMM_NB=10000000
+export SCALFMM_H=7
+export SCALFMM_MIN_BS=100
+export SCALFMM_MAX_BS=10000
+export SCALFMM_MAX_NB_CPU=24</code></pre>
 <p>Find best granularity in sequential and in parallel:</p>
-<pre class="bash"><code>STARPU_NCPUS=1
-STARPU_NCUDA=0
-SCALFMM_BS_CPU_SEQ=`$SCALFMM_AB/scalfmmFindBs.sh &quot;./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs&quot; $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key &quot;@BEST BS&quot; `
+<pre class="bash"><code>export STARPU_NCPUS=1
+export STARPU_NCUDA=0
+export SCALFMM_BS_CPU_SEQ=`$SCALFMM_AB/scalfmmFindBs.sh &quot;./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs&quot; $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmmExtractKey.sh &quot;@BEST BS&quot; `
 if [[ `which gnuplot | wc -l` == &quot;1&quot; ]] ;  then
     gnuplot -e &quot;filename=&#39;seq-bs-search&#39;&quot; $SCALFMM_AB/scalfmmFindBs.gplot
 fi
 
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=0
-=`$SCALFMM_AB/scalfmmFindBs.sh &quot;./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs&quot; $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key &quot;@BEST BS&quot; `
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=0
+export SCALFMM_BS_CPU_PAR=`$SCALFMM_AB/scalfmmFindBs.sh &quot;./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs&quot; $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmmExtractKey.sh &quot;@BEST BS&quot; `
 if [[ `which gnuplot | wc -l` == &quot;1&quot; ]] ;  then
     gnuplot -e &quot;filename=&#39;par-bs-search&#39;&quot; $SCALFMM_AB/scalfmmFindBs.gplot
 fi</code></pre>
-<p>Then we compute the efficiency using both granulirities and keep the .rec files.</p>
+<p>In our case we get and 5385.</p>
+<p><em>Output variable:</em> <code>scalfmmRegisterVariable SCALFMM_BS_CPU_SEQ</code> <code>scalfmmRegisterVariable SCALFMM_BS_CPU_PAR</code></p>
+<p>Then we compute the efficiency using both granularities and keep the .rec files:</p>
 <pre class="bash"><code>source $SCALFMM_AB/execAllHomogeneous.sh</code></pre>
-<p>We should end with all the rec files and their corresponding time files</p>
-<pre class="bash"><code></code></pre>
+<p>We should end with all the .rec files and their corresponding time files</p>
+<pre class="bash"><code>ls $SCALFMM_RES_DIR</code></pre>
 <p>We compute the efficiencies</p>
-<pre class="bash"><code></code></pre>
+<pre class="bash"><code>source $SCALFMM_AB/computeHomogeneousEfficiencies</code></pre>
 <p>We end with efficiency for the application and for the operators.</p>
-<pre class="bash"><code></code></pre>
+<pre class="bash"><code>cat $SCALFMM_RES_DIR/efficiencies.txt</code></pre>
 <p>We can plot each of them</p>
-<pre class="bash"><code></code></pre>
+<pre class="bash"><code>source $SCALFMM_AB/plotEfficiencies.sh $SCALFMM_RES_DIR/efficiencies.txt</code></pre>
+<p>Should give: // IMAGE HERE</p>
 <h2 id="generating-execution-results">Generating Execution Results</h2>
 <p>For test case <code>-nb 10000000</code> (10 million) and <code>-h 6</code> (height of the tree equal to 6), we first want to know the best granularity <code>-bs</code>.</p>
 <p>This parameter will certainly not be the same for sequential/parallel/heterogenous configurations.</p>
-<pre class="bash"><code>SCALFMM_NB=10000000
-SCALFMM_H=7
-SCALFMM_MIN_BS=100
-SCALFMM_MAX_BS=3000
-SCALFMM_MAX_NB_CPU=24
-SCALFMM_MAX_NB_GPU=4</code></pre>
-<pre class="bash"><code>STARPU_NCPUS=1
-STARPU_NCUDA=0
-SCALFMM_BS_CPU_SEQ=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key &quot;@BEST BS&quot; `
+<pre class="bash"><code>export SCALFMM_NB=10000000
+export SCALFMM_H=7
+export SCALFMM_MIN_BS=100
+export SCALFMM_MAX_BS=3000
+export SCALFMM_MAX_NB_CPU=24
+export SCALFMM_MAX_NB_GPU=4</code></pre>
+<pre class="bash"><code>export STARPU_NCPUS=1
+export STARPU_NCUDA=0
+export SCALFMM_BS_CPU_SEQ=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key &quot;@BEST BS&quot; `
 if [[ `which gnuplot | wc -l` == &quot;1&quot; ]] ;  then
     gnuplot -e &quot;filename=&#39;seq-bs-search&#39;&quot; $SCALFMM_AB/scalfmmFindBs.gplot
 fi
 
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=0
-SCALFMM_BS_CPU_PAR=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key &quot;@BEST BS&quot; `
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=0
+export SCALFMM_BS_CPU_PAR=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key &quot;@BEST BS&quot; `
 if [[ `which gnuplot | wc -l` == &quot;1&quot; ]] ;  then
     gnuplot -e &quot;filename=&#39;par-bs-search&#39;&quot; $SCALFMM_AB/scalfmmFindBs.gplot
 fi
 
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
-SCALFMM_BS_CPU_GPU=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key &quot;@BEST BS&quot; `
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
+export SCALFMM_BS_CPU_GPU=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key &quot;@BEST BS&quot; `
 if [[ `which gnuplot | wc -l` == &quot;1&quot; ]] ;  then
     gnuplot -e &quot;filename=&#39;cpugpu-bs-search&#39;&quot; $SCALFMM_AB/scalfmmFindBs.gplot
 fi</code></pre>
 <p>Then, we can execute three best configurations, and keep .rec for each of them:</p>
-<pre class="bash"><code>STARPU_NCPUS=1
-STARPU_NCUDA=0
+<pre class="bash"><code>export STARPU_NCPUS=1
+export STARPU_NCUDA=0
 ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_CPU_SEQ
-SCALFMM_SEQ_REC=&quot;trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec&quot;
+export SCALFMM_SEQ_REC=&quot;trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_BS_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec&quot;
 mv trace.rec $SCALFMM_SEQ_REC
 
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=0
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=0
 ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_PAR
-SCALFMM_PAR_REC=&quot;trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec&quot;
+export SCALFMM_PAR_REC=&quot;trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_BS_CPU_PAR-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec&quot;
 mv trace.rec $SCALFMM_PAR_REC
 
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
 ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_GPU
-SCALFMM_PAR_CPU_GPU_REC=&quot;trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec&quot;
+export SCALFMM_PAR_CPU_GPU_REC=&quot;trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_BS_CPU_GPU-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec&quot;
 mv trace.rec $SCALFMM_PAR_CPU_GPU_REC</code></pre>
 <p>And we also want the GPU tasks only on GPU</p>
-<pre class="bash"><code>STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
+<pre class="bash"><code>export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
 ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_GPU -p2p-m2l-cuda-only
-SCALFMM_PAR_GPU_REC=&quot;trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA-GPUONLY.rec&quot;
+export SCALFMM_PAR_GPU_REC=&quot;trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_BS_CPU_GPU-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA-GPUONLY.rec&quot;
 mv trace.rec $SCALFMM_PAR_GPU_REC</code></pre>
 <p>And we want the sequential version with parallel granularity:</p>
-<pre class="bash"><code>STARPU_NCPUS=1
-STARPU_NCUDA=0
+<pre class="bash"><code>export STARPU_NCPUS=1
+export STARPU_NCUDA=0
 
 ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_PAR
 SCALFMM_SEQ_CPU_BS_REC=&quot;trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec&quot;
diff --git a/Addons/BenchEfficiency/scalfmm.md b/Addons/BenchEfficiency/scalfmm.md
index 1d483ddf5e5e8529b9b9acd857429f97fa040fec..2c984565b81fcfef45cdca792ef54e128a67d64c 100644
--- a/Addons/BenchEfficiency/scalfmm.md
+++ b/Addons/BenchEfficiency/scalfmm.md
@@ -21,6 +21,7 @@ In order to follow this tutorial, it is needed to have the following application
 * BLAS/LAPACK (The configure of ScalFMM is different if the MKL is used or not, but with the MKL it is recommended to set environment variable `MKLROOT`)
 * CUDA (>= 7) and `CUDA_PATH` must be set. In our case, `CUDA_PATH=/usr/local/cuda-7.5/`
 * __Optional__ Vite (from `sudo apt-get install vite` or see [http://vite.gforge.inria.fr/download.php](http://vite.gforge.inria.fr/download.php))
+*  __Optional__ Qt5 library to be able to change the colors of the execution traces in order to visualize the different FMM operators
 
 > Some installations of CUDA does not have libcuda file.
 > In this case, one needs to create a link : `sudo ln /usr/local/cuda-7.5/lib64/libcudart.so /usr/local/cuda-7.5/lib64/libcuda.so`
@@ -38,11 +39,20 @@ In order to follow this tutorial, it is needed to have the following application
 The variable `SCALFMM_TEST_DIR` is used to specify the working directory:
 ```bash
 export SCALFMM_TEST_DIR=~/scalfmm_test
-mkdir $SCALFMM_TEST_DIR
+if [[ ! -d $SCALFMM_TEST_DIR ]] ; then
+	mkdir $SCALFMM_TEST_DIR
+fi    
 cd $SCALFMM_TEST_DIR
 ```
 
-*Output variables:* `$SCALFMM_TEST_DIR`
+In order to be able to stop the tutorial in the middle and restart later, we will keep the registered variables in a file that should be sourced to restart.
+```bash
+function scalfmmRegisterVariable() { echo "export $1=${!1}" >> "$SCALFMM_TEST_DIR/environment.source"; }
+echo "function scalfmmRegisterVariable() { echo \"export \$1=\${!1}\" >> \"$SCALFMM_TEST_DIR/environment.source\"; }" > "$SCALFMM_TEST_DIR/environment.source"
+```
+
+
+*Output variables:* `scalfmmRegisterVariable SCALFMM_TEST_DIR`
 
 Valid-if
 ```bash
@@ -72,12 +82,12 @@ if [[ ! -f hwloc-1.11.2.tar.gz ]] ; then
 fi
 tar xvf hwloc-1.11.2.tar.gz
 cd hwloc-1.11.2/
-SCALFMM_HWLOC_DIR=$SCALFMM_TEST_DIR/hwlocinstall
+export SCALFMM_HWLOC_DIR=$SCALFMM_TEST_DIR/hwlocinstall
 ./configure --prefix=$SCALFMM_HWLOC_DIR
 make install
 ```
 
-*Output variables:* `$SCALFMM_HWLOC_DIR`
+*Output variables:* `scalfmmRegisterVariable SCALFMM_HWLOC_DIR`
 
 Valid-if:
 ```bash
@@ -94,12 +104,12 @@ if [[ ! -f fxt-0.2.11.tar.gz ]] ; then
 fi
 tar xvf fxt-0.2.11.tar.gz
 cd fxt-0.2.11/
-SCALFMM_FXT_DIR=$SCALFMM_TEST_DIR/fxtinstall
+export SCALFMM_FXT_DIR=$SCALFMM_TEST_DIR/fxtinstall
 ./configure --prefix=$SCALFMM_FXT_DIR
 make install
 ```
 
-*Output variables:* `$SCALFMM_FXT_DIR`
+*Output variables:* `scalfmmRegisterVariable SCALFMM_FXT_DIR`
 
 Valid-if:
 ```bash
@@ -117,14 +127,14 @@ if [[ ! -f fftw-3.3.4.tar.gz ]] ; then
 fi    
 tar xvf fftw-3.3.4.tar.gz
 cd fftw-3.3.4/
-SCALFMM_FFTW_DIR=$SCALFMM_TEST_DIR/fftinstall
+export SCALFMM_FFTW_DIR=$SCALFMM_TEST_DIR/fftinstall
 ./configure --prefix=$SCALFMM_FFTW_DIR
 make install
 ./configure --prefix=$SCALFMM_FFTW_DIR --enable-float
 make install
 ```
 
-*Output variables:* `$SCALFMM_FFTW_DIR`
+*Output variables:* `scalfmmRegisterVariable SCALFMM_FFTW_DIR`
 
 Valid-if:
 ```bash
@@ -140,14 +150,14 @@ if [[ ! -d starpu ]] ; then
 	svn export svn://scm.gforge.inria.fr/svnroot/starpu/trunk starpu
 fi    
 cd starpu/
-SCALFMM_STARPU_DIR=$SCALFMM_TEST_DIR/starpuinstall
+export SCALFMM_STARPU_DIR=$SCALFMM_TEST_DIR/starpuinstall
 ./autogen.sh
 ./configure --prefix=$SCALFMM_STARPU_DIR --with-fxt=$SCALFMM_FXT_DIR --with-hwloc=$SCALFMM_HWLOC_DIR --with-cuda-dir=$CUDA_PATH --disable-opencl
 make install
 ```
 > __Optional__ In case you do not want to use trace (FXT) please remove the `--with-fxt=$SCALFMM_FXT_DIR` parameter from the command
 
-*Output variables:* `$SCALFMM_STARPU_DIR`
+*Output variables:* `scalfmmRegisterVariable SCALFMM_STARPU_DIR`
 
 Valid-if:
 ```bash
@@ -167,11 +177,11 @@ if [[ ! -d scalfmm-public ]] ; then
 fi    
 cd scalfmm-public/
 export SCALFMM_SOURCE_DIR=`pwd`
-Build/
+cd Build/
 export SCALFMM_BUILD_DIR=`pwd`
 ```
 
-*Output variables:* `SCALFMM_BUILD_DIR` `SCALFMM_SOURCE_DIR`
+*Output variables:* `scalfmmRegisterVariable SCALFMM_BUILD_DIR` `scalfmmRegisterVariable SCALFMM_SOURCE_DIR`
 
 + Configure (No MKL):
 ```bash
@@ -224,7 +234,30 @@ Information for scalfmm binaries
 Examples:
 
 ```
-STARPU_NCPUS=3 STARPU_NCUDA=1 ./Tests/Release/testBlockedUnifCudaBench -nb 10000 -h 3
+export STARPU_NCPUS=12
+export STARPU_NCUDA=2
+./Tests/Release/testBlockedUnifCudaBench -nb 30000000 -h 7 -bs 800
+```
+
+Last part of the output should be:
+```bash
+	Start FGroupTaskStarPUAlgorithm
+		 directPass in 0.0406482s
+			 inblock  in 0.000780428s
+			 outblock in 0.0398674s
+		 bottomPass in 0.00586269s
+		 upwardPass in 0.00265723s
+		 transferPass in 0.00323571s
+			 inblock in  0.000124817s
+			 outblock in 0.00298331s
+		 downardPass in 0.00257975s
+		 transferPass in 0.0652285s
+			 inblock in  0.00164774s
+			 outblock in 0.0635799s
+		 L2P in 0.0115733s
+		 Submitting the tasks took 0.139101s
+		 Moving data to the host took 0.0578765s
+@EXEC TIME = 14.6321s
 ```
 
 + Visualize the execution trace (__Optional__)
@@ -238,6 +271,17 @@ Then visualize the output with vite
 vite ./paje.trace
 ```
 
+Should be like: // IMAGE HERE
+
+We can convert the color of the trace by (it needs Qt5 library):
+
+```bash
+$SCALFMM_SOURCE_DIR/Addons/BenchEfficiency/pajecolor paje.trace $SCALFMM_SOURCE_DIR/Addons/BenchEfficiency/paintmodel.fmm.colors
+vite ./paje.trace.painted
+```
+
+Should be like: // IMAGE HERE
+
 + Get execution times
 
 ```bash
@@ -247,20 +291,37 @@ python $SCALFMM_STARPU_DIR/bin/starpu_trace_state_stats.py -t trace.rec
 Should give something like:
 ```
 "Name","Count","Type","Duration"
-"Initializing",3,"Runtime",5.027746
-"Overhead",37,"Runtime",0.110073
-"Idle",13,"Other",0.03678
-"Scheduling",24,"Runtime",16.529527
-"Sleeping",17,"Other",2197.255516
-"FetchingInput",10,"Runtime",0.012637
-"execute_on_all_wrapper",6,"Task",8.431909
-"PushingOutput",10,"Runtime",16.505568
-"P2P",1,"Task",105.131112
-"Callback",4,"Runtime",0.001048
-"Deinitializing",3,"Runtime",0.014547
-"P2M",1,"Task",2.543303
-"L2P",1,"Task",5.649106
-"M2L-level-2",1,"Task",2.167273
+"Initializing",14,"Runtime",7153.096196
+"Overhead",57010,"Runtime",376.473463
+"Idle",14355,"Other",12.815899
+"Scheduling",28441,"Runtime",238.367394
+"Sleeping",610,"Other",13786.513208
+"FetchingInput",14341,"Runtime",13918.805814
+"execute_on_all_wrapper",30,"Task",21.288802
+"Executing",414,"Runtime",26852.864578
+"PushingOutput",14341,"Runtime",284.96123
+"P2P-out",3846,"Task",60378.266619
+"Callback",13559,"Runtime",4.210633
+"P2P",328,"Task",15383.426991
+"M2L-level-5",41,"Task",2354.702554
+"M2L-level-6",328,"Task",18349.915495
+"Deinitializing",14,"Runtime",109.87483
+"M2L-level-4",6,"Task",275.088295
+"P2M",328,"Task",11312.022842
+"M2M-level-5",328,"Task",829.9055
+"M2M-level-4",41,"Task",93.130498
+"M2L-out-level-5",638,"Task",1914.900053
+"M2M-level-3",6,"Task",11.053067
+"M2M-level-2",1,"Task",1.363157
+"M2L-out-level-4",22,"Task",159.580457
+"L2L-level-4",41,"Task",84.554065
+"L2L-level-5",328,"Task",1087.717767
+"M2L-out-level-6",7692,"Task",18322.518045
+"L2P",328,"Task",27146.256793
+"M2L-level-2",1,"Task",2.661235
+"L2L-level-3",6,"Task",11.346978
+"M2L-level-3",1,"Task",47.612555
+"L2L-level-2",1,"Task",1.471873
 ```
 
 Most of the script are in the addon directories
@@ -268,6 +329,8 @@ Most of the script are in the addon directories
 export SCALFMM_AB=$SCALFMM_SOURCE_DIR/Addons/BenchEfficiency/
 ```
 
+*Output variable:* `scalfmmRegisterVariable SCALFMM_AB`
+
 ## Homogeneous Efficiencies
 
 Here we compute the efficiencies for a given test case on CPU only.
@@ -275,62 +338,68 @@ Here we compute the efficiencies for a given test case on CPU only.
 Go in the build dir and create output dir
 ```
 cd $SCALFMM_BUILD_DIR
-mkdir homogeneous
+export SCALFMM_RES_DIR=$SCALFMM_BUILD_DIR/homogeneous
+mkdir $SCALFMM_RES_DIR
 ```
 
+*Output variable:* `scalfmmRegisterVariable SCALFMM_RES_DIR`
+
 Set up the configuration variables:
 ```bash
-SCALFMM_NB=10000000
-SCALFMM_H=7
-SCALFMM_MIN_BS=100
-SCALFMM_MAX_BS=3000
-SCALFMM_MAX_NB_CPU=24
+export SCALFMM_NB=10000000
+export SCALFMM_H=7
+export SCALFMM_MIN_BS=100
+export SCALFMM_MAX_BS=10000
+export SCALFMM_MAX_NB_CPU=24
 ```
 
 Find best granularity in sequential and in parallel:
 ```bash
-STARPU_NCPUS=1
-STARPU_NCUDA=0
-SCALFMM_BS_CPU_SEQ=`$SCALFMM_AB/scalfmmFindBs.sh "./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs" $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key "@BEST BS" `
+export STARPU_NCPUS=1
+export STARPU_NCUDA=0
+export SCALFMM_BS_CPU_SEQ=`$SCALFMM_AB/scalfmmFindBs.sh "./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs" $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmmExtractKey.sh "@BEST BS" `
 if [[ `which gnuplot | wc -l` == "1" ]] ;  then
     gnuplot -e "filename='seq-bs-search'" $SCALFMM_AB/scalfmmFindBs.gplot
 fi
 
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=0
-=`$SCALFMM_AB/scalfmmFindBs.sh "./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs" $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key "@BEST BS" `
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=0
+export SCALFMM_BS_CPU_PAR=`$SCALFMM_AB/scalfmmFindBs.sh "./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs" $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmmExtractKey.sh "@BEST BS" `
 if [[ `which gnuplot | wc -l` == "1" ]] ;  then
     gnuplot -e "filename='par-bs-search'" $SCALFMM_AB/scalfmmFindBs.gplot
 fi
 ```
+In our case we get  and 5385.
 
-Then we compute the efficiency using both granulirities and keep the .rec files.
+*Output variable:* `scalfmmRegisterVariable SCALFMM_BS_CPU_SEQ`  `scalfmmRegisterVariable SCALFMM_BS_CPU_PAR`
 
+Then we compute the efficiency using both granularities and keep the .rec files:
 ```bash
 source $SCALFMM_AB/execAllHomogeneous.sh
 ```
 
-We should end with all the rec files and their corresponding time files
+We should end with all the .rec files and their corresponding time files
 ```bash
-
+ls $SCALFMM_RES_DIR
 ```
 
-
 We compute the efficiencies
 ```bash
-
+source $SCALFMM_AB/computeHomogeneousEfficiencies
 ```
 
 We end with efficiency for the application and for the operators.
 ```bash
-
+cat $SCALFMM_RES_DIR/efficiencies.txt
 ```
 
 We can plot each of them
 ```bash
-
+source $SCALFMM_AB/plotEfficiencies.sh $SCALFMM_RES_DIR/efficiencies.txt
 ```
 
+Should give: // IMAGE HERE
+
 
 ## Generating Execution Results
 
@@ -340,32 +409,32 @@ we first want to know the best granularity `-bs`.
 This parameter will certainly not be the same for sequential/parallel/heterogenous configurations.
 
 ```bash
-SCALFMM_NB=10000000
-SCALFMM_H=7
-SCALFMM_MIN_BS=100
-SCALFMM_MAX_BS=3000
-SCALFMM_MAX_NB_CPU=24
-SCALFMM_MAX_NB_GPU=4
+export SCALFMM_NB=10000000
+export SCALFMM_H=7
+export SCALFMM_MIN_BS=100
+export SCALFMM_MAX_BS=3000
+export SCALFMM_MAX_NB_CPU=24
+export SCALFMM_MAX_NB_GPU=4
 ```
 
 ```bash
-STARPU_NCPUS=1
-STARPU_NCUDA=0
-SCALFMM_BS_CPU_SEQ=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key "@BEST BS" `
+export STARPU_NCPUS=1
+export STARPU_NCUDA=0
+export SCALFMM_BS_CPU_SEQ=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key "@BEST BS" `
 if [[ `which gnuplot | wc -l` == "1" ]] ;  then
     gnuplot -e "filename='seq-bs-search'" $SCALFMM_AB/scalfmmFindBs.gplot
 fi
 
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=0
-SCALFMM_BS_CPU_PAR=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key "@BEST BS" `
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=0
+export SCALFMM_BS_CPU_PAR=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key "@BEST BS" `
 if [[ `which gnuplot | wc -l` == "1" ]] ;  then
     gnuplot -e "filename='par-bs-search'" $SCALFMM_AB/scalfmmFindBs.gplot
 fi
 
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
-SCALFMM_BS_CPU_GPU=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key "@BEST BS" `
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
+export SCALFMM_BS_CPU_GPU=`$SCALFMM_AB/scalfmmFindBs.sh -nb $SCALFMM_NB -h $SCALFMM_H $SCALFMM_MIN_BS $SCALFMM_MAX_BS | $SCALFMM_AB/scalfmm_extract_key "@BEST BS" `
 if [[ `which gnuplot | wc -l` == "1" ]] ;  then
     gnuplot -e "filename='cpugpu-bs-search'" $SCALFMM_AB/scalfmmFindBs.gplot
 fi
@@ -373,38 +442,38 @@ fi
 
 Then, we can execute three best configurations, and keep .rec for each of them:
 ```bash
-STARPU_NCPUS=1
-STARPU_NCUDA=0
+export STARPU_NCPUS=1
+export STARPU_NCUDA=0
 ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_CPU_SEQ
-SCALFMM_SEQ_REC="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec"
+export SCALFMM_SEQ_REC="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec"
 mv trace.rec $SCALFMM_SEQ_REC
 
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=0
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=0
 ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_PAR
-SCALFMM_PAR_REC="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec"
+export SCALFMM_PAR_REC="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec"
 mv trace.rec $SCALFMM_PAR_REC
 
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
 ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_GPU
-SCALFMM_PAR_CPU_GPU_REC="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec"
+export SCALFMM_PAR_CPU_GPU_REC="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec"
 mv trace.rec $SCALFMM_PAR_CPU_GPU_REC
 ```
 
 And we also want the GPU tasks only on GPU
 ```bash
-STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
-STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
+export STARPU_NCPUS=$SCALFMM_MAX_NB_CPU
+export STARPU_NCUDA=$SCALFMM_MAX_NB_GPU
 ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_GPU -p2p-m2l-cuda-only
-SCALFMM_PAR_GPU_REC="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA-GPUONLY.rec"
+export SCALFMM_PAR_GPU_REC="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA-GPUONLY.rec"
 mv trace.rec $SCALFMM_PAR_GPU_REC
 ```
 
 And we want the sequential version with parallel granularity:
 ```bash
-STARPU_NCPUS=1
-STARPU_NCUDA=0
+export STARPU_NCPUS=1
+export STARPU_NCUDA=0
 
 ./Tests/Release/testBlockedUnifCudaBench -nb $SCALFMM_NB -h $SCALFMM_H -bs $SCALFMM_BS_CPU_PAR
 SCALFMM_SEQ_CPU_BS_REC="trace-nb_$SCALFMM_NB-h_$SCALFMM_H-bs_$SCALFMM_CPU_SEQ-CPU_$STARPU_NCPUS-GPU_$STARPU_NCUDA.rec"
diff --git a/Addons/BenchEfficiency/scalfmmFindBs.sh b/Addons/BenchEfficiency/scalfmmFindBs.sh
index 3c454832e0c3231625b58d3df8b3a7660b3788de..06a45c402eb3eb094bee6674a692ecda037b38d2 100644
--- a/Addons/BenchEfficiency/scalfmmFindBs.sh
+++ b/Addons/BenchEfficiency/scalfmmFindBs.sh
@@ -5,7 +5,9 @@ function GetExecTime() {
     local res_output=`$1 "$2"`
     >&2 echo "[LOG]     Try to find $3"
     local time_result=`echo "$res_output" | grep "$3" | cut -d'=' -f2 | cut -d's' -f1`
-    # >&2 echo "[LOG] output : $res_output"
+    if [[ $VERBOSE ]] ; then    
+        >&2 echo "[LOG] output : $res_output"
+    fi
     >&2 echo "[LOG]     Done in $time_result"
     echo $time_result
 }
@@ -31,8 +33,10 @@ fi
 echo "You ask to find the best bs for:"
 echo "Command: $1"
 echo "From $2 to $3"
+echo "STARPU_NCPUS = $STARPU_NCPUS"
+echo "STARPU_NCUDA = $STARPU_NCUDA"
 
-outputfile=./bs_bench.data
+outputfile=./benchBs.data
 
 echo "# BS TIME" > $outputfile
 
diff --git a/Addons/BenchEfficiency/seq-bs-search.png b/Addons/BenchEfficiency/seq-bs-search.png
new file mode 100644
index 0000000000000000000000000000000000000000..215e5ce2110aca4e0eb3e4fed032aa1a3e5590fd
Binary files /dev/null and b/Addons/BenchEfficiency/seq-bs-search.png differ
diff --git a/Addons/BenchEfficiency/trace-example-colors.png b/Addons/BenchEfficiency/trace-example-colors.png
new file mode 100644
index 0000000000000000000000000000000000000000..dcefa9fb53660927f1509d64f89254ee03e60dec
Binary files /dev/null and b/Addons/BenchEfficiency/trace-example-colors.png differ
diff --git a/Addons/BenchEfficiency/trace-example.png b/Addons/BenchEfficiency/trace-example.png
new file mode 100644
index 0000000000000000000000000000000000000000..5e466b94ed15a4d0905484425a75de9d390f45d7
Binary files /dev/null and b/Addons/BenchEfficiency/trace-example.png differ