From 9fbafbe576b211532e53ccb8c8faf6f2fdb934bc Mon Sep 17 00:00:00 2001
From: Florent Pruvost <florent.pruvost@inria.fr>
Date: Tue, 31 Jan 2017 11:12:59 +0100
Subject: [PATCH] update texinfo documentation with the current development
 state

---
 docs/texinfo/chapters/configuration.texi |  8 ++---
 docs/texinfo/chapters/installing.texi    | 46 +++++++++++-------------
 docs/texinfo/chapters/using.texi         | 41 ++++++++++++++++++---
 docs/texinfo/users_guide.texi.in         |  2 +-
 docs/texinfo/version.texi.in             |  8 ++---
 5 files changed, 65 insertions(+), 40 deletions(-)

diff --git a/docs/texinfo/chapters/configuration.texi b/docs/texinfo/chapters/configuration.texi
index 7bfcd5edf..a147cf1f9 100644
--- a/docs/texinfo/chapters/configuration.texi
+++ b/docs/texinfo/chapters/configuration.texi
@@ -1,7 +1,7 @@
 @c -*-texinfo-*-
 
 @c This file is part of the MORSE Handbook.
-@c Copyright (C) 2014 Inria
+@c Copyright (C) 2017 Inria
 @c Copyright (C) 2014 The University of Tennessee
 @c Copyright (C) 2014 King Abdullah University of Science and Technology
 @c See the file ../chameleon.texi for copying conditions.
@@ -140,10 +140,6 @@ execution time on any architecture.
 This feature should be used to make experiments on the scheduler behaviors and
 performances not to produce solutions of linear systems.
 
-@item @option{-DCHAMELEON_SIMULATION_MAGMA=trigger} (default: @code{OFF})
-when using simulation mode (see CHAMELEON_SIMULATION), one can decide to enable
-the magma kernels or not.
-
 @item @option{-DCHAMELEON_ENABLE_DOCS=trigger} (default: @code{ON})
 to control build of the documentation contained in @file{docs/} sub-directory
 @item @option{-DCHAMELEON_ENABLE_EXAMPLE=trigger} (default: @code{ON})
@@ -318,6 +314,8 @@ The trace file will be named paje.trace (use -o option to specify an output
 name).
 @end itemize
 
+Alternatively, one can directly generate .paje trace files at the end of the execution
+by setting @env{STARPU_GENERATE_TRACE=1}.
 
 @node Use simulation mode with StarPU-SimGrid
 @section Use simulation mode with StarPU-SimGrid
diff --git a/docs/texinfo/chapters/installing.texi b/docs/texinfo/chapters/installing.texi
index d46c112ce..fec50baab 100644
--- a/docs/texinfo/chapters/installing.texi
+++ b/docs/texinfo/chapters/installing.texi
@@ -1,7 +1,7 @@
 @c -*-texinfo-*-
 
 @c This file is part of the CHAMELEON Handbook.
-@c Copyright (C) 2014 Inria
+@c Copyright (C) 2017 Inria
 @c Copyright (C) 2014 The University of Tennessee
 @c Copyright (C) 2014 King Abdullah University of Science and Technology
 @c See the file ../chameleon.texi for copying conditions.
@@ -33,11 +33,10 @@ to install CHAMELEON.
 
 The latest official release tarballs of CHAMELEON sources are available for
 download from
-@uref{http://morse.gforge.inria.fr/chameleon-0.9.1.tar.gz, chameleon-0.9.1}.
+@uref{https://gforge.inria.fr/frs/download.php/file/34884/chameleon-0.9.1.tar.gz, chameleon-0.9.1}.
 
-@c The latest development snapshot is available from
-@c @uref{http://hydra.bordeaux.inria.fr/job/hiepacs/morse-cmake/tarball/latest/
-@c download-by-type/file/source-dist}.
+The latest development snapshot is available on gitlab:
+@uref{https://gitlab.inria.fr/solverstack/chameleon}
 
 @node Required dependencies
 @subsection Required dependencies
@@ -66,14 +65,13 @@ Both these implementations are reference implementation of BLAS, are not
 optimized for modern processor architectures and provide an order of magnitude
 lower performance than optimized implementations.
 Highly optimized implementations of BLAS are available from many hardware
-vendors, such as Intel MKL and AMD ACML.
+vendors, such as Intel MKL, IBM ESSL and AMD ACML.
 Fast implementations are also available as academic packages, such as ATLAS and
-Goto BLAS.
+OpenBLAS.
 The standard interface to BLAS is the FORTRAN interface.
 
 @strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the reference BLAS from NETLIB and the Intel MKL 11.1 from Intel distribution
-2013_sp1.
+the reference BLAS from NETLIB, OpenBLAS and Intel MKL.
 
 @node CBLAS
 @subsubsection CBLAS
@@ -86,8 +84,7 @@ Netlib provides a reference implementation of CBLAS on top of FORTRAN BLAS
 Since GSL is implemented in C, it naturally provides CBLAS.
 
 @strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the reference CBLAS from NETLIB and the Intel MKL 11.1 from Intel distribution
-2013_sp1.
+the reference CBLAS from NETLIB, OpenBLAS and Intel MKL.
 
 @node a LAPACK implementation
 @subsubsection a LAPACK implementation
@@ -100,8 +97,7 @@ square problems, eigenvalue problems and singular value problems.
 Most commercial and academic BLAS packages also provide some LAPACK routines.
 
 @strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the reference LAPACK from NETLIB and the Intel MKL 11.1 from Intel distribution
-2013_sp1.
+the reference LAPACK from NETLIB, OpenBLAS and Intel MKL.
 
 @node LAPACKE
 @subsubsection LAPACKE
@@ -116,8 +112,7 @@ which automatically handle workspace allocation, making the use of LAPACK much
 more convenient.
 
 @strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the reference LAPACKE from NETLIB.
-A stand-alone version of LAPACKE is required.
+the reference LAPACKE from NETLIB, OpenBLAS and Intel MKL.
 
 @node libtmg
 @subsubsection libtmg
@@ -129,8 +124,7 @@ The testing and timing suites of LAPACK require libtmg, but not the library
 itself. Note that the LAPACK library can be built and used without libtmg.
 
 @strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the reference TMG from NETLIB and the Intel MKL 11.1 from Intel distribution
-2013_sp1.
+the reference TMGLIB from NETLIB, OpenBLAS and Intel MKL.
 
 @node QUARK
 @subsubsection QUARK
@@ -147,7 +141,7 @@ CUDA (for GPUs) nor MPI (distributed-memory environment).
 You can use StarPU to do so.
 
 @strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the QUARK library from PLASMA release between versions 2.5.0 and 2.6.0.
+the QUARK library 0.9.
 
 @node StarPU
 @subsubsection StarPU
@@ -169,7 +163,7 @@ If StarPU is enabled then QUARK is disabled and conversely.
 Note StarPU is enabled by default.
 
 @strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-StarPU-1.1 releases.
+StarPU-1.1 and 1.2 releases.
 
 @node hwloc
 @subsubsection hwloc
@@ -214,8 +208,8 @@ MPI can be enabled only if the runtime system chosen is StarPU (default).
 To use MPI through StarPU, it is necessary to compile StarPU with MPI
 enabled.
 
-@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-OpenMPI releases from versions 1.4 to 1.6.
+@strong{Caution about the compatibility:} OpenMPI should be built with the
+@option{--enable-mpi-thread-multiple} option.
 
 @node Nvidia CUDA Toolkit
 @subsubsection Nvidia CUDA Toolkit
@@ -236,8 +230,8 @@ To use CUDA through StarPU, it is necessary to compile StarPU with CUDA
 enabled.
 
 @strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-CUDA releases from versions 4 to 6.
-MAGMA library must be compatible with CUDA.
+CUDA releases from versions 4 to 7.5.
+Your compiler and MAGMA library must be compatible with CUDA.
 
 @node MAGMA
 @subsubsection MAGMA
@@ -255,8 +249,8 @@ MAGMA releases from versions 1.4 to 1.6.
 MAGMA library must be compatible with CUDA.
 MAGMA library should be built with sequential versions of BLAS/LAPACK.
 We should not get some MAGMA link flags embarking multithreaded
-BLAS/LAPACK because it could affect permformances (take care about the
-MAGMA link flag @option{-lmkl_intel_thread} for example that we could heritate
+BLAS/LAPACK because it could affect performances (take care about the
+MAGMA link flag @option{-lmkl_intel_thread} for example that we could inherit
 from the pkg-config file @file{magma.pc}).
 
 @node FxT
@@ -291,7 +285,7 @@ version of StarPU used.
 The CHAMELEON build process requires CMake version 2.8.0 or higher and
 working C and Fortran compilers.
 Compilation and link with CHAMELEON libraries have been tested with
-@strong{gcc/gfortran 4.8.1} and @strong{icc/ifort 14.0.2}.
+@strong{gcc/gfortran} and @strong{icc/ifort}.
 On Unix-like operating systems, it also requires Make.
 The CHAMELEON project can not be configured for an in-source build.
 You will get an error message if you try to compile in-source.
diff --git a/docs/texinfo/chapters/using.texi b/docs/texinfo/chapters/using.texi
index c8bb2125c..47bf96ddf 100644
--- a/docs/texinfo/chapters/using.texi
+++ b/docs/texinfo/chapters/using.texi
@@ -307,7 +307,9 @@ a code that most users should easily understand.
 Then, the different interfaces CHAMELEON provides are exposed, from the
 simplest API (step1) to more complicated ones (until step4).
 The way some important parameters are set is discussed in step5.
-Finally step6 is an example about distributed computation with MPI.
+step6 is an example about distributed computation with MPI.
+Finally step7 shows how to let Chameleon initialize the user's data
+(matrices/vectors) in parallel.
 
 Source files can be found in the @file{example/lapack_to_morse/}
 directory.
@@ -332,12 +334,13 @@ Lets comment the different steps of the tutorial
 @menu
 * Step0:: a simple Cholesky example using the C interface of
 BLAS/LAPACK
-* Step1:: introduces the LAPACK equivalent interface of MORSE
+* Step1:: introduces the LAPACK equivalent interface of Chameleon
 * Step2:: introduces the tile interface
-* Step3:: indicates how to give your own tile matrix to MORSE
+* Step3:: indicates how to give your own tile matrix to Chameleon
 * Step4:: introduces the tile async interface
 * Step5:: shows how to set some important parameters
-* Step6:: introduces how to benefit from MPI in MORSE.
+* Step6:: introduces how to benefit from MPI in Chameleon
+* Step7:: introduces how to let Chameleon initialize the user's matrix data
 @end menu
 
 @node Step0
@@ -708,6 +711,36 @@ but they have sense only for 2-D block-cyclic distribution and then using
 Of course it could be used with other distributions, being no more the
 parameters of a 2-D block-cyclic grid but of another distribution.
 
+@node Step7
+@subsubsection Step7
+
+This program is a copy of step6 with some additional calls to
+build a matrix from within chameleon using a function provided by the user.
+This can be seen as a replacement for functions like @code{MORSE_dplgsy_Tile()} that can be used
+to fill the matrix with random data, @code{MORSE_dLapack_to_Tile()} to fill the matrix
+with data stored in a lapack-like buffer, or @code{MORSE_Desc_Create_User()} that can be used
+to describe an arbitrary tile matrix structure.
+In this example, the build callback functions are just wrappers around @code{CORE_xxx()} functions, so the output
+of the program step7 should be exactly the same as that of step6.
+The difference is that the function used to fill the tiles is provided by the user,
+and therefore this approach is much more flexible.
+
+The new function to understand is @code{MORSE_dbuild_Tile}, e.g.
+@verbatim
+struct data_pl data_A={(double)N, 51, N};
+MORSE_dbuild_Tile(MorseUpperLower, descA, (void*)&data_A, Morse_build_callback_plgsy);
+@end verbatim
+The idea here is to let Chameleon fill the matrix data in a task-based fashion
+(parallel) by using a function given by the user.
+First, the user should define if all the blocks must be entirely filled or just
+the upper/lower part with, e.g. @code{MorseUpperLower}.
+We still rely on the same structure @code{MORSE_desc_t} which must be
+initialized with the proper parameters, by calling for example
+@code{MORSE_Desc_Create}.
+Then, an opaque pointer is used to let the user give some extra data used by
+his function.
+The last parameter is the pointer to the user's function.
+
 @node List of available routines
 @subsection List of available routines
 
diff --git a/docs/texinfo/users_guide.texi.in b/docs/texinfo/users_guide.texi.in
index 43d4bf688..79051a895 100644
--- a/docs/texinfo/users_guide.texi.in
+++ b/docs/texinfo/users_guide.texi.in
@@ -10,7 +10,7 @@
 @c #############################################################################
 
 @copying
-Copyright @copyright{} 2014 Inria
+Copyright @copyright{} 2017 Inria
 
 @noindent
 Copyright @copyright{} 2014 The University of Tennessee
diff --git a/docs/texinfo/version.texi.in b/docs/texinfo/version.texi.in
index cb5ee79fc..4af718ef2 100644
--- a/docs/texinfo/version.texi.in
+++ b/docs/texinfo/version.texi.in
@@ -1,4 +1,4 @@
-@set UPDATED 02 November 2015
-@set UPDATED-MONTH November 2015
-@set EDITION @CHAMELEON_VERSION_MAJOR@.@CHAMELEON_VERSION_MINOR@.@CHAMELEON_VERSION_PATCH@
-@set VERSION @CHAMELEON_VERSION_MAJOR@.@CHAMELEON_VERSION_MINOR@.@CHAMELEON_VERSION_PATCH@
+@set UPDATED 30 January 2017
+@set UPDATED-MONTH January 2017
+@set EDITION @CHAMELEON_VERSION_MAJOR@.@CHAMELEON_VERSION_MINOR@.@CHAMELEON_VERSION_MICRO@
+@set VERSION @CHAMELEON_VERSION_MAJOR@.@CHAMELEON_VERSION_MINOR@.@CHAMELEON_VERSION_MICRO@
-- 
GitLab