diff --git a/READMEDEV.org b/READMEDEV.org
index 486662318f704fa4113176ee322d308cf50ddbd3..7c7866be45d4b1bb6ba95f32b5083d2ba352e3c6 100644
--- a/READMEDEV.org
+++ b/READMEDEV.org
@@ -31,11 +31,11 @@ developers must follow and that should be read by contributors.
 *** Prerequisites
 
      To generate the documentation you need to have [[http://www.stack.nl/~dimitri/doxygen/][Doxygen]] and
-     [[https://www.gnu.org/software/texinfo/][Texinfo]] installed on your system.
+     [[https://orgmode.org/][org-mode]] installed on your system.
 
      For example, on Debian systems:
      #+begin_src sh
-     sudo apt install doxygen texinfo texlive texlive-latex-extra emacs
+     sudo apt install doxygen org-mode texlive texlive-latex-extra emacs
      #+end_src
 
 *** configure + make documentation
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index 3f96ef056b0a3663480f4747e1a80f9f39707d83..dc2b7c8fcd9b450ea770147cede32614bbfd0803 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -35,7 +35,6 @@ cmake_minimum_required(VERSION 2.8)
 #############################################
 add_subdirectory(doxygen)
 add_subdirectory(orgmode)
-#add_subdirectory(texinfo)
 ###
 ### END CMakeLists.txt
 ###
diff --git a/doc/texinfo/CMakeLists.txt b/doc/texinfo/CMakeLists.txt
deleted file mode 100644
index 155151b895e1ee27e97a5fc395900081d0639781..0000000000000000000000000000000000000000
--- a/doc/texinfo/CMakeLists.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-###
-#
-# @file CMakeLists.txt
-#
-# @copyright 2009-2014 The University of Tennessee and The University of
-#                      Tennessee Research Foundation. All rights reserved.
-# @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
-#                      Univ. Bordeaux. All rights reserved.
-#
-###
-#
-#  @project MORSE
-#  MORSE is a software package provided by:
-#     Inria Bordeaux - Sud-Ouest,
-#     Univ. of Tennessee,
-#     King Abdullah Univesity of Science and Technology
-#     Univ. of California Berkeley,
-#     Univ. of Colorado Denver. 
-#
-# @version 1.0.0
-#  @author Cedric Castagnede
-#  @author Emmanuel Agullo
-#  @author Mathieu Faverge
-#  @author Florent Pruvost
-#  @date 2012-07-13
-#
-###
-
-cmake_minimum_required(VERSION 2.8)
-
-# Create file version.texi
-# ------------------------
-configure_file("version.texi.in"
-               "version.texi"
-               @ONLY)
-configure_file("users_guide.texi.in"
-               "users_guide.texi"
-               @ONLY)
-
-set(FIGURES 
-    tile_lu.pdf
-    tile_lu.jpg
-    tile_layout.pdf
-    tile_layout.jpg
-    trace_qr.pdf
-    trace_qr.jpg
-    potri_async.png
-    morse_header.png
-    )
-set(FIGURES_HTML 
-    tile_lu.jpg
-    tile_layout.jpg
-    trace_qr.jpg
-    potri_async.png
-    morse_header.png
-    )
-
-foreach(_fig ${FIGURES})
-    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/figures/${_fig}
-                   ${CMAKE_CURRENT_BINARY_DIR}/${_fig}
-                   COPYONLY)
-endforeach()
-
-# Looking which version we can compile
-# ------------------------------------
-FIND_PROGRAM(MAKEINFO_COMPILER makeinfo)
-FIND_PROGRAM(TEXI2DVI_COMPILER texi2dvi)
-FIND_PROGRAM(TEX_COMPILER tex)
-FIND_PROGRAM(DOT_COMPILER dot)
-
-# Looking for makeinfo
-# --------------------
-if(MAKEINFO_COMPILER)
-    # Add target
-    # ----------
-    add_custom_command(OUTPUT  users_guide.info
-                       COMMAND ${MAKEINFO_COMPILER}
-                       ARGS    users_guide.texi
-                       DEPENDS users_guide.texi.in
-                      )
-    add_custom_command(OUTPUT  users_guide.html
-                       COMMAND ${MAKEINFO_COMPILER}
-                       ARGS    --html
-                               --no-split
-                               --css-include=${CMAKE_CURRENT_SOURCE_DIR}/morse.css
-                               users_guide.texi
-                       DEPENDS users_guide.texi.in
-                      )
-    add_custom_target(doc-info-users_guide ALL DEPENDS users_guide.info)
-    add_custom_target(doc-html-users_guide ALL DEPENDS users_guide.html)
-
-    # Installation
-    # ------------
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/users_guide.info
-            DESTINATION share/chameleon/info)
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/users_guide.html
-            DESTINATION share/chameleon/html)
-    foreach(_fig ${FIGURES_HTML})
-        install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${_fig}
-                DESTINATION share/chameleon/html)
-    endforeach()
-
-else(MAKEINFO_COMPILER)
-    message(STATUS "Looking for makeinfo - not found")
-endif(MAKEINFO_COMPILER)
-
-# Looking for texi2dvi
-# --------------------
-if(TEXI2DVI_COMPILER AND TEX_COMPILER)
-    # Add target
-    # ----------
-    add_custom_command(OUTPUT  users_guide.pdf
-                       COMMAND ${TEXI2DVI_COMPILER}
-                       ARGS    --pdf
-                               --batch
-                               users_guide.texi
-                       DEPENDS users_guide.texi.in
-                      )
-    add_custom_target(doc-pdf-users_guide ALL DEPENDS users_guide.pdf)
-
-    # Installation
-    # ------------
-    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/users_guide.pdf
-            DESTINATION share/chameleon/pdf)
-
-else()
-    message(STATUS "Looking for texi2dvi - not found")
-endif()
-
-###
-### END CMakeLists.txt
-###
diff --git a/doc/texinfo/chapters/configuration.texi b/doc/texinfo/chapters/configuration.texi
deleted file mode 100644
index df23adb6f3088b931be4a876cc546b360ac78cc0..0000000000000000000000000000000000000000
--- a/doc/texinfo/chapters/configuration.texi
+++ /dev/null
@@ -1,360 +0,0 @@
-@c -*-texinfo-*-
-
-@c This file is part of the MORSE Handbook.
-@c Copyright (C) 2017 Inria
-@c Copyright (C) 2014 The University of Tennessee
-@c Copyright (C) 2014 King Abdullah University of Science and Technology
-@c See the file ../chameleon.texi for copying conditions.
-
-@menu
-* Compilation configuration::
-* Dependencies detection::
-@c * Dependencies compilation::
-* Use FxT profiling through StarPU::
-* Use simulation mode with StarPU-SimGrid::
-* Use out of core support with StarPU::
-@end menu
-
-@c @code{} @option{}
-@c @table @code
-@c @item truc
-@c @item muche
-@c @item et zut
-@c @c @end table
-
-@node Compilation configuration
-@section Compilation configuration
-
-The following arguments can be given to the @command{cmake <path to source
-directory>} script.
-
-In this chapter, the following convention is used:
-@itemize @bullet
-@item
-@option{path} is a path in your filesystem,
-@item
-@option{var} is a string and the correct value or an example will be given,
-@item
-@option{trigger} is an CMake option and the correct value is @code{ON} or
-@code{OFF}.
-@end itemize
-
-Using CMake there are several ways to give options:
-@enumerate
-@item directly as CMake command line arguments
-@item invoque @command{cmake <path to source directory>} once and then use
-@command{ccmake <path to source directory>} to edit options through a
-minimalist gui (required
-@samp{cmake-curses-gui} installed on a Linux system)
-@item invoque @command{cmake-gui} command and fill information about the
-location of the sources and where to build the project, then you have
-access to options through a user-friendly Qt interface (required
-@samp{cmake-qt-gui} installed on a Linux system)
-@end enumerate
-
-Example of configuration using the command line
-@example
-cmake ~/chameleon/ -DCMAKE_BUILD_TYPE=Debug          \
-                   -DCMAKE_INSTALL_PREFIX=~/install  \
-                   -DCHAMELEON_USE_CUDA=ON           \
-                   -DCHAMELEON_USE_MPI=ON            \
-                   -DBLA_VENDOR=Intel10_64lp         \
-                   -DSTARPU_DIR=~/install/starpu-1.1 \
-                   -DCHAMELEON_ENABLE_TRACING=ON
-@end example
-
-You can get the full list of options with @option{-L[A][H]} options of
-@command{cmake} command:
-@example
-cmake -LH <path to source directory>
-@end example
-
-@menu
-* General CMake options::
-* CHAMELEON options::
-@end menu
-
-@node General CMake options
-@subsection General CMake options
-
-@table @code
-
-@item -DCMAKE_INSTALL_PREFIX=@option{path} (default:@option{path=/usr/local})
-Install directory used by @code{make install} where some headers and libraries
-will be copied.
-Permissions have to be granted to write onto @option{path} during @code{make
-install} step.
-
-@item -DCMAKE_BUILD_TYPE=@option{var} (default: @option{Release})
-Define the build type and the compiler optimization level.
-The possible values for @option{var} are:
-@table @code
-@item empty
-@item Debug
-@item Release
-@item RelWithDebInfo
-@item MinSizeRel
-@end table
-
-@item -DBUILD_SHARED_LIBS=@option{trigger} (default:@option{OFF})
-Indicate wether or not CMake has to build CHAMELEON static (@option{OFF}) or
-shared (@option{ON}) libraries.
-
-@end table
-
-@node CHAMELEON options
-@subsection CHAMELEON options
-
-List of CHAMELEON options that can be enabled/disabled (value=@code{ON}
-or @code{OFF}):
-@table @code
-
-@item @option{-DCHAMELEON_SCHED_STARPU}=@option{trigger} (default: @code{ON})
-to link with StarPU library (runtime system)
-
-@item @option{-DCHAMELEON_SCHED_QUARK}=@option{trigger} (default: @code{OFF})
-to link with QUARK library (runtime system)
-
-@item @option{-DCHAMELEON_USE_CUDA}=@option{trigger} (default: @code{OFF})
-to link with CUDA runtime (implementation paradigm for accelerated codes on
-GPUs) and cuBLAS library (optimized BLAS kernels on GPUs), can only be used with
-StarPU
-
-@item @option{-DCHAMELEON_USE_MPI}=@option{trigger} (default: @code{OFF})
-to link with MPI library (message passing implementation for use of multiple
-nodes with distributed memory), can only be used with StarPU
-
-@item @option{-DCHAMELEON_ENABLE_TRACING}=@option{trigger} (default: @code{OFF})
-to enable trace generation during execution of timing drivers.
-It requires StarPU to be linked with FxT library (trace execution of kernels on workers).
-
-@item @option{-DCHAMELEON_SIMULATION=trigger} (default: @code{OFF})
-to enable simulation mode, means CHAMELEON will not really execute tasks,
-see details in section @ref{Use simulation mode with StarPU-SimGrid}.
-This option must be used with StarPU compiled with
-@uref{http://simgrid.gforge.inria.fr/, SimGrid} allowing to guess the
-execution time on any architecture.
-This feature should be used to make experiments on the scheduler behaviors and
-performances not to produce solutions of linear systems.
-
-@item @option{-DCHAMELEON_ENABLE_DOCS=trigger} (default: @code{ON})
-@item @option{-DCHAMELEON_ENABLE_EXAMPLE=trigger} (default: @code{ON})
-to control build of the examples executables (API usage)
-@item @option{-DCHAMELEON_ENABLE_TESTING=trigger} (default: @code{ON})
-to control build of testing executables (numerical check) contained in
-@item @option{-DCHAMELEON_ENABLE_TIMING=trigger} (default: @code{ON})
-to control build of timing executables (performances check) contained in
-
-@item @option{-DCHAMELEON_PREC_S=trigger} (default: @code{ON})
-to enable the support of simple arithmetic precision (float in C)
-@item @option{-DCHAMELEON_PREC_D=trigger} (default: @code{ON})
-to enable the support of double arithmetic precision (double in C)
-@item @option{-DCHAMELEON_PREC_C=trigger} (default: @code{ON})
-to enable the support of complex arithmetic precision (complex in C)
-@item @option{-DCHAMELEON_PREC_Z=trigger} (default: @code{ON})
-to enable the support of double complex arithmetic precision (double complex
-in C)
-
-@item @option{-DBLAS_VERBOSE=trigger} (default: @code{OFF})
-to make BLAS library discovery verbose
-@item @option{-DLAPACK_VERBOSE=trigger} (default: @code{OFF})
-to make LAPACK library discovery verbose (automatically enabled if
-@option{BLAS_VERBOSE=@code{ON}})
-@end table
-
-List of CHAMELEON options that needs a specific value:
-@table @code
-@item @option{-DBLA_VENDOR=@option{var}} (default: @option{empty})
-The possible values for @option{var} are:
-@table @code
-@item empty
-@item all
-@item Intel10_64lp
-@item Intel10_64lp_seq
-@item ACML
-@item Apple
-@item Generic
-@item ...
-@end table
-to force CMake to find a specific BLAS library, see the full list of BLA_VENDOR
-By default @option{BLA_VENDOR} is empty so that CMake tries to detect all
-possible BLAS vendor with a preference for Intel MKL.
-@end table
-
-List of CHAMELEON options which requires to give a path:
-@table @code
-@item @option{-DLIBNAME_DIR=@option{path}} (default: empty)
-root directory of the LIBNAME library installation
-@item @option{-DLIBNAME_INCDIR=@option{path}} (default: empty)
-directory of the LIBNAME library headers installation
-@item @option{-DLIBNAME_LIBDIR=@option{path}} (default: empty)
-directory of the LIBNAME libraries (.so, .a, .dylib, etc) installation
-@end table
-LIBNAME can be one of the following: BLAS - CBLAS - FXT - HWLOC -
-LAPACK - LAPACKE - QUARK - STARPU - TMG.
-See paragraph about @ref{Dependencies detection} for details.
-
-Libraries detected with an official CMake module (see module files in
-@itemize @bullet
-@item CUDA
-@item MPI
-@item Threads
-@end itemize
-
-Libraries detected with CHAMELEON cmake modules (see module files in
-@itemize @bullet
-@item BLAS
-@item CBLAS
-@item FXT
-@item HWLOC
-@item LAPACK
-@item LAPACKE
-@item QUARK
-@item STARPU
-@item TMG
-@end itemize
-
-
-@node Dependencies detection
-@section Dependencies detection
-You have different choices to detect dependencies on your system, either by
-setting some environment variables containing paths to the libs and headers or
-by specifying them directly at cmake configure.
-Different cases :
-@enumerate
-@item detection of dependencies through environment variables:
-  @itemize @bullet
-  @item @env{LD_LIBRARY_PATH} environment variable should contain the list of
-paths
-where to find the libraries:
-    @example
-    export @env{LD_LIBRARY_PATH}=$@env{LD_LIBRARY_PATH}:path/to/your/libs
-    @end example
-  @item @env{INCLUDE} environment variable should contain the list of paths
-where to find the header files of libraries
-    @example
-    export @env{INCLUDE}=$@env{INCLUDE}:path/to/your/headers
-    @end example
-  @end itemize
-
-@item detection with user's given paths:
-  @itemize @bullet
-  @item you can specify the path at cmake configure by invoking
-  @example
-  cmake <path to SOURCE_DIR> -DLIBNAME_DIR=path/to/your/lib
-  @end example
-  where LIB stands for the name of the lib to look for, example
-  @example
-  cmake <path to SOURCE_DIR> -DSTARPU_DIR=path/to/starpudir \
-                             -DCBLAS_DIR= ...
-  @end example
-  @item it is also possible to specify headers and library directories
-separately, example
-  @example
-  cmake <path to SOURCE_DIR>                           \
-  -DSTARPU_INCDIR=path/to/libstarpu/include/starpu/1.1 \
-  -DSTARPU_LIBDIR=path/to/libstarpu/lib
-  @end example
-  @item Note BLAS and LAPACK detection can be tedious so that we provide a
-verbose mode. Use @option{-DBLAS_VERBOSE=ON} or @option{-DLAPACK_VERBOSE=ON} to
-enable it.
-  @end itemize
-
-@end enumerate
-
-
-@c @node Dependencies compilation
-@c @section Dependencies compilation
-
-@node Use FxT profiling through StarPU
-@section Use FxT profiling through StarPU
-
-StarPU can generate its own trace log files by compiling it with the
-@option{--with-fxt}
-option at the configure step (you can have to specify the directory where you
-installed FxT by giving @option{--with-fxt=...} instead of @option{--with-fxt}
-alone).
-By doing so, traces are generated after each execution of a program which uses
-StarPU in the directory pointed by the @env{STARPU_FXT_PREFIX} environment
-variable. Example:
-@example
-export @env{STARPU_FXT_PREFIX}=/home/yourname/fxt_files/
-@end example
-
-When executing a @command{./timing/...} CHAMELEON program, if it has been
-enabled (StarPU compiled with FxT and @option{-DCHAMELEON_ENABLE_TRACING=ON}), you
-can give the option @option{--trace} to tell the program to generate trace log
-files.
-
-Finally, to generate the trace file which can be opened with
-@uref{http://vite.gforge.inria.fr/, Vite} program, you have to use the
-@command{starpu_fxt_tool} executable of StarPU.
-You can use it to generate the trace file like this:
-@itemize @bullet
-@item @command{path/to/your/install/starpu/bin/starpu_fxt_tool -i prof_filename}
-
-There is one file per mpi processus (prof_filename_0, prof_filename_1 ...).
-To generate a trace of mpi programs you can call it like this:
-@item @command{path/to/your/install/starpu/bin/starpu_fxt_tool -i
-prof_filename*}
-
-The trace file will be named paje.trace (use -o option to specify an output
-name).
-@end itemize
-
-Alternatively, one can also generate directly .paje trace files after the execution
-by setting @env{STARPU_GENERATE_TRACE=1}.
-
-@node Use simulation mode with StarPU-SimGrid
-@section Use simulation mode with StarPU-SimGrid
-
-Simulation mode can be enabled by setting the cmake option
-@option{-DCHAMELEON_SIMULATION=ON}.
-This mode allows you to simulate execution of algorithms with StarPU compiled
-with @uref{http://simgrid.gforge.inria.fr/, SimGrid}.
-directory of CHAMELEON sources.
-To use these perfmodels, please set the following
-@itemize @bullet
-@item @env{STARPU_HOME} environment variable to:
-  @example
-  @code{<path to SOURCE_DIR>/simucore/perfmodels}
-  @end example
-@item @env{STARPU_HOSTNAME} environment variable to the name of the machine to
-simulate. For example, on our platform (PlaFRIM) with GPUs at Inria Bordeaux
-  @example
-  @env{STARPU_HOSTNAME}=mirage
-  @end example
-Note that only POTRF kernels with block sizes of 320 or 960 (simple and double
-precision) on mirage machine are available for now.
-Database of models is subject to change, it should be enrich in a near future.
-@end itemize
-
-@node Use out of core support with StarPU
-@section Use out of core support with StarPU
-
-If the matrix can not fit in the main memory, StarPU can automatically evict
-tiles to the disk.  The descriptors for the matrices which can not fit in the
-main memory need to be created with @code{MORSE_Desc_Create_OOC}, so that MORSE
-does not force StarPU to keep it in the main memory.
-
-The following variables then need to be set:
-@itemize @bullet
-@item @env{STARPU_DISK_SWAP} environment variable to a place where to store
-evicted tiles, for example:
-  @example
-  @env{STARPU_DISK_SWAP}=/tmp
-  @end example
-@item @env{STARPU_DISK_SWAP_BACKEND} environment variable to the I/O method,
-for example:
-  @example
-  @env{STARPU_DISK_SWAP_BACKEND}=unistd_o_direct
-  @end example
-This will create a hierarchy of directory to store one file per tile. If that
-poses problems, one can use the hdf5 I/O method which stores all tiles in a
-single file.
-@item @env{STARPU_LIMIT_CPU_MEM} environment variable to the amount of memory
-that can be used in MBytes, for example:
-  @example
-  @env{STARPU_LIMIT_CPU_MEM}=1000
-  @end example
-@end itemize
diff --git a/doc/texinfo/chapters/installing.texi b/doc/texinfo/chapters/installing.texi
deleted file mode 100644
index f056c1ef319dfd9bf1b976aadbc01d0613773a47..0000000000000000000000000000000000000000
--- a/doc/texinfo/chapters/installing.texi
+++ /dev/null
@@ -1,330 +0,0 @@
-@c -*-texinfo-*-
-
-@c This file is part of the CHAMELEON Handbook.
-@c Copyright (C) 2017 Inria
-@c Copyright (C) 2014 The University of Tennessee
-@c Copyright (C) 2014 King Abdullah University of Science and Technology
-@c See the file ../chameleon.texi for copying conditions.
-
-@menu
-* Downloading CHAMELEON::
-* Build process of CHAMELEON::
-@end menu
-
-CHAMELEON can be built and installed by the standard means of CMake
-(@uref{http://www.cmake.org/}).
-General information about CMake, as well as installation binaries and CMake
-source code are available from
-@uref{http://www.cmake.org/cmake/resources/software.html}.
-The following chapter is intended to briefly remind how these tools can be used
-to install CHAMELEON.
-
-@node Downloading CHAMELEON
-@section Downloading CHAMELEON
-
-@menu
-* Getting Sources::
-* Required dependencies::
-* Optional dependencies::
-@end menu
-
-@node Getting Sources
-@subsection Getting Sources
-
-The latest official release tarballs of CHAMELEON sources are available for
-download from
-@uref{https://gforge.inria.fr/frs/download.php/file/34884/chameleon-0.9.1.tar.gz, chameleon-0.9.1}.
-
-The latest development snapshot is available on gitlab:
-@uref{https://gitlab.inria.fr/solverstack/chameleon}
-
-@node Required dependencies
-@subsection Required dependencies
-
-@menu
-* a BLAS implementation::
-* CBLAS::
-* a LAPACK implementation::
-* LAPACKE::
-* libtmg::
-* QUARK::
-* StarPU::
-* hwloc::
-* pthread::
-@end menu
-
-@node a BLAS implementation
-@subsubsection a BLAS implementation
-
-@uref{http://www.netlib.org/blas/, BLAS} (Basic Linear Algebra Subprograms),
-are a de facto standard for basic linear algebra operations such as vector and
-matrix multiplication.
-FORTRAN implementation of BLAS is available from Netlib.
-Also, C implementation of BLAS is included in GSL (GNU Scientific Library).
-Both these implementations are reference implementation of BLAS, are not
-optimized for modern processor architectures and provide an order of magnitude
-lower performance than optimized implementations.
-Highly optimized implementations of BLAS are available from many hardware
-vendors, such as Intel MKL, IBM ESSL and AMD ACML.
-Fast implementations are also available as academic packages, such as ATLAS and
-OpenBLAS.
-The standard interface to BLAS is the FORTRAN interface.
-
-@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the reference BLAS from NETLIB, OpenBLAS and Intel MKL.
-
-@node CBLAS
-@subsubsection CBLAS
-
-@uref{http://www.netlib.org/blas/#_cblas, CBLAS} is a C language interface to
-BLAS.
-Most commercial and academic implementations of BLAS also provide CBLAS.
-Netlib provides a reference implementation of CBLAS on top of FORTRAN BLAS
-(Netlib CBLAS).
-Since GSL is implemented in C, it naturally provides CBLAS.
-
-@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the reference CBLAS from NETLIB, OpenBLAS and Intel MKL.
-
-@node a LAPACK implementation
-@subsubsection a LAPACK implementation
-
-@uref{http://www.netlib.org/lapack/, LAPACK} (Linear Algebra PACKage) is a
-software library for numerical linear algebra, a successor of LINPACK and
-EISPACK and a predecessor of CHAMELEON.
-LAPACK provides routines for solving linear systems of equations, linear least
-square problems, eigenvalue problems and singular value problems.
-Most commercial and academic BLAS packages also provide some LAPACK routines.
-
-@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the reference LAPACK from NETLIB, OpenBLAS and Intel MKL.
-
-@node LAPACKE
-@subsubsection LAPACKE
-
-@uref{http://www.netlib.org/lapack/, LAPACKE} is a C language interface to
-LAPACK (or CLAPACK).
-It is produced by Intel in coordination with the LAPACK team and is available
-in source code from Netlib in its original version (Netlib LAPACKE) and from
-CHAMELEON website in an extended version (LAPACKE for CHAMELEON).
-In addition to implementing the C interface, LAPACKE also provides routines
-which automatically handle workspace allocation, making the use of LAPACK much
-more convenient.
-
-@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the reference LAPACKE from NETLIB, OpenBLAS and Intel MKL.
-
-@node libtmg
-@subsubsection libtmg
-
-@uref{http://www.netlib.org/lapack/, libtmg} is a component of the LAPACK
-library, containing routines for generation
-of input matrices for testing and timing of LAPACK.
-The testing and timing suites of LAPACK require libtmg, but not the library
-itself. Note that the LAPACK library can be built and used without libtmg.
-
-@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the reference TMGLIB from NETLIB, OpenBLAS and Intel MKL.
-
-@node QUARK
-@subsubsection QUARK
-
-@uref{http://icl.cs.utk.edu/quark/, QUARK} (QUeuing And Runtime for Kernels)
-provides a library that enables the dynamic execution of tasks with data
-dependencies in a multi-core, multi-socket, shared-memory environment.
-One of QUARK or StarPU Runtime systems has to be enabled in order to schedule
-tasks on the architecture.
-If QUARK is enabled then StarPU is disabled and conversely.
-Note StarPU is enabled by default.
-When CHAMELEON is linked with QUARK, it is not possible to exploit neither
-CUDA (for GPUs) nor MPI (distributed-memory environment).
-You can use StarPU to do so.
-
-@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-the QUARK library 0.9.
-
-@node StarPU
-@subsubsection StarPU
-
-@uref{http://runtime.bordeaux.inria.fr/StarPU/, StarPU} is a task programming
-library for hybrid architectures.
-StarPU handles run-time concerns such as:
-@itemize @bullet
-@item Task dependencies
-@item Optimized heterogeneous scheduling
-@item Optimized data transfers and replication between main memory and discrete
-memories
-@item Optimized cluster communications
-@end itemize
-StarPU can be used to benefit from GPUs and distributed-memory environment.
-One of QUARK or StarPU runtime system has to be enabled in order to schedule
-tasks on the architecture.
-If StarPU is enabled then QUARK is disabled and conversely.
-Note StarPU is enabled by default.
-
-@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-StarPU-1.1 and 1.2 releases.
-
-@node hwloc
-@subsubsection hwloc
-
-@uref{http://www.open-mpi.org/projects/hwloc/, hwloc} (Portable Hardware
-Locality) is a software package for accessing the  topology of a multicore
-system including components like: cores, sockets, caches and NUMA nodes.
-@c The topology discovery library, @code{hwloc}, is not mandatory to use StarPU
-@c but strongly recommended.
-It allows to increase performance, and to perform some topology aware
-scheduling.
-@code{hwloc} is available in major distributions and for most OSes and can be
-downloaded from @uref{http://www.open-mpi.org/software/hwloc}.
-
-@strong{Caution about the compatibility:} hwloc should be compatible with the
-version of StarPU used.
-
-@node pthread
-@subsubsection pthread
-
-POSIX threads library is required to run CHAMELEON on Unix-like systems.
-It is a standard component of any such system.
-@comment  Windows threads are used on Microsoft Windows systems.
-
-@node Optional dependencies
-@subsection Optional dependencies
-
-@menu
-* OpenMPI::
-* Nvidia CUDA Toolkit::
-* FxT::
-@end menu
-
-@node OpenMPI
-@subsubsection OpenMPI
-
-@uref{http://www.open-mpi.org/, OpenMPI} is an open source Message Passing
-Interface implementation for execution on multiple nodes with
-distributed-memory environment.
-MPI can be enabled only if the runtime system chosen is StarPU (default).
-To use MPI through StarPU, it is necessary to compile StarPU with MPI
-enabled.
-
-@strong{Caution about the compatibility:} OpenMPI should be built with the
---enable-mpi-thread-multiple option.
-
-@node Nvidia CUDA Toolkit
-@subsubsection Nvidia CUDA Toolkit
-
-@uref{https://developer.nvidia.com/cuda-toolkit, Nvidia CUDA Toolkit} provides
-a
-comprehensive development environment for C and C++ developers building
-GPU-accelerated applications.
-CHAMELEON can use a set of low level optimized kernels coming from cuBLAS to
-accelerate computations on GPUs.
-The @uref{http://docs.nvidia.com/cuda/cublas/, cuBLAS} library is an
-implementation of BLAS (Basic Linear Algebra Subprograms) on top of the Nvidia
-CUDA runtime.
-cuBLAS is normaly distributed with Nvidia CUDA Toolkit.
-CUDA/cuBLAS can be enabled in CHAMELEON only if the runtime system chosen
-is StarPU (default).
-To use CUDA through StarPU, it is necessary to compile StarPU with CUDA
-enabled.
-
-@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
-CUDA releases from versions 4 to 7.5.
-Your compiler must be compatible with CUDA.
-
-@node FxT
-@subsubsection FxT
-
-@uref{http://download.savannah.gnu.org/releases/fkt/, FxT} stands for both
-FKT (Fast Kernel Tracing) and FUT (Fast User Tracing).
-This library provides efficient support for recording traces.
-CHAMELEON can trace kernels execution on the different workers and produce
-.paje files if FxT is enabled.
-FxT can only be used through StarPU and StarPU must be compiled with FxT
-enabled, see how to use this feature here @ref{Use FxT profiling through
-StarPU}.
-
-@strong{Caution about the compatibility:} FxT should be compatible with the
-version of StarPU used.
-
-@node Build process of CHAMELEON
-@section Build process of CHAMELEON
-
-@menu
-* Setting up a build directory::
-* Configuring the project with best efforts::
-* Building::
-* Tests::
-* Installing::
-@end menu
-
-@node Setting up a build directory
-@subsection Setting up a build directory
-
-The CHAMELEON build process requires CMake version 2.8.0 or higher and
-working C and Fortran compilers.
-Compilation and link with CHAMELEON libraries have been tested with
-@strong{gcc/gfortran} and @strong{icc/ifort}.
-On Unix-like operating systems, it also requires Make.
-The CHAMELEON project can not be configured for an in-source build.
-You will get an error message if you try to compile in-source.
-Please clean the root of your project by deleting the generated
-
-@example
-mkdir build
-cd build
-@end example
-
-@quotation
-You can create a build directory from any location you would like. It can be a
-sub-directory of the CHAMELEON base source directory or anywhere else.
-@end quotation
-
-@node Configuring the project with best efforts
-@subsection Configuring the project with best efforts
-
-@example
-cmake <path to SOURCE_DIR> -DOPTION1= -DOPTION2= ...
-@end example
-stands
-Details about options that are useful to give to @command{cmake <path to
-SOURCE_DIR>} are given in @ref{Compilation configuration}.
-
-@node Building
-@subsection Building
-
-@example
-make [-j[ncores]]
-@end example
-do not hesitate to use @option{-j[ncores]} option to speedup the compilation
-
-@node Tests
-@subsection Tests
-
-In order to make sure that CHAMELEON is working properly on the system, it is
-also possible to run a test suite.
-
-@example
-make check
-@end example
-or
-@example
-ctest
-@end example
-
-@node Installing
-@subsection Installing
-
-In order to install CHAMELEON at the location that was specified during
-configuration:
-
-@example
-make install
-@end example
-do not forget to specify the install directory with
-@option{-DCMAKE_INSTALL_PREFIX} at cmake configure
-@example
-cmake <path to SOURCE_DIR> -DCMAKE_INSTALL_PREFIX=<path to INSTALL_DIR>
-@end example
-Note that the install process is optional.
-You are free to use CHAMELEON binaries compiled in the build directory.
diff --git a/doc/texinfo/chapters/introduction.texi b/doc/texinfo/chapters/introduction.texi
deleted file mode 100644
index b94921f37cc78db466d9e7e40806d004f5b9e7c9..0000000000000000000000000000000000000000
--- a/doc/texinfo/chapters/introduction.texi
+++ /dev/null
@@ -1,379 +0,0 @@
-@c -*-texinfo-*-
-
-@c This file is part of the CHAMELEON User's Guide.
-@c Copyright (C) 2014 Inria
-@c Copyright (C) 2014 The University of Tennessee
-@c Copyright (C) 2014 King Abdullah University of Science and Technology
-@c See the file ../chameleon.texi for copying conditions.
-
-@menu
-* MORSE project:: Discussion about MORSE project motivation
-* CHAMELEON::   Outline of this specific MORSE subproject
-@end menu
-
-@node MORSE project
-@section MORSE project
-
-@ifnottex
-@center @image{morse_header}
-@end ifnottex
-
-@menu
-* MORSE Objectives::
-* Research fields::
-* Research papers::
-@end menu
-
-@node MORSE Objectives
-@subsection MORSE Objectives
-
-When processor clock speeds flatlined in 2004, after more than fifteen years
-of exponential increases, the era of near automatic performance improvements
-that the HPC application community had previously enjoyed came to an abrupt
-end.
-To develop software that will perform well on petascale and exascale systems
-with thousands of nodes and millions of cores, the list of major challenges
-that must now be confronted is formidable:
-1) dramatic escalation in the costs of intrasystem communication between
-processors and/or levels of memory hierarchy;
-2) increased heterogeneity of the processing units (mixing CPUs, GPUs, etc. in
-varying and unexpected design combinations);
-3) high levels of parallelism and more complex constraints means that
-cooperating processes must be dynamically and unpredictably scheduled for
-asynchronous execution;
-4) software will not run at scale without much better resilience to faults and
-far more robustness; and
-5) new levels of self-adaptivity will be required to enable software to
-modulate process speed in order to satisfy limited energy budgets.
-The MORSE associate team will tackle the first three challenges in a
-orchestrating work between research groups respectively specialized in sparse
-linear algebra, dense linear algebra and runtime systems.
-The overall objective is to develop robust linear algebra libraries relying on
-innovative runtime systems that can fully benefit from the potential of those
-future large-scale complex machines.
-Challenges 4) and 5) will also be investigated by the different teams in the
-context of other partnerships, but they will not be the main focus of the
-associate team as they are much more prospective.
-
-@node Research fields
-@subsection Research fields
-
-The overall goal of the MORSE associate team is to enable advanced numerical
-algorithms to be executed on a scalable unified runtime system for exploiting
-the full potential of future exascale machines.
-We expect advances in three directions based first on strong and closed
-interactions between the runtime and numerical linear algebra communities.
-This initial activity will then naturally expand to more focused but still
-joint research in both fields.
-
-@menu
-* Fine interaction between linear algebra and runtime systems::
-* Runtime systems::
-* Linear algebra::
-@end menu
-
-@node Fine interaction between linear algebra and runtime systems
-@subsubsection Fine interaction between linear algebra and runtime systems
-
-On parallel machines, HPC applications need to take care of data movement and
-consistency, which can be either explicitly managed at the level of the
-application itself or delegated to a runtime system.
-We adopt the latter approach in order to better keep up with hardware trends
-whose complexity is growing exponentially.
-One major task in this project is to define a proper interface between HPC
-applications and runtime systems in order to maximize productivity and
-expressivity.
-As mentioned in the next section, a widely used approach consists in
-abstracting the application as a DAG that the runtime system is in charge of
-scheduling.
-Scheduling such a DAG over a set of heterogeneous processing units introduces a
-lot of new challenges, such as predicting accurately the execution time of each
-type of task over each kind of unit, minimizing data transfers between memory
-banks, performing data prefetching, etc.
-Expected advances: In a nutshell, a new runtime system API will be designed to
-allow applications to provide scheduling hints to the runtime system and to get
-real-time feedback about the consequences of scheduling decisions.
-
-@node Runtime systems
-@subsubsection Runtime systems
-
-A runtime environment is an intermediate layer between the system and the
-application.
-It provides low-level functionality not provided by the system (such as
-scheduling or management of the heterogeneity) and high-level features (such as
-performance portability).
-In the framework of this proposal, we will work on the scalability of runtime
-environment. To achieve scalability it is required to avoid all centralization.
-Here, the main problem is the scheduling of the tasks.
-In many task-based runtime environments the scheduler is centralized and
-becomes a bottleneck as soon as too many cores are involved.
-It is therefore required to distribute the scheduling decision or to compute a
-data distribution that impose the mapping of task using, for instance the
-so-called ``owner-compute'' rule.
-Expected advances: We will design runtime systems that enable an efficient and
-scalable use of thousands of distributed multicore nodes enhanced with
-accelerators.
-
-@node Linear algebra
-@subsubsection Linear algebra
-
-Because of its central position in HPC and of the well understood structure of
-its algorithms, dense linear algebra has often pioneered new challenges that HPC
-had to face.
-Again, dense linear algebra has been in the vanguard of the new era of
-petascale computing with the design of new algorithms that can efficiently run
-on a multicore node with GPU accelerators. These algorithms are called
-``communication-avoiding'' since they have been redesigned to limit the amount
-of communication between processing units (and between the different levels of
-memory hierarchy).
-They are expressed through Direct Acyclic Graphs (DAG) of fine-grained tasks
-that are dynamically scheduled. Expected advances: First, we plan to investigate
-the impact of these principles in the case of sparse applications (whose
-algorithms are slightly more complicated but often rely on dense kernels).
-Furthermore, both in the dense and sparse cases, the scalability on thousands of
-nodes is still limited; new numerical approaches need to be found.
-We will specifically design sparse hybrid direct/iterative methods that
-represent a promising approach.
-
-@node Research papers
-@subsection Research papers
-
-Research papers about MORSE can be found at
-
-@uref{http://icl.cs.utk.edu/projectsdev/morse/pubs/index.html}
-
-
-@node CHAMELEON
-@section CHAMELEON
-
-@menu
-* CHAMELEON software::       content and objectives
-* PLASMA's design principles:: algorithmic and matrix layout
-@end menu
-
-@node CHAMELEON software
-@subsection CHAMELEON software
-
-The main purpose is to address the performance shortcomings of
-the @uref{http://www.netlib.org/lapack/, LAPACK}
-and @uref{http://www.netlib.org/scalapack/, ScaLAPACK}
-libraries on multicore processors and multi-socket systems of multicore
-processors and their inability to efficiently utilize accelerators such as
-Graphics Processing Units (GPUs).
-
-CHAMELEON is a framework written in C which provides routines to solve dense
-general systems of linear equations, symmetric positive definite systems of
-linear equations and linear least squares problems, using LU, Cholesky, QR and
-LQ factorizations.
-Real arithmetic and complex arithmetic are supported in both single precision
-and double precision.
-It supports Linux and Mac OS/X machines (only tested on Intel x86-64
-architecture).
-
-CHAMELEON is based on @uref{http://icl.cs.utk.edu/plasma/, PLASMA} source
-code but is not limited to shared-memory environment and can exploit
-multiple GPUs.
-CHAMELEON is interfaced in a generic way with both
-@uref{http://icl.cs.utk.edu/quark/, QUARK} and
-@uref{http://runtime.bordeaux.inria.fr/StarPU/, StarPU} runtime systems.
-This feature allows to analyze in a unified framework how sequential
-task-based algorithms behave regarding different runtime systems
-implementations.
-Using CHAMELEON with @uref{http://runtime.bordeaux.inria.fr/StarPU/,
-StarPU} runtime system allows to exploit GPUs through
-kernels provided by @uref{https://developer.nvidia.com/cublas, cuBLAS}
-and clusters of interconnected nodes with distributed memory (using
-@uref{http://www.open-mpi.org/, MPI}).  Computation of very large
-systems with dense matrices on a cluster of nodes is still being
-experimented and stabilized.
-It is not expected to get stable performances with the current version using
-MPI.
-
-
-@comment  %//////////////////////////////////////////////////////////
-
-@node PLASMA's design principles
-@subsection PLASMA's design principles
-
-CHAMELEON is originally based on @uref{http://icl.cs.utk.edu/plasma/,
-PLASMA} so that design principles are very similar.
-The content of this section @ref{PLASMA's design principles} has been copied
-from the @samp{Design principles} section of the PLASMA User's Guide.
-
-@menu
-* Tile Algorithms::
-* Tile Data Layout::
-* Dynamic Task Scheduling::
-@end menu
-
-@node Tile Algorithms
-@subsubsection Tile Algorithms
-
-Tile algorithms are based on the idea of processing the matrix by square tiles
-of relatively small size, such that a tile fits entirely in one of the cache
-levels associated with one core.
-This way a tile can be loaded to the cache and processed completely before being
-evicted back to the main memory.
-Of the three types of cache misses, @emph{compulsory}, @emph{capacity}
-and @emph{conflict}, the use of tile algorithms minimizes the number of
-capacity misses, since each operation loads the amount of data that does not
-``overflow'' the cache.
-
-For some operations such as matrix multiplication and Cholesky factorization,
-translating the classic algorithm to the tile algorithm is trivial.
-In the case of matrix multiplication, the tile algorithm is simply a product of
-applying the technique of @emph{loop tiling} to the canonical definition of
-three nested loops.
-It is very similar for the Cholesky factorization.
-The @strong{left-looking} definition of Cholesky factorization from LAPACK is a
-loop with a sequence of calls to four routines: xSYRK (symmetric
-@strong{rank-k} update), xPOTRF (Cholesky factorization of a small block on the
-diagonal), xGEMM (matrix multiplication) and xTRSM (triangular solve).
-If the xSYRK, xGEMM and xTRSM operations are expressed with the canonical
-definition of three nested loops and the technique of loop tiling is applied,
-the tile algorithm results.
-Since the algorithm is produced by simple reordering of operations, neither the
-number of operations nor numerical stability of the algorithm are affected.
-
-The situation becomes slightly more complicated for LU and QR factorizations,
-where the classic algorithms factorize an entire panel of the matrix (a block
-of columns) at every step of the algorithm.
-One can observe, however, that the process of matrix factorization is
-synonymous with introducing zeros in approproate places and a tile algorithm
-can be fought of as one that zeroes one tile of the matrix at a time.
-This process is referred to as updating of a factorization or @emph{incremental
-factorization}.
-The process is equivalent to factorizing the top tile of a panel, then placing
-the upper triangle of the result on top of the tile blow and factorizing again,
-then moving to the next tile and so on.
-Here, the tile LU and QR algorithms perform slightly more floating point
-operations and require slightly more memory for auxiliary data.
-Also, the tile LU factorization applies a different pivoting pattern and, as a
-result, is less numerically stable than classic LU with full pivoting.
-Numerical stability is not an issue in case of the tile QR, which relies on
-orthogonal transformations (Householder reflections), which are numerically
-stable.
-
-@center @image{tile_lu,7cm,7cm}
-
-Schematic illustration of the tile LU factorization (kernel names for
-real arithmetics in double precision), courtesey of the
-@uref{http://icl.cs.utk.edu/plasma/, PLASMA} team.
-
-@comment  //////////////////////////////////////////////////////////
-
-@node Tile Data Layout
-@subsubsection Tile Data Layout
-
-Tile layout is based on the idea of storing the matrix by square tiles
-of relatively small size, such that each tile occupies a continuous memory
-region.
-This way a tile can be loaded to the cache memory efficiently and the risk of
-evicting it from the cache memory before it is completely processed is
-minimized.
-Of the three types of cache misses, @emph{compulsory}, @emph{capacity} and
-@emph{conflict}, the use of tile layout minimizes the number of conflict
-misses, since a continuous region of memory will completely fill out a
-@strong{set-associative} cache memory before an eviction can happen.
-Also, from the standpoint of multithreaded execution, the probability of
-@emph{false sharing} is minimized.
-It can only affect the cache lines containing the beginning and the ending of a
-tile.
-
-In standard @strong{cache-based} architecture, tiles continously laid out in
-memory maximize the profit from automatic prefetching.
-Tile layout is also beneficial in situations involving the use of accelerators,
-where explicit communication of tiles through DMA transfers is required, such as
-moving tiles between the system memory and the local store in Cell B. E. or
-moving tiles between the host memory and the device memory in GPUs.
-In most circumstances tile layout also minimizes the number of TLB
-misses and conflicts to memory banks or partitions.
-With the standard (@strong{column-major}) layout, access to each column of
-a tile is much more likely
-to cause a conflict miss, a false sharing miss, a TLB miss or a bank
-or partition conflict.
-The use of the standard layout for dense matrix operations is a
-performance minefield.
-Although occasionally one can pass through it unscathed, the risk of hitting a
-spot deadly to performance is very high.
-
-Another property of the layout utilized in PLASMA is that it is ``flat'',
-meaning that it does not involve a level of indirection. Each tile stores a
-small square submatrix of the main matrix in a @strong{column-major} layout. In
-turn, the main matrix is an arrangement of tiles immediately following one
-another in a @strong{column-major} layout.
-The offset of each tile can be calculated through address arithmetics and
-does not involve pointer indirection.
-Alternatively, a matrix could be represented as an array of pointers to
-tiles, located anywhere in memory. Such layout would be a radical
-and unjustifiable departure from LAPACK and ScaLAPACK.
-Flat tile layout is a natural progression from LAPACK's @strong{column-major}
-layout and ScaLAPACK's @strong{block-cyclic} layout.
-
-Another related property of PLASMA's tile layout is that it includes
-provisions for padding of tiles, i.e., the actual region of memory designated
-for a tile can be larger than the memory occupied by the actual data.
-This allows to force a certain alignment of tile boundaries, while using the
-flat organization described in the previous paragraph.
-The motivation is that, at the price of small memory overhead, alignment of
-tile boundaries may prove benefivial in multiple scenarios involving
-memory systems of standard multicore processors, as well as accelerators.
-The issues that come into play are, again, the use of TLBs and memory banks or
-partitions.
-
-@center @image{tile_layout,7cm,7cm}
-
-Schematic illustration of the tile layout with @strong{column-major}
-order of tiles, @strong{column-major} order of elements within tiles and
-(optional) padding for enforcing a certain alighment of tile bondaries,
-courtesey of the @uref{http://icl.cs.utk.edu/plasma/, PLASMA} team.
-
-@comment  %//////////////////////////////////////////////////////////
-
-@node Dynamic Task Scheduling
-@subsubsection Dynamic Task Scheduling
-
-Dynamic scheduling is the idea of assigning work to cores based on the
-availability of data for processing at any given point in time and is also
-referred to as @strong{@emph{data-driven}} scheduling.
-The concept is related closely to the idea of expressing computation through a
-task graph, often referred to as the DAG (@emph{Direct Acyclic Graph}), and
-the flexibility exploring the DAG at runtime.
-Thus, to a large extent, dynamic scheduling is synonymous with
-@strong{@emph{runtime scheduling}}.
-An important concept here is the one of the @emph{critical path}, which defines
-the upper bound on the achievable parallelism, and needs to be pursued at the
-maximum speed.
-This is in direct opposition to the @strong{@emph{fork-and-join}} or
-@strong{@emph{data-parallel}} programming models, where
-artificial synchronization points expose serial sections of
-the code, where multiple cores are idle, while sequential processing takes
-place.
-The use of dynamic scheduling introduces a @strong{trade-off}, though.
-The more dynamic (flexible) scheduling is, the more centralized (and less
-scalable) the scheduling mechanism is.
-For that reason, currently PLASMA uses two scheduling
-mechanisms, one which is fully dynamic and one where work is assigned
-statically and dependency checks are done at runtime.
-
-The first scheduling mechanism relies on unfolding a @emph{sliding window} of
-the task graph at runtime and scheduling work by resolving data hazards:
-@emph{Read After Write~(RAW)}, @emph{Write After Read~(WAR)} and @emph{Write
-After Write~(WAW)}, a technique analogous to instruction scheduling in
-superscalar processors.
-It also relies on @strong{@emph{work-stealing}} for balanding the
-load among all multiple cores.
-The second scheduling mechanism relies on statically designating a path through
-the execution space of the algorithm to each core and following a
-cycle: transition to a task, wait for its dependencies, execute it, update the
-overall progress.
-Task are identified by tuples and task transitions are done through locally
-evaluated formulas.
-Progress information can be centralized, replicated or distributed (currently
-centralized).
-
-@center @image{trace_qr,12cm,5cm}
-
-A trace of the tile QR factorization executing on eight cores without
-any global synchronization points (kernel names for real arithmetics in single
-precision), courtesey of the @uref{http://icl.cs.utk.edu/plasma/, PLASMA} team.
diff --git a/doc/texinfo/chapters/using.texi b/doc/texinfo/chapters/using.texi
deleted file mode 100644
index cf83f26e85cd2f7959a3b2ce81cfd96882469bc9..0000000000000000000000000000000000000000
--- a/doc/texinfo/chapters/using.texi
+++ /dev/null
@@ -1,1425 +0,0 @@
-@c -*-texinfo-*-
-
-@c This file is part of the MORSE Handbook.
-@c Copyright (C) 2014 Inria
-@c Copyright (C) 2014 The University of Tennessee
-@c Copyright (C) 2014 King Abdullah University of Science and Technology
-@c See the file ../chameleon.texi for copying conditions.
-
-@menu
-* Using CHAMELEON executables::
-* Linking an external application with CHAMELEON libraries::
-* CHAMELEON API::
-@end menu
-
-@node Using CHAMELEON executables
-@section Using CHAMELEON executables
-
-CHAMELEON provides several test executables that are compiled and link with
-CHAMELEON stack of dependencies.
-Instructions about the arguments to give to executables are accessible thanks
-to the option @option{-[-]help} or @option{-[-]h}.
-This set of binaries are separated into three categories and can be found in
-three different directories:
-
-@itemize @bullet
-
-  @item example
-
-  contains examples of API usage and more specifically the
-  sub-directory lapack_to_morse/ provides a tutorial that explain how to use
-  CHAMELEON functionalities starting from a full LAPACK code, see
-@ref{Tutorial LAPACK to CHAMELEON}
-
-  @item testing
-
-  contains testing drivers to check numerical correctness of
-  CHAMELEON linear algebra routines with a wide range of parameters
-  @example
-  ./testing/stesting 4 1 LANGE 600 100 700
-  @end example
-  Two first arguments are the number of cores and gpus to use.
-  The third one is the name of the algorithm to test.
-  The other arguments depend on the algorithm, here it lies for the number of
-  rows, columns and leading dimension of the problem.
-
-  Name of algorithms available for testing are:
-  @itemize @bullet
-    @item LANGE: norms of matrices Infinite, One, Max, Frobenius
-    @item GEMM: general matrix-matrix multiply
-    @item HEMM: hermitian matrix-matrix multiply
-    @item HERK: hermitian matrix-matrix rank k update
-    @item HER2K: hermitian matrix-matrix rank 2k update
-    @item SYMM: symmetric matrix-matrix multiply
-    @item SYRK: symmetric matrix-matrix rank k update
-    @item SYR2K: symmetric matrix-matrix rank 2k update
-    @item PEMV: matrix-vector multiply with pentadiagonal matrix
-    @item TRMM: triangular matrix-matrix multiply
-    @item TRSM: triangular solve, multiple rhs
-    @item POSV: solve linear systems with symmetric positive-definite matrix
-    @item GESV_INCPIV: solve linear systems with general matrix
-    @item GELS: linear least squares with general matrix
-  @end itemize
-
-  @item timing
-
-  contains timing drivers to assess performances of CHAMELEON routines.
-  There are two sets of executables, those who do not use the tile interface
-and those who do (with _tile in the name of the executable).
-  Executables without tile interface allocates data following LAPACK
-conventions and these data can be given as arguments to CHAMELEON routines
-as you would do with LAPACK.
-  Executables with tile interface generate directly the data in the format
-  CHAMELEON tile algorithms used to submit tasks to the runtime system.
-  Executables with tile interface should be more performant because no data
-copy from LAPACK matrix layout to tile matrix layout are necessary.
-  Calling example:
-  @example
-  ./timing/time_dpotrf --n_range=1000:10000:1000 --nb=320
-                       --threads=9 --gpus=3
-                       --nowarmup
-  @end example
-
-  List of main options that can be used in timing:
-  @itemize @bullet
-    @item @option{--help}: show usage
-    @item Machine parameters
-    @itemize @bullet
-       @item @option{-t x, --threads=x}: Number of CPU workers (default: automatic detection through runtime)
-       @item @option{-g x, --gpus=x}: Number of GPU workers (default: @option{0})
-       @item @option{-P x, --P=x}:  Rows (P) in the PxQ process grid (deafult: @option{1})
-       @item @option{--nocpu}: All GPU kernels are exclusively executed on GPUs (default: @option{0})
-    @end itemize
-    @item Matrix parameters
-    @itemize @bullet
-      @item @option{-m x, --m=x, --M=x}: Dimension (M) of the matrices (default: @option{N})
-      @item @option{-n x, --n=x, --N=x}: Dimension (N) of the matrices
-      @item @option{-N R, --n_range=R}: Range of N values to time with R=Start:Stop:Step (default: @option{500:5000:500})
-      @item @option{-k x, --k=x, --K=x, --nrhs=x}: Dimension (K) of the matrices or number of right-hand size (default: @option{1}). This is useful for GEMM like algorithms (k is the shared dimension and must be defined >1 to consider matrices and not vectors)
-      @item @option{-b x, --nb=x}: NB size. (default: @option{320})
-      @item @option{-i x, --ib=x}: IB size. (default: @option{32})
-    @end itemize
-    @item Check/prints
-    @itemize @bullet
-      @item @option{--niter=x}: number of iterations performed for each test (default: @option{1})
-      @item @option{-W, --nowarnings}: Do not show warnings
-      @item @option{-w, --nowarmup}: Cancel the warmup run to pre-load libraries
-      @item @option{-c, --check}: Check result
-      @item @option{-C, --inv}: Check on inverse
-      @item @option{--mode=x}: Change the xLATMS matrix mode generation for SVD/EVD (default: @option{4}). It must be between 0 and 20 included.
-    @end itemize
-    @item Profiling parameters
-    @itemize @bullet
-      @item @option{-T, --trace}: Enable trace generation
-      @item @option{--progress}: Display progress indicator
-      @item @option{-d, --dag}: Enable DAG generation. Generates a dot_dag_file.dot.
-      @item @option{-p, --profile}: Print profiling informations
-    @end itemize
-    @item HQR parameters
-    @itemize @bullet
-      @item @option{-a x, --qr_a=x, --rhblk=x}: Define the size of the local TS trees in housholder reduction trees for QR and LQ factorization. N is the size of each subdomain (default: @option{-1})
-      @item @option{-l x, --llvl=x}: Tree used for low level reduction inside nodes (default: @option{-1})
-      @item @option{-L x, --hlvl=x}: Tree used for high level reduction between nodes, only if P > 1 (default: @option{-1}). Possible values are -1: Automatic, 0: Flat, 1: Greedy, 2: Fibonacci, 3: Binary, 4: Replicated greedy.
-      @item @option{-D, --domino}: Enable the domino between upper and lower trees
-    @end itemize
-    @item Advanced options
-    @itemize @bullet
-      @item @option{--nobigmat}: Disable single large matrix allocation for multiple tiled allocations
-      @item @option{-s, --sync}: Enable synchronous calls in wrapper function such as POTRI
-      @item @option{-o, --ooc}: Enable out-of-core (available only with StarPU)
-      @item @option{-G, --gemm3m}: Use gemm3m complex method
-      @item @option{--bound}: Compare result to area bound
-    @end itemize
-
-  List of timing algorithms available:
-  @itemize @bullet
-    @item LANGE: norms of matrices
-    @item GEMM: general matrix-matrix multiply
-    @item TRSM: triangular solve
-    @item POTRF: Cholesky factorization with a symmetric
-positive-definite matrix
-    @item POSV: solve linear systems with symmetric positive-definite matrix
-    @item GETRF_NOPIV: LU factorization of a general matrix
-using the tile LU algorithm without row pivoting
-    @item GESV_NOPIV: solve linear system for a general matrix
-using the tile LU algorithm without row pivoting
-    @item GETRF_INCPIV: LU factorization of a general matrix
-using the tile LU algorithm with partial tile pivoting with row interchanges
-    @item GESV_INCPIV: solve linear system for a general matrix
-using the tile LU algorithm with partial tile pivoting with row interchanges
-matrix
-    @item GEQRF: QR factorization of a general matrix
-    @item GELS: solves overdetermined or underdetermined linear systems
-involving a general matrix using the QR or the LQ factorization
-  @end itemize
-
-@end itemize
-
-@node Linking an external application with CHAMELEON libraries
-@section Linking an external application with CHAMELEON libraries
-
-Compilation and link with CHAMELEON libraries have been tested with
-@strong{gcc/gfortran 4.8.1} and @strong{icc/ifort 14.0.2}.
-
-@menu
-* Static linking in C::
-* Dynamic linking in C::
-* Build a Fortran program with CHAMELEON::
-@end menu
-
-@node Static linking in C
-@subsection Static linking in C
-
-Lets imagine you have a file main.c that you want to link with CHAMELEON
-static libraries.
-Here could be your compilation command with gcc compiler:
-@example
-gcc -I/home/yourname/install/chameleon/include -o main.o -c main.c
-@end example
-
-Now if you want to link your application with CHAMELEON static libraries, you
-could do:
-@example
-gcc main.o -o main                                         \
-/home/yourname/install/chameleon/lib/libchameleon.a        \
-/home/yourname/install/chameleon/lib/libchameleon_starpu.a \
-/home/yourname/install/chameleon/lib/libcoreblas.a         \
--lstarpu-1.1 -Wl,--no-as-needed -lmkl_intel_lp64           \
--lmkl_sequential -lmkl_core -lpthread -lm -lrt
-@end example
-As you can see in this example, we also link with some dynamic libraries
-@option{starpu-1.1}, @option{Intel MKL} libraries (for
-BLAS/LAPACK/CBLAS/LAPACKE), @option{pthread}, @option{m} (math) and
-@option{rt}.
-These libraries will depend on the configuration of your CHAMELEON build.
-You can find these dependencies in .pc files we generate during compilation and
-CHAMELEON install directory.
-Note also that you could need to specify where to find these libraries with
-@option{-L} option of your compiler/linker.
-
-Before to run your program, make sure that all shared libraries paths your
-executable depends on are known.
-Enter @code{ldd main} to check.
-If some shared libraries paths are missing append them in the
-@env{LD_LIBRARY_PATH} (for Linux systems) environment variable
-(@env{DYLD_LIBRARY_PATH} on Mac, @env{LIB} on Windows).
-
-@node Dynamic linking in C
-@subsection Dynamic linking in C
-
-For dynamic linking (need to build CHAMELEON with CMake
-option @option{BUILD_SHARED_LIBS=ON}) it is similar to static compilation/link
-but instead of specifying path to your static libraries you indicate the path
-to dynamic libraries with @option{-L} option and you give the name of libraries
-with @option{-l} option like this:
-@example
-gcc main.o -o main                               \
--L/home/yourname/install/chameleon/lib           \
--lchameleon -lchameleon_starpu -lcoreblas        \
--lstarpu-1.1 -Wl,--no-as-needed -lmkl_intel_lp64 \
--lmkl_sequential -lmkl_core -lpthread -lm -lrt
-@end example
-
-Note that an update of your environment variable
-@env{LD_LIBRARY_PATH} (@env{DYLD_LIBRARY_PATH} on Mac, @env{LIB} on Windows)
-with the path of the libraries could be required before executing, example:
-@example
-export @env{LD_LIBRARY_PATH}=path/to/libs:path/to/chameleon/lib
-@end example
-
-@node Build a Fortran program with CHAMELEON
-@subsection Build a Fortran program with CHAMELEON
-
-CHAMELEON provides a Fortran interface to user functions. Example:
-@example
-call morse_version(major, minor, patch) !or
-call MORSE_VERSION(major, minor, patch)
-@end example
-
-Build and link are very similar to the C case.
-
-Compilation example:
-@example
-gfortran -o main.o -c main.c
-@end example
-
-Static linking example:
-@example
-gfortran main.o -o main                                    \
-/home/yourname/install/chameleon/lib/libchameleon.a        \
-/home/yourname/install/chameleon/lib/libchameleon_starpu.a \
-/home/yourname/install/chameleon/lib/libcoreblas.a         \
--lstarpu-1.1 -Wl,--no-as-needed -lmkl_intel_lp64           \
--lmkl_sequential -lmkl_core -lpthread -lm -lrt
-@end example
-
-Dynamic linking example:
-@example
-gfortran main.o -o main                          \
--L/home/yourname/install/chameleon/lib           \
--lchameleon -lchameleon_starpu -lcoreblas        \
--lstarpu-1.1 -Wl,--no-as-needed -lmkl_intel_lp64 \
--lmkl_sequential -lmkl_core -lpthread -lm -lrt
-@end example
-
-@node CHAMELEON API
-@section CHAMELEON API
-
-CHAMELEON provides routines to solve dense general systems of linear
-equations, symmetric positive definite systems of linear equations and linear
-least squares problems, using LU, Cholesky, QR and LQ factorizations.
-Real arithmetic and complex arithmetic are supported in both single precision
-and double precision.
-Routines that compute linear algebra are of the folowing form:
-@example
-MORSE_name[_Tile[_Async]]
-@end example
-@itemize @bullet
-@item all user routines are prefixed with @code{MORSE}
-@item @code{name} follows BLAS/LAPACK naming scheme for algorithms
-(@emph{e.g.} sgemm for general matrix-matrix multiply simple precision)
-@item CHAMELEON provides three interface levels
-  @itemize @minus
-  @item @code{MORSE_name}: simplest interface, very close to CBLAS and LAPACKE,
-matrices are given following the LAPACK data layout (1-D array column-major).
-It involves copy of data from LAPACK layout to tile layout and conversely (to
-update LAPACK data), see @ref{Step1}.
-  @item @code{MORSE_name_Tile}: the tile interface avoids copies between LAPACK
-and tile layouts. It is the standard interface of CHAMELEON and it should
-achieve better performance than the previous simplest interface. The data are
-given through a specific structure called a descriptor, see @ref{Step2}.
-  @item @code{MORSE_name_Tile_Async}: similar to the tile interface, it avoids
-the synchronization barriers normally called between @code{Tile} routines.
-At the end of an @code{Async} function, completion of tasks is not guaranteed
-and data are not necessarily up-to-date.
-To ensure that all tasks have been executed, a synchronization function has to
-be called after the sequence of @code{Async} functions, see @ref{Step4}.
-  @end itemize
-@end itemize
-
-MORSE routine calls have to be preceded by
-@example
-MORSE_Init( NCPU, NGPU );
-@end example
-to initialize MORSE and the runtime system and followed by
-@example
-MORSE_Finalize();
-@end example
-to free some data and finalize the runtime and/or MPI.
-
-@menu
-* Tutorial LAPACK to CHAMELEON::
-* List of available routines::
-@end menu
-
-@node Tutorial LAPACK to CHAMELEON
-@subsection Tutorial LAPACK to CHAMELEON
-
-This tutorial is dedicated to the API usage of CHAMELEON.
-The idea is to start from a simple code and step by step explain how to
-use CHAMELEON routines.
-The first step is a full BLAS/LAPACK code without dependencies to CHAMELEON,
-a code that most users should easily understand.
-Then, the different interfaces CHAMELEON provides are exposed, from the
-simplest API (step1) to more complicated ones (until step4).
-The way some important parameters are set is discussed in step5.
-step6 is an example about distributed computation with MPI.
-Finally step7 shows how to let Chameleon initialize user's data
-(matrices/vectors) in parallel.
-
-The source files of these steps can be found in the @file{example/lapack_to_morse/} directory.
-If CMake option @option{CHAMELEON_ENABLE_EXAMPLE} is @option{ON} then source
-files are compiled with the project libraries.
-The arithmetic precision is @code{double}.
-To execute a step @samp{X}, enter the following command:
-@example
-./step@samp{X} --option1 --option2 ...
-@end example
-Instructions about the arguments to give to executables are accessible thanks
-to the option @option{-[-]help} or @option{-[-]h}.
-Note there exist default values for options.
-
-For all steps, the program solves a linear system @math{Ax=B}.
-The matrix values are randomly generated but ensure that matrix @math{A} is
-symmetric positive definite so that @math{A} can be factorized in a @math{LL^T}
-form using the Cholesky factorization.
-
-
-Let's comment on the different steps of the tutorial:
-@menu
-* Step0:: a simple Cholesky example using the C interface of
-BLAS/LAPACK
-* Step1:: introduces the LAPACK equivalent interface of Chameleon
-* Step2:: introduces the tile interface
-* Step3:: indicates how to give your own tile matrix to Chameleon
-* Step4:: introduces the tile async interface
-* Step5:: shows how to set some important parameters
-* Step6:: introduces how to benefit from MPI in Chameleon
-* Step7:: introduces how to let Chameleon initialize the user's matrix data
-@end menu
-
-@node Step0
-@subsubsection Step0
-
-The C interface of BLAS and LAPACK, that is, CBLAS and
-LAPACKE, are used to solve the system. The size of the system (matrix) and the
-number of right hand-sides can be given as arguments to the executable (be
-careful not to give huge numbers if you do not have an infinite amount of RAM!).
-As for every step, the correctness of the solution is checked by calculating
-the norm @math{||Ax-B||/(||A||||x||+||B||)}.
-The time spent in factorization+solve is recorded and, because we know exactly
-the number of operations of these algorithms, we deduce the number of
-operations that have been processed per second (in GFlops/s).
-The important part of the code that solves the problem is:
-@verbatim
-/* Cholesky factorization:
- * A is replaced by its factorization L or L^T depending on uplo */
-LAPACKE_dpotrf( LAPACK_COL_MAJOR, 'U', N, A, N );
-/* Solve:
- * B is stored in X on entry, X contains the result on exit.
- * Forward ...
- */
-cblas_dtrsm(
-    CblasColMajor,
-    CblasLeft,
-    CblasUpper,
-    CblasConjTrans,
-    CblasNonUnit,
-    N, NRHS, 1.0, A, N, X, N);
-/* ... and back substitution */
-cblas_dtrsm(
-    CblasColMajor,
-    CblasLeft,
-    CblasUpper,
-    CblasNoTrans,
-    CblasNonUnit,
-    N, NRHS, 1.0, A, N, X, N);
-@end verbatim
-
-@node Step1
-@subsubsection Step1
-
-It introduces the simplest CHAMELEON interface which is equivalent to
-CBLAS/LAPACKE.
-The code is very similar to step0 but instead of calling CBLAS/LAPACKE
-functions, we call CHAMELEON equivalent functions.
-The solving code becomes:
-@verbatim
-/* Factorization: */
-MORSE_dpotrf( UPLO, N, A, N );
-/* Solve: */
-MORSE_dpotrs(UPLO, N, NRHS, A, N, X, N);
-@end verbatim
-The API is almost the same so that it is easy to use for beginners.
-It is important to keep in mind that before any call to MORSE routines,
-@code{MORSE_Init} has to be invoked to initialize MORSE and the runtime system.
-Example:
-@verbatim
-MORSE_Init( NCPU, NGPU );
-@end verbatim
-After all MORSE calls have been done, a call to @code{MORSE_Finalize} is
-required to free some data and finalize the runtime and/or MPI.
-@verbatim
-MORSE_Finalize();
-@end verbatim
-We use MORSE routines with the LAPACK interface, which means the routines
-accept the same matrix format as LAPACK (1-D array column-major).
-Note that we copy the matrix to get it in our own tile structures, see details
-about this format here @ref{Tile Data Layout}.
-This means you can get an overhead coming from copies.
-
-@node Step2
-@subsubsection Step2
-
-This program is a copy of step1 but instead of using the LAPACK interface, which
-leads to copying LAPACK matrices inside MORSE routines, we use the tile interface.
-We will still use standard format of matrix but we will see how to give this
-matrix to create a MORSE descriptor, a structure wrapping data on which we want
-to apply sequential task-based algorithms.
-The solving code becomes:
-@verbatim
-/* Factorization: */
-MORSE_dpotrf_Tile( UPLO, descA );
-/* Solve: */
-MORSE_dpotrs_Tile( UPLO, descA, descX );
-@end verbatim
-To use the tile interface, a specific structure @code{MORSE_desc_t} must be
-created.
-This can be achieved in several different ways.
-@enumerate
-@item Use the existing function @code{MORSE_Desc_Create}: means the
-matrix data are considered contiguous in memory as it is considered in PLASMA
-(@ref{Tile Data Layout}).
-@item Use the existing function @code{MORSE_Desc_Create_OOC}: means the
-matrix data is allocated on-demand in memory tile by tile, and possibly pushed
-to disk if that does not fit memory.
-@item Use the existing function @code{MORSE_Desc_Create_User}: it is more
-flexible than @code{Desc_Create} because you can give your own way to access to
-tile data so that your tiles can be allocated wherever you want in memory, see
-next paragraph @ref{Step3}.
-@item Create your own function to fill the descriptor.
-If you understand well the meaning of each item of @code{MORSE_desc_t}, you
-should be able to fill in the structure correctly (good luck).
-@end enumerate
-
-In Step2, we use the first way to create the descriptor:
-@verbatim
-MORSE_Desc_Create(&descA, NULL, MorseRealDouble,
-                  NB, NB, NB*NB, N, N,
-                  0, 0, N, N,
-                  1, 1);
-@end verbatim
-
-@itemize @bullet
-
-@item @code{descA} is the descriptor to create.
-
-@item The second argument is a pointer to existing data.
-The existing data must follow LAPACK/PLASMA matrix layout @ref{Tile Data
-Layout} (1-D array column-major) if @code{MORSE_Desc_Create} is used to create
-the descriptor.
-The @code{MORSE_Desc_Create_User} function can be used if you have data
-organized differently.
-This is discussed in the next paragraph @ref{Step3}.
-Giving a @code{NULL} pointer means you let the function allocate memory space.
-This requires to copy your data in the memory allocated by the
-@code{Desc_Create}.
-This can be done with
-@verbatim
-MORSE_Lapack_to_Tile(A, N, descA);
-@end verbatim
-
-@item Third argument of @code{Desc_Create} is the datatype (used for memory
-allocation).
-
-@item Fourth argument until sixth argument stand for respectively, the number
-of rows (@code{NB}), columns (@code{NB}) in each tile, the total number of
-values in a tile (@code{NB*NB}), the number of rows (@code{N}), colmumns
-(@code{N}) in the entire matrix.
-
-@item Seventh argument until ninth argument stand for respectively, the
-beginning row (@code{0}), column (@code{0}) indexes of the submatrix and the
-number of rows (@code{N}), columns (@code{N}) in the submatrix.
-These arguments are specific and used in precise cases.
-If you do not consider submatrices, just use @code{0, 0, NROWS, NCOLS}.
-
-@item Two last arguments are the parameter of the 2-D block-cyclic distribution
-grid, see @uref{http://www.netlib.org/scalapack/slug/node75.html, ScaLAPACK}.
-To be able to use other data distribution over the nodes,
-@code{MORSE_Desc_Create_User} function should be used.
-
-@end itemize
-
-
-@node Step3
-@subsubsection Step3
-
-This program makes use of the same interface as Step2 (tile interface) but
-does not allocate LAPACK matrices anymore, so that no copies between LAPACK
-matrix layout and tile matrix layout are necessary to call MORSE routines.
-To generate random right hand-sides you can use:
-@verbatim
-/* Allocate memory and initialize descriptor B */
-MORSE_Desc_Create(&descB,  NULL, MorseRealDouble,
-                  NB, NB,  NB*NB, N, NRHS,
-                  0, 0, N, NRHS, 1, 1);
-/* generate RHS with random values */
-MORSE_dplrnt_Tile( descB, 5673 );
-@end verbatim
-
-The other important point is that it is possible to create a descriptor, the
-necessary structure to call MORSE efficiently, by giving your own pointers to
-tiles if your matrix is not organized as a 1-D array column-major.
-This can be achieved with the @code{MORSE_Desc_Create_User} routine.
-Here is an example:
-@verbatim
-MORSE_Desc_Create_User(&descA, matA, MorseRealDouble,
-                       NB, NB, NB*NB, N, N,
-                       0, 0, N, N, 1, 1,
-                       user_getaddr_arrayofpointers,
-                       user_getblkldd_arrayofpointers,
-                       user_getrankof_zero);
-@end verbatim
-The first arguments are the same as those of the @code{MORSE_Desc_Create} routine.
-The following arguments allow you to give pointers to functions that manage
-the access to tiles from the structure given as second argument.
-Here, for example, @code{matA} is an array containing the addresses of tiles.
-The three functions you have to define for @code{Desc_Create_User} are:
-@itemize @bullet
-@item a function that returns the address of tile @math{A(m,n)}, m and n standing
-for the indexes of the tile in the global matrix. Let's consider a matrix
-@math{4x4} with tile size @math{2x2}, the matrix contains four tiles of
-indexes: @math{A(m=0,n=0)}, @math{A(m=0,n=1)}, @math{A(m=1,n=0)},
-@math{A(m=1,n=1)}
-@item a function that returns the leading dimension of tile @math{A(m,*)}
-@item a function that returns MPI rank of tile @math{A(m,n)}
-@end itemize
-Note that the way we define these functions is related to the tile matrix
-format and to the data distribution considered.
-This example should not be used with MPI since all tiles are assigned to
-process @code{0}, which means a large amount of data will
-potentially be transferred between nodes.
-
-@node Step4
-@subsubsection Step4
-This program is a copy of step2 but instead of using the tile interface, it
-uses the tile async interface.
-The goal is to exhibit the runtime synchronization barriers.
-Keep in mind that when the tile interface is called, like
-@code{MORSE_dpotrf_Tile}, a synchronization function, waiting for the actual
-execution and termination of all tasks, is called to ensure the
-proper completion of the algorithm (i.e. data are up-to-date).
-The code shows how to exploit the async interface to pipeline subsequent
-algorithms so that less synchronisations are done.
-The code becomes:
-@verbatim
-/* Morse structure containing parameters and a structure to interact with
- * the Runtime system */
-MORSE_context_t *morse;
-/* MORSE sequence uniquely identifies a set of asynchronous function calls
- * sharing common exception handling */
-MORSE_sequence_t *sequence = NULL;
-/* MORSE request uniquely identifies each asynchronous function call */
-MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
-int status;
-
-...
-
-morse_sequence_create(morse, &sequence);
-
-/* Factorization: */
-MORSE_dpotrf_Tile_Async( UPLO, descA, sequence, &request );
-
-/* Solve: */
-MORSE_dpotrs_Tile_Async( UPLO, descA, descX, sequence, &request);
-
-/* Synchronization barrier (the runtime ensures that all submitted tasks
- * have been terminated) */
-RUNTIME_barrier(morse);
-/* Ensure that all data processed on the gpus we are depending on are back
- * in main memory */
-RUNTIME_desc_getoncpu(descA);
-RUNTIME_desc_getoncpu(descX);
-
-status = sequence->status;
-
-@end verbatim
-Here the sequence of @code{dpotrf} and @code{dpotrs} algorithms is processed
-without synchronization so that some tasks of @code{dpotrf} and @code{dpotrs}
-can be concurrently executed, which could increase performance.
-The async interface is very similar to the tile one.
-It is only necessary to give two new objects @code{MORSE_sequence_t} and
-@code{MORSE_request_t} used to handle asynchronous function calls.
-
-@center @image{potri_async,13cm,8cm}
-POTRI (POTRF, TRTRI, LAUUM) algorithm with and without synchronization
-barriers, courtesy of the @uref{http://icl.cs.utk.edu/plasma/, PLASMA} team.
-
-@node Step5
-@subsubsection Step5
-
-Step5 shows how to set some important parameters.
-This program is a copy of Step4 but some additional parameters are given by
-the user.
-The parameters that can be set are:
-@itemize @bullet
-@item number of Threads
-@item number of GPUs
-
-The number of workers can be given as argument to the executable with
-@option{--threads=} and @option{--gpus=} options.
-It is important to notice that we assign one thread per gpu to optimize data
-transfer between main memory and devices memory.
-The number of workers of each type @code{CPU} and @code{CUDA} must be given at
-@code{MORSE_Init}.
-@verbatim
-if ( iparam[IPARAM_THRDNBR] == -1 ) {
-    get_thread_count( &(iparam[IPARAM_THRDNBR]) );
-    /* reserve one thread per CUDA device to optimize memory transfers */
-    iparam[IPARAM_THRDNBR] -= iparam[IPARAM_NCUDAS];
-}
-NCPU = iparam[IPARAM_THRDNBR];
-NGPU = iparam[IPARAM_NCUDAS];
-
-/* initialize MORSE with main parameters */
-MORSE_Init( NCPU, NGPU );
-@end verbatim
-
-@item matrix size
-@item number of right-hand sides
-@item block (tile) size
-
-The problem size is given with @option{--n=} and @option{--nrhs=} options.
-The tile size is given with option @option{--nb=}.
-These parameters are required to create descriptors.
-The tile size @code{NB} is a key parameter to get performance since it
-defines the granularity of tasks.
-If @code{NB} is too large compared to @code{N}, there are few tasks to
-schedule.
-If the number of workers is large this leads to limit parallelism.
-On the contrary, if @code{NB} is too small (@emph{i.e.} many small tasks),
-workers could not be correctly fed and the runtime systems operations
-could represent a substantial overhead.
-A trade-off has to be found depending on many parameters: problem size,
-algorithm (drive data dependencies), architecture (number of workers,
-workers speed, workers uniformity, memory bus speed).
-By default it is set to 128.
-Do not hesitate to play with this parameter and compare performances on your
-machine.
-
-@item inner-blocking size
-
-The inner-blocking size is given with option @option{--ib=}.
-This parameter is used by kernels (optimized algorithms applied on tiles) to
-perform subsequent operations with data block-size that fits the cache of
-workers.
-Parameters @code{NB} and @code{IB} can be given with @code{MORSE_Set} function:
-@verbatim
-MORSE_Set(MORSE_TILE_SIZE,        iparam[IPARAM_NB] );
-MORSE_Set(MORSE_INNER_BLOCK_SIZE, iparam[IPARAM_IB] );
-@end verbatim
-@end itemize
-
-@node Step6
-@subsubsection Step6
-
-This program is a copy of Step5 with some additional parameters to be set for
-the data distribution.
-To use this program properly MORSE must use StarPU Runtime system and MPI
-option must be activated at configure.
-The data distribution used here is 2-D block-cyclic, see for example
-@uref{http://www.netlib.org/scalapack/slug/node75.html, ScaLAPACK} for
-explanation.
-The user can enter the parameters of the distribution grid at execution with
-@option{--p=} option.
-Example using OpenMPI on four nodes with one process per node:
-@example
-mpirun -np 4 ./step6 --n=10000 --nb=320 --ib=64 \
-                     --threads=8 --gpus=2 --p=2
-@end example
-
-In this program we use the tile data layout from PLASMA so that the call
-@verbatim
-MORSE_Desc_Create_User(&descA, NULL, MorseRealDouble,
-                       NB, NB, NB*NB, N, N,
-                       0, 0, N, N,
-                       GRID_P, GRID_Q,
-                       morse_getaddr_ccrb,
-                       morse_getblkldd_ccrb,
-                       morse_getrankof_2d);
-@end verbatim
-is equivalent to the following call
-@verbatim
-MORSE_Desc_Create(&descA, NULL, MorseRealDouble,
-                  NB, NB, NB*NB, N, N,
-                  0, 0, N, N,
-                  GRID_P, GRID_Q);
-@end verbatim
-functions @code{morse_getaddr_ccrb}, @code{morse_getblkldd_ccrb},
-@code{morse_getrankof_2d} being used in @code{Desc_Create}.
-It is interesting to notice that the code is almost the same as Step5.
-The only additional information to give is the way tiles are distributed
-through the third function given to @code{MORSE_Desc_Create_User}.
-Here, because we have made experiments only with a 2-D block-cyclic
-distribution, we have parameters P and Q in the interface of @code{Desc_Create}
-but they have sense only for 2-D block-cyclic distribution and then using
-@code{morse_getrankof_2d} function.
-Of course it could be used with other distributions, being no more the
-parameters of a 2-D block-cyclic grid but of another distribution.
-
-@node Step7
-@subsubsection Step7
-
-This program is a copy of step6 with some additional calls to
-build a matrix from within chameleon using a function provided by the user.
-This can be seen as a replacement of the function like @code{MORSE_dplgsy_Tile()} that can be used
-to fill the matrix with random data, @code{MORSE_dLapack_to_Tile()} to fill the matrix
-with data stored in a lapack-like buffer, or @code{MORSE_Desc_Create_User()} that can be used
-to describe an arbitrary tile matrix structure.
-In this example, the build callback functions are just wrappers around @code{CORE_xxx()} functions, so the output
-of the program step7 should be exactly the same as that of step6.
-The difference is that the function used to fill the tiles is provided by the user,
-and therefore this approach is much more flexible.
-
-The new function to understand is @code{MORSE_dbuild_Tile}, e.g.
-@verbatim
-struct data_pl data_A={(double)N, 51, N};
-MORSE_dbuild_Tile(MorseUpperLower, descA, (void*)&data_A, Morse_build_callback_plgsy);
-@end verbatim
-The idea here is to let Chameleon fill the matrix data in a task-based fashion
-(parallel) by using a function given by the user.
-First, the user should define whether all the blocks must be entirely filled or just
-the upper/lower part with, e.g. @code{MorseUpperLower}.
-We still rely on the same structure @code{MORSE_desc_t} which must be
-initialized with the proper parameters, by calling for example
-@code{MORSE_Desc_Create}.
-Then, an opaque pointer is used to let the user give some extra data used by
-his function.
-The last parameter is the pointer to the user's function.
-
-@node List of available routines
-@subsection List of available routines
-
-@menu
-* Auxiliary routines:: Init, Finalize, Version, etc
-* Descriptor routines:: To handle descriptors
-* Options routines:: To set options
-* Sequences routines:: To manage asynchronous function calls
-* Linear Algebra routines:: Computional routines
-@end menu
-
-@node Auxiliary routines
-@subsubsection Auxiliary routines
-
-Reports MORSE version number.
-@verbatim
-int MORSE_Version        (int *ver_major, int *ver_minor, int *ver_micro);
-@end verbatim
-
-Initialize MORSE: initialize some parameters, initialize the runtime and/or MPI.
-@verbatim
-int MORSE_Init           (int nworkers, int ncudas);
-@end verbatim
-
-Finalize MORSE: free some data and finalize the runtime and/or MPI.
-@verbatim
-int MORSE_Finalize       (void);
-@end verbatim
-
-Return the MPI rank of the calling process.
-@verbatim
-int MORSE_My_Mpi_Rank    (void);
-@end verbatim
-
-Suspend MORSE runtime to poll for new tasks, to avoid useless CPU consumption when
-no tasks have to be executed by MORSE runtime system.
-@verbatim
-int MORSE_Pause          (void);
-@end verbatim
-
-Symmetrical call to MORSE_Pause, used to resume the workers polling for new tasks.
-@verbatim
-int MORSE_Resume         (void);
-@end verbatim
-
-Conversion from LAPACK layout to tile layout.
-@verbatim
-int MORSE_Lapack_to_Tile (void *Af77, int LDA, MORSE_desc_t *A);
-@end verbatim
-
-Conversion from tile layout to LAPACK layout.
-@verbatim
-int MORSE_Tile_to_Lapack (MORSE_desc_t *A, void *Af77, int LDA);
-@end verbatim
-
-@node Descriptor routines
-@subsubsection Descriptor routines
-
-@c /* Descriptor */
-Create matrix descriptor, internal function.
-@verbatim
-int MORSE_Desc_Create  (MORSE_desc_t **desc, void *mat, MORSE_enum dtyp,
-                        int mb, int nb, int bsiz, int lm, int ln,
-                        int i, int j, int m, int n, int p, int q);
-@end verbatim
-
-Create matrix descriptor, user function.
-@verbatim
-int MORSE_Desc_Create_User(MORSE_desc_t **desc, void *mat, MORSE_enum dtyp,
-                           int mb, int nb, int bsiz, int lm, int ln,
-                           int i, int j, int m, int n, int p, int q,
-                           void* (*get_blkaddr)( const MORSE_desc_t*, int, int),
-                           int (*get_blkldd)( const MORSE_desc_t*, int ),
-                           int (*get_rankof)( const MORSE_desc_t*, int, int ));
-@end verbatim
-
-Destroys matrix descriptor.
-@verbatim
-int MORSE_Desc_Destroy (MORSE_desc_t **desc);
-@end verbatim
-
-Ensure that all data are up-to-date in main memory (even if some tasks have
-been processed on GPUs)
-@verbatim
-int MORSE_Desc_Flush(MORSE_desc_t  *desc, MORSE_sequence_t *sequence);
-@end verbatim
-
-@node Options routines
-@subsubsection Options routines
-
-@c /* Options */
-Enable MORSE feature.
-@verbatim
-int MORSE_Enable  (MORSE_enum option);
-@end verbatim
-Feature to be enabled:
-@itemize @bullet
-@item @code{MORSE_WARNINGS}:   printing of warning messages,
-@item @code{MORSE_ERRORS}:     printing of error messages,
-@item @code{MORSE_AUTOTUNING}: autotuning for tile size and inner block size,
-@item @code{MORSE_PROFILING_MODE}:  activate kernels profiling.
-@end itemize
-
-Disable MORSE feature.
-@verbatim
-int MORSE_Disable (MORSE_enum option);
-@end verbatim
-Symmetric to @code{MORSE_Enable}.
-
-Set MORSE parameter.
-@verbatim
-int MORSE_Set     (MORSE_enum param, int  value);
-@end verbatim
-Parameters to be set:
-@itemize @bullet
-@item @code{MORSE_TILE_SIZE}:        size matrix tile,
-@item @code{MORSE_INNER_BLOCK_SIZE}: size of tile inner block,
-@item @code{MORSE_HOUSEHOLDER_MODE}: type of householder trees (FLAT or TREE),
-@item @code{MORSE_HOUSEHOLDER_SIZE}: size of the groups in householder trees,
-@item @code{MORSE_TRANSLATION_MODE}: translation mode between LAPACK and tile
-layouts (in place or out of place).
-@end itemize
-
-Get value of MORSE parameter.
-@verbatim
-int MORSE_Get     (MORSE_enum param, int *value);
-@end verbatim
-
-@node Sequences routines
-@subsubsection Sequences routines
-
-@c /* Sequences */
-Create a sequence.
-@verbatim
-int MORSE_Sequence_Create  (MORSE_sequence_t **sequence);
-@end verbatim
-
-Destroy a sequence.
-@verbatim
-int MORSE_Sequence_Destroy (MORSE_sequence_t *sequence);
-@end verbatim
-
-Wait for the completion of a sequence.
-@verbatim
-int MORSE_Sequence_Wait    (MORSE_sequence_t *sequence);
-@end verbatim
-
-@node Linear Algebra routines
-@subsubsection Linear Algebra routines
-
-Routines computing linear algebra are of the form
-@code{MORSE_name[_Tile[_Async]]} (@code{name} follows the LAPACK naming scheme,
-see @uref{http://www.netlib.org/lapack/lug/node24.html}). Available routines:
-
-@verbatim
-/**
- *  Declarations of computational functions (LAPACK layout)
- **/
-int MORSE_zgelqf(int M, int N, MORSE_Complex64_t *A, int LDA,
-                 MORSE_desc_t *descT);
-
-int MORSE_zgelqs(int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA,
-                 MORSE_desc_t *descT, MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zgels(MORSE_enum trans, int M, int N, int NRHS,
-                MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descT,
-                MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zgemm(MORSE_enum transA, MORSE_enum transB, int M, int N, int K,
-                MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA,
-                MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta,
-                MORSE_Complex64_t *C, int LDC);
-
-int MORSE_zgeqrf(int M, int N, MORSE_Complex64_t *A, int LDA,
-                 MORSE_desc_t *descT);
-
-int MORSE_zgeqrs(int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA,
-                 MORSE_desc_t *descT, MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zgesv_incpiv(int N, int NRHS, MORSE_Complex64_t *A, int LDA,
-                       MORSE_desc_t *descL, int *IPIV,
-                       MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zgesv_nopiv(int N, int NRHS, MORSE_Complex64_t *A, int LDA,
-                      MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zgetrf_incpiv(int M, int N, MORSE_Complex64_t *A, int LDA,
-                        MORSE_desc_t *descL, int *IPIV);
-
-int MORSE_zgetrf_nopiv(int M, int N, MORSE_Complex64_t *A, int LDA);
-
-int MORSE_zgetrs_incpiv(MORSE_enum trans, int N, int NRHS,
-                        MORSE_Complex64_t *A, int LDA,
-                        MORSE_desc_t *descL, int *IPIV,
-                        MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zgetrs_nopiv(MORSE_enum trans, int N, int NRHS,
-                       MORSE_Complex64_t *A, int LDA,
-                       MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zhemm(MORSE_enum side, MORSE_enum uplo, int M, int N,
-                MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA,
-                MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta,
-                MORSE_Complex64_t *C, int LDC);
-
-int MORSE_zherk(MORSE_enum uplo, MORSE_enum trans, int N, int K,
-                double alpha, MORSE_Complex64_t *A, int LDA,
-                double beta, MORSE_Complex64_t *C, int LDC);
-
-int MORSE_zher2k(MORSE_enum uplo, MORSE_enum trans, int N, int K,
-                 MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA,
-                 MORSE_Complex64_t *B, int LDB, double beta,
-                 MORSE_Complex64_t *C, int LDC);
-
-int MORSE_zlacpy(MORSE_enum uplo, int M, int N,
-                 MORSE_Complex64_t *A, int LDA,
-                 MORSE_Complex64_t *B, int LDB);
-
-double MORSE_zlange(MORSE_enum norm, int M, int N,
-                    MORSE_Complex64_t *A, int LDA);
-
-double MORSE_zlanhe(MORSE_enum norm, MORSE_enum uplo, int N,
-                    MORSE_Complex64_t *A, int LDA);
-
-double MORSE_zlansy(MORSE_enum norm, MORSE_enum uplo, int N,
-                    MORSE_Complex64_t *A, int LDA);
-
-double MORSE_zlantr(MORSE_enum norm, MORSE_enum uplo, MORSE_enum diag,
-                    int M, int N, MORSE_Complex64_t *A, int LDA);
-
-int MORSE_zlaset(MORSE_enum uplo, int M, int N, MORSE_Complex64_t alpha,
-                 MORSE_Complex64_t beta, MORSE_Complex64_t *A, int LDA);
-
-int MORSE_zlauum(MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA);
-
-int MORSE_zplghe( double bump, MORSE_enum uplo, int N,
-                  MORSE_Complex64_t *A, int LDA,
-                  unsigned long long int seed );
-
-int MORSE_zplgsy( MORSE_Complex64_t bump, MORSE_enum uplo, int N,
-                  MORSE_Complex64_t *A, int LDA,
-                  unsigned long long int seed );
-
-int MORSE_zplrnt( int M, int N, MORSE_Complex64_t *A, int LDA,
-                  unsigned long long int seed );
-
-int MORSE_zposv(MORSE_enum uplo, int N, int NRHS,
-                MORSE_Complex64_t *A, int LDA,
-                MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zpotrf(MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA);
-
-int MORSE_zsytrf(MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA);
-
-int MORSE_zpotri(MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA);
-
-int MORSE_zpotrs(MORSE_enum uplo, int N, int NRHS,
-                 MORSE_Complex64_t *A, int LDA,
-                 MORSE_Complex64_t *B, int LDB);
-
-#if defined (PRECISION_c) || defined(PRECISION_z)
-int MORSE_zsytrs(MORSE_enum uplo, int N, int NRHS,
-                 MORSE_Complex64_t *A, int LDA,
-                 MORSE_Complex64_t *B, int LDB);
-#endif
-
-int MORSE_zsymm(MORSE_enum side, MORSE_enum uplo, int M, int N,
-                MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA,
-                MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta,
-                MORSE_Complex64_t *C, int LDC);
-
-int MORSE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int N, int K,
-                MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA,
-                MORSE_Complex64_t beta, MORSE_Complex64_t *C, int LDC);
-
-int MORSE_zsyr2k(MORSE_enum uplo, MORSE_enum trans, int N, int K,
-                 MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA,
-                 MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta,
-                 MORSE_Complex64_t *C, int LDC);
-
-int MORSE_ztrmm(MORSE_enum side, MORSE_enum uplo,
-                MORSE_enum transA, MORSE_enum diag,
-                int N, int NRHS,
-                MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA,
-                MORSE_Complex64_t *B, int LDB);
-
-int MORSE_ztrsm(MORSE_enum side, MORSE_enum uplo,
-                MORSE_enum transA, MORSE_enum diag,
-                int N, int NRHS,
-                MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA,
-                MORSE_Complex64_t *B, int LDB);
-
-int MORSE_ztrsmpl(int N, int NRHS, MORSE_Complex64_t *A, int LDA,
-                  MORSE_desc_t *descL, int *IPIV,
-                  MORSE_Complex64_t *B, int LDB);
-
-int MORSE_ztrsmrv(MORSE_enum side, MORSE_enum uplo,
-                  MORSE_enum transA, MORSE_enum diag,
-                  int N, int NRHS,
-                  MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA,
-                  MORSE_Complex64_t *B, int LDB);
-
-int MORSE_ztrtri(MORSE_enum uplo, MORSE_enum diag, int N,
-                 MORSE_Complex64_t *A, int LDA);
-
-int MORSE_zunglq(int M, int N, int K, MORSE_Complex64_t *A, int LDA,
-                 MORSE_desc_t *descT, MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zungqr(int M, int N, int K, MORSE_Complex64_t *A, int LDA,
-                 MORSE_desc_t *descT, MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zunmlq(MORSE_enum side, MORSE_enum trans, int M, int N, int K,
-                 MORSE_Complex64_t *A, int LDA,
-                 MORSE_desc_t *descT,
-                 MORSE_Complex64_t *B, int LDB);
-
-int MORSE_zunmqr(MORSE_enum side, MORSE_enum trans, int M, int N, int K,
-                 MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descT,
-                 MORSE_Complex64_t *B, int LDB);
-
-/**
- *  Declarations of computational functions (tile layout)
- **/
-int MORSE_zgelqf_Tile(MORSE_desc_t *A, MORSE_desc_t *T);
-
-int MORSE_zgelqs_Tile(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B);
-
-int MORSE_zgels_Tile(MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *T,
-                     MORSE_desc_t *B);
-
-int MORSE_zgemm_Tile(MORSE_enum transA, MORSE_enum transB,
-                     MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                     MORSE_desc_t *B, MORSE_Complex64_t beta,
-                     MORSE_desc_t *C);
-
-int MORSE_zgeqrf_Tile(MORSE_desc_t *A, MORSE_desc_t *T);
-
-int MORSE_zgeqrs_Tile(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B);
-
-int MORSE_zgesv_incpiv_Tile(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV,
-                            MORSE_desc_t *B);
-
-int MORSE_zgesv_nopiv_Tile(MORSE_desc_t *A, MORSE_desc_t *B);
-
-int MORSE_zgetrf_incpiv_Tile(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV);
-
-int MORSE_zgetrf_nopiv_Tile(MORSE_desc_t *A);
-
-int MORSE_zgetrs_incpiv_Tile(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV,
-                             MORSE_desc_t *B);
-
-int MORSE_zgetrs_nopiv_Tile(MORSE_desc_t *A, MORSE_desc_t *B);
-
-int MORSE_zhemm_Tile(MORSE_enum side, MORSE_enum uplo,
-                     MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                     MORSE_desc_t *B, MORSE_Complex64_t beta,
-                     MORSE_desc_t *C);
-
-int MORSE_zherk_Tile(MORSE_enum uplo, MORSE_enum trans,
-                     double alpha, MORSE_desc_t *A,
-                     double beta, MORSE_desc_t *C);
-
-int MORSE_zher2k_Tile(MORSE_enum uplo, MORSE_enum trans,
-                      MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                      MORSE_desc_t *B, double beta, MORSE_desc_t *C);
-
-int MORSE_zlacpy_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
-
-double MORSE_zlange_Tile(MORSE_enum norm, MORSE_desc_t *A);
-
-double MORSE_zlanhe_Tile(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A);
-
-double MORSE_zlansy_Tile(MORSE_enum norm, MORSE_enum uplo, MORSE_desc_t *A);
-
-double MORSE_zlantr_Tile(MORSE_enum norm, MORSE_enum uplo,
-                         MORSE_enum diag, MORSE_desc_t *A);
-
-int MORSE_zlaset_Tile(MORSE_enum uplo, MORSE_Complex64_t alpha,
-                      MORSE_Complex64_t beta, MORSE_desc_t *A);
-
-int MORSE_zlauum_Tile(MORSE_enum uplo, MORSE_desc_t *A);
-
-int MORSE_zplghe_Tile(double bump, MORSE_enum uplo, MORSE_desc_t *A,
-                      unsigned long long int seed);
-
-int MORSE_zplgsy_Tile(MORSE_Complex64_t bump, MORSE_enum uplo, MORSE_desc_t *A,
-                      unsigned long long int seed );
-
-int MORSE_zplrnt_Tile(MORSE_desc_t *A, unsigned long long int seed );
-
-int MORSE_zposv_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
-
-int MORSE_zpotrf_Tile(MORSE_enum uplo, MORSE_desc_t *A);
-
-int MORSE_zsytrf_Tile(MORSE_enum uplo, MORSE_desc_t *A);
-
-int MORSE_zpotri_Tile(MORSE_enum uplo, MORSE_desc_t *A);
-
-int MORSE_zpotrs_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
-
-#if defined (PRECISION_c) || defined(PRECISION_z)
-int MORSE_zsytrs_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
-#endif
-
-int MORSE_zsymm_Tile(MORSE_enum side, MORSE_enum uplo,
-                     MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                     MORSE_desc_t *B, MORSE_Complex64_t beta,
-                     MORSE_desc_t *C);
-
-int MORSE_zsyrk_Tile(MORSE_enum uplo, MORSE_enum trans,
-                     MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                     MORSE_Complex64_t beta, MORSE_desc_t *C);
-
-int MORSE_zsyr2k_Tile(MORSE_enum uplo, MORSE_enum trans,
-                      MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                      MORSE_desc_t *B, MORSE_Complex64_t beta,
-                      MORSE_desc_t *C);
-
-int MORSE_ztrmm_Tile(MORSE_enum side, MORSE_enum uplo,
-                     MORSE_enum transA, MORSE_enum diag,
-                     MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                     MORSE_desc_t *B);
-
-int MORSE_ztrsm_Tile(MORSE_enum side, MORSE_enum uplo,
-                     MORSE_enum transA, MORSE_enum diag,
-                     MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                     MORSE_desc_t *B);
-
-int MORSE_ztrsmpl_Tile(MORSE_desc_t *A, MORSE_desc_t *L,
-                       int *IPIV, MORSE_desc_t *B);
-
-int MORSE_ztrsmrv_Tile(MORSE_enum side, MORSE_enum uplo,
-                       MORSE_enum transA, MORSE_enum diag,
-                       MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                       MORSE_desc_t *B);
-
-int MORSE_ztrtri_Tile(MORSE_enum uplo, MORSE_enum diag, MORSE_desc_t *A);
-
-int MORSE_zunglq_Tile(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B);
-
-int MORSE_zungqr_Tile(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B);
-
-int MORSE_zunmlq_Tile(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A,
-                      MORSE_desc_t *T, MORSE_desc_t *B);
-
-int MORSE_zunmqr_Tile(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A,
-                      MORSE_desc_t *T, MORSE_desc_t *B);
-
-/**
- *  Declarations of computational functions
- *  (tile layout, asynchronous execution)
- **/
-int MORSE_zgelqf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zgelqs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T,
-                            MORSE_desc_t *B,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zgels_Tile_Async(MORSE_enum trans, MORSE_desc_t *A,
-                           MORSE_desc_t *T, MORSE_desc_t *B,
-                           MORSE_sequence_t *sequence,
-                           MORSE_request_t *request);
-
-int MORSE_zgemm_Tile_Async(MORSE_enum transA, MORSE_enum transB,
-                           MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                           MORSE_desc_t *B, MORSE_Complex64_t beta,
-                           MORSE_desc_t *C, MORSE_sequence_t *sequence,
-                           MORSE_request_t *request);
-
-int MORSE_zgeqrf_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request)
-
-int MORSE_zgeqrs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T,
-                            MORSE_desc_t *B,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zgesv_incpiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L,
-                                  int *IPIV, MORSE_desc_t *B,
-                                  MORSE_sequence_t *sequence,
-                                  MORSE_request_t *request);
-
-int MORSE_zgesv_nopiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *B,
-                                 MORSE_sequence_t *sequence,
-                                 MORSE_request_t *request);
-
-int MORSE_zgetrf_incpiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L,
-                                   int *IPIV, MORSE_sequence_t *sequence,
-                                   MORSE_request_t *request);
-
-int MORSE_zgetrf_nopiv_Tile_Async(MORSE_desc_t *A,
-                                  MORSE_sequence_t *sequence,
-                                  MORSE_request_t *request);
-
-int MORSE_zgetrs_incpiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L,
-                                   int *IPIV, MORSE_desc_t *B,
-                                   MORSE_sequence_t *sequence,
-                                   MORSE_request_t *request);
-
-int MORSE_zgetrs_nopiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *B,
-                                  MORSE_sequence_t *sequence,
-                                  MORSE_request_t *request);
-
-int MORSE_zhemm_Tile_Async(MORSE_enum side, MORSE_enum uplo,
-                           MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                           MORSE_desc_t *B, MORSE_Complex64_t beta,
-                           MORSE_desc_t *C, MORSE_sequence_t *sequence,
-                           MORSE_request_t *request);
-
-int MORSE_zherk_Tile_Async(MORSE_enum uplo, MORSE_enum trans,
-                           double alpha, MORSE_desc_t *A,
-                           double beta, MORSE_desc_t *C,
-                           MORSE_sequence_t *sequence,
-                           MORSE_request_t *request);
-
-int MORSE_zher2k_Tile_Async(MORSE_enum uplo, MORSE_enum trans,
-                            MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                            MORSE_desc_t *B, double beta, MORSE_desc_t *C,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zlacpy_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A,
-                            MORSE_desc_t *B, MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zlange_Tile_Async(MORSE_enum norm, MORSE_desc_t *A, double *value,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zlanhe_Tile_Async(MORSE_enum norm, MORSE_enum uplo,
-                            MORSE_desc_t *A, double *value,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zlansy_Tile_Async(MORSE_enum norm, MORSE_enum uplo,
-                            MORSE_desc_t *A, double *value,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zlantr_Tile_Async(MORSE_enum norm, MORSE_enum uplo,
-                            MORSE_enum diag, MORSE_desc_t *A, double *value,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zlaset_Tile_Async(MORSE_enum uplo, MORSE_Complex64_t alpha,
-                            MORSE_Complex64_t beta, MORSE_desc_t *A,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zlauum_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zplghe_Tile_Async(double bump, MORSE_enum uplo, MORSE_desc_t *A,
-                            unsigned long long int seed,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request );
-
-int MORSE_zplgsy_Tile_Async(MORSE_Complex64_t bump, MORSE_enum uplo, MORSE_desc_t *A,
-                            unsigned long long int seed,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request );
-
-int MORSE_zplrnt_Tile_Async(MORSE_desc_t *A, unsigned long long int seed,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request );
-
-int MORSE_zposv_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A,
-                           MORSE_desc_t *B,
-                           MORSE_sequence_t *sequence,
-                           MORSE_request_t *request);
-
-int MORSE_zpotrf_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zsytrf_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zpotri_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zpotrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A,
-                            MORSE_desc_t *B, MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-#if defined (PRECISION_c) || defined(PRECISION_z)
-int MORSE_zsytrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A,
-                            MORSE_desc_t *B,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-#endif
-
-int MORSE_zsymm_Tile_Async(MORSE_enum side, MORSE_enum uplo,
-                           MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                           MORSE_desc_t *B, MORSE_Complex64_t beta,
-                           MORSE_desc_t *C, MORSE_sequence_t *sequence,
-                           MORSE_request_t *request);
-
-int MORSE_zsyrk_Tile_Async(MORSE_enum uplo, MORSE_enum trans,
-                           MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                           MORSE_Complex64_t beta, MORSE_desc_t *C,
-                           MORSE_sequence_t *sequence,
-                           MORSE_request_t *request);
-
-int MORSE_zsyr2k_Tile_Async(MORSE_enum uplo, MORSE_enum trans,
-                            MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                            MORSE_desc_t *B, MORSE_Complex64_t beta,
-                            MORSE_desc_t *C, MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_ztrmm_Tile_Async(MORSE_enum side, MORSE_enum uplo,
-                           MORSE_enum transA, MORSE_enum diag,
-                           MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                           MORSE_desc_t *B, MORSE_sequence_t *sequence,
-                           MORSE_request_t *request);
-
-int MORSE_ztrsm_Tile_Async(MORSE_enum side, MORSE_enum uplo,
-                           MORSE_enum transA, MORSE_enum diag,
-                           MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                           MORSE_desc_t *B, MORSE_sequence_t *sequence,
-                           MORSE_request_t *request);
-
-int MORSE_ztrsmpl_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV,
-                             MORSE_desc_t *B, MORSE_sequence_t *sequence,
-                             MORSE_request_t *request);
-
-int MORSE_ztrsmrv_Tile_Async(MORSE_enum side, MORSE_enum uplo,
-                             MORSE_enum transA, MORSE_enum diag,
-                             MORSE_Complex64_t alpha, MORSE_desc_t *A,
-                             MORSE_desc_t *B, MORSE_sequence_t *sequence,
-                             MORSE_request_t *request);
-
-int MORSE_ztrtri_Tile_Async(MORSE_enum uplo, MORSE_enum diag,
-                            MORSE_desc_t *A,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zunglq_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T,
-                            MORSE_desc_t *B,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zungqr_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T,
-                            MORSE_desc_t *B,
-                            MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zunmlq_Tile_Async(MORSE_enum side, MORSE_enum trans,
-                            MORSE_desc_t *A, MORSE_desc_t *T,
-                            MORSE_desc_t *B, MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-int MORSE_zunmqr_Tile_Async(MORSE_enum side, MORSE_enum trans,
-                            MORSE_desc_t *A, MORSE_desc_t *T,
-                            MORSE_desc_t *B, MORSE_sequence_t *sequence,
-                            MORSE_request_t *request);
-
-@end verbatim
-
-@c -nofor_main
diff --git a/doc/texinfo/figures/morse_header.png b/doc/texinfo/figures/morse_header.png
deleted file mode 100644
index ada315a235dfd4ee4a35064e13ae0d680b480059..0000000000000000000000000000000000000000
Binary files a/doc/texinfo/figures/morse_header.png and /dev/null differ
diff --git a/doc/texinfo/figures/potri_async.png b/doc/texinfo/figures/potri_async.png
deleted file mode 100644
index 85ebe6ad9af3db6070cd898323400a8a584b7583..0000000000000000000000000000000000000000
Binary files a/doc/texinfo/figures/potri_async.png and /dev/null differ
diff --git a/doc/texinfo/figures/tile_layout.jpg b/doc/texinfo/figures/tile_layout.jpg
deleted file mode 100644
index 16a44b08afab7de2c15a75f200baf210c7fe6d3e..0000000000000000000000000000000000000000
Binary files a/doc/texinfo/figures/tile_layout.jpg and /dev/null differ
diff --git a/doc/texinfo/figures/tile_layout.pdf b/doc/texinfo/figures/tile_layout.pdf
deleted file mode 100644
index f5df80dbe06de18346c1df6c14a20c6e1c24edd1..0000000000000000000000000000000000000000
Binary files a/doc/texinfo/figures/tile_layout.pdf and /dev/null differ
diff --git a/doc/texinfo/figures/tile_lu.jpg b/doc/texinfo/figures/tile_lu.jpg
deleted file mode 100644
index 9da660ab607fae57cec55eb3c8ddc0512ea7fd62..0000000000000000000000000000000000000000
Binary files a/doc/texinfo/figures/tile_lu.jpg and /dev/null differ
diff --git a/doc/texinfo/figures/tile_lu.pdf b/doc/texinfo/figures/tile_lu.pdf
deleted file mode 100644
index c9b6df65197c83449c6335ebb1da393d92cd683f..0000000000000000000000000000000000000000
Binary files a/doc/texinfo/figures/tile_lu.pdf and /dev/null differ
diff --git a/doc/texinfo/figures/trace_qr.jpg b/doc/texinfo/figures/trace_qr.jpg
deleted file mode 100644
index 92504d096fe829e3a0d9f2a296262c00cef3e792..0000000000000000000000000000000000000000
Binary files a/doc/texinfo/figures/trace_qr.jpg and /dev/null differ
diff --git a/doc/texinfo/figures/trace_qr.pdf b/doc/texinfo/figures/trace_qr.pdf
deleted file mode 100644
index e030af5e252dd26828aa156e17c6b1d240a534db..0000000000000000000000000000000000000000
Binary files a/doc/texinfo/figures/trace_qr.pdf and /dev/null differ
diff --git a/doc/texinfo/morse.css b/doc/texinfo/morse.css
deleted file mode 100644
index 41adb20078f7e5b0af0af434fa51bdb361af022f..0000000000000000000000000000000000000000
--- a/doc/texinfo/morse.css
+++ /dev/null
@@ -1,72 +0,0 @@
-body {
-  padding: 2em 1em 2em 70px;
-  margin: 0;
-  font-family: sans-serif;
-  color: black;
-  background: white;
-  background-position: top left;
-  background-attachment: fixed;
-  background-repeat: no-repeat;
-}
-:link { color: #00C; background: transparent }
-:visited { color: #609; background: transparent }
-a:active { color: #C00; background: transparent }
-
-a:link img, a:visited img { border-style: none }
-
-a img { color: white; }
-@media all {
-  a img { color: inherit; }
-}
-
-th, td {
-  font-family: sans-serif;
-}
-
-h1, h2, h3, h4, h5, h6 { text-align: left }
-h1, h2, h3 { color: #005A9C; background: white }
-h1 { font: 170% sans-serif }
-h2 { font: 140% sans-serif }
-h3 { font: 120% sans-serif }
-h4 { font: bold 100% sans-serif }
-h5 { font: italic 100% sans-serif }
-h6 { font: small-caps 100% sans-serif }
-
-.hide { display: none }
-
-div.head { margin-bottom: 1em }
-div.head h1 { margin-top: 2em; clear: both }
-div.head table { margin-left: 2em; margin-top: 2em }
-
-p.copyright { font-size: small }
-p.copyright small { font-size: small }
-
-@media screen {
-a[href]:hover { background: #ffa }
-}
-
-pre { margin-left: 2em }
-
-dt, dd { margin-top: 0; margin-bottom: 0 }
-dt { font-weight: bold }
-
-pre, code { font-family: monospace }
-
-ul.toc, ol.toc {
-  list-style: disc;
-  list-style: none;
-}
-
-@media aural {  
-  h1, h2, h3 { stress: 20; richness: 90 }
-  .hide { speak: none }
-  p.copyright { volume: x-soft; speech-rate: x-fast }
-  dt { pause-before: 20% }
-  pre { speak-punctuation: code } 
-}
-
-/*
-body {
-  background-image: url();
-}
-*/
diff --git a/doc/texinfo/users_guide.texi.in b/doc/texinfo/users_guide.texi.in
deleted file mode 100644
index 79051a895a58462647682e37bd8986eb5d544ee2..0000000000000000000000000000000000000000
--- a/doc/texinfo/users_guide.texi.in
+++ /dev/null
@@ -1,150 +0,0 @@
-\input texinfo   @c -*-texinfo-*-
-
-@c %**start of header
-@setfilename users_guide.info
-@settitle CHAMELEON User's Guide
-@c %**end of header
-
-@include version.texi
-
-@c #############################################################################
-
-@copying
-Copyright @copyright{} 2017 Inria
-
-@noindent
-Copyright @copyright{} 2014 The University of Tennessee
-
-@noindent
-Copyright @copyright{} 2014 King Abdullah University of Science and Technology
-
-@quotation
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-@itemize @bullet
-@item
-Redistributions of source code must retain the above copyright notice, this
-list
-of conditions and the following disclaimer.
-
-@item
-Redistributions in binary form must reproduce the above copyright notice, this
-list of conditions and the following disclaimer listed in this license in the
-documentation and/or other materials provided with the distribution.
-
-@item
-Neither the name of the copyright holders nor the names of its contributors may
-be used to endorse or promote products derived from this software without
-specific prior written permission.
-@end itemize
-
-This software is provided by the copyright holders and contributors "as is" and
-any express or implied warranties, including, but not limited to, the implied
-warranties of merchantability and fitness for a particular purpose are
-disclaimed.
-In no event shall the copyright owner or contributors be liable for any direct,
-indirect, incidental, special, exemplary, or consequential damages (including,
-but not limited to, procurement of substitute goods or services; loss of use,
-data, or profits; or business interruption) however caused and on any theory of
-liability, whether in contract, strict liability, or tort (including negligence
-or otherwise) arising in any way out of the use of this software, even if
-advised of the possibility of such damage.
-@end quotation
-@end copying
-
-@c #############################################################################
-
-@titlepage
-@c @flushleft
-@c @image{morse_header}
-@c @end flushleft
-@title CHAMELEON User's Guide
-@subtitle Software of MORSE project
-
-@flushright
-@strong{A dense linear algebra software for heterogeneous architectures}
-@strong{Version @value{VERSION}}
-
-@strong{Inria}
-@strong{University of Tennessee}
-@strong{University of Colorado Denver}
-@strong{King Abdullah University of Science and Technology}
-
-@end flushright
-
-@page
-@vskip 0pt plus 1filll
-
-@insertcopying
-
-@end titlepage
-
-@c #############################################################################
-
-@setchapternewpage odd
-@dircategory Development
-@direntry
-* CHAMELEON: (chameleon).             CHAMELEON User's Guide
-@end direntry
-
-
-@c @summarycontents
-@contents
-@page
-
-@ifnottex
-@node Top
-@top Preface
-
-This manual documents the usage of CHAMELEON version @value{VERSION}.
-It was last updated on @value{UPDATED}.
-
-@insertcopying
-@end ifnottex
-
-@comment
-@comment  When you add a new menu item, please keep the right hand
-@comment  aligned to the same column.  Do not use tabs.  This provides
-@comment  better formatting.
-@comment
-@menu
-* Introduction::                Getting started
-* Installing CHAMELEON::      How to configure, build and install CHAMELEON
-* Configuring CHAMELEON::     How to configure CHAMELEON
-* Using CHAMELEON::           How to run CHAMELEON application
-@end menu
-
-@c ---------------------------------------------------------------------
-@c Introduction to CHAMELEON
-@c ---------------------------------------------------------------------
-
-@node Introduction
-@chapter Introduction to CHAMELEON
-@include @CMAKE_CURRENT_SOURCE_DIR@/chapters/introduction.texi
-
-@c ---------------------------------------------------------------------
-@c Installing CHAMELEON
-@c ---------------------------------------------------------------------
-
-@node Installing CHAMELEON
-@chapter Installing CHAMELEON
-@include @CMAKE_CURRENT_SOURCE_DIR@/chapters/installing.texi
-
-@c ---------------------------------------------------------------------
-@c Configuration options
-@c ---------------------------------------------------------------------
-
-@node Configuring CHAMELEON
-@chapter Configuring CHAMELEON
-@include @CMAKE_CURRENT_SOURCE_DIR@/chapters/configuration.texi
-
-@c ---------------------------------------------------------------------
-@c Using CHAMELEON
-@c ---------------------------------------------------------------------
-
-@node Using CHAMELEON
-@chapter Using CHAMELEON
-@include @CMAKE_CURRENT_SOURCE_DIR@/chapters/using.texi
-
-
-@bye
diff --git a/doc/texinfo/version.texi.in b/doc/texinfo/version.texi.in
deleted file mode 100644
index 4af718ef2db0e49e2646f7aa0f5bd3630dd0577f..0000000000000000000000000000000000000000
--- a/doc/texinfo/version.texi.in
+++ /dev/null
@@ -1,4 +0,0 @@
-@set UPDATED 30 January 2017
-@set UPDATED-MONTH January 2017
-@set EDITION @CHAMELEON_VERSION_MAJOR@.@CHAMELEON_VERSION_MINOR@.@CHAMELEON_VERSION_MICRO@
-@set VERSION @CHAMELEON_VERSION_MAJOR@.@CHAMELEON_VERSION_MINOR@.@CHAMELEON_VERSION_MICRO@