Commit 0a1d5b29 authored by Mathieu Faverge's avatar Mathieu Faverge

Remove references to texinfo

parent bd9b8f15
......@@ -31,11 +31,11 @@ developers must follow and that should be read by contributors.
*** Prerequisites
To generate the documentation you need to have [[http://www.stack.nl/~dimitri/doxygen/][Doxygen]] and
[[https://www.gnu.org/software/texinfo/][Texinfo]] installed on your system.
[[https://orgmode.org/][org-mode]] installed on your system.
For example, on Debian systems:
#+begin_src sh
sudo apt install doxygen texinfo texlive texlive-latex-extra emacs
sudo apt install doxygen org-mode texlive texlive-latex-extra emacs
#+end_src
*** configure + make documentation
......
......@@ -35,7 +35,6 @@ cmake_minimum_required(VERSION 2.8)
#############################################
add_subdirectory(doxygen)
add_subdirectory(orgmode)
#add_subdirectory(texinfo)
###
### END CMakeLists.txt
###
###
#
# @file CMakeLists.txt
#
# @copyright 2009-2014 The University of Tennessee and The University of
# Tennessee Research Foundation. All rights reserved.
# @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
# Univ. Bordeaux. All rights reserved.
#
###
#
# @project MORSE
# MORSE is a software package provided by:
# Inria Bordeaux - Sud-Ouest,
# Univ. of Tennessee,
# King Abdullah University of Science and Technology,
# Univ. of California Berkeley,
# Univ. of Colorado Denver.
#
# @version 1.0.0
# @author Cedric Castagnede
# @author Emmanuel Agullo
# @author Mathieu Faverge
# @author Florent Pruvost
# @date 2012-07-13
#
###
cmake_minimum_required(VERSION 2.8)

# Instantiate the Texinfo templates
# ---------------------------------
# Every <name>.texi.in of the current source directory is configured into
# <name>.texi in the current binary directory. With @ONLY, only @VAR@
# references are substituted; ${VAR} text in the templates is left alone.
foreach(_texi_name version users_guide)
  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${_texi_name}.texi.in"
                 "${CMAKE_CURRENT_BINARY_DIR}/${_texi_name}.texi"
                 @ONLY)
endforeach()
# Figures used by the guide
# -------------------------
# FIGURES: every image copied into the build directory.
# FIGURES_HTML: the images installed next to the HTML output.
set(FIGURES
  tile_lu.pdf      tile_lu.jpg
  tile_layout.pdf  tile_layout.jpg
  trace_qr.pdf     trace_qr.jpg
  potri_async.png
  morse_header.png)
set(FIGURES_HTML
  tile_lu.jpg
  tile_layout.jpg
  trace_qr.jpg
  potri_async.png
  morse_header.png)

# Copy each figure verbatim from figures/ in the source tree to the current
# binary directory, next to the configured .texi files.
foreach(_figure IN LISTS FIGURES)
  configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/figures/${_figure}
    ${CMAKE_CURRENT_BINARY_DIR}/${_figure}
    COPYONLY)
endforeach()
# Locate the documentation tools
# ------------------------------
# Each lookup stores the executable path (or <VAR>-NOTFOUND) in a cache
# variable; the sections below test these variables before adding targets.
find_program(MAKEINFO_COMPILER NAMES makeinfo)
find_program(TEXI2DVI_COMPILER NAMES texi2dvi)
find_program(TEX_COMPILER      NAMES tex)
# NOTE(review): DOT_COMPILER is not referenced in the visible part of this
# file - confirm whether another file relies on it.
find_program(DOT_COMPILER      NAMES dot)
# Info and HTML outputs (makeinfo)
# --------------------------------
# When makeinfo is available, build users_guide.info and a single-page
# users_guide.html from the configured users_guide.texi, then install them
# together with the figures listed in FIGURES_HTML.
if(MAKEINFO_COMPILER)

  # Build rules
  # -----------
  # No WORKING_DIRECTORY is given, so the commands run in the current binary
  # directory, which is where users_guide.texi is configured.
  # VERBATIM keeps argument escaping identical on every platform.
  add_custom_command(OUTPUT users_guide.info
    COMMAND ${MAKEINFO_COMPILER}
            users_guide.texi
    DEPENDS users_guide.texi.in
    VERBATIM
    )

  # morse.css is handed to makeinfo through --css-include, so it is listed as
  # a dependency: editing the stylesheet must regenerate the HTML page.
  add_custom_command(OUTPUT users_guide.html
    COMMAND ${MAKEINFO_COMPILER}
            --html
            --no-split
            --css-include=${CMAKE_CURRENT_SOURCE_DIR}/morse.css
            users_guide.texi
    DEPENDS users_guide.texi.in
            ${CMAKE_CURRENT_SOURCE_DIR}/morse.css
    VERBATIM
    )

  # Both targets are part of the default build (ALL).
  add_custom_target(doc-info-users_guide ALL DEPENDS users_guide.info)
  add_custom_target(doc-html-users_guide ALL DEPENDS users_guide.html)

  # Installation
  # ------------
  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/users_guide.info
          DESTINATION share/chameleon/info)
  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/users_guide.html
          DESTINATION share/chameleon/html)
  # The HTML page references its figures by file name, so they are installed
  # in the same directory.
  foreach(_fig ${FIGURES_HTML})
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${_fig}
            DESTINATION share/chameleon/html)
  endforeach()

else()
  message(STATUS "Looking for makeinfo - not found")
endif()
# PDF output (texi2dvi)
# ---------------------
# The PDF target is only added when both texi2dvi and tex were found.
if(TEXI2DVI_COMPILER AND TEX_COMPILER)

  # Build rule
  # ----------
  # Runs in the current binary directory (no WORKING_DIRECTORY is given),
  # which is where users_guide.texi is configured.
  # VERBATIM keeps argument escaping identical on every platform.
  add_custom_command(OUTPUT users_guide.pdf
    COMMAND ${TEXI2DVI_COMPILER}
            --pdf
            --batch
            users_guide.texi
    DEPENDS users_guide.texi.in
    VERBATIM
    )
  add_custom_target(doc-pdf-users_guide ALL DEPENDS users_guide.pdf)

  # Installation
  # ------------
  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/users_guide.pdf
          DESTINATION share/chameleon/pdf)

else()
  # Report exactly which tool is missing: either one disables the PDF target.
  if(NOT TEXI2DVI_COMPILER)
    message(STATUS "Looking for texi2dvi - not found")
  endif()
  if(NOT TEX_COMPILER)
    message(STATUS "Looking for tex - not found")
  endif()
endif()
###
### END CMakeLists.txt
###
@c -*-texinfo-*-
@c This file is part of the MORSE Handbook.
@c Copyright (C) 2017 Inria
@c Copyright (C) 2014 The University of Tennessee
@c Copyright (C) 2014 King Abdullah University of Science and Technology
@c See the file ../chameleon.texi for copying conditions.
@menu
* Compilation configuration::
* Dependencies detection::
@c * Dependencies compilation::
* Use FxT profiling through StarPU::
* Use simulation mode with StarPU-SimGrid::
* Use out of core support with StarPU::
@end menu
@c @code{} @option{}
@c @table @code
@c @item truc
@c @item muche
@c @item et zut
@c @c @end table
@node Compilation configuration
@section Compilation configuration
The following arguments can be given to the @command{cmake <path to source
directory>} script.
In this chapter, the following convention is used:
@itemize @bullet
@item
@option{path} is a path in your filesystem,
@item
@option{var} is a string and the correct value or an example will be given,
@item
@option{trigger} is a CMake option and the correct value is @code{ON} or
@code{OFF}.
@end itemize
Using CMake there are several ways to give options:
@enumerate
@item directly as CMake command line arguments
@item invoke @command{cmake <path to source directory>} once and then use
@command{ccmake <path to source directory>} to edit options through a
minimalist GUI (requires
@samp{cmake-curses-gui} installed on a Linux system)
@item invoke the @command{cmake-gui} command and fill in information about the
location of the sources and where to build the project, then you have
access to options through a user-friendly Qt interface (requires
@samp{cmake-qt-gui} installed on a Linux system)
@end enumerate
Example of configuration using the command line
@example
cmake ~/chameleon/ -DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=~/install \
-DCHAMELEON_USE_CUDA=ON \
-DCHAMELEON_USE_MPI=ON \
-DBLA_VENDOR=Intel10_64lp \
-DSTARPU_DIR=~/install/starpu-1.1 \
-DCHAMELEON_ENABLE_TRACING=ON
@end example
You can get the full list of options with @option{-L[A][H]} options of
@command{cmake} command:
@example
cmake -LH <path to source directory>
@end example
@menu
* General CMake options::
* CHAMELEON options::
@end menu
@node General CMake options
@subsection General CMake options
@table @code
@item -DCMAKE_INSTALL_PREFIX=@option{path} (default:@option{path=/usr/local})
Install directory used by @code{make install} where some headers and libraries
will be copied.
Permissions have to be granted to write onto @option{path} during @code{make
install} step.
@item -DCMAKE_BUILD_TYPE=@option{var} (default: @option{Release})
Define the build type and the compiler optimization level.
The possible values for @option{var} are:
@table @code
@item empty
@item Debug
@item Release
@item RelWithDebInfo
@item MinSizeRel
@end table
@item -DBUILD_SHARED_LIBS=@option{trigger} (default:@option{OFF})
Indicate whether or not CMake has to build CHAMELEON static (@option{OFF}) or
shared (@option{ON}) libraries.
@end table
@node CHAMELEON options
@subsection CHAMELEON options
List of CHAMELEON options that can be enabled/disabled (value=@code{ON}
or @code{OFF}):
@table @code
@item @option{-DCHAMELEON_SCHED_STARPU}=@option{trigger} (default: @code{ON})
to link with StarPU library (runtime system)
@item @option{-DCHAMELEON_SCHED_QUARK}=@option{trigger} (default: @code{OFF})
to link with QUARK library (runtime system)
@item @option{-DCHAMELEON_USE_CUDA}=@option{trigger} (default: @code{OFF})
to link with CUDA runtime (implementation paradigm for accelerated codes on
GPUs) and cuBLAS library (optimized BLAS kernels on GPUs), can only be used with
StarPU
@item @option{-DCHAMELEON_USE_MPI}=@option{trigger} (default: @code{OFF})
to link with MPI library (message passing implementation for use of multiple
nodes with distributed memory), can only be used with StarPU
@item @option{-DCHAMELEON_ENABLE_TRACING}=@option{trigger} (default: @code{OFF})
to enable trace generation during execution of timing drivers.
It requires StarPU to be linked with FxT library (trace execution of kernels on workers).
@item @option{-DCHAMELEON_SIMULATION=trigger} (default: @code{OFF})
to enable simulation mode, which means CHAMELEON will not really execute tasks,
see details in section @ref{Use simulation mode with StarPU-SimGrid}.
This option must be used with StarPU compiled with
@uref{http://simgrid.gforge.inria.fr/, SimGrid} allowing to guess the
execution time on any architecture.
This feature should be used to experiment with scheduler behavior and
performance, not to produce solutions of linear systems.
@item @option{-DCHAMELEON_ENABLE_DOCS=trigger} (default: @code{ON})
to control build of the documentation
@item @option{-DCHAMELEON_ENABLE_EXAMPLE=trigger} (default: @code{ON})
to control build of the examples executables (API usage)
@item @option{-DCHAMELEON_ENABLE_TESTING=trigger} (default: @code{ON})
to control build of testing executables (numerical check) contained in
the @file{testing/} sub-directory
@item @option{-DCHAMELEON_ENABLE_TIMING=trigger} (default: @code{ON})
to control build of timing executables (performance check) contained in
the @file{timing/} sub-directory
@item @option{-DCHAMELEON_PREC_S=trigger} (default: @code{ON})
to enable the support of single precision arithmetic (float in C)
@item @option{-DCHAMELEON_PREC_D=trigger} (default: @code{ON})
to enable the support of double arithmetic precision (double in C)
@item @option{-DCHAMELEON_PREC_C=trigger} (default: @code{ON})
to enable the support of complex arithmetic precision (complex in C)
@item @option{-DCHAMELEON_PREC_Z=trigger} (default: @code{ON})
to enable the support of double complex arithmetic precision (double complex
in C)
@item @option{-DBLAS_VERBOSE=trigger} (default: @code{OFF})
to make BLAS library discovery verbose
@item @option{-DLAPACK_VERBOSE=trigger} (default: @code{OFF})
to make LAPACK library discovery verbose (automatically enabled if
@option{BLAS_VERBOSE=@code{ON}})
@end table
List of CHAMELEON options that need a specific value:
@table @code
@item @option{-DBLA_VENDOR=@option{var}} (default: @option{empty})
The possible values for @option{var} are:
@table @code
@item empty
@item all
@item Intel10_64lp
@item Intel10_64lp_seq
@item ACML
@item Apple
@item Generic
@item ...
@end table
to force CMake to find a specific BLAS library, see the full list of
@option{BLA_VENDOR} values in the documentation of the CMake FindBLAS module.
By default @option{BLA_VENDOR} is empty so that CMake tries to detect all
possible BLAS vendors with a preference for Intel MKL.
@end table
List of CHAMELEON options which require a path:
@table @code
@item @option{-DLIBNAME_DIR=@option{path}} (default: empty)
root directory of the LIBNAME library installation
@item @option{-DLIBNAME_INCDIR=@option{path}} (default: empty)
directory of the LIBNAME library headers installation
@item @option{-DLIBNAME_LIBDIR=@option{path}} (default: empty)
directory of the LIBNAME libraries (.so, .a, .dylib, etc) installation
@end table
LIBNAME can be one of the following: BLAS - CBLAS - FXT - HWLOC -
LAPACK - LAPACKE - QUARK - STARPU - TMG.
See paragraph about @ref{Dependencies detection} for details.
Libraries detected with an official CMake module (see module files in
@file{CMAKE_ROOT/Modules/}):
@itemize @bullet
@item CUDA
@item MPI
@item Threads
@end itemize
Libraries detected with CHAMELEON CMake modules (see module files in the
@file{cmake_modules/} directory of CHAMELEON sources):
@itemize @bullet
@item BLAS
@item CBLAS
@item FXT
@item HWLOC
@item LAPACK
@item LAPACKE
@item QUARK
@item STARPU
@item TMG
@end itemize
@node Dependencies detection
@section Dependencies detection
You have different choices to detect dependencies on your system, either by
setting some environment variables containing paths to the libs and headers or
by specifying them directly at cmake configure.
Different cases:
@enumerate
@item detection of dependencies through environment variables:
@itemize @bullet
@item @env{LD_LIBRARY_PATH} environment variable should contain the list of
paths
where to find the libraries:
@example
export @env{LD_LIBRARY_PATH}=$@env{LD_LIBRARY_PATH}:path/to/your/libs
@end example
@item @env{INCLUDE} environment variable should contain the list of paths
where to find the header files of libraries
@example
export @env{INCLUDE}=$@env{INCLUDE}:path/to/your/headers
@end example
@end itemize
@item detection with user-provided paths:
@itemize @bullet
@item you can specify the path at cmake configure by invoking
@example
cmake <path to SOURCE_DIR> -DLIBNAME_DIR=path/to/your/lib
@end example
where LIBNAME stands for the name of the library to look for, for example
@example
cmake <path to SOURCE_DIR> -DSTARPU_DIR=path/to/starpudir \
-DCBLAS_DIR= ...
@end example
@item it is also possible to specify headers and library directories
separately, example
@example
cmake <path to SOURCE_DIR> \
-DSTARPU_INCDIR=path/to/libstarpu/include/starpu/1.1 \
-DSTARPU_LIBDIR=path/to/libstarpu/lib
@end example
@item Note that BLAS and LAPACK detection can be tedious, so we provide a
verbose mode. Use @option{-DBLAS_VERBOSE=ON} or @option{-DLAPACK_VERBOSE=ON} to
enable it.
@end itemize
@end enumerate
@c @node Dependencies compilation
@c @section Dependencies compilation
@node Use FxT profiling through StarPU
@section Use FxT profiling through StarPU
StarPU can generate its own trace log files by compiling it with the
@option{--with-fxt}
option at the configure step (you may have to specify the directory where you
installed FxT by giving @option{--with-fxt=...} instead of @option{--with-fxt}
alone).
By doing so, traces are generated after each execution of a program which uses
StarPU in the directory pointed by the @env{STARPU_FXT_PREFIX} environment
variable. Example:
@example
export @env{STARPU_FXT_PREFIX}=/home/yourname/fxt_files/
@end example
When executing a @command{./timing/...} CHAMELEON program, if it has been
enabled (StarPU compiled with FxT and @option{-DCHAMELEON_ENABLE_TRACING=ON}), you
can give the option @option{--trace} to tell the program to generate trace log
files.
Finally, to generate the trace file which can be opened with
@uref{http://vite.gforge.inria.fr/, Vite} program, you have to use the
@command{starpu_fxt_tool} executable of StarPU.
You can use it to generate the trace file like this:
@itemize @bullet
@item @command{path/to/your/install/starpu/bin/starpu_fxt_tool -i prof_filename}
There is one file per MPI process (prof_filename_0, prof_filename_1 ...).
To generate a trace of mpi programs you can call it like this:
@item @command{path/to/your/install/starpu/bin/starpu_fxt_tool -i
prof_filename*}
The trace file will be named paje.trace (use -o option to specify an output
name).
@end itemize
Alternatively, one can also generate directly .paje trace files after the execution
by setting @env{STARPU_GENERATE_TRACE=1}.
@node Use simulation mode with StarPU-SimGrid
@section Use simulation mode with StarPU-SimGrid
Simulation mode can be enabled by setting the cmake option
@option{-DCHAMELEON_SIMULATION=ON}.
This mode allows you to simulate execution of algorithms with StarPU compiled
with @uref{http://simgrid.gforge.inria.fr/, SimGrid}.
To do so, some perfmodels are provided in the @file{simucore/perfmodels}
directory of CHAMELEON sources.
To use these perfmodels, please set the following
@itemize @bullet
@item @env{STARPU_HOME} environment variable to:
@example
@code{<path to SOURCE_DIR>/simucore/perfmodels}
@end example
@item @env{STARPU_HOSTNAME} environment variable to the name of the machine to
simulate. For example, on our platform (PlaFRIM) with GPUs at Inria Bordeaux
@example
@env{STARPU_HOSTNAME}=mirage
@end example
Note that only POTRF kernels with block sizes of 320 or 960 (single and double
precision) on the mirage machine are available for now.
The database of models is subject to change; it should be enriched in the near
future.
@end itemize
@node Use out of core support with StarPU
@section Use out of core support with StarPU
If the matrix can not fit in the main memory, StarPU can automatically evict
tiles to the disk. The descriptors for the matrices which can not fit in the
main memory need to be created with @code{MORSE_Desc_Create_OOC}, so that MORSE
does not force StarPU to keep it in the main memory.
The following variables then need to be set:
@itemize @bullet
@item @env{STARPU_DISK_SWAP} environment variable to a place where to store
evicted tiles, for example:
@example
@env{STARPU_DISK_SWAP}=/tmp
@end example
@item @env{STARPU_DISK_SWAP_BACKEND} environment variable to the I/O method,
for example:
@example
@env{STARPU_DISK_SWAP_BACKEND}=unistd_o_direct
@end example
This will create a hierarchy of directories to store one file per tile. If that
poses problems, one can use the hdf5 I/O method which stores all tiles in a
single file.
@item @env{STARPU_LIMIT_CPU_MEM} environment variable to the amount of memory
that can be used in MBytes, for example:
@example
@env{STARPU_LIMIT_CPU_MEM}=1000
@end example
@end itemize
@c -*-texinfo-*-
@c This file is part of the CHAMELEON Handbook.
@c Copyright (C) 2017 Inria
@c Copyright (C) 2014 The University of Tennessee
@c Copyright (C) 2014 King Abdullah University of Science and Technology
@c See the file ../chameleon.texi for copying conditions.
@menu
* Downloading CHAMELEON::
* Build process of CHAMELEON::
@end menu
CHAMELEON can be built and installed by the standard means of CMake
(@uref{http://www.cmake.org/}).
General information about CMake, as well as installation binaries and CMake
source code are available from
@uref{http://www.cmake.org/cmake/resources/software.html}.
The following chapter is intended to briefly remind how these tools can be used
to install CHAMELEON.
@node Downloading CHAMELEON
@section Downloading CHAMELEON
@menu
* Getting Sources::
* Required dependencies::
* Optional dependencies::
@end menu
@node Getting Sources
@subsection Getting Sources
The latest official release tarballs of CHAMELEON sources are available for
download from
@uref{https://gforge.inria.fr/frs/download.php/file/34884/chameleon-0.9.1.tar.gz, chameleon-0.9.1}.
The latest development snapshot is available on gitlab:
@uref{https://gitlab.inria.fr/solverstack/chameleon}
@node Required dependencies
@subsection Required dependencies
@menu
* a BLAS implementation::
* CBLAS::
* a LAPACK implementation::
* LAPACKE::
* libtmg::
* QUARK::
* StarPU::
* hwloc::
* pthread::
@end menu
@node a BLAS implementation
@subsubsection a BLAS implementation
@uref{http://www.netlib.org/blas/, BLAS} (Basic Linear Algebra Subprograms),
are a de facto standard for basic linear algebra operations such as vector and
matrix multiplication.
FORTRAN implementation of BLAS is available from Netlib.
Also, C implementation of BLAS is included in GSL (GNU Scientific Library).
Both of these are reference implementations of BLAS; they are not
optimized for modern processor architectures and provide an order of magnitude
lower performance than optimized implementations.
Highly optimized implementations of BLAS are available from many hardware
vendors, such as Intel MKL, IBM ESSL and AMD ACML.
Fast implementations are also available as academic packages, such as ATLAS and
OpenBLAS.
The standard interface to BLAS is the FORTRAN interface.
@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
the reference BLAS from NETLIB, OpenBLAS and Intel MKL.
@node CBLAS
@subsubsection CBLAS
@uref{http://www.netlib.org/blas/#_cblas, CBLAS} is a C language interface to
BLAS.
Most commercial and academic implementations of BLAS also provide CBLAS.
Netlib provides a reference implementation of CBLAS on top of FORTRAN BLAS
(Netlib CBLAS).
Since GSL is implemented in C, it naturally provides CBLAS.
@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
the reference CBLAS from NETLIB, OpenBLAS and Intel MKL.
@node a LAPACK implementation
@subsubsection a LAPACK implementation
@uref{http://www.netlib.org/lapack/, LAPACK} (Linear Algebra PACKage) is a
software library for numerical linear algebra, a successor of LINPACK and
EISPACK and a predecessor of CHAMELEON.
LAPACK provides routines for solving linear systems of equations, linear least
square problems, eigenvalue problems and singular value problems.
Most commercial and academic BLAS packages also provide some LAPACK routines.
@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
the reference LAPACK from NETLIB, OpenBLAS and Intel MKL.
@node LAPACKE
@subsubsection LAPACKE
@uref{http://www.netlib.org/lapack/, LAPACKE} is a C language interface to
LAPACK (or CLAPACK).
It is produced by Intel in coordination with the LAPACK team and is available
in source code from Netlib in its original version (Netlib LAPACKE) and from
CHAMELEON website in an extended version (LAPACKE for CHAMELEON).
In addition to implementing the C interface, LAPACKE also provides routines
which automatically handle workspace allocation, making the use of LAPACK much
more convenient.
@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
the reference LAPACKE from NETLIB, OpenBLAS and Intel MKL.
@node libtmg
@subsubsection libtmg
@uref{http://www.netlib.org/lapack/, libtmg} is a component of the LAPACK
library, containing routines for generation
of input matrices for testing and timing of LAPACK.
The testing and timing suites of LAPACK require libtmg, but not the library
itself. Note that the LAPACK library can be built and used without libtmg.
@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
the reference TMGLIB from NETLIB, OpenBLAS and Intel MKL.
@node QUARK
@subsubsection QUARK
@uref{http://icl.cs.utk.edu/quark/, QUARK} (QUeuing And Runtime for Kernels)
provides a library that enables the dynamic execution of tasks with data
dependencies in a multi-core, multi-socket, shared-memory environment.
One of QUARK or StarPU Runtime systems has to be enabled in order to schedule
tasks on the architecture.
If QUARK is enabled then StarPU is disabled and conversely.
Note StarPU is enabled by default.
When CHAMELEON is linked with QUARK, it is not possible to exploit either
CUDA (for GPUs) or MPI (distributed-memory environment).
You can use StarPU to do so.
@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
the QUARK library 0.9.
@node StarPU
@subsubsection StarPU
@uref{http://runtime.bordeaux.inria.fr/StarPU/, StarPU} is a task programming
library for hybrid architectures.
StarPU handles run-time concerns such as:
@itemize @bullet
@item Task dependencies
@item Optimized heterogeneous scheduling
@item Optimized data transfers and replication between main memory and discrete
memories
@item Optimized cluster communications
@end itemize
StarPU can be used to benefit from GPUs and distributed-memory environment.
One of QUARK or StarPU runtime system has to be enabled in order to schedule
tasks on the architecture.
If StarPU is enabled then QUARK is disabled and conversely.
Note StarPU is enabled by default.
@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
StarPU-1.1 and 1.2 releases.
@node hwloc
@subsubsection hwloc
@uref{http://www.open-mpi.org/projects/hwloc/, hwloc} (Portable Hardware
Locality) is a software package for accessing the topology of a multicore
system including components like: cores, sockets, caches and NUMA nodes.
@c The topology discovery library, @code{hwloc}, is not mandatory to use StarPU
@c but strongly recommended.
It makes it possible to increase performance and to perform some
topology-aware scheduling.
@code{hwloc} is available in major distributions and for most OSes and can be
downloaded from @uref{http://www.open-mpi.org/software/hwloc}.
@strong{Caution about the compatibility:} hwloc should be compatible with the
version of StarPU used.
@node pthread
@subsubsection pthread
POSIX threads library is required to run CHAMELEON on Unix-like systems.
It is a standard component of any such system.
@comment Windows threads are used on Microsoft Windows systems.
@node Optional dependencies
@subsection Optional dependencies
@menu
* OpenMPI::
* Nvidia CUDA Toolkit::
* FxT::
@end menu
@node OpenMPI
@subsubsection OpenMPI
@uref{http://www.open-mpi.org/, OpenMPI} is an open source Message Passing
Interface implementation for execution on multiple nodes with
distributed-memory environment.
MPI can be enabled only if the runtime system chosen is StarPU (default).
To use MPI through StarPU, it is necessary to compile StarPU with MPI
enabled.
@strong{Caution about the compatibility:} OpenMPI should be built with the
--enable-mpi-thread-multiple option.
@node Nvidia CUDA Toolkit
@subsubsection Nvidia CUDA Toolkit
@uref{https://developer.nvidia.com/cuda-toolkit, Nvidia CUDA Toolkit} provides
a
comprehensive development environment for C and C++ developers building
GPU-accelerated applications.
CHAMELEON can use a set of low level optimized kernels coming from cuBLAS to
accelerate computations on GPUs.
The @uref{http://docs.nvidia.com/cuda/cublas/, cuBLAS} library is an
implementation of BLAS (Basic Linear Algebra Subprograms) on top of the Nvidia
CUDA runtime.
cuBLAS is normally distributed with the Nvidia CUDA Toolkit.
CUDA/cuBLAS can be enabled in CHAMELEON only if the runtime system chosen
is StarPU (default).
To use CUDA through StarPU, it is necessary to compile StarPU with CUDA
enabled.
@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with
CUDA releases from versions 4 to 7.5.
Your compiler must be compatible with CUDA.
@node FxT
@subsubsection FxT
@uref{http://download.savannah.gnu.org/releases/fkt/, FxT} stands for both
FKT (Fast Kernel Tracing) and FUT (Fast User Tracing).
This library provides efficient support for recording traces.
CHAMELEON can trace kernels execution on the different workers and produce
.paje files if FxT is enabled.
FxT can only be used through StarPU and StarPU must be compiled with FxT
enabled, see how to use this feature here @ref{Use FxT profiling through