diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a0a0318583a6d8bbcd2f37571c5611d402ce045e..6cd75ba0e3aba25be8cc0587e1331b1188da755d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,7 +19,7 @@ starpu-simgrid: - cd build - . $HOME/spack/share/spack/setup-env.sh - spack load hwloc && spack load fxt && spack load simgrid && spack load starpu - - cmake .. -DCHAMELEON_SIMULATION=ON -DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MAGMA=OFF -DCHAMELEON_USE_MPI=OFF + - cmake .. -DCHAMELEON_SIMULATION=ON -DCHAMELEON_USE_CUDA=ON -DCHAMELEON_USE_MPI=OFF - make -j2 - ctest -V diff --git a/CMakeLists.txt b/CMakeLists.txt index 0802349b53656984d5be450c6cfbb1da1665df58..6105696a81e83d57a93184bbf344f782ec3045c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,14 +190,6 @@ if (CHAMELEON_ENABLE_CUDA AND NOT CHAMELEON_USE_CUDA) message("-- ${BoldGreen}CHAMELEON_USE_CUDA is set to OFF, turn it ON to use CUDA (unsupported by Quark)${ColourReset}") endif() -# Enable MAGMA advanced kernels if CUDA is enabled -cmake_dependent_option(CHAMELEON_USE_MAGMA - "Enable MAGMA Cuda kernels" OFF - "CHAMELEON_USE_CUDA" OFF) -if (CHAMELEON_USE_CUDA AND NOT CHAMELEON_USE_MAGMA) - message("-- ${BoldGreen}CHAMELEON_USE_MAGMA is set to OFF, turn it ON to use MAGMA (only with StarPU)${ColourReset}") -endif() - # Enable FXT if StarPU option(CHAMELEON_ENABLE_TRACING "Enable tracing support" OFF) if (NOT CHAMELEON_ENABLE_TRACING) @@ -469,43 +461,6 @@ if(NOT CHAMELEON_SIMULATION) #endif() endif (CUDA_FOUND) - # CHAMELEON depends on MAGMA gpu kernels - # call our cmake module to test (in cmake_modules) - # change this call position if not appropriated - #---------------------------------------------- - if ( CUDA_FOUND AND CHAMELEON_USE_MAGMA ) - set(CHAMELEON_MAGMA_VERSION "1.4" CACHE STRING "oldest MAGMA version desired") - find_package(MAGMA ${CHAMELEON_MAGMA_VERSION}) - if ( MAGMA_FOUND ) - message("-- ${Blue}Add definition CHAMELEON_USE_MAGMA" - " - Use GPU kernels from MAGMA${ColourReset}") - set(CHAMELEON_USE_MAGMA 1) - if(MAGMA_INCLUDE_DIRS) - include_directories(${MAGMA_INCLUDE_DIRS}) - endif() - if(MAGMA_LIBRARY_DIRS) - # the RPATH to be used when installing - list(APPEND CMAKE_INSTALL_RPATH "${MAGMA_LIBRARY_DIRS}") - endif() - if (MAGMA_LIBRARIES) - list(INSERT EXTRA_LIBRARIES_CUDA 0 ${MAGMA_LIBRARIES}) - endif() - else( MAGMA_FOUND ) - if(MORSE_VERBOSE_FIND_PACKAGE) - if (NOT MAGMA_magma.h_DIRS) - Print_Find_Header_Status(magma magma.h) - endif () - if (NOT MAGMA_magma_LIBRARY) - Print_Find_Library_Status(magma libmagma) - endif () - else() - message(WARNING "MAGMA library has not been found and MORSE_VERBOSE_FIND_PACKAGE is set to OFF." - " Try to activate MORSE_VERBOSE_FIND_PACKAGE option (-DMORSE_VERBOSE_FIND_PACKAGE=ON) to get some hints for the detection") - endif() - message(FATAL_ERROR "MAGMA library is required but has not been found") - endif( MAGMA_FOUND ) - endif() - list(APPEND EXTRA_LIBRARIES ${EXTRA_LIBRARIES_CUDA}) endif(CHAMELEON_USE_CUDA) @@ -559,29 +514,17 @@ else (NOT CHAMELEON_SIMULATION) # Guard against mis-used simulation mode if(NOT DEFINED CHAMELEON_USE_CUDA) message(FATAL_ERROR "${BoldBlue}" - "In simulation mode CHAMELEON_USE_CUDA and CHAMELEON_USE_MAGMA should be set to" + "In simulation mode CHAMELEON_USE_CUDA should be set to" " ensure that the user is aware of the version to be used. If Chameleon's" " kernels are available for NVIDIA CUDA GPUs and if the according" " perfmodels are available in STARPU_HOME then use CHAMELEON_USE_CUDA=ON" - " else set CHAMELEON_USE_CUDA=OFF. The same idea is applicable with MAGMA." + " else set CHAMELEON_USE_CUDA=OFF." "${ColourReset}") endif() - if(NOT DEFINED CHAMELEON_USE_MAGMA) - message(WARNING "${BoldBlue}" - "In simulation mode CHAMELEON_USE_CUDA and CHAMELEON_USE_MAGMA should be set to" - " ensure that the user is aware of the version to be used. If Chameleon's" - " MAGMA kernels are available for NVIDIA CUDA GPUs and if the according" - " perfmodels are available in STARPU_HOME then use CHAMELEON_USE_MAGMA=ON" - " else set CHAMELEON_USE_MAGMA=OFF.${ColourReset}") - endif() # Add CUDA definition if required if (CHAMELEON_USE_CUDA) set(CHAMELEON_USE_CUDA 1) - # Add MAGMA definition if required - if (CHAMELEON_USE_MAGMA) - set(CHAMELEON_USE_MAGMA 1) - endif() endif() if (NOT CHAMELEON_SCHED_STARPU) @@ -1034,9 +977,6 @@ if(NOT CHAMELEON_SIMULATION) if(CHAMELEON_USE_CUDA) link_directories(${CUDA_LIBRARY_DIRS}) endif() - if(CHAMELEON_USE_MAGMA) - link_directories(${MAGMA_LIBRARY_DIRS}) - endif() endif() # Save extra dependencies (all required links) diff --git a/CTestConfig.cmake b/CTestConfig.cmake index bb95c68b23d5341d3da52ffd92abe579ae1d5789..ddb6f3e0c1e6d7050496a35081ad4958f1b9c587 100644 --- a/CTestConfig.cmake +++ b/CTestConfig.cmake @@ -4,18 +4,19 @@ ## # The following are required to uses Dart and the Cdash dashboard ## ENABLE_TESTING() ## INCLUDE(CTest) -set(CTEST_PROJECT_NAME "Morse-Magma") +set(CTEST_PROJECT_NAME "Chameleon") set(CTEST_NIGHTLY_START_TIME "00:00:00 GMT") set(CTEST_DROP_METHOD "http") set(CTEST_DROP_SITE "cdash.inria.fr") +# Shouldn't we change that to Chameleon ? set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Morse-Magma") set(CTEST_DROP_SITE_CDASH TRUE) #-------------------------------------------------------------------- # BUILDNAME variable construction -# This variable will be used to set the build name which will appear -# on the Morse-Magma dashboard http://cdash.inria.fr/CDash/ +# This variable will be used to set the build name which will appear +# on the Chameleon dashboard http://cdash.inria.fr/CDash/ #-------------------------------------------------------------------- # Start with the short system name, e.g. "Linux", "FreeBSD" or "Windows" if(NOT BUILDNAME) @@ -38,7 +39,7 @@ if(NOT BUILDNAME) set(BUILDNAME "${BUILDNAME}-${CMAKE_BUILD_TYPE}") endif(CMAKE_BUILD_TYPE) - # Specific options of Magma-Morse + # Specific options of Chameleon if(CHAMELEON_SCHED_QUARK) set(BUILDNAME "${BUILDNAME}-Quark") endif(CHAMELEON_SCHED_QUARK) diff --git a/ChangeLog b/ChangeLog index dcdeab28521c5860a56baf28363ff78ff7591ccd..f58b24a5b830349053cc0885cc8797b74372c21c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ -chameleon-??? +chameleon-1.0.0 ------------------------------------------------------------------------ +- MAGMA kernels are no longer supported in Chameleon - Add SVD/EVD drivers based on parallel first stage, and sequential LAPACK second stage and solve - Add First stage algorithm fo r the SVD/EVD solvers - add timing drivers time_zpotrs_tile and time_zgeqrs_tile diff --git a/INSTALL.txt b/INSTALL.txt index 9470afb3ccb637bdbd7f873e39d610116cf23c5f..a35db9bbde9b180c45d907190711ad613288bfb8 100644 --- a/INSTALL.txt +++ b/INSTALL.txt @@ -5,18 +5,18 @@ This is a brief discussion about CHAMELEON usage. For more information, please read the document users_guide. -Compilation of CHAMELEON libraries and executables are done with CMake +Compilation of CHAMELEON libraries and executables are done with CMake (http://www.cmake.org/). This version has been tested with CMake 2.8.8. -Usage: three steps are required to compile and install CHAMELEON +Usage: three steps are required to compile and install CHAMELEON -1) configure : -> cmake path/to/chameleon -DOPTION1= -DOPTION2= ... +1) configure : +> cmake path/to/chameleon -DOPTION1= -DOPTION2= ... see the "Options" section to get list of options see the "Dependencies detection" for details about libraries detection -2) build : -> make +2) build : +> make do not hesitate to use -j[ncores] option to speedup the compilation 3) install (optional) : @@ -37,7 +37,7 @@ lapack : netlib, openblas, eigen or intel mkl lapacke : netlib, openblas or intel mkl tmg : netlib, openblas or intel mkl -runtime : quark (http://icl.cs.utk.edu/quark/) or +runtime : quark (http://icl.cs.utk.edu/quark/) or starpu (http://runtime.bordeaux.inria.fr/StarPU/) hwloc : (http://www.open-mpi.org/projects/hwloc/) @@ -46,7 +46,6 @@ optional libraries cuda : (https://developer.nvidia.com/cuda-downloads) cublas : comes with cuda (http://docs.nvidia.com/cuda/cublas/) -magma : (http://icl.cs.utk.edu/magma/) (version 1.6.2 recommended) mpi : openmpi (http://www.open-mpi.org/) FxT : linux package (libfxt) or releases here http://download.savannah.gnu.org/releases/fkt/ @@ -59,19 +58,19 @@ Please look at the distrib/ directory which gives some hints for the installatio * Dependencies detection * ------------------------------------------- -You have different choices to detect dependencies on your system, either by -setting some environment variables containing paths to the libs and headers or +You have different choices to detect dependencies on your system, either by +setting some environment variables containing paths to the libs and headers or by specifying them directly at cmake configure. Different cases : -1) detection of dependencies through environment variables: +1) detection of dependencies through environment variables: - LD_LIBRARY_PATH should contain the list of paths where to find the libraries: * export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:install/path/to/your/lib - INCLUDE should contain the list of paths where to find the header files of libraries * export INCLUDE=$INCLUDE:install/path/to/your/headers 2) detection with user's given paths: - - you can specify the path at cmake configure by invoking - cmake path/to/your/CMakeLists.txt -DLIB_DIR=path/to/your/lib + - you can specify the path at cmake configure by invoking + cmake path/to/your/CMakeLists.txt -DLIB_DIR=path/to/your/lib where LIB stands for the name of the lib to look for * example: cmake path/to/your/CMakeLists.txt -DSTARPU_DIR=path/to/starpudir \ -DCBLAS_DIR= ... @@ -86,16 +85,15 @@ by specifying them directly at cmake configure. Different cases : * Options * ------------------------------------------- -You can optionally activate some options at cmake configure (like CUDA, MPI, ...) +You can optionally activate some options at cmake configure (like CUDA, MPI, ...) invoking cmake path/to/your/CMakeLists.txt -DOPTION1= -DOPTION2= ... example: cmake /home/toto/chameleon/ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX=/home/toto/install/ \ - -DCHAMELEON_USE_CUDA=ON \ - -DCHAMELEON_USE_MAGMA=ON \ + -DCHAMELEON_USE_CUDA=ON \ -DCHAMELEON_USE_MPI=ON \ - -DBLA_VENDOR=Intel10_64lp \ + -DBLA_VENDOR=Intel10_64lp \ -DSTARPU_DIR=/home/toto/install/starpu-1.1/build/include/starpu/1.1/ \ - -DCHAMELEON_ENABLE_TRACING=ON + -DCHAMELEON_ENABLE_TRACING=ON You can get the full list of options with -L[A][H] options of cmake command example: cmake -LH /home/toto/chameleon/ @@ -109,7 +107,7 @@ Some options (non-exhaustive list) : Basic CMake: ------------ CMAKE_BUILD_TYPE=Debug|Release -CMAKE_INSTALL_PREFIX=path/to/your/install/dir (where headers and libraries will be copied +CMAKE_INSTALL_PREFIX=path/to/your/install/dir (where headers and libraries will be copied when invoking make install) Related to specific modules (find_package): @@ -120,44 +118,43 @@ STARPU_DIR=... STARPU_INCDIR=... STARPU_LIBDIR=... # same idea can be used for some packages, replace STARPU by one of these: -BLAS - CBLAS - FXT - HWLOC - LAPACK - LAPACKE - MAGMA - QUARK - TMG +BLAS - CBLAS - FXT - HWLOC - LAPACK - LAPACKE - QUARK - TMG CHAMELEON specific: -------------------- CHAMELEON_USE_MPI=ON|OFF (default OFF) -CHAMELEON_USE_CUDA=ON|OFF (default OFF) -CHAMELEON_USE_MAGMA=ON|OFF (default OFF) +CHAMELEON_USE_CUDA=ON|OFF (default OFF) CHAMELEON_ENABLE_TRACING=ON|OFF (default OFF) CHAMELEON_SCHED_STARPU=ON|OFF (default ON) CHAMELEON_SCHED_QUARK=ON|OFF (default OFF) CHAMELEON_SIMULATION=ON|OFF (default OFF) Libraries detected with an official cmake module (see module files in CMAKE_ROOT/Modules/): -CUDA - MPI - Threads +CUDA - MPI - Threads Libraries detected with our cmake modules (see module files in cmake_modules/morse/find/ directory of CHAMELEON sources): -BLAS - CBLAS - FXT - HWLOC - LAPACK - LAPACKE - MAGMA - QUARK - STARPU - TMG +BLAS - CBLAS - FXT - HWLOC - LAPACK - LAPACKE - QUARK - STARPU - TMG ------------------------------------------- * Use FxT profiling through StarPU * ------------------------------------------- -StarPU can generate its own trace log files by compiling it with the --with-fxt -option at the configure step (you can have to specify the directory where you -installed FxT by giving --with-fxt=... instead of --with-fxt alone). -By doing so, traces are generated after each execution of a program which uses +StarPU can generate its own trace log files by compiling it with the --with-fxt +option at the configure step (you can have to specify the directory where you +installed FxT by giving --with-fxt=... instead of --with-fxt alone). +By doing so, traces are generated after each execution of a program which uses StarPU in the directory pointed by the STARPU_FXT_PREFIX environment variable. example: export STARPU_FXT_PREFIX=/home/toto/fxt_files/ -When executing a ./timing/... CHAMELEON program, if it has been enabled -(StarPU compiled with FxT and -DCHAMELEON_ENABLE_TRACING=ON), you can give the option --trace +When executing a ./timing/... CHAMELEON program, if it has been enabled +(StarPU compiled with FxT and -DCHAMELEON_ENABLE_TRACING=ON), you can give the option --trace to tell the program to generate trace log files. -Finally, to generate the trace file which can be opened with Vite program -(http://vite.gforge.inria.fr/), you have to use the starpu_fxt_tool tool of StarPU. -This tool should be in $STARPU_INSTALL_REPOSITORY/bin. -You can use it to generate the trace file like this: +Finally, to generate the trace file which can be opened with Vite program +(http://vite.gforge.inria.fr/), you have to use the starpu_fxt_tool tool of StarPU. +This tool should be in $STARPU_INSTALL_REPOSITORY/bin. +You can use it to generate the trace file like this: > path/to/your/install/starpu/bin/starpu_fxt_tool -i prof_filename There is one file per mpi processus (prof_filename_0, prof_filename_1 ...). To generate a trace of mpi programs you can call it like this: diff --git a/cmake_modules/GenPkgConfig.cmake b/cmake_modules/GenPkgConfig.cmake index 16749d22039efaea07f120678cacf60a151b0dfe..44b431e1454cbe86285dce51eed25c7b950b3c0b 100644 --- a/cmake_modules/GenPkgConfig.cmake +++ b/cmake_modules/GenPkgConfig.cmake @@ -16,7 +16,7 @@ # Univ. of Tennessee, # King Abdullah Univesity of Science and Technology # Univ. of California Berkeley, -# Univ. of Colorado Denver. +# Univ. of Colorado Denver. # # @version 0.9.1 # @author Cedric Castagnede @@ -57,7 +57,7 @@ ENDMACRO(CONVERT_LIBSTYLE_TO_PKGCONFIG) ### # # CLEAN_LIB_LIST: clean libraries lists to follow the pkg-config style -# used in GENERATE_PKGCONFIG_FILE +# used in GENERATE_PKGCONFIG_FILE # ### MACRO(CLEAN_LIB_LIST _package) @@ -96,7 +96,7 @@ MACRO(GENERATE_PKGCONFIG_FILE) set(CHAMELEON_PKGCONFIG_REQUIRED "") set(COREBLAS_PKGCONFIG_REQUIRED "") set(CUDABLAS_PKGCONFIG_REQUIRED "") - + # A list of private packages required by this package but not exposed to # applications set(CHAMELEON_PKGCONFIG_REQUIRED_PRIVATE "") @@ -127,12 +127,6 @@ MACRO(GENERATE_PKGCONFIG_FILE) list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "coreblas") if(CHAMELEON_USE_CUDA) - if(CHAMELEON_USE_MAGMA) - list(APPEND CUDABLAS_PKGCONFIG_REQUIRED_PRIVATE magma) - list(APPEND CHAMELEON_PKGCONFIG_REQUIRED_PRIVATE magma) - else() - - endif() list(APPEND CUDABLAS_PKGCONFIG_LIBS_PRIVATE ${CUDA_LIBRARIES}) list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "cudablas") endif() @@ -142,7 +136,7 @@ MACRO(GENERATE_PKGCONFIG_FILE) if(CHAMELEON_USE_CUDA) list(APPEND CHAMELEON_PKGCONFIG_LIBS -lcudablas) endif() - list(APPEND CHAMELEON_PKGCONFIG_LIBS + list(APPEND CHAMELEON_PKGCONFIG_LIBS -lcoreblas ${EXTRA_LIBRARIES} ) @@ -158,7 +152,7 @@ MACRO(GENERATE_PKGCONFIG_FILE) if(CHAMELEON_USE_CUDA) CLEAN_LIB_LIST(CUDABLAS) endif() - + # Create .pc file # --------------- SET(_output_chameleon_file "${CMAKE_BINARY_DIR}/chameleon.pc") diff --git a/cmake_modules/PrintOpts.cmake b/cmake_modules/PrintOpts.cmake index 1e7c9208c91a7357422e152821c86ef20f8d6d2b..f21bce9af93d87884c32e816234189d28dbda16b 100644 --- a/cmake_modules/PrintOpts.cmake +++ b/cmake_modules/PrintOpts.cmake @@ -16,7 +16,7 @@ # Univ. of Tennessee, # King Abdullah Univesity of Science and Technology # Univ. of California Berkeley, -# Univ. of Colorado Denver. +# Univ. of Colorado Denver. # # @version 0.9.0 # @author Florent Pruvost @@ -53,7 +53,6 @@ set(dep_message "${dep_message}" " Kernels specific\n" " BLAS ................: ${BLAS_VENDOR_FOUND}\n" " LAPACK...............: ${LAPACK_VENDOR_FOUND}\n" -" MAGMA ...............: ${CHAMELEON_USE_MAGMA}\n" "\n" " Trace ...............: ${CHAMELEON_ENABLE_TRACING}\n" " Simulation mode .....: ${CHAMELEON_SIMULATION}\n" diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake index 00aae15be5fd9d9662c9d4bf35453cafe9d66f81..0a974775b7192227b887dcba77515305083c1f13 160000 --- a/cmake_modules/morse_cmake +++ b/cmake_modules/morse_cmake @@ -1 +1 @@ -Subproject commit 00aae15be5fd9d9662c9d4bf35453cafe9d66f81 +Subproject commit 0a974775b7192227b887dcba77515305083c1f13 diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c index 712ded8af103e0b98a4e4fcc26237b04f5f8c8f2..36cf19e1b672bb219fdbd2eb6f28f0e221ebeb28 100644 --- a/compute/pzgelqf.c +++ b/compute/pzgelqf.c @@ -86,23 +86,6 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif -#if defined(CHAMELEON_USE_MAGMA) - /* Worker space - * - * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - */ - ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); - - /* Host space - * - * zgelqt = ib * A->nb + 3 * ib * ib + A->nb - * ztslqt = 3 * ib * A->nb + ib * ib + A->nb - */ - ws_host = chameleon_max( ws_host, ib * A->nb + 3 * ib * ib + A->nb ); - ws_host = chameleon_max( ws_host, 3 * ib * A->nb + ib * ib + A->nb ); -#endif - ws_worker *= sizeof(MORSE_Complex64_t); ws_host *= sizeof(MORSE_Complex64_t); diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index b24b6db8919a3426e3471d61fd1e866583bd3711..57b1d613cd415ba91dee83c99dfc968d8d15bc58 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -87,23 +87,6 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif -#if defined(CHAMELEON_USE_MAGMA) - /* Worker space - * - * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - */ - ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); - - /* Host space - * - * zgelqt = ib * A->nb + 3 * ib * ib + A->nb - * ztslqt = 3 * ib * A->nb + ib * ib + A->nb - */ - ws_host = chameleon_max( ws_host, ib * A->nb + 3 * ib * ib + A->nb ); - ws_host = chameleon_max( ws_host, 3 * ib * A->nb + ib * ib + A->nb ); -#endif - ws_worker *= sizeof(MORSE_Complex64_t); ws_host *= sizeof(MORSE_Complex64_t); diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c index a54aa885338de18240b261d50f746cbcbf220b5b..6e061f0ed2fdf05f64649c31c677f55928f7dd51 100644 --- a/compute/pzgeqrf.c +++ b/compute/pzgeqrf.c @@ -81,23 +81,6 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif -#if defined(CHAMELEON_USE_MAGMA) - /* Worker space - * - * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - */ - ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); - - /* Host space - * - * zgeqrt = ib * (A->mb+3*ib) + A->mb ) - * ztsqrt = 2 * ib * (A->nb+ib) + A->nb - */ - ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); - ws_host = chameleon_max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); -#endif - ws_worker *= sizeof(MORSE_Complex64_t); ws_host *= sizeof(MORSE_Complex64_t); diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c index a5e828c8b41c0b16b20bb39a9cc40b3d90bb254e..c5b026358376ce0d2e06481bd166e760983aa90c 100644 --- a/compute/pzgeqrfrh.c +++ b/compute/pzgeqrfrh.c @@ -85,23 +85,6 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif -#if defined(CHAMELEON_USE_MAGMA) - /* Worker space - * - * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - */ - ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); - - /* Host space - * - * zgeqrt = ib * (A->nb+3*ib) + A->nb ) - * ztsqrt = 2 * ib * (A->nb+ib) + A->nb - */ - ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); - ws_host = chameleon_max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); -#endif - ws_worker *= sizeof(MORSE_Complex64_t); ws_host *= sizeof(MORSE_Complex64_t); diff --git a/compute/pzgetrf_incpiv.c b/compute/pzgetrf_incpiv.c index f960873e6d6382cdb13b9594f1cebe459f7cad5e..5da79407cf9855db1d9c97ed607c53b6c0094516 100644 --- a/compute/pzgetrf_incpiv.c +++ b/compute/pzgetrf_incpiv.c @@ -64,13 +64,9 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, RUNTIME_options_init(&options, morse, sequence, request); ib = MORSE_IB; -#if defined(CHAMELEON_USE_MAGMA) - h_work_size = sizeof(MORSE_Complex64_t)*( 2*ib + 2*L->nb )*2*A->mb; - d_work_size = sizeof(MORSE_Complex64_t)*( ib )*2*A->mb; -#else h_work_size = sizeof(MORSE_Complex64_t)*( ib*L->nb ); d_work_size = 0; -#endif + RUNTIME_options_ws_alloc( &options, h_work_size, d_work_size ); /* necessary to avoid dependencies between tasks regarding the diag tile */ diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c index 497b574d4b812e9b7f36e86975da76e3d120b608..2173faa09def0a8a31440baf84f563dbce432f6f 100644 --- a/compute/pzhetrd_he2hb.c +++ b/compute/pzhetrd_he2hb.c @@ -85,23 +85,6 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo, ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif -#if defined(CHAMELEON_USE_MAGMA) - /* Worker space - * - * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - */ - ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); - - /* Host space - * - * zgeqrt = ib * (A->mb+3*ib) + A->mb ) - * ztsqrt = 2 * ib * (A->nb+ib) + A->nb - */ - ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); - ws_host = chameleon_max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); -#endif - ws_worker *= sizeof(MORSE_Complex64_t); ws_host *= sizeof(MORSE_Complex64_t); diff --git a/compute/pzpotrf.c b/compute/pzpotrf.c index 44b7d25f58f0dd5d51a64082e7fa55223f013ca5..bc648c74cc5e60867dae15ac5c6d32594a358742 100644 --- a/compute/pzpotrf.c +++ b/compute/pzpotrf.c @@ -54,16 +54,6 @@ void morse_pzpotrf(MORSE_enum uplo, MORSE_desc_t *A, return; RUNTIME_options_init(&options, morse, sequence, request); -#ifdef CHAMELEON_USE_MAGMA - if (0) /* Disable the workspace as long as it is is not used (See StarPU codelet) */ - { - int nb = MORSE_IB; /* Approximate nb for simulation */ -#if !defined(CHAMELEON_SIMULATION) - nb = magma_get_zpotrf_nb(A->nb); -#endif - ws_host = sizeof(MORSE_Complex64_t)*nb*nb; - } -#endif RUNTIME_options_ws_alloc( &options, 0, ws_host ); /* diff --git a/compute/pzpotrimm.c b/compute/pzpotrimm.c index d1b7323c1718a52d1a49eed776e6337e0b30b822..a9d779b3049f23beaf128bd0bea2fadeb74f016b 100644 --- a/compute/pzpotrimm.c +++ b/compute/pzpotrimm.c @@ -57,17 +57,6 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de return; RUNTIME_options_init(&options, morse, sequence, request); -#ifdef CHAMELEON_USE_MAGMA - { -#if !defined(CHAMELEON_SIMULATION) - int nb = magma_get_zpotrf_nb(A->nb); -#else - int nb = A->nb; -#endif - RUNTIME_options_ws_alloc( &options, nb*nb, 0 ); - } -#endif - /* * MorseLower */ @@ -489,8 +478,5 @@ void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_de } } -#ifdef CHAMELEON_USE_MAGMA - RUNTIME_options_ws_free(&options); -#endif RUNTIME_options_finalize(&options, morse); } diff --git a/compute/pztpqrt.c b/compute/pztpqrt.c index 1807857a266542ca2a902ea6511df151fcee439c..d20512803bd4d269d2cb28a0e77800d26f9d9e0a 100644 --- a/compute/pztpqrt.c +++ b/compute/pztpqrt.c @@ -70,21 +70,6 @@ void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); #endif -#if defined(CHAMELEON_USE_MAGMA) - /* Worker space - * - * ztpqrt = max( A->nb * (ib+1), ib * (ib + A->nb) ) - */ - ws_worker = chameleon_max( ws_worker, ib * (ib + A->nb) ); - - /* Host space - * - * ztpqrt = 2 * ib * (A->nb+ib) + A->nb - */ - ws_host = chameleon_max( ws_host, ib * (A->mb + 3 * ib) + A->mb ); - ws_host = chameleon_max( ws_host, 2 * ib * (A->nb + ib) + A->nb ); -#endif - ws_worker *= sizeof(MORSE_Complex64_t); ws_host *= sizeof(MORSE_Complex64_t); diff --git a/control/common.h b/control/common.h index ab1a98867bfed1be80fd09e0db14969773b93e6b..194359d3b1658da48915651773335cea23f67e79 100644 --- a/control/common.h +++ b/control/common.h @@ -69,17 +69,9 @@ #include <mpi.h> #endif - -/** **************************************************************************** - * Linear Algebra headers - **/ -#if defined(CHAMELEON_USE_MAGMA) && !defined(CHAMELEON_SIMULATION) -#include <magma.h> -#endif - /** **************************************************************************** - * Line to avoid conflict with magma, because, we don't know why - * but lapacke provide a wrong interface of lapack in fortran + * Line to avoid conflict with other linear algebra libraries, because, we + * don't know why but lapacke provide a wrong interface of lapack in fortran **/ #ifndef LAPACK_NAME #define LAPACK_NAME(a, b) lapackef77_##a diff --git a/control/config.h.in b/control/config.h.in index 6691c4c16293cf8a3c8aec769a1496d16d1e6973..48c3ce5aa77dae52d2e5f4b7b39ea116def31699 100644 --- a/control/config.h.in +++ b/control/config.h.in @@ -42,7 +42,4 @@ #cmakedefine HAVE_STARPU_MPI_COMM_RANK #cmakedefine HAVE_STARPU_MPI_CACHED_RECEIVE -/* MAGMA functions */ -#cmakedefine HAVE_MAGMA_GETRF_INCPIV_GPU - #endif /* CONFIG_H_HAS_BEEN_INCLUDED */ diff --git a/control/control.c b/control/control.c index 9fa06b7811bdbd3bcff345ff6df39c664dfb2da2..7bb54781d91930dd1b50bb3a1ba6915893b79f73 100644 --- a/control/control.c +++ b/control/control.c @@ -110,9 +110,6 @@ int MORSE_InitPar(int ncpus, int ncudas, int nthreads_per_worker) } } # endif -#endif -#if defined(CHAMELEON_USE_MAGMA) && !defined(CHAMELEON_SIMULATION) - magma_init(); #endif RUNTIME_init_scheduler( morse, ncpus, ncudas, nthreads_per_worker ); return MORSE_SUCCESS; @@ -142,9 +139,6 @@ int MORSE_Finalize(void) RUNTIME_barrier(morse); # endif RUNTIME_finalize_scheduler( morse ); -#if defined(CHAMELEON_USE_MAGMA) && !defined(CHAMELEON_SIMULATION) - magma_finalize(); -#endif #if defined(CHAMELEON_USE_MPI) if (!morse->mpi_outer_init) diff --git a/control/workspace.c b/control/workspace.c index 097a26711706660712005db66c7e858a2af827e9..efcb49f8c9f5be51ed3f9cbf6cdc3bcaff815b69 100644 --- a/control/workspace.c +++ b/control/workspace.c @@ -133,10 +133,6 @@ int morse_alloc_ipiv(int M, int N, MORSE_enum func, int type, MORSE_desc_t **des NB = MORSE_NB; IB = MORSE_IB; -#if defined(CHAMELEON_USE_MAGMA) -/* IB *= 2; */ -#endif - NT = (N%NB==0) ? (N/NB) : ((N/NB)+1); MT = (M%NB==0) ? (M/NB) : ((M/NB)+1); diff --git a/cudablas/compute/CMakeLists.txt b/cudablas/compute/CMakeLists.txt index 20e012939889e6f16ea762c5f432c39c3e61f675..5ab3c65c497794515305cb1a7157fb585a55c7db 100644 --- a/cudablas/compute/CMakeLists.txt +++ b/cudablas/compute/CMakeLists.txt @@ -55,22 +55,23 @@ if( CHAMELEON_USE_CUBLAS_V2 ) ) endif( CHAMELEON_USE_CUBLAS_V2 ) -if( CHAMELEON_USE_MAGMA ) - set(ZSRC - ${ZSRC} - cuda_zgelqt.c - cuda_zgeqrt.c - cuda_zgessm.c - cuda_zgetrf.c - cuda_zlauum.c - cuda_zpotrf.c - cuda_zssssm.c - cuda_ztrtri.c - cuda_ztslqt.c - cuda_ztsqrt.c - cuda_ztstrf.c - ) -endif() +# Former MAGMA files that are no longer supported +# if( CHAMELEON_USE_MAGMA ) +# set(ZSRC +# ${ZSRC} +# cuda_zgelqt.c +# cuda_zgeqrt.c +# cuda_zgessm.c +# cuda_zgetrf.c +# cuda_zlauum.c +# cuda_zpotrf.c +# cuda_zssssm.c +# cuda_ztrtri.c +# cuda_ztslqt.c +# cuda_ztsqrt.c +# cuda_ztstrf.c +# ) +# endif() precisions_rules_py( CUDABLAS_SRCS_GENERATED "${ZSRC}" @@ -94,9 +95,6 @@ add_dependencies(cudablas cudablas_include) set_property(TARGET cudablas PROPERTY LINKER_LANGUAGE Fortran) target_link_libraries(cudablas coreblas ${CUDA_LIBRARIES}) -if(CHAMELEON_USE_MAGMA) - target_link_libraries(cudablas ${MAGMA_LIBRARIES}) -endif(CHAMELEON_USE_MAGMA) # installation # ------------ diff --git a/cudablas/include/cudablas.h b/cudablas/include/cudablas.h index b181fc8332a36c1024f80c97d175e29f4ad1e0ae..6732a0b5b9301f06ba422b32c7d855178693ac2d 100644 --- a/cudablas/include/cudablas.h +++ b/cudablas/include/cudablas.h @@ -65,10 +65,6 @@ #endif /* defined(CHAMELEON_USE_CUBLAS_V2) */ -#if defined(CHAMELEON_USE_MAGMA) -#include <magma.h> -#endif - /** **************************************************************************** * MORSE types and constants **/ diff --git a/cudablas/include/cudablas_z.h b/cudablas/include/cudablas_z.h index 7d9ccbbf7228ff7f8ba48929e345ee1bf74469d5..0413fd5417dc3a06962c9c7de968413277304705 100644 --- a/cudablas/include/cudablas_z.h +++ b/cudablas/include/cudablas_z.h @@ -56,21 +56,6 @@ int CUDA_zttmqr( MORSE_enum side, MORSE_enum trans, int M1, int N1, int M2, int int CUDA_zunmlqt(MORSE_enum side, MORSE_enum trans, int M, int N, int K, int IB, const cuDoubleComplex *A, int LDA, const cuDoubleComplex *T, int LDT, cuDoubleComplex *C, int LDC, cuDoubleComplex *WORK, int LDWORK, CUBLAS_STREAM_PARAM ); int CUDA_zunmqrt(MORSE_enum side, MORSE_enum trans, int M, int N, int K, int IB, const cuDoubleComplex *A, int LDA, const cuDoubleComplex *T, int LDT, cuDoubleComplex *C, int LDC, cuDoubleComplex *WORK, int LDWORK, CUBLAS_STREAM_PARAM ); -#if defined(CHAMELEON_USE_MAGMA) -int CUDA_zgelqt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *v, magma_int_t ldv, magmaDoubleComplex *dt, magma_int_t lddt, magmaDoubleComplex *t, magma_int_t ldt, magmaDoubleComplex *dd, magmaDoubleComplex *d, magma_int_t ldd, magmaDoubleComplex *tau, magmaDoubleComplex *hwork, magmaDoubleComplex *dwork, CUBLAS_STREAM_PARAM ); -int CUDA_zgeqrt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *v, magma_int_t ldv, magmaDoubleComplex *dt, magma_int_t lddt, magmaDoubleComplex *t, magma_int_t ldt, magmaDoubleComplex *dd, magmaDoubleComplex *d, magma_int_t ldd, magmaDoubleComplex *tau, magmaDoubleComplex *hwork, magmaDoubleComplex *dwork, CUBLAS_STREAM_PARAM ); -int CUDA_zgessm( char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, cuDoubleComplex *dL1, magma_int_t lddl1, cuDoubleComplex *dL, magma_int_t lddl, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info ); -int CUDA_zgetrf_incpiv( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, cuDoubleComplex *hA, magma_int_t ldha, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *hL, magma_int_t ldhl, cuDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, cuDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info ); -int CUDA_zgetrf_nopiv( magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info ); -int CUDA_zlauum( char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info ); -int CUDA_zpotrf( magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info ); -int CUDA_zssssm( magma_storev_t storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, magmaDoubleComplex *dA1, magma_int_t ldda1, magmaDoubleComplex *dA2, magma_int_t ldda2, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info ); -int CUDA_ztrtri( magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info ); -int CUDA_ztslqt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *da1, magma_int_t ldda1, magmaDoubleComplex *da2, magma_int_t ldda2, magmaDoubleComplex *a2, magma_int_t lda2, magmaDoubleComplex *dt, magma_int_t lddt, magmaDoubleComplex *t, magma_int_t ldt, magmaDoubleComplex *dd, magmaDoubleComplex *d, magma_int_t ldd, magmaDoubleComplex *tau, magmaDoubleComplex *hwork, magmaDoubleComplex *dwork, CUBLAS_STREAM_PARAM ); -int CUDA_ztsqrt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *da1, magma_int_t ldda1, magmaDoubleComplex *da2, magma_int_t ldda2, magmaDoubleComplex *a2, magma_int_t lda2, magmaDoubleComplex *dt, magma_int_t lddt, magmaDoubleComplex *t, magma_int_t ldt, magmaDoubleComplex *dd, magmaDoubleComplex *d, magma_int_t ldd, magmaDoubleComplex *tau, magmaDoubleComplex *hwork, magmaDoubleComplex *dwork, CUBLAS_STREAM_PARAM ); -int CUDA_ztstrf( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb, cuDoubleComplex *hU, magma_int_t ldhu, cuDoubleComplex *dU, magma_int_t lddu, cuDoubleComplex *hA, magma_int_t ldha, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *hL, magma_int_t ldhl, cuDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, cuDoubleComplex *hwork, magma_int_t ldhwork, cuDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info ); -#endif - #ifdef __cplusplus } #endif diff --git a/docs/texinfo/chapters/configuration.texi b/docs/texinfo/chapters/configuration.texi index 883e82f38e672448996adf010735b3739dc440eb..92aec0af675cd1e3faf2736986293c155fc56d11 100644 --- a/docs/texinfo/chapters/configuration.texi +++ b/docs/texinfo/chapters/configuration.texi @@ -57,7 +57,6 @@ Example of configuration using the command line cmake ~/chameleon/ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX=~/install \ -DCHAMELEON_USE_CUDA=ON \ - -DCHAMELEON_USE_MAGMA=ON \ -DCHAMELEON_USE_MPI=ON \ -DBLA_VENDOR=Intel10_64lp \ -DSTARPU_DIR=~/install/starpu-1.1 \ @@ -120,9 +119,6 @@ to link with QUARK library (runtime system) to link with CUDA runtime (implementation paradigm for accelerated codes on GPUs) and cuBLAS library (optimized BLAS kernels on GPUs), can only be used with StarPU -@item @option{-DCHAMELEON_USE_MAGMA}=@option{trigger} (default: @code{OFF}) -to link with MAGMA library (kernels on GPUs, higher level than cuBLAS), can only -be used with StarPU @item @option{-DCHAMELEON_USE_MPI}=@option{trigger} (default: @code{OFF}) to link with MPI library (message passing implementation for use of multiple @@ -200,7 +196,7 @@ directory of the LIBNAME library headers installation directory of the LIBNAME libraries (.so, .a, .dylib, etc) installation @end table LIBNAME can be one of the following: BLAS - CBLAS - FXT - HWLOC - -LAPACK - LAPACKE - MAGMA - QUARK - STARPU - TMG. +LAPACK - LAPACKE - QUARK - STARPU - TMG. See paragraph about @ref{Dependencies detection} for details. Libraries detected with an official CMake module (see module files in @@ -220,7 +216,6 @@ Libraries detected with CHAMELEON cmake modules (see module files in @item HWLOC @item LAPACK @item LAPACKE -@item MAGMA @item QUARK @item STARPU @item TMG @@ -343,9 +338,6 @@ precision) on mirage machine are available for now. Database of models is subject to change, it should be enrich in a near future. @end itemize -One can additionally decide to enable the magma kernels by setting the cmake -option @option{-DCHAMELEON_SIMULATION_MAGMA=ON} . - @node Use out of core support with StarPU @section Use out of core support with StarPU diff --git a/docs/texinfo/chapters/installing.texi b/docs/texinfo/chapters/installing.texi index fec50baab727989d058965e42c973823436c69e8..edde68e8d3ab181a9e3f737f15c3def389a37b10 100644 --- a/docs/texinfo/chapters/installing.texi +++ b/docs/texinfo/chapters/installing.texi @@ -194,7 +194,6 @@ It is a standard component of any such system. @menu * OpenMPI:: * Nvidia CUDA Toolkit:: -* MAGMA:: * FxT:: @end menu @@ -231,27 +230,7 @@ enabled. @strong{Caution about the compatibility:} CHAMELEON has been mainly tested with CUDA releases from versions 4 to 7.5. -Your compiler and MAGMA library must be compatible with CUDA. - -@node MAGMA -@subsubsection MAGMA - -@uref{http://icl.cs.utk.edu/magma/, MAGMA} project aims to develop a dense -linear algebra library similar to LAPACK but for heterogeneous/hybrid -architectures, starting with current "Multicore+GPU" systems. -CHAMELEON can use a set of high level MAGMA routines to accelerate -computations on GPUs. -To fully benefit from GPUs, the user should enable MAGMA in addition to -CUDA/cuBLAS. - -@strong{Caution about the compatibility:} CHAMELEON has been mainly tested with -MAGMA releases from versions 1.4 to 1.6. -MAGMA library must be compatible with CUDA. -MAGMA library should be built with sequential versions of BLAS/LAPACK. -We should not get some MAGMA link flags embarking multithreaded -BLAS/LAPACK because it could affect performances (take care about the -MAGMA link flag @option{-lmkl_intel_thread} for example that we could inheritate -from the pkg-config file @file{magma.pc}). +Your compiler must be compatible with CUDA. @node FxT @subsubsection FxT diff --git a/docs/texinfo/chapters/introduction.texi b/docs/texinfo/chapters/introduction.texi index 695d252cdedd97883d5f183c3bf74d9a57e92aa8..b94921f37cc78db466d9e7e40806d004f5b9e7c9 100644 --- a/docs/texinfo/chapters/introduction.texi +++ b/docs/texinfo/chapters/introduction.texi @@ -181,11 +181,11 @@ task-based algorithms behave regarding different runtime systems implementations. Using CHAMELEON with @uref{http://runtime.bordeaux.inria.fr/StarPU/, StarPU} runtime system allows to exploit GPUs through -kernels provided by @uref{https://developer.nvidia.com/cublas, cuBLAS} and -@uref{http://icl.cs.utk.edu/magma/, MAGMA} and clusters of interconnected -nodes with distributed memory (using @uref{http://www.open-mpi.org/, MPI}). -Computation of very large systems with dense matrices on a cluster of nodes is -still being experimented and stabilized. +kernels provided by @uref{https://developer.nvidia.com/cublas, cuBLAS} +and clusters of interconnected nodes with distributed memory (using +@uref{http://www.open-mpi.org/, MPI}). Computation of very large +systems with dense matrices on a cluster of nodes is still being +experimented and stabilized. It is not expected to get stable performances with the current version using MPI. diff --git a/example/basic_zposv/CMakeLists.txt b/example/basic_zposv/CMakeLists.txt index 89208ee18e6d85b2c7602f8c8b796905f0dcc479..555e7f72e27859f786328dc2a1f81cf030b5c11d 100644 --- a/example/basic_zposv/CMakeLists.txt +++ b/example/basic_zposv/CMakeLists.txt @@ -68,16 +68,13 @@ endif() if(NOT CHAMELEON_SIMULATION) - if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA) + if(CHAMELEON_USE_CUDA) list(APPEND libs_for_examples cudablas) endif() if(CHAMELEON_USE_CUDA) link_directories(${CUDA_LIBRARY_DIRS}) endif() - if(CHAMELEON_USE_MAGMA) - link_directories(${MAGMA_LIBRARY_DIRS}) - endif() list(APPEND libs_for_examples coreblas diff --git a/example/lapack_to_morse/CMakeLists.txt b/example/lapack_to_morse/CMakeLists.txt index 00ad921835813900f02c4240234d6afb25da6117..c627c5eea1e59ef8294fa973fe146b7a2cbc9669 100644 --- a/example/lapack_to_morse/CMakeLists.txt +++ b/example/lapack_to_morse/CMakeLists.txt @@ -25,7 +25,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) # compilation for other sources step1 and > -set(LTM_SOURCES +set(LTM_SOURCES step1.c step2.c step3.c @@ -63,22 +63,19 @@ elseif(CHAMELEON_SCHED_QUARK) endif() -# specific compilation for step0 because we potentially want to use +# specific compilation for step0 because we potentially want to use # multithreaded BLAS and LAPACK libraries for this step unset(libs_for_step0) if(NOT CHAMELEON_SIMULATION) - if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA) + if(CHAMELEON_USE_CUDA) list(APPEND libs_for_ltm cudablas) endif() if(CHAMELEON_USE_CUDA) link_directories(${CUDA_LIBRARY_DIRS}) endif() - if(CHAMELEON_USE_MAGMA) - link_directories(${MAGMA_LIBRARY_DIRS}) - endif() list(APPEND libs_for_step0 ${libs_for_ltm}) @@ -104,7 +101,7 @@ if(NOT CHAMELEON_SIMULATION) list(APPEND libs_for_step0 coreblas ${LAPACKE_LIBRARIES} - ${CBLAS_LIBRARIES} + ${CBLAS_LIBRARIES} ${LAPACK_PAR_LIBRARIES} ${BLAS_PAR_LIBRARIES} ${HWLOC_LIBRARIES} diff --git a/example/lapack_to_morse/step1.c b/example/lapack_to_morse/step1.c index cb87ad8ea0e98b7e0e25c6618e3eb09609ddc9d0..7af0347280792a7226ffe2de9ad2774112123e74 100644 --- a/example/lapack_to_morse/step1.c +++ b/example/lapack_to_morse/step1.c @@ -40,7 +40,7 @@ * MORSE_Set to give some specific parameters. * This code allows you to expoit parallelism coming from all the cores of your * computer and from gpus if you have properly linked with pthread and CUDA - * ( + CUBLAS and MAGMA optionnaly ). + * ( + CUBLAS optionnaly ). * The precision is: double */ int main(int argc, char *argv[]) { diff --git a/example/link_chameleon/CMakeLists.txt b/example/link_chameleon/CMakeLists.txt index fbaab584516a9544f10bde89bc0d9ab88f64b960..e13eab1a9e4a85ad5aa099fd8eb13809fc8943cb 100644 --- a/example/link_chameleon/CMakeLists.txt +++ b/example/link_chameleon/CMakeLists.txt @@ -27,7 +27,7 @@ if (MORSE_DISTRIB_DIR) if (MORSE_CHAMELEON_USE_QUARK) find_package(CHAMELEON COMPONENTS QUARK) else() - find_package(CHAMELEON COMPONENTS STARPU MPI CUDA MAGMA FXT) + find_package(CHAMELEON COMPONENTS STARPU MPI CUDA FXT) endif() if (CHAMELEON_FOUND) link_directories(${CHAMELEON_LIBRARY_DIRS_DEP}) diff --git a/example/link_chameleon/link_chameleon.c b/example/link_chameleon/link_chameleon.c index b38b12d501abf720d6073a284947c3bec23da801..b63b45e327f14fa0cc3ffdc8adc5dee8e135e954 100644 --- a/example/link_chameleon/link_chameleon.c +++ b/example/link_chameleon/link_chameleon.c @@ -164,7 +164,7 @@ static void read_args(int argc, char *argv[], int *iparam){ * Print a header message to summarize main parameters */ static void print_header(char *prog_name, int * iparam) { -#if defined(MAGMAMORSE_SIMULATION) +#if defined(CHAMELEON_SIMULATION) double eps = 0.; #else double eps = LAPACKE_dlamch_work( 'e' ); @@ -208,7 +208,7 @@ static void print_header(char *prog_name, int * iparam) { } /* - * test external application link with magmamorse + * test external application link with chameleon */ int main(int argc, char *argv[]) { diff --git a/example/out_of_core/CMakeLists.txt b/example/out_of_core/CMakeLists.txt index 14aec88478b51dd70c1d072e57b6e7fa2a3cd76c..01bb2704125c0a655626d31dde07a5d7e66edffe 100644 --- a/example/out_of_core/CMakeLists.txt +++ b/example/out_of_core/CMakeLists.txt @@ -42,16 +42,13 @@ link_directories(${STARPU_LIBRARY_DIRS}) if(NOT CHAMELEON_SIMULATION) - if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA) + if(CHAMELEON_USE_CUDA) list(APPEND libs_for_ooc cudablas) endif() if(CHAMELEON_USE_CUDA) link_directories(${CUDA_LIBRARY_DIRS}) endif() - if(CHAMELEON_USE_MAGMA) - link_directories(${MAGMA_LIBRARY_DIRS}) - endif() list(APPEND libs_for_ooc coreblas diff --git a/include/chameleon_config.h.in b/include/chameleon_config.h.in index 065558af0fff44f708f4534f535ccce782506d38..83350ba3a9532ab721de01357d3bddb647e13abc 100644 --- a/include/chameleon_config.h.in +++ b/include/chameleon_config.h.in @@ -41,7 +41,6 @@ #cmakedefine CHAMELEON_USE_CUDA #cmakedefine CHAMELEON_USE_CUBLAS #cmakedefine CHAMELEON_USE_CUBLAS_V2 -#cmakedefine CHAMELEON_USE_MAGMA /* Simulating */ #cmakedefine CHAMELEON_SIMULATION diff --git a/include/morse_fortran.h b/include/morse_fortran.h index 5cf58ae11509daf183e860396b822ed8b6eb2217..813a31b011577249061b2e5262672c332765a027 100644 --- a/include/morse_fortran.h +++ b/include/morse_fortran.h @@ -6,19 +6,19 @@ ! This software is a computer program whose purpose is to process ! Matrices Over Runtime Systems @ Exascale (MORSE). More information ! can be found on the following website: http://www.inria.fr/en/teams/morse. -! +! ! This software is governed by the CeCILL-B license under French law and -! abiding by the rules of distribution of free software. You can use, +! abiding by the rules of distribution of free software. You can use, ! modify and/ or redistribute the software under the terms of the CeCILL-B ! license as circulated by CEA, CNRS and INRIA at the following URL -! "http://www.cecill.info". -! +! "http://www.cecill.info". +! ! As a counterpart to the access to the source code and rights to copy, ! modify and redistribute granted by the license, users are provided only ! with a limited warranty and the software's author, the holder of the ! economic rights, and the successive licensors have only limited -! liability. -! +! liability. +! ! In this respect, the user's attention is drawn to the risks associated ! with loading, using, modifying and/or developing or reproducing the ! software by the user in light of its specific status of free software, @@ -26,10 +26,10 @@ ! therefore means that it is reserved for developers and experienced ! professionals having in-depth computer knowledge. Users are therefore ! encouraged to load and test the software's suitability as regards their -! requirements in conditions enabling the security of their systems and/or -! data to be ensured and, more generally, to use and operate it in the -! same conditions as regards security. -! +! requirements in conditions enabling the security of their systems and/or +! data to be ensured and, more generally, to use and operate it in the +! same conditions as regards security. +! ! The fact that you are presently reading this means that you have had ! knowledge of the CeCILL-B license and that you accept its terms. ! @@ -123,14 +123,14 @@ parameter ( MorseNonsymPosv = 243 ) parameter ( MorseSymPosv = 244 ) - integer MorseNoPacking - integer MorsePackSubdiag - integer MorsePackSupdiag - integer MorsePackColumn - integer MorsePackLowerBand - integer MorsePackRow - integer MorsePackUpeprBand - integer MorsePackAll + integer MorseNoPacking + integer MorsePackSubdiag + integer MorsePackSupdiag + integer MorsePackColumn + integer MorsePackLowerBand + integer MorsePackRow + integer MorsePackUpeprBand + integer MorsePackAll parameter ( MorseNoPacking = 291 ) parameter ( MorsePackSubdiag = 292 ) parameter ( MorsePackSupdiag = 293 ) @@ -239,4 +239,3 @@ parameter ( PRIORITY = 16 ) parameter ( CALLBACK = 17 ) parameter ( REDUX = 18 ) - diff --git a/include/morse_kernels.h b/include/morse_kernels.h index 2d29233992f8ea1e80d7f262caf5b89437f1f5a9..d043eba37bdf729988a8dccb5434c2e2552e248f 100644 --- a/include/morse_kernels.h +++ b/include/morse_kernels.h @@ -12,8 +12,8 @@ * * @file morse_kernels.h * - * MAGMA codelets kernel - * MAGMA is a software package provided by Univ. of Tennessee, + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, * Univ. of California Berkeley and Univ. of Colorado Denver, * and INRIA Bordeaux Sud-Ouest * diff --git a/include/morse_struct.h b/include/morse_struct.h index 2aacefcdf501d36326ba63fbc2081400fae2af65..a87b8ca47c7f4716666ba6164450c1fe5416f415 100644 --- a/include/morse_struct.h +++ b/include/morse_struct.h @@ -161,7 +161,7 @@ typedef struct morse_request_s { * sharing common exception handling. **/ typedef struct morse_sequence_s { - MORSE_bool status; /* MAGMA_SUCCESS or appropriate error code */ + MORSE_bool status; /* MORSE_SUCCESS or appropriate error code */ MORSE_request_t *request; /* failed request */ void *schedopt; } MORSE_sequence_t; diff --git a/runtime/parsec/CMakeLists.txt b/runtime/parsec/CMakeLists.txt index f572592156b65137e0c182086de6137de4646f26..41ee03942296bcdd9d40483b3aa3efb809c5a6b7 100644 --- a/runtime/parsec/CMakeLists.txt +++ b/runtime/parsec/CMakeLists.txt @@ -29,17 +29,6 @@ cmake_minimum_required(VERSION 2.8) -# check if magma_dgetrf_incpiv_gpu is accessible in libmagma and activate it in chameleon -if ( CBLAS_FOUND AND LAPACKE_FOUND AND LAPACK_FOUND AND CUDA_FOUND AND CUDA_CUBLAS_LIBRARIES AND MAGMA_FOUND ) - set(CMAKE_REQUIRED_LIBRARIES "${CBLAS_LIBRARIES};${LAPACKE_LIBRARIES};${LAPACK_SEQ_LIBRARIES};${CUDA_LIBRARIES};${CUDA_CUBLAS_LIBRARIES};${MAGMA_LIBRARIES};${COREBLAS_LIBRARIES}") - unset(MAGMA_DGETRF_INCPIV_GPU_FOUND CACHE) - check_function_exists(magma_dgetrf_incpiv_gpu MAGMA_DGETRF_INCPIV_GPU_FOUND) - if ( MAGMA_DGETRF_INCPIV_GPU_FOUND ) - message(STATUS "Set HAVE_MAGMA_GETRF_INCPIV_GPU") - set(HAVE_MAGMA_GETRF_INCPIV_GPU 1) - endif() -endif() - # Generate headers for all possible precisions # -------------------------------------------- set(RUNTIME_HDRS_GENERATED "") diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt index fa7952a15e44766d18c1b6cd04613444cb720f38..e1cff588501ba375e0b5491167bac6084adf6ce5 100644 --- a/runtime/quark/CMakeLists.txt +++ b/runtime/quark/CMakeLists.txt @@ -29,7 +29,7 @@ cmake_minimum_required(VERSION 2.8) -# Generate the magma headers for all possible precisions +# Generate the quark headers for all possible precisions # ------------------------------------------------------ set(RUNTIME_HDRS_GENERATED "") set(ZHDR @@ -105,9 +105,9 @@ set_property(TARGET chameleon_quark PROPERTY LINKER_LANGUAGE Fortran) set_property(TARGET chameleon_quark PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib") target_link_libraries(chameleon_quark coreblas ${QUARK_LIBRARIES_DEP}) -if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA) +if(CHAMELEON_USE_CUDA) target_link_libraries(chameleon_quark cudablas) -endif(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA) +endif(CHAMELEON_USE_CUDA) add_dependencies(chameleon_quark chameleon_include diff --git a/runtime/quark/include/morse_quark.h b/runtime/quark/include/morse_quark.h index e841a251e55f571a39002967e7fd3cc3933b830e..2c4dc590b80725a669b731606761b240a0de9579 100644 --- a/runtime/quark/include/morse_quark.h +++ b/runtime/quark/include/morse_quark.h @@ -12,8 +12,8 @@ * * @file morse_quark.h * - * MAGMA codelets kernel - * MAGMA is a software package provided by Univ. of Tennessee, + * MORSE codelets kernel + * MORSE is a software package provided by Univ. of Tennessee, * Univ. of California Berkeley and Univ. of Colorado Denver, * and INRIA Bordeaux Sud-Ouest * @@ -23,10 +23,6 @@ * @date 2011-06-01 * **/ - -/******************************************************************************* - * MAGMA facilities of interest to both src and magmablas directories - **/ #ifndef _MORSE_QUARK_H_ #define _MORSE_QUARK_H_ diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt index 7b297366715f1852d8bb178dfbfa279e3b1a3750..f2109a8ce4956edc2a27b3b0b5d9e8912e6a7855 100644 --- a/runtime/starpu/CMakeLists.txt +++ b/runtime/starpu/CMakeLists.txt @@ -29,17 +29,6 @@ cmake_minimum_required(VERSION 2.8) -# check if magma_dgetrf_incpiv_gpu is accessible in libmagma and activate it in chameleon -if ( CBLAS_FOUND AND LAPACKE_FOUND AND LAPACK_FOUND AND CUDA_FOUND AND CUDA_CUBLAS_LIBRARIES AND MAGMA_FOUND ) - set(CMAKE_REQUIRED_LIBRARIES "${CBLAS_LIBRARIES};${LAPACKE_LIBRARIES};${LAPACK_SEQ_LIBRARIES};${CUDA_LIBRARIES};${CUDA_CUBLAS_LIBRARIES};${MAGMA_LIBRARIES};${COREBLAS_LIBRARIES}") - unset(MAGMA_DGETRF_INCPIV_GPU_FOUND CACHE) - check_function_exists(magma_dgetrf_incpiv_gpu MAGMA_DGETRF_INCPIV_GPU_FOUND) - if ( MAGMA_DGETRF_INCPIV_GPU_FOUND ) - message(STATUS "Set HAVE_MAGMA_GETRF_INCPIV_GPU") - set(HAVE_MAGMA_GETRF_INCPIV_GPU 1) - endif() -endif() - # Generate headers for all possible precisions # -------------------------------------------- set(RUNTIME_HDRS_GENERATED "") diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c index 22355977823a4df1aa73e905cc7db4d158025d1f..e215ce20f9898969a609b186b433522efcf77126 100644 --- a/runtime/starpu/codelets/codelet_zgelqt.c +++ b/runtime/starpu/codelets/codelet_zgelqt.c @@ -155,55 +155,7 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg) } #endif /* !defined(CHAMELEON_SIMULATION) */ -#if defined(CHAMELEON_USE_MAGMA) -#if !defined(CHAMELEON_SIMULATION) -static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_starpu_ws_t *h_work; - int m; - int n; - int ib; - cuDoubleComplex *h_A, *h_T, *h_D, *h_W, *h_TAU; - cuDoubleComplex *d_A, *d_T, *d_D, *d_W; - int lda, ldt; - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); - - /* Gather pointer to data on device */ - d_A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - d_T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - d_W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); /* m*ib + ib*ib*/ - d_D = d_W + m*ib; - - /* scratch data on host */ - /* ib*n + ib*ib + max(m,n) + ib*ib + ib*ib */ - h_A = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work); - - /* Gather pointer to scratch data on host */ - h_T = h_A + ib*n; - h_TAU = h_T + ib*ib; - h_W = h_TAU + chameleon_max(m,n); - h_D = h_W + ib*ib; - - RUNTIME_getStream(stream); - - CUDA_zgelqt( - m, n, ib, - d_A, lda, h_A, ib, - d_T, ldt, h_T, ib, - d_D, h_D, ib, h_TAU, - h_W, d_W, stream ); - - cudaThreadSynchronize(); -} -#endif /* defined(CHAMELEON_USE_MAGMA) */ -#endif /* !defined(CHAMELEON_SIMULATION) */ - /* * Codelet definition */ -#if defined(CHAMELEON_USE_MAGMA) -CODELETS(zgelqt, 3, cl_zgelqt_cpu_func, cl_zgelqt_cuda_func, 0) -#else CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c index 868c10c16064001632a036ec7b5a7791f6fbb0fa..595bafa1b86321d57ec47e76f2dfdd96922801d9 100644 --- a/runtime/starpu/codelets/codelet_zgeqrt.c +++ b/runtime/starpu/codelets/codelet_zgeqrt.c @@ -154,56 +154,9 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg) WORK = TAU + chameleon_max( m, n ); CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK); } - - -#if defined(CHAMELEON_USE_MAGMA) -static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_starpu_ws_t *h_work; - int m; - int n; - int ib; - cuDoubleComplex *h_A, *h_T, *h_D, *h_W, *h_TAU; - cuDoubleComplex *d_A, *d_T, *d_D, *d_W; - int lda, ldt; - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldt, &h_work); - - /* Gather pointer to data on device */ - d_A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - d_T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - d_W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); /* ib*n + ib * ib*/ - d_D = d_W + ib*n; - - /* scratch data on host */ - /* m*ib + ib*ib + max(m,n) + ib*ib + ib*ib */ - h_A = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work); - - /* Gather pointer to scratch data on host */ - h_T = h_A + m*ib; - h_TAU = h_T + ib*ib; - h_W = h_TAU + chameleon_max(m,n); - h_D = h_W + ib*ib; - - RUNTIME_getStream(stream); - - CUDA_zgeqrt( - m, n, ib, - d_A, lda, h_A, m, - d_T, ldt, h_T, ib, - d_D, h_D, ib, h_TAU, - h_W, d_W, stream); - - cudaThreadSynchronize(); -} -#endif /* defined(CHAMELEON_USE_MAGMA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if defined(CHAMELEON_USE_MAGMA) -CODELETS(zgeqrt, 3, cl_zgeqrt_cpu_func, cl_zgeqrt_cuda_func, 0) -#else CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c index 547111784edd1c1e30720fbb22e6f4f44d5485dd..1b5a72908c1b76b9a1b34b46d62e3a65cbf02153 100644 --- a/runtime/starpu/codelets/codelet_zgessm.c +++ b/runtime/starpu/codelets/codelet_zgessm.c @@ -137,42 +137,9 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda); CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda); } - -#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU) -static void cl_zgessm_cuda_func(void *descr[], void *cl_arg) -{ - int m; - int n; - int k; - int ib; - int *IPIV; - cuDoubleComplex *dL, *dD, *dA; - int lddl, lddd, ldda; - int info = 0; - /* - * hwork => nb*nb - */ - dL = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - dD = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &lddl, &lddd, &ldda); - - CUDA_zgessm( - MagmaColMajor, m, n, k, ib, - IPIV, dL, lddl, dD, lddd, dA, ldda, &info ); - - cudaThreadSynchronize(); - - return; -} -#endif /* defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if (defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)) -CODELETS(zgessm, 3, cl_zgessm_cpu_func, cl_zgessm_cuda_func, 0) -#else CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c index ad3475cb764564a20fac78f3afd4b6e063bfe58f..bc91e972a6d224bd783a0dc1ba02cfae6fdbf734 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c @@ -147,99 +147,10 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work); CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info); - -#if defined(CHAMELEON_USE_MAGMA) - { - MORSE_Complex64_t *L = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); - /* - * L stores: - * L1 L2 L3 ... - * L1^-1 L2^-1 L3^-1 ... - */ - /* Compute L-1 in lower rectangle of L */ - if ( ldl >= 2*ib ) - { - int i, sb; - - L += ib; - for (i=0; i<n; i+=ib) { - sb = chameleon_min( ib, n-i ); - CORE_zlacpy(MorseUpperLower, sb, sb, A+(i*lda+i), lda, L+(i*ldl), ldl ); - - CORE_ztrtri( MorseLower, MorseUnit, sb, L+(i*ldl), ldl, &info ); - if (info != 0 ) { - fprintf(stderr, "ERROR, trtri returned with info = %d\n", info); - } - } - } - } -#endif -} - - -/* - * Codelet GPU - */ -#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU) -static void cl_zgetrf_incpiv_cuda_func(void *descr[], void *cl_arg) -{ - int m; - int n; - int ib; - cuDoubleComplex *hA, *dA; - cuDoubleComplex *hL, *dL; - cuDoubleComplex *dwork; - MORSE_starpu_ws_t *h_work; - int lda, ldl; - int *IPIV; - MORSE_bool check_info; - int iinfo; - int info; - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work); - - dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - dL = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - /* - * hwork => at least (IB+NB)*IB contains all hA and hL - * dwork => at least IB*NB - */ - hA = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work); - dwork = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - - hL = hA + lda*ib; - - /* Initialize L to 0 */ - memset(hL, 0, ib*ib*sizeof(cuDoubleComplex)); - - if ( ldl >= 2*ib ) { - /* Let's compute the inverses in the bottom part of L */ - dL += ib; - } else { - /* We prefer to stick with TRSM */ - dL = NULL; - hL = NULL; - } - - CUDA_zgetrf_incpiv( - MagmaColMajor, m, n, ib, - hA, lda, dA, lda, - hL, ib, dL, ldl, - IPIV, - dwork, lda, - &info ); - - cudaThreadSynchronize(); } -#endif /* defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU) */ #endif /* !defined(CHAMELEON_SIMULATION) */ - /* * Codelet definition */ -#if defined(HAVE_MAGMA_GETRF_INCPIV_GPU) && ( defined(CHAMELEON_USE_MAGMA) ) -CODELETS(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func, cl_zgetrf_incpiv_cuda_func, 0) -#else CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c index ab9d87acebd7cfafb43e597c38b3f220e5ac2133..8ca85664bf98c89e205b848e96642a6a0d859175 100644 --- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c +++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c @@ -123,35 +123,9 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo); CORE_zgetrf_nopiv(m, n, ib, A, lda, &info); } - -/* - * Codelet GPU - */ -#if defined(CHAMELEON_USE_MAGMA) -static void cl_zgetrf_nopiv_cuda_func(void *descr[], void *cl_arg) -{ - int m; - int n; - int ib; - cuDoubleComplex *dA; - int lda; - int iinfo; - - int info = 0; - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo); - dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - CUDA_zgetrf_nopiv( m, n, dA, lda, &info ); - cudaThreadSynchronize(); -} -#endif #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if defined(CHAMELEON_USE_MAGMA) -CODELETS(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func, cl_zgetrf_nopiv_cuda_func, 0) -#else CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c index f578695ff8599d3a0804d44f38d75ecf7125771a..7f23c56821d3f39b14fda2e2828da23cb118f17f 100644 --- a/runtime/starpu/codelets/codelet_zlauum.c +++ b/runtime/starpu/codelets/codelet_zlauum.c @@ -77,30 +77,9 @@ static void cl_zlauum_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA); CORE_zlauum(uplo, N, A, LDA); } - -#if defined(CHAMELEON_USE_MAGMA) -static void cl_zlauum_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum uplo; - int info = 0; - int N; - cuDoubleComplex *A; - int LDA; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA); - CUDA_zlauum( uplo, N, A, LDA, &info); - cudaThreadSynchronize(); - return; -} -#endif #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if defined(CHAMELEON_USE_MAGMA) -CODELETS(zlauum, 1, cl_zlauum_cpu_func, cl_zlauum_cuda_func, 0) -#else CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c index 857cf3356edda7507b788e90ce56682cdd1f4608..686814274424ef112cf8bb90ca6c44333e8af9f3 100644 --- a/runtime/starpu/codelets/codelet_zpotrf.c +++ b/runtime/starpu/codelets/codelet_zpotrf.c @@ -83,48 +83,10 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo); CORE_zpotrf(uplo, n, A, lda, &info); } - -#ifdef CHAMELEON_USE_MAGMA -static void cl_zpotrf_cuda_func(void *descr[], void *cl_arg) -{ - cudaStream_t stream[2], currentt_stream; - MORSE_enum uplo; - int n; - cuDoubleComplex *A; - /* cuDoubleComplex *hA; */ - int lda; - int iinfo; - int info = 0; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo); - - /* /\* */ - /* * hwork => nb*nb */ - /* *\/ */ - /* hA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); */ - -/* stream[0] = starpu_cuda_get_local_stream(); */ -/* if ( cudaStreamCreate( stream+1 ) != CUDA_SUCCESS ){ */ -/* fprintf(stderr, "Error while creating stream in codelet_zpotrf\n"); */ -/* exit(-1); */ -/* } */ - - CUDA_zpotrf( uplo, n, A, lda, &info); - - cudaThreadSynchronize(); -/* cudaStreamDestroy( stream[1] ); */ - - return; -} -#endif #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if defined CHAMELEON_USE_MAGMA -CODELETS(zpotrf, 1, cl_zpotrf_cpu_func, cl_zpotrf_cuda_func, 0) -#else CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func) -#endif + diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c index a1fae08bb05f5eb552ea5a2492b543009b294a71..6d1c3ee08cefee4ab99bd72354fb4c1cb6684ef9 100644 --- a/runtime/starpu/codelets/codelet_zssssm.c +++ b/runtime/starpu/codelets/codelet_zssssm.c @@ -174,56 +174,10 @@ static void cl_zssssm_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV); CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV); } - -#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU) -static void cl_zssssm_cuda_func(void *descr[], void *cl_arg) -{ - int m1; - int n1; - int m2; - int n2; - int k; - int ib; - cuDoubleComplex *dA1; - int lda1; - cuDoubleComplex *dA2; - int lda2; - cuDoubleComplex *dL1; - int ldl1; - cuDoubleComplex *dL2; - int ldl2; - int *IPIV; - int info; - - starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV); - - dA1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - dA2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - dL1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - dL2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); - - if ( ldl1 >= 2*ib ) { - /* dL1 stores L and invL and the kernel is just using the inverted part */ - dL1 += ib; - } - - CUDA_zssssm( - MagmaColMajor, m1, n1, m2, n2, k, ib, - dA1, lda1, dA2, lda2, - dL1, ldl1, dL2, ldl2, - IPIV, &info); - - cudaThreadSynchronize(); -} -#endif #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if (defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)) -CODELETS(zssssm, 4, cl_zssssm_cpu_func, cl_zssssm_cuda_func, 0) -#else CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c index 1f619cd097dbd1a75dbdc1f6f6b9d606ef3020be..73d1a439e4c6baca68e701d2d96645fceca82b80 100644 --- a/runtime/starpu/codelets/codelet_ztrtri.c +++ b/runtime/starpu/codelets/codelet_ztrtri.c @@ -85,32 +85,9 @@ static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo); CORE_ztrtri(uplo, diag, N, A, LDA, &info); } - -#if defined(CHAMELEON_USE_MAGMA) -static void cl_ztrtri_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_enum uplo; - MORSE_enum diag; - int N; - cuDoubleComplex *A; - int LDA; - int iinfo; - int info = 0; - - A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo); - CUDA_ztrtri( uplo, diag, N, A, LDA, &info); - cudaThreadSynchronize(); - return; -} -#endif #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if defined(CHAMELEON_USE_MAGMA) -CODELETS(ztrtri, 1, cl_ztrtri_cpu_func, cl_ztrtri_cuda_func, 0) -#else CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_ztslqt.c b/runtime/starpu/codelets/codelet_ztslqt.c index 101feebfe80cede6ddb1f759a5883da3a495ce79..56c278094de88b9bc90214642700e670b3312cd9 100644 --- a/runtime/starpu/codelets/codelet_ztslqt.c +++ b/runtime/starpu/codelets/codelet_ztslqt.c @@ -174,54 +174,9 @@ static void cl_ztslqt_cpu_func(void *descr[], void *cl_arg) WORK = TAU + chameleon_max( m, n ); CORE_ztslqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); } - -#if defined(CHAMELEON_USE_MAGMA) && 0 -static void cl_ztslqt_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_starpu_ws_t *h_work; - int m; - int n; - int ib; - cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W; - cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W; - int lda1, lda2, ldt; - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work); - - /* Gather pointer to data on device */ - d_A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - d_A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - d_T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - d_W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* 2*ib*m + ib*ib */ - d_D = d_W + 2*ib*m; - - /* scratch data on host */ - /* ib*n + ib*n + max(m,n) + ib*m + ib*ib */ - h_A2 = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work); - h_T = h_A2 + ib*n; - h_TAU = h_T + ib*n; - h_W = h_TAU + chameleon_max(m,n); - h_D = h_W + ib*m; - - RUNTIME_getStream(stream); - CUDA_ztslqt( - m, n, ib, - d_A1, lda1, d_A2, lda2, - h_A2, ib, - d_T, ldt, h_T, ib, - d_D, h_D, ib, h_TAU, - h_W, d_W, stream); - - cudaThreadSynchronize(); -} -#endif #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if (defined(CHAMELEON_USE_MAGMA)) && 0 -CODELETS(ztslqt, 4, cl_ztslqt_cpu_func, cl_ztslqt_cuda_func, 0) -#else CODELETS_CPU(ztslqt, 4, cl_ztslqt_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_ztsqrt.c b/runtime/starpu/codelets/codelet_ztsqrt.c index 4c5d03fe2b4eeb60fa30dd14f9a250f3bfae080c..969da0611698b3c9c72b23eaa04d869b3411f105 100644 --- a/runtime/starpu/codelets/codelet_ztsqrt.c +++ b/runtime/starpu/codelets/codelet_ztsqrt.c @@ -165,53 +165,9 @@ static void cl_ztsqrt_cpu_func(void *descr[], void *cl_arg) WORK = TAU + chameleon_max( m, n ); CORE_ztsqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK); } - -#if defined(CHAMELEON_USE_MAGMA) -static void cl_ztsqrt_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_starpu_ws_t *h_work; - int m; - int n; - int ib; - cuDoubleComplex *h_A2, *h_T, *h_D, *h_TAU, *h_W; - cuDoubleComplex *d_A1, *d_A2, *d_T, *d_D, *d_W; - int lda1, lda2, ldt; - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda1, &lda2, &ldt, &h_work); - - /* Gather pointer to data on device */ - d_A1 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - d_A2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - d_T = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - d_W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* 2*ib*n + ib*ib */ - d_D = d_W + 2*ib*n; - - /* scratch data on host */ - /* m*ib + ib*ib + max(m,n) + ib*n + ib*ib */ - h_A2 = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(h_work); - h_T = h_A2 + m*ib; - h_TAU = h_T + ib*ib; - h_W = h_TAU + chameleon_max(m,n); - h_D = h_W + ib*n; - - RUNTIME_getStream(stream); - CUDA_ztsqrt( - m, n, ib, - d_A1, lda1, d_A2, lda2, - h_A2, lda2, - d_T, ldt, h_T, ib, - d_D, h_D, ib, h_TAU, - h_W, d_W, stream); - cudaThreadSynchronize(); -} -#endif #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if defined(CHAMELEON_USE_MAGMA) -CODELETS(ztsqrt, 4, cl_ztsqrt_cpu_func, cl_ztsqrt_cuda_func, 0) -#else CODELETS_CPU(ztsqrt, 4, cl_ztsqrt_cpu_func) -#endif diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c index eae5108ee9c28ba1335d524f6b6a5dd9a65c060f..59f7428077125914ce178b330265057444137afb 100644 --- a/runtime/starpu/codelets/codelet_ztstrf.c +++ b/runtime/starpu/codelets/codelet_ztstrf.c @@ -176,102 +176,11 @@ static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg) starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, &IPIV, &d_work, &ldwork, &check_info, &iinfo); CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info); - -#if defined(CHAMELEON_USE_MAGMA) - /* - * L stores the following if enough place: - * L1 L2 L3 ... - * L1^-1 L2^-1 L3^-1 ... - */ - /* Compute L-1 in lower rectangle of L */ - if ( ldl >= 2*ib ) - { - int i, sb; - for (i=0; i<n; i+=ib) { - sb = chameleon_min( ib, n-i ); - CORE_zlacpy(MorseUpperLower, sb, sb, L+(i*ldl), ldl, L+(i*ldl)+ib, ldl ); - - CORE_ztrtri( MorseLower, MorseUnit, sb, L+(i*ldl)+ib, ldl, &info ); - if (info != 0 ) { - fprintf(stderr, "ERROR, trtri returned with info = %d\n", info); - } - } - } -#endif -} - - -/* - * Codelet GPU - */ -/* TODO/WARNING: tstrf is not working on GPU for now */ -#if defined(CHAMELEON_USE_MAGMA) && 0 -static void cl_ztstrf_cuda_func(void *descr[], void *cl_arg) -{ - MORSE_starpu_ws_t *d_work; - int m; - int n; - int ib; - int nb; - cuDoubleComplex *hU, *dU; - int ldu; - cuDoubleComplex *hA, *dA; - int lda; - cuDoubleComplex *hL, *dL; - int ldl; - int *ipiv; - cuDoubleComplex *hw2, *hw, *dw; - int ldwork; - MORSE_bool check_info; - int iinfo; - int info; - - starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl, &ipiv, - &d_work, &ldwork, &check_info, &iinfo); - - dU = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]); - dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]); - dL = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); - - /* - * hwork => 2*nb*(2*ib+2nb) - * dwork => 2*ib*nb - */ - hw2 = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); - dw = (cuDoubleComplex*)RUNTIME_starpu_ws_getlocal(d_work); - - hU = hw2; - hA = hU + ldu * nb; - hL = hA + lda * nb; - hw = hL + ldl * nb; - - /* Download first panel from A and U */ - cublasGetMatrix( nb, n, sizeof(cuDoubleComplex), dU, ldu, hU, ldu ); - cublasGetMatrix( m, ib, sizeof(cuDoubleComplex), dA, lda, hA, lda ); - - /* Initialize L to 0 */ - memset(hL, 0, ldl*nb*sizeof(cuDoubleComplex)); - - CUDA_ztstrf( - MagmaColMajor, m, n, ib, nb, - hU, ldu, dU, ldu, - hA, lda, dA, lda, - hL, ldl, dL, ldl, - ipiv, - hw, ldwork, dw, lda, - &info ); - - cudaThreadSynchronize(); } -#endif #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if (defined(CHAMELEON_USE_MAGMA) && 0) -CODELETS(ztstrf, 4, cl_ztstrf_cpu_func, cl_ztstrf_cuda_func, 0) -#else CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func) -#endif diff --git a/runtime/starpu/include/morse_starpu.h b/runtime/starpu/include/morse_starpu.h index f0649aeaf585e3f45fc4f5a548b8ab0a5d5c8401..095b6886bb62c956501fcc40c8d4fb22ba67b26b 100644 --- a/runtime/starpu/include/morse_starpu.h +++ b/runtime/starpu/include/morse_starpu.h @@ -23,12 +23,6 @@ * @date 2011-06-01 * **/ - -/******************************************************************************/ - -/* - * MORSE facilities of interest to both src and magmablas directories - **/ #ifndef _MORSE_STARPU_H_ #define _MORSE_STARPU_H_ diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index a095b33b38da178dcab0ba38304ce5f919ecffcf..0bd42f7ce230e7afe08e49f33f6e4b94ff32ef9d 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -124,17 +124,13 @@ endif() if(NOT CHAMELEON_SIMULATION) - if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA) + if(CHAMELEON_USE_CUDA) list(APPEND libs_for_tests - cudablas - ) + cudablas) endif() if(CHAMELEON_USE_CUDA) link_directories(${CUDA_LIBRARY_DIRS}) endif() - if(CHAMELEON_USE_MAGMA) - link_directories(${MAGMA_LIBRARY_DIRS}) - endif() list(APPEND libs_for_tests coreblas diff --git a/testing/lin/CMakeLists.txt b/testing/lin/CMakeLists.txt index da3f049c41fbed9c7edb5fa6f27ac04ce0629461..290647bb4df6a729e77894b0c9e396bb4d86c418 100644 --- a/testing/lin/CMakeLists.txt +++ b/testing/lin/CMakeLists.txt @@ -153,7 +153,7 @@ elseif(MORSE_SCHED_QUARK) list(APPEND libs_for_tests coreblas) endif() -foreach(_dep MAGMA LAPACKE LAPACK CBLAS BLAS CUDA HWLOC MPI DL) +foreach(_dep LAPACKE LAPACK CBLAS BLAS CUDA HWLOC MPI DL) if(HAVE_${_dep}) list(APPEND libs_for_tests ${${_dep}_LIBRARY}) endif() diff --git a/timing/CMakeLists.txt b/timing/CMakeLists.txt index d30584ac43575b725abc8d244c92385a3d77bc17..61334fa4209b3e44a91e7381968027af4fe67b1e 100644 --- a/timing/CMakeLists.txt +++ b/timing/CMakeLists.txt @@ -222,17 +222,13 @@ endif() if(NOT CHAMELEON_SIMULATION) - if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA) + if(CHAMELEON_USE_CUDA) list(APPEND libs_for_timings - cudablas - ) + cudablas) endif() if(CHAMELEON_USE_CUDA) link_directories(${CUDA_LIBRARY_DIRS}) endif() - if(CHAMELEON_USE_MAGMA) - link_directories(${MAGMA_LIBRARY_DIRS}) - endif() list(APPEND libs_for_timings coreblas diff --git a/timing/time_zgetrf_incpiv_tile.c b/timing/time_zgetrf_incpiv_tile.c index 907f53ac3423f1fbce55a4966274d5da2e3e733d..a0a20509a40e50a32e48b5c45bc2c42225253d62 100644 --- a/timing/time_zgetrf_incpiv_tile.c +++ b/timing/time_zgetrf_incpiv_tile.c @@ -36,13 +36,6 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) check = 0; } -#if defined(CHAMELEON_USE_MAGMA) - if ( iparam[IPARAM_NB]%iparam[IPARAM_IB] != 0 ) { - fprintf(stderr, "NB must be a multiple of IB for LU on GPU\n"); - exit(-1); - } -#endif - /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, M, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, check, MORSE_Complex64_t, MorseComplexDouble, LDB, M, NRHS ); diff --git a/timing/time_zgetrs_incpiv_tile.c b/timing/time_zgetrs_incpiv_tile.c index 3ba081e9cdbd64632b5a7d2c39e1cf61a9529e54..29b9c41b2ec4bbc83896f124d2c70785601a84c8 100644 --- a/timing/time_zgetrs_incpiv_tile.c +++ b/timing/time_zgetrs_incpiv_tile.c @@ -37,13 +37,6 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) check = 0; } -#if defined(CHAMELEON_USE_MAGMA) - if ( iparam[IPARAM_NB]%iparam[IPARAM_IB] != 0 ) { - fprintf(stderr, "NB must be a multiple of IB for LU on GPU\n"); - exit(-1); - } -#endif - /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, M, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, check, MORSE_Complex64_t, MorseComplexDouble, LDB, M, NRHS );