Compare revisions

Mathieu Faverge · Mathieu Faverge · Mathieu Faverge · PRUVOST Florent · Mathieu Faverge · PRUVOST Florent
--- a/.gitignore
+++ b/.gitignore
@@ -83,3 +83,569 @@ install_manifest.txt
 compile_commands.json
 CTestTestfile.cmake

+#################################################################
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+##
+## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
+#################################################################
+
+# User-specific files
+*.rsuser
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Mono auto generated files
+mono_crash.*
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+[Ww][Ii][Nn]32/
+[Aa][Rr][Mm]/
+[Aa][Rr][Mm]64/
+bld/
+[Bb]in/
+[Oo]bj/
+[Ll]og/
+[Ll]ogs/
+
+# Visual Studio 2015/2017 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# Visual Studio 2017 auto generated files
+Generated\ Files/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NUnit
+*.VisualState.xml
+TestResult.xml
+nunit-*.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+# Benchmark Results
+BenchmarkDotNet.Artifacts/
+
+# .NET Core
+project.lock.json
+project.fragment.lock.json
+artifacts/
+
+# ASP.NET Scaffolding
+ScaffoldingReadMe.txt
+
+# StyleCop
+StyleCopReport.xml
+
+# Files built by Visual Studio
+*_i.c
+*_p.c
+*_h.h
+*.ilk
+*.meta
+*.obj
+*.iobj
+*.pch
+*.pdb
+*.ipdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*_wpftmp.csproj
+*.log
+*.tlog
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+
+# Visual Studio Trace Files
+*.e2e
+
+# TFS 2012 Local Workspace
+$tf/
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# AxoCover is a Code Coverage Tool
+.axoCover/*
+!.axoCover/settings.json
+
+# Coverlet is a free, cross platform Code Coverage Tool
+coverage*.json
+coverage*.xml
+coverage*.info
+
+# Visual Studio code coverage results
+*.coverage
+*.coveragexml
+
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+
+# Web workbench (sass)
+.sass-cache/
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# Note: Comment the next line if you want to checkin your web deploy settings,
+# but database connection strings (with potential passwords) will be unencrypted
+*.pubxml
+*.publishproj
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+
+# NuGet Packages
+*.nupkg
+# NuGet Symbol Packages
+*.snupkg
+# The packages folder can be ignored because of Package Restore
+**/[Pp]ackages/*
+# except build/, which is used as an MSBuild target.
+!**/[Pp]ackages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/[Pp]ackages/repositories.config
+# NuGet v3's project.json files produces more ignorable files
+*.nuget.props
+*.nuget.targets
+
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+
+# Microsoft Azure Emulator
+ecf/
+rcf/
+
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+*.appx
+*.appxbundle
+*.appxupload
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!?*.[Cc]ache/
+
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+orleans.codegen.cs
+
+# Including strong name files can present a security risk
+# (https://github.com/github/gitignore/pull/2483#issue-259490424)
+#*.snk
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+ServiceFabricBackup/
+*.rptproj.bak
+
+# SQL Server files
+*.mdf
+*.ldf
+*.ndf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+*.rptproj.rsuser
+*- [Bb]ackup.rdl
+*- [Bb]ackup ([0-9]).rdl
+*- [Bb]ackup ([0-9][0-9]).rdl
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+node_modules/
+
+# Visual Studio 6 build log
+*.plg
+
+# Visual Studio 6 workspace options file
+*.opt
+
+# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+*.vbw
+
+# Visual Studio 6 auto-generated project file (contains which files were open etc.)
+*.vbp
+
+# Visual Studio 6 workspace and project file (working project files containing files to include in project)
+*.dsw
+*.dsp
+
+# Visual Studio 6 technical files
+*.ncb
+*.aps
+
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+
+# FAKE - F# Make
+.fake/
+
+# CodeRush personal settings
+.cr/personal
+
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
+
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+
+# Tabs Studio
+*.tss
+
+# Telerik's JustMock configuration file
+*.jmconfig
+
+# BizTalk build output
+*.btp.cs
+*.btm.cs
+*.odx.cs
+*.xsd.cs
+
+# OpenCover UI analysis results
+OpenCover/
+
+# Azure Stream Analytics local run output
+ASALocalRun/
+
+# MSBuild Binary and Structured Log
+*.binlog
+
+# NVidia Nsight GPU debugger configuration file
+*.nvuser
+
+# MFractors (Xamarin productivity tool) working folder
+.mfractor/
+
+# Local History for Visual Studio
+.localhistory/
+
+# Visual Studio History (VSHistory) files
+.vshistory/
+
+# BeatPulse healthcheck temp database
+healthchecksdb
+
+# Backup folder for Package Reference Convert tool in Visual Studio 2017
+MigrationBackup/
+
+# Ionide (cross platform F# VS Code tools) working folder
+.ionide/
+
+# Fody - auto-generated XML schema
+FodyWeavers.xsd
+
+# VS Code files for those working on multiple tools
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+
+# Local History for Visual Studio Code
+.history/
+
+# Windows Installer files from build outputs
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# JetBrains Rider
+*.sln.iml
+
+#################################################################
+#  https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
+#################################################################
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
--- a/.gitlab/build.yml
+++ b/.gitlab/build.yml
@@ -36,12 +36,12 @@ build_starpu_hip:
    BUILD_OPTIONS: "-DCHAMELEON_USE_HIP_ROC=ON -DCHAMELEON_USE_MPI=ON"
    VERSION: starpu_hip

-# build_starpu_hipcuda:
-#   extends: .build_script_template
-#   variables:
-#     CHAM_CI_ENV_ARG: hipcuda
-#     BUILD_OPTIONS: "-DCHAMELEON_USE_HIP_CUDA=ON -DCHAMELEON_HIPBLAS_PATH=/home/gitlab/hipcuda/hipblas -DCHAMELEON_USE_MPI=ON"
-#     VERSION: starpu_hipcuda
+build_starpu_hipcuda:
+  extends: .build_script_template
+  variables:
+    CHAM_CI_ENV_ARG: hipcuda
+    BUILD_OPTIONS: "-DCHAMELEON_USE_HIP_CUDA=ON -DCHAMELEON_HIPBLAS_PATH=/home/gitlab/hipcuda/hipblas -DCHAMELEON_USE_MPI=ON"
+    VERSION: starpu_hipcuda

 build_starpu_simgrid:
  extends: .build_script_template

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -230,11 +230,10 @@ cmake_dependent_option(CHAMELEON_USE_HIP_ROC
                    "Enable HIP kernels with ROCclr backend" OFF
                    "CHAMELEON_ENABLE_HIP" OFF)

-set(CHAMELEON_USE_HIP OFF CACHE INTERNAL "Equivalent to CHAMELEON_USE_CUDA for HIP. Enabled only of one of the CHAMELEON_USE_HIP{CUDA,ROC} is enabled")
 if( CHAMELEON_USE_HIP_CUDA OR CHAMELEON_USE_HIP_ROC )
-  set(CHAMELEON_USE_HIP ON)
+    set(CHAMELEON_USE_HIP ON CACHE INTERNAL "Equivalent to CHAMELEON_USE_CUDA for HIP. Enabled only if one of the CHAMELEON_USE_HIP_{CUDA,ROC} is enabled") # INTERNAL implies FORCE
 else()
-  set(CHAMELEON_USE_HIP OFF)
+    set(CHAMELEON_USE_HIP OFF CACHE INTERNAL "Equivalent to CHAMELEON_USE_CUDA for HIP. Enabled only if one of the CHAMELEON_USE_HIP_{CUDA,ROC} is enabled") # INTERNAL implies FORCE
 endif()

 # Enable Hmat-OSS kernels
@@ -690,18 +689,18 @@ set(CHAMELEON_SOURCES_TARGETS "" CACHE INTERNAL "List of targets of sources")

 if (NOT CHAMELEON_SIMULATION)
  ###############################################################################
-  # Coreblas library (kernels for CPUs) #
-  #######################################
+  # coreblas library (kernels for CPUs, interface to cblas/lapacke) #
+  ###################################################################
  add_subdirectory(coreblas)

  ###############################################################################
-  # Cudablas library (kernels for CUDAs) #
-  ########################################
+  # gpucublas library (kernels for GPUs, interface to cublas or hipblas) #
+  ########################################################################
  if(CHAMELEON_USE_CUDA)
-    add_subdirectory(cudablas)
+    add_subdirectory(gpucublas)
  endif()
  if(CHAMELEON_USE_HIP)
-    add_subdirectory(hipblas)
+    add_subdirectory(gpuhipblas)
  endif()

 endif()
@@ -779,6 +778,12 @@ endif()
 # Export targets #
 ##################

+# Threads::Threads may be a dependency of BLAS/LAPACK and CUDA, so the
+# target may be required by CMake users linking through CHAMELEONConfig.cmake
+if (TARGET Threads::Threads)
+    morse_export_imported_target(Threads Threads threads chameleon)
+endif()
+
 # see https://cmake.org/cmake/help/latest/module/CMakePackageConfigHelpers.html
 include(CMakePackageConfigHelpers)


--- a/cmake_modules/CHAMELEONConfig.cmake.in
+++ b/cmake_modules/CHAMELEONConfig.cmake.in
@@ -11,6 +11,9 @@ check_required_components(CHAMELEON)

 # dependencies of CHAMELEON
 include("${CMAKE_CURRENT_LIST_DIR}/mTargets.cmake")
+if (EXISTS "${CMAKE_CURRENT_LIST_DIR}/threadsTargets.cmake")
+  include("${CMAKE_CURRENT_LIST_DIR}/threadsTargets.cmake")
+endif()
 if (NOT @CHAMELEON_SIMULATION@)
  include("${CMAKE_CURRENT_LIST_DIR}/cblasTargets.cmake")
  include("${CMAKE_CURRENT_LIST_DIR}/lapackeTargets.cmake")
@@ -50,7 +53,10 @@ if (NOT @CHAMELEON_SIMULATION@)
  include("${CMAKE_CURRENT_LIST_DIR}/coreblasTargets.cmake")
 endif()
 if (@CHAMELEON_USE_CUDA@ AND NOT @CHAMELEON_SIMULATION@)
-  include("${CMAKE_CURRENT_LIST_DIR}/cudablasTargets.cmake")
+  include("${CMAKE_CURRENT_LIST_DIR}/gpucublasTargets.cmake")
+endif()
+if (@CHAMELEON_USE_HIP@ AND NOT @CHAMELEON_SIMULATION@)
+  include("${CMAKE_CURRENT_LIST_DIR}/gpuhipblasTargets.cmake")
 endif()
 if (@CHAMELEON_SCHED_OPENMP@)
  include("${CMAKE_CURRENT_LIST_DIR}/chameleon_openmpTargets.cmake")

--- a/cmake_modules/GenPkgConfig.cmake
+++ b/cmake_modules/GenPkgConfig.cmake
@@ -74,7 +74,7 @@ ENDMACRO(CLEAN_LIB_LIST)

 ###
 #
-# GENERATE_PKGCONFIG_FILE: generate files chameleon.pc, coreblas.pc and cudablas.pc
+# GENERATE_PKGCONFIG_FILE: generate files chameleon_lapack.pc, chameleon.pc, coreblas.pc and gpucublas.pc or gpuhipblas.pc
 #
 ###
 MACRO(GENERATE_PKGCONFIG_FILE)
@@ -83,39 +83,39 @@ MACRO(GENERATE_PKGCONFIG_FILE)
    set(CHAMELEON_LAPACK_PKGCONFIG_DEFINITIONS "")
    set(CHAMELEON_PKGCONFIG_DEFINITIONS "")
    set(COREBLAS_PKGCONFIG_DEFINITIONS "")
-    set(CUDABLAS_PKGCONFIG_DEFINITIONS "")
-    set(HIPBLAS_PKGCONFIG_DEFINITIONS "")
+    set(GPUCUBLAS_PKGCONFIG_DEFINITIONS "")
+    set(GPUHIPBLAS_PKGCONFIG_DEFINITIONS "")

    # The link flags specific to this package and any required libraries
    # that don't support PkgConfig
    set(CHAMELEON_LAPACK_PKGCONFIG_LIBS "-lchameleon_lapack")
    set(CHAMELEON_PKGCONFIG_LIBS "-lchameleon")
    set(COREBLAS_PKGCONFIG_LIBS  "-lcoreblas")
-    set(CUDABLAS_PKGCONFIG_LIBS  "-lcudablas")
-    set(HIPBLAS_PKGCONFIG_LIBS  "-lhipblas")
+    set(GPUCUBLAS_PKGCONFIG_LIBS  "-lgpucublas")
+    set(GPUHIPBLAS_PKGCONFIG_LIBS  "-lgpuhipblas")

    # The link flags for private libraries required by this package but not
    # exposed to applications
    set(CHAMELEON_LAPACK_PKGCONFIG_LIBS_PRIVATE "")
    set(CHAMELEON_PKGCONFIG_LIBS_PRIVATE "")
    set(COREBLAS_PKGCONFIG_LIBS_PRIVATE  "")
-    set(CUDABLAS_PKGCONFIG_LIBS_PRIVATE  "")
-    set(HIPBLAS_PKGCONFIG_LIBS_PRIVATE  "")
+    set(GPUCUBLAS_PKGCONFIG_LIBS_PRIVATE  "")
+    set(GPUHIPBLAS_PKGCONFIG_LIBS_PRIVATE  "")

    # A list of packages required by this package
    set(CHAMELEON_LAPACK_PKGCONFIG_REQUIRED "chameleon")
    set(CHAMELEON_PKGCONFIG_REQUIRED "hqr")
    set(COREBLAS_PKGCONFIG_REQUIRED  "")
-    set(CUDABLAS_PKGCONFIG_REQUIRED  "")
-    set(HIPBLAS_PKGCONFIG_REQUIRED  "")
+    set(GPUCUBLAS_PKGCONFIG_REQUIRED  "")
+    set(GPUHIPBLAS_PKGCONFIG_REQUIRED  "")

    # A list of private packages required by this package but not exposed to
    # applications
    set(CHAMELEON_LAPACK_PKGCONFIG_REQUIRED_PRIVATE "")
    set(CHAMELEON_PKGCONFIG_REQUIRED_PRIVATE "")
    set(COREBLAS_PKGCONFIG_REQUIRED_PRIVATE  "")
-    set(CUDABLAS_PKGCONFIG_REQUIRED_PRIVATE  "")
-    set(HIPBLAS_PKGCONFIG_REQUIRED_PRIVATE  "")
+    set(GPUCUBLAS_PKGCONFIG_REQUIRED_PRIVATE  "")
+    set(GPUHIPBLAS_PKGCONFIG_REQUIRED_PRIVATE  "")

    if(CHAMELEON_SCHED_OPENMP)
        list(APPEND CHAMELEON_PKGCONFIG_LIBS -lchameleon_openmp)
@@ -144,15 +144,15 @@ MACRO(GENERATE_PKGCONFIG_FILE)
        list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "coreblas")

        if(CHAMELEON_USE_CUDA)
-            list(APPEND CUDABLAS_PKGCONFIG_LIBS_PRIVATE ${CUDA_CUBLAS_LIBRARIES})
-            list(APPEND CUDABLAS_PKGCONFIG_REQUIRED "cuda")
-            list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "cudablas")
+            list(APPEND GPUCUBLAS_PKGCONFIG_LIBS_PRIVATE ${CUDA_CUBLAS_LIBRARIES})
+            list(APPEND GPUCUBLAS_PKGCONFIG_REQUIRED "cuda")
+            list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "gpucublas")
        endif()

        if(CHAMELEON_USE_HIP)
-            list(APPEND HIPBLAS_PKGCONFIG_LIBS_PRIVATE ${HIPBLAS_LIBRARIES})
-            list(APPEND HIPBLAS_PKGCONFIG_LIBS_PRIVATE ${HIP_LIBRARIES})
-            list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "hipblas")
+            list(APPEND GPUHIPBLAS_PKGCONFIG_LIBS_PRIVATE ${HIPBLAS_LIBRARIES})
+            list(APPEND GPUHIPBLAS_PKGCONFIG_LIBS_PRIVATE ${HIP_LIBRARIES})
+            list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "gpuhipblas")
        endif()

    endif(NOT CHAMELEON_SIMULATION)
@@ -170,10 +170,10 @@ MACRO(GENERATE_PKGCONFIG_FILE)
    CLEAN_LIB_LIST(CHAMELEON)
    CLEAN_LIB_LIST(COREBLAS)
    if(CHAMELEON_USE_CUDA)
-        CLEAN_LIB_LIST(CUDABLAS)
+        CLEAN_LIB_LIST(GPUCUBLAS)
    endif()
    if(CHAMELEON_USE_HIP)
-        CLEAN_LIB_LIST(HIPBLAS)
+        CLEAN_LIB_LIST(GPUHIPBLAS)
    endif()

    # Create .pc file
@@ -182,30 +182,30 @@ MACRO(GENERATE_PKGCONFIG_FILE)
    SET(_output_chameleon_file "${CMAKE_BINARY_DIR}/chameleon.pc")
    SET(_output_coreblas_file "${CMAKE_BINARY_DIR}/coreblas.pc")
    if(CHAMELEON_USE_CUDA)
-        SET(_output_cudablas_file "${CMAKE_BINARY_DIR}/cudablas.pc")
+        SET(_output_gpucublas_file "${CMAKE_BINARY_DIR}/gpucublas.pc")
    endif()
    if(CHAMELEON_USE_HIP)
-        SET(_output_hipblas_file "${CMAKE_BINARY_DIR}/hipblas.pc")
+        SET(_output_gpuhipblas_file "${CMAKE_BINARY_DIR}/gpuhipblas.pc")
    endif()

    # TODO: add url of CHAMELEON releases in .pc file
    CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/chameleon_lapack.pc.in" "${_output_chameleon_lapack_file}" @ONLY)
    CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/chameleon.pc.in" "${_output_chameleon_file}" @ONLY)
-    CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/coreblas.pc.in"  "${_output_coreblas_file}" @ONLY)
+    CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/coreblas.pc.in" "${_output_coreblas_file}" @ONLY)
    if(CHAMELEON_USE_CUDA)
-        CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/cudablas.pc.in"  "${_output_cudablas_file}" @ONLY)
+        CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/gpucublas.pc.in" "${_output_gpucublas_file}" @ONLY)
    endif()
    if(CHAMELEON_USE_HIP)
-        CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/hipblas.pc.in"  "${_output_hipblas_file}" @ONLY)
+        CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/gpuhipblas.pc.in" "${_output_gpuhipblas_file}" @ONLY)
    endif()

    # installation
    # ------------
    INSTALL(FILES ${_output_chameleon_lapack_file} DESTINATION lib/pkgconfig)
    INSTALL(FILES ${_output_chameleon_file} DESTINATION lib/pkgconfig)
-    INSTALL(FILES ${_output_coreblas_file}  DESTINATION lib/pkgconfig)
-    INSTALL(FILES ${_output_cudablas_file}  DESTINATION lib/pkgconfig)
-    INSTALL(FILES ${_output_hipblas_file}  DESTINATION lib/pkgconfig)
+    INSTALL(FILES ${_output_coreblas_file} DESTINATION lib/pkgconfig)
+    INSTALL(FILES ${_output_gpucublas_file} DESTINATION lib/pkgconfig)
+    INSTALL(FILES ${_output_gpuhipblas_file} DESTINATION lib/pkgconfig)

 ENDMACRO(GENERATE_PKGCONFIG_FILE)


--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -344,13 +344,13 @@ if (CHAMELEON_USE_MPI)
 endif()
 if (CHAMELEON_USE_CUDA)
  if (NOT CHAMELEON_SIMULATION)
-    target_link_libraries(chameleon PUBLIC cudablas)
+    target_link_libraries(chameleon PUBLIC gpucublas)
    target_link_libraries(chameleon PUBLIC CUDA::CUBLAS)
  endif()
 endif()
 if (CHAMELEON_USE_HIP)
  if (NOT CHAMELEON_SIMULATION)
-    target_link_libraries(chameleon PUBLIC hipblas)
+    target_link_libraries(chameleon PUBLIC gpuhipblas)
    target_link_libraries(chameleon PUBLIC HIP::HIPBLAS)
  endif()
 endif()

--- a/compute/map.c
+++ b/compute/map.c
@@ -27,7 +27,16 @@
 *
 *******************************************************************************
 *
- * @param[in,out] uplo
+ * @param[in] access
+ *          - ChamR: A is accessed in read-only mode.
+ *          - ChamW: A is accessed in write-only mode.
+ *           WARNING: if the descriptor is set for allocation on the fly, the
+ *           flush call included in this synchronous API will free all allocated
+ *           data; prefer the asynchronous call if you want to initialize data
+ *           before submitting another algorithm.
+ *          - ChamRW: A is accessed in read-write mode.
+ *
+ * @param[in] uplo
 *          - ChamUpper: Only the upper triangular part of the matrix is touched
 *          - ChamLower: Only the lower triangular part of the matrix is touched
 *          - ChamUpperLower: The entire the matrix is touched
@@ -51,7 +60,8 @@
 * @sa CHAMELEON_map_Tile_Async
 *
 */
-int CHAMELEON_map_Tile( cham_uplo_t           uplo,
+int CHAMELEON_map_Tile( cham_access_t         access,
+                        cham_uplo_t           uplo,
                        CHAM_desc_t          *A,
                        cham_unary_operator_t op_fct,
                        void                 *op_args )
@@ -68,7 +78,7 @@ int CHAMELEON_map_Tile( cham_uplo_t           uplo,
    }
    chameleon_sequence_create( chamctxt, &sequence );

-    CHAMELEON_map_Tile_Async( uplo, A, op_fct, op_args, sequence, &request );
+    CHAMELEON_map_Tile_Async( access, uplo, A, op_fct, op_args, sequence, &request );

    CHAMELEON_Desc_Flush( A, sequence );

@@ -89,6 +99,13 @@ int CHAMELEON_map_Tile( cham_uplo_t           uplo,
 *
 *******************************************************************************
 *
+ * @param[in] access
+ *          - ChamR: A is accessed in read-only mode.
+ *          - ChamW: A is accessed in write-only mode.
+ *          INFO: the tiles of A may be unallocated before the call if the
+ *          descriptor is set for allocation on the fly.
+ *          - ChamRW: A is accessed in read-write mode.
+ *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
@@ -105,7 +122,8 @@ int CHAMELEON_map_Tile( cham_uplo_t           uplo,
 * @sa CHAMELEON_map_Tile
 *
 */
-int CHAMELEON_map_Tile_Async( cham_uplo_t           uplo,
+int CHAMELEON_map_Tile_Async( cham_access_t         access,
+                              cham_uplo_t           uplo,
                              CHAM_desc_t          *A,
                              cham_unary_operator_t op_fct,
                              void                 *op_args,
@@ -146,7 +164,7 @@ int CHAMELEON_map_Tile_Async( cham_uplo_t           uplo,
        return CHAMELEON_SUCCESS;
    }

-    chameleon_pmap( uplo, A, op_fct, op_args, sequence, request );
+    chameleon_pmap( access, uplo, A, op_fct, op_args, sequence, request );

    return CHAMELEON_SUCCESS;
 }
--- a/compute/pmap.c
+++ b/compute/pmap.c
@@ -20,7 +20,7 @@
 /**
 *  chameleon_pmap - Generate a random matrix by tiles.
 */
-void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
+void chameleon_pmap( cham_access_t access, cham_uplo_t uplo, CHAM_desc_t *A,
                     cham_unary_operator_t op_fct, void *op_args,
                     RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
@@ -39,12 +39,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
            for (m = 0; m < n; m++) {
                INSERT_TASK_map(
                    &options,
-                    ChamUpperLower, A(m, n),
+                    access, ChamUpperLower, A(m, n),
                    op_fct, op_args );
            }
            INSERT_TASK_map(
                &options,
-                uplo, A(n, n),
+                access, uplo, A(n, n),
                op_fct, op_args );
        }
        break;
@@ -53,12 +53,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
        for (n = 0; n < A->nt; n++) {
            INSERT_TASK_map(
                &options,
-                uplo, A(n, n),
+                access, uplo, A(n, n),
                op_fct, op_args );
            for (m = n+1; m < A->mt; m++) {
                INSERT_TASK_map(
                    &options,
-                    ChamUpperLower, A(m, n),
+                    access, ChamUpperLower, A(m, n),
                    op_fct, op_args );
            }
        }
@@ -70,7 +70,7 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
            for (n = 0; n < A->nt; n++) {
                INSERT_TASK_map(
                    &options,
-                    uplo, A(m, n),
+                    access, uplo, A(m, n),
                    op_fct, op_args );
            }
        }

--- a/compute/pzgetrf.c
+++ b/compute/pzgetrf.c
@@ -24,6 +24,50 @@

 #define A(m,n) A,  m,  n
 #define U(m,n) &(ws->U),  m,  n
+#define IPIV(m) IPIV,  m,  1
+
+/*
+ * Static variable to know how to handle the data within the kernel
+ * This assumes that only one runtime is enabled at a time.
+ */
+static RUNTIME_id_t zgetrf_runtime_id = RUNTIME_SCHED_STARPU;
+
+static inline int
+zgetrf_ipiv_init( const CHAM_desc_t *descIPIV,
+                  cham_uplo_t uplo, int m, int n,
+                  CHAM_tile_t *tileIPIV, void *op_args )
+{
+    int *IPIV;
+    (void)op_args;
+
+    if ( zgetrf_runtime_id == RUNTIME_SCHED_PARSEC ) {
+        IPIV = (int*)tileIPIV;
+    }
+    else {
+        IPIV = CHAM_tile_get_ptr( tileIPIV );
+    }
+
+#if !defined(CHAMELEON_SIMULATION)
+    {
+        int tempmm = m == descIPIV->mt-1 ? descIPIV->m - m * descIPIV->mb : descIPIV->mb;
+        int i;
+
+        for( i=0; i<tempmm; i++ ) {
+            IPIV[i] = m * descIPIV->mb + i + 1;
+        }
+    }
+#endif
+
+    return 0;
+}
+
+static inline void
+chameleon_pzgetrf_ipiv_init( CHAM_desc_t        *IPIV,
+                             RUNTIME_sequence_t *sequence,
+                             RUNTIME_request_t  *request )
+{
+    chameleon_pmap( ChamW, ChamUpperLower, IPIV, zgetrf_ipiv_init, NULL, sequence, request );
+}

 /*
 * All the functions below are panel factorization variant.
@@ -113,11 +157,13 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
                               int                         k,
                               RUNTIME_option_t           *options )
 {
-#if defined(GETRF_NOPIV_PER_COLUMN)
-    chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options );
-#else
-    chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options );
-#endif
+    /* TODO: Should be replaced by a function pointer */
+    if ( ws->alg == ChamGetrfNoPivPerColumn ) {
+        chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options );
+    }
+    else {
+        chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options );
+    }
 }

 /**
@@ -180,9 +226,10 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
 *  Parallel tile LU factorization with no pivoting - dynamic scheduling
 */
 void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
-                        CHAM_desc_t              *A,
-                        RUNTIME_sequence_t       *sequence,
-                        RUNTIME_request_t        *request )
+                        CHAM_desc_t                *A,
+                        CHAM_desc_t                *IPIV,
+                        RUNTIME_sequence_t         *sequence,
+                        RUNTIME_request_t          *request )
 {
    CHAM_context_t  *chamctxt;
    RUNTIME_option_t options;
@@ -196,6 +243,9 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
    }
    RUNTIME_options_init( &options, chamctxt, sequence, request );

+    /* Initialize IPIV */
+    chameleon_pzgetrf_ipiv_init( IPIV, sequence, request );
+
    for (k = 0; k < min_mnt; k++) {
        RUNTIME_iteration_push( chamctxt, k );


--- a/compute/pzlatms.c
+++ b/compute/pzlatms.c
@@ -165,7 +165,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
    for (n = 0; n < kt; n++) {
        INSERT_TASK_map(
            &options,
-            ChamUpperLower, A(n, n),
+            ChamRW, ChamUpperLower, A(n, n),
            zlaset_diag, D );
    }


--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -52,26 +52,44 @@
 void *
 CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
 {
-    CHAM_context_t           *chamctxt;
-    struct chameleon_pzgetrf_s *options;
+    CHAM_context_t             *chamctxt;
+    struct chameleon_pzgetrf_s *ws;

    chamctxt = chameleon_context_self();
    if ( chamctxt == NULL ) {
        return NULL;
    }

-    options = calloc( 1, sizeof( struct chameleon_pzgetrf_s ) );
-    options->ib = CHAMELEON_IB;
+    ws = calloc( 1, sizeof( struct chameleon_pzgetrf_s ) );
+    ws->alg = ChamGetrfNoPiv;
+    ws->ib  = CHAMELEON_IB;
+
+    {
+        char *algostr = chameleon_getenv( "CHAMELEON_GETRF_ALGO" );
+        /* Case-insensitive match on the algorithm name: strcasecmp() returns 0 when equal */
+        if ( algostr ) {
+            if ( strcasecmp( algostr, "nopiv" ) == 0 ) {
+                ws->alg = ChamGetrfNoPiv;
+            }
+            else if ( strcasecmp( algostr, "nopivpercolumn" ) == 0 ) {
+                ws->alg = ChamGetrfNoPivPerColumn;
+            }
+            else { /* Unknown value: keep the ChamGetrfNoPiv default set above */
+                fprintf( stderr, "ERROR: CHAMELEON_GETRF_ALGO is not one of NoPiv, NoPivPerColumn => Switch back to NoPiv\n" );
+            }
+        }
+        chameleon_cleanenv( algostr );
+    }

-#if defined(GETRF_NOPIV_PER_COLUMN)
-    chameleon_desc_init( &(options->U), CHAMELEON_MAT_ALLOC_TILE,
-                         ChamComplexDouble, 1, A->nb, A->nb,
-                         A->mt, A->nt * A->nb, 0, 0,
-                         A->mt, A->nt * A->nb, A->p, A->q,
-                         NULL, NULL, A->get_rankof_init );
-#endif
+    if ( ws->alg == ChamGetrfNoPivPerColumn ) {
+        chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE,
+                             ChamComplexDouble, 1, A->nb, A->nb,
+                             A->mt, A->nt * A->nb, 0, 0,
+                             A->mt, A->nt * A->nb, A->p, A->q,
+                             NULL, NULL, A->get_rankof_init );
+    }

-    return options;
+    return ws;
 }

 /**
@@ -94,14 +112,13 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
 *
 */
 void
-CHAMELEON_zgetrf_WS_Free( const CHAM_desc_t *A, void *user_ws )
+CHAMELEON_zgetrf_WS_Free( void *user_ws )
 {
    struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)user_ws;

-#if defined(GETRF_NOPIV_PER_COLUMN)
-    chameleon_desc_destroy( &(ws->U) );
-#endif
-
+    if ( (ws != NULL) && (ws->alg == ChamGetrfNoPivPerColumn) ) { /* NULL-safe: free(NULL) below is a no-op */
+        chameleon_desc_destroy( &(ws->U) ); /* U is only initialized for this algorithm */
+    }
    free( ws );
 }

@@ -150,7 +167,7 @@ CHAMELEON_zgetrf_WS_Free( const CHAM_desc_t *A, void *user_ws )
 *
 */
 int
-CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA )
+CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
 {
    int                 NB;
    int                 status;
@@ -210,7 +227,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA )
    chameleon_sequence_wait( chamctxt, sequence );

    /* Cleanup the temporary data */
-    CHAMELEON_zgetrf_WS_Free( &descAt, ws );
+    CHAMELEON_zgetrf_WS_Free( ws );
    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );

    status = sequence->status;
@@ -254,7 +271,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA )
 *
 */
 int
-CHAMELEON_zgetrf_Tile( CHAM_desc_t *A )
+CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV )
 {
    CHAM_context_t     *chamctxt;
    RUNTIME_sequence_t *sequence = NULL;
@@ -270,12 +287,11 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A )
    chameleon_sequence_create( chamctxt, &sequence );

    ws = CHAMELEON_zgetrf_WS_Alloc( A );
-    CHAMELEON_zgetrf_Tile_Async( A, ws, sequence, &request );
-
+    CHAMELEON_zgetrf_Tile_Async( A, IPIV, ws, sequence, &request );
    CHAMELEON_Desc_Flush( A, sequence );

    chameleon_sequence_wait( chamctxt, sequence );
-    CHAMELEON_zgetrf_WS_Free( A, ws );
+    CHAMELEON_zgetrf_WS_Free( ws );

    status = sequence->status;
    chameleon_sequence_destroy( chamctxt, sequence );
@@ -317,11 +333,13 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A )
 */
 int
 CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
+                             CHAM_desc_t        *IPIV,
                             void               *user_ws,
                             RUNTIME_sequence_t *sequence,
                             RUNTIME_request_t  *request )
 {
-    CHAM_context_t *chamctxt;
+    CHAM_context_t             *chamctxt;
+    struct chameleon_pzgetrf_s *ws;
    chamctxt = chameleon_context_self();

    if ( chamctxt == NULL ) {
@@ -357,14 +375,38 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t        *A,
        chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid first descriptor" );
        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
    }
+    if ( chameleon_desc_check( IPIV ) != CHAMELEON_SUCCESS ) {
+        chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid second descriptor" );
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }

    /* Check input arguments */
    if ( A->nb != A->mb ) {
        chameleon_error( "CHAMELEON_zgetrf_Tile", "only square tiles supported" );
        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
    }
+    if ( IPIV->mb != A->mb ) {
+        chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must have the same number of rows as tiles of A" );
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+    if ( IPIV->nb != 1 ) {
+        chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must be vectors with only one column per tile" );
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+
+    if ( user_ws == NULL ) {
+        ws = CHAMELEON_zgetrf_WS_Alloc( A );
+    }
+    else {
+        ws = user_ws;
+    }

-    chameleon_pzgetrf( user_ws, A, sequence, request );
+    chameleon_pzgetrf( ws, A, IPIV, sequence, request ); /* ws, not user_ws: the latter may be NULL */

+    if ( user_ws == NULL ) {
+        CHAMELEON_Desc_Flush( A, sequence );
+        chameleon_sequence_wait( chamctxt, sequence );
+        CHAMELEON_zgetrf_WS_Free( ws );
+    }
    return CHAMELEON_SUCCESS;
 }
--- a/compute/zlacpy.c
+++ b/compute/zlacpy.c
@@ -280,8 +280,8 @@ int CHAMELEON_zlacpy_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
-    if (A->nb != A->mb) {
-        chameleon_error("CHAMELEON_zlacpy_Tile_Async", "only square tiles supported");
+    if ((A->mb != B->mb) || (A->nb != B->nb) ){
+        chameleon_error("CHAMELEON_zlacpy_Tile_Async", "only matching tile sizes supported");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */

--- a/compute/zprint.c
+++ b/compute/zprint.c
@@ -152,7 +152,7 @@ int CHAMELEON_zprint( FILE *file, const char *header,

    /* Call the tile interface */
    zprint_runtime_id = chamctxt->scheduler;
-    chameleon_pmap( uplo, &descAt, zprint, &options, sequence, &request );
+    chameleon_pmap( ChamR, uplo, &descAt, zprint, &options, sequence, &request );

    /* Submit the matrix conversion back */
    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
@@ -216,7 +216,7 @@ int CHAMELEON_zprint_Tile( FILE *file, const char *header,
    chameleon_sequence_create( chamctxt, &sequence );

    zprint_runtime_id = chamctxt->scheduler;
-    chameleon_pmap( uplo, A, zprint, &options, sequence, &request );
+    chameleon_pmap( ChamR, uplo, A, zprint, &options, sequence, &request );
    CHAMELEON_Desc_Flush( A, sequence );

    chameleon_sequence_wait( chamctxt, sequence );

--- a/control/common.h
+++ b/control/common.h
@@ -102,7 +102,7 @@ extern char *chameleon_lapack_constants[];
 extern "C" {
 #endif

-void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
+void chameleon_pmap( cham_access_t access, cham_uplo_t uplo, CHAM_desc_t *A,
                     cham_unary_operator_t operator, void *op_args,
                     RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );

@@ -127,7 +127,7 @@ static inline int chameleon_asprintf( char **strp, const char *fmt, ... )
    int rc;

    va_start( ap, fmt );
-    rc = asprintf( strp, fmt, ap );
+    rc = vasprintf( strp, fmt, ap );
    va_end( ap );

    assert( rc != -1 );

--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -41,8 +41,9 @@ struct chameleon_pzgemm_s {
 * @brief Data structure to handle the GETRF workspaces with partial pivoting
 */
 struct chameleon_pzgetrf_s {
-    int         ib; /* Internal blocking parameter */
-    CHAM_desc_t U;
+    cham_getrf_t alg;
+    int          ib; /* Internal blocking parameter */
+    CHAM_desc_t  U;
 };

 /**
@@ -86,7 +87,7 @@ void chameleon_pzgepdf_qdwh( cham_mtxtype_t trans, CHAM_desc_t *descU, CHAM_desc
 void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_desc_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);

--- a/control/descriptor.c
+++ b/control/descriptor.c
@@ -376,7 +376,8 @@ int chameleon_desc_check(const CHAM_desc_t *desc)
        chameleon_error("chameleon_desc_check", "NULL matrix pointer");
        return CHAMELEON_ERR_UNALLOCATED;
    }
-    if (desc->dtyp != ChamRealFloat &&
+    if (desc->dtyp != ChamInteger &&
+        desc->dtyp != ChamRealFloat &&
        desc->dtyp != ChamRealDouble &&
        desc->dtyp != ChamComplexFloat &&
        desc->dtyp != ChamComplexDouble  ) {

--- a/doc/dev/main.dox
+++ b/doc/dev/main.dox
@@ -67,10 +67,10 @@ The libraries are organized as follows :

 - __chameleon_quark|openmp|parsec|starpu__ : interface to the
  different runtimes, depends on "coreblas" and optionally on
-  "cudablas" and on a runtime system library
+  "gpucublas" or "gpuhipblas" and on a runtime system library

- __coreblas__ and __cudablas__ : interfaces to the CPU and GPU
-  kernels
+- __coreblas__ and __gpucublas__ or __gpuhipblas__ :
+  interfaces to the CPU and GPU kernels

 - __hqr__ : HQR is a C library providing tools to generate hierachical
  trees adapted to 2D block-cyclic data distribution and algorithms
@@ -91,14 +91,16 @@ Lets have a look to the source code organization in directories.

 - __coreblas__ : the Chameleon interface to CPU linear algebra kernels

- __cudablas__ : the Chameleon interface to GPU linear algebra kernels
-
 - __distrib__ : some hints to install Chameleon's dependencies

 - __doc__ : users and developers documentations

 - __example__ : couple of C files to show how to use Chameleon

+- __gpucublas__ : the Chameleon interface to GPU linear algebra kernels (cublas)
+
+- __gpuhipblas__ : the Chameleon interface to GPU linear algebra kernels (hipblas)
+
 - __hqr__ : [HQR](https://gitlab.inria.fr/solverstack/hqr) is a C
 library providing tools to generate hierachical trees adapted to 2D
 block-cyclic data distribution and algorithms based on tiled
@@ -106,6 +108,8 @@ QR/algorithms

 - __include__ : Chameleon's headers file necessary for users

+- __lapack_api__ : the CBLAS/LAPACKE-like interface to Chameleon
+
 - __lib__ : material related to the distribution

 - __plasma-conversion__ : scripts to convert plasma task based

--- a/cudablas/CMakeLists.txt
+++ b/cudablas/CMakeLists.txt
--- a/cudablas/compute/CMakeLists.txt
+++ b/cudablas/compute/CMakeLists.txt
@@ -27,7 +27,7 @@

 # Generate the chameleon sources for all possible precisions
 # ------------------------------------------------------
-set(CUDABLAS_SRCS_GENERATED "")
+set(GPUCUBLAS_SRCS_GENERATED "")
 set(ZSRC
    cuda_zgeadd.c
    cuda_zgemerge.c
@@ -72,46 +72,46 @@ set(ZSRC
 # endif()

 precisions_rules_py(
-  CUDABLAS_SRCS_GENERATED "${ZSRC}"
+  GPUCUBLAS_SRCS_GENERATED "${ZSRC}"
  PRECISIONS "${CHAMELEON_PRECISION}")

-set(CUDABLAS_SRCS
-  ${CUDABLAS_SRCS_GENERATED}
+set(GPUCUBLAS_SRCS
+  ${GPUCUBLAS_SRCS_GENERATED}
  cudaglobal.c
  )

 # Force generation of sources
 # ---------------------------
-add_custom_target(cudablas_sources ALL SOURCES ${CUDABLAS_SRCS})
-set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};cudablas_sources" CACHE INTERNAL "List of targets of sources")
+add_custom_target(gpucublas_sources ALL SOURCES ${GPUCUBLAS_SRCS})
+set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};gpucublas_sources" CACHE INTERNAL "List of targets of sources")

 # Compile step
 # ------------
-add_library(cudablas ${CUDABLAS_SRCS})
-set_target_properties(cudablas PROPERTIES VERSION ${CHAMELEON_VERSION})
-set_target_properties(cudablas PROPERTIES SOVERSION ${CHAMELEON_VERSION_MAJOR})
-add_dependencies(cudablas cudablas_include cudablas_sources)
-target_include_directories(cudablas PUBLIC
-  $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/cudablas/include>
-  $<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/cudablas/include>
+add_library(gpucublas ${GPUCUBLAS_SRCS})
+set_target_properties(gpucublas PROPERTIES VERSION ${CHAMELEON_VERSION})
+set_target_properties(gpucublas PROPERTIES SOVERSION ${CHAMELEON_VERSION_MAJOR})
+add_dependencies(gpucublas gpucublas_include gpucublas_sources)
+target_include_directories(gpucublas PUBLIC
+  $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/gpucublas/include>
+  $<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/gpucublas/include>
  $<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/include>
  $<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/include>
  $<INSTALL_INTERFACE:include>)
-set_property(TARGET cudablas PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
+set_property(TARGET gpucublas PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")

-target_link_libraries(cudablas PRIVATE coreblas CUDA::CUBLAS)
-target_link_libraries(cudablas PUBLIC MORSE::M)
+target_link_libraries(gpucublas PRIVATE coreblas CUDA::CUBLAS)
+target_link_libraries(gpucublas PUBLIC MORSE::M)

 # export target coreblas
-install(EXPORT cudablasTargets
+install(EXPORT gpucublasTargets
        NAMESPACE CHAMELEON::
        DESTINATION lib/cmake/chameleon
        )

 # installation
 # ------------
-install(TARGETS cudablas
-        EXPORT cudablasTargets
+install(TARGETS gpucublas
+        EXPORT gpucublasTargets
        ARCHIVE DESTINATION lib
        LIBRARY DESTINATION lib
        )

--- a/cudablas/compute/cuda_zgeadd.c
+++ b/cudablas/compute/cuda_zgeadd.c
@@ -17,7 +17,7 @@
 * @precisions normal z -> c d s
 *
 */
-#include "cudablas.h"
+#include "gpucublas.h"

 /**
 ******************************************************************************
No results found