Mentions légales du service

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • solverstack/chameleon
  • lvilleve/chameleon-toto
  • jcletort/chameleon
  • thibault/chameleon
  • tcojean/chameleon
  • sylvand/chameleon
  • viroulea/chameleon
  • x-ltac/chameleon
  • agullo/chameleon
  • glucas/chameleon
  • pswartva/chameleon
  • aguermou1/chameleon
  • eyrauddu/chameleon
  • mverite/chameleon
  • alisito/chameleon
  • furmento/chameleon
  • fpruvost/chameleon
  • ahourcau/chameleon
  • bnicolas/chameleon
  • pesterie/chameleon
  • mmarcos/chameleon
21 results
Show changes
Commits on Source (23)
Showing with 818 additions and 125 deletions
......@@ -83,3 +83,569 @@ install_manifest.txt
compile_commands.json
CTestTestfile.cmake
#################################################################
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
#################################################################
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.tlog
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
*.vbp
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
*.dsw
*.dsp
# Visual Studio 6 technical files
*.ncb
*.aps
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# Visual Studio History (VSHistory) files
.vshistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/
# Windows Installer files from build outputs
*.cab
*.msi
*.msix
*.msm
*.msp
# JetBrains Rider
*.sln.iml
#################################################################
# https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
#################################################################
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
......@@ -36,12 +36,12 @@ build_starpu_hip:
BUILD_OPTIONS: "-DCHAMELEON_USE_HIP_ROC=ON -DCHAMELEON_USE_MPI=ON"
VERSION: starpu_hip
# build_starpu_hipcuda:
# extends: .build_script_template
# variables:
# CHAM_CI_ENV_ARG: hipcuda
# BUILD_OPTIONS: "-DCHAMELEON_USE_HIP_CUDA=ON -DCHAMELEON_HIPBLAS_PATH=/home/gitlab/hipcuda/hipblas -DCHAMELEON_USE_MPI=ON"
# VERSION: starpu_hipcuda
build_starpu_hipcuda:
extends: .build_script_template
variables:
CHAM_CI_ENV_ARG: hipcuda
BUILD_OPTIONS: "-DCHAMELEON_USE_HIP_CUDA=ON -DCHAMELEON_HIPBLAS_PATH=/home/gitlab/hipcuda/hipblas -DCHAMELEON_USE_MPI=ON"
VERSION: starpu_hipcuda
build_starpu_simgrid:
extends: .build_script_template
......
......@@ -230,11 +230,10 @@ cmake_dependent_option(CHAMELEON_USE_HIP_ROC
"Enable HIP kernels with ROCclr backend" OFF
"CHAMELEON_ENABLE_HIP" OFF)
set(CHAMELEON_USE_HIP OFF CACHE INTERNAL "Equivalent to CHAMELEON_USE_CUDA for HIP. Enabled only of one of the CHAMELEON_USE_HIP{CUDA,ROC} is enabled")
if( CHAMELEON_USE_HIP_CUDA OR CHAMELEON_USE_HIP_ROC )
set(CHAMELEON_USE_HIP ON)
set(CHAMELEON_USE_HIP ON CACHE INTERNAL "Equivalent to CHAMELEON_USE_CUDA for HIP. Enabled only of one of the CHAMELEON_USE_HIP{CUDA,ROC} is enabled" FORCE)
else()
set(CHAMELEON_USE_HIP OFF)
set(CHAMELEON_USE_HIP OFF CACHE INTERNAL "Equivalent to CHAMELEON_USE_CUDA for HIP. Enabled only of one of the CHAMELEON_USE_HIP{CUDA,ROC} is enabled" FORCE)
endif()
# Enable Hmat-OSS kernels
......@@ -690,18 +689,18 @@ set(CHAMELEON_SOURCES_TARGETS "" CACHE INTERNAL "List of targets of sources")
if (NOT CHAMELEON_SIMULATION)
###############################################################################
# Coreblas library (kernels for CPUs) #
#######################################
# coreblas library (kernels for CPUs, interface to cblas/lapacke) #
###################################################################
add_subdirectory(coreblas)
###############################################################################
# Cudablas library (kernels for CUDAs) #
########################################
# gpucublas library (kernels for GPUs, interface to cublas or hipblas) #
########################################################################
if(CHAMELEON_USE_CUDA)
add_subdirectory(cudablas)
add_subdirectory(gpucublas)
endif()
if(CHAMELEON_USE_HIP)
add_subdirectory(hipblas)
add_subdirectory(gpuhipblas)
endif()
endif()
......@@ -779,6 +778,12 @@ endif()
# Export targets #
##################
# Threads::Threads may be a dependency of BLAS/LAPACK and CUDA.
# The target may therefore be required by CMake users linking through
# CHAMELEONconfig.cmake, so it is handled here only when it actually exists.
if (TARGET Threads::Threads)
morse_export_imported_target(Threads Threads threads chameleon)
endif()
# see https://cmake.org/cmake/help/latest/module/CMakePackageConfigHelpers.html
include(CMakePackageConfigHelpers)
......
......@@ -11,6 +11,9 @@ check_required_components(CHAMELEON)
# dependencies of CHAMELEON
include("${CMAKE_CURRENT_LIST_DIR}/mTargets.cmake")
# threadsTargets.cmake is optional: include it only when the file is present
# in the same directory as this file.
if (EXISTS "${CMAKE_CURRENT_LIST_DIR}/threadsTargets.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/threadsTargets.cmake")
endif()
if (NOT @CHAMELEON_SIMULATION@)
include("${CMAKE_CURRENT_LIST_DIR}/cblasTargets.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/lapackeTargets.cmake")
......@@ -50,7 +53,10 @@ if (NOT @CHAMELEON_SIMULATION@)
include("${CMAKE_CURRENT_LIST_DIR}/coreblasTargets.cmake")
endif()
if (@CHAMELEON_USE_CUDA@ AND NOT @CHAMELEON_SIMULATION@)
include("${CMAKE_CURRENT_LIST_DIR}/cudablasTargets.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/gpucublasTargets.cmake")
endif()
if (@CHAMELEON_USE_HIP@ AND NOT @CHAMELEON_SIMULATION@)
include("${CMAKE_CURRENT_LIST_DIR}/gpuhipblasTargets.cmake")
endif()
if (@CHAMELEON_SCHED_OPENMP@)
include("${CMAKE_CURRENT_LIST_DIR}/chameleon_openmpTargets.cmake")
......
......@@ -74,7 +74,7 @@ ENDMACRO(CLEAN_LIB_LIST)
###
#
# GENERATE_PKGCONFIG_FILE: generate files chameleon.pc, coreblas.pc and cudablas.pc
# GENERATE_PKGCONFIG_FILE: generate files chameleon_lapack.pc, chameleon.pc, coreblas.pc and gpucublas.pc or gpuhipblas.pc
#
###
MACRO(GENERATE_PKGCONFIG_FILE)
......@@ -83,39 +83,39 @@ MACRO(GENERATE_PKGCONFIG_FILE)
set(CHAMELEON_LAPACK_PKGCONFIG_DEFINITIONS "")
set(CHAMELEON_PKGCONFIG_DEFINITIONS "")
set(COREBLAS_PKGCONFIG_DEFINITIONS "")
set(CUDABLAS_PKGCONFIG_DEFINITIONS "")
set(HIPBLAS_PKGCONFIG_DEFINITIONS "")
set(GPUCUBLAS_PKGCONFIG_DEFINITIONS "")
set(GPUHIPBLAS_PKGCONFIG_DEFINITIONS "")
# The link flags specific to this package and any required libraries
# that don't support PkgConfig
set(CHAMELEON_LAPACK_PKGCONFIG_LIBS "-lchameleon_lapack")
set(CHAMELEON_PKGCONFIG_LIBS "-lchameleon")
set(COREBLAS_PKGCONFIG_LIBS "-lcoreblas")
set(CUDABLAS_PKGCONFIG_LIBS "-lcudablas")
set(HIPBLAS_PKGCONFIG_LIBS "-lhipblas")
set(GPUCUBLAS_PKGCONFIG_LIBS "-lgpucublas")
set(GPUHIPBLAS_PKGCONFIG_LIBS "-lgpuhipblas")
# The link flags for private libraries required by this package but not
# exposed to applications
set(CHAMELEON_LAPACK_PKGCONFIG_LIBS_PRIVATE "")
set(CHAMELEON_PKGCONFIG_LIBS_PRIVATE "")
set(COREBLAS_PKGCONFIG_LIBS_PRIVATE "")
set(CUDABLAS_PKGCONFIG_LIBS_PRIVATE "")
set(HIPBLAS_PKGCONFIG_LIBS_PRIVATE "")
set(GPUCUBLAS_PKGCONFIG_LIBS_PRIVATE "")
set(GPUHIPBLAS_PKGCONFIG_LIBS_PRIVATE "")
# A list of packages required by this package
set(CHAMELEON_LAPACK_PKGCONFIG_REQUIRED "chameleon")
set(CHAMELEON_PKGCONFIG_REQUIRED "hqr")
set(COREBLAS_PKGCONFIG_REQUIRED "")
set(CUDABLAS_PKGCONFIG_REQUIRED "")
set(HIPBLAS_PKGCONFIG_REQUIRED "")
set(GPUCUBLAS_PKGCONFIG_REQUIRED "")
set(GPUHIPBLAS_PKGCONFIG_REQUIRED "")
# A list of private packages required by this package but not exposed to
# applications
set(CHAMELEON_LAPACK_PKGCONFIG_REQUIRED_PRIVATE "")
set(CHAMELEON_PKGCONFIG_REQUIRED_PRIVATE "")
set(COREBLAS_PKGCONFIG_REQUIRED_PRIVATE "")
set(CUDABLAS_PKGCONFIG_REQUIRED_PRIVATE "")
set(HIPBLAS_PKGCONFIG_REQUIRED_PRIVATE "")
set(GPUCUBLAS_PKGCONFIG_REQUIRED_PRIVATE "")
set(GPUHIPBLAS_PKGCONFIG_REQUIRED_PRIVATE "")
if(CHAMELEON_SCHED_OPENMP)
list(APPEND CHAMELEON_PKGCONFIG_LIBS -lchameleon_openmp)
......@@ -144,15 +144,15 @@ MACRO(GENERATE_PKGCONFIG_FILE)
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "coreblas")
if(CHAMELEON_USE_CUDA)
list(APPEND CUDABLAS_PKGCONFIG_LIBS_PRIVATE ${CUDA_CUBLAS_LIBRARIES})
list(APPEND CUDABLAS_PKGCONFIG_REQUIRED "cuda")
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "cudablas")
list(APPEND GPUCUBLAS_PKGCONFIG_LIBS_PRIVATE ${CUDA_CUBLAS_LIBRARIES})
list(APPEND GPUCUBLAS_PKGCONFIG_REQUIRED "cuda")
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "gpucublas")
endif()
if(CHAMELEON_USE_HIP)
list(APPEND HIPBLAS_PKGCONFIG_LIBS_PRIVATE ${HIPBLAS_LIBRARIES})
list(APPEND HIPBLAS_PKGCONFIG_LIBS_PRIVATE ${HIP_LIBRARIES})
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "hipblas")
list(APPEND GPUHIPBLAS_PKGCONFIG_LIBS_PRIVATE ${HIPBLAS_LIBRARIES})
list(APPEND GPUHIPBLAS_PKGCONFIG_LIBS_PRIVATE ${HIP_LIBRARIES})
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "gpuhipblas")
endif()
endif(NOT CHAMELEON_SIMULATION)
......@@ -170,10 +170,10 @@ MACRO(GENERATE_PKGCONFIG_FILE)
CLEAN_LIB_LIST(CHAMELEON)
CLEAN_LIB_LIST(COREBLAS)
if(CHAMELEON_USE_CUDA)
CLEAN_LIB_LIST(CUDABLAS)
CLEAN_LIB_LIST(GPUCUBLAS)
endif()
if(CHAMELEON_USE_HIP)
CLEAN_LIB_LIST(HIPBLAS)
CLEAN_LIB_LIST(GPUHIPBLAS)
endif()
# Create .pc file
......@@ -182,30 +182,30 @@ MACRO(GENERATE_PKGCONFIG_FILE)
SET(_output_chameleon_file "${CMAKE_BINARY_DIR}/chameleon.pc")
SET(_output_coreblas_file "${CMAKE_BINARY_DIR}/coreblas.pc")
if(CHAMELEON_USE_CUDA)
SET(_output_cudablas_file "${CMAKE_BINARY_DIR}/cudablas.pc")
SET(_output_gpucublas_file "${CMAKE_BINARY_DIR}/gpucublas.pc")
endif()
if(CHAMELEON_USE_HIP)
SET(_output_hipblas_file "${CMAKE_BINARY_DIR}/hipblas.pc")
SET(_output_gpuhipblas_file "${CMAKE_BINARY_DIR}/gpuhipblas.pc")
endif()
# TODO: add url of CHAMELEON releases in .pc file
CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/chameleon_lapack.pc.in" "${_output_chameleon_lapack_file}" @ONLY)
CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/chameleon.pc.in" "${_output_chameleon_file}" @ONLY)
CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/coreblas.pc.in" "${_output_coreblas_file}" @ONLY)
CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/coreblas.pc.in" "${_output_coreblas_file}" @ONLY)
if(CHAMELEON_USE_CUDA)
CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/cudablas.pc.in" "${_output_cudablas_file}" @ONLY)
CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/gpucublas.pc.in" "${_output_gpucublas_file}" @ONLY)
endif()
if(CHAMELEON_USE_HIP)
CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/hipblas.pc.in" "${_output_hipblas_file}" @ONLY)
CONFIGURE_FILE("${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/gpuhipblas.pc.in" "${_output_gpuhipblas_file}" @ONLY)
endif()
# installation
# ------------
INSTALL(FILES ${_output_chameleon_lapack_file} DESTINATION lib/pkgconfig)
INSTALL(FILES ${_output_chameleon_file} DESTINATION lib/pkgconfig)
INSTALL(FILES ${_output_coreblas_file} DESTINATION lib/pkgconfig)
INSTALL(FILES ${_output_cudablas_file} DESTINATION lib/pkgconfig)
INSTALL(FILES ${_output_hipblas_file} DESTINATION lib/pkgconfig)
INSTALL(FILES ${_output_coreblas_file} DESTINATION lib/pkgconfig)
INSTALL(FILES ${_output_gpucublas_file} DESTINATION lib/pkgconfig)
INSTALL(FILES ${_output_gpuhipblas_file} DESTINATION lib/pkgconfig)
ENDMACRO(GENERATE_PKGCONFIG_FILE)
......
......@@ -344,13 +344,13 @@ if (CHAMELEON_USE_MPI)
endif()
if (CHAMELEON_USE_CUDA)
if (NOT CHAMELEON_SIMULATION)
target_link_libraries(chameleon PUBLIC cudablas)
target_link_libraries(chameleon PUBLIC gpucublas)
target_link_libraries(chameleon PUBLIC CUDA::CUBLAS)
endif()
endif()
if (CHAMELEON_USE_HIP)
if (NOT CHAMELEON_SIMULATION)
target_link_libraries(chameleon PUBLIC hipblas)
target_link_libraries(chameleon PUBLIC gpuhipblas)
target_link_libraries(chameleon PUBLIC HIP::HIPBLAS)
endif()
endif()
......
......@@ -27,7 +27,16 @@
*
*******************************************************************************
*
* @param[in,out] uplo
* @param[in] access
* - ChamR: A is accessed in read-only mode.
* - ChamW: A is accessed in write-only mode.
* WARNING: if the descriptor is set for allocation on the fly, the
* flush call included in this synchronous API will free all allocated
* data; prefer the asynchronous call if you want to initialize data
* before submitting another algorithm.
* - ChamRW: A is accessed in read-write mode.
*
* @param[in] uplo
* - ChamUpper: Only the upper triangular part of the matrix is touched
* - ChamLower: Only the lower triangular part of the matrix is touched
* - ChamUpperLower: The entire matrix is touched
......@@ -51,7 +60,8 @@
* @sa CHAMELEON_map_Tile_Async
*
*/
int CHAMELEON_map_Tile( cham_uplo_t uplo,
int CHAMELEON_map_Tile( cham_access_t access,
cham_uplo_t uplo,
CHAM_desc_t *A,
cham_unary_operator_t op_fct,
void *op_args )
......@@ -68,7 +78,7 @@ int CHAMELEON_map_Tile( cham_uplo_t uplo,
}
chameleon_sequence_create( chamctxt, &sequence );
CHAMELEON_map_Tile_Async( uplo, A, op_fct, op_args, sequence, &request );
CHAMELEON_map_Tile_Async( access, uplo, A, op_fct, op_args, sequence, &request );
CHAMELEON_Desc_Flush( A, sequence );
......@@ -89,6 +99,13 @@ int CHAMELEON_map_Tile( cham_uplo_t uplo,
*
*******************************************************************************
*
* @param[in] access
* - ChamR: A is accessed in read-only mode.
* - ChamW: A is accessed in write-only mode.
* INFO: the tiles of A may be unallocated before the call if the
* descriptor is set for allocation on the fly.
* - ChamRW: A is accessed in read-write mode.
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
......@@ -105,7 +122,8 @@ int CHAMELEON_map_Tile( cham_uplo_t uplo,
* @sa CHAMELEON_map_Tile
*
*/
int CHAMELEON_map_Tile_Async( cham_uplo_t uplo,
int CHAMELEON_map_Tile_Async( cham_access_t access,
cham_uplo_t uplo,
CHAM_desc_t *A,
cham_unary_operator_t op_fct,
void *op_args,
......@@ -146,7 +164,7 @@ int CHAMELEON_map_Tile_Async( cham_uplo_t uplo,
return CHAMELEON_SUCCESS;
}
chameleon_pmap( uplo, A, op_fct, op_args, sequence, request );
chameleon_pmap( access, uplo, A, op_fct, op_args, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -20,7 +20,7 @@
/**
* chameleon_pmap - Apply a user-defined operator to the tiles of a matrix.
*/
void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
void chameleon_pmap( cham_access_t access, cham_uplo_t uplo, CHAM_desc_t *A,
cham_unary_operator_t op_fct, void *op_args,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
......@@ -39,12 +39,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
for (m = 0; m < n; m++) {
INSERT_TASK_map(
&options,
ChamUpperLower, A(m, n),
access, ChamUpperLower, A(m, n),
op_fct, op_args );
}
INSERT_TASK_map(
&options,
uplo, A(n, n),
access, uplo, A(n, n),
op_fct, op_args );
}
break;
......@@ -53,12 +53,12 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
for (n = 0; n < A->nt; n++) {
INSERT_TASK_map(
&options,
uplo, A(n, n),
access, uplo, A(n, n),
op_fct, op_args );
for (m = n+1; m < A->mt; m++) {
INSERT_TASK_map(
&options,
ChamUpperLower, A(m, n),
access, ChamUpperLower, A(m, n),
op_fct, op_args );
}
}
......@@ -70,7 +70,7 @@ void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
for (n = 0; n < A->nt; n++) {
INSERT_TASK_map(
&options,
uplo, A(m, n),
access, uplo, A(m, n),
op_fct, op_args );
}
}
......
......@@ -24,6 +24,50 @@
#define A(m,n) A, m, n
#define U(m,n) &(ws->U), m, n
#define IPIV(m) IPIV, m, 1
/*
* Static variable to know how to handle the data within the kernel
* This assumes that only one runtime is enabled at a time.
*/
static RUNTIME_id_t zgetrf_runtime_id = RUNTIME_SCHED_STARPU;
/**
 * Map operator that fills one tile of the IPIV descriptor with 1-based
 * global row indices: entry i of tile row m receives m * mb + i + 1.
 *
 * The uplo, n and op_args arguments are not used by this operator.
 * The filling loop is compiled out when CHAMELEON_SIMULATION is defined.
 *
 * Always returns 0.
 */
static inline int
zgetrf_ipiv_init( const CHAM_desc_t *descIPIV,
                  cham_uplo_t uplo, int m, int n,
                  CHAM_tile_t *tileIPIV, void *op_args )
{
    /* With PaRSEC the tile argument is used directly as the integer buffer */
    int *IPIV = (int*)tileIPIV;
    (void)op_args;

    /* Every other runtime goes through the tile accessor */
    if ( zgetrf_runtime_id != RUNTIME_SCHED_PARSEC ) {
        IPIV = CHAM_tile_get_ptr( tileIPIV );
    }

#if !defined(CHAMELEON_SIMULATION)
    {
        int first = m * descIPIV->mb;
        /* The last tile row may hold fewer than mb entries */
        int nrows = ( m == descIPIV->mt-1 ) ? descIPIV->m - first : descIPIV->mb;
        int row;

        for( row = 0; row < nrows; row++ ) {
            IPIV[row] = first + row + 1;
        }
    }
#endif

    return 0;
}
/**
 * Submit the initialization of the whole IPIV descriptor: zgetrf_ipiv_init is
 * applied through chameleon_pmap with ChamW access on ChamUpperLower.
 */
static inline void
chameleon_pzgetrf_ipiv_init( CHAM_desc_t        *IPIV,
                             RUNTIME_sequence_t *sequence,
                             RUNTIME_request_t  *request )
{
    chameleon_pmap( ChamW, ChamUpperLower, IPIV,
                    zgetrf_ipiv_init, NULL,
                    sequence, request );
}
/*
* All the functions below are panel factorization variant.
......@@ -113,11 +157,13 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
int k,
RUNTIME_option_t *options )
{
#if defined(GETRF_NOPIV_PER_COLUMN)
chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options );
#else
chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options );
#endif
/* TODO: Should be replaced by a function pointer */
if ( ws->alg == ChamGetrfNoPivPerColumn ) {
chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options );
}
else {
chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options );
}
}
/**
......@@ -180,9 +226,10 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
* Parallel tile LU factorization with no pivoting - dynamic scheduling
*/
void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
CHAM_desc_t *A,
CHAM_desc_t *IPIV,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
......@@ -196,6 +243,9 @@ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
}
RUNTIME_options_init( &options, chamctxt, sequence, request );
/* Initialize IPIV */
chameleon_pzgetrf_ipiv_init( IPIV, sequence, request );
for (k = 0; k < min_mnt; k++) {
RUNTIME_iteration_push( chamctxt, k );
......
......@@ -165,7 +165,7 @@ void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym
for (n = 0; n < kt; n++) {
INSERT_TASK_map(
&options,
ChamUpperLower, A(n, n),
ChamRW, ChamUpperLower, A(n, n),
zlaset_diag, D );
}
......
......@@ -52,26 +52,44 @@
void *
CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
{
CHAM_context_t *chamctxt;
struct chameleon_pzgetrf_s *options;
CHAM_context_t *chamctxt;
struct chameleon_pzgetrf_s *ws;
chamctxt = chameleon_context_self();
if ( chamctxt == NULL ) {
return NULL;
}
options = calloc( 1, sizeof( struct chameleon_pzgetrf_s ) );
options->ib = CHAMELEON_IB;
ws = calloc( 1, sizeof( struct chameleon_pzgetrf_s ) );
ws->alg = ChamGetrfNoPiv;
ws->ib = CHAMELEON_IB;
{
    /*
     * Optional selection of the factorization variant through the
     * CHAMELEON_GETRF_ALGO environment variable (compared case-insensitively).
     * When the variable is unset or unrecognized, ws->alg keeps the value
     * assigned before this block.
     */
    char *algostr = chameleon_getenv( "CHAMELEON_GETRF_ALGO" );

    if ( algostr ) {
        /* strcasecmp() returns 0 on a match: both branches must test == 0 */
        if ( strcasecmp( algostr, "nopiv" ) == 0 ) {
            ws->alg = ChamGetrfNoPiv;
        }
        else if ( strcasecmp( algostr, "nopivpercolumn" ) == 0 ) {
            ws->alg = ChamGetrfNoPivPerColumn;
        }
        else {
            fprintf( stderr, "ERROR: CHAMELEON_GETRF_ALGO is not one of NoPiv, NoPivPerColumn => Switch back to NoPiv\n" );
        }
    }
    chameleon_cleanenv( algostr );
}
#if defined(GETRF_NOPIV_PER_COLUMN)
chameleon_desc_init( &(options->U), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, 1, A->nb, A->nb,
A->mt, A->nt * A->nb, 0, 0,
A->mt, A->nt * A->nb, A->p, A->q,
NULL, NULL, A->get_rankof_init );
#endif
if ( ws->alg == ChamGetrfNoPivPerColumn ) {
chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, 1, A->nb, A->nb,
A->mt, A->nt * A->nb, 0, 0,
A->mt, A->nt * A->nb, A->p, A->q,
NULL, NULL, A->get_rankof_init );
}
return options;
return ws;
}
/**
......@@ -94,14 +112,13 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
*
*/
void
CHAMELEON_zgetrf_WS_Free( const CHAM_desc_t *A, void *user_ws )
CHAMELEON_zgetrf_WS_Free( void *user_ws )
{
struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)user_ws;
#if defined(GETRF_NOPIV_PER_COLUMN)
chameleon_desc_destroy( &(ws->U) );
#endif
if ( ws->alg == ChamGetrfNoPivPerColumn ) {
chameleon_desc_destroy( &(ws->U) );
}
free( ws );
}
......@@ -150,7 +167,7 @@ CHAMELEON_zgetrf_WS_Free( const CHAM_desc_t *A, void *user_ws )
*
*/
int
CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA )
CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
{
int NB;
int status;
......@@ -210,7 +227,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA )
chameleon_sequence_wait( chamctxt, sequence );
/* Cleanup the temporary data */
CHAMELEON_zgetrf_WS_Free( &descAt, ws );
CHAMELEON_zgetrf_WS_Free( ws );
chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
status = sequence->status;
......@@ -254,7 +271,7 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA )
*
*/
int
CHAMELEON_zgetrf_Tile( CHAM_desc_t *A )
CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_desc_t *IPIV )
{
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
......@@ -270,12 +287,11 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A )
chameleon_sequence_create( chamctxt, &sequence );
ws = CHAMELEON_zgetrf_WS_Alloc( A );
CHAMELEON_zgetrf_Tile_Async( A, ws, sequence, &request );
CHAMELEON_zgetrf_Tile_Async( A, IPIV, ws, sequence, &request );
CHAMELEON_Desc_Flush( A, sequence );
chameleon_sequence_wait( chamctxt, sequence );
CHAMELEON_zgetrf_WS_Free( A, ws );
CHAMELEON_zgetrf_WS_Free( ws );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
......@@ -317,11 +333,13 @@ CHAMELEON_zgetrf_Tile( CHAM_desc_t *A )
*/
int
CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A,
CHAM_desc_t *IPIV,
void *user_ws,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
CHAM_context_t *chamctxt;
struct chameleon_pzgetrf_s *ws;
chamctxt = chameleon_context_self();
if ( chamctxt == NULL ) {
......@@ -357,14 +375,38 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A,
chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid first descriptor" );
return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
}
if ( chameleon_desc_check( IPIV ) != CHAMELEON_SUCCESS ) {
chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid second descriptor" );
return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
}
/* Check input arguments */
if ( A->nb != A->mb ) {
chameleon_error( "CHAMELEON_zgetrf_Tile", "only square tiles supported" );
return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
}
if ( IPIV->mb != A->mb ) {
chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must have the number of rows as tiles of A" );
return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
}
if ( IPIV->nb != 1 ) {
chameleon_error( "CHAMELEON_zgetrf_Tile", "IPIV tiles must be vectore with only one column per tile" );
return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
}
if ( user_ws == NULL ) {
ws = CHAMELEON_zgetrf_WS_Alloc( A );
}
else {
ws = user_ws;
}
chameleon_pzgetrf( user_ws, A, sequence, request );
chameleon_pzgetrf( user_ws, A, IPIV, sequence, request );
if ( user_ws == NULL ) {
CHAMELEON_Desc_Flush( A, sequence );
chameleon_sequence_wait( chamctxt, sequence );
CHAMELEON_zgetrf_WS_Free( ws );
}
return CHAMELEON_SUCCESS;
}
......@@ -280,8 +280,8 @@ int CHAMELEON_zlacpy_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (A->nb != A->mb) {
chameleon_error("CHAMELEON_zlacpy_Tile_Async", "only square tiles supported");
if ((A->mb != B->mb) || (A->nb != B->nb) ){
chameleon_error("CHAMELEON_zlacpy_Tile_Async", "only matching tile sizes supported");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
......
......@@ -152,7 +152,7 @@ int CHAMELEON_zprint( FILE *file, const char *header,
/* Call the tile interface */
zprint_runtime_id = chamctxt->scheduler;
chameleon_pmap( uplo, &descAt, zprint, &options, sequence, &request );
chameleon_pmap( ChamR, uplo, &descAt, zprint, &options, sequence, &request );
/* Submit the matrix conversion back */
chameleon_ztile2lap( chamctxt, &descAl, &descAt,
......@@ -216,7 +216,7 @@ int CHAMELEON_zprint_Tile( FILE *file, const char *header,
chameleon_sequence_create( chamctxt, &sequence );
zprint_runtime_id = chamctxt->scheduler;
chameleon_pmap( uplo, A, zprint, &options, sequence, &request );
chameleon_pmap( ChamR, uplo, A, zprint, &options, sequence, &request );
CHAMELEON_Desc_Flush( A, sequence );
chameleon_sequence_wait( chamctxt, sequence );
......
......@@ -102,7 +102,7 @@ extern char *chameleon_lapack_constants[];
extern "C" {
#endif
void chameleon_pmap( cham_uplo_t uplo, CHAM_desc_t *A,
void chameleon_pmap( cham_access_t access, cham_uplo_t uplo, CHAM_desc_t *A,
cham_unary_operator_t operator, void *op_args,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
......@@ -127,7 +127,7 @@ static inline int chameleon_asprintf( char **strp, const char *fmt, ... )
int rc;
va_start( ap, fmt );
rc = asprintf( strp, fmt, ap );
rc = vasprintf( strp, fmt, ap );
va_end( ap );
assert( rc != -1 );
......
......@@ -41,8 +41,9 @@ struct chameleon_pzgemm_s {
* @brief Data structure to handle the GETRF workspaces with partial pivoting
*/
struct chameleon_pzgetrf_s {
int ib; /* Internal blocking parameter */
CHAM_desc_t U;
cham_getrf_t alg;
int ib; /* Internal blocking parameter */
CHAM_desc_t U;
};
/**
......@@ -86,7 +87,7 @@ void chameleon_pzgepdf_qdwh( cham_mtxtype_t trans, CHAM_desc_t *descU, CHAM_desc
void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_desc_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
......
......@@ -376,7 +376,8 @@ int chameleon_desc_check(const CHAM_desc_t *desc)
chameleon_error("chameleon_desc_check", "NULL matrix pointer");
return CHAMELEON_ERR_UNALLOCATED;
}
if (desc->dtyp != ChamRealFloat &&
if (desc->dtyp != ChamInteger &&
desc->dtyp != ChamRealFloat &&
desc->dtyp != ChamRealDouble &&
desc->dtyp != ChamComplexFloat &&
desc->dtyp != ChamComplexDouble ) {
......
......@@ -67,10 +67,10 @@ The libraries are organized as follows :
- __chameleon_quark|openmp|parsec|starpu__ : interface to the
different runtimes, depends on "coreblas" and optionally on
"cudablas" and on a runtime system library
"gpucublas" or "gpuhipblas" and on a runtime system library
- __coreblas__ and __cudablas__ : interfaces to the CPU and GPU
kernels
- __coreblas__ and __gpucublas__ or __gpuhipblas__ :
interfaces to the CPU and GPU kernels
- __hqr__ : HQR is a C library providing tools to generate hierarchical
trees adapted to 2D block-cyclic data distribution and algorithms
......@@ -91,14 +91,16 @@ Lets have a look to the source code organization in directories.
- __coreblas__ : the Chameleon interface to CPU linear algebra kernels
- __cudablas__ : the Chameleon interface to GPU linear algebra kernels
- __distrib__ : some hints to install Chameleon's dependencies
- __doc__ : user and developer documentation
- __example__ : couple of C files to show how to use Chameleon
- __gpucublas__ : the Chameleon interface to GPU linear algebra kernels (cublas)
- __gpuhipblas__ : the Chameleon interface to GPU linear algebra kernels (hipblas)
- __hqr__ : [HQR](https://gitlab.inria.fr/solverstack/hqr) is a C
library providing tools to generate hierarchical trees adapted to 2D
block-cyclic data distribution and algorithms based on tiled
......@@ -106,6 +108,8 @@ QR/algorithms
- __include__ : Chameleon's header files necessary for users
- __lapack_api__ : the CBLAS/LAPACKE-like Chameleon interface
- __lib__ : material related to the distribution
- __plasma-conversion__ : scripts to convert plasma task based
......
File moved
......@@ -27,7 +27,7 @@
# Generate the chameleon sources for all possible precisions
# ------------------------------------------------------
set(CUDABLAS_SRCS_GENERATED "")
set(GPUCUBLAS_SRCS_GENERATED "")
set(ZSRC
cuda_zgeadd.c
cuda_zgemerge.c
......@@ -72,46 +72,46 @@ set(ZSRC
# endif()
precisions_rules_py(
CUDABLAS_SRCS_GENERATED "${ZSRC}"
GPUCUBLAS_SRCS_GENERATED "${ZSRC}"
PRECISIONS "${CHAMELEON_PRECISION}")
set(CUDABLAS_SRCS
${CUDABLAS_SRCS_GENERATED}
set(GPUCUBLAS_SRCS
${GPUCUBLAS_SRCS_GENERATED}
cudaglobal.c
)
# Force generation of sources
# ---------------------------
add_custom_target(cudablas_sources ALL SOURCES ${CUDABLAS_SRCS})
set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};cudablas_sources" CACHE INTERNAL "List of targets of sources")
add_custom_target(gpucublas_sources ALL SOURCES ${GPUCUBLAS_SRCS})
set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};gpucublas_sources" CACHE INTERNAL "List of targets of sources")
# Compile step
# ------------
add_library(cudablas ${CUDABLAS_SRCS})
set_target_properties(cudablas PROPERTIES VERSION ${CHAMELEON_VERSION})
set_target_properties(cudablas PROPERTIES SOVERSION ${CHAMELEON_VERSION_MAJOR})
add_dependencies(cudablas cudablas_include cudablas_sources)
target_include_directories(cudablas PUBLIC
$<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/cudablas/include>
$<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/cudablas/include>
add_library(gpucublas ${GPUCUBLAS_SRCS})
set_target_properties(gpucublas PROPERTIES VERSION ${CHAMELEON_VERSION})
set_target_properties(gpucublas PROPERTIES SOVERSION ${CHAMELEON_VERSION_MAJOR})
add_dependencies(gpucublas gpucublas_include gpucublas_sources)
target_include_directories(gpucublas PUBLIC
$<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/gpucublas/include>
$<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/gpucublas/include>
$<BUILD_INTERFACE:${CHAMELEON_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CHAMELEON_BINARY_DIR}/include>
$<INSTALL_INTERFACE:include>)
set_property(TARGET cudablas PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
set_property(TARGET gpucublas PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
target_link_libraries(cudablas PRIVATE coreblas CUDA::CUBLAS)
target_link_libraries(cudablas PUBLIC MORSE::M)
target_link_libraries(gpucublas PRIVATE coreblas CUDA::CUBLAS)
target_link_libraries(gpucublas PUBLIC MORSE::M)
# export target gpucublas
install(EXPORT cudablasTargets
install(EXPORT gpucublasTargets
NAMESPACE CHAMELEON::
DESTINATION lib/cmake/chameleon
)
# installation
# ------------
install(TARGETS cudablas
EXPORT cudablasTargets
install(TARGETS gpucublas
EXPORT gpucublasTargets
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
)
......
......@@ -17,7 +17,7 @@
* @precisions normal z -> c d s
*
*/
#include "cudablas.h"
#include "gpucublas.h"
/**
******************************************************************************
......