Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 18416866 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

It seems that almost everything is back

parent 3df078f5
No related branches found
No related tags found
No related merge requests found
Showing
with 64 additions and 129 deletions
......@@ -161,15 +161,15 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/cmake_modules/")
endif()
# Use intermediate variable since cmake_dependent_option doesn't have OR conditions
set(CHAMELEON_ENABLE_MPI OFF CACHE BOOL "Tells if MPI might be supported by the runtime")
set(CHAMELEON_ENABLE_MPI OFF CACHE INTERNAL "Tells if MPI might be supported by the runtime")
if ( CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU )
set(CHAMELEON_ENABLE_MPI ON)
set(CHAMELEON_ENABLE_MPI ON FORCE)
endif()
# Use intermediate variable since cmake_dependent_option doesn't have OR conditions
set(CHAMELEON_ENABLE_CUDA OFF CACHE BOOL "Tells if CUDA might be supported by the runtime")
set(CHAMELEON_ENABLE_CUDA OFF CACHE INTERNAL "Tells if CUDA might be supported by the runtime")
if ( CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU )
set(CHAMELEON_ENABLE_CUDA ON)
set(CHAMELEON_ENABLE_CUDA ON FORCE)
endif()
# Additional options
......
......@@ -26,6 +26,18 @@
#
###
option(CHAMELEON_COPY_DIAG
"This options enables the duplication of the diagonal tiles in some algorithm to avoid anti-dependencies on lower/upper triangular parts (Might be useful to StarPU)" ON)
mark_as_advanced(CHAMELEON_COPY_DIAG)
if ( CHAMELEON_SCHED_QUARK )
# No need for those extra diagonale tiles
set( CHAMELEON_COPY_DIAG OFF )
endif()
if (CHAMELEON_COPY_DIAG)
add_definitions(-DCHAMELEON_COPY_DIAG)
endif()
# Define the list of sources
# --------------------------
......
......@@ -53,7 +53,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
int k, m, n;
int ldak, ldam;
int tempkm, tempkn, tempmm, tempnn;
int ib;
int ib, minMT;
morse = morse_context_self();
if (sequence->status != MORSE_SUCCESS)
......@@ -62,6 +62,12 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
ib = MORSE_IB;
if (A->m > A->n) {
minMT = A->nt;
} else {
minMT = A->mt;
}
/*
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
......
......@@ -87,9 +87,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
*/
ws_worker = max( ws_worker, ib * (ib + A->nb) );
ws_worker = max( ws_worker, ib * A->nb * 2 );
#endif
#if defined(CHAMELEON_USE_MAGMA)
/* Host space
*
* zgeqrt = ib * (A->nb+3*ib) + A->nb )
......
......@@ -143,13 +143,6 @@
#define MORSE_MPI_SIZE morse->mpi_comm_size
#endif
/*******************************************************************************
* Activate copy of diagonal tile (StarPU only) for some tile algorithms (pz)
**/
#if defined(CHAMELEON_SCHED_STARPU)
#define CHAMELEON_COPY_DIAG
#endif
/*******************************************************************************
* IPT internal define
**/
......
......@@ -102,7 +102,7 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_seq
void morse_pzgetrf_nopiv(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzgetrf_reclap(MORSE_desc_t *A, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzgetrf_rectil(MORSE_desc_t *A, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzhbcpy_t2bl(MORSE_enum uplo, MORSE_desc_t *A, MORSE_Complex64_t *AB, int LDAB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzhbcpy_t2bl(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *AB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzhegst(MORSE_enum itype, MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
#ifdef COMPLEX
void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
......@@ -145,12 +145,13 @@ int MORSE_Finalize(void)
#if defined(CHAMELEON_USE_MAGMA) && !defined(CHAMELEON_SIMULATION)
magma_finalize();
#endif
morse_context_destroy();
#if defined(CHAMELEON_USE_MPI)
if (!morse->mpi_outer_init)
MPI_Finalize();
#endif
morse_context_destroy();
return MORSE_SUCCESS;
}
......
......@@ -177,6 +177,7 @@ inline static int morse_getrankof_2d(const MORSE_desc_t *desc, int m, int n)
**/
inline static int morse_getrankof_2d_diag(const MORSE_desc_t *desc, int m, int n)
{
assert( n == 0 );
return (m % desc->p) * desc->q + (m % desc->q);
}
......
......@@ -27,33 +27,40 @@
# ------------------------------------------------------
set(CUDABLAS_SRCS_GENERATED "")
set(ZSRC
cuda_zgelqt.c
cuda_zgemerge.c
cuda_zgemm.c
cuda_zgeqrt.c
cuda_zgessm.c
cuda_zgetrf.c
cuda_zhemm.c
cuda_zher2k.c
cuda_zherk.c
cuda_zlauum.c
cuda_zparfb.c
cuda_zpotrf.c
cuda_zssssm.c
cuda_zsymm.c
cuda_zsyr2k.c
cuda_zsyrk.c
cuda_ztrmm.c
cuda_ztrsm.c
cuda_ztrtri.c
cuda_ztslqt.c
cuda_ztsmlq.c
cuda_ztsmqr.c
)
if( CHAMELEON_USE_MAGMA )
set(ZSRC
${ZSRC}
cuda_zgelqt.c
cuda_zgeqrt.c
cuda_zgessm.c
cuda_zgetrf.c
cuda_zlauum.c
cuda_zparfb.c
cuda_zpotrf.c
cuda_zssssm.c
cuda_ztrtri.c
cuda_ztslqt.c
cuda_ztsqrt.c
cuda_ztstrf.c
cuda_zunmlqt.c
cuda_zunmqrt.c
)
endif()
precisions_rules_py(CUDABLAS_SRCS_GENERATED "${ZSRC}"
PRECISIONS "${CHAMELEON_PRECISION}")
......
......@@ -24,28 +24,15 @@
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUBLAS_V2)
int CUDA_zgemerge( MORSE_enum side, MORSE_enum diag,
int M, int N, cuDoubleComplex *A, int LDA,
cuDoubleComplex *B, int LDB, CUBLAS_STREAM_PARAM)
int
CUDA_zgemerge( MORSE_enum side, MORSE_enum diag,
int M, int N,
cuDoubleComplex *A, int LDA,
cuDoubleComplex *B, int LDB,
CUBLAS_STREAM_PARAM)
{
int i, j;
magmaDoubleComplex *cola, *colb;
cublasHandle_t handle;
cublasStatus_t stat;
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cuDoubleComplex *cola, *colb;
if (M < 0) {
return -1;
......@@ -60,55 +47,9 @@ int CUDA_zgemerge( MORSE_enum side, MORSE_enum diag,
return -7;
}
if (side == MagmaLeft){
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(handle, i+1, cola, 1, colb, 1);
cudaMemcpyAsync(colb , cola,
(i+1)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
}
}else{
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(handle, M-i, cola + i, 1, colb + i, 1);
cudaMemcpyAsync(colb+i , cola+i,
(M-i)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
}
}
cublasDestroy(handle);
return MORSE_SUCCESS;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
int CUDA_zgemerge(
magma_side_t side, magma_diag_t diag,
magma_int_t M, magma_int_t N,
magmaDoubleComplex *A, magma_int_t LDA,
magmaDoubleComplex *B, magma_int_t LDB,
CUstream stream)
{
int i, j;
magmaDoubleComplex *cola, *colb;
if (M < 0) {
return -1;
}
if (N < 0) {
return -2;
}
if ( (LDA < max(1,M)) && (M > 0) ) {
return -5;
}
if ( (LDB < max(1,M)) && (M > 0) ) {
return -7;
}
CUBLAS_GET_STREAM;
if (side == MagmaLeft){
if (side == MorseLeft){
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
......@@ -128,5 +69,3 @@ int CUDA_zgemerge(
return MORSE_SUCCESS;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif
......@@ -55,7 +55,7 @@ int CUDA_ztsmlq(
NW = IB;
}
else {
NW = M1;
NW = N1;
}
if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) {
......
......@@ -194,8 +194,8 @@ static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA)
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -129,11 +129,9 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
B, ldb,
beta, C, ldc);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum transA;
......@@ -172,9 +170,7 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else /* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum transA;
......@@ -213,9 +209,9 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -151,10 +151,9 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
WORK = TAU + max( m, n );
CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK);
}
#endif //!defined(CHAMELEON_SIMULATION)
#if defined(CHAMELEON_USE_MAGMA)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg)
{
MORSE_starpu_ws_t *h_work;
......@@ -196,8 +195,8 @@ static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA)
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -133,10 +133,8 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda);
CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda);
}
#endif //!defined(CHAMELEON_SIMULATION)
#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgessm_cuda_func(void *descr[], void *cl_arg)
{
int m;
......@@ -163,8 +161,8 @@ static void cl_zgessm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -171,13 +171,12 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
}
#endif
}
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet GPU
*/
#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgetrf_incpiv_cuda_func(void *descr[], void *cl_arg)
{
int m;
......@@ -228,8 +227,8 @@ static void cl_zgetrf_incpiv_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#endif //!defined(CHAMELEON_SIMULATION)
/*
......
......@@ -119,13 +119,11 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo);
CORE_zgetrf_nopiv(m, n, ib, A, lda, &info);
}
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet GPU
*/
#if defined(CHAMELEON_USE_MAGMA)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zgetrf_nopiv_cuda_func(void *descr[], void *cl_arg)
{
int m;
......@@ -142,8 +140,8 @@ static void cl_zgetrf_nopiv_cuda_func(void *descr[], void *cl_arg)
CUDA_zgetrf_nopiv( m, n, dA, lda, &info );
cudaThreadSynchronize();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -99,11 +99,9 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg)
B, LDB,
beta, C, LDC);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
......@@ -143,9 +141,7 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else /* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
......@@ -183,9 +179,9 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -96,11 +96,9 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg)
CORE_zher2k(uplo, trans,
n, k, alpha, A, lda, B, ldb, beta, C, ldc);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -135,9 +133,7 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else /* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -171,9 +167,9 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
......@@ -90,11 +90,9 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg)
alpha, A, lda,
beta, C, ldc);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -132,9 +130,7 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else /* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum uplo;
......@@ -168,9 +164,9 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
return;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment