Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Chameleon
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
AGULLO Emmanuel
Chameleon
Commits
18416866
Commit
18416866
authored
Dec 01, 2016
by
Mathieu Faverge
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
It seems that almost everything is back
parent
3df078f5
Changes
38
Hide whitespace changes
Inline
Side-by-side
Showing
38 changed files
with
95 additions
and
200 deletions
+95
-200
CMakeLists.txt
CMakeLists.txt
+4
-4
compute/CMakeLists.txt
compute/CMakeLists.txt
+12
-0
compute/pzgelqf.c
compute/pzgelqf.c
+7
-1
compute/pzgeqrfrh.c
compute/pzgeqrfrh.c
+0
-2
control/common.h
control/common.h
+0
-7
control/compute_z.h
control/compute_z.h
+1
-1
control/control.c
control/control.c
+2
-1
control/descriptor.h
control/descriptor.h
+1
-0
cudablas/compute/CMakeLists.txt
cudablas/compute/CMakeLists.txt
+16
-9
cudablas/compute/cuda_zgemerge.c
cudablas/compute/cuda_zgemerge.c
+9
-70
cudablas/compute/cuda_ztsmlq.c
cudablas/compute/cuda_ztsmlq.c
+1
-1
runtime/starpu/codelets/codelet_zgelqt.c
runtime/starpu/codelets/codelet_zgelqt.c
+1
-1
runtime/starpu/codelets/codelet_zgemm.c
runtime/starpu/codelets/codelet_zgemm.c
+1
-5
runtime/starpu/codelets/codelet_zgeqrt.c
runtime/starpu/codelets/codelet_zgeqrt.c
+2
-3
runtime/starpu/codelets/codelet_zgessm.c
runtime/starpu/codelets/codelet_zgessm.c
+1
-3
runtime/starpu/codelets/codelet_zgetrf_incpiv.c
runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+2
-3
runtime/starpu/codelets/codelet_zgetrf_nopiv.c
runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+1
-3
runtime/starpu/codelets/codelet_zhemm.c
runtime/starpu/codelets/codelet_zhemm.c
+1
-5
runtime/starpu/codelets/codelet_zher2k.c
runtime/starpu/codelets/codelet_zher2k.c
+1
-5
runtime/starpu/codelets/codelet_zherk.c
runtime/starpu/codelets/codelet_zherk.c
+1
-5
runtime/starpu/codelets/codelet_zlauum.c
runtime/starpu/codelets/codelet_zlauum.c
+1
-3
runtime/starpu/codelets/codelet_zpotrf.c
runtime/starpu/codelets/codelet_zpotrf.c
+1
-3
runtime/starpu/codelets/codelet_zssssm.c
runtime/starpu/codelets/codelet_zssssm.c
+1
-3
runtime/starpu/codelets/codelet_zsymm.c
runtime/starpu/codelets/codelet_zsymm.c
+1
-5
runtime/starpu/codelets/codelet_zsyr2k.c
runtime/starpu/codelets/codelet_zsyr2k.c
+1
-5
runtime/starpu/codelets/codelet_zsyrk.c
runtime/starpu/codelets/codelet_zsyrk.c
+1
-5
runtime/starpu/codelets/codelet_ztrmm.c
runtime/starpu/codelets/codelet_ztrmm.c
+1
-5
runtime/starpu/codelets/codelet_ztrsm.c
runtime/starpu/codelets/codelet_ztrsm.c
+1
-5
runtime/starpu/codelets/codelet_ztrtri.c
runtime/starpu/codelets/codelet_ztrtri.c
+1
-3
runtime/starpu/codelets/codelet_ztslqt.c
runtime/starpu/codelets/codelet_ztslqt.c
+1
-3
runtime/starpu/codelets/codelet_ztsmlq.c
runtime/starpu/codelets/codelet_ztsmlq.c
+1
-3
runtime/starpu/codelets/codelet_ztsmqr.c
runtime/starpu/codelets/codelet_ztsmqr.c
+2
-3
runtime/starpu/codelets/codelet_ztsqrt.c
runtime/starpu/codelets/codelet_ztsqrt.c
+2
-3
runtime/starpu/codelets/codelet_ztstrf.c
runtime/starpu/codelets/codelet_ztstrf.c
+2
-3
runtime/starpu/codelets/codelet_zunmlq.c
runtime/starpu/codelets/codelet_zunmlq.c
+1
-3
runtime/starpu/codelets/codelet_zunmqr.c
runtime/starpu/codelets/codelet_zunmqr.c
+1
-3
runtime/starpu/control/runtime_descriptor.c
runtime/starpu/control/runtime_descriptor.c
+10
-7
runtime/starpu/control/runtime_workspace.c
runtime/starpu/control/runtime_workspace.c
+2
-6
No files found.
CMakeLists.txt
View file @
18416866
...
...
@@ -161,15 +161,15 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/cmake_modules/")
endif
()
# Use intermediate variable since cmake_dependent_option doesn't have OR conditions
set
(
CHAMELEON_ENABLE_MPI OFF CACHE
BOO
L
"Tells if MPI might be supported by the runtime"
)
set
(
CHAMELEON_ENABLE_MPI OFF CACHE
INTERNA
L
"Tells if MPI might be supported by the runtime"
)
if
(
CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU
)
set
(
CHAMELEON_ENABLE_MPI ON
)
set
(
CHAMELEON_ENABLE_MPI ON
FORCE
)
endif
()
# Use intermediate variable since cmake_dependent_option doesn't have OR conditions
set
(
CHAMELEON_ENABLE_CUDA OFF CACHE
BOO
L
"Tells if CUDA might be supported by the runtime"
)
set
(
CHAMELEON_ENABLE_CUDA OFF CACHE
INTERNA
L
"Tells if CUDA might be supported by the runtime"
)
if
(
CHAMELEON_SCHED_PARSEC OR CHAMELEON_SCHED_STARPU
)
set
(
CHAMELEON_ENABLE_CUDA ON
)
set
(
CHAMELEON_ENABLE_CUDA ON
FORCE
)
endif
()
# Additional options
...
...
compute/CMakeLists.txt
View file @
18416866
...
...
@@ -26,6 +26,18 @@
#
###
option
(
CHAMELEON_COPY_DIAG
"This options enables the duplication of the diagonal tiles in some algorithm to avoid anti-dependencies on lower/upper triangular parts (Might be useful to StarPU)"
ON
)
mark_as_advanced
(
CHAMELEON_COPY_DIAG
)
if
(
CHAMELEON_SCHED_QUARK
)
# No need for those extra diagonale tiles
set
(
CHAMELEON_COPY_DIAG OFF
)
endif
()
if
(
CHAMELEON_COPY_DIAG
)
add_definitions
(
-DCHAMELEON_COPY_DIAG
)
endif
()
# Define the list of sources
# --------------------------
...
...
compute/pzgelqf.c
View file @
18416866
...
...
@@ -53,7 +53,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
int
k
,
m
,
n
;
int
ldak
,
ldam
;
int
tempkm
,
tempkn
,
tempmm
,
tempnn
;
int
ib
;
int
ib
,
minMT
;
morse
=
morse_context_self
();
if
(
sequence
->
status
!=
MORSE_SUCCESS
)
...
...
@@ -62,6 +62,12 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
ib
=
MORSE_IB
;
if
(
A
->
m
>
A
->
n
)
{
minMT
=
A
->
nt
;
}
else
{
minMT
=
A
->
mt
;
}
/*
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
...
...
compute/pzgeqrfrh.c
View file @
18416866
...
...
@@ -87,9 +87,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
*/
ws_worker
=
max
(
ws_worker
,
ib
*
(
ib
+
A
->
nb
)
);
ws_worker
=
max
(
ws_worker
,
ib
*
A
->
nb
*
2
);
#endif
#if defined(CHAMELEON_USE_MAGMA)
/* Host space
*
* zgeqrt = ib * (A->nb+3*ib) + A->nb )
...
...
control/common.h
View file @
18416866
...
...
@@ -143,13 +143,6 @@
#define MORSE_MPI_SIZE morse->mpi_comm_size
#endif
/*******************************************************************************
* Activate copy of diagonal tile (StarPU only) for some tile algorithms (pz)
**/
#if defined(CHAMELEON_SCHED_STARPU)
#define CHAMELEON_COPY_DIAG
#endif
/*******************************************************************************
* IPT internal define
**/
...
...
control/compute_z.h
View file @
18416866
...
...
@@ -102,7 +102,7 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_seq
void
morse_pzgetrf_nopiv
(
MORSE_desc_t
*
A
,
MORSE_sequence_t
*
sequence
,
MORSE_request_t
*
request
);
void
morse_pzgetrf_reclap
(
MORSE_desc_t
*
A
,
int
*
IPIV
,
MORSE_sequence_t
*
sequence
,
MORSE_request_t
*
request
);
void
morse_pzgetrf_rectil
(
MORSE_desc_t
*
A
,
int
*
IPIV
,
MORSE_sequence_t
*
sequence
,
MORSE_request_t
*
request
);
void
morse_pzhbcpy_t2bl
(
MORSE_enum
uplo
,
MORSE_desc_t
*
A
,
MORSE_
Complex64_t
*
AB
,
int
LD
AB
,
MORSE_sequence_t
*
sequence
,
MORSE_request_t
*
request
);
void
morse_pzhbcpy_t2bl
(
MORSE_enum
uplo
,
MORSE_desc_t
*
A
,
MORSE_
desc_t
*
AB
,
MORSE_sequence_t
*
sequence
,
MORSE_request_t
*
request
);
void
morse_pzhegst
(
MORSE_enum
itype
,
MORSE_enum
uplo
,
MORSE_desc_t
*
A
,
MORSE_desc_t
*
B
,
MORSE_sequence_t
*
sequence
,
MORSE_request_t
*
request
);
#ifdef COMPLEX
void
morse_pzhemm
(
MORSE_enum
side
,
MORSE_enum
uplo
,
MORSE_Complex64_t
alpha
,
MORSE_desc_t
*
A
,
MORSE_desc_t
*
B
,
MORSE_Complex64_t
beta
,
MORSE_desc_t
*
C
,
MORSE_sequence_t
*
sequence
,
MORSE_request_t
*
request
);
...
...
control/control.c
View file @
18416866
...
...
@@ -145,12 +145,13 @@ int MORSE_Finalize(void)
#if defined(CHAMELEON_USE_MAGMA) && !defined(CHAMELEON_SIMULATION)
magma_finalize
();
#endif
morse_context_destroy
();
#if defined(CHAMELEON_USE_MPI)
if
(
!
morse
->
mpi_outer_init
)
MPI_Finalize
();
#endif
morse_context_destroy
();
return
MORSE_SUCCESS
;
}
...
...
control/descriptor.h
View file @
18416866
...
...
@@ -177,6 +177,7 @@ inline static int morse_getrankof_2d(const MORSE_desc_t *desc, int m, int n)
**/
inline
static
int
morse_getrankof_2d_diag
(
const
MORSE_desc_t
*
desc
,
int
m
,
int
n
)
{
assert
(
n
==
0
);
return
(
m
%
desc
->
p
)
*
desc
->
q
+
(
m
%
desc
->
q
);
}
...
...
cudablas/compute/CMakeLists.txt
View file @
18416866
...
...
@@ -27,33 +27,40 @@
# ------------------------------------------------------
set
(
CUDABLAS_SRCS_GENERATED
""
)
set
(
ZSRC
cuda_zgelqt.c
cuda_zgemerge.c
cuda_zgemm.c
cuda_zgeqrt.c
cuda_zgessm.c
cuda_zgetrf.c
cuda_zhemm.c
cuda_zher2k.c
cuda_zherk.c
cuda_zlauum.c
cuda_zparfb.c
cuda_zpotrf.c
cuda_zssssm.c
cuda_zsymm.c
cuda_zsyr2k.c
cuda_zsyrk.c
cuda_ztrmm.c
cuda_ztrsm.c
cuda_ztrtri.c
cuda_ztslqt.c
cuda_ztsmlq.c
cuda_ztsmqr.c
)
if
(
CHAMELEON_USE_MAGMA
)
set
(
ZSRC
${
ZSRC
}
cuda_zgelqt.c
cuda_zgeqrt.c
cuda_zgessm.c
cuda_zgetrf.c
cuda_zlauum.c
cuda_zparfb.c
cuda_zpotrf.c
cuda_zssssm.c
cuda_ztrtri.c
cuda_ztslqt.c
cuda_ztsqrt.c
cuda_ztstrf.c
cuda_zunmlqt.c
cuda_zunmqrt.c
)
endif
()
precisions_rules_py
(
CUDABLAS_SRCS_GENERATED
"
${
ZSRC
}
"
PRECISIONS
"
${
CHAMELEON_PRECISION
}
"
)
...
...
cudablas/compute/cuda_zgemerge.c
View file @
18416866
...
...
@@ -24,28 +24,15 @@
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUBLAS_V2)
int
CUDA_zgemerge
(
MORSE_enum
side
,
MORSE_enum
diag
,
int
M
,
int
N
,
cuDoubleComplex
*
A
,
int
LDA
,
cuDoubleComplex
*
B
,
int
LDB
,
CUBLAS_STREAM_PARAM
)
int
CUDA_zgemerge
(
MORSE_enum
side
,
MORSE_enum
diag
,
int
M
,
int
N
,
cuDoubleComplex
*
A
,
int
LDA
,
cuDoubleComplex
*
B
,
int
LDB
,
CUBLAS_STREAM_PARAM
)
{
int
i
,
j
;
magmaDoubleComplex
*
cola
,
*
colb
;
cublasHandle_t
handle
;
cublasStatus_t
stat
;
stat
=
cublasCreate
(
&
handle
);
if
(
stat
!=
CUBLAS_STATUS_SUCCESS
)
{
printf
(
"CUBLAS initialization failed
\n
"
);
assert
(
stat
==
CUBLAS_STATUS_SUCCESS
);
}
stat
=
cublasSetStream
(
handle
,
stream
);
if
(
stat
!=
CUBLAS_STATUS_SUCCESS
)
{
printf
(
"cublasSetStream failed
\n
"
);
assert
(
stat
==
CUBLAS_STATUS_SUCCESS
);
}
cuDoubleComplex
*
cola
,
*
colb
;
if
(
M
<
0
)
{
return
-
1
;
...
...
@@ -60,55 +47,9 @@ int CUDA_zgemerge( MORSE_enum side, MORSE_enum diag,
return
-
7
;
}
if
(
side
==
MagmaLeft
){
for
(
i
=
0
;
i
<
N
;
i
++
){
cola
=
A
+
i
*
LDA
;
colb
=
B
+
i
*
LDB
;
// cublasZcopy(handle, i+1, cola, 1, colb, 1);
cudaMemcpyAsync
(
colb
,
cola
,
(
i
+
1
)
*
sizeof
(
cuDoubleComplex
),
cudaMemcpyDeviceToDevice
,
stream
);
}
}
else
{
for
(
i
=
0
;
i
<
N
;
i
++
){
cola
=
A
+
i
*
LDA
;
colb
=
B
+
i
*
LDB
;
// cublasZcopy(handle, M-i, cola + i, 1, colb + i, 1);
cudaMemcpyAsync
(
colb
+
i
,
cola
+
i
,
(
M
-
i
)
*
sizeof
(
cuDoubleComplex
),
cudaMemcpyDeviceToDevice
,
stream
);
}
}
cublasDestroy
(
handle
);
return
MORSE_SUCCESS
;
}
#else
/* CHAMELEON_USE_CUBLAS_V2 */
int
CUDA_zgemerge
(
magma_side_t
side
,
magma_diag_t
diag
,
magma_int_t
M
,
magma_int_t
N
,
magmaDoubleComplex
*
A
,
magma_int_t
LDA
,
magmaDoubleComplex
*
B
,
magma_int_t
LDB
,
CUstream
stream
)
{
int
i
,
j
;
magmaDoubleComplex
*
cola
,
*
colb
;
if
(
M
<
0
)
{
return
-
1
;
}
if
(
N
<
0
)
{
return
-
2
;
}
if
(
(
LDA
<
max
(
1
,
M
))
&&
(
M
>
0
)
)
{
return
-
5
;
}
if
(
(
LDB
<
max
(
1
,
M
))
&&
(
M
>
0
)
)
{
return
-
7
;
}
CUBLAS_GET_STREAM
;
if
(
side
==
M
agma
Left
){
if
(
side
==
M
orse
Left
){
for
(
i
=
0
;
i
<
N
;
i
++
){
cola
=
A
+
i
*
LDA
;
colb
=
B
+
i
*
LDB
;
...
...
@@ -128,5 +69,3 @@ int CUDA_zgemerge(
return
MORSE_SUCCESS
;
}
#endif
/* CHAMELEON_USE_CUBLAS_V2 */
#endif
cudablas/compute/cuda_ztsmlq.c
View file @
18416866
...
...
@@ -55,7 +55,7 @@ int CUDA_ztsmlq(
NW
=
IB
;
}
else
{
NW
=
M
1
;
NW
=
N
1
;
}
if
((
trans
!=
MorseNoTrans
)
&&
(
trans
!=
MorseConjTrans
))
{
...
...
runtime/starpu/codelets/codelet_zgelqt.c
View file @
18416866
...
...
@@ -194,8 +194,8 @@ static void cl_zgelqt_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize
();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA)
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zgemm.c
View file @
18416866
...
...
@@ -129,11 +129,9 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
B
,
ldb
,
beta
,
C
,
ldc
);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgemm_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
transA
;
...
...
@@ -172,9 +170,7 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else
/* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgemm_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
transA
;
...
...
@@ -213,9 +209,9 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
/* CHAMELEON_USE_CUBLAS_V2 */
#endif
/* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zgeqrt.c
View file @
18416866
...
...
@@ -151,10 +151,9 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
WORK
=
TAU
+
max
(
m
,
n
);
CORE_zgeqrt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
}
#endif //!defined(CHAMELEON_SIMULATION)
#if defined(CHAMELEON_USE_MAGMA)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgeqrt_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_starpu_ws_t
*
h_work
;
...
...
@@ -196,8 +195,8 @@ static void cl_zgeqrt_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize
();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA)
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zgessm.c
View file @
18416866
...
...
@@ -133,10 +133,8 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
k
,
&
ib
,
&
IPIV
,
&
ldl
,
&
ldd
,
&
lda
);
CORE_zgessm
(
m
,
n
,
k
,
ib
,
IPIV
,
D
,
ldd
,
A
,
lda
);
}
#endif //!defined(CHAMELEON_SIMULATION)
#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgessm_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
int
m
;
...
...
@@ -163,8 +161,8 @@ static void cl_zgessm_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zgetrf_incpiv.c
View file @
18416866
...
...
@@ -171,13 +171,12 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
}
#endif
}
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet GPU
*/
#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgetrf_incpiv_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
int
m
;
...
...
@@ -228,8 +227,8 @@ static void cl_zgetrf_incpiv_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize
();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif //defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#endif //!defined(CHAMELEON_SIMULATION)
/*
...
...
runtime/starpu/codelets/codelet_zgetrf_nopiv.c
View file @
18416866
...
...
@@ -119,13 +119,11 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
lda
,
&
iinfo
);
CORE_zgetrf_nopiv
(
m
,
n
,
ib
,
A
,
lda
,
&
info
);
}
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet GPU
*/
#if defined(CHAMELEON_USE_MAGMA)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgetrf_nopiv_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
int
m
;
...
...
@@ -142,8 +140,8 @@ static void cl_zgetrf_nopiv_cuda_func(void *descr[], void *cl_arg)
CUDA_zgetrf_nopiv
(
m
,
n
,
dA
,
lda
,
&
info
);
cudaThreadSynchronize
();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zhemm.c
View file @
18416866
...
...
@@ -99,11 +99,9 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg)
B
,
LDB
,
beta
,
C
,
LDC
);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zhemm_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
side
;
...
...
@@ -143,9 +141,7 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else
/* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zhemm_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
side
;
...
...
@@ -183,9 +179,9 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
/* CHAMELEON_USE_CUBLAS_V2 */
#endif
/* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zher2k.c
View file @
18416866
...
...
@@ -96,11 +96,9 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg)
CORE_zher2k
(
uplo
,
trans
,
n
,
k
,
alpha
,
A
,
lda
,
B
,
ldb
,
beta
,
C
,
ldc
);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zher2k_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
uplo
;
...
...
@@ -135,9 +133,7 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else
/* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zher2k_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
uplo
;
...
...
@@ -171,9 +167,9 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
/* CHAMELEON_USE_CUBLAS_V2 */
#endif
/* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zherk.c
View file @
18416866
...
...
@@ -90,11 +90,9 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg)
alpha
,
A
,
lda
,
beta
,
C
,
ldc
);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zherk_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
uplo
;
...
...
@@ -132,9 +130,7 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else
/* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zherk_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
uplo
;
...
...
@@ -168,9 +164,9 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
/* CHAMELEON_USE_CUBLAS_V2 */
#endif
/* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zlauum.c
View file @
18416866
...
...
@@ -73,10 +73,8 @@ static void cl_zlauum_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args
(
cl_arg
,
&
uplo
,
&
N
,
&
LDA
);
CORE_zlauum
(
uplo
,
N
,
A
,
LDA
);
}
#endif //!defined(CHAMELEON_SIMULATION)
#if defined(CHAMELEON_USE_MAGMA)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zlauum_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
uplo
;
...
...
@@ -91,8 +89,8 @@ static void cl_zlauum_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize
();
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zpotrf.c
View file @
18416866
...
...
@@ -79,10 +79,8 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args
(
cl_arg
,
&
uplo
,
&
n
,
&
lda
,
&
iinfo
);
CORE_zpotrf
(
uplo
,
n
,
A
,
lda
,
&
info
);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_MAGMA
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zpotrf_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
cudaStream_t
stream
[
2
],
currentt_stream
;
...
...
@@ -115,8 +113,8 @@ static void cl_zpotrf_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zssssm.c
View file @
18416866
...
...
@@ -170,10 +170,8 @@ static void cl_zssssm_cpu_func(void *descr[], void *cl_arg)
starpu_codelet_unpack_args
(
cl_arg
,
&
m1
,
&
n1
,
&
m2
,
&
n2
,
&
k
,
&
ib
,
&
lda1
,
&
lda2
,
&
ldl1
,
&
ldl2
,
&
IPIV
);
CORE_zssssm
(
m1
,
n1
,
m2
,
n2
,
k
,
ib
,
A1
,
lda1
,
A2
,
lda2
,
L1
,
ldl1
,
L2
,
ldl2
,
IPIV
);
}
#endif //!defined(CHAMELEON_SIMULATION)
#if defined(CHAMELEON_USE_MAGMA) && defined(HAVE_MAGMA_GETRF_INCPIV_GPU)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zssssm_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
int
m1
;
...
...
@@ -213,8 +211,8 @@ static void cl_zssssm_cuda_func(void *descr[], void *cl_arg)
cudaThreadSynchronize
();
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zsymm.c
View file @
18416866
...
...
@@ -99,11 +99,9 @@ static void cl_zsymm_cpu_func(void *descr[], void *cl_arg)
B
,
LDB
,
beta
,
C
,
LDC
);
}
#endif //!defined(CHAMELEON_SIMULATION)
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zsymm_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
side
;
...
...
@@ -141,9 +139,7 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#else
/* CHAMELEON_USE_CUBLAS_V2 */
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zsymm_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
MORSE_enum
side
;
...
...
@@ -181,9 +177,9 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
return
;
}
#endif //!defined(CHAMELEON_SIMULATION)
#endif
/* CHAMELEON_USE_CUBLAS_V2 */
#endif
/* CHAMELEON_USE_CUDA */
#endif //!defined(CHAMELEON_SIMULATION)
/*
* Codelet definition
...
...
runtime/starpu/codelets/codelet_zsyr2k.c
View file @
18416866
...
...
@@ -96,11 +96,9 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg)
CORE_zsyr2k
(
uplo
,
trans
,