Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Chameleon
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
11
Issues
11
List
Boards
Labels
Service Desk
Milestones
Merge Requests
10
Merge Requests
10
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
solverstack
Chameleon
Commits
2be2ccd3
Commit
2be2ccd3
authored
Jul 02, 2019
by
Mathieu Faverge
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'fix_codelets' into 'master'
Fix codelets See merge request
!158
parents
9b777d34
aa0fa3e9
Changes
54
Show whitespace changes
Inline
Side-by-side
Showing
54 changed files
with
857 additions
and
643 deletions
+857
-643
runtime/starpu/codelets/codelet_dzasum.c
runtime/starpu/codelets/codelet_dzasum.c
+8
-5
runtime/starpu/codelets/codelet_zbuild.c
runtime/starpu/codelets/codelet_zbuild.c
+9
-6
runtime/starpu/codelets/codelet_zgeadd.c
runtime/starpu/codelets/codelet_zgeadd.c
+24
-19
runtime/starpu/codelets/codelet_zgelqt.c
runtime/starpu/codelets/codelet_zgelqt.c
+16
-13
runtime/starpu/codelets/codelet_zgemm.c
runtime/starpu/codelets/codelet_zgemm.c
+30
-20
runtime/starpu/codelets/codelet_zgeqrt.c
runtime/starpu/codelets/codelet_zgeqrt.c
+16
-13
runtime/starpu/codelets/codelet_zgessm.c
runtime/starpu/codelets/codelet_zgessm.c
+20
-15
runtime/starpu/codelets/codelet_zgessq.c
runtime/starpu/codelets/codelet_zgessq.c
+8
-5
runtime/starpu/codelets/codelet_zgetrf.c
runtime/starpu/codelets/codelet_zgetrf.c
+7
-5
runtime/starpu/codelets/codelet_zgetrf_incpiv.c
runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+12
-9
runtime/starpu/codelets/codelet_zgetrf_nopiv.c
runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+9
-7
runtime/starpu/codelets/codelet_zgram.c
runtime/starpu/codelets/codelet_zgram.c
+19
-13
runtime/starpu/codelets/codelet_zhe2ge.c
runtime/starpu/codelets/codelet_zhe2ge.c
+13
-8
runtime/starpu/codelets/codelet_zhemm.c
runtime/starpu/codelets/codelet_zhemm.c
+31
-20
runtime/starpu/codelets/codelet_zher2k.c
runtime/starpu/codelets/codelet_zher2k.c
+27
-16
runtime/starpu/codelets/codelet_zherfb.c
runtime/starpu/codelets/codelet_zherfb.c
+27
-18
runtime/starpu/codelets/codelet_zherk.c
runtime/starpu/codelets/codelet_zherk.c
+23
-14
runtime/starpu/codelets/codelet_zhessq.c
runtime/starpu/codelets/codelet_zhessq.c
+2
-2
runtime/starpu/codelets/codelet_zlacpy.c
runtime/starpu/codelets/codelet_zlacpy.c
+17
-12
runtime/starpu/codelets/codelet_zlag2c.c
runtime/starpu/codelets/codelet_zlag2c.c
+29
-16
runtime/starpu/codelets/codelet_zlange.c
runtime/starpu/codelets/codelet_zlange.c
+10
-5
runtime/starpu/codelets/codelet_zlanhe.c
runtime/starpu/codelets/codelet_zlanhe.c
+9
-5
runtime/starpu/codelets/codelet_zlansy.c
runtime/starpu/codelets/codelet_zlansy.c
+8
-5
runtime/starpu/codelets/codelet_zlantr.c
runtime/starpu/codelets/codelet_zlantr.c
+7
-5
runtime/starpu/codelets/codelet_zlascal.c
runtime/starpu/codelets/codelet_zlascal.c
+11
-8
runtime/starpu/codelets/codelet_zlaset.c
runtime/starpu/codelets/codelet_zlaset.c
+10
-7
runtime/starpu/codelets/codelet_zlaset2.c
runtime/starpu/codelets/codelet_zlaset2.c
+9
-7
runtime/starpu/codelets/codelet_zlatro.c
runtime/starpu/codelets/codelet_zlatro.c
+12
-8
runtime/starpu/codelets/codelet_zlauum.c
runtime/starpu/codelets/codelet_zlauum.c
+9
-5
runtime/starpu/codelets/codelet_zplghe.c
runtime/starpu/codelets/codelet_zplghe.c
+8
-5
runtime/starpu/codelets/codelet_zplgsy.c
runtime/starpu/codelets/codelet_zplgsy.c
+8
-5
runtime/starpu/codelets/codelet_zplrnt.c
runtime/starpu/codelets/codelet_zplrnt.c
+8
-5
runtime/starpu/codelets/codelet_zpotrf.c
runtime/starpu/codelets/codelet_zpotrf.c
+7
-5
runtime/starpu/codelets/codelet_zssssm.c
runtime/starpu/codelets/codelet_zssssm.c
+23
-22
runtime/starpu/codelets/codelet_zsymm.c
runtime/starpu/codelets/codelet_zsymm.c
+27
-20
runtime/starpu/codelets/codelet_zsyr2k.c
runtime/starpu/codelets/codelet_zsyr2k.c
+23
-16
runtime/starpu/codelets/codelet_zsyrk.c
runtime/starpu/codelets/codelet_zsyrk.c
+20
-14
runtime/starpu/codelets/codelet_zsyssq.c
runtime/starpu/codelets/codelet_zsyssq.c
+6
-5
runtime/starpu/codelets/codelet_zsytrf_nopiv.c
runtime/starpu/codelets/codelet_zsytrf_nopiv.c
+7
-6
runtime/starpu/codelets/codelet_ztplqt.c
runtime/starpu/codelets/codelet_ztplqt.c
+15
-14
runtime/starpu/codelets/codelet_ztpmlqt.c
runtime/starpu/codelets/codelet_ztpmlqt.c
+28
-23
runtime/starpu/codelets/codelet_ztpmqrt.c
runtime/starpu/codelets/codelet_ztpmqrt.c
+28
-24
runtime/starpu/codelets/codelet_ztpqrt.c
runtime/starpu/codelets/codelet_ztpqrt.c
+14
-14
runtime/starpu/codelets/codelet_ztradd.c
runtime/starpu/codelets/codelet_ztradd.c
+16
-14
runtime/starpu/codelets/codelet_ztrasm.c
runtime/starpu/codelets/codelet_ztrasm.c
+7
-5
runtime/starpu/codelets/codelet_ztrmm.c
runtime/starpu/codelets/codelet_ztrmm.c
+20
-14
runtime/starpu/codelets/codelet_ztrsm.c
runtime/starpu/codelets/codelet_ztrsm.c
+19
-14
runtime/starpu/codelets/codelet_ztrssq.c
runtime/starpu/codelets/codelet_ztrssq.c
+6
-5
runtime/starpu/codelets/codelet_ztrtri.c
runtime/starpu/codelets/codelet_ztrtri.c
+7
-6
runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
+18
-19
runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
+18
-18
runtime/starpu/codelets/codelet_ztstrf.c
runtime/starpu/codelets/codelet_ztstrf.c
+24
-21
runtime/starpu/codelets/codelet_zunmlq.c
runtime/starpu/codelets/codelet_zunmlq.c
+34
-29
runtime/starpu/codelets/codelet_zunmqr.c
runtime/starpu/codelets/codelet_zunmqr.c
+34
-29
No files found.
runtime/starpu/codelets/codelet_dzasum.c
View file @
2be2ccd3
...
...
@@ -15,6 +15,7 @@
* @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -30,13 +31,15 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
int
M
;
int
N
;
CHAMELEON_Complex64_t
*
A
;
int
ld
a
;
int
ld
A
;
double
*
work
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
work
=
(
double
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
storev
,
&
uplo
,
&
M
,
&
N
,
&
lda
);
CORE_dzasum
(
storev
,
uplo
,
M
,
N
,
A
,
lda
,
work
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
storev
,
&
uplo
,
&
M
,
&
N
);
CORE_dzasum
(
storev
,
uplo
,
M
,
N
,
A
,
ldA
,
work
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
...
...
@@ -47,7 +50,7 @@ CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func)
void
INSERT_TASK_dzasum
(
const
RUNTIME_option_t
*
options
,
cham_store_t
storev
,
cham_uplo_t
uplo
,
int
M
,
int
N
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
)
{
struct
starpu_codelet
*
codelet
=
&
cl_dzasum
;
...
...
@@ -65,7 +68,6 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
STARPU_VALUE
,
&
M
,
sizeof
(
int
),
STARPU_VALUE
,
&
N
,
sizeof
(
int
),
STARPU_R
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_RW
,
RTBLKADDR
(
B
,
double
,
Bm
,
Bn
),
STARPU_PRIORITY
,
options
->
priority
,
STARPU_CALLBACK
,
callback
,
...
...
@@ -73,4 +75,5 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
STARPU_NAME
,
"dzasum"
,
#endif
0
);
(
void
)
ldA
;
}
runtime/starpu/codelets/codelet_zbuild.c
View file @
2be2ccd3
...
...
@@ -20,6 +20,7 @@
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Guillaume Sylvand
* @author Lucas Barros de Assis
* @date 2016-09-08
* @precisions normal z -> c d s
*
...
...
@@ -31,19 +32,21 @@
static
void
cl_zbuild_cpu_func
(
void
*
descr
[],
void
*
cl_arg
)
{
CHAMELEON_Complex64_t
*
A
;
int
ld
;
int
ld
A
;
void
*
user_data
;
void
(
*
user_build_callback
)(
int
row_min
,
int
row_max
,
int
col_min
,
int
col_max
,
void
*
buffer
,
int
ld
,
void
*
user_data
)
;
void
(
*
user_build_callback
)(
int
row_min
,
int
row_max
,
int
col_min
,
int
col_max
,
void
*
buffer
,
int
ld
A
,
void
*
user_data
)
;
int
row_min
,
row_max
,
col_min
,
col_max
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
row_min
,
&
row_max
,
&
col_min
,
&
col_max
,
&
ld
,
&
user_data
,
&
user_build_callback
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
row_min
,
&
row_max
,
&
col_min
,
&
col_max
,
&
user_data
,
&
user_build_callback
);
/* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
* (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran)
* and store it at the address 'buffer' with leading dimension 'ld'
*/
user_build_callback
(
row_min
,
row_max
,
col_min
,
col_max
,
A
,
ld
,
user_data
);
user_build_callback
(
row_min
,
row_max
,
col_min
,
col_max
,
A
,
ld
A
,
user_data
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
...
...
@@ -54,7 +57,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
CODELETS_CPU
(
zbuild
,
1
,
cl_zbuild_cpu_func
)
void
INSERT_TASK_zbuild
(
const
RUNTIME_option_t
*
options
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
void
*
user_data
,
void
*
user_build_callback
)
{
...
...
@@ -77,7 +80,6 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
STARPU_VALUE
,
&
col_min
,
sizeof
(
int
),
STARPU_VALUE
,
&
col_max
,
sizeof
(
int
),
STARPU_W
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_VALUE
,
&
user_data
,
sizeof
(
void
*
),
STARPU_VALUE
,
&
user_build_callback
,
sizeof
(
void
*
),
STARPU_PRIORITY
,
options
->
priority
,
...
...
@@ -86,4 +88,5 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
STARPU_NAME
,
"zbuild"
,
#endif
0
);
(
void
)
ldA
;
}
runtime/starpu/codelets/codelet_zgeadd.c
View file @
2be2ccd3
...
...
@@ -17,6 +17,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -32,15 +33,18 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
int
N
;
CHAMELEON_Complex64_t
alpha
;
const
CHAMELEON_Complex64_t
*
A
;
int
LD
A
;
int
ld
A
;
CHAMELEON_Complex64_t
beta
;
CHAMELEON_Complex64_t
*
B
;
int
LD
B
;
int
ld
B
;
A
=
(
const
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
B
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
LDA
,
&
beta
,
&
LDB
);
CORE_zgeadd
(
trans
,
M
,
N
,
alpha
,
A
,
LDA
,
beta
,
B
,
LDB
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldB
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
beta
);
CORE_zgeadd
(
trans
,
M
,
N
,
alpha
,
A
,
ldA
,
beta
,
B
,
ldB
);
return
;
}
...
...
@@ -52,22 +56,24 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
int
N
;
cuDoubleComplex
alpha
;
const
cuDoubleComplex
*
A
;
int
ld
a
;
int
ld
A
;
cuDoubleComplex
beta
;
cuDoubleComplex
*
B
;
int
ld
b
;
int
ld
B
;
A
=
(
const
cuDoubleComplex
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
B
=
(
cuDoubleComplex
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
lda
,
&
beta
,
&
ldb
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldB
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
beta
);
RUNTIME_getStream
(
stream
);
CUDA_zgeadd
(
trans
,
M
,
N
,
&
alpha
,
A
,
ld
a
,
&
beta
,
B
,
ld
b
,
&
alpha
,
A
,
ld
A
,
&
beta
,
B
,
ld
B
,
stream
);
#ifndef STARPU_CUDA_ASYNC
...
...
@@ -118,22 +124,22 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
* Scalar factor of A.
*
* @param[in] A
* Matrix of size
LDA-by-N, if trans = ChamNoTrans, LD
A-by-M
* Matrix of size
ldA-by-N, if trans = ChamNoTrans, ld
A-by-M
* otherwise.
*
* @param[in]
LD
A
* Leading dimension of the array A.
LD
A >= max(1,k), with k=M, if
* @param[in]
ld
A
* Leading dimension of the array A.
ld
A >= max(1,k), with k=M, if
* trans = ChamNoTrans, and k=N otherwise.
*
* @param[in] beta
* Scalar factor of B.
*
* @param[in,out] B
* Matrix of size
LD
B-by-N.
* Matrix of size
ld
B-by-N.
* On exit, B = alpha * op(A) + beta * B
*
* @param[in]
LD
B
* Leading dimension of the array B.
LD
B >= max(1,M)
* @param[in]
ld
B
* Leading dimension of the array B.
ld
B >= max(1,M)
*
*******************************************************************************
*
...
...
@@ -143,8 +149,8 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
*/
void
INSERT_TASK_zgeadd
(
const
RUNTIME_option_t
*
options
,
cham_trans_t
trans
,
int
m
,
int
n
,
int
nb
,
CHAMELEON_Complex64_t
alpha
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
CHAMELEON_Complex64_t
beta
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
,
int
ld
b
)
CHAMELEON_Complex64_t
alpha
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
CHAMELEON_Complex64_t
beta
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
,
int
ld
B
)
{
struct
starpu_codelet
*
codelet
=
&
cl_zgeadd
;
void
(
*
callback
)(
void
*
)
=
options
->
profiling
?
cl_zgeadd_callback
:
NULL
;
...
...
@@ -161,16 +167,15 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_VALUE
,
&
alpha
,
sizeof
(
CHAMELEON_Complex64_t
),
STARPU_R
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_VALUE
,
&
beta
,
sizeof
(
CHAMELEON_Complex64_t
),
STARPU_RW
,
RTBLKADDR
(
B
,
CHAMELEON_Complex64_t
,
Bm
,
Bn
),
STARPU_VALUE
,
&
ldb
,
sizeof
(
int
),
STARPU_PRIORITY
,
options
->
priority
,
STARPU_CALLBACK
,
callback
,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME
,
"zgeadd"
,
#endif
0
);
(
void
)
ldA
;
(
void
)
nb
;
}
runtime/starpu/codelets/codelet_zgelqt.c
View file @
2be2ccd3
...
...
@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -34,20 +35,22 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
int
n
;
int
ib
;
CHAMELEON_Complex64_t
*
A
;
int
ld
a
;
int
ld
A
;
CHAMELEON_Complex64_t
*
T
;
int
ld
t
;
int
ld
T
;
CHAMELEON_Complex64_t
*
TAU
,
*
WORK
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
T
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
TAU
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
/* max(m,n) + ib*n */
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldT
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
lda
,
&
ldt
,
&
h_work
);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
h_work
);
WORK
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
T
,
ld
t
);
CORE_zgelqt
(
m
,
n
,
ib
,
A
,
ld
a
,
T
,
ldt
,
TAU
,
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
T
,
ld
T
);
CORE_zgelqt
(
m
,
n
,
ib
,
A
,
ld
A
,
T
,
ldT
,
TAU
,
WORK
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
...
...
@@ -93,16 +96,16 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
* with the array TAU, represent the unitary tile Q as a
* product of elementary reflectors (see Further Details).
*
* @param[in]
LD
A
* The leading dimension of the array A.
LD
A >= max(1,M).
* @param[in]
ld
A
* The leading dimension of the array A.
ld
A >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in]
LD
T
* The leading dimension of the array T.
LD
T >= IB.
* @param[in]
ld
T
* The leading dimension of the array T.
ld
T >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
...
...
@@ -118,8 +121,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
*/
void
INSERT_TASK_zgelqt
(
const
RUNTIME_option_t
*
options
,
int
m
,
int
n
,
int
ib
,
int
nb
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ld
t
)
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ld
T
)
{
(
void
)
nb
;
struct
starpu_codelet
*
codelet
=
&
cl_zgelqt
;
...
...
@@ -137,9 +140,7 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_VALUE
,
&
ib
,
sizeof
(
int
),
STARPU_RW
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_W
,
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
),
STARPU_VALUE
,
&
ldt
,
sizeof
(
int
),
/* max( nb * (ib+1), ib * (ib+nb) ) */
STARPU_SCRATCH
,
options
->
ws_worker
,
/* /\* ib*n + 3*ib*ib + max(m,n) *\/ */
...
...
@@ -150,4 +151,6 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
STARPU_NAME
,
"zgelqt"
,
#endif
0
);
(
void
)
ldT
;
(
void
)
ldA
;
}
runtime/starpu/codelets/codelet_zgemm.c
View file @
2be2ccd3
...
...
@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -36,22 +37,26 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
int
k
;
CHAMELEON_Complex64_t
alpha
;
CHAMELEON_Complex64_t
*
A
;
int
ld
a
;
int
ld
A
;
CHAMELEON_Complex64_t
*
B
;
int
ld
b
;
int
ld
B
;
CHAMELEON_Complex64_t
beta
;
CHAMELEON_Complex64_t
*
C
;
int
ld
c
;
int
ld
C
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
B
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
C
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
transA
,
&
transB
,
&
m
,
&
n
,
&
k
,
&
alpha
,
&
lda
,
&
ldb
,
&
beta
,
&
ldc
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldB
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
ldC
=
STARPU_MATRIX_GET_LD
(
descr
[
2
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
transA
,
&
transB
,
&
m
,
&
n
,
&
k
,
&
alpha
,
&
beta
);
CORE_zgemm
(
transA
,
transB
,
m
,
n
,
k
,
alpha
,
A
,
ld
a
,
B
,
ld
b
,
beta
,
C
,
ld
c
);
alpha
,
A
,
ld
A
,
B
,
ld
B
,
beta
,
C
,
ld
C
);
}
#ifdef CHAMELEON_USE_CUDA
...
...
@@ -64,26 +69,30 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
int
k
;
cuDoubleComplex
alpha
;
const
cuDoubleComplex
*
A
;
int
ld
a
;
int
ld
A
;
const
cuDoubleComplex
*
B
;
int
ld
b
;
int
ld
B
;
cuDoubleComplex
beta
;
cuDoubleComplex
*
C
;
int
ld
c
;
int
ld
C
;
A
=
(
const
cuDoubleComplex
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
B
=
(
const
cuDoubleComplex
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
C
=
(
cuDoubleComplex
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
transA
,
&
transB
,
&
m
,
&
n
,
&
k
,
&
alpha
,
&
lda
,
&
ldb
,
&
beta
,
&
ldc
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldB
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
ldC
=
STARPU_MATRIX_GET_LD
(
descr
[
2
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
transA
,
&
transB
,
&
m
,
&
n
,
&
k
,
&
alpha
,
&
beta
);
RUNTIME_getStream
(
stream
);
CUDA_zgemm
(
transA
,
transB
,
m
,
n
,
k
,
&
alpha
,
A
,
ld
a
,
B
,
ld
b
,
&
beta
,
C
,
ld
c
,
&
alpha
,
A
,
ld
A
,
B
,
ld
B
,
&
beta
,
C
,
ld
C
,
stream
);
#ifndef STARPU_CUDA_ASYNC
...
...
@@ -108,9 +117,9 @@ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
void
INSERT_TASK_zgemm
(
const
RUNTIME_option_t
*
options
,
cham_trans_t
transA
,
cham_trans_t
transB
,
int
m
,
int
n
,
int
k
,
int
nb
,
CHAMELEON_Complex64_t
alpha
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
,
int
ld
b
,
CHAMELEON_Complex64_t
beta
,
const
CHAM_desc_t
*
C
,
int
Cm
,
int
Cn
,
int
ld
c
)
CHAMELEON_Complex64_t
alpha
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
,
int
ld
B
,
CHAMELEON_Complex64_t
beta
,
const
CHAM_desc_t
*
C
,
int
Cm
,
int
Cn
,
int
ld
C
)
{
(
void
)
nb
;
struct
starpu_codelet
*
codelet
=
&
cl_zgemm
;
...
...
@@ -131,16 +140,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
STARPU_VALUE
,
&
k
,
sizeof
(
int
),
STARPU_VALUE
,
&
alpha
,
sizeof
(
CHAMELEON_Complex64_t
),
STARPU_R
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_R
,
RTBLKADDR
(
B
,
CHAMELEON_Complex64_t
,
Bm
,
Bn
),
STARPU_VALUE
,
&
ldb
,
sizeof
(
int
),
STARPU_VALUE
,
&
beta
,
sizeof
(
CHAMELEON_Complex64_t
),
STARPU_RW
,
RTBLKADDR
(
C
,
CHAMELEON_Complex64_t
,
Cm
,
Cn
),
STARPU_VALUE
,
&
ldc
,
sizeof
(
int
),
STARPU_PRIORITY
,
options
->
priority
,
STARPU_CALLBACK
,
callback
,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME
,
"zgemm"
,
#endif
0
);
(
void
)
ldA
;
(
void
)
ldB
;
(
void
)
ldC
;
}
runtime/starpu/codelets/codelet_zgeqrt.c
View file @
2be2ccd3
...
...
@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -34,21 +35,23 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
int
n
;
int
ib
;
CHAMELEON_Complex64_t
*
A
;
int
ld
a
;
int
ld
A
;
CHAMELEON_Complex64_t
*
T
;
int
ld
t
;
int
ld
T
;
CHAMELEON_Complex64_t
*
TAU
,
*
WORK
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
T
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
TAU
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
/* max(m,n) + n * ib */
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldT
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
lda
,
&
ldt
,
&
h_work
);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
h_work
);
WORK
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
T
,
ld
t
);
CORE_zgeqrt
(
m
,
n
,
ib
,
A
,
ld
a
,
T
,
ldt
,
TAU
,
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
T
,
ld
T
);
CORE_zgeqrt
(
m
,
n
,
ib
,
A
,
ld
A
,
T
,
ldT
,
TAU
,
WORK
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
...
...
@@ -95,16 +98,16 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
* with the array TAU, represent the unitary tile Q as a
* product of elementary reflectors (see Further Details).
*
* @param[in]
LD
A
* The leading dimension of the array A.
LD
A >= max(1,M).
* @param[in]
ld
A
* The leading dimension of the array A.
ld
A >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in]
LD
T
* The leading dimension of the array T.
LD
T >= IB.
* @param[in]
ld
T
* The leading dimension of the array T.
ld
T >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
...
...
@@ -120,8 +123,8 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
*/
void
INSERT_TASK_zgeqrt
(
const
RUNTIME_option_t
*
options
,
int
m
,
int
n
,
int
ib
,
int
nb
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ld
t
)
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ld
T
)
{
(
void
)
nb
;
struct
starpu_codelet
*
codelet
=
&
cl_zgeqrt
;
...
...
@@ -139,9 +142,7 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_VALUE
,
&
ib
,
sizeof
(
int
),
STARPU_RW
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_W
,
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
),
STARPU_VALUE
,
&
ldt
,
sizeof
(
int
),
/* max( nb * (ib+1), ib * (ib+nb) ) */
STARPU_SCRATCH
,
options
->
ws_worker
,
/* ib * (m+3*ib) + max(m,n) */
...
...
@@ -152,4 +153,6 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
STARPU_NAME
,
"zgeqrt"
,
#endif
0
);
(
void
)
ldT
;
(
void
)
ldA
;
}
runtime/starpu/codelets/codelet_zgessm.c
View file @
2be2ccd3
...
...
@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -34,16 +35,21 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
int
k
;
int
ib
;
int
*
IPIV
;
int
ld
l
;
int
ld
L
;
CHAMELEON_Complex64_t
*
D
;
int
ld
d
;
int
ld
D
;
CHAMELEON_Complex64_t
*
A
;
int
ld
a
;
int
ld
A
;
D
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
k
,
&
ib
,
&
IPIV
,
&
ldl
,
&
ldd
,
&
lda
);
CORE_zgessm
(
m
,
n
,
k
,
ib
,
IPIV
,
D
,
ldd
,
A
,
lda
);
ldL
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldD
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
2
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
k
,
&
ib
,
&
IPIV
);
CORE_zgessm
(
m
,
n
,
k
,
ib
,
IPIV
,
D
,
ldD
,
A
,
ldA
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
...
...
@@ -80,15 +86,15 @@ CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
* @param[in] L
* The M-by-K lower triangular tile.
*
* @param[in]
LD
L
* The leading dimension of the array L.
LD
L >= max(1,M).
* @param[in]
ld
L
* The leading dimension of the array L.
ld
L >= max(1,M).
*
* @param[in,out] A
* On entry, the M-by-N tile A.
* On exit, updated by the application of L.
*
* @param[in]
LD
A
* The leading dimension of the array A.
LD
A >= max(1,M).
* @param[in]
ld
A
* The leading dimension of the array A.
ld
A >= max(1,M).
*
*******************************************************************************
*
...
...
@@ -100,9 +106,9 @@ CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
void
INSERT_TASK_zgessm
(
const
RUNTIME_option_t
*
options
,
int
m
,
int
n
,
int
k
,
int
ib
,
int
nb
,
int
*
IPIV
,
const
CHAM_desc_t
*
L
,
int
Lm
,
int
Ln
,
int
ld
l
,
const
CHAM_desc_t
*
D
,
int
Dm
,
int
Dn
,
int
ld
d
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
)
const
CHAM_desc_t
*
L
,
int
Lm
,
int
Ln
,
int
ld
L
,
const
CHAM_desc_t
*
D
,
int
Dm
,
int
Dn
,
int
ld
D
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
)
{
(
void
)
nb
;
struct
starpu_codelet
*
codelet
=
&
cl_zgessm
;
...
...
@@ -122,15 +128,14 @@ void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
STARPU_VALUE
,
&
ib
,
sizeof
(
int
),
STARPU_VALUE
,
&
IPIV
,
sizeof
(
int
*
),
STARPU_R
,
RTBLKADDR
(
L
,
CHAMELEON_Complex64_t
,
Lm
,
Ln
),
STARPU_VALUE
,
&
ldl
,
sizeof
(
int
),
STARPU_R
,
RTBLKADDR
(
D
,
CHAMELEON_Complex64_t
,
Dm
,
Dn
),
STARPU_VALUE
,
&
ldd
,
sizeof
(
int
),
STARPU_RW
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_PRIORITY
,
options
->
priority
,
STARPU_CALLBACK
,
callback
,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME
,
"zgessm"
,
#endif
0
);
(
void
)
ldD
;
(
void
)
ldL
;
}