Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
solverstack
Chameleon
Commits
d5e458e1
Commit
d5e458e1
authored
Jul 03, 2019
by
BARROS DE ASSIS Lucas
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of gitlab.inria.fr:solverstack/chameleon
parents
985080e9
2be2ccd3
Changes
60
Hide whitespace changes
Inline
Side-by-side
Showing
60 changed files
with
896 additions
and
656 deletions
+896
-656
runtime/starpu/codelets/codelet_dzasum.c
runtime/starpu/codelets/codelet_dzasum.c
+8
-5
runtime/starpu/codelets/codelet_zbuild.c
runtime/starpu/codelets/codelet_zbuild.c
+9
-6
runtime/starpu/codelets/codelet_zgeadd.c
runtime/starpu/codelets/codelet_zgeadd.c
+24
-19
runtime/starpu/codelets/codelet_zgelqt.c
runtime/starpu/codelets/codelet_zgelqt.c
+16
-13
runtime/starpu/codelets/codelet_zgemm.c
runtime/starpu/codelets/codelet_zgemm.c
+30
-20
runtime/starpu/codelets/codelet_zgeqrt.c
runtime/starpu/codelets/codelet_zgeqrt.c
+16
-13
runtime/starpu/codelets/codelet_zgessm.c
runtime/starpu/codelets/codelet_zgessm.c
+20
-15
runtime/starpu/codelets/codelet_zgessq.c
runtime/starpu/codelets/codelet_zgessq.c
+8
-5
runtime/starpu/codelets/codelet_zgetrf.c
runtime/starpu/codelets/codelet_zgetrf.c
+7
-5
runtime/starpu/codelets/codelet_zgetrf_incpiv.c
runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+12
-9
runtime/starpu/codelets/codelet_zgetrf_nopiv.c
runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+9
-7
runtime/starpu/codelets/codelet_zgram.c
runtime/starpu/codelets/codelet_zgram.c
+19
-13
runtime/starpu/codelets/codelet_zhe2ge.c
runtime/starpu/codelets/codelet_zhe2ge.c
+13
-8
runtime/starpu/codelets/codelet_zhemm.c
runtime/starpu/codelets/codelet_zhemm.c
+31
-20
runtime/starpu/codelets/codelet_zher2k.c
runtime/starpu/codelets/codelet_zher2k.c
+27
-16
runtime/starpu/codelets/codelet_zherfb.c
runtime/starpu/codelets/codelet_zherfb.c
+27
-18
runtime/starpu/codelets/codelet_zherk.c
runtime/starpu/codelets/codelet_zherk.c
+23
-14
runtime/starpu/codelets/codelet_zhessq.c
runtime/starpu/codelets/codelet_zhessq.c
+2
-2
runtime/starpu/codelets/codelet_zlacpy.c
runtime/starpu/codelets/codelet_zlacpy.c
+17
-12
runtime/starpu/codelets/codelet_zlag2c.c
runtime/starpu/codelets/codelet_zlag2c.c
+29
-16
runtime/starpu/codelets/codelet_zlange.c
runtime/starpu/codelets/codelet_zlange.c
+10
-5
runtime/starpu/codelets/codelet_zlanhe.c
runtime/starpu/codelets/codelet_zlanhe.c
+9
-5
runtime/starpu/codelets/codelet_zlansy.c
runtime/starpu/codelets/codelet_zlansy.c
+8
-5
runtime/starpu/codelets/codelet_zlantr.c
runtime/starpu/codelets/codelet_zlantr.c
+7
-5
runtime/starpu/codelets/codelet_zlascal.c
runtime/starpu/codelets/codelet_zlascal.c
+11
-8
runtime/starpu/codelets/codelet_zlaset.c
runtime/starpu/codelets/codelet_zlaset.c
+10
-7
runtime/starpu/codelets/codelet_zlaset2.c
runtime/starpu/codelets/codelet_zlaset2.c
+9
-7
runtime/starpu/codelets/codelet_zlatro.c
runtime/starpu/codelets/codelet_zlatro.c
+12
-8
runtime/starpu/codelets/codelet_zlauum.c
runtime/starpu/codelets/codelet_zlauum.c
+9
-5
runtime/starpu/codelets/codelet_zplghe.c
runtime/starpu/codelets/codelet_zplghe.c
+8
-5
runtime/starpu/codelets/codelet_zplgsy.c
runtime/starpu/codelets/codelet_zplgsy.c
+8
-5
runtime/starpu/codelets/codelet_zplrnt.c
runtime/starpu/codelets/codelet_zplrnt.c
+8
-5
runtime/starpu/codelets/codelet_zpotrf.c
runtime/starpu/codelets/codelet_zpotrf.c
+7
-5
runtime/starpu/codelets/codelet_zssssm.c
runtime/starpu/codelets/codelet_zssssm.c
+23
-22
runtime/starpu/codelets/codelet_zsymm.c
runtime/starpu/codelets/codelet_zsymm.c
+27
-20
runtime/starpu/codelets/codelet_zsyr2k.c
runtime/starpu/codelets/codelet_zsyr2k.c
+23
-16
runtime/starpu/codelets/codelet_zsyrk.c
runtime/starpu/codelets/codelet_zsyrk.c
+20
-14
runtime/starpu/codelets/codelet_zsyssq.c
runtime/starpu/codelets/codelet_zsyssq.c
+6
-5
runtime/starpu/codelets/codelet_zsytrf_nopiv.c
runtime/starpu/codelets/codelet_zsytrf_nopiv.c
+7
-6
runtime/starpu/codelets/codelet_ztplqt.c
runtime/starpu/codelets/codelet_ztplqt.c
+15
-14
runtime/starpu/codelets/codelet_ztpmlqt.c
runtime/starpu/codelets/codelet_ztpmlqt.c
+28
-23
runtime/starpu/codelets/codelet_ztpmqrt.c
runtime/starpu/codelets/codelet_ztpmqrt.c
+28
-24
runtime/starpu/codelets/codelet_ztpqrt.c
runtime/starpu/codelets/codelet_ztpqrt.c
+14
-14
runtime/starpu/codelets/codelet_ztradd.c
runtime/starpu/codelets/codelet_ztradd.c
+16
-14
runtime/starpu/codelets/codelet_ztrasm.c
runtime/starpu/codelets/codelet_ztrasm.c
+7
-5
runtime/starpu/codelets/codelet_ztrmm.c
runtime/starpu/codelets/codelet_ztrmm.c
+20
-14
runtime/starpu/codelets/codelet_ztrsm.c
runtime/starpu/codelets/codelet_ztrsm.c
+19
-14
runtime/starpu/codelets/codelet_ztrssq.c
runtime/starpu/codelets/codelet_ztrssq.c
+6
-5
runtime/starpu/codelets/codelet_ztrtri.c
runtime/starpu/codelets/codelet_ztrtri.c
+7
-6
runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
+18
-19
runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
+18
-18
runtime/starpu/codelets/codelet_ztstrf.c
runtime/starpu/codelets/codelet_ztstrf.c
+24
-21
runtime/starpu/codelets/codelet_zunmlq.c
runtime/starpu/codelets/codelet_zunmlq.c
+34
-29
runtime/starpu/codelets/codelet_zunmqr.c
runtime/starpu/codelets/codelet_zunmqr.c
+34
-29
testing/testing_zgels.c
testing/testing_zgels.c
+14
-8
testing/testing_zgels_hqr.c
testing/testing_zgels_hqr.c
+6
-1
testing/testing_zgels_systolic.c
testing/testing_zgels_systolic.c
+7
-1
testing/testing_zgesv_incpiv.c
testing/testing_zgesv_incpiv.c
+5
-1
testing/testing_zlange.c
testing/testing_zlange.c
+6
-1
tools/analysis.sh
tools/analysis.sh
+1
-1
No files found.
runtime/starpu/codelets/codelet_dzasum.c
View file @
d5e458e1
...
...
@@ -15,6 +15,7 @@
* @comment This file has been automatically generated
* from Plasma 2.6.0 for CHAMELEON 0.9.2
* @author Mathieu Faverge
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -30,13 +31,15 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
int
M
;
int
N
;
CHAMELEON_Complex64_t
*
A
;
int
ld
a
;
int
ld
A
;
double
*
work
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
work
=
(
double
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
storev
,
&
uplo
,
&
M
,
&
N
,
&
lda
);
CORE_dzasum
(
storev
,
uplo
,
M
,
N
,
A
,
lda
,
work
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
storev
,
&
uplo
,
&
M
,
&
N
);
CORE_dzasum
(
storev
,
uplo
,
M
,
N
,
A
,
ldA
,
work
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
...
...
@@ -47,7 +50,7 @@ CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func)
void
INSERT_TASK_dzasum
(
const
RUNTIME_option_t
*
options
,
cham_store_t
storev
,
cham_uplo_t
uplo
,
int
M
,
int
N
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
)
{
struct
starpu_codelet
*
codelet
=
&
cl_dzasum
;
...
...
@@ -65,7 +68,6 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
STARPU_VALUE
,
&
M
,
sizeof
(
int
),
STARPU_VALUE
,
&
N
,
sizeof
(
int
),
STARPU_R
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_RW
,
RTBLKADDR
(
B
,
double
,
Bm
,
Bn
),
STARPU_PRIORITY
,
options
->
priority
,
STARPU_CALLBACK
,
callback
,
...
...
@@ -73,4 +75,5 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
STARPU_NAME
,
"dzasum"
,
#endif
0
);
(
void
)
ldA
;
}
runtime/starpu/codelets/codelet_zbuild.c
View file @
d5e458e1
...
...
@@ -20,6 +20,7 @@
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Guillaume Sylvand
* @author Lucas Barros de Assis
* @date 2016-09-08
* @precisions normal z -> c d s
*
...
...
@@ -31,19 +32,21 @@
static
void
cl_zbuild_cpu_func
(
void
*
descr
[],
void
*
cl_arg
)
{
CHAMELEON_Complex64_t
*
A
;
int
ld
;
int
ld
A
;
void
*
user_data
;
void
(
*
user_build_callback
)(
int
row_min
,
int
row_max
,
int
col_min
,
int
col_max
,
void
*
buffer
,
int
ld
,
void
*
user_data
)
;
void
(
*
user_build_callback
)(
int
row_min
,
int
row_max
,
int
col_min
,
int
col_max
,
void
*
buffer
,
int
ld
A
,
void
*
user_data
)
;
int
row_min
,
row_max
,
col_min
,
col_max
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
row_min
,
&
row_max
,
&
col_min
,
&
col_max
,
&
ld
,
&
user_data
,
&
user_build_callback
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
row_min
,
&
row_max
,
&
col_min
,
&
col_max
,
&
user_data
,
&
user_build_callback
);
/* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
* (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran)
* and store it at the address 'buffer' with leading dimension 'ld'
*/
user_build_callback
(
row_min
,
row_max
,
col_min
,
col_max
,
A
,
ld
,
user_data
);
user_build_callback
(
row_min
,
row_max
,
col_min
,
col_max
,
A
,
ld
A
,
user_data
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
...
...
@@ -54,7 +57,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
CODELETS_CPU
(
zbuild
,
1
,
cl_zbuild_cpu_func
)
void
INSERT_TASK_zbuild
(
const
RUNTIME_option_t
*
options
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
void
*
user_data
,
void
*
user_build_callback
)
{
...
...
@@ -77,7 +80,6 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
STARPU_VALUE
,
&
col_min
,
sizeof
(
int
),
STARPU_VALUE
,
&
col_max
,
sizeof
(
int
),
STARPU_W
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_VALUE
,
&
user_data
,
sizeof
(
void
*
),
STARPU_VALUE
,
&
user_build_callback
,
sizeof
(
void
*
),
STARPU_PRIORITY
,
options
->
priority
,
...
...
@@ -86,4 +88,5 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
STARPU_NAME
,
"zbuild"
,
#endif
0
);
(
void
)
ldA
;
}
runtime/starpu/codelets/codelet_zgeadd.c
View file @
d5e458e1
...
...
@@ -17,6 +17,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -32,15 +33,18 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
int
N
;
CHAMELEON_Complex64_t
alpha
;
const
CHAMELEON_Complex64_t
*
A
;
int
LD
A
;
int
ld
A
;
CHAMELEON_Complex64_t
beta
;
CHAMELEON_Complex64_t
*
B
;
int
LD
B
;
int
ld
B
;
A
=
(
const
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
B
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
LDA
,
&
beta
,
&
LDB
);
CORE_zgeadd
(
trans
,
M
,
N
,
alpha
,
A
,
LDA
,
beta
,
B
,
LDB
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldB
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
beta
);
CORE_zgeadd
(
trans
,
M
,
N
,
alpha
,
A
,
ldA
,
beta
,
B
,
ldB
);
return
;
}
...
...
@@ -52,22 +56,24 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
int
N
;
cuDoubleComplex
alpha
;
const
cuDoubleComplex
*
A
;
int
ld
a
;
int
ld
A
;
cuDoubleComplex
beta
;
cuDoubleComplex
*
B
;
int
ld
b
;
int
ld
B
;
A
=
(
const
cuDoubleComplex
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
B
=
(
cuDoubleComplex
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
lda
,
&
beta
,
&
ldb
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldB
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
beta
);
RUNTIME_getStream
(
stream
);
CUDA_zgeadd
(
trans
,
M
,
N
,
&
alpha
,
A
,
ld
a
,
&
beta
,
B
,
ld
b
,
&
alpha
,
A
,
ld
A
,
&
beta
,
B
,
ld
B
,
stream
);
#ifndef STARPU_CUDA_ASYNC
...
...
@@ -118,22 +124,22 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
* Scalar factor of A.
*
* @param[in] A
* Matrix of size
LD
A-by-N, if trans = ChamNoTrans,
LD
A-by-M
* Matrix of size
ld
A-by-N, if trans = ChamNoTrans,
ld
A-by-M
* otherwise.
*
* @param[in]
LD
A
* Leading dimension of the array A.
LD
A >= max(1,k), with k=M, if
* @param[in]
ld
A
* Leading dimension of the array A.
ld
A >= max(1,k), with k=M, if
* trans = ChamNoTrans, and k=N otherwise.
*
* @param[in] beta
* Scalar factor of B.
*
* @param[in,out] B
* Matrix of size
LD
B-by-N.
* Matrix of size
ld
B-by-N.
* On exit, B = alpha * op(A) + beta * B
*
* @param[in]
LD
B
* Leading dimension of the array B.
LD
B >= max(1,M)
* @param[in]
ld
B
* Leading dimension of the array B.
ld
B >= max(1,M)
*
*******************************************************************************
*
...
...
@@ -143,8 +149,8 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
*/
void
INSERT_TASK_zgeadd
(
const
RUNTIME_option_t
*
options
,
cham_trans_t
trans
,
int
m
,
int
n
,
int
nb
,
CHAMELEON_Complex64_t
alpha
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
CHAMELEON_Complex64_t
beta
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
,
int
ld
b
)
CHAMELEON_Complex64_t
alpha
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
CHAMELEON_Complex64_t
beta
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
,
int
ld
B
)
{
struct
starpu_codelet
*
codelet
=
&
cl_zgeadd
;
void
(
*
callback
)(
void
*
)
=
options
->
profiling
?
cl_zgeadd_callback
:
NULL
;
...
...
@@ -161,16 +167,15 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_VALUE
,
&
alpha
,
sizeof
(
CHAMELEON_Complex64_t
),
STARPU_R
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_VALUE
,
&
beta
,
sizeof
(
CHAMELEON_Complex64_t
),
STARPU_RW
,
RTBLKADDR
(
B
,
CHAMELEON_Complex64_t
,
Bm
,
Bn
),
STARPU_VALUE
,
&
ldb
,
sizeof
(
int
),
STARPU_PRIORITY
,
options
->
priority
,
STARPU_CALLBACK
,
callback
,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME
,
"zgeadd"
,
#endif
0
);
(
void
)
ldA
;
(
void
)
nb
;
}
runtime/starpu/codelets/codelet_zgelqt.c
View file @
d5e458e1
...
...
@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -34,20 +35,22 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
int
n
;
int
ib
;
CHAMELEON_Complex64_t
*
A
;
int
ld
a
;
int
ld
A
;
CHAMELEON_Complex64_t
*
T
;
int
ld
t
;
int
ld
T
;
CHAMELEON_Complex64_t
*
TAU
,
*
WORK
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
T
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
TAU
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
/* max(m,n) + ib*n */
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldT
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
lda
,
&
ldt
,
&
h_work
);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
h_work
);
WORK
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
T
,
ld
t
);
CORE_zgelqt
(
m
,
n
,
ib
,
A
,
ld
a
,
T
,
ld
t
,
TAU
,
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
T
,
ld
T
);
CORE_zgelqt
(
m
,
n
,
ib
,
A
,
ld
A
,
T
,
ld
T
,
TAU
,
WORK
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
...
...
@@ -93,16 +96,16 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
* with the array TAU, represent the unitary tile Q as a
* product of elementary reflectors (see Further Details).
*
* @param[in]
LD
A
* The leading dimension of the array A.
LD
A >= max(1,M).
* @param[in]
ld
A
* The leading dimension of the array A.
ld
A >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in]
LD
T
* The leading dimension of the array T.
LD
T >= IB.
* @param[in]
ld
T
* The leading dimension of the array T.
ld
T >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
...
...
@@ -118,8 +121,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
*/
void
INSERT_TASK_zgelqt
(
const
RUNTIME_option_t
*
options
,
int
m
,
int
n
,
int
ib
,
int
nb
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ld
t
)
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ld
T
)
{
(
void
)
nb
;
struct
starpu_codelet
*
codelet
=
&
cl_zgelqt
;
...
...
@@ -137,9 +140,7 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_VALUE
,
&
ib
,
sizeof
(
int
),
STARPU_RW
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_W
,
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
),
STARPU_VALUE
,
&
ldt
,
sizeof
(
int
),
/* max( nb * (ib+1), ib * (ib+nb) ) */
STARPU_SCRATCH
,
options
->
ws_worker
,
/* /\* ib*n + 3*ib*ib + max(m,n) *\/ */
...
...
@@ -150,4 +151,6 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
STARPU_NAME
,
"zgelqt"
,
#endif
0
);
(
void
)
ldT
;
(
void
)
ldA
;
}
runtime/starpu/codelets/codelet_zgemm.c
View file @
d5e458e1
...
...
@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -36,22 +37,26 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
int
k
;
CHAMELEON_Complex64_t
alpha
;
CHAMELEON_Complex64_t
*
A
;
int
ld
a
;
int
ld
A
;
CHAMELEON_Complex64_t
*
B
;
int
ld
b
;
int
ld
B
;
CHAMELEON_Complex64_t
beta
;
CHAMELEON_Complex64_t
*
C
;
int
ld
c
;
int
ld
C
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
B
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
C
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
transA
,
&
transB
,
&
m
,
&
n
,
&
k
,
&
alpha
,
&
lda
,
&
ldb
,
&
beta
,
&
ldc
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldB
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
ldC
=
STARPU_MATRIX_GET_LD
(
descr
[
2
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
transA
,
&
transB
,
&
m
,
&
n
,
&
k
,
&
alpha
,
&
beta
);
CORE_zgemm
(
transA
,
transB
,
m
,
n
,
k
,
alpha
,
A
,
ld
a
,
B
,
ld
b
,
beta
,
C
,
ld
c
);
alpha
,
A
,
ld
A
,
B
,
ld
B
,
beta
,
C
,
ld
C
);
}
#ifdef CHAMELEON_USE_CUDA
...
...
@@ -64,26 +69,30 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
int
k
;
cuDoubleComplex
alpha
;
const
cuDoubleComplex
*
A
;
int
ld
a
;
int
ld
A
;
const
cuDoubleComplex
*
B
;
int
ld
b
;
int
ld
B
;
cuDoubleComplex
beta
;
cuDoubleComplex
*
C
;
int
ld
c
;
int
ld
C
;
A
=
(
const
cuDoubleComplex
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
B
=
(
const
cuDoubleComplex
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
C
=
(
cuDoubleComplex
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
transA
,
&
transB
,
&
m
,
&
n
,
&
k
,
&
alpha
,
&
lda
,
&
ldb
,
&
beta
,
&
ldc
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldB
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
ldC
=
STARPU_MATRIX_GET_LD
(
descr
[
2
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
transA
,
&
transB
,
&
m
,
&
n
,
&
k
,
&
alpha
,
&
beta
);
RUNTIME_getStream
(
stream
);
CUDA_zgemm
(
transA
,
transB
,
m
,
n
,
k
,
&
alpha
,
A
,
ld
a
,
B
,
ld
b
,
&
beta
,
C
,
ld
c
,
&
alpha
,
A
,
ld
A
,
B
,
ld
B
,
&
beta
,
C
,
ld
C
,
stream
);
#ifndef STARPU_CUDA_ASYNC
...
...
@@ -108,9 +117,9 @@ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
void
INSERT_TASK_zgemm
(
const
RUNTIME_option_t
*
options
,
cham_trans_t
transA
,
cham_trans_t
transB
,
int
m
,
int
n
,
int
k
,
int
nb
,
CHAMELEON_Complex64_t
alpha
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
,
int
ld
b
,
CHAMELEON_Complex64_t
beta
,
const
CHAM_desc_t
*
C
,
int
Cm
,
int
Cn
,
int
ld
c
)
CHAMELEON_Complex64_t
alpha
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
,
int
ld
B
,
CHAMELEON_Complex64_t
beta
,
const
CHAM_desc_t
*
C
,
int
Cm
,
int
Cn
,
int
ld
C
)
{
(
void
)
nb
;
struct
starpu_codelet
*
codelet
=
&
cl_zgemm
;
...
...
@@ -131,16 +140,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
STARPU_VALUE
,
&
k
,
sizeof
(
int
),
STARPU_VALUE
,
&
alpha
,
sizeof
(
CHAMELEON_Complex64_t
),
STARPU_R
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_R
,
RTBLKADDR
(
B
,
CHAMELEON_Complex64_t
,
Bm
,
Bn
),
STARPU_VALUE
,
&
ldb
,
sizeof
(
int
),
STARPU_VALUE
,
&
beta
,
sizeof
(
CHAMELEON_Complex64_t
),
STARPU_RW
,
RTBLKADDR
(
C
,
CHAMELEON_Complex64_t
,
Cm
,
Cn
),
STARPU_VALUE
,
&
ldc
,
sizeof
(
int
),
STARPU_PRIORITY
,
options
->
priority
,
STARPU_CALLBACK
,
callback
,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME
,
"zgemm"
,
#endif
0
);
(
void
)
ldA
;
(
void
)
ldB
;
(
void
)
ldC
;
}
runtime/starpu/codelets/codelet_zgeqrt.c
View file @
d5e458e1
...
...
@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -34,21 +35,23 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
int
n
;
int
ib
;
CHAMELEON_Complex64_t
*
A
;
int
ld
a
;
int
ld
A
;
CHAMELEON_Complex64_t
*
T
;
int
ld
t
;
int
ld
T
;
CHAMELEON_Complex64_t
*
TAU
,
*
WORK
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
T
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
TAU
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
/* max(m,n) + n * ib */
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldT
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
lda
,
&
ldt
,
&
h_work
);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
h_work
);
WORK
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
T
,
ld
t
);
CORE_zgeqrt
(
m
,
n
,
ib
,
A
,
ld
a
,
T
,
ld
t
,
TAU
,
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
T
,
ld
T
);
CORE_zgeqrt
(
m
,
n
,
ib
,
A
,
ld
A
,
T
,
ld
T
,
TAU
,
WORK
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
...
...
@@ -95,16 +98,16 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
* with the array TAU, represent the unitary tile Q as a
* product of elementary reflectors (see Further Details).
*
* @param[in]
LD
A
* The leading dimension of the array A.
LD
A >= max(1,M).
* @param[in]
ld
A
* The leading dimension of the array A.
ld
A >= max(1,M).
*
* @param[out] T
* The IB-by-N triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in]
LD
T
* The leading dimension of the array T.
LD
T >= IB.
* @param[in]
ld
T
* The leading dimension of the array T.
ld
T >= IB.
*
* @param[out] TAU
* The scalar factors of the elementary reflectors (see Further
...
...
@@ -120,8 +123,8 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
*/
void
INSERT_TASK_zgeqrt
(
const
RUNTIME_option_t
*
options
,
int
m
,
int
n
,
int
ib
,
int
nb
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ld
t
)
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ld
T
)
{
(
void
)
nb
;
struct
starpu_codelet
*
codelet
=
&
cl_zgeqrt
;
...
...
@@ -139,9 +142,7 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_VALUE
,
&
ib
,
sizeof
(
int
),
STARPU_RW
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_W
,
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
),
STARPU_VALUE
,
&
ldt
,
sizeof
(
int
),
/* max( nb * (ib+1), ib * (ib+nb) ) */
STARPU_SCRATCH
,
options
->
ws_worker
,
/* ib * (m+3*ib) + max(m,n) */
...
...
@@ -152,4 +153,6 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
STARPU_NAME
,
"zgeqrt"
,
#endif
0
);
(
void
)
ldT
;
(
void
)
ldA
;
}
runtime/starpu/codelets/codelet_zgessm.c
View file @
d5e458e1
...
...
@@ -19,6 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Lucas Barros de Assis
* @date 2014-11-16
* @precisions normal z -> c d s
*
...
...
@@ -34,16 +35,21 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
int
k
;
int
ib
;
int
*
IPIV
;
int
ld
l
;
int
ld
L
;
CHAMELEON_Complex64_t
*
D
;
int
ld
d
;
int
ld
D
;
CHAMELEON_Complex64_t
*
A
;
int
ld
a
;
int
ld
A
;
D
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
k
,
&
ib
,
&
IPIV
,
&
ldl
,
&
ldd
,
&
lda
);
CORE_zgessm
(
m
,
n
,
k
,
ib
,
IPIV
,
D
,
ldd
,
A
,
lda
);
ldL
=
STARPU_MATRIX_GET_LD
(
descr
[
0
]
);
ldD
=
STARPU_MATRIX_GET_LD
(
descr
[
1
]
);
ldA
=
STARPU_MATRIX_GET_LD
(
descr
[
2
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
k
,
&
ib
,
&
IPIV
);
CORE_zgessm
(
m
,
n
,
k
,
ib
,
IPIV
,
D
,
ldD
,
A
,
ldA
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
...
...
@@ -80,15 +86,15 @@ CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
* @param[in] L
* The M-by-K lower triangular tile.
*
* @param[in]
LD
L
* The leading dimension of the array L.
LD
L >= max(1,M).
* @param[in]
ld
L
* The leading dimension of the array L.
ld
L >= max(1,M).
*
* @param[in,out] A
* On entry, the M-by-N tile A.
* On exit, updated by the application of L.
*
* @param[in]
LD
A
* The leading dimension of the array A.
LD
A >= max(1,M).
* @param[in]
ld
A
* The leading dimension of the array A.
ld
A >= max(1,M).
*
*******************************************************************************
*
...
...
@@ -100,9 +106,9 @@ CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
void
INSERT_TASK_zgessm
(
const
RUNTIME_option_t
*
options
,
int
m
,
int
n
,
int
k
,
int
ib
,
int
nb
,
int
*
IPIV
,
const
CHAM_desc_t
*
L
,
int
Lm
,
int
Ln
,
int
ld
l
,
const
CHAM_desc_t
*
D
,
int
Dm
,
int
Dn
,
int
ld
d
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
a
)
const
CHAM_desc_t
*
L
,
int
Lm
,
int
Ln
,
int
ld
L
,
const
CHAM_desc_t
*
D
,
int
Dm
,
int
Dn
,
int
ld
D
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
ld
A
)
{
(
void
)
nb
;
struct
starpu_codelet
*
codelet
=
&
cl_zgessm
;
...
...
@@ -122,15 +128,14 @@ void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
STARPU_VALUE
,
&
ib
,
sizeof
(
int
),
STARPU_VALUE
,
&
IPIV
,
sizeof
(
int
*
),
STARPU_R
,
RTBLKADDR
(
L
,
CHAMELEON_Complex64_t
,
Lm
,
Ln
),
STARPU_VALUE
,
&
ldl
,
sizeof
(
int
),
STARPU_R
,
RTBLKADDR
(
D
,
CHAMELEON_Complex64_t
,
Dm
,
Dn
),
STARPU_VALUE
,
&
ldd
,
sizeof
(
int
),
STARPU_RW
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
STARPU_VALUE
,
&
lda
,
sizeof
(
int
),
STARPU_PRIORITY
,
options
->
priority
,
STARPU_CALLBACK
,
callback
,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME
,
"zgessm"
,
#endif
0
);