Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Chameleon
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
AGULLO Emmanuel
Chameleon
Commits
43572e70
Commit
43572e70
authored
Oct 17, 2018
by
Mathieu Faverge
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'release/fix_geqrf' into 'master'
Release/fix geqrf See merge request
solverstack/chameleon!124
parents
53425480
c1622072
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
23 changed files
with
289 additions
and
384 deletions
+289
-384
compute/pzgelqf_param.c
compute/pzgelqf_param.c
+29
-34
compute/pzgelqfrh.c
compute/pzgelqfrh.c
+7
-9
compute/pzgeqrf_param.c
compute/pzgeqrf_param.c
+27
-26
compute/pzgeqrfrh.c
compute/pzgeqrfrh.c
+7
-9
compute/pzlaset.c
compute/pzlaset.c
+6
-22
compute/pztpgqrt.c
compute/pztpgqrt.c
+4
-61
compute/pztpqrt.c
compute/pztpqrt.c
+6
-7
compute/pzunglq_param.c
compute/pzunglq_param.c
+10
-13
compute/pzunglqrh.c
compute/pzunglqrh.c
+7
-9
compute/pzungqr_param.c
compute/pzungqr_param.c
+21
-28
compute/pzungqrrh.c
compute/pzungqrrh.c
+8
-10
compute/pzunmlq_param.c
compute/pzunmlq_param.c
+28
-32
compute/pzunmlqrh.c
compute/pzunmlqrh.c
+25
-33
compute/pzunmqr_param.c
compute/pzunmqr_param.c
+29
-33
compute/pzunmqrrh.c
compute/pzunmqrrh.c
+26
-34
compute/zgels_param.c
compute/zgels_param.c
+2
-1
compute/zgeqrf_param.c
compute/zgeqrf_param.c
+13
-3
compute/ztpgqrt.c
compute/ztpgqrt.c
+11
-8
compute/zungqr_param.c
compute/zungqr_param.c
+15
-6
control/compute_z.h
control/compute_z.h
+5
-3
coreblas/compute/core_ztpmlqt.c
coreblas/compute/core_ztpmlqt.c
+1
-1
coreblas/compute/core_ztpmqrt.c
coreblas/compute/core_ztpmqrt.c
+1
-1
cudablas/compute/cuda_ztpmqrt.c
cudablas/compute/cuda_ztpmqrt.c
+1
-1
No files found.
compute/pzgelqf_param.c
View file @
43572e70
...
...
@@ -26,8 +26,7 @@
#define T(m,n) T, (m), (n)
#define D(m,n) D, (m), (n)
/*
/**
* Parallel tile LQ factorization (reduction Householder) - dynamic scheduling
*/
void
chameleon_pzgelqf_param
(
int
genD
,
const
libhqr_tree_t
*
qrtree
,
CHAM_desc_t
*
A
,
...
...
@@ -41,11 +40,10 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
size_t
ws_host
=
0
;
int
k
,
m
,
n
,
i
,
p
;
int
K
,
L
;
int
K
,
L
,
nbgeqrt
;
int
ldak
,
ldam
,
lddk
;
int
tempkmin
,
tempkm
,
tempnn
,
tempmm
,
temppn
;
int
ib
;
int
*
tiles
;
int
ib
,
node
,
nbtiles
,
*
tiles
;
chamctxt
=
chameleon_context_self
();
if
(
sequence
->
status
!=
CHAMELEON_SUCCESS
)
...
...
@@ -60,37 +58,32 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
}
/*
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
* ztslqt = A->nb * (ib+1)
* zttlqt = A->nb * (ib+1)
* ztsmlq = A->nb * ib
* zttmlq = A->nb * ib
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
* ztplqt = A->nb * (ib+1)
* ztpmlqt = A->nb * ib
*/
ws_worker
=
A
->
nb
*
(
ib
+
1
);
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmlq = A->nb * ib
* ztsmlq = 2 * A->nb * ib
/*
* zunmqr = A->nb * ib
* ztpmqrt = 2 * A->nb * ib
*/
ws_worker
=
chameleon_max
(
ws_worker
,
ib
*
A
->
nb
*
2
);
#endif
/* Initialisation of tiles */
tiles
=
(
int
*
)
calloc
(
qrtree
->
mt
,
sizeof
(
int
));
ws_worker
*=
sizeof
(
CHAMELEON_Complex64_t
);
ws_host
*=
sizeof
(
CHAMELEON_Complex64_t
);
RUNTIME_options_ws_alloc
(
&
options
,
ws_worker
,
ws_host
);
/* Initialisation of temporary tiles array */
tiles
=
(
int
*
)
calloc
(
qrtree
->
mt
,
sizeof
(
int
));
K
=
chameleon_min
(
A
->
mt
,
A
->
nt
);
/* The number of the factorization */
for
(
k
=
0
;
k
<
K
;
k
++
)
{
RUNTIME_iteration_push
(
chamctxt
,
k
);
...
...
@@ -98,9 +91,11 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
ldak
=
BLKLDD
(
A
,
k
);
lddk
=
BLKLDD
(
D
,
k
);
T
=
TS
;
/* The number of geqrt to apply */
for
(
i
=
0
;
i
<
qrtree
->
getnbgeqrf
(
qrtree
,
k
);
i
++
)
{
nbgeqrt
=
qrtree
->
getnbgeqrf
(
qrtree
,
k
);
T
=
TS
;
for
(
i
=
0
;
i
<
nbgeqrt
;
i
++
)
{
p
=
qrtree
->
getm
(
qrtree
,
k
,
i
);
temppn
=
p
==
A
->
nt
-
1
?
A
->
n
-
p
*
A
->
nb
:
A
->
nb
;
tempkmin
=
chameleon_min
(
tempkm
,
temppn
);
...
...
@@ -110,6 +105,7 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
tempkm
,
temppn
,
ib
,
T
->
nb
,
A
(
k
,
p
),
ldak
,
T
(
k
,
p
),
T
->
mb
);
if
(
genD
)
{
INSERT_TASK_zlacpy
(
&
options
,
...
...
@@ -124,13 +120,14 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
D
(
k
,
p
),
lddk
);
#endif
}
for
(
m
=
k
+
1
;
m
<
A
->
mt
;
m
++
)
{
tempmm
=
m
==
A
->
mt
-
1
?
A
->
m
-
m
*
A
->
mb
:
A
->
mb
;
ldam
=
BLKLDD
(
A
,
m
);
INSERT_TASK_zunmlq
(
&
options
,
ChamRight
,
ChamConjTrans
,
tempmm
,
temppn
,
tempkmin
,
ib
,
T
->
nb
,
tempmm
,
temppn
,
tempkmin
,
ib
,
T
->
nb
,
D
(
k
,
p
),
lddk
,
T
(
k
,
p
),
T
->
mb
,
A
(
m
,
p
),
ldam
);
...
...
@@ -140,15 +137,15 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
}
/* Setting the order of the tiles */
libhqr_walk_stepk
(
qrtree
,
k
,
tiles
+
(
k
+
1
)
);
nbtiles
=
libhqr_walk_stepk
(
qrtree
,
k
,
tiles
);
for
(
i
=
k
+
1
;
i
<
A
->
nt
;
i
++
)
{
for
(
i
=
0
;
i
<
nbtiles
;
i
++
)
{
n
=
tiles
[
i
];
p
=
qrtree
->
currpiv
(
qrtree
,
k
,
n
);
tempnn
=
n
==
A
->
nt
-
1
?
A
->
n
-
n
*
A
->
nb
:
A
->
nb
;
if
(
qrtree
->
gettype
(
qrtree
,
k
,
n
)
==
0
)
{
if
(
qrtree
->
gettype
(
qrtree
,
k
,
n
)
==
LIBHQR_KILLED_BY_TS
)
{
/* TS kernel */
T
=
TS
;
L
=
0
;
...
...
@@ -159,10 +156,9 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
L
=
tempnn
;
}
RUNTIME_data_migrate
(
sequence
,
A
(
k
,
p
),
A
->
get_rankof
(
A
,
k
,
n
)
);
RUNTIME_data_migrate
(
sequence
,
A
(
k
,
n
),
A
->
get_rankof
(
A
,
k
,
n
)
);
node
=
A
->
get_rankof
(
A
,
k
,
n
);
RUNTIME_data_migrate
(
sequence
,
A
(
k
,
p
),
node
);
RUNTIME_data_migrate
(
sequence
,
A
(
k
,
n
),
node
);
INSERT_TASK_ztplqt
(
&
options
,
...
...
@@ -175,10 +171,9 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
tempmm
=
m
==
A
->
mt
-
1
?
A
->
m
-
m
*
A
->
mb
:
A
->
mb
;
ldam
=
BLKLDD
(
A
,
m
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
p
),
A
->
get_rankof
(
A
,
m
,
n
)
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
n
),
A
->
get_rankof
(
A
,
m
,
n
)
);
node
=
A
->
get_rankof
(
A
,
m
,
n
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
p
),
node
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
n
),
node
);
INSERT_TASK_ztpmlqt
(
&
options
,
...
...
compute/pzgelqfrh.c
View file @
43572e70
...
...
@@ -46,7 +46,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
int
K
,
N
,
RD
;
int
ldak
,
ldam
,
lddk
;
int
tempkmin
,
tempkm
,
tempNn
,
tempnn
,
tempmm
,
tempNRDn
;
int
ib
;
int
ib
,
node
;
chamctxt
=
chameleon_context_self
();
if
(
sequence
->
status
!=
CHAMELEON_SUCCESS
)
...
...
@@ -167,10 +167,9 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
for
(
N
=
k
;
N
+
RD
<
A
->
nt
;
N
+=
2
*
RD
)
{
tempNRDn
=
N
+
RD
==
A
->
nt
-
1
?
A
->
n
-
(
N
+
RD
)
*
A
->
nb
:
A
->
nb
;
RUNTIME_data_migrate
(
sequence
,
A
(
k
,
N
),
A
->
get_rankof
(
A
,
k
,
N
+
RD
)
);
RUNTIME_data_migrate
(
sequence
,
A
(
k
,
N
+
RD
),
A
->
get_rankof
(
A
,
k
,
N
+
RD
)
);
node
=
A
->
get_rankof
(
A
,
k
,
N
+
RD
);
RUNTIME_data_migrate
(
sequence
,
A
(
k
,
N
),
node
);
RUNTIME_data_migrate
(
sequence
,
A
(
k
,
N
+
RD
),
node
);
/* TT kernel */
INSERT_TASK_ztplqt
(
...
...
@@ -184,10 +183,9 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
tempmm
=
m
==
A
->
mt
-
1
?
A
->
m
-
m
*
A
->
mb
:
A
->
mb
;
ldam
=
BLKLDD
(
A
,
m
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
N
),
A
->
get_rankof
(
A
,
m
,
N
+
RD
)
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
N
+
RD
),
A
->
get_rankof
(
A
,
m
,
N
+
RD
)
);
node
=
A
->
get_rankof
(
A
,
m
,
N
+
RD
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
N
),
node
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
N
+
RD
),
node
);
INSERT_TASK_ztpmlqt
(
&
options
,
...
...
compute/pzgeqrf_param.c
View file @
43572e70
...
...
@@ -22,16 +22,21 @@
#include <stdlib.h>
#include "libhqr.h"
#define A(m,n)
A, (m), (n)
#define T(m,n)
T, (m), (n)
#define D(m,n)
D, (m), (n)
#define A(m,n) A, (m), (n)
#define T(m,n) T, (m), (n)
#define D(m,n) D, (m), (n)
/**
* Parallel tile QR factorization (reduction Householder) - dynamic scheduling
*
* @param[in] genD
* Indicate if the copies of the geqrt tiles must be done to speedup
* computations in updates.
*/
void
chameleon_pzgeqrf_param
(
int
genD
,
const
libhqr_tree_t
*
qrtree
,
CHAM_desc_t
*
A
,
void
chameleon_pzgeqrf_param
(
int
genD
,
int
K
,
const
libhqr_tree_t
*
qrtree
,
CHAM_desc_t
*
A
,
CHAM_desc_t
*
TS
,
CHAM_desc_t
*
TT
,
CHAM_desc_t
*
D
,
RUNTIME_sequence_t
*
sequence
,
RUNTIME_request_t
*
request
)
RUNTIME_sequence_t
*
sequence
,
RUNTIME_request_t
*
request
)
{
CHAM_context_t
*
chamctxt
;
RUNTIME_option_t
options
;
...
...
@@ -40,11 +45,10 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
size_t
ws_host
=
0
;
int
k
,
m
,
n
,
i
,
p
;
int
K
,
L
,
nbgeqrt
;
int
L
,
nbgeqrt
;
int
ldap
,
ldam
,
lddm
;
int
tempkmin
,
tempkn
,
tempnn
,
tempmm
;
int
ib
;
int
*
tiles
;
int
ib
,
node
,
nbtiles
,
*
tiles
;
chamctxt
=
chameleon_context_self
();
if
(
sequence
->
status
!=
CHAMELEON_SUCCESS
)
...
...
@@ -75,23 +79,22 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
ws_worker
=
chameleon_max
(
ws_worker
,
ib
*
A
->
nb
*
2
);
#endif
/* Initialisation of temporary tiles array */
tiles
=
(
int
*
)
calloc
(
qrtree
->
mt
,
sizeof
(
int
));
ws_worker
*=
sizeof
(
CHAMELEON_Complex64_t
);
ws_host
*=
sizeof
(
CHAMELEON_Complex64_t
);
RUNTIME_options_ws_alloc
(
&
options
,
ws_worker
,
ws_host
);
K
=
chameleon_min
(
A
->
mt
,
A
->
nt
);
/* Initialisation of temporary tiles array */
tiles
=
(
int
*
)
calloc
(
qrtree
->
mt
,
sizeof
(
int
));
/* The number of the factorization */
for
(
k
=
0
;
k
<
K
;
k
++
)
{
RUNTIME_iteration_push
(
chamctxt
,
k
);
tempkn
=
k
==
A
->
nt
-
1
?
A
->
n
-
k
*
A
->
nb
:
A
->
nb
;
/* The number of geqrt to apply */
nbgeqrt
=
qrtree
->
getnbgeqrf
(
qrtree
,
k
);
T
=
TS
;
for
(
i
=
0
;
i
<
nbgeqrt
;
i
++
)
{
m
=
qrtree
->
getm
(
qrtree
,
k
,
i
);
tempmm
=
m
==
A
->
mt
-
1
?
A
->
m
-
m
*
A
->
mb
:
A
->
mb
;
...
...
@@ -99,13 +102,12 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
ldam
=
BLKLDD
(
A
,
m
);
lddm
=
BLKLDD
(
D
,
m
);
T
=
TS
;
INSERT_TASK_zgeqrt
(
&
options
,
tempmm
,
tempkn
,
ib
,
T
->
nb
,
A
(
m
,
k
),
ldam
,
T
(
m
,
k
),
T
->
mb
);
if
(
genD
)
{
INSERT_TASK_zlacpy
(
&
options
,
...
...
@@ -120,6 +122,7 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
D
(
m
,
k
),
lddm
);
#endif
}
for
(
n
=
k
+
1
;
n
<
A
->
nt
;
n
++
)
{
tempnn
=
n
==
A
->
nt
-
1
?
A
->
n
-
n
*
A
->
nb
:
A
->
nb
;
INSERT_TASK_zunmqr
(
...
...
@@ -135,9 +138,9 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
}
/* Setting the order of the tiles */
libhqr_walk_stepk
(
qrtree
,
k
,
tiles
+
(
k
+
1
)
);
nbtiles
=
libhqr_walk_stepk
(
qrtree
,
k
,
tiles
);
for
(
i
=
k
+
1
;
i
<
A
->
mt
;
i
++
)
{
for
(
i
=
0
;
i
<
nbtiles
;
i
++
)
{
m
=
tiles
[
i
];
p
=
qrtree
->
currpiv
(
qrtree
,
k
,
m
);
...
...
@@ -145,7 +148,7 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
ldap
=
BLKLDD
(
A
,
p
);
ldam
=
BLKLDD
(
A
,
m
);
if
(
qrtree
->
gettype
(
qrtree
,
k
,
m
)
==
0
)
{
if
(
qrtree
->
gettype
(
qrtree
,
k
,
m
)
==
LIBHQR_KILLED_BY_TS
)
{
/* TS kernel */
T
=
TS
;
L
=
0
;
...
...
@@ -156,10 +159,9 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
L
=
tempmm
;
}
RUNTIME_data_migrate
(
sequence
,
A
(
p
,
k
),
A
->
get_rankof
(
A
,
m
,
k
)
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
k
),
A
->
get_rankof
(
A
,
m
,
k
)
);
node
=
A
->
get_rankof
(
A
,
m
,
k
);
RUNTIME_data_migrate
(
sequence
,
A
(
p
,
k
),
node
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
k
),
node
);
INSERT_TASK_ztpqrt
(
&
options
,
...
...
@@ -171,10 +173,9 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
for
(
n
=
k
+
1
;
n
<
A
->
nt
;
n
++
)
{
tempnn
=
n
==
A
->
nt
-
1
?
A
->
n
-
n
*
A
->
nb
:
A
->
nb
;
RUNTIME_data_migrate
(
sequence
,
A
(
p
,
n
),
A
->
get_rankof
(
A
,
m
,
n
)
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
n
),
A
->
get_rankof
(
A
,
m
,
n
)
);
node
=
A
->
get_rankof
(
A
,
m
,
n
);
RUNTIME_data_migrate
(
sequence
,
A
(
p
,
n
),
node
);
RUNTIME_data_migrate
(
sequence
,
A
(
m
,
n
),
node
);
INSERT_TASK_ztpmqrt
(
&
options
,
...
...
compute/pzgeqrfrh.c
View file @
43572e70
...
...
@@ -46,7 +46,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
int
K
,
M
,
RD
;
int
ldaM
,
ldam
,
ldaMRD
,
lddM
;
int
tempkmin
,
tempkn
,
tempMm
,
tempnn
,
tempmm
,
tempMRDm
;
int
ib
;
int
ib
,
node
;
chamctxt
=
chameleon_context_self
();
if
(
sequence
->
status
!=
CHAMELEON_SUCCESS
)
...
...
@@ -166,10 +166,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
ldaM
=
BLKLDD
(
A
,
M
);
ldaMRD
=
BLKLDD
(
A
,
M
+
RD
);
RUNTIME_data_migrate
(
sequence
,
A
(
M
,
k
),
A
->
get_rankof
(
A
,
M
+
RD
,
k
)
);
RUNTIME_data_migrate
(
sequence
,
A
(
M
+
RD
,
k
),
A
->
get_rankof
(
A
,
M
+
RD
,
k
)
);
node
=
A
->
get_rankof
(
A
,
M
+
RD
,
k
);
RUNTIME_data_migrate
(
sequence
,
A
(
M
,
k
),
node
);
RUNTIME_data_migrate
(
sequence
,
A
(
M
+
RD
,
k
),
node
);
/* TT kernel */
INSERT_TASK_ztpqrt
(
...
...
@@ -182,10 +181,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
for
(
n
=
k
+
1
;
n
<
A
->
nt
;
n
++
)
{
tempnn
=
n
==
A
->
nt
-
1
?
A
->
n
-
n
*
A
->
nb
:
A
->
nb
;
RUNTIME_data_migrate
(
sequence
,
A
(
M
,
n
),
A
->
get_rankof
(
A
,
M
+
RD
,
n
)
);
RUNTIME_data_migrate
(
sequence
,
A
(
M
+
RD
,
n
),
A
->
get_rankof
(
A
,
M
+
RD
,
n
)
);
node
=
A
->
get_rankof
(
A
,
M
+
RD
,
n
);
RUNTIME_data_migrate
(
sequence
,
A
(
M
,
n
),
node
);
RUNTIME_data_migrate
(
sequence
,
A
(
M
+
RD
,
n
),
node
);
INSERT_TASK_ztpmqrt
(
&
options
,
...
...
compute/pzlaset.c
View file @
43572e70
...
...
@@ -70,26 +70,18 @@ void chameleon_pzlaset(cham_uplo_t uplo,
}
}
else
if
(
uplo
==
ChamUpper
)
{
for
(
j
=
1
;
j
<
A
->
nt
;
j
++
){
for
(
j
=
0
;
j
<
A
->
nt
;
j
++
){
tempjn
=
j
==
A
->
nt
-
1
?
A
->
n
-
j
*
A
->
nb
:
A
->
nb
;
for
(
i
=
0
;
i
<
chameleon_min
(
j
,
A
->
mt
);
i
++
){
for
(
i
=
0
;
i
<
chameleon_min
(
j
+
1
,
A
->
mt
);
i
++
){
tempim
=
i
==
A
->
mt
-
1
?
A
->
m
-
i
*
A
->
mb
:
A
->
mb
;
ldai
=
BLKLDD
(
A
,
i
);
INSERT_TASK_zlaset
(
&
options
,
ChamUpperLower
,
tempim
,
tempjn
,
alpha
,
alpha
,
ChamUpperLower
,
tempim
,
tempjn
,
alpha
,
(
i
==
j
)
?
beta
:
alpha
,
A
(
i
,
j
),
ldai
);
}
}
for
(
j
=
0
;
j
<
minmn
;
j
++
){
tempjm
=
j
==
A
->
mt
-
1
?
A
->
m
-
j
*
A
->
mb
:
A
->
mb
;
tempjn
=
j
==
A
->
nt
-
1
?
A
->
n
-
j
*
A
->
nb
:
A
->
nb
;
ldaj
=
BLKLDD
(
A
,
j
);
INSERT_TASK_zlaset
(
&
options
,
ChamUpper
,
tempjm
,
tempjn
,
alpha
,
beta
,
A
(
j
,
j
),
ldaj
);
}
}
else
{
for
(
i
=
0
;
i
<
A
->
mt
;
i
++
){
...
...
@@ -99,19 +91,11 @@ void chameleon_pzlaset(cham_uplo_t uplo,
tempjn
=
j
==
A
->
nt
-
1
?
A
->
n
-
j
*
A
->
nb
:
A
->
nb
;
INSERT_TASK_zlaset
(
&
options
,
ChamUpperLower
,
tempim
,
tempjn
,
alpha
,
alpha
,
ChamUpperLower
,
tempim
,
tempjn
,
alpha
,
(
i
==
j
)
?
beta
:
alpha
,
A
(
i
,
j
),
ldai
);
}
}
for
(
j
=
0
;
j
<
minmn
;
j
++
){
tempjm
=
j
==
A
->
mt
-
1
?
A
->
m
-
j
*
A
->
mb
:
A
->
mb
;
tempjn
=
j
==
A
->
nt
-
1
?
A
->
n
-
j
*
A
->
nb
:
A
->
nb
;
ldaj
=
BLKLDD
(
A
,
j
);
INSERT_TASK_zlaset
(
&
options
,
ChamUpperLower
,
tempjm
,
tempjn
,
alpha
,
beta
,
A
(
j
,
j
),
ldaj
);
}
}
RUNTIME_options_finalize
(
&
options
,
chamctxt
);
}
compute/pztpgqrt.c
View file @
43572e70
...
...
@@ -20,8 +20,6 @@
*/
#include "control/common.h"
#define V1(m,n) V1, m, n
#define T1(m,n) T1, m, n
#define V2(m,n) V2, m, n
#define T2(m,n) T2, m, n
#define Q1(m,n) Q1, m, n
...
...
@@ -31,11 +29,9 @@
/**
* Parallel tile QR factorization - dynamic scheduling
*/
void
chameleon_pztpgqrt
(
int
genD
,
int
L
,
CHAM_desc_t
*
V1
,
CHAM_desc_t
*
T1
,
void
chameleon_pztpgqrt
(
int
KT
,
int
L
,
CHAM_desc_t
*
V2
,
CHAM_desc_t
*
T2
,
CHAM_desc_t
*
Q1
,
CHAM_desc_t
*
Q2
,
CHAM_desc_t
*
D
,
RUNTIME_sequence_t
*
sequence
,
RUNTIME_request_t
*
request
)
{
CHAM_context_t
*
chamctxt
;
...
...
@@ -46,7 +42,7 @@ void chameleon_pztpgqrt( int genD, int L,
int
k
,
m
,
n
;
int
ldvk
,
ldvm
,
lddk
;
int
ldqk
,
ldqm
;
int
tempk
m
,
tempkn
,
tempkk
,
tempnn
,
tempmm
,
templm
;
int
tempk
n
,
tempnn
,
tempmm
,
templm
;
int
ib
;
/* Dimension of the first column */
...
...
@@ -61,11 +57,6 @@ void chameleon_pztpgqrt( int genD, int L,
ib
=
CHAMELEON_IB
;
if
(
D
==
NULL
)
{
D
=
V1
;
genD
=
0
;
}
/*
* ztpmqrt = Q1->nb * ib
*/
...
...
@@ -85,21 +76,17 @@ void chameleon_pztpgqrt( int genD, int L,
RUNTIME_options_ws_alloc
(
&
options
,
ws_worker
,
ws_host
);
for
(
k
=
V1
->
nt
-
1
;
k
>=
0
;
k
--
)
{
for
(
k
=
KT
-
1
;
k
>=
0
;
k
--
)
{
RUNTIME_iteration_push
(
chamctxt
,
k
);
tempkm
=
k
==
V1
->
mt
-
1
?
V1
->
m
-
k
*
V1
->
mb
:
V1
->
mb
;
tempkk
=
k
==
V1
->
nt
-
1
?
V1
->
n
-
k
*
V1
->
nb
:
V1
->
nb
;
tempkn
=
k
==
Q1
->
nt
-
1
?
Q1
->
n
-
k
*
Q1
->
nb
:
Q1
->
nb
;
ldvk
=
BLKLDD
(
V1
,
k
);
lddk
=
BLKLDD
(
D
,
k
);
ldqk
=
BLKLDD
(
Q1
,
k
);
/* Equivalent to the tsmqr step on Q1,Q2 */
maxmtk
=
chameleon_min
(
Q2
->
mt
,
maxmt
+
k
)
-
1
;
for
(
m
=
maxmtk
;
m
>
-
1
;
m
--
)
{
tempmm
=
m
==
Q2
->
mt
-
1
?
Q2
->
m
-
m
*
Q2
->
mb
:
Q2
->
mb
;
templm
=
m
==
maxmtk
?
tempmm
:
0
;
templm
=
((
L
>
0
)
&&
(
m
==
maxmtk
))
?
tempmm
:
0
;
ldvm
=
BLKLDD
(
V2
,
m
);
ldqm
=
BLKLDD
(
Q2
,
m
);
...
...
@@ -117,53 +104,9 @@ void chameleon_pztpgqrt( int genD, int L,
}
}
for
(
m
=
Q1
->
mt
-
1
;
m
>
k
;
m
--
)
{
tempmm
=
m
==
Q1
->
mt
-
1
?
Q1
->
m
-
m
*
Q1
->
mb
:
Q1
->
mb
;
ldvm
=
BLKLDD
(
V1
,
m
);
ldqm
=
BLKLDD
(
Q1
,
m
);
for
(
n
=
k
;
n
<
Q1
->
nt
;
n
++
)
{
tempnn
=
n
==
Q1
->
nt
-
1
?
Q1
->
n
-
n
*
Q1
->
nb
:
Q1
->
nb
;
/* TS kernel */
INSERT_TASK_ztpmqrt
(
&
options
,
ChamLeft
,
ChamNoTrans
,
tempmm
,
tempnn
,
tempkn
,
0
,
ib
,
T1
->
nb
,
V1
(
m
,
k
),
ldvm
,
T1
(
m
,
k
),
T1
->
mb
,
Q1
(
k
,
n
),
ldqk
,
Q1
(
m
,
n
),
ldqm
);
}
}
if
(
genD
)
{
INSERT_TASK_zlacpy
(
&
options
,
ChamLower
,
tempkm
,
tempkk
,
V1
->
nb
,
V1
(
k
,
k
),
ldvk
,
D
(
k
),
lddk
);
#if defined(CHAMELEON_USE_CUDA)
INSERT_TASK_zlaset
(
&
options
,
ChamUpper
,
tempkm
,
tempkk
,
0
.,
1
.,
D
(
k
),
lddk
);
#endif
}
for
(
n
=
k
;
n
<
Q1
->
nt
;
n
++
)
{
tempnn
=
n
==
Q1
->
nt
-
1
?
Q1
->
n
-
n
*
Q1
->
nb
:
Q1
->
nb
;
INSERT_TASK_zunmqr
(
&
options
,
ChamLeft
,
ChamNoTrans
,
tempkm
,
tempnn
,
tempkk
,
ib
,
T1
->
nb
,
D
(
k
),
lddk
,
T1
(
k
,
k
),
T1
->
mb
,
Q1
(
k
,
n
),
ldqk
);
}
RUNTIME_iteration_pop
(
chamctxt
);
}
RUNTIME_options_ws_free
(
&
options
);
RUNTIME_options_finalize
(
&
options
,
chamctxt
);
(
void
)
D
;
}
compute/pztpqrt.c
View file @
43572e70
...
...
@@ -20,9 +20,9 @@
*/
#include "control/common.h"
#define A(m,n) A,
m, n
#define B(m,n) B,
m, n
#define T(m,n) T,
m, n
#define A(m,n) A,
(m), (n)
#define B(m,n) B,
(m), (n)
#define T(m,n) T,
(m), (n)
/**
* Parallel tile QR factorization - dynamic scheduling
...
...
@@ -53,15 +53,14 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T,
ib
=
CHAMELEON_IB
;
/*
 * ztsqrt = A->nb * (ib+1)
 * ztpqrt = A->nb * (ib+1)
* ztpmqrt = A->nb * ib
*/
ws_worker
=
A
->
nb
*
(
ib
+
1
);
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
/*
* ztpmqrt = 2 * A->nb * ib
*/
ws_worker
=
chameleon_max
(
ws_worker
,
ib
*
A
->
nb
*
2
);
...
...
@@ -81,7 +80,7 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T,
for
(
m
=
0
;
m
<
maxmt
;
m
++
)
{
tempmm
=
m
==
B
->
mt
-
1
?
B
->
m
-
m
*
B
->
mb
:
B
->
mb
;
templm
=
m
==
maxmt
-
1
?
tempmm
:
0
;
templm
=
((
L
>
0
)
&&
(
m
==
maxmt
-
1
))
?
tempmm
:
0
;
ldbm
=
BLKLDD
(
B
,
m
);
/* TT kernel */
INSERT_TASK_ztpqrt
(
...
...
compute/pzunglq_param.c
View file @
43572e70
...
...
@@ -43,8 +43,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
int
K
,
L
;
int
ldak
,
ldqm
,
lddk
;
int
tempkm
,
tempkmin
,
temppn
,
tempnn
,
tempmm
;
int
ib
;
int
*
tiles
;
int
ib
,
node
,
nbtiles
,
*
tiles
;
chamctxt
=
chameleon_context_self
();
if
(
sequence
->
status
!=
CHAMELEON_SUCCESS
)
...
...
@@ -77,15 +76,14 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
ws_worker
=
chameleon_max
(
ws_worker
,
ib
*
A
->
nb
*
2
);
#endif
/* Initialisation of tiles */
tiles
=
(
int
*
)
calloc
(
qrtree
->
mt
,
sizeof
(
int
));
ws_worker
*=
sizeof
(
CHAMELEON_Complex64_t
);
ws_host
*=
sizeof
(
CHAMELEON_Complex64_t
);
RUNTIME_options_ws_alloc
(
&
options
,
ws_worker
,
ws_host
);
/* Initialisation of tiles */
tiles
=
(
int
*
)
calloc
(
qrtree
->
mt
,
sizeof
(
int
));
K
=
chameleon_min
(
A
->
mt
,
A
->
nt
);
for
(
k
=
K
-
1
;
k
>=
0
;
k
--
)
{
...
...
@@ -96,15 +94,15 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
lddk
=
BLKLDD
(
D
,
k
);
/* Setting the order of the tiles*/
libhqr_walk_stepk
(
qrtree
,
k
,
tiles
+
(
k
+
1
)
);
nbtiles
=
libhqr_walk_stepk
(
qrtree
,
k
,
tiles
);
for
(
i
=
A
->
nt
-
1
;
i
>
k
;
i
--
)
{
for
(
i
=
nbtiles
-
1
;
i
>=
0
;
i
--
)
{
n
=
tiles
[
i
];
p
=
qrtree
->
currpiv
(
qrtree
,
k
,
n
);
tempnn
=
n
==
Q
->
nt
-
1
?
Q
->
n
-
n
*
Q
->
nb
:
Q
->
nb
;
if
(
qrtree
->
gettype
(
qrtree
,
k
,
n
)
==
0
)
{
if
(
qrtree
->
gettype
(
qrtree
,
k
,
n
)
==
LIBHQR_KILLED_BY_TS
)
{
/* TS kernel */
L
=
0
;
T
=
TS
;
...
...
@@ -118,10 +116,9 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
tempmm
=
m
==
Q
->
mt
-
1
?
Q
->
m
-
m
*
Q
->
mb
:
Q
->
mb
;
ldqm
=
BLKLDD
(
Q
,
m
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
p
),
Q
->
get_rankof
(
Q
,
m
,
n
)
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
n
),
Q
->
get_rankof
(
Q
,
m
,
n
)
);
node
=
Q
->
get_rankof
(
Q
,
m
,
n
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
p
),
node
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
n
),
node
);
INSERT_TASK_ztpmlqt
(
&
options
,
...
...
compute/pzunglqrh.c
View file @
43572e70
...
...
@@ -48,7 +48,7 @@ void chameleon_pzunglqrh( int genD, int BS,
int
K
,
N
,
RD
,
lastRD
;
int
ldak
,
lddk
,
ldqm
;
int
tempkm
,
tempkmin
,
tempNn
,
tempnn
,
tempmm
,
tempNRDn
;
int
ib
;
int
ib
,
node
;
chamctxt
=
chameleon_context_self
();
if
(
sequence
->
status
!=
CHAMELEON_SUCCESS
)
...
...
@@ -99,10 +99,9 @@ void chameleon_pzunglqrh( int genD, int BS,
tempmm
=
m
==
Q
->
mt
-
1
?
Q
->
m
-
m
*
Q
->
mb
:
Q
->
mb
;
ldqm
=
BLKLDD
(
Q
,
m
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
N
),
Q
->
get_rankof
(
Q
,
m
,
N
+
RD
)
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
N
+
RD
),
Q
->
get_rankof
(
Q
,
m
,
N
+
RD
)
);
node
=
Q
->
get_rankof
(
Q
,
m
,
N
+
RD
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
N
),
node
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
N
+
RD
),
node
);
/* TT kernel */
INSERT_TASK_ztpmlqt
(
...
...
@@ -129,10 +128,9 @@ void chameleon_pzunglqrh( int genD, int BS,
tempmm
=
m
==
Q
->
mt
-
1
?
Q
->
m
-
m
*
Q
->
mb
:
Q
->
mb
;
ldqm
=
BLKLDD
(
Q
,
m
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
N
),
Q
->
get_rankof
(
Q
,
m
,
n
)
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
n
),
Q
->
get_rankof
(
Q
,
m
,
n
)
);
node
=
Q
->
get_rankof
(
Q
,
m
,
n
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
N
),
node
);
RUNTIME_data_migrate
(
sequence
,
Q
(
m
,
n
),
node
);
/* TS kernel */
INSERT_TASK_ztpmlqt
(
...
...
compute/pzungqr_param.c
View file @
43572e70
...
...
@@ -29,7 +29,8 @@
/**
* Parallel construction of Q using tile V (application to identity) - dynamic scheduling
*/
void
chameleon_pzungqr_param
(
int
genD
,
const
libhqr_tree_t
*
qrtree
,
void
chameleon_pzungqr_param
(
int
genD
,
int
K
,
const
libhqr_tree_t
*
qrtree
,
CHAM_desc_t
*
A
,
CHAM_desc_t
*
Q
,
CHAM_desc_t
*
TS
,
CHAM_desc_t
*
TT
,
CHAM_desc_t
*
D
,
RUNTIME_sequence_t
*
sequence
,
RUNTIME_request_t
*
request
)
...
...
@@ -43,8 +44,7 @@ void chameleon_pzungqr_param( int genD, const libhqr_tree_t *qrtree,
int
k
,
m
,
n
,
i
,
p
,
L
;
int
ldam
,
ldqm
,
ldqp
,
lddm
;
int
tempmm
,
tempnn
,
tempkmin
,
tempkn
;
int
ib
,
minMT
;
int
*
tiles
;
int
ib
,
nbgeqrt
,
node
,
nbtiles
,
*
tiles
;
chamctxt
=
chameleon_context_self
();
if
(
sequence
->
status
!=
CHAMELEON_SUCCESS
)
...
...
@@ -53,12 +53,6 @@ void chameleon_pzungqr_param( int genD, const libhqr_tree_t *qrtree,
ib
=
CHAMELEON_IB
;
if
(
A
->
m
>
A
->
n
)
{
minMT
=
A
->
nt
;
}
else
{
minMT
=
A
->
mt
;
}
if
(
D
==
NULL
)
{
D
=
A
;
genD
=
0
;
...
...
@@ -66,47 +60,44 @@ void chameleon_pzungqr_param( int genD, const libhqr_tree_t *qrtree,
/*
* zunmqr = A->nb * ib
 * ztsmqr = A->nb * ib
 * ztpmqr = A->nb * ib