Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Chameleon
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
AGULLO Emmanuel
Chameleon
Commits
a9a28b6d
Commit
a9a28b6d
authored
Dec 20, 2018
by
Mathieu Faverge
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'descriptors' into 'master'
Factorize a bit the descriptors functions See merge request
solverstack/chameleon!133
parents
7a5f42b5
05f4bc11
Changes
33
Hide whitespace changes
Inline
Side-by-side
Showing
33 changed files
with
435 additions
and
564 deletions
+435
-564
compute/pzhetrd_he2hb.c
compute/pzhetrd_he2hb.c
+11
-14
compute/pzlange.c
compute/pzlange.c
+79
-72
compute/pzlansy.c
compute/pzlansy.c
+65
-61
compute/zgelqf.c
compute/zgelqf.c
+1
-2
compute/zgelqf_param.c
compute/zgelqf_param.c
+1
-2
compute/zgelqs.c
compute/zgelqs.c
+1
-2
compute/zgelqs_param.c
compute/zgelqs_param.c
+1
-2
compute/zgels.c
compute/zgels.c
+1
-2
compute/zgels_param.c
compute/zgels_param.c
+1
-2
compute/zgeqrf.c
compute/zgeqrf.c
+1
-2
compute/zgeqrf_param.c
compute/zgeqrf_param.c
+1
-2
compute/zgeqrs.c
compute/zgeqrs.c
+1
-2
compute/zgeqrs_param.c
compute/zgeqrs_param.c
+1
-2
compute/zgesv_incpiv.c
compute/zgesv_incpiv.c
+1
-2
compute/zgesvd.c
compute/zgesvd.c
+9
-10
compute/zgetrf_incpiv.c
compute/zgetrf_incpiv.c
+1
-2
compute/zheevd.c
compute/zheevd.c
+2
-3
compute/zhetrd.c
compute/zhetrd.c
+9
-11
compute/ztile.c
compute/ztile.c
+4
-4
compute/ztpgqrt.c
compute/ztpgqrt.c
+1
-2
compute/zunglq.c
compute/zunglq.c
+1
-2
compute/zunglq_param.c
compute/zunglq_param.c
+1
-2
compute/zungqr.c
compute/zungqr.c
+1
-2
compute/zungqr_param.c
compute/zungqr_param.c
+1
-2
compute/zunmlq.c
compute/zunmlq.c
+1
-2
compute/zunmlq_param.c
compute/zunmlq_param.c
+1
-2
compute/zunmqr.c
compute/zunmqr.c
+1
-2
compute/zunmqr_param.c
compute/zunmqr_param.c
+1
-2
control/compute_z.h
control/compute_z.h
+49
-52
control/descriptor.c
control/descriptor.c
+155
-238
control/descriptor.h
control/descriptor.h
+21
-17
control/workspace.c
control/workspace.c
+3
-40
include/chameleon/constants.h
include/chameleon/constants.h
+7
-0
No files found.
compute/pzhetrd_he2hb.c
View file @
a9a28b6d
...
...
@@ -23,9 +23,9 @@
#define A(m, n) A, m, n
#define T(m, n) T, m, n
#define D(k) D, (k)-1, 0
#define D(k)
&
D, (k)-1, 0
#define AT(k) AT, k, 0
#define AT(k)
&
AT, k, 0
#if defined(CHAMELEON_COPY_DIAG)
#define E(m, n) E, m, 0
...
...
@@ -42,8 +42,8 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
{
CHAM_context_t
*
chamctxt
;
RUNTIME_option_t
options
;
CHAM_desc_t
*
D
=
NULL
;
CHAM_desc_t
*
AT
=
NULL
;
CHAM_desc_t
D
;
CHAM_desc_t
AT
;
size_t
ws_worker
=
0
;
size_t
ws_host
=
0
;
...
...
@@ -87,15 +87,12 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
RUNTIME_options_ws_alloc
(
&
options
,
ws_worker
,
ws_host
);
/* Copy of the diagonal tiles to keep the general version of the tile all along the computation */
D
=
(
CHAM_desc_t
*
)
malloc
(
sizeof
(
CHAM_desc_t
));
chameleon_zdesc_alloc_diag
(
*
D
,
A
->
mb
,
A
->
nb
,
chameleon_min
(
A
->
m
,
A
->
n
)
-
A
->
mb
,
A
->
nb
,
0
,
0
,
chameleon_min
(
A
->
m
,
A
->
n
)
-
A
->
mb
,
A
->
nb
,
A
->
p
,
A
->
q
);
chameleon_zdesc_alloc_diag
(
&
D
,
A
->
mb
,
A
->
m
,
A
->
n
,
A
->
p
,
A
->
q
);
AT
=
(
CHAM_desc_t
*
)
malloc
(
sizeof
(
CHAM_desc_t
));
*
AT
=
chameleon_desc_init
(
ChamComplexDouble
,
A
->
mb
,
A
->
nb
,
(
A
->
mb
*
A
->
nb
),
chameleon_min
(
A
->
mt
,
A
->
nt
)
*
A
->
mb
,
A
->
nb
,
0
,
0
,
chameleon_min
(
A
->
mt
,
A
->
nt
)
*
A
->
mb
,
A
->
nb
,
1
,
1
);
chameleon_desc_mat_alloc
(
AT
);
RUNTIME_desc_create
(
AT
);
chameleon_desc_init
(
&
AT
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamComplexDouble
,
A
->
mb
,
A
->
nb
,
(
A
->
mb
*
A
->
nb
),
chameleon_min
(
A
->
mt
,
A
->
nt
)
*
A
->
mb
,
A
->
nb
,
0
,
0
,
chameleon_min
(
A
->
mt
,
A
->
nt
)
*
A
->
mb
,
A
->
nb
,
1
,
1
,
NULL
,
NULL
,
NULL
);
/* Let's extract the diagonal in a temporary copy that contains A and A' */
for
(
k
=
1
;
k
<
A
->
nt
;
k
++
){
...
...
@@ -437,8 +434,8 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
RUNTIME_options_finalize
(
&
options
,
chamctxt
);
CHAMELEON_Sequence_Wait
(
sequence
);
CHAMELEON_Desc_D
estroy
(
&
D
);
CHAMELEON_Desc_D
estroy
(
&
AT
);
chameleon_desc_d
estroy
(
&
D
);
chameleon_desc_d
estroy
(
&
AT
);
(
void
)
E
;
}
compute/pzlange.c
View file @
a9a28b6d
...
...
@@ -26,9 +26,8 @@
//WS_ADD : A->mb + A->nb
#include "control/common.h"
#define A(m, n) A, (m), (n)
#define Wcol(m, n) Wcol, (m), (n)
#define Welt(m, n) Welt, (m), (n)
#define A( m, n ) A, (m), (n)
#define W( desc, m, n ) (desc), (m), (n)
static
inline
void
chameleon_pzlange_one
(
cham_uplo_t
uplo
,
cham_diag_t
diag
,
CHAM_desc_t
*
A
,
...
...
@@ -63,21 +62,21 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
INSERT_TASK_ztrasm
(
options
,
ChamColumnwise
,
uplo
,
diag
,
tempmm
,
tempnn
,
A
(
m
,
n
),
ldam
,
W
col
(
m
,
n
)
);
A
(
m
,
n
),
ldam
,
W
(
Wcol
,
m
,
n
)
);
}
else
{
INSERT_TASK_dzasum
(
options
,
ChamColumnwise
,
ChamUpperLower
,
tempmm
,
tempnn
,
A
(
m
,
n
),
ldam
,
W
col
(
m
,
n
)
);
A
(
m
,
n
),
ldam
,
W
(
Wcol
,
m
,
n
)
);
}
if
(
m
>=
P
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempnn
,
1
,
A
->
nb
,
1
.
0
,
W
col
(
m
,
n
),
tempnn
,
1
.
0
,
W
col
(
m
%
P
,
n
),
tempnn
);
ChamNoTrans
,
1
,
tempnn
,
A
->
nb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
1
,
1
.
0
,
W
(
Wcol
,
m
%
P
,
n
),
1
);
}
}
...
...
@@ -88,15 +87,15 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for
(
m
=
1
;
m
<
P
;
n
++
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempnn
,
1
,
A
->
nb
,
1
.
0
,
W
col
(
m
,
n
),
tempnn
,
1
.
0
,
W
col
(
0
,
n
),
tempnn
);
ChamNoTrans
,
1
,
tempnn
,
A
->
nb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
1
,
1
.
0
,
W
(
Wcol
,
0
,
n
),
1
);
}
INSERT_TASK_dlange
(
options
,
ChamMaxNorm
,
tempnn
,
1
,
A
->
nb
,
W
col
(
0
,
n
),
tempnn
,
Welt
(
0
,
n
));
ChamMaxNorm
,
1
,
tempnn
,
A
->
nb
,
W
(
Wcol
,
0
,
n
),
1
,
W
(
Welt
,
0
,
n
));
}
/**
...
...
@@ -106,7 +105,7 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for
(
n
=
Q
;
n
<
NT
;
n
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
0
,
n
),
Welt
(
0
,
n
%
Q
)
);
W
(
Welt
,
0
,
n
),
W
(
Welt
,
0
,
n
%
Q
)
);
}
/**
...
...
@@ -116,7 +115,7 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for
(
n
=
1
;
n
<
Q
;
n
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
0
,
n
),
Welt
(
0
,
0
)
);
W
(
Welt
,
0
,
n
),
W
(
Welt
,
0
,
0
)
);
}
}
...
...
@@ -153,21 +152,21 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
INSERT_TASK_ztrasm
(
options
,
ChamRowwise
,
uplo
,
diag
,
tempmm
,
tempnn
,
A
(
m
,
n
),
ldam
,
W
col
(
m
,
n
)
);
A
(
m
,
n
),
ldam
,
W
(
Wcol
,
m
,
n
)
);
}
else
{
INSERT_TASK_dzasum
(
options
,
ChamRowwise
,
ChamUpperLower
,
tempmm
,
tempnn
,
A
(
m
,
n
),
ldam
,
W
col
(
m
,
n
)
);
A
(
m
,
n
),
ldam
,
W
(
Wcol
,
m
,
n
)
);
}
if
(
n
>=
Q
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
mb
,
1
.
0
,
W
col
(
m
,
n
),
tempmm
,
1
.
0
,
W
col
(
m
,
n
%
Q
),
tempmm
);
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
n
%
Q
),
tempmm
);
}
}
...
...
@@ -179,14 +178,14 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
mb
,
1
.
0
,
W
col
(
m
,
n
),
tempmm
,
1
.
0
,
W
col
(
m
,
0
),
tempmm
);
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
0
),
tempmm
);
}
INSERT_TASK_dlange
(
options
,
ChamMaxNorm
,
tempmm
,
1
,
A
->
nb
,
W
col
(
m
,
0
),
1
,
Welt
(
m
,
0
));
W
(
Wcol
,
m
,
0
),
1
,
W
(
Welt
,
m
,
0
));
}
/**
...
...
@@ -196,7 +195,7 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for
(
m
=
P
;
m
<
MT
;
m
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
0
),
Welt
(
m
%
P
,
0
)
);
W
(
Welt
,
m
,
0
),
W
(
Welt
,
m
%
P
,
0
)
);
}
/**
...
...
@@ -206,7 +205,7 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
for
(
m
=
1
;
m
<
P
;
m
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
0
),
Welt
(
0
,
0
)
);
W
(
Welt
,
m
,
0
),
W
(
Welt
,
0
,
0
)
);
}
}
...
...
@@ -242,19 +241,19 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
INSERT_TASK_zlantr
(
options
,
ChamMaxNorm
,
uplo
,
diag
,
tempmm
,
tempnn
,
A
->
nb
,
A
(
m
,
n
),
ldam
,
W
elt
(
m
,
n
));
A
(
m
,
n
),
ldam
,
W
(
Welt
,
m
,
n
));
}
else
{
INSERT_TASK_zlange
(
options
,
ChamMaxNorm
,
tempmm
,
tempnn
,
A
->
nb
,
A
(
m
,
n
),
ldam
,
W
elt
(
m
,
n
));
A
(
m
,
n
),
ldam
,
W
(
Welt
,
m
,
n
));
}
if
(
n
>=
Q
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
n
),
Welt
(
m
,
n
%
Q
)
);
W
(
Welt
,
m
,
n
),
W
(
Welt
,
m
,
n
%
Q
)
);
}
}
...
...
@@ -265,7 +264,7 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
for
(
n
=
1
;
n
<
Q
;
n
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
n
),
Welt
(
m
,
0
)
);
W
(
Welt
,
m
,
n
),
W
(
Welt
,
m
,
0
)
);
}
}
...
...
@@ -276,7 +275,7 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
for
(
m
=
P
;
m
<
MT
;
m
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
0
),
Welt
(
m
%
P
,
0
)
);
W
(
Welt
,
m
,
0
),
W
(
Welt
,
m
%
P
,
0
)
);
}
/**
...
...
@@ -286,7 +285,7 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
for
(
m
=
1
;
m
<
P
;
m
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
0
),
Welt
(
0
,
0
)
);
W
(
Welt
,
m
,
0
),
W
(
Welt
,
0
,
0
)
);
}
}
...
...
@@ -322,18 +321,18 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
INSERT_TASK_ztrssq
(
options
,
uplo
,
diag
,
tempmm
,
tempnn
,
A
(
m
,
n
),
ldam
,
W
elt
(
m
,
n
)
);
A
(
m
,
n
),
ldam
,
W
(
Welt
,
m
,
n
)
);
}
else
{
INSERT_TASK_zgessq
(
options
,
tempmm
,
tempnn
,
A
(
m
,
n
),
ldam
,
W
elt
(
m
,
n
)
);
A
(
m
,
n
),
ldam
,
W
(
Welt
,
m
,
n
)
);
}
if
(
n
>=
Q
)
{
INSERT_TASK_dplssq
(
options
,
W
elt
(
m
,
n
),
Welt
(
m
,
n
%
Q
)
);
options
,
W
(
Welt
,
m
,
n
),
W
(
Welt
,
m
,
n
%
Q
)
);
}
}
...
...
@@ -343,7 +342,7 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
*/
for
(
n
=
1
;
n
<
Q
;
n
++
)
{
INSERT_TASK_dplssq
(
options
,
W
elt
(
m
,
n
),
Welt
(
m
,
0
)
);
options
,
W
(
Welt
,
m
,
n
),
W
(
Welt
,
m
,
0
)
);
}
}
...
...
@@ -353,7 +352,7 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
*/
for
(
m
=
P
;
m
<
MT
;
m
++
)
{
INSERT_TASK_dplssq
(
options
,
W
elt
(
m
,
0
),
Welt
(
m
%
P
,
0
)
);
options
,
W
(
Welt
,
m
,
0
),
W
(
Welt
,
m
%
P
,
0
)
);
}
/**
...
...
@@ -362,11 +361,11 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
*/
for
(
m
=
1
;
m
<
P
;
m
++
)
{
INSERT_TASK_dplssq
(
options
,
W
elt
(
m
,
0
),
Welt
(
0
,
0
)
);
options
,
W
(
Welt
,
m
,
0
),
W
(
Welt
,
0
,
0
)
);
}
INSERT_TASK_dplssq2
(
options
,
W
elt
(
0
,
0
)
);
options
,
W
(
Welt
,
0
,
0
)
);
}
/**
...
...
@@ -378,13 +377,13 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
{
CHAM_context_t
*
chamctxt
;
RUNTIME_option_t
options
;
CHAM_desc_t
*
Wcol
=
NULL
;
CHAM_desc_t
*
Welt
=
NULL
;
CHAM_desc_t
Wcol
;
CHAM_desc_t
Welt
;
double
alpha
=
0
.
0
;
double
beta
=
0
.
0
;
int
workn
,
workmt
,
worknt
;
int
m
,
n
;
int
m
,
n
,
wcol_init
=
0
;
chamctxt
=
chameleon_context_self
();
if
(
sequence
->
status
!=
CHAMELEON_SUCCESS
)
{
...
...
@@ -402,11 +401,14 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
case
ChamOneNorm
:
RUNTIME_options_ws_alloc
(
&
options
,
1
,
0
);
CHAMELEON_Desc_Create
(
&
Wcol
,
NULL
,
ChamRealDouble
,
1
,
A
->
nb
,
A
->
nb
,
workmt
,
worknt
*
A
->
nb
,
0
,
0
,
workmt
,
worknt
*
A
->
nb
,
A
->
p
,
A
->
q
);
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamRealDouble
,
1
,
A
->
nb
,
A
->
nb
,
workmt
,
worknt
*
A
->
nb
,
0
,
0
,
workmt
,
worknt
*
A
->
nb
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
wcol_init
=
1
;
CHAMELEON_Desc_Create
(
&
Welt
,
NULL
,
ChamRealDouble
,
1
,
1
,
1
,
A
->
p
,
worknt
,
0
,
0
,
A
->
p
,
worknt
,
A
->
p
,
A
->
q
);
chameleon_desc_init
(
&
Welt
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamRealDouble
,
1
,
1
,
1
,
A
->
p
,
worknt
,
0
,
0
,
A
->
p
,
worknt
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
break
;
...
...
@@ -416,11 +418,14 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
case
ChamInfNorm
:
RUNTIME_options_ws_alloc
(
&
options
,
A
->
mb
,
0
);
CHAMELEON_Desc_Create
(
&
Wcol
,
NULL
,
ChamRealDouble
,
A
->
mb
,
1
,
A
->
mb
,
workmt
*
A
->
mb
,
worknt
,
0
,
0
,
workmt
*
A
->
mb
,
worknt
,
A
->
p
,
A
->
q
);
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamRealDouble
,
A
->
mb
,
1
,
A
->
mb
,
workmt
*
A
->
mb
,
worknt
,
0
,
0
,
workmt
*
A
->
mb
,
worknt
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
wcol_init
=
1
;
CHAMELEON_Desc_Create
(
&
Welt
,
NULL
,
ChamRealDouble
,
1
,
1
,
1
,
workmt
,
A
->
q
,
0
,
0
,
workmt
,
A
->
q
,
A
->
p
,
A
->
q
);
chameleon_desc_init
(
&
Welt
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamRealDouble
,
1
,
1
,
1
,
workmt
,
A
->
q
,
0
,
0
,
workmt
,
A
->
q
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
break
;
/*
...
...
@@ -430,8 +435,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
RUNTIME_options_ws_alloc
(
&
options
,
1
,
0
);
alpha
=
1
.;
CHAMELEON_Desc_Create
(
&
Welt
,
NULL
,
ChamRealDouble
,
2
,
1
,
2
,
workmt
*
2
,
workn
,
0
,
0
,
workmt
*
2
,
workn
,
A
->
p
,
A
->
q
);
chameleon_desc_init
(
&
Welt
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamRealDouble
,
2
,
1
,
2
,
workmt
*
2
,
workn
,
0
,
0
,
workmt
*
2
,
workn
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
break
;
/*
...
...
@@ -441,8 +447,9 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
default:
RUNTIME_options_ws_alloc
(
&
options
,
1
,
0
);
CHAMELEON_Desc_Create
(
&
Welt
,
NULL
,
ChamRealDouble
,
1
,
1
,
1
,
workmt
,
workn
,
0
,
0
,
workmt
,
workn
,
A
->
p
,
A
->
q
);
chameleon_desc_init
(
&
Welt
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamRealDouble
,
1
,
1
,
1
,
workmt
,
workn
,
0
,
0
,
workmt
,
workn
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
}
/* Initialize workspaces */
...
...
@@ -450,44 +457,44 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
(
norm
==
ChamOneNorm
)
)
{
/* Initialize Wcol tile */
for
(
m
=
0
;
m
<
Wcol
->
mt
;
m
++
)
{
for
(
n
=
0
;
n
<
Wcol
->
nt
;
n
++
)
{
for
(
m
=
0
;
m
<
Wcol
.
mt
;
m
++
)
{
for
(
n
=
0
;
n
<
Wcol
.
nt
;
n
++
)
{
INSERT_TASK_dlaset
(
&
options
,
ChamUpperLower
,
Wcol
->
mb
,
Wcol
->
nb
,
ChamUpperLower
,
Wcol
.
mb
,
Wcol
.
nb
,
alpha
,
beta
,
W
col
(
m
,
n
),
Wcol
->
mb
);
W
(
&
Wcol
,
m
,
n
),
Wcol
.
mb
);
}
}
}
for
(
m
=
0
;
m
<
Welt
->
mt
;
m
++
)
{
for
(
n
=
0
;
n
<
Welt
->
nt
;
n
++
)
{
for
(
m
=
0
;
m
<
Welt
.
mt
;
m
++
)
{
for
(
n
=
0
;
n
<
Welt
.
nt
;
n
++
)
{
INSERT_TASK_dlaset
(
&
options
,
ChamUpperLower
,
Welt
->
mb
,
Welt
->
nb
,
ChamUpperLower
,
Welt
.
mb
,
Welt
.
nb
,
alpha
,
beta
,
W
elt
(
m
,
n
),
Welt
->
mb
);
W
(
&
Welt
,
m
,
n
),
Welt
.
mb
);
}
}
switch
(
norm
)
{
case
ChamOneNorm
:
chameleon_pzlange_one
(
uplo
,
diag
,
A
,
Wcol
,
Welt
,
&
options
);
CHAMELEON_Desc_Flush
(
Wcol
,
sequence
);
chameleon_pzlange_one
(
uplo
,
diag
,
A
,
&
Wcol
,
&
Welt
,
&
options
);
CHAMELEON_Desc_Flush
(
&
Wcol
,
sequence
);
break
;
case
ChamInfNorm
:
chameleon_pzlange_inf
(
uplo
,
diag
,
A
,
Wcol
,
Welt
,
&
options
);
CHAMELEON_Desc_Flush
(
Wcol
,
sequence
);
chameleon_pzlange_inf
(
uplo
,
diag
,
A
,
&
Wcol
,
&
Welt
,
&
options
);
CHAMELEON_Desc_Flush
(
&
Wcol
,
sequence
);
break
;
case
ChamFrobeniusNorm
:
chameleon_pzlange_frb
(
uplo
,
diag
,
A
,
Welt
,
&
options
);
chameleon_pzlange_frb
(
uplo
,
diag
,
A
,
&
Welt
,
&
options
);
break
;
case
ChamMaxNorm
:
default:
chameleon_pzlange_max
(
uplo
,
diag
,
A
,
Welt
,
&
options
);
chameleon_pzlange_max
(
uplo
,
diag
,
A
,
&
Welt
,
&
options
);
}
/**
...
...
@@ -499,20 +506,20 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
INSERT_TASK_dlacpy
(
&
options
,
ChamUpperLower
,
1
,
1
,
1
,
W
elt
(
0
,
0
),
1
,
Welt
(
m
,
n
),
1
);
W
(
&
Welt
,
0
,
0
),
1
,
W
(
&
Welt
,
m
,
n
),
1
);
}
}
}
CHAMELEON_Desc_Flush
(
Welt
,
sequence
);
CHAMELEON_Desc_Flush
(
&
Welt
,
sequence
);
RUNTIME_sequence_wait
(
chamctxt
,
sequence
);
*
result
=
*
(
double
*
)
Welt
->
get_blkaddr
(
Welt
,
A
->
myrank
/
A
->
q
,
A
->
myrank
%
A
->
q
);
*
result
=
*
(
double
*
)
Welt
.
get_blkaddr
(
&
Welt
,
A
->
myrank
/
A
->
q
,
A
->
myrank
%
A
->
q
);
if
(
Wcol
!=
NULL
)
{
CHAMELEON_Desc_D
estroy
(
&
Wcol
);
if
(
wcol_init
)
{
chameleon_desc_d
estroy
(
&
Wcol
);
}
CHAMELEON_Desc_D
estroy
(
&
Welt
);
chameleon_desc_d
estroy
(
&
Welt
);
RUNTIME_options_ws_free
(
&
options
);
RUNTIME_options_finalize
(
&
options
,
chamctxt
);
...
...
compute/pzlansy.c
View file @
a9a28b6d
...
...
@@ -27,9 +27,8 @@
#include <math.h>
#include "control/common.h"
#define A(m, n) A, (m), (n)
#define Wcol(m, n) Wcol, (m), (n)
#define Welt(m, n) Welt, (m), (n)
#define A( m, n ) A, (m), (n)
#define W( desc, m, n ) (desc), (m), (n)
static
inline
void
chameleon_pzlansy_inf
(
cham_uplo_t
uplo
,
CHAM_desc_t
*
A
,
...
...
@@ -62,18 +61,18 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
INSERT_TASK_dzasum
(
options
,
ChamRowwise
,
uplo
,
tempmm
,
tempnn
,
A
(
m
,
n
),
ldam
,
W
col
(
m
,
n
)
);
A
(
m
,
n
),
ldam
,
W
(
Wcol
,
m
,
n
)
);
}
else
{
INSERT_TASK_dzasum
(
options
,
ChamRowwise
,
ChamUpperLower
,
tempmm
,
tempnn
,
A
(
m
,
n
),
ldam
,
W
col
(
m
,
n
)
);
A
(
m
,
n
),
ldam
,
W
(
Wcol
,
m
,
n
)
);
INSERT_TASK_dzasum
(
options
,
ChamColumnwise
,
ChamUpperLower
,
tempmm
,
tempnn
,
A
(
m
,
n
),
ldam
,
W
col
(
n
,
m
)
);
A
(
m
,
n
),
ldam
,
W
(
Wcol
,
n
,
m
)
);
}
}
}
...
...
@@ -85,8 +84,8 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
nb
,
1
.
0
,
W
col
(
m
,
n
),
tempmm
,
1
.
0
,
W
col
(
m
,
n
%
Q
),
tempmm
);
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
n
%
Q
),
tempmm
);
}
/**
...
...
@@ -97,34 +96,34 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
mb
,
1
.
0
,
W
col
(
m
,
n
),
tempmm
,
1
.
0
,
W
col
(
m
,
0
),
tempmm
);
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
0
),
tempmm
);
}
INSERT_TASK_dlange
(
options
,
ChamMaxNorm
,
tempmm
,
1
,
A
->
nb
,
W
col
(
m
,
0
),
1
,
Welt
(
m
,
0
));
W
(
Wcol
,
m
,
0
),
1
,
W
(
Welt
,
m
,
0
));
}
/**
* Step 3:
* For m in 0..P-1, W
elt(
m, n) = max( Wcol(m..mt[P], n ) )
* For m in 0..P-1, W
( Welt,
m, n) = max( Wcol(m..mt[P], n ) )
*/
for
(
m
=
P
;
m
<
MT
;
m
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
0
),
Welt
(
m
%
P
,
0
)
);
W
(
Welt
,
m
,
0
),
W
(
Welt
,
m
%
P
,
0
)
);
}
/**
* Step 4:
* For each i, W
elt(i, n) = max( Welt(
0..P-1, n) )
* For each i, W
( Welt, i, n) = max( W( Welt,
0..P-1, n) )
*/
for
(
m
=
1
;
m
<
P
;
m
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
0
),
Welt
(
0
,
0
)
);
W
(
Welt
,
m
,
0
),
W
(
Welt
,
0
,
0
)
);
}
}
...
...
@@ -159,26 +158,26 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A,
INSERT_TASK_zlanhe
(
options
,
ChamMaxNorm
,
uplo
,
tempmm
,
A
->
nb
,
A
(
m
,
n
),
ldam
,
W
elt
(
m
,
n
));
A
(
m
,
n
),
ldam
,
W
(
Welt
,
m
,
n
));
}
else
{
INSERT_TASK_zlansy
(
options
,
ChamMaxNorm
,
uplo
,
tempmm
,
A
->
nb
,
A
(
m
,
n
),
ldam
,
W
elt
(
m
,
n
));
A
(
m
,
n
),
ldam
,
W
(
Welt
,
m
,
n
));
}
}
else
{
INSERT_TASK_zlange
(
options
,
ChamMaxNorm
,
tempmm
,
tempnn
,
A
->
nb
,
A
(
m
,
n
),
ldam
,
W
elt
(
m
,
n
));
A
(
m
,
n
),
ldam
,
W
(
Welt
,
m
,
n
));
}
if
(
n
>=
Q
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
n
),
Welt
(
m
,
n
%
Q
)
);
W
(
Welt
,
m
,
n
),
W
(
Welt
,
m
,
n
%
Q
)
);
}
}
...
...
@@ -189,7 +188,7 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A,
for
(
n
=
1
;
n
<
Q
;
n
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
n
),
Welt
(
m
,
0
)
);
W
(
Welt
,
m
,
n
),
W
(
Welt
,
m
,
0
)
);
}
}
...
...
@@ -200,7 +199,7 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A,
for
(
m
=
P
;
m
<
MT
;
m
++
)
{
INSERT_TASK_dlange_max
(
options
,
W
elt
(
m
,
0
),
Welt
(
m
%
P
,
0
)
);
W
(
Welt
,
m
,
0
),
W
(
Welt
,
m
%
P
,
0
)
);
}
/**
...
...
@@ -210,7 +209,7 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A,
for
(
m
=
1
;
m
<
P
;
m
++
)
{