Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
solverstack
PaStiX
Commits
847b6b7b
Commit
847b6b7b
authored
Dec 14, 2016
by
RAMET Pierre
Browse files
solve smp v0
parent
0e1b5718
Changes
9
Hide whitespace changes
Inline
Side-by-side
common/isched.c
View file @
847b6b7b
...
...
@@ -179,7 +179,7 @@ isched_parallel_section(isched_thread_t *ctx)
switch
(
action
)
{
case
ISCHED_ACT_PARALLEL
:
isched
->
pfunc
(
ctx
->
rank
,
isched
->
pargs
);
isched
->
pfunc
(
ctx
,
isched
->
pargs
);
break
;
case
ISCHED_ACT_FINALIZE
:
return
isched_thread_destroy
(
ctx
);
...
...
common/isched.h
View file @
847b6b7b
...
...
@@ -83,7 +83,7 @@ int isched_topo_unbind();
int
isched_topo_world_size
();
static
inline
void
isched_parallel_call
(
isched_t
*
isched
,
void
(
*
func
)(
i
nt
,
void
*
),
void
*
args
)
isched_parallel_call
(
isched_t
*
isched
,
void
(
*
func
)(
i
sched_thread_t
*
,
void
*
),
void
*
args
)
{
pthread_mutex_lock
(
&
isched
->
statuslock
);
isched
->
pfunc
=
func
;
...
...
@@ -93,7 +93,7 @@ isched_parallel_call( isched_t *isched, void (*func)(int, void*), void *args )
pthread_cond_broadcast
(
&
isched
->
statuscond
);
isched_barrier_wait
(
&
(
isched
->
barrier
)
);
isched
->
status
=
ISCHED_ACT_STAND_BY
;
func
(
isched
->
master
->
rank
,
args
);
func
(
isched
->
master
,
args
);
isched_barrier_wait
(
&
(
isched
->
barrier
)
);
}
...
...
kernels/core_ztrsmsp.c
View file @
847b6b7b
...
...
@@ -658,7 +658,8 @@ void solve_ztrsmsp( int side, int uplo, int trans, int diag,
A
=
(
pastix_complex64_t
*
)(
fcbk
->
ucoeftab
);
lda
=
(
fcbk
->
cblktype
&
CBLK_SPLIT
)
?
tempn
:
fcbk
->
stride
;
if
(
!
(
fcbk
->
cblktype
&
CBLK_DENSE
))
{
pastix_cblk_lock
(
fcbk
);
if
(
!
(
fcbk
->
cblktype
&
CBLK_DENSE
)
)
{
lrA
=
blok
->
LRblock
+
1
;
switch
(
lrA
->
rk
){
...
...
@@ -699,6 +700,8 @@ void solve_ztrsmsp( int side, int uplo, int trans, int diag,
b
+
cblk
->
lcolidx
+
blok
->
frownum
-
cblk
->
fcolnum
,
ldb
,
CBLAS_SADDR
(
zone
),
b
+
fcbk
->
lcolidx
,
ldb
);
}
pastix_cblk_unlock
(
fcbk
);
pastix_atomic_dec_32b
(
&
(
fcbk
->
ctrbcnt
)
);
}
}
/*
...
...
@@ -739,12 +742,16 @@ void solve_ztrsmsp( int side, int uplo, int trans, int diag,
lda
=
(
cblk
->
cblktype
&
CBLK_SPLIT
)
?
tempm
:
cblk
->
stride
;
pastix_cblk_lock
(
fcbk
);
cblas_zgemm
(
CblasColMajor
,
CblasNoTrans
,
CblasNoTrans
,
tempm
,
nrhs
,
tempn
,
CBLAS_SADDR
(
mzone
),
A
+
blok
->
coefind
,
lda
,
b
+
cblk
->
lcolidx
,
ldb
,
CBLAS_SADDR
(
zone
),
b
+
fcbk
->
lcolidx
+
blok
->
frownum
-
fcbk
->
fcolnum
,
ldb
);
pastix_cblk_unlock
(
fcbk
);
pastix_atomic_dec_32b
(
&
(
fcbk
->
ctrbcnt
)
);
}
}
else
{
...
...
@@ -767,7 +774,7 @@ void solve_ztrsmsp( int side, int uplo, int trans, int diag,
assert
(
blok
->
frownum
>=
fcbk
->
fcolnum
);
assert
(
tempm
<=
(
fcbk
->
lcolnum
-
fcbk
->
fcolnum
+
1
));
pastix_cblk_lock
(
fcbk
);
switch
(
lrA
->
rk
){
case
0
:
break
;
...
...
@@ -799,6 +806,8 @@ void solve_ztrsmsp( int side, int uplo, int trans, int diag,
memFree_null
(
tmp
);
}
pastix_cblk_unlock
(
fcbk
);
pastix_atomic_dec_32b
(
&
(
fcbk
->
ctrbcnt
)
);
}
}
}
...
...
@@ -831,6 +840,7 @@ void solve_ztrsmsp( int side, int uplo, int trans, int diag,
A
=
(
pastix_complex64_t
*
)(
fcbk
->
lcoeftab
);
lda
=
(
fcbk
->
cblktype
&
CBLK_SPLIT
)
?
tempn
:
fcbk
->
stride
;
pastix_cblk_lock
(
fcbk
);
if
(
!
(
fcbk
->
cblktype
&
CBLK_DENSE
))
{
lrA
=
blok
->
LRblock
;
...
...
@@ -872,6 +882,8 @@ void solve_ztrsmsp( int side, int uplo, int trans, int diag,
b
+
cblk
->
lcolidx
+
blok
->
frownum
-
cblk
->
fcolnum
,
ldb
,
CBLAS_SADDR
(
zone
),
b
+
fcbk
->
lcolidx
,
ldb
);
}
pastix_cblk_unlock
(
fcbk
);
pastix_atomic_dec_32b
(
&
(
fcbk
->
ctrbcnt
)
);
}
}
}
...
...
sopalin/coeftab.c
View file @
847b6b7b
...
...
@@ -65,7 +65,7 @@ struct coeftabinit_s {
* assigned to each thread.)
*/
void
pcoeftabInit
(
i
nt
rank
,
void
*
args
)
pcoeftabInit
(
i
sched_thread_t
*
ctx
,
void
*
args
)
{
struct
coeftabinit_s
*
ciargs
=
(
struct
coeftabinit_s
*
)
args
;
const
SolverMatrix
*
datacode
=
ciargs
->
datacode
;
...
...
@@ -74,6 +74,7 @@ pcoeftabInit( int rank, void *args )
int
factoLU
=
ciargs
->
factoLU
;
pastix_int_t
i
,
itercblk
;
pastix_int_t
task
;
int
rank
=
ctx
->
rank
;
void
(
*
initfunc
)(
const
SolverMatrix
*
,
const
pastix_bcsc_t
*
,
pastix_int_t
,
...
...
sopalin/sequential_zgetrf.c
View file @
847b6b7b
...
...
@@ -62,7 +62,7 @@ sequential_zgetrf( pastix_data_t *pastix_data,
}
void
thread_pzgetrf
(
i
nt
rank
,
void
*
args
)
thread_pzgetrf
(
i
sched_thread_t
*
ctx
,
void
*
args
)
{
sopalin_data_t
*
sopalin_data
=
(
sopalin_data_t
*
)
args
;
SolverMatrix
*
datacode
=
sopalin_data
->
solvmtx
;
...
...
@@ -71,6 +71,7 @@ thread_pzgetrf( int rank, void *args )
pastix_complex64_t
*
work
;
pastix_int_t
i
,
ii
;
pastix_int_t
tasknbr
,
*
tasktab
;
int
rank
=
ctx
->
rank
;
MALLOC_INTERN
(
work
,
datacode
->
gemmmax
,
pastix_complex64_t
);
...
...
@@ -90,11 +91,11 @@ thread_pzgetrf( int rank, void *args )
}
#if defined(PASTIX_DEBUG_FACTO) && 0
isched_barrier_wait
(
&
(
((
isched_t
*
)(
sopalin_data
->
sched
))
->
barrier
)
);
isched_barrier_wait
(
&
(
ctx
->
global_ctx
->
barrier
)
);
if
(
rank
==
0
)
{
coeftab_zdump
(
datacode
,
"getrf_L.txt"
);
}
isched_barrier_wait
(
&
(
((
isched_t
*
)(
sopalin_data
->
sched
))
->
barrier
)
);
isched_barrier_wait
(
&
(
ctx
->
global_ctx
->
barrier
)
);
#endif
memFree_null
(
work
);
...
...
sopalin/sequential_zhetrf.c
View file @
847b6b7b
...
...
@@ -59,7 +59,7 @@ sequential_zhetrf( pastix_data_t *pastix_data,
}
void
thread_pzhetrf
(
i
nt
rank
,
void
*
args
)
thread_pzhetrf
(
i
sched_thread_t
*
ctx
,
void
*
args
)
{
sopalin_data_t
*
sopalin_data
=
(
sopalin_data_t
*
)
args
;
SolverMatrix
*
datacode
=
sopalin_data
->
solvmtx
;
...
...
@@ -68,6 +68,7 @@ thread_pzhetrf( int rank, void *args )
pastix_complex64_t
*
work1
,
*
work2
;
pastix_int_t
i
,
ii
;
pastix_int_t
tasknbr
,
*
tasktab
;
int
rank
=
ctx
->
rank
;
MALLOC_INTERN
(
work1
,
pastix_imax
(
datacode
->
gemmmax
,
datacode
->
diagmax
),
pastix_complex64_t
);
...
...
@@ -90,11 +91,11 @@ thread_pzhetrf( int rank, void *args )
}
#if defined(PASTIX_DEBUG_FACTO) && 0
isched_barrier_wait
(
&
(
((
isched_t
*
)(
sopalin_data
->
sched
))
->
barrier
)
);
isched_barrier_wait
(
&
(
ctx
->
global_ctx
->
barrier
)
);
if
(
rank
==
0
)
{
coeftab_zdump
(
datacode
,
"hetrf_L.txt"
);
}
isched_barrier_wait
(
&
(
((
isched_t
*
)(
sopalin_data
->
sched
))
->
barrier
)
);
isched_barrier_wait
(
&
(
ctx
->
global_ctx
->
barrier
)
);
#endif
memFree_null
(
work1
);
...
...
sopalin/sequential_zpotrf.c
View file @
847b6b7b
...
...
@@ -59,7 +59,7 @@ sequential_zpotrf( pastix_data_t *pastix_data,
}
void
thread_pzpotrf
(
i
nt
rank
,
void
*
args
)
thread_pzpotrf
(
i
sched_thread_t
*
ctx
,
void
*
args
)
{
sopalin_data_t
*
sopalin_data
=
(
sopalin_data_t
*
)
args
;
SolverMatrix
*
datacode
=
sopalin_data
->
solvmtx
;
...
...
@@ -68,6 +68,7 @@ thread_pzpotrf( int rank, void *args )
pastix_complex64_t
*
work
;
pastix_int_t
i
,
ii
;
pastix_int_t
tasknbr
,
*
tasktab
;
int
rank
=
ctx
->
rank
;
MALLOC_INTERN
(
work
,
datacode
->
gemmmax
,
pastix_complex64_t
);
...
...
@@ -87,11 +88,11 @@ thread_pzpotrf( int rank, void *args )
}
#if defined(PASTIX_DEBUG_FACTO) && 0
isched_barrier_wait
(
&
(
((
isched_t
*
)(
sopalin_data
->
sched
))
->
barrier
)
);
isched_barrier_wait
(
&
(
ctx
->
global_ctx
->
barrier
)
);
if
(
rank
==
0
)
{
coeftab_zdump
(
datacode
,
"potrf_L.txt"
);
}
isched_barrier_wait
(
&
(
((
isched_t
*
)(
sopalin_data
->
sched
))
->
barrier
)
);
isched_barrier_wait
(
&
(
ctx
->
global_ctx
->
barrier
)
);
#endif
memFree_null
(
work
);
...
...
sopalin/sequential_zsytrf.c
View file @
847b6b7b
...
...
@@ -59,7 +59,7 @@ sequential_zsytrf( pastix_data_t *pastix_data,
}
void
thread_pzsytrf
(
i
nt
rank
,
void
*
args
)
thread_pzsytrf
(
i
sched_thread_t
*
ctx
,
void
*
args
)
{
sopalin_data_t
*
sopalin_data
=
(
sopalin_data_t
*
)
args
;
SolverMatrix
*
datacode
=
sopalin_data
->
solvmtx
;
...
...
@@ -68,6 +68,7 @@ thread_pzsytrf( int rank, void *args )
pastix_complex64_t
*
work1
,
*
work2
;
pastix_int_t
i
,
ii
;
pastix_int_t
tasknbr
,
*
tasktab
;
int
rank
=
ctx
->
rank
;
MALLOC_INTERN
(
work1
,
pastix_imax
(
datacode
->
gemmmax
,
datacode
->
diagmax
),
pastix_complex64_t
);
...
...
@@ -90,11 +91,11 @@ thread_pzsytrf( int rank, void *args )
}
#if defined(PASTIX_DEBUG_FACTO) && 0
isched_barrier_wait
(
&
(
((
isched_t
*
)(
sopalin_data
->
sched
))
->
barrier
)
);
isched_barrier_wait
(
&
(
ctx
->
global_ctx
->
barrier
)
);
if
(
rank
==
0
)
{
coeftab_zdump
(
datacode
,
"sytrf_L.txt"
);
}
isched_barrier_wait
(
&
(
((
isched_t
*
)(
sopalin_data
->
sched
))
->
barrier
)
);
isched_barrier_wait
(
&
(
ctx
->
global_ctx
->
barrier
)
);
#endif
memFree_null
(
work1
);
...
...
sopalin/sequential_ztrsm.c
View file @
847b6b7b
...
...
@@ -45,47 +45,30 @@ sequential_ztrsm( pastix_data_t *pastix_data, int side, int uplo, int trans, int
pastix_int_t
i
;
(
void
)
pastix_data
;
if
(
side
==
PastixLeft
)
{
if
(
uplo
==
PastixUpper
)
{
/*
* Left / Upper / NoTrans
*/
if
(
trans
==
PastixNoTrans
)
{
cblk
=
datacode
->
cblktab
+
datacode
->
cblknbr
-
1
;
for
(
i
=
0
;
i
<
datacode
->
cblknbr
;
i
++
,
cblk
--
){
solve_ztrsmsp
(
side
,
uplo
,
trans
,
diag
,
datacode
,
cblk
,
nrhs
,
b
,
ldb
);
}
}
/* We store U^t, so we swap uplo and trans */
}
else
{
/*
* Left / Lower / NoTrans
*/
if
(
trans
==
PastixNoTrans
)
{
cblk
=
datacode
->
cblktab
;
for
(
i
=
0
;
i
<
datacode
->
cblknbr
;
i
++
,
cblk
++
){
solve_ztrsmsp
(
side
,
uplo
,
trans
,
diag
,
datacode
,
cblk
,
nrhs
,
b
,
ldb
);
}
}
/*
* Left / Lower / [Conj]Trans
*/
else
{
cblk
=
datacode
->
cblktab
+
datacode
->
cblknbr
-
1
;
for
(
i
=
0
;
i
<
datacode
->
cblknbr
;
i
++
,
cblk
--
){
solve_ztrsmsp
(
side
,
uplo
,
trans
,
diag
,
datacode
,
cblk
,
nrhs
,
b
,
ldb
);
}
}
if
(
(
(
side
==
PastixLeft
)
&&
(
uplo
==
PastixUpper
)
&&
(
trans
==
PastixNoTrans
)
)
||
(
(
side
==
PastixLeft
)
&&
(
uplo
==
PastixLower
)
&&
(
trans
!=
PastixNoTrans
)
)
||
(
(
side
==
PastixRight
)
&&
(
uplo
==
PastixUpper
)
&&
(
trans
!=
PastixNoTrans
)
)
||
(
(
side
==
PastixRight
)
&&
(
uplo
==
PastixLower
)
&&
(
trans
==
PastixNoTrans
)
)
)
{
cblk
=
datacode
->
cblktab
+
datacode
->
cblknbr
-
1
;
for
(
i
=
0
;
i
<
datacode
->
cblknbr
;
i
++
,
cblk
--
){
solve_ztrsmsp
(
side
,
uplo
,
trans
,
diag
,
datacode
,
cblk
,
nrhs
,
b
,
ldb
);
}
}
/**
* Right
*/
else
{
else
/**
* ( (side == PastixRight) && (uplo == PastixUpper) && (trans == PastixNoTrans) ) ||
* ( (side == PastixRight) && (uplo == PastixLower) && (trans != PastixNoTrans) ) ||
* ( (side == PastixLeft) && (uplo == PastixUpper) && (trans != PastixNoTrans) ) ||
* ( (side == PastixLeft) && (uplo == PastixLower) && (trans == PastixNoTrans) )
*/
{
cblk
=
datacode
->
cblktab
;
for
(
i
=
0
;
i
<
datacode
->
cblknbr
;
i
++
,
cblk
++
){
solve_ztrsmsp
(
side
,
uplo
,
trans
,
diag
,
datacode
,
cblk
,
nrhs
,
b
,
ldb
);
}
}
}
...
...
@@ -99,7 +82,7 @@ struct args_ztrsm_t
};
void
thread_pztrsm
(
i
nt
rank
,
void
*
args
)
thread_pztrsm
(
i
sched_thread_t
*
ctx
,
void
*
args
)
{
struct
args_ztrsm_t
*
arg
=
(
struct
args_ztrsm_t
*
)
args
;
sopalin_data_t
*
sopalin_data
=
arg
->
sopalin_data
;
...
...
@@ -115,13 +98,68 @@ thread_pztrsm( int rank, void *args )
Task
*
t
;
pastix_int_t
i
,
ii
;
pastix_int_t
tasknbr
,
*
tasktab
;
int
rank
=
ctx
->
rank
;
tasknbr
=
datacode
->
ttsknbr
[
rank
];
tasktab
=
datacode
->
ttsktab
[
rank
];
/* try in sequential */
if
(
!
rank
)
sequential_ztrsm
(
NULL
,
side
,
uplo
,
trans
,
diag
,
sopalin_data
,
nrhs
,
b
,
ldb
);
/* Backward like */
if
(
(
(
side
==
PastixLeft
)
&&
(
uplo
==
PastixUpper
)
&&
(
trans
==
PastixNoTrans
)
)
||
(
(
side
==
PastixLeft
)
&&
(
uplo
==
PastixLower
)
&&
(
trans
!=
PastixNoTrans
)
)
||
(
(
side
==
PastixRight
)
&&
(
uplo
==
PastixUpper
)
&&
(
trans
!=
PastixNoTrans
)
)
||
(
(
side
==
PastixRight
)
&&
(
uplo
==
PastixLower
)
&&
(
trans
==
PastixNoTrans
)
)
)
{
/* Init ctrbcnt in parallel */
for
(
ii
=
0
;
ii
<
tasknbr
;
ii
++
)
{
i
=
tasktab
[
ii
];
t
=
datacode
->
tasktab
+
i
;
cblk
=
datacode
->
cblktab
+
t
->
cblknum
;
cblk
->
ctrbcnt
=
(
cblk
[
1
].
fblokptr
-
cblk
[
0
].
fblokptr
)
-
1
;
}
isched_barrier_wait
(
&
(
ctx
->
global_ctx
->
barrier
)
);
for
(
ii
=
tasknbr
-
1
;
ii
>=
0
;
ii
--
)
{
i
=
tasktab
[
ii
];
t
=
datacode
->
tasktab
+
i
;
cblk
=
datacode
->
cblktab
+
t
->
cblknum
;
/* Wait */
do
{}
while
(
cblk
->
ctrbcnt
);
solve_ztrsmsp
(
side
,
uplo
,
trans
,
diag
,
datacode
,
cblk
,
nrhs
,
b
,
ldb
);
}
}
/* Forward like */
else
/**
* ( (side == PastixRight) && (uplo == PastixUpper) && (trans == PastixNoTrans) ) ||
* ( (side == PastixRight) && (uplo == PastixLower) && (trans != PastixNoTrans) ) ||
* ( (side == PastixLeft) && (uplo == PastixUpper) && (trans != PastixNoTrans) ) ||
* ( (side == PastixLeft) && (uplo == PastixLower) && (trans == PastixNoTrans) )
*/
{
/* Init ctrbcnt in parallel */
for
(
ii
=
0
;
ii
<
tasknbr
;
ii
++
)
{
i
=
tasktab
[
ii
];
t
=
datacode
->
tasktab
+
i
;
cblk
=
datacode
->
cblktab
+
t
->
cblknum
;
cblk
->
ctrbcnt
=
cblk
[
1
].
brownum
-
cblk
[
0
].
brownum
;
}
isched_barrier_wait
(
&
(
ctx
->
global_ctx
->
barrier
)
);
for
(
ii
=
0
;
ii
<
tasknbr
;
ii
++
)
{
i
=
tasktab
[
ii
];
t
=
datacode
->
tasktab
+
i
;
cblk
=
datacode
->
cblktab
+
t
->
cblknum
;
/* Wait */
do
{}
while
(
cblk
->
ctrbcnt
);
solve_ztrsmsp
(
side
,
uplo
,
trans
,
diag
,
datacode
,
cblk
,
nrhs
,
b
,
ldb
);
}
}
}
void
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment