Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
AGULLO Emmanuel
Chameleon
Commits
fa6d78a3
Commit
fa6d78a3
authored
Feb 08, 2019
by
Mathieu Faverge
Browse files
Merge branch 'starpu/alloc_on_the_fly' into 'master'
Starpu/alloc on the fly See merge request
solverstack/chameleon!140
parents
e932eacc
f676d524
Changes
42
Hide whitespace changes
Inline
Side-by-side
compute/pzlange.c
View file @
fa6d78a3
...
...
@@ -72,11 +72,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
}
if
(
m
>=
P
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
1
,
tempnn
,
A
->
nb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
1
,
1
.
0
,
W
(
Wcol
,
m
%
P
,
n
),
1
);
INSERT_TASK_daxpy
(
options
,
tempnn
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
m
%
P
,
n
),
1
);
}
}
...
...
@@ -85,11 +84,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
* For each i, W(i, n) = reduce( W(0..P-1, n) )
*/
for
(
m
=
1
;
m
<
P
;
m
++
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
1
,
tempnn
,
A
->
nb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
1
,
1
.
0
,
W
(
Wcol
,
0
,
n
),
1
);
INSERT_TASK_daxpy
(
options
,
tempnn
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
0
,
n
),
1
);
}
INSERT_TASK_dlange
(
...
...
@@ -165,11 +163,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
}
if
(
n
>=
Q
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
mb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
n
%
Q
),
tempmm
);
INSERT_TASK_daxpy
(
options
,
tempmm
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
m
,
n
%
Q
),
1
);
}
}
...
...
@@ -178,11 +175,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
* For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) )
*/
for
(
n
=
1
;
n
<
Q
;
n
++
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
mb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
0
),
tempmm
);
INSERT_TASK_daxpy
(
options
,
tempmm
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
m
,
0
),
1
);
}
INSERT_TASK_dlange
(
...
...
@@ -407,11 +403,14 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
case
ChamOneNorm
:
RUNTIME_options_ws_alloc
(
&
options
,
1
,
0
);
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
GLOBAL
,
ChamRealDouble
,
1
,
A
->
nb
,
A
->
nb
,
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
TILE
,
ChamRealDouble
,
1
,
A
->
nb
,
A
->
nb
,
workmt
,
worknt
*
A
->
nb
,
0
,
0
,
workmt
,
worknt
*
A
->
nb
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
wcol_init
=
1
;
/*
* Use the global allocator for Welt, otherwise flush may free the data before the result is read.
*/
chameleon_desc_init
(
&
Welt
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamRealDouble
,
1
,
1
,
1
,
A
->
p
,
worknt
,
0
,
0
,
A
->
p
,
worknt
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
...
...
@@ -424,7 +423,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
case
ChamInfNorm
:
RUNTIME_options_ws_alloc
(
&
options
,
A
->
mb
,
0
);
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
GLOBAL
,
ChamRealDouble
,
A
->
mb
,
1
,
A
->
mb
,
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
TILE
,
ChamRealDouble
,
A
->
mb
,
1
,
A
->
mb
,
workmt
*
A
->
mb
,
worknt
,
0
,
0
,
workmt
*
A
->
mb
,
worknt
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
wcol_init
=
1
;
...
...
@@ -522,7 +521,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
}
CHAMELEON_Desc_Flush
(
&
Welt
,
sequence
);
CHAMELEON_Desc_Flush
(
A
,
sequence
);
RUNTIME_sequence_wait
(
chamctxt
,
sequence
);
RUNTIME_sequence_wait
(
chamctxt
,
sequence
);
*
result
=
*
((
double
*
)
Welt
.
get_blkaddr
(
&
Welt
,
A
->
myrank
/
A
->
q
,
A
->
myrank
%
A
->
q
));
...
...
compute/pzlansy.c
View file @
fa6d78a3
...
...
@@ -81,11 +81,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
int
tempmm
=
(
m
==
(
MT
-
1
)
)
?
M
-
m
*
A
->
mb
:
A
->
mb
;
for
(
n
=
Q
;
n
<
NT
;
n
++
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
nb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
n
%
Q
),
tempmm
);
INSERT_TASK_daxpy
(
options
,
tempmm
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
m
,
n
%
Q
),
1
);
}
/**
...
...
@@ -93,11 +92,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
* For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) )
*/
for
(
n
=
1
;
n
<
Q
;
n
++
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
mb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
0
),
tempmm
);
INSERT_TASK_daxpy
(
options
,
tempmm
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
m
,
0
),
1
);
}
INSERT_TASK_dlange
(
...
...
@@ -334,11 +332,14 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
case
ChamInfNorm
:
RUNTIME_options_ws_alloc
(
&
options
,
1
,
0
);
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
GLOBAL
,
ChamRealDouble
,
A
->
mb
,
1
,
A
->
mb
,
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
TILE
,
ChamRealDouble
,
A
->
mb
,
1
,
A
->
mb
,
workmt
*
A
->
mb
,
worknt
,
0
,
0
,
workmt
*
A
->
mb
,
worknt
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
wcol_init
=
1
;
/*
* Use the global allocator for Welt, otherwise flush may free the data before the result is read.
*/
chameleon_desc_init
(
&
Welt
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamRealDouble
,
1
,
1
,
1
,
workmt
,
A
->
q
,
0
,
0
,
workmt
,
A
->
q
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
...
...
compute/pzunmlq_param.c
View file @
fa6d78a3
...
...
@@ -466,6 +466,12 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
RUNTIME_data_flush
(
sequence
,
T
(
k
,
n
)
);
}
/* Restore the original location of the tiles */
for
(
m
=
0
;
m
<
B
->
mt
;
m
++
)
{
RUNTIME_data_migrate
(
sequence
,
B
(
m
,
k
),
B
->
get_rankof
(
B
,
m
,
k
)
);
}
RUNTIME_iteration_pop
(
chamctxt
);
}
}
...
...
compute/pzunmqr_param.c
View file @
fa6d78a3
...
...
@@ -467,6 +467,12 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
RUNTIME_data_flush
(
sequence
,
T
(
n
,
k
)
);
}
/* Restore the original location of the tiles */
for
(
m
=
0
;
m
<
B
->
mt
;
m
++
)
{
RUNTIME_data_migrate
(
sequence
,
B
(
m
,
k
),
B
->
get_rankof
(
B
,
m
,
k
)
);
}
RUNTIME_iteration_pop
(
chamctxt
);
}
}
...
...
compute/zlaset.c
View file @
fa6d78a3
...
...
@@ -266,7 +266,7 @@ int CHAMELEON_zlaset_Tile_Async( cham_uplo_t uplo,
return
chameleon_request_fail
(
sequence
,
request
,
CHAMELEON_ERR_ILLEGAL_VALUE
);
}
/* Check input arguments */
if
(
A
->
nb
!=
A
->
mb
)
{
if
(
(
alpha
!=
beta
)
&&
(
A
->
nb
!=
A
->
mb
)
)
{
chameleon_error
(
"CHAMELEON_zlaset_Tile_Async"
,
"only square tiles supported"
);
return
chameleon_request_fail
(
sequence
,
request
,
CHAMELEON_ERR_ILLEGAL_VALUE
);
}
...
...
control/compute_z.h
View file @
fa6d78a3
...
...
@@ -134,7 +134,7 @@ void chameleon_pzungqr_param( int genD, int K, const libhqr_tree_t *qrtree,
static
inline
int
chameleon_zdesc_alloc_diag
(
CHAM_desc_t
*
descA
,
int
nb
,
int
m
,
int
n
,
int
p
,
int
q
)
{
int
diag_m
=
chameleon_min
(
m
,
n
);
return
chameleon_desc_init
(
descA
,
CHAMELEON_MAT_ALLOC_
GLOBAL
,
return
chameleon_desc_init
(
descA
,
CHAMELEON_MAT_ALLOC_
TILE
,
ChamComplexDouble
,
nb
,
nb
,
nb
*
nb
,
diag_m
,
nb
,
0
,
0
,
diag_m
,
nb
,
p
,
q
,
chameleon_getaddr_diag
,
...
...
@@ -145,7 +145,7 @@ chameleon_zdesc_alloc_diag( CHAM_desc_t *descA, int nb, int m, int n, int p, int
#define chameleon_zdesc_alloc( descA, mb, nb, lm, ln, i, j, m, n, free) \
{ \
int rc; \
rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_
GLOBAL
, \
rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_
TILE
, \
ChamComplexDouble, (mb), (nb), ((mb)*(nb)), \
(m), (n), (i), (j), (m), (n), 1, 1, \
NULL, NULL, NULL ); \
...
...
@@ -174,7 +174,7 @@ chameleon_zlap2tile( CHAM_context_t *chamctxt,
if
(
CHAMELEON_TRANSLATION
==
ChamOutOfPlace
)
{
/* Initialize the tile descriptor */
chameleon_desc_init
(
descAt
,
CHAMELEON_MAT_ALLOC_
GLOBAL
,
ChamComplexDouble
,
mb
,
nb
,
(
mb
)
*
(
nb
),
chameleon_desc_init
(
descAt
,
CHAMELEON_MAT_ALLOC_
TILE
,
ChamComplexDouble
,
mb
,
nb
,
(
mb
)
*
(
nb
),
lm
,
ln
,
0
,
0
,
m
,
n
,
1
,
1
,
chameleon_getaddr_ccrb
,
chameleon_getblkldd_ccrb
,
NULL
);
...
...
@@ -235,6 +235,7 @@ chameleon_ztile2lap( CHAM_context_t *chamctxt, CHAM_desc_t *descAl, CHAM_desc_t
static
inline
void
chameleon_ztile2lap_cleanup
(
CHAM_context_t
*
chamctxt
,
CHAM_desc_t
*
descAl
,
CHAM_desc_t
*
descAt
)
{
(
void
)
chamctxt
;
chameleon_desc_destroy
(
descAl
);
chameleon_desc_destroy
(
descAt
);
}
...
...
control/descriptor.c
View file @
fa6d78a3
...
...
@@ -226,26 +226,32 @@ int chameleon_desc_init( CHAM_desc_t *desc, void *mat,
/* The matrix is alocated tile by tile with out of core */
desc
->
ooc
=
0
;
// Matrix address
if
(
mat
==
CHAMELEON_MAT_ALLOC_GLOBAL
)
{
rc
=
chameleon_desc_mat_alloc
(
desc
);
switch
(
(
intptr_t
)
mat
)
{
case
(
intptr_t
)
CHAMELEON_MAT_ALLOC_TILE
:
if
(
chamctxt
->
scheduler
==
RUNTIME_SCHED_STARPU
)
{
/* Let's use the allocation on the fly as in OOC */
desc
->
get_blkaddr
=
chameleon_getaddr_null
;
desc
->
mat
=
NULL
;
break
;
}
/* Otherwise we switch back to the full allocation */
desc
->
alloc_mat
=
1
;
desc
->
use_mat
=
1
;
}
else
if
(
mat
==
CHAMELEON_MAT_ALLOC_TILE
)
{
//chameleon_error( "chameleon_desc_init", "CHAMELEON_MAT_ALLOC_TILE is not available yet" );
//desc->mat = NULL;
case
(
intptr_t
)
CHAMELEON_MAT_ALLOC_GLOBAL
:
rc
=
chameleon_desc_mat_alloc
(
desc
);
desc
->
alloc_mat
=
1
;
desc
->
use_mat
=
1
;
break
;
desc
->
alloc_mat
=
1
;
}
else
if
(
mat
==
CHAMELEON_MAT_OOC
)
{
case
(
intptr_t
)
CHAMELEON_MAT_OOC
:
if
(
chamctxt
->
scheduler
!=
RUNTIME_SCHED_STARPU
)
{
chameleon_error
(
"CHAMELEON_Desc_Create"
,
"CHAMELEON Out-of-Core descriptors are supported only with StarPU"
);
return
CHAMELEON_ERR_NOT_SUPPORTED
;
}
desc
->
mat
=
NULL
;
desc
->
ooc
=
1
;
}
else
{
break
;
default:
/* memory of the matrix is handled by users */
desc
->
mat
=
mat
;
desc
->
use_mat
=
1
;
...
...
control/workspace.c
View file @
fa6d78a3
...
...
@@ -74,7 +74,8 @@ int chameleon_alloc_ibnb_tile(int M, int N, cham_tasktype_t func, int type, CHAM
lm
=
IB
*
MT
;
ln
=
NB
*
NT
;
return
CHAMELEON_Desc_Create
(
desc
,
NULL
,
type
,
IB
,
NB
,
IB
*
NB
,
lm
,
ln
,
0
,
0
,
lm
,
ln
,
p
,
q
);
return
CHAMELEON_Desc_Create
(
desc
,
CHAMELEON_MAT_ALLOC_TILE
,
type
,
IB
,
NB
,
IB
*
NB
,
lm
,
ln
,
0
,
0
,
lm
,
ln
,
p
,
q
);
}
/**
...
...
@@ -119,7 +120,8 @@ int chameleon_alloc_ipiv(int M, int N, cham_tasktype_t func, int type, CHAM_desc
/* TODO: Fix the distribution for IPIV */
*
IPIV
=
(
int
*
)
malloc
(
size
);
return
CHAMELEON_Desc_Create
(
desc
,
NULL
,
type
,
IB
,
NB
,
IB
*
NB
,
lm
,
ln
,
0
,
0
,
lm
,
ln
,
p
,
q
);
return
CHAMELEON_Desc_Create
(
desc
,
CHAMELEON_MAT_ALLOC_TILE
,
type
,
IB
,
NB
,
IB
*
NB
,
lm
,
ln
,
0
,
0
,
lm
,
ln
,
p
,
q
);
}
/**
...
...
coreblas/compute/core_zgelqt.c
View file @
fa6d78a3
...
...
@@ -67,7 +67,7 @@
* The leading dimension of the array A. LDA >= max(1,M).
*
* @param[out] T
* The IB-by-
N
triangular factor T of the block reflector.
* The IB-by-
M
triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
...
...
include/chameleon/tasks_z.h
View file @
fa6d78a3
...
...
@@ -483,6 +483,8 @@ INSERT_TASK_ztsmlq( const RUNTIME_option_t *options,
const
CHAM_desc_t
*
V
,
int
Vm
,
int
Vn
,
int
ldv
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ldt
)
{
(
void
)
m1
;
(
void
)
n1
;
return
INSERT_TASK_ztpmlqt
(
options
,
side
,
trans
,
m2
,
n2
,
k
,
0
,
ib
,
nb
,
V
,
Vm
,
Vn
,
ldv
,
T
,
Tm
,
Tn
,
ldt
,
A1
,
A1m
,
A1n
,
lda1
,
A2
,
A2m
,
A2n
,
lda2
);
...
...
@@ -497,6 +499,8 @@ INSERT_TASK_ztsmqr( const RUNTIME_option_t *options,
const
CHAM_desc_t
*
V
,
int
Vm
,
int
Vn
,
int
ldv
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ldt
)
{
(
void
)
m1
;
(
void
)
n1
;
return
INSERT_TASK_ztpmqrt
(
options
,
side
,
trans
,
m2
,
n2
,
k
,
0
,
ib
,
nb
,
V
,
Vm
,
Vn
,
ldv
,
T
,
Tm
,
Tn
,
ldt
,
A1
,
A1m
,
A1n
,
lda1
,
A2
,
A2m
,
A2n
,
lda2
);
...
...
@@ -511,6 +515,8 @@ INSERT_TASK_zttmlq( const RUNTIME_option_t *options,
const
CHAM_desc_t
*
V
,
int
Vm
,
int
Vn
,
int
ldv
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ldt
)
{
(
void
)
m1
;
(
void
)
n1
;
return
INSERT_TASK_ztpmlqt
(
options
,
side
,
trans
,
m2
,
n2
,
k
,
n2
,
ib
,
nb
,
V
,
Vm
,
Vn
,
ldv
,
T
,
Tm
,
Tn
,
ldt
,
A1
,
A1m
,
A1n
,
lda1
,
A2
,
A2m
,
A2n
,
lda2
);
...
...
@@ -525,6 +531,8 @@ INSERT_TASK_zttmqr( const RUNTIME_option_t *options,
const
CHAM_desc_t
*
V
,
int
Vm
,
int
Vn
,
int
ldv
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ldt
)
{
(
void
)
m1
;
(
void
)
n1
;
return
INSERT_TASK_ztpmqrt
(
options
,
side
,
trans
,
m2
,
n2
,
k
,
m2
,
ib
,
nb
,
V
,
Vm
,
Vn
,
ldv
,
T
,
Tm
,
Tn
,
ldt
,
A1
,
A1m
,
A1n
,
lda1
,
A2
,
A2m
,
A2n
,
lda2
);
...
...
runtime/openmp/codelets/codelet_zgelqt.c
View file @
fa6d78a3
...
...
@@ -98,10 +98,13 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t
*
ptrA
=
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
);
CHAMELEON_Complex64_t
*
ptrT
=
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
);
int
ws_size
=
options
->
ws_wsize
;
#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0])
#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(out:ptrT[0])
{
CHAMELEON_Complex64_t
TAU
[
ws_size
];
CHAMELEON_Complex64_t
*
work
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zgelqt
(
m
,
n
,
ib
,
ptrA
,
lda
,
ptrT
,
ldt
,
TAU
,
work
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
ptrT
,
ldt
);
CORE_zgelqt
(
m
,
n
,
ib
,
ptrA
,
lda
,
ptrT
,
ldt
,
TAU
,
work
);
}
}
runtime/openmp/codelets/codelet_zgeqrt.c
View file @
fa6d78a3
...
...
@@ -99,10 +99,13 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t
*
ptrA
=
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
);
CHAMELEON_Complex64_t
*
ptrT
=
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
);
int
ws_size
=
options
->
ws_wsize
;
#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0])
#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(out:ptrT[0])
{
CHAMELEON_Complex64_t
TAU
[
ws_size
];
CHAMELEON_Complex64_t
*
work
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zgeqrt
(
m
,
n
,
ib
,
ptrA
,
lda
,
ptrT
,
ldt
,
TAU
,
work
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
ptrT
,
ldt
);
CORE_zgeqrt
(
m
,
n
,
ib
,
ptrA
,
lda
,
ptrT
,
ldt
,
TAU
,
work
);
}
}
runtime/openmp/codelets/codelet_ztplqt.c
View file @
fa6d78a3
...
...
@@ -31,9 +31,13 @@ INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t
*
ptrB
=
RTBLKADDR
(
B
,
CHAMELEON_Complex64_t
,
Bm
,
Bn
);
CHAMELEON_Complex64_t
*
ptrT
=
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
);
int
ws_size
=
options
->
ws_wsize
;
#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0], ptrT[0])
#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0]) depend(out:ptrT[0])
{
CHAMELEON_Complex64_t
work
[
ws_size
];
CORE_zlaset
(
ChamUpperLower
,
ib
,
M
,
0
.,
0
.,
ptrT
,
ldt
);
CORE_ztplqt
(
M
,
N
,
L
,
ib
,
ptrA
,
lda
,
ptrB
,
ldb
,
ptrT
,
ldt
,
work
);
}
...
...
runtime/openmp/codelets/codelet_ztpqrt.c
View file @
fa6d78a3
...
...
@@ -30,9 +30,13 @@ INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t
*
ptrB
=
RTBLKADDR
(
B
,
CHAMELEON_Complex64_t
,
Bm
,
Bn
);
CHAMELEON_Complex64_t
*
ptrT
=
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
);
int
ws_size
=
options
->
ws_wsize
;
#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrT[0]) depend(inout:ptrA[0], ptrB[0])
#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(inout:ptrA[0], ptrB[0]) depend(out:ptrT[0])
{
CHAMELEON_Complex64_t
tmp
[
ws_size
];
CORE_zlaset
(
ChamUpperLower
,
ib
,
N
,
0
.,
0
.,
ptrT
,
ldt
);
CORE_ztpqrt
(
M
,
N
,
L
,
ib
,
ptrA
,
lda
,
ptrB
,
ldb
,
ptrT
,
ldt
,
tmp
);
}
...
...
runtime/parsec/codelets/codelet_zgelqt.c
View file @
fa6d78a3
...
...
@@ -98,6 +98,7 @@ CORE_zgelqt_parsec( parsec_execution_stream_t *context,
parsec_dtd_unpack_args
(
this_task
,
&
m
,
&
n
,
&
ib
,
&
A
,
&
lda
,
&
T
,
&
ldt
,
&
TAU
,
&
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
T
,
ldt
);
CORE_zgelqt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
(
void
)
context
;
...
...
runtime/parsec/codelets/codelet_zgeqrt.c
View file @
fa6d78a3
...
...
@@ -99,6 +99,7 @@ CORE_zgeqrt_parsec ( parsec_execution_stream_t *context,
parsec_dtd_unpack_args
(
this_task
,
&
m
,
&
n
,
&
ib
,
&
A
,
&
lda
,
&
T
,
&
ldt
,
&
TAU
,
&
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
T
,
ldt
);
CORE_zgeqrt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
(
void
)
context
;
...
...
runtime/parsec/codelets/codelet_ztplqt.c
View file @
fa6d78a3
...
...
@@ -40,6 +40,7 @@ CORE_ztplqt_parsec( parsec_execution_stream_t *context,
parsec_dtd_unpack_args
(
this_task
,
&
M
,
&
N
,
&
L
,
&
ib
,
&
A
,
&
lda
,
&
B
,
&
ldb
,
&
T
,
&
ldt
,
&
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
M
,
0
.,
0
.,
T
,
ldt
);
CORE_ztplqt
(
M
,
N
,
L
,
ib
,
A
,
lda
,
B
,
ldb
,
T
,
ldt
,
WORK
);
...
...
runtime/parsec/codelets/codelet_ztpqrt.c
View file @
fa6d78a3
...
...
@@ -40,6 +40,7 @@ CORE_ztpqrt_parsec( parsec_execution_stream_t *context,
parsec_dtd_unpack_args
(
this_task
,
&
M
,
&
N
,
&
L
,
&
ib
,
&
A
,
&
lda
,
&
B
,
&
ldb
,
&
T
,
&
ldt
,
&
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
N
,
0
.,
0
.,
T
,
ldt
);
CORE_ztpqrt
(
M
,
N
,
L
,
ib
,
A
,
lda
,
B
,
ldb
,
T
,
ldt
,
WORK
);
...
...
runtime/quark/codelets/codelet_zgelqt.c
View file @
fa6d78a3
...
...
@@ -40,6 +40,7 @@ void CORE_zgelqt_quark(Quark *quark)
CHAMELEON_Complex64_t
*
WORK
;
quark_unpack_args_9
(
quark
,
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
T
,
ldt
);
CORE_zgelqt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
}
...
...
runtime/quark/codelets/codelet_zgeqrt.c
View file @
fa6d78a3
...
...
@@ -40,6 +40,7 @@ void CORE_zgeqrt_quark(Quark *quark)
CHAMELEON_Complex64_t
*
WORK
;
quark_unpack_args_9
(
quark
,
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
T
,
ldt
);
CORE_zgeqrt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
}
...
...
Prev
1
2
3
Next
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment