Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Chameleon
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
AGULLO Emmanuel
Chameleon
Commits
fa6d78a3
Commit
fa6d78a3
authored
Feb 08, 2019
by
Mathieu Faverge
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'starpu/alloc_on_the_fly' into 'master'
Starpu/alloc on the fly See merge request
solverstack/chameleon!140
parents
e932eacc
f676d524
Changes
42
Hide whitespace changes
Inline
Side-by-side
Showing
42 changed files
with
189 additions
and
153 deletions
+189
-153
compute/pzlange.c
compute/pzlange.c
+22
-23
compute/pzlansy.c
compute/pzlansy.c
+12
-11
compute/pzunmlq_param.c
compute/pzunmlq_param.c
+6
-0
compute/pzunmqr_param.c
compute/pzunmqr_param.c
+6
-0
compute/zlaset.c
compute/zlaset.c
+1
-1
control/compute_z.h
control/compute_z.h
+4
-3
control/descriptor.c
control/descriptor.c
+20
-14
control/workspace.c
control/workspace.c
+4
-2
coreblas/compute/core_zgelqt.c
coreblas/compute/core_zgelqt.c
+1
-1
include/chameleon/tasks_z.h
include/chameleon/tasks_z.h
+8
-0
runtime/openmp/codelets/codelet_zgelqt.c
runtime/openmp/codelets/codelet_zgelqt.c
+5
-2
runtime/openmp/codelets/codelet_zgeqrt.c
runtime/openmp/codelets/codelet_zgeqrt.c
+5
-2
runtime/openmp/codelets/codelet_ztplqt.c
runtime/openmp/codelets/codelet_ztplqt.c
+5
-1
runtime/openmp/codelets/codelet_ztpqrt.c
runtime/openmp/codelets/codelet_ztpqrt.c
+5
-1
runtime/parsec/codelets/codelet_zgelqt.c
runtime/parsec/codelets/codelet_zgelqt.c
+1
-0
runtime/parsec/codelets/codelet_zgeqrt.c
runtime/parsec/codelets/codelet_zgeqrt.c
+1
-0
runtime/parsec/codelets/codelet_ztplqt.c
runtime/parsec/codelets/codelet_ztplqt.c
+1
-0
runtime/parsec/codelets/codelet_ztpqrt.c
runtime/parsec/codelets/codelet_ztpqrt.c
+1
-0
runtime/quark/codelets/codelet_zgelqt.c
runtime/quark/codelets/codelet_zgelqt.c
+1
-0
runtime/quark/codelets/codelet_zgeqrt.c
runtime/quark/codelets/codelet_zgeqrt.c
+1
-0
runtime/quark/codelets/codelet_ztplqt.c
runtime/quark/codelets/codelet_ztplqt.c
+1
-0
runtime/quark/codelets/codelet_ztpqrt.c
runtime/quark/codelets/codelet_ztpqrt.c
+1
-0
runtime/starpu/codelets/codelet_zgelqt.c
runtime/starpu/codelets/codelet_zgelqt.c
+30
-31
runtime/starpu/codelets/codelet_zgemm.c
runtime/starpu/codelets/codelet_zgemm.c
+1
-1
runtime/starpu/codelets/codelet_zgeqrt.c
runtime/starpu/codelets/codelet_zgeqrt.c
+31
-31
runtime/starpu/codelets/codelet_zlange.c
runtime/starpu/codelets/codelet_zlange.c
+4
-4
runtime/starpu/codelets/codelet_ztplqt.c
runtime/starpu/codelets/codelet_ztplqt.c
+1
-0
runtime/starpu/codelets/codelet_ztpqrt.c
runtime/starpu/codelets/codelet_ztpqrt.c
+1
-0
runtime/starpu/control/runtime_descriptor.c
runtime/starpu/control/runtime_descriptor.c
+1
-1
runtime/starpu/control/runtime_options.c
runtime/starpu/control/runtime_options.c
+3
-3
testing/testing_zgels.c
testing/testing_zgels.c
+0
-1
testing/testing_zgels_hqr.c
testing/testing_zgels_hqr.c
+0
-2
testing/testing_zgels_systolic.c
testing/testing_zgels_systolic.c
+0
-2
timing/time_zgelqf.c
timing/time_zgelqf.c
+0
-1
timing/time_zgelqf_tile.c
timing/time_zgelqf_tile.c
+0
-1
timing/time_zgels.c
timing/time_zgels.c
+4
-5
timing/time_zgels_tile.c
timing/time_zgels_tile.c
+1
-2
timing/time_zgeqrf.c
timing/time_zgeqrf.c
+0
-1
timing/time_zgeqrf_hqr.c
timing/time_zgeqrf_hqr.c
+0
-2
timing/time_zgeqrf_hqr_tile.c
timing/time_zgeqrf_hqr_tile.c
+0
-2
timing/time_zgeqrf_tile.c
timing/time_zgeqrf_tile.c
+0
-1
timing/time_zgeqrs_tile.c
timing/time_zgeqrs_tile.c
+0
-1
No files found.
compute/pzlange.c
View file @
fa6d78a3
...
...
@@ -72,11 +72,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
}
if
(
m
>=
P
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
1
,
tempnn
,
A
->
nb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
1
,
1
.
0
,
W
(
Wcol
,
m
%
P
,
n
),
1
);
INSERT_TASK_daxpy
(
options
,
tempnn
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
m
%
P
,
n
),
1
);
}
}
...
...
@@ -85,11 +84,10 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
* For each i, W(i, n) = reduce( W(0..P-1, n) )
*/
for
(
m
=
1
;
m
<
P
;
m
++
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
1
,
tempnn
,
A
->
nb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
1
,
1
.
0
,
W
(
Wcol
,
0
,
n
),
1
);
INSERT_TASK_daxpy
(
options
,
tempnn
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
0
,
n
),
1
);
}
INSERT_TASK_dlange
(
...
...
@@ -165,11 +163,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
}
if
(
n
>=
Q
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
mb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
n
%
Q
),
tempmm
);
INSERT_TASK_daxpy
(
options
,
tempmm
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
m
,
n
%
Q
),
1
);
}
}
...
...
@@ -178,11 +175,10 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
* For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) )
*/
for
(
n
=
1
;
n
<
Q
;
n
++
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
mb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
0
),
tempmm
);
INSERT_TASK_daxpy
(
options
,
tempmm
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
m
,
0
),
1
);
}
INSERT_TASK_dlange
(
...
...
@@ -407,11 +403,14 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
case
ChamOneNorm
:
RUNTIME_options_ws_alloc
(
&
options
,
1
,
0
);
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
GLOBAL
,
ChamRealDouble
,
1
,
A
->
nb
,
A
->
nb
,
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
TILE
,
ChamRealDouble
,
1
,
A
->
nb
,
A
->
nb
,
workmt
,
worknt
*
A
->
nb
,
0
,
0
,
workmt
,
worknt
*
A
->
nb
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
wcol_init
=
1
;
/*
* Use the global allocator for Welt, otherwise flush may free the data before the result is read.
*/
chameleon_desc_init
(
&
Welt
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamRealDouble
,
1
,
1
,
1
,
A
->
p
,
worknt
,
0
,
0
,
A
->
p
,
worknt
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
...
...
@@ -424,7 +423,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
case
ChamInfNorm
:
RUNTIME_options_ws_alloc
(
&
options
,
A
->
mb
,
0
);
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
GLOBAL
,
ChamRealDouble
,
A
->
mb
,
1
,
A
->
mb
,
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
TILE
,
ChamRealDouble
,
A
->
mb
,
1
,
A
->
mb
,
workmt
*
A
->
mb
,
worknt
,
0
,
0
,
workmt
*
A
->
mb
,
worknt
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
wcol_init
=
1
;
...
...
@@ -522,7 +521,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
}
CHAMELEON_Desc_Flush
(
&
Welt
,
sequence
);
CHAMELEON_Desc_Flush
(
A
,
sequence
);
RUNTIME_sequence_wait
(
chamctxt
,
sequence
);
RUNTIME_sequence_wait
(
chamctxt
,
sequence
);
*
result
=
*
((
double
*
)
Welt
.
get_blkaddr
(
&
Welt
,
A
->
myrank
/
A
->
q
,
A
->
myrank
%
A
->
q
));
...
...
compute/pzlansy.c
View file @
fa6d78a3
...
...
@@ -81,11 +81,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
int
tempmm
=
(
m
==
(
MT
-
1
)
)
?
M
-
m
*
A
->
mb
:
A
->
mb
;
for
(
n
=
Q
;
n
<
NT
;
n
++
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
nb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
n
%
Q
),
tempmm
);
INSERT_TASK_daxpy
(
options
,
tempmm
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
m
,
n
%
Q
),
1
);
}
/**
...
...
@@ -93,11 +92,10 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
* For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) )
*/
for
(
n
=
1
;
n
<
Q
;
n
++
)
{
INSERT_TASK_dgeadd
(
options
,
ChamNoTrans
,
tempmm
,
1
,
A
->
mb
,
1
.
0
,
W
(
Wcol
,
m
,
n
),
tempmm
,
1
.
0
,
W
(
Wcol
,
m
,
0
),
tempmm
);
INSERT_TASK_daxpy
(
options
,
tempmm
,
1
.,
W
(
Wcol
,
m
,
n
),
1
,
W
(
Wcol
,
m
,
0
),
1
);
}
INSERT_TASK_dlange
(
...
...
@@ -334,11 +332,14 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
case
ChamInfNorm
:
RUNTIME_options_ws_alloc
(
&
options
,
1
,
0
);
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
GLOBAL
,
ChamRealDouble
,
A
->
mb
,
1
,
A
->
mb
,
chameleon_desc_init
(
&
Wcol
,
CHAMELEON_MAT_ALLOC_
TILE
,
ChamRealDouble
,
A
->
mb
,
1
,
A
->
mb
,
workmt
*
A
->
mb
,
worknt
,
0
,
0
,
workmt
*
A
->
mb
,
worknt
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
wcol_init
=
1
;
/*
* Use the global allocator for Welt, otherwise flush may free the data before the result is read.
*/
chameleon_desc_init
(
&
Welt
,
CHAMELEON_MAT_ALLOC_GLOBAL
,
ChamRealDouble
,
1
,
1
,
1
,
workmt
,
A
->
q
,
0
,
0
,
workmt
,
A
->
q
,
A
->
p
,
A
->
q
,
NULL
,
NULL
,
NULL
);
...
...
compute/pzunmlq_param.c
View file @
fa6d78a3
...
...
@@ -466,6 +466,12 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
RUNTIME_data_flush
(
sequence
,
T
(
k
,
n
)
);
}
/* Restore the original location of the tiles */
for
(
m
=
0
;
m
<
B
->
mt
;
m
++
)
{
RUNTIME_data_migrate
(
sequence
,
B
(
m
,
k
),
B
->
get_rankof
(
B
,
m
,
k
)
);
}
RUNTIME_iteration_pop
(
chamctxt
);
}
}
...
...
compute/pzunmqr_param.c
View file @
fa6d78a3
...
...
@@ -467,6 +467,12 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
RUNTIME_data_flush
(
sequence
,
T
(
n
,
k
)
);
}
/* Restore the original location of the tiles */
for
(
m
=
0
;
m
<
B
->
mt
;
m
++
)
{
RUNTIME_data_migrate
(
sequence
,
B
(
m
,
k
),
B
->
get_rankof
(
B
,
m
,
k
)
);
}
RUNTIME_iteration_pop
(
chamctxt
);
}
}
...
...
compute/zlaset.c
View file @
fa6d78a3
...
...
@@ -266,7 +266,7 @@ int CHAMELEON_zlaset_Tile_Async( cham_uplo_t uplo,
return
chameleon_request_fail
(
sequence
,
request
,
CHAMELEON_ERR_ILLEGAL_VALUE
);
}
/* Check input arguments */
if
(
A
->
nb
!=
A
->
mb
)
{
if
(
(
alpha
!=
beta
)
&&
(
A
->
nb
!=
A
->
mb
)
)
{
chameleon_error
(
"CHAMELEON_zlaset_Tile_Async"
,
"only square tiles supported"
);
return
chameleon_request_fail
(
sequence
,
request
,
CHAMELEON_ERR_ILLEGAL_VALUE
);
}
...
...
control/compute_z.h
View file @
fa6d78a3
...
...
@@ -134,7 +134,7 @@ void chameleon_pzungqr_param( int genD, int K, const libhqr_tree_t *qrtree,
static
inline
int
chameleon_zdesc_alloc_diag
(
CHAM_desc_t
*
descA
,
int
nb
,
int
m
,
int
n
,
int
p
,
int
q
)
{
int
diag_m
=
chameleon_min
(
m
,
n
);
return
chameleon_desc_init
(
descA
,
CHAMELEON_MAT_ALLOC_
GLOBAL
,
return
chameleon_desc_init
(
descA
,
CHAMELEON_MAT_ALLOC_
TILE
,
ChamComplexDouble
,
nb
,
nb
,
nb
*
nb
,
diag_m
,
nb
,
0
,
0
,
diag_m
,
nb
,
p
,
q
,
chameleon_getaddr_diag
,
...
...
@@ -145,7 +145,7 @@ chameleon_zdesc_alloc_diag( CHAM_desc_t *descA, int nb, int m, int n, int p, int
#define chameleon_zdesc_alloc( descA, mb, nb, lm, ln, i, j, m, n, free) \
{ \
int rc; \
rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_
GLOBAL
, \
rc = chameleon_desc_init( &(descA), CHAMELEON_MAT_ALLOC_
TILE
, \
ChamComplexDouble, (mb), (nb), ((mb)*(nb)), \
(m), (n), (i), (j), (m), (n), 1, 1, \
NULL, NULL, NULL ); \
...
...
@@ -174,7 +174,7 @@ chameleon_zlap2tile( CHAM_context_t *chamctxt,
if
(
CHAMELEON_TRANSLATION
==
ChamOutOfPlace
)
{
/* Initialize the tile descriptor */
chameleon_desc_init
(
descAt
,
CHAMELEON_MAT_ALLOC_
GLOBAL
,
ChamComplexDouble
,
mb
,
nb
,
(
mb
)
*
(
nb
),
chameleon_desc_init
(
descAt
,
CHAMELEON_MAT_ALLOC_
TILE
,
ChamComplexDouble
,
mb
,
nb
,
(
mb
)
*
(
nb
),
lm
,
ln
,
0
,
0
,
m
,
n
,
1
,
1
,
chameleon_getaddr_ccrb
,
chameleon_getblkldd_ccrb
,
NULL
);
...
...
@@ -235,6 +235,7 @@ chameleon_ztile2lap( CHAM_context_t *chamctxt, CHAM_desc_t *descAl, CHAM_desc_t
/**
 * Destroy the two descriptors handed in by the caller.
 *
 * NOTE(review): from the names, descAl / descAt are presumably the
 * LAPACK-layout and tile-layout descriptors of the same matrix - confirm
 * against the callers.
 *
 * @param[in] chamctxt
 *          Not used by this function; explicitly discarded to silence
 *          unused-parameter warnings.
 *
 * @param[in,out] descAl
 *          First descriptor passed to chameleon_desc_destroy().
 *
 * @param[in,out] descAt
 *          Second descriptor passed to chameleon_desc_destroy().
 */
static inline void
chameleon_ztile2lap_cleanup( CHAM_context_t *chamctxt,
                             CHAM_desc_t    *descAl,
                             CHAM_desc_t    *descAt )
{
    chameleon_desc_destroy( descAl );
    chameleon_desc_destroy( descAt );

    /* The context is part of the signature only. */
    (void)chamctxt;
}
...
...
control/descriptor.c
View file @
fa6d78a3
...
...
@@ -226,26 +226,32 @@ int chameleon_desc_init( CHAM_desc_t *desc, void *mat,
/* The matrix is allocated tile by tile with out of core */
desc
->
ooc
=
0
;
// Matrix address
if
(
mat
==
CHAMELEON_MAT_ALLOC_GLOBAL
)
{
rc
=
chameleon_desc_mat_alloc
(
desc
);
switch
(
(
intptr_t
)
mat
)
{
case
(
intptr_t
)
CHAMELEON_MAT_ALLOC_TILE
:
if
(
chamctxt
->
scheduler
==
RUNTIME_SCHED_STARPU
)
{
/* Let's use the allocation on the fly as in OOC */
desc
->
get_blkaddr
=
chameleon_getaddr_null
;
desc
->
mat
=
NULL
;
break
;
}
/* Otherwise we switch back to the full allocation */
desc
->
alloc_mat
=
1
;
desc
->
use_mat
=
1
;
}
else
if
(
mat
==
CHAMELEON_MAT_ALLOC_TILE
)
{
//chameleon_error( "chameleon_desc_init", "CHAMELEON_MAT_ALLOC_TILE is not available yet" );
//desc->mat = NULL;
case
(
intptr_t
)
CHAMELEON_MAT_ALLOC_GLOBAL
:
rc
=
chameleon_desc_mat_alloc
(
desc
);
desc
->
alloc_mat
=
1
;
desc
->
use_mat
=
1
;
break
;
desc
->
alloc_mat
=
1
;
}
else
if
(
mat
==
CHAMELEON_MAT_OOC
)
{
case
(
intptr_t
)
CHAMELEON_MAT_OOC
:
if
(
chamctxt
->
scheduler
!=
RUNTIME_SCHED_STARPU
)
{
chameleon_error
(
"CHAMELEON_Desc_Create"
,
"CHAMELEON Out-of-Core descriptors are supported only with StarPU"
);
return
CHAMELEON_ERR_NOT_SUPPORTED
;
}
desc
->
mat
=
NULL
;
desc
->
ooc
=
1
;
}
else
{
break
;
default:
/* memory of the matrix is handled by users */
desc
->
mat
=
mat
;
desc
->
use_mat
=
1
;
...
...
control/workspace.c
View file @
fa6d78a3
...
...
@@ -74,7 +74,8 @@ int chameleon_alloc_ibnb_tile(int M, int N, cham_tasktype_t func, int type, CHAM
lm
=
IB
*
MT
;
ln
=
NB
*
NT
;
return
CHAMELEON_Desc_Create
(
desc
,
NULL
,
type
,
IB
,
NB
,
IB
*
NB
,
lm
,
ln
,
0
,
0
,
lm
,
ln
,
p
,
q
);
return
CHAMELEON_Desc_Create
(
desc
,
CHAMELEON_MAT_ALLOC_TILE
,
type
,
IB
,
NB
,
IB
*
NB
,
lm
,
ln
,
0
,
0
,
lm
,
ln
,
p
,
q
);
}
/**
...
...
@@ -119,7 +120,8 @@ int chameleon_alloc_ipiv(int M, int N, cham_tasktype_t func, int type, CHAM_desc
/* TODO: Fix the distribution for IPIV */
*
IPIV
=
(
int
*
)
malloc
(
size
);
return
CHAMELEON_Desc_Create
(
desc
,
NULL
,
type
,
IB
,
NB
,
IB
*
NB
,
lm
,
ln
,
0
,
0
,
lm
,
ln
,
p
,
q
);
return
CHAMELEON_Desc_Create
(
desc
,
CHAMELEON_MAT_ALLOC_TILE
,
type
,
IB
,
NB
,
IB
*
NB
,
lm
,
ln
,
0
,
0
,
lm
,
ln
,
p
,
q
);
}
/**
...
...
coreblas/compute/core_zgelqt.c
View file @
fa6d78a3
...
...
@@ -67,7 +67,7 @@
* The leading dimension of the array A. LDA >= max(1,M).
*
* @param[out] T
* The IB-by-
N
triangular factor T of the block reflector.
* The IB-by-
M
triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
...
...
include/chameleon/tasks_z.h
View file @
fa6d78a3
...
...
@@ -483,6 +483,8 @@ INSERT_TASK_ztsmlq( const RUNTIME_option_t *options,
const
CHAM_desc_t
*
V
,
int
Vm
,
int
Vn
,
int
ldv
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ldt
)
{
(
void
)
m1
;
(
void
)
n1
;
return
INSERT_TASK_ztpmlqt
(
options
,
side
,
trans
,
m2
,
n2
,
k
,
0
,
ib
,
nb
,
V
,
Vm
,
Vn
,
ldv
,
T
,
Tm
,
Tn
,
ldt
,
A1
,
A1m
,
A1n
,
lda1
,
A2
,
A2m
,
A2n
,
lda2
);
...
...
@@ -497,6 +499,8 @@ INSERT_TASK_ztsmqr( const RUNTIME_option_t *options,
const
CHAM_desc_t
*
V
,
int
Vm
,
int
Vn
,
int
ldv
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ldt
)
{
(
void
)
m1
;
(
void
)
n1
;
return
INSERT_TASK_ztpmqrt
(
options
,
side
,
trans
,
m2
,
n2
,
k
,
0
,
ib
,
nb
,
V
,
Vm
,
Vn
,
ldv
,
T
,
Tm
,
Tn
,
ldt
,
A1
,
A1m
,
A1n
,
lda1
,
A2
,
A2m
,
A2n
,
lda2
);
...
...
@@ -511,6 +515,8 @@ INSERT_TASK_zttmlq( const RUNTIME_option_t *options,
const
CHAM_desc_t
*
V
,
int
Vm
,
int
Vn
,
int
ldv
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ldt
)
{
(
void
)
m1
;
(
void
)
n1
;
return
INSERT_TASK_ztpmlqt
(
options
,
side
,
trans
,
m2
,
n2
,
k
,
n2
,
ib
,
nb
,
V
,
Vm
,
Vn
,
ldv
,
T
,
Tm
,
Tn
,
ldt
,
A1
,
A1m
,
A1n
,
lda1
,
A2
,
A2m
,
A2n
,
lda2
);
...
...
@@ -525,6 +531,8 @@ INSERT_TASK_zttmqr( const RUNTIME_option_t *options,
const
CHAM_desc_t
*
V
,
int
Vm
,
int
Vn
,
int
ldv
,
const
CHAM_desc_t
*
T
,
int
Tm
,
int
Tn
,
int
ldt
)
{
(
void
)
m1
;
(
void
)
n1
;
return
INSERT_TASK_ztpmqrt
(
options
,
side
,
trans
,
m2
,
n2
,
k
,
m2
,
ib
,
nb
,
V
,
Vm
,
Vn
,
ldv
,
T
,
Tm
,
Tn
,
ldt
,
A1
,
A1m
,
A1n
,
lda1
,
A2
,
A2m
,
A2n
,
lda2
);
...
...
runtime/openmp/codelets/codelet_zgelqt.c
View file @
fa6d78a3
...
...
@@ -98,10 +98,13 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t
*
ptrA
=
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
);
CHAMELEON_Complex64_t
*
ptrT
=
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
);
int
ws_size
=
options
->
ws_wsize
;
#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0])
#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(out:ptrT[0])
{
CHAMELEON_Complex64_t
TAU
[
ws_size
];
CHAMELEON_Complex64_t
*
work
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zgelqt
(
m
,
n
,
ib
,
ptrA
,
lda
,
ptrT
,
ldt
,
TAU
,
work
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
ptrT
,
ldt
);
CORE_zgelqt
(
m
,
n
,
ib
,
ptrA
,
lda
,
ptrT
,
ldt
,
TAU
,
work
);
}
}
runtime/openmp/codelets/codelet_zgeqrt.c
View file @
fa6d78a3
...
...
@@ -99,10 +99,13 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
CHAMELEON_Complex64_t
*
ptrA
=
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
);
CHAMELEON_Complex64_t
*
ptrT
=
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
);
int
ws_size
=
options
->
ws_wsize
;
#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(inout:ptrT[0])
#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(out:ptrT[0])
{
CHAMELEON_Complex64_t
TAU
[
ws_size
];
CHAMELEON_Complex64_t
*
work
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zgeqrt
(
m
,
n
,
ib
,
ptrA
,
lda
,
ptrT
,
ldt
,
TAU
,
work
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
ptrT
,
ldt
);
CORE_zgeqrt
(
m
,
n
,
ib
,
ptrA
,
lda
,
ptrT
,
ldt
,
TAU
,
work
);
}
}
runtime/openmp/codelets/codelet_ztplqt.c
View file @
fa6d78a3
...
...
@@ -31,9 +31,13 @@ INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t
*
ptrB
=
RTBLKADDR
(
B
,
CHAMELEON_Complex64_t
,
Bm
,
Bn
);
CHAMELEON_Complex64_t
*
ptrT
=
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
);
int
ws_size
=
options
->
ws_wsize
;
#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0], ptrT[0])
#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0]) depend(out:ptrT[0])
{
CHAMELEON_Complex64_t
work
[
ws_size
];
CORE_zlaset
(
ChamUpperLower
,
ib
,
M
,
0
.,
0
.,
ptrT
,
ldt
);
CORE_ztplqt
(
M
,
N
,
L
,
ib
,
ptrA
,
lda
,
ptrB
,
ldb
,
ptrT
,
ldt
,
work
);
}
...
...
runtime/openmp/codelets/codelet_ztpqrt.c
View file @
fa6d78a3
...
...
@@ -30,9 +30,13 @@ INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
CHAMELEON_Complex64_t
*
ptrB
=
RTBLKADDR
(
B
,
CHAMELEON_Complex64_t
,
Bm
,
Bn
);
CHAMELEON_Complex64_t
*
ptrT
=
RTBLKADDR
(
T
,
CHAMELEON_Complex64_t
,
Tm
,
Tn
);
int
ws_size
=
options
->
ws_wsize
;
#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrT[0]) depend(inout:ptrA[0], ptrB[0])
#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(inout:ptrA[0], ptrB[0]) depend(out:ptrT[0])
{
CHAMELEON_Complex64_t
tmp
[
ws_size
];
CORE_zlaset
(
ChamUpperLower
,
ib
,
N
,
0
.,
0
.,
ptrT
,
ldt
);
CORE_ztpqrt
(
M
,
N
,
L
,
ib
,
ptrA
,
lda
,
ptrB
,
ldb
,
ptrT
,
ldt
,
tmp
);
}
...
...
runtime/parsec/codelets/codelet_zgelqt.c
View file @
fa6d78a3
...
...
@@ -98,6 +98,7 @@ CORE_zgelqt_parsec( parsec_execution_stream_t *context,
parsec_dtd_unpack_args
(
this_task
,
&
m
,
&
n
,
&
ib
,
&
A
,
&
lda
,
&
T
,
&
ldt
,
&
TAU
,
&
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
T
,
ldt
);
CORE_zgelqt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
(
void
)
context
;
...
...
runtime/parsec/codelets/codelet_zgeqrt.c
View file @
fa6d78a3
...
...
@@ -99,6 +99,7 @@ CORE_zgeqrt_parsec ( parsec_execution_stream_t *context,
parsec_dtd_unpack_args
(
this_task
,
&
m
,
&
n
,
&
ib
,
&
A
,
&
lda
,
&
T
,
&
ldt
,
&
TAU
,
&
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
T
,
ldt
);
CORE_zgeqrt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
(
void
)
context
;
...
...
runtime/parsec/codelets/codelet_ztplqt.c
View file @
fa6d78a3
...
...
@@ -40,6 +40,7 @@ CORE_ztplqt_parsec( parsec_execution_stream_t *context,
parsec_dtd_unpack_args
(
this_task
,
&
M
,
&
N
,
&
L
,
&
ib
,
&
A
,
&
lda
,
&
B
,
&
ldb
,
&
T
,
&
ldt
,
&
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
M
,
0
.,
0
.,
T
,
ldt
);
CORE_ztplqt
(
M
,
N
,
L
,
ib
,
A
,
lda
,
B
,
ldb
,
T
,
ldt
,
WORK
);
...
...
runtime/parsec/codelets/codelet_ztpqrt.c
View file @
fa6d78a3
...
...
@@ -40,6 +40,7 @@ CORE_ztpqrt_parsec( parsec_execution_stream_t *context,
parsec_dtd_unpack_args
(
this_task
,
&
M
,
&
N
,
&
L
,
&
ib
,
&
A
,
&
lda
,
&
B
,
&
ldb
,
&
T
,
&
ldt
,
&
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
N
,
0
.,
0
.,
T
,
ldt
);
CORE_ztpqrt
(
M
,
N
,
L
,
ib
,
A
,
lda
,
B
,
ldb
,
T
,
ldt
,
WORK
);
...
...
runtime/quark/codelets/codelet_zgelqt.c
View file @
fa6d78a3
...
...
@@ -40,6 +40,7 @@ void CORE_zgelqt_quark(Quark *quark)
CHAMELEON_Complex64_t
*
WORK
;
quark_unpack_args_9
(
quark
,
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
T
,
ldt
);
CORE_zgelqt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
}
...
...
runtime/quark/codelets/codelet_zgeqrt.c
View file @
fa6d78a3
...
...
@@ -40,6 +40,7 @@ void CORE_zgeqrt_quark(Quark *quark)
CHAMELEON_Complex64_t
*
WORK
;
quark_unpack_args_9
(
quark
,
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
T
,
ldt
);
CORE_zgeqrt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
}
...
...
runtime/quark/codelets/codelet_ztplqt.c
View file @
fa6d78a3
...
...
@@ -39,6 +39,7 @@ CORE_ztplqt_quark( Quark *quark )
quark_unpack_args_11
(
quark
,
M
,
N
,
L
,
ib
,
A
,
lda
,
B
,
ldb
,
T
,
ldt
,
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
N
,
0
.,
0
.,
T
,
ldt
);
CORE_ztplqt
(
M
,
N
,
L
,
ib
,
A
,
lda
,
B
,
ldb
,
T
,
ldt
,
WORK
);
}
...
...
runtime/quark/codelets/codelet_ztpqrt.c
View file @
fa6d78a3
...
...
@@ -39,6 +39,7 @@ CORE_ztpqrt_quark( Quark *quark )
quark_unpack_args_11
(
quark
,
M
,
N
,
L
,
ib
,
A
,
lda
,
B
,
ldb
,
T
,
ldt
,
WORK
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
N
,
0
.,
0
.,
T
,
ldt
);
CORE_ztpqrt
(
M
,
N
,
L
,
ib
,
A
,
lda
,
B
,
ldb
,
T
,
ldt
,
WORK
);
}
...
...
runtime/starpu/codelets/codelet_zgelqt.c
View file @
fa6d78a3
...
...
@@ -26,6 +26,36 @@
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgelqt_cpu_func
(
void
*
descr
[],
void
*
cl_arg
)
{
CHAMELEON_starpu_ws_t
*
h_work
;
int
m
;
int
n
;
int
ib
;
CHAMELEON_Complex64_t
*
A
;
int
lda
;
CHAMELEON_Complex64_t
*
T
;
int
ldt
;
CHAMELEON_Complex64_t
*
TAU
,
*
WORK
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
T
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
TAU
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
/* max(m,n) + ib*n */
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
lda
,
&
ldt
,
&
h_work
);
WORK
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
m
,
0
.,
0
.,
T
,
ldt
);
CORE_zgelqt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS_CPU
(
zgelqt
,
3
,
cl_zgelqt_cpu_func
)
/**
*
* @ingroup INSERT_TASK_Complex64_t
...
...
@@ -87,7 +117,6 @@
* \retval <0 if -i, the i-th argument had an illegal value
*
*/
void
INSERT_TASK_zgelqt
(
const
RUNTIME_option_t
*
options
,
int
m
,
int
n
,
int
ib
,
int
nb
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
lda
,
...
...
@@ -123,33 +152,3 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
#endif
0
);
}
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgelqt_cpu_func
(
void
*
descr
[],
void
*
cl_arg
)
{
CHAMELEON_starpu_ws_t
*
h_work
;
int
m
;
int
n
;
int
ib
;
CHAMELEON_Complex64_t
*
A
;
int
lda
;
CHAMELEON_Complex64_t
*
T
;
int
ldt
;
CHAMELEON_Complex64_t
*
TAU
,
*
WORK
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
T
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
TAU
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
/* max(m,n) + ib*n */
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
lda
,
&
ldt
,
&
h_work
);
WORK
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zgelqt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
/*
* Codelet definition
*/
CODELETS_CPU
(
zgelqt
,
3
,
cl_zgelqt_cpu_func
)
runtime/starpu/codelets/codelet_zgemm.c
View file @
fa6d78a3
...
...
@@ -35,7 +35,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
cham_trans_t
transA
,
cham_trans_t
transB
,
int
m
,
int
n
,
int
k
,
int
nb
,
CHAMELEON_Complex64_t
alpha
,
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
,
int
lda
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
,
int
ldb
,
const
CHAM_desc_t
*
B
,
int
Bm
,
int
Bn
,
int
ldb
,
CHAMELEON_Complex64_t
beta
,
const
CHAM_desc_t
*
C
,
int
Cm
,
int
Cn
,
int
ldc
)
{
(
void
)
nb
;
...
...
runtime/starpu/codelets/codelet_zgeqrt.c
View file @
fa6d78a3
...
...
@@ -26,6 +26,37 @@
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgeqrt_cpu_func
(
void
*
descr
[],
void
*
cl_arg
)
{
CHAMELEON_starpu_ws_t
*
h_work
;
int
m
;
int
n
;
int
ib
;
CHAMELEON_Complex64_t
*
A
;
int
lda
;
CHAMELEON_Complex64_t
*
T
;
int
ldt
;
CHAMELEON_Complex64_t
*
TAU
,
*
WORK
;
A
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
0
]);
T
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
1
]);
TAU
=
(
CHAMELEON_Complex64_t
*
)
STARPU_MATRIX_GET_PTR
(
descr
[
2
]);
/* max(m,n) + n * ib */
starpu_codelet_unpack_args
(
cl_arg
,
&
m
,
&
n
,
&
ib
,
&
lda
,
&
ldt
,
&
h_work
);
WORK
=
TAU
+
chameleon_max
(
m
,
n
);
CORE_zlaset
(
ChamUpperLower
,
ib
,
n
,
0
.,
0
.,
T
,
ldt
);
CORE_zgeqrt
(
m
,
n
,
ib
,
A
,
lda
,
T
,
ldt
,
TAU
,
WORK
);
}
#endif
/* !defined(CHAMELEON_SIMULATION) */
/*