Commit 5d03b644 authored by BOUCHERIE Raphael

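Move the diagonal tile copies to driver level: the parallel routines no longer allocate their own DIAG descriptor and instead receive it as the parameter D.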

parent a0a034be
......@@ -30,6 +30,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T,
{
int k;
int tempkm, tempkn;
MORSE_desc_t D;
if (A.m >= A.n){
for (k = 0; k < A.nt; k++) {
tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
......@@ -38,6 +39,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T,
morse_pzgeqrf(
morse_desc_submatrix(&A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn),
morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
sequence, request);
morse_pzunmqr(
......@@ -46,6 +48,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T,
morse_desc_submatrix(&A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn),
morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, A.m-k*A.mb, A.n-(k+1)*A.nb),
morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
sequence, request);
if (k+1 < A.nt){
......@@ -54,6 +57,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T,
morse_pzgelqf(
morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb),
morse_desc_submatrix(&T, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb),
morse_desc_submatrix(&D, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb),
sequence, request);
morse_pzunmlq(
......@@ -61,6 +65,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T,
morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb),
morse_desc_submatrix(&A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb),
morse_desc_submatrix(&T, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb),
morse_desc_submatrix(&D, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb),
sequence, request);
}
}
......@@ -73,6 +78,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T,
morse_pzgelqf(
morse_desc_submatrix(&A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb),
morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb),
morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb),
sequence, request);
morse_pzunmlq(
......@@ -80,6 +86,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T,
morse_desc_submatrix(&A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb),
morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, A.n-k*A.nb),
morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb),
morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb),
sequence, request);
if (k+1 < A.mt){
......@@ -88,6 +95,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T,
morse_pzgeqrf(
morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn),
morse_desc_submatrix(&T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
morse_desc_submatrix(&D, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
sequence, request);
morse_pzunmqr(
......@@ -95,6 +103,7 @@ void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T,
morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn),
morse_desc_submatrix(&A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb),
morse_desc_submatrix(&T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
morse_desc_submatrix(&D, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
sequence, request);
}
}
......
......@@ -33,22 +33,21 @@
/* Addressing shorthands: each expands to a descriptor name followed by the
 * two index arguments, so it can be dropped into a task argument list. */
#define A(m,n) A, m, n
#define T(m,n) T, m, n
/* Diagonal-tile selector.
 * With CHAMELEON_COPY_DIAG defined it addresses entry (k, 0) of a separate
 * descriptor; otherwise it addresses entry (k, k) of A itself.
 * NOTE(review): both the DIAG and the D spelling are listed here; this looks
 * like the before/after pair of a rename -- confirm which one is live. */
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#define D(k) D, k, 0
#else
#define DIAG(k) A, k, k
#define D(k) A, k, k
#endif
/***************************************************************************//**
* Parallel tile LQ factorization - dynamic scheduling
**/
void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int ldak, ldam;
......@@ -91,12 +90,6 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tslqt and unmlq tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q);
#endif
for (k = 0; k < minMNT; k++) {
RUNTIME_iteration_push(morse, k);
......@@ -114,13 +107,13 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
&options,
MorseUpper, A->mb, A->nb, A->nb,
A(k, k), ldak,
DIAG(k), ldak );
D(k), ldak );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseLower, A->mb, A->nb,
0., 1.,
DIAG(k), ldak );
D(k), ldak );
#endif
#endif
}
......@@ -131,7 +124,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
&options,
MorseRight, MorseConjTrans,
tempmm, tempkn, tempkn, ib, T->nb,
DIAG(k), ldak,
D(k), ldak,
T(k, k), T->mb,
A(m, k), ldam);
}
......@@ -162,11 +155,4 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -34,22 +34,21 @@
/* T(m,n) expands to the argument triple `T, (m), (n)`; T2 is the same triple
 * with the last index shifted by A->nt (A is resolved at the expansion site). */
#define T(m,n) T, (m), (n)
#define T2(m,n) T, (m), (n)+A->nt
/* Diagonal-tile selector.
 * With CHAMELEON_COPY_DIAG defined the first index is (n)/BS and the second is
 * fixed at 0, on a separate descriptor (m is unused); otherwise the selector
 * is simply `A, (m), (n)`. BS is not a macro parameter: it is taken from the
 * scope where the macro is expanded.
 * NOTE(review): both the DIAG and the D spelling are listed here; this looks
 * like the before/after pair of a rename -- confirm which one is live. */
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(m,n) DIAG, ((n)/BS), 0
#define D(m,n) D, ((n)/BS), 0
#else
#define DIAG(m,n) A, (m), (n)
#define D(m,n) A, (m), (n)
#endif
/*
* Parallel tile LQ factorization (reduction Householder) - dynamic scheduling
*/
void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int K, N, RD;
......@@ -89,15 +88,6 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
{
int nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
}
#endif
K = chameleon_min(A->mt, A->nt);
/* The number of the factorization */
......@@ -120,13 +110,13 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
&options,
MorseUpper, tempkm, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
D(k, N), ldak );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkm, tempNn,
0., 1.,
DIAG(k, N), ldak );
D(k, N), ldak );
#endif
#endif
}
......@@ -137,7 +127,7 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
&options,
MorseRight, MorseConjTrans,
tempmm, tempNn, tempkmin, ib, T->nb,
DIAG(k, N), ldak,
D(k, N), ldak,
T(k, N), T->mb,
A(m, N), ldam);
}
......@@ -193,11 +183,4 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -33,22 +33,21 @@
/* Addressing shorthands: each expands to a descriptor name followed by the
 * two index arguments, so it can be dropped into a task argument list. */
#define A(m,n) A, m, n
#define T(m,n) T, m, n
/* Diagonal-tile selector.
 * With CHAMELEON_COPY_DIAG defined it addresses entry (k, 0) of a separate
 * descriptor; otherwise it addresses entry (k, k) of A itself.
 * NOTE(review): both the DIAG and the D spelling are listed here; this looks
 * like the before/after pair of a rename -- confirm which one is live. */
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#define D(k) D, k, 0
#else
#define DIAG(k) A, k, k
#define D(k) A, k, k
#endif
/***************************************************************************//**
* Parallel tile QR factorization - dynamic scheduling
**/
void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int ldak, ldam;
......@@ -86,12 +85,6 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q);
#endif
for (k = 0; k < minMNT; k++) {
RUNTIME_iteration_push(morse, k);
......@@ -109,13 +102,13 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
&options,
MorseLower, A->mb, A->nb, A->nb,
A(k, k), ldak,
DIAG(k), ldak );
D(k), ldak );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseUpper, A->mb, A->nb,
0., 1.,
DIAG(k), ldak );
D(k), ldak );
#endif
#endif
}
......@@ -125,7 +118,7 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
&options,
MorseLeft, MorseConjTrans,
tempkm, tempnn, tempkm, ib, T->nb,
DIAG(k), ldak,
D(k), ldak,
T(k, k), T->mb,
A(k, n), ldak);
}
......@@ -156,11 +149,4 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -35,22 +35,21 @@
/* T(m,n) expands to the argument triple `T, (m), (n)`; T2 is the same triple
 * with the last index shifted by A->nt (A is resolved at the expansion site). */
#define T(m,n) T, (m), (n)
#define T2(m,n) T, (m), ((n)+A->nt)
/* Diagonal-tile selector.
 * With CHAMELEON_COPY_DIAG defined the first index is (m)/BS and the second is
 * fixed at 0, on a separate descriptor (n is unused); otherwise the selector
 * is simply `A, (m), (n)`. BS is not a macro parameter: it is taken from the
 * scope where the macro is expanded.
 * NOTE(review): both the DIAG and the D spelling are listed here; this looks
 * like the before/after pair of a rename -- confirm which one is live. */
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(m,n) DIAG, ((m)/BS), 0
#define D(m,n) D, ((m)/BS), 0
#else
#define DIAG(m,n) A, (m), (n)
#define D(m,n) A, (m), (n)
#endif
/***************************************************************************//**
* Parallel tile QR factorization (reduction Householder) - dynamic scheduling
**/
void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int K, M, RD;
......@@ -90,15 +89,6 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
{
/* necessary to avoid dependencies between tasks regarding the diag tile */
int nblk = ( A->mt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
}
#endif
K = chameleon_min(A->mt, A->nt);
for (k = 0; k < K; k++) {
RUNTIME_iteration_push(morse, k);
......@@ -119,13 +109,13 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
&options,
MorseLower, tempMm, A->nb, A->nb,
A(M, k), ldaM,
DIAG(M, k), ldaM );
D(M, k), ldaM );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempMm, A->nb,
0., 1.,
DIAG(M, k), ldaM );
D(M, k), ldaM );
#endif
#endif
}
......@@ -135,7 +125,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
&options,
MorseLeft, MorseConjTrans,
tempMm, tempnn, tempkmin, ib, T->nb,
DIAG(M, k), ldaM,
D(M, k), ldaM,
T(M, k), T->mb,
A(M, n), ldaM);
}
......@@ -193,11 +183,4 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -34,22 +34,21 @@
/* Addressing shorthands: each expands to a descriptor name followed by the
 * two index arguments, so it can be dropped into a task argument list. */
#define Q(m,n) Q, m, n
#define T(m,n) T, m, n
/* Diagonal-tile selector.
 * With CHAMELEON_COPY_DIAG defined it addresses entry (k, 0) of a separate
 * descriptor; otherwise it addresses entry (k, k) of A itself.
 * NOTE(review): both the DIAG and the D spelling are listed here; this looks
 * like the before/after pair of a rename -- confirm which one is live. */
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#define D(k) D, k, 0
#else
#define DIAG(k) A, k, k
#define D(k) A, k, k
#endif
/***************************************************************************//**
* Parallel construction of Q using tile V (application to identity) - dynamic scheduling
**/
void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int ldak, ldqm;
......@@ -91,12 +90,6 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q);
#endif
for (k = minMT-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k);
......@@ -125,13 +118,13 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
&options,
MorseUpper, tempkmin, tempkn, A->nb,
A(k, k), ldak,
DIAG(k), ldak );
D(k), ldak );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempkn,
0., 1.,
DIAG(k), ldak );
D(k), ldak );
#endif
#endif
for (m = k; m < Q->mt; m++) {
......@@ -141,7 +134,7 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
&options,
MorseRight, MorseNoTrans,
tempmm, tempkn, tempkmin, ib, T->nb,
DIAG(k), ldak,
D(k), ldak,
T(k, k), T->mb,
Q(m, k), ldqm);
}
......@@ -151,11 +144,4 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -34,9 +34,9 @@
/* T(m,n) expands to the argument triple `T, (m), (n)`; T2 is the same triple
 * with the last index shifted by A->nt (A is resolved at the expansion site). */
#define T(m,n) T, (m), (n)
#define T2(m,n) T, (m), (n)+(A->nt)
/* Diagonal-tile selector.
 * With CHAMELEON_COPY_DIAG defined the first index is (n)/BS and the second is
 * fixed at 0, on a separate descriptor (m is unused); otherwise the selector
 * is simply `A, (m), (n)`. BS is not a macro parameter: it is taken from the
 * scope where the macro is expanded.
 * NOTE(review): both the DIAG and the D spelling are listed here; this looks
 * like the before/after pair of a rename -- confirm which one is live. */
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(m,n) DIAG, ((n)/BS), 0
#define D(m,n) D, ((n)/BS), 0
#else
#define DIAG(m,n) A, (m), (n)
#define D(m,n) A, (m), (n)
#endif
/**
......@@ -44,14 +44,13 @@
* reduction Householder) - dynamic scheduling
**/
void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
MORSE_desc_t *T, int BS,
MORSE_desc_t *T, MORSE_desc_t *D, int BS,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int K, N, RD, lastRD;
......@@ -88,15 +87,6 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
{
/* necessary to avoid dependencies between tasks regarding the diag tile */
int nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
}
#endif
K = chameleon_min(A->mt, A->nt);
for (k = K-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k);
......@@ -149,13 +139,13 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
&options,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
D(k, N), ldak );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempNn,
0., 1.,
DIAG(k, N), ldak );
D(k, N), ldak );
#endif
#endif
for (m = k; m < Q->mt; m++) {
......@@ -166,7 +156,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
MorseRight, MorseNoTrans,
tempmm, tempNn,
tempkmin, ib, T->nb,
DIAG(k, N), ldak,
D(k, N), ldak,
T(k, N), T->mb,
Q(m, N), ldqm);
}
......@@ -177,11 +167,4 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -34,22 +34,21 @@
/* Addressing shorthands: each expands to a descriptor name followed by the
 * two index arguments, so it can be dropped into a task argument list. */
#define Q(m,n) Q, m, n
#define T(m,n) T, m, n
/* Diagonal-tile selector.
 * With CHAMELEON_COPY_DIAG defined it addresses entry (k, 0) of a separate
 * descriptor; otherwise it addresses entry (k, k) of A itself.
 * NOTE(review): both the DIAG and the D spelling are listed here; this looks
 * like the before/after pair of a rename -- confirm which one is live. */
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#define D(k) D, k, 0
#else
#define DIAG(k) A, k, k
#define D(k) A, k, k
#endif
/***************************************************************************//**
* Parallel construction of Q using tile V (application to identity) - dynamic scheduling
**/
void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T, MORSE_desc_t *D,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int ldak, ldqk, ldam, ldqm;
......@@ -91,12 +90,6 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q);
#endif
for (k = minMT-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k);
......@@ -127,13 +120,13 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
&options,
MorseLower, tempkm, tempkmin, A->nb,
A(k, k), ldak,
DIAG(k), ldak );
D(k), ldak );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempkm, tempkmin,
0., 1.,
DIAG(k), ldak );
D(k), ldak );
#endif
#endif
for (n = k; n < Q->nt; n++) {
......@@ -142,7 +135,7 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
&options,
MorseLeft, MorseNoTrans,
tempkm, tempnn, tempkmin, ib, T->nb,
DIAG(k), ldak,
D(k), ldak,
T(k, k), T->mb,
Q(k, n), ldqk);
}
......@@ -152,11 +145,4 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -36,9 +36,9 @@
/* T(m,n) expands to the argument triple `T, (m), (n)`; T2 is the same triple
 * with the last index shifted by A->nt (A is resolved at the expansion site). */
#define T(m,n) T, (m), (n)
#define T2(m,n) T, (m), (n)+(A->nt)
/* Diagonal-tile selector.
 * With CHAMELEON_COPY_DIAG defined the first index is (m)/BS and the second is
 * fixed at 0, on a separate descriptor (n is unused); otherwise the selector
 * is simply `A, (m), (n)`. BS is not a macro parameter: it is taken from the
 * scope where the macro is expanded.
 * NOTE(review): both the DIAG and the D spelling are listed here; this looks
 * like the before/after pair of a rename -- confirm which one is live. */
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(m,n) DIAG, ((m)/BS), 0
#define D(m,n) D, ((m)/BS), 0
#else
#define DIAG(m,n) A, (m), (n)
#define D(m,n) A, (m), (n)
#endif
/**
......@@ -46,14 +46,13 @@
* reduction Householder) - dynamic scheduling
**/
void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
MORSE_desc_t *T, int BS,
MORSE_desc_t *T, MORSE_desc_t *D, int BS,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int K, M, RD, lastRD;
......@@ -90,15 +89,6 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
#if defined(CHAMELEON_COPY_DIAG)
{
/* necessary to avoid dependencies between tasks regarding the diag tile */
int nblk = ( A->mt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
}
#endif
K = chameleon_min(A->mt, A->nt);
for (k = K-1; k >= 0; k--) {
RUNTIME_iteration_push(morse, k);
......@@ -155,13 +145,13 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
&options,
MorseLower, tempMm, tempkmin, A->nb,
A(M, k), ldaM,
DIAG(M, k), ldaM );
D(M, k), ldaM );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempMm, tempkmin,
0., 1.,
DIAG(M, k), ldaM );
D(M, k), ldaM );
#endif
#endif
for (n = k; n < Q->nt; n++) {
......@@ -171,7 +161,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
MorseLeft, MorseNoTrans,
tempMm, tempnn,
tempkmin, ib, T->nb,
DIAG(M, k), ldaM,
D(M, k), ldaM,
T(M, k), T->mb,
Q(M, n), ldqM);
}
......@@ -182,11 +172,4 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -35,23 +35,22 @@
/* Addressing shorthands: each expands to a descriptor name followed by the
 * two index arguments, so it can be dropped into a task argument list. */
#define B(m,n) B, m, n
#define T(m,n) T, m, n
/* Diagonal-tile selector.
 * With CHAMELEON_COPY_DIAG defined it addresses entry (k, 0) of a separate
 * descriptor; otherwise it addresses entry (k, k) of A itself.
 * NOTE(review): both the DIAG and the D spelling are listed here; this looks
 * like the before/after pair of a rename -- confirm which one is live. */
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#define D(k) D, k, 0
#else
#define DIAG(k) A, k, k
#define D(k) A, k, k
#endif
/***************************************************************************//**
* Parallel application of Q using tile V - LQ factorization - dynamic scheduling
**/
void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T,
MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D,
MORSE_sequence_t *sequence, MORSE_request_t *request)
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int ldak, ldbk, ldbm;
......@@ -93,12 +92,6 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tasks regarding the diag tile */
#if defined(CHAMELEON_COPY_DIAG)
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb, A->p, A->q);
#endif
if (side == MorseLeft ) {
if (trans == MorseNoTrans) {
/*
......@@ -116,13 +109,13 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
&options,
MorseUpper, tempkmin, tempkm, A->nb,
A(k, k), ldak,
DIAG(k), ldak );
D(k), ldak );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempkm,
0., 1.,