Attention : une mise à jour du service GitLab va être effectuée le mardi 18 janvier (et non le lundi 17 comme annoncé précédemment) entre 18h00 et 18h30. Cette mise à jour va générer une interruption du service dont nous ne maîtrisons pas complètement la durée, mais qui ne devrait pas excéder quelques minutes.

Commit 729f9461 authored by PRUVOST Florent
Browse files

copy the diag tile even in the homogeneous case to allow concurrent tsqrt and unmqr executions

parent a2c4f527
......@@ -32,11 +32,8 @@
#define A(m,n) A, m, n
#define T(m,n) T, m, n
#if defined(CHAMELEON_USE_MAGMA)
#define DIAG(k) DIAG, k, 0
#else
#define DIAG(k) A, k, k
#endif
/***************************************************************************//**
* Parallel tile LQ factorization - dynamic scheduling
**/
......@@ -77,9 +74,6 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_MAGMA)
/* necessary to use UNMLQ on GPU */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, (minMT-1)*A->mb, A->nb, 0, 0, (minMT-1)*A->mb, A->nb);
/* Worker space
*
* zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) )
......@@ -104,6 +98,10 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tslqt and unmlq tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, (minMT-1)*A->mb, A->nb, 0, 0, (minMT-1)*A->mb, A->nb);
for (k = 0; k < min(A->mt, A->nt); k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
......@@ -113,20 +111,20 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
tempkm, tempkn, ib, T->nb,
A(k, k), ldak,
T(k, k), T->mb);
#if defined(CHAMELEON_USE_MAGMA)
if ( k < (A->mt-1) ) {
MORSE_TASK_zlacpy(
&options,
MorseUpper, A->mb, A->nb, A->nb,
A(k, k), ldak,
DIAG(k), A->mb );
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, A->mb, A->nb,
0., 1.,
DIAG(k), A->mb );
}
#endif
}
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
......@@ -164,8 +162,6 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_USE_MAGMA)
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -36,11 +36,8 @@
#define A(m,n) A, (m), (n)
#define T(m,n) T, (m), (n)
#define T2(m,n) T, (m), (n)+A->nt
#if defined(CHAMELEON_USE_MAGMA)
#define DIAG(m,n) DIAG, ((n)/BS), 0
#else
#define DIAG(m,n) A, (m), (n)
#endif
/***************************************************************************//**
* Parallel tile LQ factorization (reduction Householder) - dynamic scheduling
**/
......@@ -58,6 +55,7 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
int ldak, ldam;
int tempkmin, tempkm, tempNn, tempnn, tempmm, tempNRDn;
int ib;
int nblk;
morse = morse_context_self();
if (sequence->status != MORSE_SUCCESS)
......@@ -78,12 +76,6 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_MAGMA)
{
/* necessary to use UNMLQ on GPU */
int nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb);
}
/* Worker space
*
* zgelqt = max( A->nb * (ib+1), ib * (ib + A->nb) )
......@@ -108,6 +100,11 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb);
for (k = 0; k < min(A->mt, A->nt); k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
ldak = BLKLDD(A, k);
......@@ -119,20 +116,20 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
tempkm, tempNn, ib, T->nb,
A(k, N), ldak,
T(k, N), T->mb);
#if defined(CHAMELEON_USE_MAGMA)
if ( k < (A->mt-1) ) {
MORSE_TASK_zlacpy(
&options,
MorseUpper, tempkm, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkm, tempNn,
0., 1.,
DIAG(k, N), ldak );
}
#endif
}
for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m);
......@@ -196,8 +193,6 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_USE_MAGMA)
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -32,11 +32,8 @@
#define A(m,n) A, m, n
#define T(m,n) T, m, n
#if defined(CHAMELEON_USE_MAGMA)
#define DIAG(k) DIAG, k, 0
#else
#define DIAG(k) A, k, k
#endif
/***************************************************************************//**
* Parallel tile QR factorization - dynamic scheduling
**/
......@@ -72,9 +69,6 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_MAGMA)
/* necessary to use UNMQR on GPU */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb);
/* Worker space
*
* zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
......@@ -99,6 +93,10 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tsqrt and unmqr tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb);
for (k = 0; k < minMNT; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
......@@ -108,20 +106,20 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
tempkm, tempkn, ib, T->nb,
A(k, k), ldak,
T(k, k), T->mb);
#if defined(CHAMELEON_USE_MAGMA)
if ( k < (A->nt-1) ) {
MORSE_TASK_zlacpy(
&options,
MorseLower, A->mb, A->nb, A->nb,
A(k, k), ldak,
DIAG(k), ldak );
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseUpper, A->mb, A->nb,
0., 1.,
DIAG(k), ldak );
}
#endif
}
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
MORSE_TASK_zunmqr(
......@@ -158,8 +156,6 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_USE_MAGMA)
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -34,11 +34,7 @@
#define A(m,n) A, (m), (n)
#define T(m,n) T, (m), (n)
#define T2(m,n) T, (m), ((n)+A->nt)
#if defined(CHAMELEON_USE_MAGMA)
#define DIAG(m,n) DIAG, ((m)/BS), 0
#else
#define DIAG(m,n) A, (m), (n)
#endif
/***************************************************************************//**
* Parallel tile QR factorization (reduction Householder) - dynamic scheduling
......@@ -57,6 +53,7 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
int ldaM, ldam, ldaMRD;
int tempkmin, tempkn, tempMm, tempnn, tempmm, tempMRDm;
int ib;
int nblk;
morse = morse_context_self();
if (sequence->status != MORSE_SUCCESS)
......@@ -77,12 +74,6 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_MAGMA)
{
int nblk = ( A->mt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb);
}
/* Worker space
*
* zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
......@@ -107,6 +98,11 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->mt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb);
K = min(A->mt, A->nt);
for (k = 0; k < K; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
......@@ -119,20 +115,20 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
tempMm, tempkn, ib, T->nb,
A(M, k), ldaM,
T(M, k), T->mb);
#if defined(CHAMELEON_USE_MAGMA)
if ( k < (A->nt-1) ) {
MORSE_TASK_zlacpy(
&options,
MorseLower, tempMm, A->nb, A->nb,
A(M, k), ldaM,
DIAG(M, k), ldaM );
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempMm, A->nb,
0., 1.,
DIAG(M, k), ldaM );
}
#endif
}
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
MORSE_TASK_zunmqr(
......@@ -196,8 +192,6 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_USE_MAGMA)
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -33,11 +33,7 @@
#include "common.h"
#define A(_m_,_n_) A, _m_, _n_
#if defined(CHAMELEON_USE_MAGMA)
#define DIAG(_k_) DIAG, _k_, 0
#else
#define DIAG(_k_) A, _k_, _k_
#endif
#define L(_m_,_n_) L, _m_, _n_
#define IPIV(_m_,_n_) &(IPIV[(int64_t)A->mb*((int64_t)(_m_)+(int64_t)A->mt*(int64_t)(_n_))])
......@@ -65,9 +61,6 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV,
ib = MORSE_IB;
#if defined(CHAMELEON_USE_MAGMA)
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb);
h_work_size = sizeof(MORSE_Complex64_t)*( 2*ib + 2*L->nb )*2*A->mb;
d_work_size = sizeof(MORSE_Complex64_t)*( ib )*2*A->mb;
#else
......@@ -76,6 +69,10 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV,
#endif
RUNTIME_options_ws_alloc( &options, h_work_size, d_work_size );
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb);
for (k = 0; k < minMNT; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
......@@ -88,7 +85,6 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV,
IPIV(k, k),
k == A->mt-1, A->nb*k);
#if defined(CHAMELEON_USE_MAGMA)
if ( k < (minMNT-1) ) {
MORSE_TASK_zlacpy(
&options,
......@@ -96,7 +92,6 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV,
A(k, k), ldak,
DIAG(k), ldak);
}
#endif
for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
......@@ -137,8 +132,6 @@ void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_USE_MAGMA)
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -33,11 +33,8 @@
#define A(m,n) A, m, n
#define Q(m,n) Q, m, n
#define T(m,n) T, m, n
#if defined(CHAMELEON_USE_MAGMA)
#define DIAG(k) DIAG, k, 0
#else
#define DIAG(k) A, k, k
#endif
/***************************************************************************//**
* Parallel construction of Q using tile V (application to identity) - dynamic scheduling
**/
......@@ -77,8 +74,6 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_MAGMA)
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb);
/* Worker space
*
* zunmlq = A->nb * ib
......@@ -92,6 +87,10 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb);
for (k = min(A->mt, A->nt)-1; k >= 0; k--) {
tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
......@@ -113,12 +112,12 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
T(k, n), T->mb);
}
}
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlacpy(
&options,
MorseUpper, tempkmin, tempkn, A->nb,
A(k, k), ldak,
DIAG(k), A->mb );
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempkn,
......@@ -141,8 +140,6 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_USE_MAGMA)
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -33,11 +33,8 @@
#define Q(m,n) Q, (m), (n)
#define T(m,n) T, (m), (n)
#define T2(m,n) T, (m), (n)+(A->nt)
#if defined(CHAMELEON_USE_MAGMA)
#define DIAG(m,n) DIAG, ((n)/BS), 0
#else
#define DIAG(m,n) A, (m), (n)
#endif
/**
* Parallel construction of Q using tile V (application to identity;
* reduction Householder) - dynamic scheduling
......@@ -58,6 +55,7 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
int ldqm;
int tempkm, tempkmin, tempNn, tempnn, tempmm, tempNRDn;
int ib;
int nblk;
morse = morse_context_self();
if (sequence->status != MORSE_SUCCESS)
......@@ -74,12 +72,6 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_MAGMA)
{
/* necessary to use UNMLQ on GPU */
int nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb);
}
/* Worker space
*
* zunmqr = A->nb * ib
......@@ -93,6 +85,11 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->nt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb);
K = min(A->mt, A->nt);
for (k = K-1; k >= 0; k--) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
......@@ -138,12 +135,12 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
T(k, n), T->mb);
}
}
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlacpy(
&options,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempNn,
......@@ -168,8 +165,6 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_USE_MAGMA)
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -33,11 +33,8 @@
#define A(m,n) A, m, n
#define Q(m,n) Q, m, n
#define T(m,n) T, m, n
#if defined(CHAMELEON_USE_MAGMA)
#define DIAG(k) DIAG, k, 0
#else
#define DIAG(k) A, k, k
#endif
/***************************************************************************//**
* Parallel construction of Q using tile V (application to identity) - dynamic scheduling
**/
......@@ -71,8 +68,6 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_MAGMA)
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb);
/* Worker space
*
* zunmqr = A->nb * ib
......@@ -86,6 +81,10 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb);
for (k = min(A->mt, A->nt)-1; k >= 0; k--) {
tempAkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
tempAkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
......@@ -109,12 +108,12 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
T(m, k), T->mb);
}
}
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlacpy(
&options,
MorseLower, tempkm, tempkmin, A->nb,
A(k, k), ldak,
DIAG(k), ldak );
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempkm, tempkmin,
......@@ -136,8 +135,6 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_USE_MAGMA)
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -35,11 +35,7 @@
#define Q(m,n) Q, (m), (n)
#define T(m,n) T, (m), (n)
#define T2(m,n) T, (m), (n)+(A->nt)
#if defined(CHAMELEON_USE_MAGMA)
#define DIAG(m,n) DIAG, ((m)/BS), 0
#else
#define DIAG(m,n) A, (m), (n)
#endif
/**
* Parallel construction of Q using tile V (application to identity;
......@@ -61,6 +57,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
int ldbM, ldbm, ldbMRD;
int tempkn, tempMm, tempnn, tempmm, tempMRDm, tempkmin;
int ib;
int nblk;
morse = morse_context_self();
if (sequence->status != MORSE_SUCCESS)
......@@ -77,12 +74,6 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_MAGMA)
{
int nblk = ( A->mt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb);
}
/* Worker space
*
* zunmqr = A->nb * ib
......@@ -96,6 +87,11 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tasks regarding the diag tile */
nblk = ( A->mt + BS -1 ) / BS;
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb);
K = min(A->mt, A->nt);
for (k = K-1; k >= 0; k--) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
......@@ -145,12 +141,12 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
T(m, k), T->mb);
}
}
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlacpy(
&options,
MorseLower, tempMm, tempkmin, A->nb,
A(M, k), ldaM,
DIAG(M, k), ldaM );
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempMm, tempkmin,
......@@ -174,8 +170,6 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_USE_MAGMA)
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
}
......@@ -34,11 +34,8 @@
#define A(m,n) A, m, n
#define B(m,n) B, m, n
#define T(m,n) T, m, n
#if defined(CHAMELEON_USE_MAGMA)
#define DIAG(k) DIAG, k, 0
#else
#define DIAG(k) A, k, k
#endif
/***************************************************************************//**
* Parallel application of Q using tile V - LQ factorization - dynamic scheduling
**/
......@@ -79,9 +76,6 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_MAGMA)
/* necessary to use UNMLQ on GPU */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb);
/* Worker space
*
* zunmlq = A->nb * ib
......@@ -95,6 +89,10 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc2(*DIAG, A->mb, A->nb, minMT*A->mb, A->nb, 0, 0, minMT*A->mb, A->nb);
if (side == MorseLeft ) {
if (trans == MorseNoTrans) {
/*
......@@ -105,12 +103,12 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlacpy(
&options,
MorseUpper, tempkmin, tempkm, A->nb,
A(k, k), ldak,
DIAG(k), A->mb );
#if defined(CHAMELEON_USE_MAGMA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempkm,
......@@ -168,12 +166,12 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
T(k, m), T->mb);
}