Mentions légales du service

Skip to content
Snippets Groups Projects

Modify the interface of the tpgqrt routines, and fix some numerical bug within the routine

Merged Mathieu Faverge requested to merge faverge/chameleon:tpqrt/hotfix into master
Files
11
+ 108
30
@@ -24,30 +24,42 @@
**/
#include "control/common.h"
#define V(m,n) V, m, n
#define T(m,n) T, m, n
#define A(m,n) A, m, n
#define B(m,n) B, m, n
#define V1(m,n) V1, m, n
#define T1(m,n) T1, m, n
#define V2(m,n) V2, m, n
#define T2(m,n) T2, m, n
#define Q1(m,n) Q1, m, n
#define Q2(m,n) Q2, m, n
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#else
#define DIAG(k) V1, k, k
#endif
/***************************************************************************//**
* Parallel tile QR factorization - dynamic scheduling
**/
void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B,
void morse_pztpgqrt( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2,
MORSE_sequence_t *sequence, MORSE_request_t *request )
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int ldak, ldvm, ldbm;
int tempkn, tempnn, tempmm, templm;
int ib;
int ldvk, ldvm;
int ldqk, ldqm;
int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
int ib, minMT;
/* Dimension of the first column */
int maxm = B->m - L;
int maxmt = (maxm % B->mb == 0) ? (maxm / B->mb) : (maxm / B->mb + 1);
int maxm = Q2->m - L;
int maxmt = (maxm % Q2->mb == 0) ? (maxm / Q2->mb) : (maxm / Q2->mb + 1);
int maxmtk;
morse = morse_context_self();
@@ -57,18 +69,24 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M
ib = MORSE_IB;
if (V1->m > V1->n) {
minMT = V1->nt;
} else {
minMT = V1->mt;
}
/*
* ztpmqrt = A->nb * ib
* ztpmqrt = Q1->nb * ib
*/
ws_worker = A->nb * ib;
ws_worker = Q1->nb * ib;
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* ztpmqrt = 2 * A->nb * ib
* ztpmqrt = 2 * Q1->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
#endif
ws_worker *= sizeof(MORSE_Complex64_t);
@@ -76,31 +94,91 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
for (k = V->nt-1; k >= 0; k--) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
ldak = BLKLDD(A, k);
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
#endif
for (k = V1->nt-1; k >= 0; k--) {
tempkm = k == V1->mt-1 ? V1->m-k*V1->mb : V1->mb;
tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb;
tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
ldvk = BLKLDD(V1, k);
ldqk = BLKLDD(Q1, k);
maxmtk = chameleon_min( B->mt, maxmt+k ) - 1;
/* Equivalent to the tsmqr step on Q1,Q2 */
maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
for (m = maxmtk; m > -1; m--) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
templm = m == maxmtk ? tempmm : 0;
ldvm = BLKLDD(V, m);
ldbm = BLKLDD(B, m);
tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb;
templm = m == maxmtk ? tempmm : 0;
ldvm = BLKLDD(V2, m);
ldqm = BLKLDD(Q2, m);
for (n = k; n < Q2->nt; n++) {
tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb;
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseNoTrans,
tempmm, tempnn, tempkn, templm, ib, T2->nb,
V2(m, k), ldvm,
T2(m, k), T2->mb,
Q1(k, n), ldqk,
Q2(m, n), ldqm );
}
}
for (n = k; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
for (m = Q1->mt - 1; m > k; m--) {
tempmm = m == Q1->mt-1 ? Q1->m-m*Q1->mb : Q1->mb;
ldvm = BLKLDD(V1, m);
ldqm = BLKLDD(Q1, m);
for (n = k; n < Q1->nt; n++) {
tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseConjTrans,
tempmm, tempnn, tempkn, templm, ib, T->nb,
V(m, k), ldvm,
T(m, k), T->mb,
A(k, n), ldak,
B(m, n), ldbm );
MorseLeft, MorseNoTrans,
tempmm, tempnn, tempkn, 0, ib, T1->nb,
V1(m, k), ldvm,
T1(m, k), T1->mb,
Q1(k, n), ldqk,
Q1(m, n), ldqm );
}
}
#if defined(CHAMELEON_COPY_DIAG)
MORSE_TASK_zlacpy(
&options,
MorseLower, tempkm, tempkk, V1->nb,
V1(k, k), ldvk,
DIAG(k), ldvk );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempkm, tempkk,
0., 1.,
DIAG(k), ldvk );
#endif
#endif
for (n = k; n < Q1->nt; n++) {
tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
MORSE_TASK_zunmqr(
&options,
MorseLeft, MorseNoTrans,
tempkm, tempnn, tempkk, ib, T1->nb,
DIAG(k), ldvk,
T1(k, k), T1->mb,
Q1(k, n), ldqk);
}
}
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
Loading