Commit 52c188d9 authored by Mathieu Faverge
Browse files

Factorize migrate calls, and fix tpm* kernels

parent 2b10aac1
...@@ -26,8 +26,7 @@ ...@@ -26,8 +26,7 @@
#define T(m,n) T, (m), (n) #define T(m,n) T, (m), (n)
#define D(m,n) D, (m), (n) #define D(m,n) D, (m), (n)
/**
/*
* Parallel tile LQ factorization (reduction Householder) - dynamic scheduling * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling
*/ */
void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t *A, void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t *A,
...@@ -41,11 +40,10 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -41,11 +40,10 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
size_t ws_host = 0; size_t ws_host = 0;
int k, m, n, i, p; int k, m, n, i, p;
int K, L; int K, L, nbgeqrt;
int ldak, ldam, lddk; int ldak, ldam, lddk;
int tempkmin, tempkm, tempnn, tempmm, temppn; int tempkmin, tempkm, tempnn, tempmm, temppn;
int ib; int ib, node, nbtiles, *tiles;
int *tiles;
chamctxt = chameleon_context_self(); chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) if (sequence->status != CHAMELEON_SUCCESS)
...@@ -60,37 +58,32 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -60,37 +58,32 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
} }
/* /*
* zgelqt = A->nb * (ib+1) * zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib * zunmlq = A->nb * ib
* ztslqt = A->nb * (ib+1) * ztplqt = A->nb * (ib+1)
* zttlqt = A->nb * (ib+1) * ztpmlqt = A->nb * ib
* ztsmlq = A->nb * ib
* zttmlq = A->nb * ib
*/ */
ws_worker = A->nb * (ib+1); ws_worker = A->nb * (ib+1);
/* Allocation of temporary (scratch) working space */ /* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /*
* * zunmqr = A->nb * ib
* zunmlq = A->nb * ib * ztpmqrt = 2 * A->nb * ib
* ztsmlq = 2 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif #endif
/* Initialisation of tiles */
tiles = (int*)calloc(qrtree->mt, sizeof(int));
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
ws_host *= sizeof(CHAMELEON_Complex64_t); ws_host *= sizeof(CHAMELEON_Complex64_t);
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
/* Initialisation of temporary tiles array */
tiles = (int*)calloc(qrtree->mt, sizeof(int));
K = chameleon_min(A->mt, A->nt); K = chameleon_min(A->mt, A->nt);
/* The number of the factorization */
for (k = 0; k < K; k++) { for (k = 0; k < K; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
...@@ -98,9 +91,11 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -98,9 +91,11 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
ldak = BLKLDD(A, k); ldak = BLKLDD(A, k);
lddk = BLKLDD(D, k); lddk = BLKLDD(D, k);
T = TS;
/* The number of geqrt to apply */ /* The number of geqrt to apply */
for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { nbgeqrt = qrtree->getnbgeqrf(qrtree, k);
T = TS;
for (i = 0; i < nbgeqrt; i++) {
p = qrtree->getm(qrtree, k, i); p = qrtree->getm(qrtree, k, i);
temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb;
tempkmin = chameleon_min(tempkm, temppn); tempkmin = chameleon_min(tempkm, temppn);
...@@ -110,6 +105,7 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -110,6 +105,7 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
tempkm, temppn, ib, T->nb, tempkm, temppn, ib, T->nb,
A( k, p), ldak, A( k, p), ldak,
T(k, p), T->mb); T(k, p), T->mb);
if ( genD ) { if ( genD ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
&options, &options,
...@@ -124,13 +120,14 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -124,13 +120,14 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
D(k, p), lddk ); D(k, p), lddk );
#endif #endif
} }
for (m = k+1; m < A->mt; m++) { for (m = k+1; m < A->mt; m++) {
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m); ldam = BLKLDD(A, m);
INSERT_TASK_zunmlq( INSERT_TASK_zunmlq(
&options, &options,
ChamRight, ChamConjTrans, ChamRight, ChamConjTrans,
tempmm, temppn, tempkmin, ib, T->nb, tempmm, temppn, tempkmin, ib, T->nb,
D(k, p), lddk, D(k, p), lddk,
T(k, p), T->mb, T(k, p), T->mb,
A(m, p), ldam); A(m, p), ldam);
...@@ -140,15 +137,15 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -140,15 +137,15 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
} }
/* Setting the order of the tiles */ /* Setting the order of the tiles */
libhqr_walk_stepk( qrtree, k, tiles + (k+1) ); nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
for (i = k+1; i < A->nt; i++) { for (i = 0; i < nbtiles; i++) {
n = tiles[i]; n = tiles[i];
p = qrtree->currpiv(qrtree, k, n); p = qrtree->currpiv(qrtree, k, n);
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
if (qrtree->gettype(qrtree, k, n) == 0) { if ( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */ /* TS kernel */
T = TS; T = TS;
L = 0; L = 0;
...@@ -159,10 +156,9 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -159,10 +156,9 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
L = tempnn; L = tempnn;
} }
RUNTIME_data_migrate( sequence, A(k, p), node = A->get_rankof( A, k, n );
A->get_rankof( A, k, n ) ); RUNTIME_data_migrate( sequence, A(k, p), node );
RUNTIME_data_migrate( sequence, A(k, n), RUNTIME_data_migrate( sequence, A(k, n), node );
A->get_rankof( A, k, n ) );
INSERT_TASK_ztplqt( INSERT_TASK_ztplqt(
&options, &options,
...@@ -175,10 +171,9 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -175,10 +171,9 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m); ldam = BLKLDD(A, m);
RUNTIME_data_migrate( sequence, A(m, p), node = A->get_rankof( A, m, n );
A->get_rankof( A, m, n ) ); RUNTIME_data_migrate( sequence, A(m, p), node );
RUNTIME_data_migrate( sequence, A(m, n), RUNTIME_data_migrate( sequence, A(m, n), node );
A->get_rankof( A, m, n ) );
INSERT_TASK_ztpmlqt( INSERT_TASK_ztpmlqt(
&options, &options,
......
...@@ -46,7 +46,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM ...@@ -46,7 +46,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
int K, N, RD; int K, N, RD;
int ldak, ldam, lddk; int ldak, ldam, lddk;
int tempkmin, tempkm, tempNn, tempnn, tempmm, tempNRDn; int tempkmin, tempkm, tempNn, tempnn, tempmm, tempNRDn;
int ib; int ib, node;
chamctxt = chameleon_context_self(); chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) if (sequence->status != CHAMELEON_SUCCESS)
...@@ -167,10 +167,9 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM ...@@ -167,10 +167,9 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
for (N = k; N+RD < A->nt; N += 2*RD) { for (N = k; N+RD < A->nt; N += 2*RD) {
tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb; tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb;
RUNTIME_data_migrate( sequence, A(k, N), node = A->get_rankof( A, k, N+RD );
A->get_rankof( A, k, N+RD ) ); RUNTIME_data_migrate( sequence, A(k, N), node );
RUNTIME_data_migrate( sequence, A(k, N+RD), RUNTIME_data_migrate( sequence, A(k, N+RD), node );
A->get_rankof( A, k, N+RD ) );
/* TT kernel */ /* TT kernel */
INSERT_TASK_ztplqt( INSERT_TASK_ztplqt(
...@@ -184,10 +183,9 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM ...@@ -184,10 +183,9 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
ldam = BLKLDD(A, m ); ldam = BLKLDD(A, m );
RUNTIME_data_migrate( sequence, A(m, N), node = A->get_rankof( A, m, N+RD );
A->get_rankof( A, m, N+RD ) ); RUNTIME_data_migrate( sequence, A(m, N), node );
RUNTIME_data_migrate( sequence, A(m, N+RD), RUNTIME_data_migrate( sequence, A(m, N+RD), node );
A->get_rankof( A, m, N+RD ) );
INSERT_TASK_ztpmlqt( INSERT_TASK_ztpmlqt(
&options, &options,
......
...@@ -22,16 +22,21 @@ ...@@ -22,16 +22,21 @@
#include <stdlib.h> #include <stdlib.h>
#include "libhqr.h" #include "libhqr.h"
#define A(m,n) A, (m), (n) #define A(m,n) A, (m), (n)
#define T(m,n) T, (m), (n) #define T(m,n) T, (m), (n)
#define D(m,n) D, (m), (n) #define D(m,n) D, (m), (n)
/** /**
* Parallel tile QR factorization (reduction Householder) - dynamic scheduling * Parallel tile QR factorization (reduction Householder) - dynamic scheduling
*
* @param[in] genD
* Indicate if the copies of the geqrt tiles must be done to speedup
* computations in updates.
*/ */
void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t *A, void chameleon_pzgeqrf_param( int genD, int K,
const libhqr_tree_t *qrtree, CHAM_desc_t *A,
CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
RUNTIME_option_t options; RUNTIME_option_t options;
...@@ -40,11 +45,10 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -40,11 +45,10 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
size_t ws_host = 0; size_t ws_host = 0;
int k, m, n, i, p; int k, m, n, i, p;
int K, L, nbgeqrt; int L, nbgeqrt;
int ldap, ldam, lddm; int ldap, ldam, lddm;
int tempkmin, tempkn, tempnn, tempmm; int tempkmin, tempkn, tempnn, tempmm;
int ib; int ib, node, nbtiles, *tiles;
int *tiles;
chamctxt = chameleon_context_self(); chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) if (sequence->status != CHAMELEON_SUCCESS)
...@@ -75,23 +79,22 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -75,23 +79,22 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif #endif
/* Initialisation of temporary tiles array */
tiles = (int*)calloc(qrtree->mt, sizeof(int));
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
ws_host *= sizeof(CHAMELEON_Complex64_t); ws_host *= sizeof(CHAMELEON_Complex64_t);
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
K = chameleon_min(A->mt, A->nt); /* Initialisation of temporary tiles array */
tiles = (int*)calloc(qrtree->mt, sizeof(int));
/* The number of the factorization */
for (k = 0; k < K; k++) { for (k = 0; k < K; k++) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
/* The number of geqrt to apply */ /* The number of geqrt to apply */
nbgeqrt = qrtree->getnbgeqrf(qrtree, k); nbgeqrt = qrtree->getnbgeqrf(qrtree, k);
T = TS;
for (i = 0; i < nbgeqrt; i++) { for (i = 0; i < nbgeqrt; i++) {
m = qrtree->getm(qrtree, k, i); m = qrtree->getm(qrtree, k, i);
tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
...@@ -99,13 +102,12 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -99,13 +102,12 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
ldam = BLKLDD(A, m); ldam = BLKLDD(A, m);
lddm = BLKLDD(D, m); lddm = BLKLDD(D, m);
T = TS;
INSERT_TASK_zgeqrt( INSERT_TASK_zgeqrt(
&options, &options,
tempmm, tempkn, ib, T->nb, tempmm, tempkn, ib, T->nb,
A(m, k), ldam, A(m, k), ldam,
T(m, k), T->mb); T(m, k), T->mb);
if ( genD ) { if ( genD ) {
INSERT_TASK_zlacpy( INSERT_TASK_zlacpy(
&options, &options,
...@@ -120,6 +122,7 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -120,6 +122,7 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
D(m, k), lddm ); D(m, k), lddm );
#endif #endif
} }
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
INSERT_TASK_zunmqr( INSERT_TASK_zunmqr(
...@@ -135,9 +138,9 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -135,9 +138,9 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
} }
/* Setting the order of the tiles */ /* Setting the order of the tiles */
libhqr_walk_stepk( qrtree, k, tiles + (k+1) ); nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
for (i = k+1; i < A->mt; i++) { for (i = 0; i < nbtiles; i++) {
m = tiles[i]; m = tiles[i];
p = qrtree->currpiv(qrtree, k, m); p = qrtree->currpiv(qrtree, k, m);
...@@ -145,7 +148,7 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -145,7 +148,7 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
ldap = BLKLDD(A, p); ldap = BLKLDD(A, p);
ldam = BLKLDD(A, m); ldam = BLKLDD(A, m);
if (qrtree->gettype(qrtree, k, m) == 0) { if ( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
/* TS kernel */ /* TS kernel */
T = TS; T = TS;
L = 0; L = 0;
...@@ -156,10 +159,9 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -156,10 +159,9 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
L = tempmm; L = tempmm;
} }
RUNTIME_data_migrate( sequence, A(p, k), node = A->get_rankof( A, m, k );
A->get_rankof( A, m, k ) ); RUNTIME_data_migrate( sequence, A(p, k), node );
RUNTIME_data_migrate( sequence, A(m, k), RUNTIME_data_migrate( sequence, A(m, k), node );
A->get_rankof( A, m, k ) );
INSERT_TASK_ztpqrt( INSERT_TASK_ztpqrt(
&options, &options,
...@@ -171,10 +173,9 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -171,10 +173,9 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
RUNTIME_data_migrate( sequence, A(p, n), node = A->get_rankof( A, m, n );
A->get_rankof( A, m, n ) ); RUNTIME_data_migrate( sequence, A(p, n), node );
RUNTIME_data_migrate( sequence, A(m, n), RUNTIME_data_migrate( sequence, A(m, n), node );
A->get_rankof( A, m, n ) );
INSERT_TASK_ztpmqrt( INSERT_TASK_ztpmqrt(
&options, &options,
......
...@@ -46,7 +46,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM ...@@ -46,7 +46,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
int K, M, RD; int K, M, RD;
int ldaM, ldam, ldaMRD, lddM; int ldaM, ldam, ldaMRD, lddM;
int tempkmin, tempkn, tempMm, tempnn, tempmm, tempMRDm; int tempkmin, tempkn, tempMm, tempnn, tempmm, tempMRDm;
int ib; int ib, node;
chamctxt = chameleon_context_self(); chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) if (sequence->status != CHAMELEON_SUCCESS)
...@@ -166,10 +166,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM ...@@ -166,10 +166,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
ldaM = BLKLDD(A, M ); ldaM = BLKLDD(A, M );
ldaMRD = BLKLDD(A, M+RD); ldaMRD = BLKLDD(A, M+RD);
RUNTIME_data_migrate( sequence, A(M, k), node = A->get_rankof( A, M+RD, k );
A->get_rankof( A, M+RD, k ) ); RUNTIME_data_migrate( sequence, A(M, k), node );
RUNTIME_data_migrate( sequence, A(M+RD, k), RUNTIME_data_migrate( sequence, A(M+RD, k), node );
A->get_rankof( A, M+RD, k ) );
/* TT kernel */ /* TT kernel */
INSERT_TASK_ztpqrt( INSERT_TASK_ztpqrt(
...@@ -182,10 +181,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM ...@@ -182,10 +181,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
for (n = k+1; n < A->nt; n++) { for (n = k+1; n < A->nt; n++) {
tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
RUNTIME_data_migrate( sequence, A(M, n), node = A->get_rankof( A, M+RD, n );
A->get_rankof( A, M+RD, n ) ); RUNTIME_data_migrate( sequence, A(M, n), node );
RUNTIME_data_migrate( sequence, A(M+RD, n), RUNTIME_data_migrate( sequence, A(M+RD, n), node );
A->get_rankof( A, M+RD, n ) );
INSERT_TASK_ztpmqrt( INSERT_TASK_ztpmqrt(
&options, &options,
......
...@@ -20,8 +20,6 @@ ...@@ -20,8 +20,6 @@
*/ */
#include "control/common.h" #include "control/common.h"
#define V1(m,n) V1, m, n
#define T1(m,n) T1, m, n
#define V2(m,n) V2, m, n #define V2(m,n) V2, m, n
#define T2(m,n) T2, m, n #define T2(m,n) T2, m, n
#define Q1(m,n) Q1, m, n #define Q1(m,n) Q1, m, n
...@@ -31,11 +29,9 @@ ...@@ -31,11 +29,9 @@
/** /**
* Parallel tile QR factorization - dynamic scheduling * Parallel tile QR factorization - dynamic scheduling
*/ */
void chameleon_pztpgqrt( int genD, int L, void chameleon_pztpgqrt( int KT, int L,
CHAM_desc_t *V1, CHAM_desc_t *T1,
CHAM_desc_t *V2, CHAM_desc_t *T2, CHAM_desc_t *V2, CHAM_desc_t *T2,
CHAM_desc_t *Q1, CHAM_desc_t *Q2, CHAM_desc_t *Q1, CHAM_desc_t *Q2,
CHAM_desc_t *D,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{ {
CHAM_context_t *chamctxt; CHAM_context_t *chamctxt;
...@@ -46,7 +42,7 @@ void chameleon_pztpgqrt( int genD, int L, ...@@ -46,7 +42,7 @@ void chameleon_pztpgqrt( int genD, int L,
int k, m, n; int k, m, n;
int ldvk, ldvm, lddk; int ldvk, ldvm, lddk;
int ldqk, ldqm; int ldqk, ldqm;
int tempkm, tempkn, tempkk, tempnn, tempmm, templm; int tempkn, tempnn, tempmm, templm;
int ib; int ib;
/* Dimension of the first column */ /* Dimension of the first column */
...@@ -61,11 +57,6 @@ void chameleon_pztpgqrt( int genD, int L, ...@@ -61,11 +57,6 @@ void chameleon_pztpgqrt( int genD, int L,
ib = CHAMELEON_IB; ib = CHAMELEON_IB;
if ( D == NULL ) {
D = V1;
genD = 0;
}
/* /*
* ztpmqrt = Q1->nb * ib * ztpmqrt = Q1->nb * ib
*/ */
...@@ -85,21 +76,17 @@ void chameleon_pztpgqrt( int genD, int L, ...@@ -85,21 +76,17 @@ void chameleon_pztpgqrt( int genD, int L,
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
for (k = V1->nt-1; k >= 0; k--) { for (k = KT-1; k >= 0; k--) {
RUNTIME_iteration_push(chamctxt, k); RUNTIME_iteration_push(chamctxt, k);
tempkm = k == V1->mt-1 ? V1->m-k*V1->mb : V1->mb;
tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb;
tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb; tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
ldvk = BLKLDD(V1, k);
lddk = BLKLDD(D, k);
ldqk = BLKLDD(Q1, k); ldqk = BLKLDD(Q1, k);
/* Equivalent to the tsmqr step on Q1,Q2 */ /* Equivalent to the tsmqr step on Q1,Q2 */
maxmtk = chameleon_min( Q2->mt,