Commit 4b3a2f82 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Merge branch 'tpqrt/hotfix' into 'master'

Modify the interface of the tpgqrt routines, and fix some numerical bug within the routine

See merge request !8
parents 14486691 65aadc18
......@@ -24,30 +24,42 @@
**/
#include "control/common.h"
#define V(m,n) V, m, n
#define T(m,n) T, m, n
#define A(m,n) A, m, n
#define B(m,n) B, m, n
#define V1(m,n) V1, m, n
#define T1(m,n) T1, m, n
#define V2(m,n) V2, m, n
#define T2(m,n) T2, m, n
#define Q1(m,n) Q1, m, n
#define Q2(m,n) Q2, m, n
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#else
#define DIAG(k) V1, k, k
#endif
/***************************************************************************//**
* Parallel tile QR factorization - dynamic scheduling
**/
void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B,
void morse_pztpgqrt( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2,
MORSE_sequence_t *sequence, MORSE_request_t *request )
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int ldak, ldvm, ldbm;
int tempkn, tempnn, tempmm, templm;
int ib;
int ldvk, ldvm;
int ldqk, ldqm;
int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
int ib, minMT;
/* Dimension of the first column */
int maxm = B->m - L;
int maxmt = (maxm % B->mb == 0) ? (maxm / B->mb) : (maxm / B->mb + 1);
int maxm = Q2->m - L;
int maxmt = (maxm % Q2->mb == 0) ? (maxm / Q2->mb) : (maxm / Q2->mb + 1);
int maxmtk;
morse = morse_context_self();
......@@ -57,18 +69,24 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M
ib = MORSE_IB;
if (V1->m > V1->n) {
minMT = V1->nt;
} else {
minMT = V1->mt;
}
/*
* ztpmqrt = A->nb * ib
* ztpmqrt = Q1->nb * ib
*/
ws_worker = A->nb * ib;
ws_worker = Q1->nb * ib;
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* ztpmqrt = 2 * A->nb * ib
* ztpmqrt = 2 * Q1->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
#endif
ws_worker *= sizeof(MORSE_Complex64_t);
......@@ -76,31 +94,91 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
for (k = V->nt-1; k >= 0; k--) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
ldak = BLKLDD(A, k);
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
#endif
for (k = V1->nt-1; k >= 0; k--) {
tempkm = k == V1->mt-1 ? V1->m-k*V1->mb : V1->mb;
tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb;
tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
ldvk = BLKLDD(V1, k);
ldqk = BLKLDD(Q1, k);
maxmtk = chameleon_min( B->mt, maxmt+k ) - 1;
/* Equivalent to the tsmqr step on Q1,Q2 */
maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
for (m = maxmtk; m > -1; m--) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
templm = m == maxmtk ? tempmm : 0;
ldvm = BLKLDD(V, m);
ldbm = BLKLDD(B, m);
tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb;
templm = m == maxmtk ? tempmm : 0;
ldvm = BLKLDD(V2, m);
ldqm = BLKLDD(Q2, m);
for (n = k; n < Q2->nt; n++) {
tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb;
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseNoTrans,
tempmm, tempnn, tempkn, templm, ib, T2->nb,
V2(m, k), ldvm,
T2(m, k), T2->mb,
Q1(k, n), ldqk,
Q2(m, n), ldqm );
}
}
for (n = k; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
for (m = Q1->mt - 1; m > k; m--) {
tempmm = m == Q1->mt-1 ? Q1->m-m*Q1->mb : Q1->mb;
ldvm = BLKLDD(V1, m);
ldqm = BLKLDD(Q1, m);
for (n = k; n < Q1->nt; n++) {
tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseConjTrans,
tempmm, tempnn, tempkn, templm, ib, T->nb,
V(m, k), ldvm,
T(m, k), T->mb,
A(k, n), ldak,
B(m, n), ldbm );
MorseLeft, MorseNoTrans,
tempmm, tempnn, tempkn, 0, ib, T1->nb,
V1(m, k), ldvm,
T1(m, k), T1->mb,
Q1(k, n), ldqk,
Q1(m, n), ldqm );
}
}
#if defined(CHAMELEON_COPY_DIAG)
MORSE_TASK_zlacpy(
&options,
MorseLower, tempkm, tempkk, V1->nb,
V1(k, k), ldvk,
DIAG(k), ldvk );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempkm, tempkk,
0., 1.,
DIAG(k), ldvk );
#endif
#endif
for (n = k; n < Q1->nt; n++) {
tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
MORSE_TASK_zunmqr(
&options,
MorseLeft, MorseNoTrans,
tempkm, tempnn, tempkk, ib, T1->nb,
DIAG(k), ldvk,
T1(k, k), T1->mb,
Q1(k, n), ldqk);
}
}
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -37,68 +37,85 @@
*******************************************************************************
*
* @param[in] M
* The number of rows of the matrices B, and V. M >= 0.
* The number of rows of the matrices Q2, and V2. M >= 0.
*
* @param[in] N
* The number of columns of the matrices B, and A. N >= 0.
* The number of columns of the matrices Q1, and Q1. N >= 0.
*
* @param[in] K
* The number of elementary reflectors whose product defines
* the matrix Q in the matrix V.
* The number of rows of the matrices V1, and Q1.
* The number of columns of the matrices V1, and V2.
*
* @param[in] L
* The number of rows of the upper trapezoidal part of V.
* The number of rows of the upper trapezoidal part of V2.
* MIN(M,N) >= L >= 0. See Further Details.
*
* @param[in] V
* @param[in] V1
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* MORSE_ztpqrt() in the first k rows of its array argument V.
* V is matrx of size M-by-K. The first M-L rows
* MORSE_ztpqrt().
* V1 is a matrix of size K-by-K.
*
* @param[in] LDV1
* The leading dimension of the array V1. LDV1 >= max(1,K).
*
* @param[int] descT1
* The auxiliary factorization data generated by the call to
* MORSE_zgeqrf() on V1.
*
* @param[in] V2
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* MORSE_ztpqrt() in the first k rows of its array argument V2.
* V2 is a matrix of size M-by-K. The first M-L rows
* are rectangular, and the last L rows are upper trapezoidal.
*
* @param[in] LDV
* The leading dimension of the array V. LDV >= max(1,K).
* @param[in] LDV2
* The leading dimension of the array V2. LDV2 >= max(1,M).
*
* @param[int] descT
* On exit, auxiliary factorization data, required by MORSE_zgeqrs to
* solve the system of equations, or by any function to apply the Q.
* @param[int] descT2
* The auxiliary factorization data, generated by the call to
* MORSE_ztpqrt() on V1 and V2, and associated to V2.
*
* @param[in,out] A
* A is COMPLEX*16 array, dimension (LDA,N)
* On entry, the K-by-N matrix A.
* On exit, A is overwritten by the corresponding block of
* Q*A. See Further Details.
* @param[in,out] Q1
* Q1 is COMPLEX*16 array, dimension (LDQ1,N)
* On entry, the K-by-N matrix Q1.
* On exit, Q1 is overwritten by the corresponding block of
* Q*Q1. See Further Details.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,K).
* @param[in] LDQ1
* The leading dimension of the array Q1. LDQ1 >= max(1,K).
*
* @param[in,out] B
* On entry, the pentagonal M-by-N matrix B.
* On exit, B contains Q.
* @param[in,out] Q2
* On entry, the pentagonal M-by-N matrix Q2.
* On exit, Q2 contains Q.
*
* @param[in] LDB
* The leading dimension of the array B. LDB >= max(1,M).
* @param[in] LDQ2
* The leading dimension of the array Q2. LDQ2 >= max(1,M).
*
* @par Further Details:
* =====================
*
* The input matrix Q is a (K+M)-by-N matrix
*
* Q = [ A ]
* [ B ]
* Q = [ Q1 ]
* [ Q2 ]
*
* where A is an identity matrix, and B is a M-by-N matrix of 0.
* V a matrix of householder reflectors with a pentagonal shape consisting of a
* (M-L)-by-K rectangular matrix V1 on top of a L-by-N
* Upper trapezoidal matrix V2:
* where Q1 is an identity matrix, and Q2 is a M-by-N matrix of 0.
* V is a matrix of householder reflectors with a pentagonal shape consisting
* of a K-by-K rectangular matrix V1 on top of a matrix V2 composed of
* (M-L)-by-K rectangular part on top of a L-by-N upper trapezoidal matrix:
*
* V = [ V1 ] <- (M-L)-by-N rectangular
* [ V2 ] <- L-by-N upper trapezoidal.
* V = [ V1 ] <- K-by-K rectangular
* [ V2a ] <- (M-L)-by-K rectangular
* [ V2b ] <- L-by-K upper trapezoidal.
*
* The upper trapezoidal matrix V2 consists of the first L rows of a
* K-by-K upper triangular matrix, where 0 <= L <= MIN(M,K). If L=0,
* V is rectangular M-by-K; if M=L=K, V is upper triangular.
* The upper trapezoidal part of the matrix V2 consists of the first L rows of
* a K-by-K upper triangular matrix, where 0 <= L <= MIN(M,K). If L=0, V2 is
* rectangular M-by-K; if M=L=K, V2 is upper triangular. Those are the two
* cases only handled for now.
*
*******************************************************************************
*
......@@ -117,17 +134,17 @@
*
******************************************************************************/
int MORSE_ztpgqrt( int M, int N, int K, int L,
MORSE_Complex64_t *V, int LDV,
MORSE_desc_t *descT,
MORSE_Complex64_t *A, int LDA,
MORSE_Complex64_t *B, int LDB )
MORSE_Complex64_t *V1, int LDV1, MORSE_desc_t *descT1,
MORSE_Complex64_t *V2, int LDV2, MORSE_desc_t *descT2,
MORSE_Complex64_t *Q1, int LDQ1,
MORSE_Complex64_t *Q2, int LDQ2 )
{
int NB;
int status;
MORSE_context_t *morse;
MORSE_sequence_t *sequence = NULL;
MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
MORSE_desc_t descA, descB, descV;
MORSE_desc_t descQ1, descQ2, descV1, descV2;
int minMK = chameleon_min( M, K );
morse = morse_context_self();
......@@ -153,18 +170,26 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
morse_error("MORSE_ztpgqrt", "illegal value of N");
return -4;
}
if (LDV < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDV");
if (K != N) {
morse_error("MORSE_ztpgqrt", "illegal value of K and N. K must be equal to N");
return -3;
}
if (LDV1 < chameleon_max(1, K)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDV1");
return -6;
}
if (LDA < chameleon_max(1, K)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDA");
if (LDV2 < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDV2");
return -9;
}
if (LDB < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDB");
if (LDQ1 < chameleon_max(1, K)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDQ1");
return -11;
}
if (LDQ2 < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDQ2");
return -13;
}
/* Quick return */
if (minMK == 0)
......@@ -183,34 +208,41 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
morse_sequence_create(morse, &sequence);
/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/
morse_zooplap2tile( descV, V, NB, NB, LDB, K, 0, 0, M, K, sequence, &request,
morse_desc_mat_free(&(descV)) );
morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, sequence, &request,
(morse_desc_mat_free(&(descV)),
morse_desc_mat_free(&(descA))) );
morse_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, sequence, &request,
(morse_desc_mat_free(&(descV)),
morse_desc_mat_free(&(descA)),
morse_desc_mat_free(&(descB))) );
morse_zooplap2tile( descV1, V1, NB, NB, LDV1, K, 0, 0, M, K, sequence, &request,
morse_desc_mat_free(&(descV1)) );
morse_zooplap2tile( descV2, V2, NB, NB, LDV2, K, 0, 0, M, K, sequence, &request,
(morse_desc_mat_free(&(descV1)),
morse_desc_mat_free(&(descV2))) );
morse_zooplap2tile( descQ1, Q1, NB, NB, LDQ1, N, 0, 0, K, N, sequence, &request,
(morse_desc_mat_free(&(descV1)),
morse_desc_mat_free(&(descV2)),
morse_desc_mat_free(&(descQ1))) );
morse_zooplap2tile( descQ2, Q2, NB, NB, LDQ2, N, 0, 0, M, N, sequence, &request,
(morse_desc_mat_free(&(descV1)),
morse_desc_mat_free(&(descV2)),
morse_desc_mat_free(&(descQ1)),
morse_desc_mat_free(&(descQ2))) );
/* } else {*/
/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/
/* morse_ziplap2tile( descQ1, Q1, NB, NB, LDQ1, N, 0, 0, M, N,*/
/* sequence, &request);*/
/* }*/
/* Call the tile interface */
MORSE_ztpgqrt_Tile_Async(L, &descV, descT, &descA, &descB, sequence, &request);
MORSE_ztpgqrt_Tile_Async(L, &descV1, descT1, &descV2, descT2, &descQ1, &descQ2, sequence, &request);
/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/
morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request);
morse_zooptile2lap(descB, B, NB, NB, LDB, N, sequence, &request);
morse_zooptile2lap(descQ1, Q1, NB, NB, LDQ1, N, sequence, &request);
morse_zooptile2lap(descQ2, Q2, NB, NB, LDQ2, N, sequence, &request);
morse_sequence_wait(morse, sequence);
morse_desc_mat_free(&descV);
morse_desc_mat_free(&descA);
morse_desc_mat_free(&descB);
morse_desc_mat_free(&descV1);
morse_desc_mat_free(&descV2);
morse_desc_mat_free(&descQ1);
morse_desc_mat_free(&descQ2);
/* } else {*/
/* morse_ziptile2lap( descV, V, NB, NB, LDV, K, sequence, &request);*/
/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/
/* morse_ziptile2lap( descB, B, NB, NB, LDB, N, sequence, &request);*/
/* morse_ziptile2lap( descV1, V1, NB, NB, LDV1, K, sequence, &request);*/
/* morse_ziptile2lap( descV2, V2, NB, NB, LDV2, K, sequence, &request);*/
/* morse_ziptile2lap( descQ1, Q1, NB, NB, LDQ1, N, sequence, &request);*/
/* morse_ziptile2lap( descQ2, Q2, NB, NB, LDQ2, N, sequence, &request);*/
/* morse_sequence_wait(morse, sequence);*/
/* }*/
......@@ -231,16 +263,6 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
*
*******************************************************************************
*
* @param[in,out] A
* On entry, the M-by-N matrix A.
* On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N
* upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the
* diagonal represent the unitary matrix Q as a product of elementary reflectors stored
* by tiles.
*
* @param[out] T
* On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system
* of equations.
*
*******************************************************************************
*
......@@ -257,7 +279,10 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
* @sa MORSE_zgeqrs_Tile
*
******************************************************************************/
int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B )
int MORSE_ztpgqrt_Tile( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2 )
{
MORSE_context_t *morse;
MORSE_sequence_t *sequence = NULL;
......@@ -270,10 +295,10 @@ int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A
return MORSE_ERR_NOT_INITIALIZED;
}
morse_sequence_create(morse, &sequence);
MORSE_ztpgqrt_Tile_Async(L, V, T, A, B, sequence, &request);
MORSE_ztpgqrt_Tile_Async(L, V1, T1, V2, T2, Q1, Q2, sequence, &request);
morse_sequence_wait(morse, sequence);
RUNTIME_desc_getoncpu(A);
RUNTIME_desc_getoncpu(B);
RUNTIME_desc_getoncpu(Q1);
RUNTIME_desc_getoncpu(Q2);
status = sequence->status;
morse_sequence_destroy(morse, sequence);
......@@ -309,7 +334,10 @@ int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A
* @sa MORSE_zgeqrs_Tile_Async
*
******************************************************************************/
int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B,
int MORSE_ztpgqrt_Tile_Async( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2,
MORSE_sequence_t *sequence, MORSE_request_t *request )
{
MORSE_context_t *morse;
......@@ -334,37 +362,47 @@ int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_des
return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED);
/* Check descriptors for correctness */
if (morse_desc_check(V) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid third descriptor");
if (morse_desc_check(V1) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid V1 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(T1) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid T1 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(V2) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid V2 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(T) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid third descriptor");
if (morse_desc_check(T2) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid T2 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(A) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid first descriptor");
if (morse_desc_check(Q1) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid Q1 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(B) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid second descriptor");
if (morse_desc_check(Q2) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid Q2 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (A->nb != A->mb) {
if (Q1->nb != Q1->mb) {
morse_error("MORSE_ztpgqrt_Tile", "only square tiles supported");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (((B->m - L) % B->mb) != 0) {
if ( (L != 0) && (((Q2->m - L) % Q2->mb) != 0) ) {
morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
/* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */
morse_pztpgqrt(L, V, T, A, B, sequence, request);
morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request );
morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request );
morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, sequence, request );
/* } */
/* else { */
/* morse_pztpgqrtrh(A, T, MORSE_RHBLK, sequence, request); */
/* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */
/* } */
return MORSE_SUCCESS;
......
......@@ -348,7 +348,7 @@ int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc
morse_error("MORSE_ztpqrt_Tile", "only square tiles supported");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (((B->m - L) % B->mb) != 0) {
if ( (L != 0) && (((B->m - L) % B->mb) != 0) ) {
morse_error("MORSE_ztpqrt_Tile", "Triangular part must be aligned with tiles");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
......
......@@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO
void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
......@@ -102,7 +102,7 @@ int MORSE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64
int MORSE_zsyr2k(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta, MORSE_Complex64_t *C, int LDC);
int MORSE_zsysv(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
int MORSE_zsytrs(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
int MORSE_ztpgqrt( int M, int N, int K, int L, MORSE_Complex64_t *V, int LDV, MORSE_desc_t *descT, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB );
int MORSE_ztpgqrt( int M, int N, int K, int L, MORSE_Complex64_t *V1, int LDV1, MORSE_desc_t *descT1, MORSE_Complex64_t *V2, int LDV2, MORSE_desc_t *descT2, MORSE_Complex64_t *Q1, int LDQ1, MORSE_Complex64_t *Q2, int LDQ2 );
int MORSE_ztpqrt( int M, int N, int L, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_desc_t *descT );
int MORSE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB);
int MORSE_ztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int N, int NRHS, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
......@@ -181,7 +181,7 @@ int MORSE_zsyrk_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha,
int MORSE_zsyr2k_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C);
int MORSE_zsysv_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
int MORSE_zsytrs_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B );
int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2 );
int MORSE_ztpqrt_Tile( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T );
int MORSE_ztradd_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B);
int MORSE_ztrmm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B);
......@@ -257,7 +257,7 @@ int MORSE_zsytrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, M
int MORSE_zsymm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zsyrk_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zsyr2k_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request );
int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request );
int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request );
int MORSE_ztradd_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_ztrmm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
......@@ -49,6 +49,7 @@ void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
STARPU_VALUE, &N, sizeof(int),
STARPU_VALUE, &K, sizeof(int),
STARPU_VALUE, &L, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
STARPU_VALUE, &ldv, sizeof(int),