Mentions légales du service

Skip to content
Snippets Groups Projects
Commit ce8be021 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Change tpgqrt interface because ungqr already includes laset, so it can not be...

Change tpgqrt interface because ungqr already includes laset, so it can not be used as a two steps algorithms
parent 6826cefc
No related branches found
No related tags found
1 merge request!8Modify the interface of the tpgqrt routines, and fix some numerical bug within the routine
......@@ -24,30 +24,42 @@
**/
#include "control/common.h"
#define V(m,n) V, m, n
#define T(m,n) T, m, n
#define A(m,n) A, m, n
#define B(m,n) B, m, n
#define V1(m,n) V1, m, n
#define T1(m,n) T1, m, n
#define V2(m,n) V2, m, n
#define T2(m,n) T2, m, n
#define Q1(m,n) Q1, m, n
#define Q2(m,n) Q2, m, n
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#else
#define DIAG(k) V1, k, k
#endif
/***************************************************************************//**
* Parallel tile QR factorization - dynamic scheduling
**/
void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B,
void morse_pztpgqrt( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2,
MORSE_sequence_t *sequence, MORSE_request_t *request )
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int ldak, ldvm, ldbm;
int tempkn, tempnn, tempmm, templm;
int ib;
int ldvk, ldvm;
int ldqk, ldqm;
int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
int ib, minMT;
/* Dimension of the first column */
int maxm = B->m - L;
int maxmt = (maxm % B->mb == 0) ? (maxm / B->mb) : (maxm / B->mb + 1);
int maxm = Q2->m - L;
int maxmt = (maxm % Q2->mb == 0) ? (maxm / Q2->mb) : (maxm / Q2->mb + 1);
int maxmtk;
morse = morse_context_self();
......@@ -57,18 +69,24 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M
ib = MORSE_IB;
if (V1->m > V1->n) {
minMT = V1->nt;
} else {
minMT = V1->mt;
}
/*
* ztpmqrt = A->nb * ib
* ztpmqrt = Q1->nb * ib
*/
ws_worker = A->nb * ib;
ws_worker = Q1->nb * ib;
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* ztpmqrt = 2 * A->nb * ib
* ztpmqrt = 2 * Q1->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
#endif
ws_worker *= sizeof(MORSE_Complex64_t);
......@@ -76,31 +94,91 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
for (k = V->nt-1; k >= 0; k--) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
ldak = BLKLDD(A, k);
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
#endif
for (k = V1->nt-1; k >= 0; k--) {
tempkm = k == V1->mt-1 ? V1->m-k*V1->mb : V1->mb;
tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb;
tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
ldvk = BLKLDD(V1, k);
ldqk = BLKLDD(Q1, k);
maxmtk = chameleon_min( B->mt, maxmt+k ) - 1;
/* Equivalent to the tsmqr step on Q1,Q2 */
maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
for (m = maxmtk; m > -1; m--) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
templm = m == maxmtk ? tempmm : 0;
ldvm = BLKLDD(V, m);
ldbm = BLKLDD(B, m);
tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb;
templm = m == maxmtk ? tempmm : 0;
ldvm = BLKLDD(V2, m);
ldqm = BLKLDD(Q2, m);
for (n = k; n < Q2->nt; n++) {
tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb;
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseNoTrans,
tempmm, tempnn, tempkn, templm, ib, T2->nb,
V2(m, k), ldvm,
T2(m, k), T2->mb,
Q1(k, n), ldqk,
Q2(m, n), ldqm );
}
}
for (n = k; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
for (m = Q1->mt - 1; m > k; m--) {
tempmm = m == Q1->mt-1 ? Q1->m-m*Q1->mb : Q1->mb;
ldvm = BLKLDD(V1, m);
ldqm = BLKLDD(Q1, m);
for (n = k; n < Q1->nt; n++) {
tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseConjTrans,
tempmm, tempnn, tempkn, templm, ib, T->nb,
V(m, k), ldvm,
T(m, k), T->mb,
A(k, n), ldak,
B(m, n), ldbm );
MorseLeft, MorseNoTrans,
tempmm, tempnn, tempkn, 0, ib, T1->nb,
V1(m, k), ldvm,
T1(m, k), T1->mb,
Q1(k, n), ldqk,
Q1(m, n), ldqm );
}
}
#if defined(CHAMELEON_COPY_DIAG)
MORSE_TASK_zlacpy(
&options,
MorseLower, tempkm, tempkk, V1->nb,
V1(k, k), ldvk,
DIAG(k), ldvk );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempkm, tempkk,
0., 1.,
DIAG(k), ldvk );
#endif
#endif
for (n = k; n < Q1->nt; n++) {
tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
MORSE_TASK_zunmqr(
&options,
MorseLeft, MorseNoTrans,
tempkm, tempnn, tempkk, ib, T1->nb,
DIAG(k), ldvk,
T1(k, k), T1->mb,
Q1(k, n), ldqk);
}
}
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -37,68 +37,85 @@
*******************************************************************************
*
* @param[in] M
* The number of rows of the matrices B, and V. M >= 0.
* The number of rows of the matrices Q2, and V2. M >= 0.
*
* @param[in] N
* The number of columns of the matrices B, and A. N >= 0.
* The number of columns of the matrices Q1, and Q1. N >= 0.
*
* @param[in] K
* The number of elementary reflectors whose product defines
* the matrix Q in the matrix V.
* The number of rows of the matrices V1, and Q1.
* The number of columns of the matrices V1, and V2.
*
* @param[in] L
* The number of rows of the upper trapezoidal part of V.
* The number of rows of the upper trapezoidal part of V2.
* MIN(M,N) >= L >= 0. See Further Details.
*
* @param[in] V
* @param[in] V1
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* MORSE_ztpqrt() in the first k rows of its array argument V.
* V is matrx of size M-by-K. The first M-L rows
* MORSE_ztpqrt().
* V1 is a matrix of size K-by-K.
*
* @param[in] LDV1
* The leading dimension of the array V1. LDV1 >= max(1,K).
*
* @param[int] descT1
* The auxiliary factorization data generated by the call to
* MORSE_zgeqrf() on V1.
*
* @param[in] V2
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* MORSE_ztpqrt() in the first k rows of its array argument V2.
* V2 is a matrix of size M-by-K. The first M-L rows
* are rectangular, and the last L rows are upper trapezoidal.
*
* @param[in] LDV
* The leading dimension of the array V. LDV >= max(1,K).
* @param[in] LDV2
* The leading dimension of the array V2. LDV2 >= max(1,M).
*
* @param[int] descT
* On exit, auxiliary factorization data, required by MORSE_zgeqrs to
* solve the system of equations, or by any function to apply the Q.
* @param[int] descT2
* The auxiliary factorization data, generated by the call to
* MORSE_ztpqrt() on V1 and V2, and associated to V2.
*
* @param[in,out] A
* A is COMPLEX*16 array, dimension (LDA,N)
* On entry, the K-by-N matrix A.
* On exit, A is overwritten by the corresponding block of
* Q*A. See Further Details.
* @param[in,out] Q1
* Q1 is COMPLEX*16 array, dimension (LDQ1,N)
* On entry, the K-by-N matrix Q1.
* On exit, Q1 is overwritten by the corresponding block of
* Q*Q1. See Further Details.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,K).
* @param[in] LDQ1
* The leading dimension of the array Q1. LDQ1 >= max(1,K).
*
* @param[in,out] B
* On entry, the pentagonal M-by-N matrix B.
* On exit, B contains Q.
* @param[in,out] Q2
* On entry, the pentagonal M-by-N matrix Q2.
* On exit, Q2 contains Q.
*
* @param[in] LDB
* The leading dimension of the array B. LDB >= max(1,M).
* @param[in] LDQ2
* The leading dimension of the array Q2. LDQ2 >= max(1,M).
*
* @par Further Details:
* =====================
*
* The input matrix Q is a (K+M)-by-N matrix
*
* Q = [ A ]
* [ B ]
* Q = [ Q1 ]
* [ Q2 ]
*
* where A is an identity matrix, and B is a M-by-N matrix of 0.
* V a matrix of householder reflectors with a pentagonal shape consisting of a
* (M-L)-by-K rectangular matrix V1 on top of a L-by-N
* Upper trapezoidal matrix V2:
* where Q1 is an identity matrix, and Q2 is a M-by-N matrix of 0.
* V is a matrix of householder reflectors with a pentagonal shape consisting
* of a K-by-K rectangular matrix V1 on top of a matrix V2 composed of
* (M-L)-by-K rectangular part on top of a L-by-N upper trapezoidal matrix:
*
* V = [ V1 ] <- (M-L)-by-N rectangular
* [ V2 ] <- L-by-N upper trapezoidal.
* V = [ V1 ] <- K-by-K rectangular
* [ V2a ] <- (M-L)-by-K rectangular
* [ V2b ] <- L-by-K upper trapezoidal.
*
* The upper trapezoidal matrix V2 consists of the first L rows of a
* K-by-K upper triangular matrix, where 0 <= L <= MIN(M,K). If L=0,
* V is rectangular M-by-K; if M=L=K, V is upper triangular.
* The upper trapezoidal part of the matrix V2 consists of the first L rows of
* a K-by-K upper triangular matrix, where 0 <= L <= MIN(M,K). If L=0, V2 is
* rectangular M-by-K; if M=L=K, V2 is upper triangular. Those are the two
* cases only handled for now.
*
*******************************************************************************
*
......@@ -117,17 +134,17 @@
*
******************************************************************************/
int MORSE_ztpgqrt( int M, int N, int K, int L,
MORSE_Complex64_t *V, int LDV,
MORSE_desc_t *descT,
MORSE_Complex64_t *A, int LDA,
MORSE_Complex64_t *B, int LDB )
MORSE_Complex64_t *V1, int LDV1, MORSE_desc_t *descT1,
MORSE_Complex64_t *V2, int LDV2, MORSE_desc_t *descT2,
MORSE_Complex64_t *Q1, int LDQ1,
MORSE_Complex64_t *Q2, int LDQ2 )
{
int NB;
int status;
MORSE_context_t *morse;
MORSE_sequence_t *sequence = NULL;
MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
MORSE_desc_t descA, descB, descV;
MORSE_desc_t descQ1, descQ2, descV1, descV2;
int minMK = chameleon_min( M, K );
morse = morse_context_self();
......@@ -153,18 +170,26 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
morse_error("MORSE_ztpgqrt", "illegal value of N");
return -4;
}
if (LDV < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDV");
if (K != N) {
morse_error("MORSE_ztpgqrt", "illegal value of K and N. K must be equal to N");
return -3;
}
if (LDV1 < chameleon_max(1, K)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDV1");
return -6;
}
if (LDA < chameleon_max(1, K)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDA");
if (LDV2 < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDV2");
return -9;
}
if (LDB < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDB");
if (LDQ1 < chameleon_max(1, K)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDQ1");
return -11;
}
if (LDQ2 < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDQ2");
return -13;
}
/* Quick return */
if (minMK == 0)
......@@ -183,34 +208,41 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
morse_sequence_create(morse, &sequence);
/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/
morse_zooplap2tile( descV, V, NB, NB, LDB, K, 0, 0, M, K, sequence, &request,
morse_desc_mat_free(&(descV)) );
morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, sequence, &request,
(morse_desc_mat_free(&(descV)),
morse_desc_mat_free(&(descA))) );
morse_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, sequence, &request,
(morse_desc_mat_free(&(descV)),
morse_desc_mat_free(&(descA)),
morse_desc_mat_free(&(descB))) );
morse_zooplap2tile( descV1, V1, NB, NB, LDV1, K, 0, 0, M, K, sequence, &request,
morse_desc_mat_free(&(descV1)) );
morse_zooplap2tile( descV2, V2, NB, NB, LDV2, K, 0, 0, M, K, sequence, &request,
(morse_desc_mat_free(&(descV1)),
morse_desc_mat_free(&(descV2))) );
morse_zooplap2tile( descQ1, Q1, NB, NB, LDQ1, N, 0, 0, K, N, sequence, &request,
(morse_desc_mat_free(&(descV1)),
morse_desc_mat_free(&(descV2)),
morse_desc_mat_free(&(descQ1))) );
morse_zooplap2tile( descQ2, Q2, NB, NB, LDQ2, N, 0, 0, M, N, sequence, &request,
(morse_desc_mat_free(&(descV1)),
morse_desc_mat_free(&(descV2)),
morse_desc_mat_free(&(descQ1)),
morse_desc_mat_free(&(descQ2))) );
/* } else {*/
/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/
/* morse_ziplap2tile( descQ1, Q1, NB, NB, LDQ1, N, 0, 0, M, N,*/
/* sequence, &request);*/
/* }*/
/* Call the tile interface */
MORSE_ztpgqrt_Tile_Async(L, &descV, descT, &descA, &descB, sequence, &request);
MORSE_ztpgqrt_Tile_Async(L, &descV1, descT1, &descV2, descT2, &descQ1, &descQ2, sequence, &request);
/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/
morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request);
morse_zooptile2lap(descB, B, NB, NB, LDB, N, sequence, &request);
morse_zooptile2lap(descQ1, Q1, NB, NB, LDQ1, N, sequence, &request);
morse_zooptile2lap(descQ2, Q2, NB, NB, LDQ2, N, sequence, &request);
morse_sequence_wait(morse, sequence);
morse_desc_mat_free(&descV);
morse_desc_mat_free(&descA);
morse_desc_mat_free(&descB);
morse_desc_mat_free(&descV1);
morse_desc_mat_free(&descV2);
morse_desc_mat_free(&descQ1);
morse_desc_mat_free(&descQ2);
/* } else {*/
/* morse_ziptile2lap( descV, V, NB, NB, LDV, K, sequence, &request);*/
/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/
/* morse_ziptile2lap( descB, B, NB, NB, LDB, N, sequence, &request);*/
/* morse_ziptile2lap( descV1, V1, NB, NB, LDV1, K, sequence, &request);*/
/* morse_ziptile2lap( descV2, V2, NB, NB, LDV2, K, sequence, &request);*/
/* morse_ziptile2lap( descQ1, Q1, NB, NB, LDQ1, N, sequence, &request);*/
/* morse_ziptile2lap( descQ2, Q2, NB, NB, LDQ2, N, sequence, &request);*/
/* morse_sequence_wait(morse, sequence);*/
/* }*/
......@@ -231,16 +263,6 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
*
*******************************************************************************
*
* @param[in,out] A
* On entry, the M-by-N matrix A.
* On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N
* upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the
* diagonal represent the unitary matrix Q as a product of elementary reflectors stored
* by tiles.
*
* @param[out] T
* On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system
* of equations.
*
*******************************************************************************
*
......@@ -257,7 +279,10 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
* @sa MORSE_zgeqrs_Tile
*
******************************************************************************/
int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B )
int MORSE_ztpgqrt_Tile( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2 )
{
MORSE_context_t *morse;
MORSE_sequence_t *sequence = NULL;
......@@ -270,10 +295,10 @@ int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A
return MORSE_ERR_NOT_INITIALIZED;
}
morse_sequence_create(morse, &sequence);
MORSE_ztpgqrt_Tile_Async(L, V, T, A, B, sequence, &request);
MORSE_ztpgqrt_Tile_Async(L, V1, T1, V2, T2, Q1, Q2, sequence, &request);
morse_sequence_wait(morse, sequence);
RUNTIME_desc_getoncpu(A);
RUNTIME_desc_getoncpu(B);
RUNTIME_desc_getoncpu(Q1);
RUNTIME_desc_getoncpu(Q2);
status = sequence->status;
morse_sequence_destroy(morse, sequence);
......@@ -309,7 +334,10 @@ int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A
* @sa MORSE_zgeqrs_Tile_Async
*
******************************************************************************/
int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B,
int MORSE_ztpgqrt_Tile_Async( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2,
MORSE_sequence_t *sequence, MORSE_request_t *request )
{
MORSE_context_t *morse;
......@@ -334,37 +362,47 @@ int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_des
return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED);
/* Check descriptors for correctness */
if (morse_desc_check(V) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid third descriptor");
if (morse_desc_check(V1) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid V1 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(T1) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid T1 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(V2) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid V2 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(T) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid third descriptor");
if (morse_desc_check(T2) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid T2 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(A) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid first descriptor");
if (morse_desc_check(Q1) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid Q1 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(B) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid second descriptor");
if (morse_desc_check(Q2) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid Q2 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (A->nb != A->mb) {
if (Q1->nb != Q1->mb) {
morse_error("MORSE_ztpgqrt_Tile", "only square tiles supported");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (((B->m - L) % B->mb) != 0) {
if (((Q2->m - L) % Q2->mb) != 0) {
morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
/* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */
morse_pztpgqrt(L, V, T, A, B, sequence, request);
morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request );
morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request );
morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, sequence, request );
/* } */
/* else { */
/* morse_pztpgqrtrh(A, T, MORSE_RHBLK, sequence, request); */
/* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */
/* } */
return MORSE_SUCCESS;
......
......@@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO
void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
......@@ -102,7 +102,7 @@ int MORSE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64
int MORSE_zsyr2k(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta, MORSE_Complex64_t *C, int LDC);
int MORSE_zsysv(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
int MORSE_zsytrs(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
int MORSE_ztpgqrt( int M, int N, int K, int L, MORSE_Complex64_t *V, int LDV, MORSE_desc_t *descT, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB );
int MORSE_ztpgqrt( int M, int N, int K, int L, MORSE_Complex64_t *V1, int LDV1, MORSE_desc_t *descT1, MORSE_Complex64_t *V2, int LDV2, MORSE_desc_t *descT2, MORSE_Complex64_t *Q1, int LDQ1, MORSE_Complex64_t *Q2, int LDQ2 );
int MORSE_ztpqrt( int M, int N, int L, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_desc_t *descT );
int MORSE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB);
int MORSE_ztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int N, int NRHS, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
......@@ -181,7 +181,7 @@ int MORSE_zsyrk_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha,
int MORSE_zsyr2k_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C);
int MORSE_zsysv_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
int MORSE_zsytrs_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B );
int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2 );
int MORSE_ztpqrt_Tile( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T );
int MORSE_ztradd_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B);
int MORSE_ztrmm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B);
......@@ -257,7 +257,7 @@ int MORSE_zsytrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, M
int MORSE_zsymm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zsyrk_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zsyr2k_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request );
int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request );
int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request );
int MORSE_ztradd_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_ztrmm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment