Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 4b3a2f82 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Merge branch 'tpqrt/hotfix' into 'master'

Modify the interface of the tpgqrt routines, and fix some numerical bug within the routine

See merge request !8
parents 14486691 65aadc18
No related branches found
No related tags found
1 merge request!8Modify the interface of the tpgqrt routines, and fix some numerical bug within the routine
Pipeline #
......@@ -24,30 +24,42 @@
**/
#include "control/common.h"
#define V(m,n) V, m, n
#define T(m,n) T, m, n
#define A(m,n) A, m, n
#define B(m,n) B, m, n
#define V1(m,n) V1, m, n
#define T1(m,n) T1, m, n
#define V2(m,n) V2, m, n
#define T2(m,n) T2, m, n
#define Q1(m,n) Q1, m, n
#define Q2(m,n) Q2, m, n
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#else
#define DIAG(k) V1, k, k
#endif
/***************************************************************************//**
* Parallel tile QR factorization - dynamic scheduling
**/
void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B,
void morse_pztpgqrt( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2,
MORSE_sequence_t *sequence, MORSE_request_t *request )
{
MORSE_context_t *morse;
MORSE_option_t options;
size_t ws_worker = 0;
size_t ws_host = 0;
MORSE_desc_t *DIAG = NULL;
int k, m, n;
int ldak, ldvm, ldbm;
int tempkn, tempnn, tempmm, templm;
int ib;
int ldvk, ldvm;
int ldqk, ldqm;
int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
int ib, minMT;
/* Dimension of the first column */
int maxm = B->m - L;
int maxmt = (maxm % B->mb == 0) ? (maxm / B->mb) : (maxm / B->mb + 1);
int maxm = Q2->m - L;
int maxmt = (maxm % Q2->mb == 0) ? (maxm / Q2->mb) : (maxm / Q2->mb + 1);
int maxmtk;
morse = morse_context_self();
......@@ -57,18 +69,24 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M
ib = MORSE_IB;
if (V1->m > V1->n) {
minMT = V1->nt;
} else {
minMT = V1->mt;
}
/*
* ztpmqrt = A->nb * ib
* ztpmqrt = Q1->nb * ib
*/
ws_worker = A->nb * ib;
ws_worker = Q1->nb * ib;
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* ztpmqrt = 2 * A->nb * ib
* ztpmqrt = 2 * Q1->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
#endif
ws_worker *= sizeof(MORSE_Complex64_t);
......@@ -76,31 +94,91 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M
RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
for (k = V->nt-1; k >= 0; k--) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
ldak = BLKLDD(A, k);
#if defined(CHAMELEON_COPY_DIAG)
/* necessary to avoid dependencies between tasks regarding the diag tile */
DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
#endif
for (k = V1->nt-1; k >= 0; k--) {
tempkm = k == V1->mt-1 ? V1->m-k*V1->mb : V1->mb;
tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb;
tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
ldvk = BLKLDD(V1, k);
ldqk = BLKLDD(Q1, k);
maxmtk = chameleon_min( B->mt, maxmt+k ) - 1;
/* Equivalent to the tsmqr step on Q1,Q2 */
maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
for (m = maxmtk; m > -1; m--) {
tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
templm = m == maxmtk ? tempmm : 0;
ldvm = BLKLDD(V, m);
ldbm = BLKLDD(B, m);
tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb;
templm = m == maxmtk ? tempmm : 0;
ldvm = BLKLDD(V2, m);
ldqm = BLKLDD(Q2, m);
for (n = k; n < Q2->nt; n++) {
tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb;
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseNoTrans,
tempmm, tempnn, tempkn, templm, ib, T2->nb,
V2(m, k), ldvm,
T2(m, k), T2->mb,
Q1(k, n), ldqk,
Q2(m, n), ldqm );
}
}
for (n = k; n < B->nt; n++) {
tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
for (m = Q1->mt - 1; m > k; m--) {
tempmm = m == Q1->mt-1 ? Q1->m-m*Q1->mb : Q1->mb;
ldvm = BLKLDD(V1, m);
ldqm = BLKLDD(Q1, m);
for (n = k; n < Q1->nt; n++) {
tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
MORSE_TASK_ztpmqrt(
&options,
MorseLeft, MorseConjTrans,
tempmm, tempnn, tempkn, templm, ib, T->nb,
V(m, k), ldvm,
T(m, k), T->mb,
A(k, n), ldak,
B(m, n), ldbm );
MorseLeft, MorseNoTrans,
tempmm, tempnn, tempkn, 0, ib, T1->nb,
V1(m, k), ldvm,
T1(m, k), T1->mb,
Q1(k, n), ldqk,
Q1(m, n), ldqm );
}
}
#if defined(CHAMELEON_COPY_DIAG)
MORSE_TASK_zlacpy(
&options,
MorseLower, tempkm, tempkk, V1->nb,
V1(k, k), ldvk,
DIAG(k), ldvk );
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseUpper, tempkm, tempkk,
0., 1.,
DIAG(k), ldvk );
#endif
#endif
for (n = k; n < Q1->nt; n++) {
tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
MORSE_TASK_zunmqr(
&options,
MorseLeft, MorseNoTrans,
tempkm, tempnn, tempkk, ib, T1->nb,
DIAG(k), ldvk,
T1(k, k), T1->mb,
Q1(k, n), ldqk);
}
}
RUNTIME_options_ws_free(&options);
RUNTIME_options_finalize(&options, morse);
MORSE_TASK_dataflush_all();
#if defined(CHAMELEON_COPY_DIAG)
MORSE_Sequence_Wait(sequence);
morse_desc_mat_free(DIAG);
free(DIAG);
#endif
(void)DIAG;
}
......@@ -37,68 +37,85 @@
*******************************************************************************
*
* @param[in] M
* The number of rows of the matrices B, and V. M >= 0.
* The number of rows of the matrices Q2, and V2. M >= 0.
*
* @param[in] N
* The number of columns of the matrices B, and A. N >= 0.
* The number of columns of the matrices Q1, and Q1. N >= 0.
*
* @param[in] K
* The number of elementary reflectors whose product defines
* the matrix Q in the matrix V.
* The number of rows of the matrices V1, and Q1.
* The number of columns of the matrices V1, and V2.
*
* @param[in] L
* The number of rows of the upper trapezoidal part of V.
* The number of rows of the upper trapezoidal part of V2.
* MIN(M,N) >= L >= 0. See Further Details.
*
* @param[in] V
* @param[in] V1
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* MORSE_ztpqrt() in the first k rows of its array argument V.
* V is matrx of size M-by-K. The first M-L rows
* MORSE_ztpqrt().
* V1 is a matrix of size K-by-K.
*
* @param[in] LDV1
* The leading dimension of the array V1. LDV1 >= max(1,K).
*
* @param[int] descT1
* The auxiliary factorization data generated by the call to
* MORSE_zgeqrf() on V1.
*
* @param[in] V2
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* MORSE_ztpqrt() in the first k rows of its array argument V2.
* V2 is a matrix of size M-by-K. The first M-L rows
* are rectangular, and the last L rows are upper trapezoidal.
*
* @param[in] LDV
* The leading dimension of the array V. LDV >= max(1,K).
* @param[in] LDV2
* The leading dimension of the array V2. LDV2 >= max(1,M).
*
* @param[int] descT
* On exit, auxiliary factorization data, required by MORSE_zgeqrs to
* solve the system of equations, or by any function to apply the Q.
* @param[int] descT2
* The auxiliary factorization data, generated by the call to
* MORSE_ztpqrt() on V1 and V2, and associated to V2.
*
* @param[in,out] A
* A is COMPLEX*16 array, dimension (LDA,N)
* On entry, the K-by-N matrix A.
* On exit, A is overwritten by the corresponding block of
* Q*A. See Further Details.
* @param[in,out] Q1
* Q1 is COMPLEX*16 array, dimension (LDQ1,N)
* On entry, the K-by-N matrix Q1.
* On exit, Q1 is overwritten by the corresponding block of
* Q*Q1. See Further Details.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,K).
* @param[in] LDQ1
* The leading dimension of the array Q1. LDQ1 >= max(1,K).
*
* @param[in,out] B
* On entry, the pentagonal M-by-N matrix B.
* On exit, B contains Q.
* @param[in,out] Q2
* On entry, the pentagonal M-by-N matrix Q2.
* On exit, Q2 contains Q.
*
* @param[in] LDB
* The leading dimension of the array B. LDB >= max(1,M).
* @param[in] LDQ2
* The leading dimension of the array Q2. LDQ2 >= max(1,M).
*
* @par Further Details:
* =====================
*
* The input matrix Q is a (K+M)-by-N matrix
*
* Q = [ A ]
* [ B ]
* Q = [ Q1 ]
* [ Q2 ]
*
* where A is an identity matrix, and B is a M-by-N matrix of 0.
* V a matrix of householder reflectors with a pentagonal shape consisting of a
* (M-L)-by-K rectangular matrix V1 on top of a L-by-N
* Upper trapezoidal matrix V2:
* where Q1 is an identity matrix, and Q2 is a M-by-N matrix of 0.
* V is a matrix of householder reflectors with a pentagonal shape consisting
* of a K-by-K rectangular matrix V1 on top of a matrix V2 composed of
* (M-L)-by-K rectangular part on top of a L-by-N upper trapezoidal matrix:
*
* V = [ V1 ] <- (M-L)-by-N rectangular
* [ V2 ] <- L-by-N upper trapezoidal.
* V = [ V1 ] <- K-by-K rectangular
* [ V2a ] <- (M-L)-by-K rectangular
* [ V2b ] <- L-by-K upper trapezoidal.
*
* The upper trapezoidal matrix V2 consists of the first L rows of a
* K-by-K upper triangular matrix, where 0 <= L <= MIN(M,K). If L=0,
* V is rectangular M-by-K; if M=L=K, V is upper triangular.
* The upper trapezoidal part of the matrix V2 consists of the first L rows of
* a K-by-K upper triangular matrix, where 0 <= L <= MIN(M,K). If L=0, V2 is
* rectangular M-by-K; if M=L=K, V2 is upper triangular. Those are the two
* cases only handled for now.
*
*******************************************************************************
*
......@@ -117,17 +134,17 @@
*
******************************************************************************/
int MORSE_ztpgqrt( int M, int N, int K, int L,
MORSE_Complex64_t *V, int LDV,
MORSE_desc_t *descT,
MORSE_Complex64_t *A, int LDA,
MORSE_Complex64_t *B, int LDB )
MORSE_Complex64_t *V1, int LDV1, MORSE_desc_t *descT1,
MORSE_Complex64_t *V2, int LDV2, MORSE_desc_t *descT2,
MORSE_Complex64_t *Q1, int LDQ1,
MORSE_Complex64_t *Q2, int LDQ2 )
{
int NB;
int status;
MORSE_context_t *morse;
MORSE_sequence_t *sequence = NULL;
MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
MORSE_desc_t descA, descB, descV;
MORSE_desc_t descQ1, descQ2, descV1, descV2;
int minMK = chameleon_min( M, K );
morse = morse_context_self();
......@@ -153,18 +170,26 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
morse_error("MORSE_ztpgqrt", "illegal value of N");
return -4;
}
if (LDV < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDV");
if (K != N) {
morse_error("MORSE_ztpgqrt", "illegal value of K and N. K must be equal to N");
return -3;
}
if (LDV1 < chameleon_max(1, K)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDV1");
return -6;
}
if (LDA < chameleon_max(1, K)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDA");
if (LDV2 < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDV2");
return -9;
}
if (LDB < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDB");
if (LDQ1 < chameleon_max(1, K)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDQ1");
return -11;
}
if (LDQ2 < chameleon_max(1, M)) {
morse_error("MORSE_ztpgqrt", "illegal value of LDQ2");
return -13;
}
/* Quick return */
if (minMK == 0)
......@@ -183,34 +208,41 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
morse_sequence_create(morse, &sequence);
/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/
morse_zooplap2tile( descV, V, NB, NB, LDB, K, 0, 0, M, K, sequence, &request,
morse_desc_mat_free(&(descV)) );
morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, sequence, &request,
(morse_desc_mat_free(&(descV)),
morse_desc_mat_free(&(descA))) );
morse_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, sequence, &request,
(morse_desc_mat_free(&(descV)),
morse_desc_mat_free(&(descA)),
morse_desc_mat_free(&(descB))) );
morse_zooplap2tile( descV1, V1, NB, NB, LDV1, K, 0, 0, M, K, sequence, &request,
morse_desc_mat_free(&(descV1)) );
morse_zooplap2tile( descV2, V2, NB, NB, LDV2, K, 0, 0, M, K, sequence, &request,
(morse_desc_mat_free(&(descV1)),
morse_desc_mat_free(&(descV2))) );
morse_zooplap2tile( descQ1, Q1, NB, NB, LDQ1, N, 0, 0, K, N, sequence, &request,
(morse_desc_mat_free(&(descV1)),
morse_desc_mat_free(&(descV2)),
morse_desc_mat_free(&(descQ1))) );
morse_zooplap2tile( descQ2, Q2, NB, NB, LDQ2, N, 0, 0, M, N, sequence, &request,
(morse_desc_mat_free(&(descV1)),
morse_desc_mat_free(&(descV2)),
morse_desc_mat_free(&(descQ1)),
morse_desc_mat_free(&(descQ2))) );
/* } else {*/
/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/
/* morse_ziplap2tile( descQ1, Q1, NB, NB, LDQ1, N, 0, 0, M, N,*/
/* sequence, &request);*/
/* }*/
/* Call the tile interface */
MORSE_ztpgqrt_Tile_Async(L, &descV, descT, &descA, &descB, sequence, &request);
MORSE_ztpgqrt_Tile_Async(L, &descV1, descT1, &descV2, descT2, &descQ1, &descQ2, sequence, &request);
/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/
morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request);
morse_zooptile2lap(descB, B, NB, NB, LDB, N, sequence, &request);
morse_zooptile2lap(descQ1, Q1, NB, NB, LDQ1, N, sequence, &request);
morse_zooptile2lap(descQ2, Q2, NB, NB, LDQ2, N, sequence, &request);
morse_sequence_wait(morse, sequence);
morse_desc_mat_free(&descV);
morse_desc_mat_free(&descA);
morse_desc_mat_free(&descB);
morse_desc_mat_free(&descV1);
morse_desc_mat_free(&descV2);
morse_desc_mat_free(&descQ1);
morse_desc_mat_free(&descQ2);
/* } else {*/
/* morse_ziptile2lap( descV, V, NB, NB, LDV, K, sequence, &request);*/
/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/
/* morse_ziptile2lap( descB, B, NB, NB, LDB, N, sequence, &request);*/
/* morse_ziptile2lap( descV1, V1, NB, NB, LDV1, K, sequence, &request);*/
/* morse_ziptile2lap( descV2, V2, NB, NB, LDV2, K, sequence, &request);*/
/* morse_ziptile2lap( descQ1, Q1, NB, NB, LDQ1, N, sequence, &request);*/
/* morse_ziptile2lap( descQ2, Q2, NB, NB, LDQ2, N, sequence, &request);*/
/* morse_sequence_wait(morse, sequence);*/
/* }*/
......@@ -231,16 +263,6 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
*
*******************************************************************************
*
* @param[in,out] A
* On entry, the M-by-N matrix A.
* On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N
* upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the
* diagonal represent the unitary matrix Q as a product of elementary reflectors stored
* by tiles.
*
* @param[out] T
* On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system
* of equations.
*
*******************************************************************************
*
......@@ -257,7 +279,10 @@ int MORSE_ztpgqrt( int M, int N, int K, int L,
* @sa MORSE_zgeqrs_Tile
*
******************************************************************************/
int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B )
int MORSE_ztpgqrt_Tile( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2 )
{
MORSE_context_t *morse;
MORSE_sequence_t *sequence = NULL;
......@@ -270,10 +295,10 @@ int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A
return MORSE_ERR_NOT_INITIALIZED;
}
morse_sequence_create(morse, &sequence);
MORSE_ztpgqrt_Tile_Async(L, V, T, A, B, sequence, &request);
MORSE_ztpgqrt_Tile_Async(L, V1, T1, V2, T2, Q1, Q2, sequence, &request);
morse_sequence_wait(morse, sequence);
RUNTIME_desc_getoncpu(A);
RUNTIME_desc_getoncpu(B);
RUNTIME_desc_getoncpu(Q1);
RUNTIME_desc_getoncpu(Q2);
status = sequence->status;
morse_sequence_destroy(morse, sequence);
......@@ -309,7 +334,10 @@ int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A
* @sa MORSE_zgeqrs_Tile_Async
*
******************************************************************************/
int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B,
int MORSE_ztpgqrt_Tile_Async( int L,
MORSE_desc_t *V1, MORSE_desc_t *T1,
MORSE_desc_t *V2, MORSE_desc_t *T2,
MORSE_desc_t *Q1, MORSE_desc_t *Q2,
MORSE_sequence_t *sequence, MORSE_request_t *request )
{
MORSE_context_t *morse;
......@@ -334,37 +362,47 @@ int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_des
return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED);
/* Check descriptors for correctness */
if (morse_desc_check(V) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid third descriptor");
if (morse_desc_check(V1) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid V1 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(T1) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid T1 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(V2) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid V2 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(T) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid third descriptor");
if (morse_desc_check(T2) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid T2 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(A) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid first descriptor");
if (morse_desc_check(Q1) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid Q1 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (morse_desc_check(B) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid second descriptor");
if (morse_desc_check(Q2) != MORSE_SUCCESS) {
morse_error("MORSE_ztpgqrt_Tile", "invalid Q2 descriptor");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (A->nb != A->mb) {
if (Q1->nb != Q1->mb) {
morse_error("MORSE_ztpgqrt_Tile", "only square tiles supported");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (((B->m - L) % B->mb) != 0) {
if ( (L != 0) && (((Q2->m - L) % Q2->mb) != 0) ) {
morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
/* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */
morse_pztpgqrt(L, V, T, A, B, sequence, request);
morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request );
morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request );
morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, sequence, request );
/* } */
/* else { */
/* morse_pztpgqrtrh(A, T, MORSE_RHBLK, sequence, request); */
/* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */
/* } */
return MORSE_SUCCESS;
......
......@@ -348,7 +348,7 @@ int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc
morse_error("MORSE_ztpqrt_Tile", "only square tiles supported");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
if (((B->m - L) % B->mb) != 0) {
if ( (L != 0) && (((B->m - L) % B->mb) != 0) ) {
morse_error("MORSE_ztpqrt_Tile", "Triangular part must be aligned with tiles");
return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
}
......
......@@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO
void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request );
void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
......@@ -102,7 +102,7 @@ int MORSE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64
int MORSE_zsyr2k(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta, MORSE_Complex64_t *C, int LDC);
int MORSE_zsysv(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
int MORSE_zsytrs(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
int MORSE_ztpgqrt( int M, int N, int K, int L, MORSE_Complex64_t *V, int LDV, MORSE_desc_t *descT, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB );
int MORSE_ztpgqrt( int M, int N, int K, int L, MORSE_Complex64_t *V1, int LDV1, MORSE_desc_t *descT1, MORSE_Complex64_t *V2, int LDV2, MORSE_desc_t *descT2, MORSE_Complex64_t *Q1, int LDQ1, MORSE_Complex64_t *Q2, int LDQ2 );
int MORSE_ztpqrt( int M, int N, int L, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_desc_t *descT );
int MORSE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB);
int MORSE_ztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int N, int NRHS, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
......@@ -181,7 +181,7 @@ int MORSE_zsyrk_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha,
int MORSE_zsyr2k_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C);
int MORSE_zsysv_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
int MORSE_zsytrs_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B );
int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2 );
int MORSE_ztpqrt_Tile( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T );
int MORSE_ztradd_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B);
int MORSE_ztrmm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B);
......@@ -257,7 +257,7 @@ int MORSE_zsytrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, M
int MORSE_zsymm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zsyrk_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zsyr2k_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request );
int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request );
int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request );
int MORSE_ztradd_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_ztrmm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
......@@ -49,6 +49,7 @@ void MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
STARPU_VALUE, &N, sizeof(int),
STARPU_VALUE, &K, sizeof(int),
STARPU_VALUE, &L, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
STARPU_VALUE, &ldv, sizeof(int),
STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
......
......@@ -43,6 +43,7 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options,
STARPU_VALUE, &M, sizeof(int),
STARPU_VALUE, &N, sizeof(int),
STARPU_VALUE, &L, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
......
......@@ -89,6 +89,7 @@ set(ZSRC
#testing_zhegst.c
#testing_zhegv.c
#testing_zhegvd.c
testing_zgeqrf_qdwh.c
)
# Add include and link directories
......
......@@ -304,6 +304,9 @@ int main (int argc, char **argv)
/* else if ( strcmp(func, "GETMI") == 0 ) { */
/* info += testing_zgetmi( argc, argv ); */
/* } */
else if ( strcmp(func, "GEQRF_QDWH") == 0 ) {
info += testing_zgeqrf_qdwh( argc, argv );
}
else {
fprintf(stderr, "Function unknown\n");
}
......
......@@ -108,6 +108,8 @@ int testing_zhegst(int argc, char **argv);
int testing_zgecfi(int argc, char **argv);
int testing_zgetmi(int argc, char **argv);
int testing_zgeqrf_qdwh(int argc, char **argv);
#ifdef DOUBLE
int testing_zcposv(int argc, char **argv);
int testing_zcgesv(int argc, char **argv);
......
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file testing_zgels.c
*
* MORSE testing routines
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Bilel Hadri
* @author Hatem Ltaief
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
**/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <morse.h>
#include <coreblas/include/cblas.h>
#include <coreblas/include/lapacke.h>
#include <coreblas/include/coreblas.h>
#include <coreblas/include/coreblas_z.h>
#include "testing_zauxiliary.h"
#undef REAL
#define COMPLEX
static int check_orthogonality(int, int, const MORSE_Complex64_t*, int, double);
static int check_factorization(int, int, const MORSE_Complex64_t*, int, const MORSE_Complex64_t*, int, MORSE_Complex64_t*, int, double);
int testing_zgeqrf_qdwh(int argc, char **argv)
{
int hres = 0;
if ( argc != 4 ) {
USAGE("GEQRF_QDWH", "optid M NB LDA",
" - optid: Take into account the fact that A2 is Id or not\n"
" - M : number of rows of the matrix A1 and A2\n"
" - NB : tile size\n"
" - IB : inner tile size\n");
return -1;
}
int optid = atoi(argv[0]) ? 1: 0;
int M = atoi(argv[1]);
int NB = atoi(argv[2]);
int IB = atoi(argv[3]);
int MxM = M * M;
int LDA = 2*M;
double eps;
int info_ortho, info_solution, info_factorization;
int i, j;
/**
* Compute A = QR with
*
* A = [ A1 ] and Q = [ Q1 ]
* [ A2 ] = [ Q2 ]
*
* and where A1 is the same size as A2
*
*/
MORSE_Complex64_t *A1 = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t));
MORSE_Complex64_t *A2 = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t));
MORSE_Complex64_t *Q1 = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t));
MORSE_Complex64_t *Q2 = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t));
MORSE_Complex64_t *A = (MORSE_Complex64_t *)malloc(2*M*M*sizeof(MORSE_Complex64_t));
MORSE_Complex64_t *Q;
MORSE_desc_t *T1, *T2;
/* Check if unable to allocate memory */
if ( (!A) || (!A1) || (!A2) || (!Q1) || (!Q2) ){
printf("Out of Memory \n ");
return -2;
}
MORSE_Disable(MORSE_AUTOTUNING);
MORSE_Set(MORSE_TILE_SIZE, NB);
MORSE_Set(MORSE_INNER_BLOCK_SIZE, IB);
MORSE_Alloc_Workspace_zgels(M, M, &T1, 1, 1);
MORSE_Alloc_Workspace_zgels(M, M, &T2, 1, 1);
eps = LAPACKE_dlamch('e');
/* Initialize A1, A2, and A */
LAPACKE_zlarnv_work(IONE, ISEED, MxM, A1);
LAPACKE_zlaset_work( LAPACK_COL_MAJOR, 'A', M, M, 0., 1., A2, M );
LAPACKE_zlacpy_work( LAPACK_COL_MAJOR, 'A', M, M, A1, M, A, LDA );
LAPACKE_zlacpy_work( LAPACK_COL_MAJOR, 'A', M, M, A2, M, A + M, LDA );
/* Factorize A */
MORSE_zgeqrf( M, M, A1, M, T1 );
MORSE_ztpqrt( M, M, optid ? M : 0,
A1, M,
A2, M, T2 );
/* Generate the Q */
MORSE_ztpgqrt( M, M, M, (optid) ? M : 0,
A1, M, T1, A2, M, T2, Q1, M, Q2, M );
/* Copy Q in a single matrix */
Q = (MORSE_Complex64_t *)malloc(2*M*M*sizeof(MORSE_Complex64_t));
LAPACKE_zlacpy_work( LAPACK_COL_MAJOR, 'A', M, M, Q1, M, Q, LDA );
free(Q1);
LAPACKE_zlacpy_work( LAPACK_COL_MAJOR, 'A', M, M, Q2, M, Q + M, LDA );
free(Q2);
printf("\n");
printf("------ TESTS FOR CHAMELEON ZGELS ROUTINE ------- \n");
printf(" Size of the Matrix %d by %d\n", M, M);
printf("\n");
printf(" The matrix A is randomly generated for each test.\n");
printf("============\n");
printf(" The relative machine precision (eps) is to be %e \n",eps);
printf(" Computational tests pass if scaled residuals are less than 60.\n");
/* Check the orthogonality, factorization and the solution */
info_ortho = check_orthogonality( 2*M, M, Q, LDA, eps );
info_factorization = check_factorization( 2*M, M, A, LDA, A1, M, Q, LDA, eps );
if ((info_factorization == 0) & (info_ortho == 0)) {
printf("***************************************************\n");
printf(" ---- TESTING ZGELS ...................... PASSED !\n");
printf("***************************************************\n");
}
else {
printf("************************************************\n");
printf(" - TESTING ZGELS ... FAILED !\n"); hres++;
printf("************************************************\n");
}
free(A); free(A1); free(A2); free(Q);
MORSE_Dealloc_Workspace( &T1 );
MORSE_Dealloc_Workspace( &T2 );
return hres;
}
/*-------------------------------------------------------------------
* Check the orthogonality of Q
*/
static int
check_orthogonality( int M, int N,
const MORSE_Complex64_t *Q, int LDQ,
double eps )
{
MORSE_Complex64_t *Id;
double alpha, beta;
double normQ;
int info_ortho;
int i;
int minMN = min(M, N);
double *work = (double *)malloc(minMN*sizeof(double));
alpha = 1.0;
beta = -1.0;
/* Build the idendity matrix */
Id = (MORSE_Complex64_t *) malloc(minMN*minMN*sizeof(MORSE_Complex64_t));
LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', minMN, minMN, 0., 1., Id, minMN );
/* Perform Id - Q'Q */
if (M >= N)
cblas_zherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, beta, Q, LDQ, alpha, Id, N);
else
cblas_zherk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, beta, Q, LDQ, alpha, Id, M);
normQ = LAPACKE_zlansy_work( LAPACK_COL_MAJOR, 'I', 'U', minMN, Id, minMN, work );
printf("============\n");
printf("Checking the orthogonality of Q \n");
printf("||Id-Q'*Q||_oo / (N*eps) = %e \n", normQ/(minMN*eps));
if ( isnan(normQ / (minMN * eps)) || isinf(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 60.0) ) {
printf("-- Orthogonality is suspicious ! \n");
info_ortho=1;
}
else {
printf("-- Orthogonality is CORRECT ! \n");
info_ortho=0;
}
free(work); free(Id);
return info_ortho;
}
/*------------------------------------------------------------
* Check the factorization QR
*/
static int
check_factorization(int M, int N,
const MORSE_Complex64_t *A, int LDA,
const MORSE_Complex64_t *R, int LDR,
MORSE_Complex64_t *Q, int LDQ,
double eps )
{
double Anorm, Rnorm;
MORSE_Complex64_t alpha, beta;
int info_factorization;
int i,j;
double *work = (double *)malloc(max(M,N)*sizeof(double));
alpha = 1.0;
beta = 0.0;
if (M >= N) {
/* Perform Q = Q * R */
cblas_ztrmm( CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, M, N, CBLAS_SADDR(alpha), R, LDR, Q, LDQ);
}
else {
/* Perform Q = L * Q */
cblas_ztrmm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, M, N, CBLAS_SADDR(alpha), R, LDR, Q, LDQ);
}
/* Compute the Residual */
CORE_zgeadd( MorseNoTrans, M, N, -1., A, LDA, 1., Q, LDQ );
Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Q, LDQ, work );
Anorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, A, LDA, work );
if (M >= N) {
printf("============\n");
printf("Checking the QR Factorization \n");
printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
}
else {
printf("============\n");
printf("Checking the LQ Factorization \n");
printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
}
if (isnan(Rnorm / (Anorm * N *eps)) || isinf(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 60.0) ) {
printf("-- Factorization is suspicious ! \n");
info_factorization = 1;
}
else {
printf("-- Factorization is CORRECT ! \n");
info_factorization = 0;
}
free(work);
return info_factorization;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment