Commit 434f428d authored by BOUCHERIE Raphael's avatar BOUCHERIE Raphael

testing works for all, need to test more

parent abf1d506
......@@ -125,6 +125,7 @@ set(ZSRC
pztrtri.c
pzpotrimm.c
pzunglq.c
pzunglq_param.c
pzunglqrh.c
pzungqr.c
pzungqr_param.c
......@@ -143,6 +144,7 @@ set(ZSRC
zgelqf.c
zgelqf_param.c
zgelqs.c
zgelqs_param.c
zgeqrf.c
zgeqrf_param.c
zgeqrs.c
......@@ -175,6 +177,7 @@ set(ZSRC
zsytrs.c
ztrtri.c
zunglq.c
zunglq_param.c
zungqr.c
zungqr_param.c
zunmlq.c
......
......@@ -66,12 +66,12 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
ib = MORSE_IB;
/*
* zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib
* ztsqrt = A->nb * (ib+1)
* zttqrt = A->nb * (ib+1)
* ztsmqr = A->nb * ib
* zttmqr = A->nb * ib
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
* ztslqt = A->nb * (ib+1)
* zttlqt = A->nb * (ib+1)
* ztsmlq = A->nb * ib
* zttmlq = A->nb * ib
*/
ws_worker = A->nb * (ib+1);
......@@ -79,8 +79,8 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztsmlq = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file pzunglq_param.c
*
* MORSE auxiliary routines
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Dulceneia Becker
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2011-05-24
* @precisions normal z -> s d c
*
**/
#include "control/common.h"
#define A(m,n) A, (m), (n)
#define Q(m,n) Q, (m), (n)
#define TS(m,n) TS, (m), (n)
#define TT(m,n) TT, (m), (n)
#if defined(CHAMELEON_COPY_DIAG)
#define D(m,n) D, ((n)/BS), 0
#else
#define D(m,n) A, (m), (n)
#endif
/**
 *  Parallel construction of Q using tile V - dynamic scheduling.
 *
 *  For k going from min(A->mt, A->nt)-1 down to 0, submits the tile kernels
 *  ztsmlq / zttmlq (chosen per tile from qrtree->gettype) and then zunmlq,
 *  all with MorseRight / MorseNoTrans, on the tiles Q(m, .) for m >= k.
 *
 *  @param[in] qrtree
 *          Reduction tree; queried through libhqr_treewalk, currpiv, gettype,
 *          getnbgeqrf and getm.
 *  @param[in] A
 *          Descriptor whose tiles A(k, n) are passed to the kernels as the
 *          reflector tiles.
 *  @param[in,out] Q
 *          Descriptor whose tiles Q(m, p) and Q(m, n) are passed to the
 *          update kernels.
 *  @param[in] TS
 *          Descriptor passed to the ztsmlq and zunmlq kernels.
 *  @param[in] TT
 *          Descriptor passed to the zttmlq kernels.
 *  @param[in] sequence
 *          The routine returns immediately, submitting nothing, when
 *          sequence->status is not MORSE_SUCCESS.
 *  @param[in] request
 *          Forwarded to RUNTIME_options_init.
 */
void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q,
                         MORSE_desc_t *TS, MORSE_desc_t *TT,
                         MORSE_sequence_t *sequence, MORSE_request_t *request)
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    size_t ws_worker = 0;
    size_t ws_host = 0;
    MORSE_desc_t *D = NULL;

    int k, m, n, i, p;
    int K;
    int ldak, ldqm;
    int tempkm, tempkmin, temppn, tempnn, tempmm;
    int ib;
    int *tiles;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;

    /*
     * zunmlq = A->nb * ib
     * ztsmlq = A->nb * ib
     * zttmlq = A->nb * ib
     */
    ws_worker = A->nb * ib;

#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
     * zunmlq = A->nb * ib
     * ztsmlq = 2 * A->nb * ib
     */
    ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif

    /* Initialisation of tiles (host-side ordering buffer, released at the end).
     * NOTE(review): the allocation result is not checked; this void routine
     * has no visible channel to report an out-of-memory error - confirm the
     * project policy. */
    tiles = (int*)malloc((qrtree->mt)*sizeof(int));
    memset( tiles, 0, (qrtree->mt)*sizeof(int) );

    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

#if defined(CHAMELEON_COPY_DIAG)
    {
        /* necessary to avoid dependencies between tasks regarding the diag tile */
        /* NOTE(review): BS is not defined anywhere in the visible part of
         * this file - confirm where it comes from before enabling this path. */
        int nblk = ( A->nt + BS -1 ) / BS;
        D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
        morse_zdesc_alloc_diag(*D, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
    }
#endif

    K = chameleon_min(A->mt, A->nt);

    for (k = K-1; k >= 0; k--) {
        RUNTIME_iteration_push(morse, k);

        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
        ldak = BLKLDD(A, k);

        /* Setting the order of the tiles */
        libhqr_treewalk(qrtree, k, tiles);

        for (i = A->nt-2; i >= k; i--) {
            n = tiles[i];
            p = qrtree->currpiv(qrtree, k, n);

            tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;

            /* TT or TS */
            if (qrtree->gettype(qrtree, k, n) == 0) {
                for (m = k; m < Q->mt; m++) {
                    tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
                    ldqm = BLKLDD(Q, m);
                    MORSE_TASK_ztsmlq(
                        &options,
                        MorseRight, MorseNoTrans,
                        tempmm, Q->nb, tempmm, tempnn, tempkm, ib, TS->nb,
                        Q( m, p), ldqm,
                        Q( m, n), ldqm,
                        A( k, n), ldak,
                        TS(k, n), TS->mb);
                }
            }
            else {
                for (m = k; m < Q->mt; m++) {
                    tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
                    /* Leading dimension of tile row m of Q: it must be set
                     * here too, otherwise a stale or uninitialized value is
                     * handed to the kernel. */
                    ldqm = BLKLDD(Q, m);
                    MORSE_TASK_zttmlq(
                        &options,
                        MorseRight, MorseNoTrans,
                        tempmm, Q->nb, tempmm, tempnn, tempkm, ib, TT->nb,
                        Q( m, p), ldqm,
                        Q( m, n), ldqm,
                        A( k, n), ldak,
                        TT(k, n), TT->mb);
                }
            }
        }

        for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
            p = qrtree->getm(qrtree, k, i);

            /* p indexes a column tile here (A(k, p)), so its width comes
             * from the column quantities nt / n / nb. */
            temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb;
            tempkmin = chameleon_min(tempkm, temppn);

#if defined(CHAMELEON_COPY_DIAG)
            MORSE_TASK_zlacpy(
                &options,
                MorseUpper, tempkmin, temppn, A->nb,
                A(k, p), ldak,
                D(k, p), ldak );
#if defined(CHAMELEON_USE_CUDA)
            MORSE_TASK_zlaset(
                &options,
                MorseLower, tempkmin, temppn,
                0., 1.,
                D(k, p), ldak );
#endif
#endif
            for (m = k; m < Q->mt; m++) {
                tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
                ldqm = BLKLDD(Q, m);
                MORSE_TASK_zunmlq(
                    &options,
                    MorseRight, MorseNoTrans,
                    tempmm, temppn, tempkmin, ib, TS->nb,
                    D( k, p), ldak,
                    TS(k, p), TS->mb,
                    Q( m, p), ldqm);
            }
        }

        RUNTIME_iteration_pop(morse);
    }

    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
    MORSE_TASK_dataflush_all();

    /* tiles is only read in this function while submitting tasks, so it can
     * be released once submission is over. */
    free(tiles);

#if defined(CHAMELEON_COPY_DIAG)
    MORSE_Sequence_Wait(sequence);
    morse_desc_mat_free(D);
    free(D);
#endif
    (void)D;
}
This diff is collapsed.
This diff is collapsed.
......@@ -401,24 +401,15 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans,
morse_pztile_zero(subB, sequence, request);
free(subB); */
if (morse->householder == MORSE_FLAT_HOUSEHOLDER) {
morse_pzgelqf(A, TS, sequence, request);
}
else {
morse_pzgelqfrh(A, TS, MORSE_RHBLK, sequence, request);
}
morse_pzgelqf_param(qrtree, A, TS, TT, sequence, request);
subB = morse_desc_submatrix(B, 0, 0, A->m, B->n);
subA = morse_desc_submatrix(A, 0, 0, A->m, A->m);
morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
free(subA);
free(subB);
if (morse->householder == MORSE_FLAT_HOUSEHOLDER) {
morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request);
}
else {
morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, TS, MORSE_RHBLK, sequence, request);
}
morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request);
}
return MORSE_SUCCESS;
}
This diff is collapsed.
......@@ -283,12 +283,18 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans,
int MORSE_zgelqf_param(const libhqr_tree_t *qrtree, int M, int N, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT);
int MORSE_zgelqf_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT);
int MORSE_zgelqf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zgelqs_param(const libhqr_tree_t *qrtree, int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB);
int MORSE_zgelqs_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B);
int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zgeqrf_param(const libhqr_tree_t *qrtree, int M, int N, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT);
int MORSE_zgeqrf_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT);
int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zgeqrs_param(const libhqr_tree_t *qrtree, int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *TT, MORSE_Complex64_t *B, int LDB);
int MORSE_zgeqrs_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B);
int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zunglq_param(const libhqr_tree_t *qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *TT, MORSE_Complex64_t *B, int LDB);
int MORSE_zunglq_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B);
int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
int MORSE_zungqr_param(const libhqr_tree_t *qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB);
int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B);
int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
......
......@@ -118,7 +118,7 @@ int testing_zgels_param(int argc, char **argv)
domino = -1; /* -1 */
llvl = -1; /* -1 */
hlvl = -1; /* -1 */
qr_a = TS->mt; /* -1 */
qr_a = ( M >= N ) ? -1 : TS->nt; /* -1 */
qr_p = 1; /* matrix.p */
tsrr = 0; /* 0 */
......@@ -126,7 +126,6 @@ int testing_zgels_param(int argc, char **argv)
( M >= N ) ? LIBHQR_QR : LIBHQR_LQ,
&matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr );
#if 1
/* Initialize A1 and A2 */
LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1);
LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA );
......@@ -145,7 +144,7 @@ int testing_zgels_param(int argc, char **argv)
MORSE_zungqr_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA);
else
/* Building the economy-size Q */
MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA);
MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA);
printf("\n");
printf("------ TESTS FOR CHAMELEON ZGELS_PARAM ROUTINE ------- \n");
......@@ -171,7 +170,6 @@ int testing_zgels_param(int argc, char **argv)
printf(" - TESTING ZGELS_PARAM ... FAILED !\n"); hres++;
printf("************************************************\n");
}
#endif
/*-------------------------------------------------------------
* TESTING ZGEQRF + ZGEQRS or ZGELQF + ZGELQS
......@@ -228,8 +226,8 @@ int testing_zgels_param(int argc, char **argv)
/* Morse routines */
MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT);
MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA);
MORSE_zgelqs(M, N, NRHS, A2, LDA, TS, B2, LDB);
MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA);
MORSE_zgelqs_param(&qrtree, M, N, NRHS, A2, LDA, TS, TT, B2, LDB);
/* Check the orthogonality, factorization and the solution */
info_ortho = check_orthogonality(M, N, LDA, Q, eps);
......@@ -292,7 +290,7 @@ int testing_zgels_param(int argc, char **argv)
MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT);
MORSE_ztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, M, NRHS, 1.0, A2, LDA, B2, LDB);
MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA);
MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA);
MORSE_zunmlq_param(&qrtree, MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, TT, B2, LDB);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment