Commit 434f428d authored by BOUCHERIE Raphael's avatar BOUCHERIE Raphael

testing works for all, need to test more

parent abf1d506
......@@ -125,6 +125,7 @@ set(ZSRC
pztrtri.c
pzpotrimm.c
pzunglq.c
pzunglq_param.c
pzunglqrh.c
pzungqr.c
pzungqr_param.c
......@@ -143,6 +144,7 @@ set(ZSRC
zgelqf.c
zgelqf_param.c
zgelqs.c
zgelqs_param.c
zgeqrf.c
zgeqrf_param.c
zgeqrs.c
......@@ -175,6 +177,7 @@ set(ZSRC
zsytrs.c
ztrtri.c
zunglq.c
zunglq_param.c
zungqr.c
zungqr_param.c
zunmlq.c
......
......@@ -66,12 +66,12 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
ib = MORSE_IB;
/*
* zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib
* ztsqrt = A->nb * (ib+1)
* zttqrt = A->nb * (ib+1)
* ztsmqr = A->nb * ib
* zttmqr = A->nb * ib
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
* ztslqt = A->nb * (ib+1)
* zttlqt = A->nb * (ib+1)
* ztsmlq = A->nb * ib
* zttmlq = A->nb * ib
*/
ws_worker = A->nb * (ib+1);
......@@ -79,8 +79,8 @@ void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztsmlq = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2016 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file pzunglq_param.c
*
* MORSE auxiliary routines
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Dulceneia Becker
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2011-05-24
* @precisions normal z -> s d c
*
**/
#include "control/common.h"
/*
 * Tile-addressing shorthands: each macro expands to the three comma-separated
 * arguments "descriptor, tile row, tile column", so that A(m,n) can be written
 * directly in the argument list of a MORSE_TASK_* call.
 */
#define A(m,n) A, (m), (n)
#define Q(m,n) Q, (m), (n)
#define TS(m,n) TS, (m), (n)
#define TT(m,n) TT, (m), (n)
/*
 * D(m,n) selects the tile used as the diagonal reflector tile:
 *  - with CHAMELEON_COPY_DIAG it addresses tile ((n)/BS, 0) of the separate
 *    descriptor D and ignores m;
 *  - otherwise it is an alias of A(m,n).
 * NOTE(review): BS is not defined anywhere in the visible part of this file;
 * confirm where it is supposed to come from before enabling CHAMELEON_COPY_DIAG.
 */
#if defined(CHAMELEON_COPY_DIAG)
#define D(m,n) D, ((n)/BS), 0
#else
#define D(m,n) A, (m), (n)
#endif
/**
 *  Parallel construction of Q using tile V - dynamic scheduling.
 *
 *  For each step k, from min(A->mt, A->nt)-1 down to 0, the routine:
 *   1. asks libhqr_treewalk() for the tile order of step k,
 *   2. for every tile n of that order, looks up its pivot p with
 *      qrtree->currpiv() and submits, for every tile row m >= k of Q, either a
 *      ztsmlq task (qrtree->gettype() == 0) or a zttmlq task (otherwise),
 *   3. for every tile returned by qrtree->getm(), submits a zunmlq task on
 *      every tile row m >= k of Q.
 *
 *  @param[in]     qrtree   Reduction tree; provides mt and the currpiv, gettype,
 *                          getnbgeqrf and getm callbacks used here.
 *  @param[in]     A        Descriptor whose tiles (k, n) are passed as the V
 *                          operand of the kernels.
 *  @param[in,out] Q        Descriptor whose tiles are updated by the kernels.
 *  @param[in]     TS       T factors passed to the ztsmlq and zunmlq tasks.
 *  @param[in]     TT       T factors passed to the zttmlq tasks.
 *  @param[in]     sequence Sequence the tasks are submitted to; nothing is done
 *                          if its status is not MORSE_SUCCESS on entry.
 *  @param[in,out] request  Request associated with this call; marked as failed
 *                          if the tile ordering array cannot be allocated.
 */
void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q,
                         MORSE_desc_t *TS, MORSE_desc_t *TT,
                         MORSE_sequence_t *sequence, MORSE_request_t *request)
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    size_t ws_worker = 0;
    size_t ws_host = 0;
    MORSE_desc_t *D = NULL;

    int k, m, n, i, p;
    int K;
    int ldak, ldqm;
    int tempkm, tempkmin, temppn, tempnn, tempmm;
    int ib;
    int *tiles;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;

    /*
     * Zero-initialised tile ordering array. It is allocated before the runtime
     * options are set up so that the failure path has nothing else to release.
     */
    tiles = (int*)calloc(qrtree->mt, sizeof(int));
    if (tiles == NULL) {
        morse_error("morse_pzunglq_param", "cannot allocate the tile ordering array");
        morse_request_fail(sequence, request, MORSE_ERR_OUT_OF_RESOURCES);
        return;
    }

    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;

    /*
     * zunmlq = A->nb * ib
     * ztsmlq = A->nb * ib
     * zttmlq = A->nb * ib
     */
    ws_worker = A->nb * ib;

#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
     * zunmlq = A->nb * ib
     * ztsmlq = 2 * A->nb * ib
     */
    ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif

    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

#if defined(CHAMELEON_COPY_DIAG)
    {
        /* necessary to avoid dependencies between tasks regarding the diag tile */
        /* NOTE(review): BS is not defined in the visible part of this file. */
        int nblk = ( A->nt + BS -1 ) / BS;
        D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
        morse_zdesc_alloc_diag(*D, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
    }
#endif

    K = chameleon_min(A->mt, A->nt);
    for (k = K-1; k >= 0; k--) {
        RUNTIME_iteration_push(morse, k);

        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
        ldak = BLKLDD(A, k);

        /* Setting the order of the tiles */
        libhqr_treewalk(qrtree, k, tiles);

        for (i = A->nt-2; i >= k; i--) {
            n = tiles[i];
            p = qrtree->currpiv(qrtree, k, n);

            tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;

            /* TT or TS */
            if (qrtree->gettype(qrtree, k, n) == 0) {
                for (m = k; m < Q->mt; m++) {
                    tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
                    ldqm = BLKLDD(Q, m);
                    MORSE_TASK_ztsmlq(
                        &options,
                        MorseRight, MorseNoTrans,
                        tempmm, Q->nb, tempmm, tempnn, tempkm, ib, TS->nb,
                        Q( m, p), ldqm,
                        Q( m, n), ldqm,
                        A( k, n), ldak,
                        TS(k, n), TS->mb);
                }
            }
            else {
                for (m = k; m < Q->mt; m++) {
                    tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
                    /* leading dimension of tile row m: must be refreshed for every m */
                    ldqm = BLKLDD(Q, m);
                    MORSE_TASK_zttmlq(
                        &options,
                        MorseRight, MorseNoTrans,
                        tempmm, Q->nb, tempmm, tempnn, tempkm, ib, TT->nb,
                        Q( m, p), ldqm,
                        Q( m, n), ldqm,
                        A( k, n), ldak,
                        TT(k, n), TT->mb);
                }
            }
        }

        for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
            p = qrtree->getm(qrtree, k, i);

            /* NOTE(review): p is used as a tile column of A below, yet its size is
             * derived from A->mt / A->m / A->mb - confirm this is intended. */
            temppn = p == A->mt-1 ? A->m-p*A->mb : A->mb;
            tempkmin = chameleon_min(tempkm, temppn);

#if defined(CHAMELEON_COPY_DIAG)
            MORSE_TASK_zlacpy(
                &options,
                MorseUpper, tempkmin, temppn, A->nb,
                A(k, p), ldak,
                D(k, p), ldak );
#if defined(CHAMELEON_USE_CUDA)
            MORSE_TASK_zlaset(
                &options,
                MorseLower, tempkmin, temppn,
                0., 1.,
                D(k, p), ldak );
#endif
#endif
            for (m = k; m < Q->mt; m++) {
                tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
                /* leading dimension of tile row m: must be refreshed for every m */
                ldqm = BLKLDD(Q, m);
                MORSE_TASK_zunmlq(
                    &options,
                    MorseRight, MorseNoTrans,
                    tempmm, temppn, tempkmin, ib, TS->nb,
                    D( k, p), ldak,
                    TS(k, p), TS->mb,
                    Q( m, p), ldqm);
            }
        }

        RUNTIME_iteration_pop(morse);
    }

    /* The ordering is only read while submitting tasks (indices are passed by value). */
    free(tiles);

    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
    MORSE_TASK_dataflush_all();

#if defined(CHAMELEON_COPY_DIAG)
    MORSE_Sequence_Wait(sequence);
    morse_desc_mat_free(D);
    free(D);
#endif
    (void)D;
}
This diff is collapsed.
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file zgelqs_param.c
*
* MORSE computational routines
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> s d c
*
**/
#include "control/common.h"
/***************************************************************************//**
 *
 * @ingroup MORSE_Complex64_t
 *
 *  MORSE_zgelqs_param - Compute a minimum-norm solution min || A*X - B || using the LQ factorization
 *  A = L*Q computed by MORSE_zgelqf.
 *
 *******************************************************************************
 *
 * @param[in] qrtree
 *          The reduction tree, forwarded unchanged to MORSE_zgelqs_param_Tile_Async().
 *
 * @param[in] M
 *          The number of rows of the matrix A. M >= 0.
 *
 * @param[in] N
 *          The number of columns of the matrix A. N >= M >= 0.
 *
 * @param[in] NRHS
 *          The number of columns of B. NRHS >= 0.
 *
 * @param[in] A
 *          Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,M).
 *
 * @param[in] descTS
 *          Auxiliary factorization data, forwarded as the TS descriptor of the tile interface.
 *
 * @param[in] descTT
 *          Auxiliary factorization data, forwarded as the TT descriptor of the tile interface.
 *
 * @param[in,out] B
 *          On entry, the M-by-NRHS right hand side matrix B.
 *          On exit, the N-by-NRHS solution matrix X.
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= max(1,N).
 *
 *******************************************************************************
 *
 * @return
 *          \retval MORSE_SUCCESS successful exit
 *          \retval MORSE_ERR_NOT_INITIALIZED if MORSE has not been initialized
 *          \retval -1 illegal value of M
 *          \retval -2 illegal value of N
 *          \retval -3 illegal value of NRHS
 *          \retval -5 illegal value of LDA
 *          \retval -8 illegal value of LDB
 *          \retval otherwise the status returned by morse_tune() or the final
 *                  status of the internal sequence
 *
 *******************************************************************************
 *
 * @sa MORSE_zgelqs_param_Tile
 * @sa MORSE_zgelqs_param_Tile_Async
 * @sa MORSE_cgelqs
 * @sa MORSE_dgelqs
 * @sa MORSE_sgelqs
 * @sa MORSE_zgelqf
 *
 ******************************************************************************/
int MORSE_zgelqs_param(const libhqr_tree_t *qrtree, int M, int N, int NRHS,
                       MORSE_Complex64_t *A, int LDA,
                       MORSE_desc_t *descTS, MORSE_desc_t *descTT,
                       MORSE_Complex64_t *B, int LDB)
{
    int NB;
    int status;
    MORSE_context_t *morse;
    MORSE_sequence_t *sequence = NULL;
    MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
    MORSE_desc_t descA, descB;

    morse = morse_context_self();
    if (morse == NULL) {
        morse_fatal_error("MORSE_zgelqs_param", "MORSE not initialized");
        return MORSE_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments (the numeric codes are kept as callers may rely on them) */
    if (M < 0) {
        morse_error("MORSE_zgelqs_param", "illegal value of M");
        return -1;
    }
    if (N < 0 || M > N) {
        morse_error("MORSE_zgelqs_param", "illegal value of N");
        return -2;
    }
    if (NRHS < 0) {
        morse_error("MORSE_zgelqs_param", "illegal value of NRHS");
        return -3;
    }
    if (LDA < chameleon_max(1, M)) {
        morse_error("MORSE_zgelqs_param", "illegal value of LDA");
        return -5;
    }
    if (LDB < chameleon_max(1, N)) {
        morse_error("MORSE_zgelqs_param", "illegal value of LDB");
        return -8;
    }

    /* Quick return */
    if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) {
        return MORSE_SUCCESS;
    }

    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = morse_tune(MORSE_FUNC_ZGELS, M, N, NRHS);
    if (status != MORSE_SUCCESS) {
        morse_error("MORSE_zgelqs_param", "morse_tune() failed");
        return status;
    }

    /* Set NT */
    NB = MORSE_NB;

    morse_sequence_create(morse, &sequence);

    /* Only the out-of-place LAPACK <-> tile translation is used here. */
    morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request,
                        morse_desc_mat_free(&(descA)) );
    morse_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, sequence, &request,
                        morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descB)));

    /* Call the tile interface */
    MORSE_zgelqs_param_Tile_Async(qrtree, &descA, descTS, descTT, &descB, sequence, &request);

    /* Copy the tiles back to the LAPACK layout, then wait before freeing them */
    morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request);
    morse_zooptile2lap(descB, B, NB, NB, LDB, NRHS, sequence, &request);
    morse_sequence_wait(morse, sequence);
    morse_desc_mat_free(&descA);
    morse_desc_mat_free(&descB);

    status = sequence->status;
    morse_sequence_destroy(morse, sequence);
    return status;
}
/**
 *******************************************************************************
 *
 * @ingroup MORSE_Complex64_t_Tile
 *
 *  MORSE_zgelqs_param_Tile - Blocking tile interface of MORSE_zgelqs_param().
 *  Creates a private sequence, submits MORSE_zgelqs_param_Tile_Async() on it,
 *  waits for the sequence to complete and brings A and B back on the CPU.
 *  All matrices are passed through descriptors.
 *
 *******************************************************************************
 *
 * @param[in] qrtree
 *          The reduction tree, forwarded to MORSE_zgelqs_param_Tile_Async().
 *
 * @param[in] A
 *          Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf.
 *
 * @param[in] TS
 *          Auxiliary factorization data, forwarded as the TS descriptor.
 *
 * @param[in] TT
 *          Auxiliary factorization data, forwarded as the TT descriptor.
 *
 * @param[in,out] B
 *          On entry, the M-by-NRHS right hand side matrix B.
 *          On exit, the N-by-NRHS solution matrix X.
 *
 *******************************************************************************
 *
 * @return
 *          \retval MORSE_ERR_NOT_INITIALIZED if MORSE has not been initialized
 *          \retval otherwise the final status of the internal sequence
 *                  (MORSE_SUCCESS on successful exit)
 *
 *******************************************************************************
 *
 * @sa MORSE_zgelqs_param
 * @sa MORSE_zgelqs_param_Tile_Async
 * @sa MORSE_cgelqs_Tile
 * @sa MORSE_dgelqs_Tile
 * @sa MORSE_sgelqs_Tile
 * @sa MORSE_zgelqf_Tile
 *
 ******************************************************************************/
int MORSE_zgelqs_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B)
{
    MORSE_request_t   req = MORSE_REQUEST_INITIALIZER;
    MORSE_sequence_t *seq = NULL;
    MORSE_context_t  *ctx = morse_context_self();
    int               rc;

    if (ctx == NULL) {
        morse_fatal_error("MORSE_zgelqs_param_Tile", "MORSE not initialized");
        return MORSE_ERR_NOT_INITIALIZED;
    }

    /* Submit the asynchronous solve on a private sequence and block on it */
    morse_sequence_create(ctx, &seq);
    MORSE_zgelqs_param_Tile_Async(qrtree, A, TS, TT, B, &(*seq), &req);
    morse_sequence_wait(ctx, seq);

    /* Make the results available on the host */
    RUNTIME_desc_getoncpu(A);
    RUNTIME_desc_getoncpu(B);

    /* The sequence status must be read before the sequence is destroyed */
    rc = seq->status;
    morse_sequence_destroy(ctx, seq);

    return rc;
}
/**
 *******************************************************************************
 *
 * @ingroup MORSE_Complex64_t_Tile_Async
 *
 *  MORSE_zgelqs_param_Tile_Async - Non-blocking equivalent of
 *  MORSE_zgelqs_param_Tile(). After validating its arguments it submits a
 *  triangular solve (morse_pztrsm) on the leading A->m-by-A->m part of A and
 *  the leading A->m rows of B, followed by morse_pzunmlq_param() on A and B.
 *  May return before the computation is finished.
 *
 *******************************************************************************
 *
 * @param[in] qrtree
 *          The reduction tree, forwarded to morse_pzunmlq_param().
 *
 * @param[in] A
 *          Descriptor of the factorized matrix; must have square tiles.
 *
 * @param[in] TS
 *          Auxiliary factorization data (TS descriptor).
 *
 * @param[in] TT
 *          Auxiliary factorization data (TT descriptor).
 *
 * @param[in,out] B
 *          Descriptor of the right hand sides; must have square tiles.
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @return
 *          \retval MORSE_SUCCESS once the tasks have been submitted
 *          \retval MORSE_ERR_NOT_INITIALIZED if MORSE has not been initialized
 *          \retval MORSE_ERR_UNALLOCATED if sequence or request is NULL
 *          \retval otherwise the value returned by morse_request_fail()
 *
 *******************************************************************************
 *
 * @sa MORSE_zgelqs_param
 * @sa MORSE_zgelqs_param_Tile
 * @sa MORSE_cgelqs_Tile_Async
 * @sa MORSE_dgelqs_Tile_Async
 * @sa MORSE_sgelqs_Tile_Async
 * @sa MORSE_zgelqf_Tile_Async
 *
 ******************************************************************************/
int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B,
                                  MORSE_sequence_t *sequence, MORSE_request_t *request)
{
    /* Descriptors to validate, in order, with the message reported for each */
    const char *desc_msg[4] = {
        "invalid first descriptor",
        "invalid second descriptor",
        "invalid third descriptor",
        "invalid fourth descriptor"
    };
    MORSE_desc_t *desc_arg[4];
    MORSE_desc_t *rhs_top;
    MORSE_desc_t *tri_part;
    MORSE_context_t *ctx;
    int d;

    ctx = morse_context_self();
    if (ctx == NULL) {
        morse_fatal_error("MORSE_zgelqs_param_Tile", "MORSE not initialized");
        return MORSE_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        morse_fatal_error("MORSE_zgelqs_param_Tile", "NULL sequence");
        return MORSE_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        morse_fatal_error("MORSE_zgelqs_param_Tile", "NULL request");
        return MORSE_ERR_UNALLOCATED;
    }

    /* A sequence that already failed rejects any further submission */
    if (sequence->status != MORSE_SUCCESS) {
        return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED);
    }
    request->status = MORSE_SUCCESS;

    /* Check descriptors for correctness; stop at the first invalid one */
    desc_arg[0] = A;
    desc_arg[1] = TS;
    desc_arg[2] = TT;
    desc_arg[3] = B;
    for (d = 0; d < 4; d++) {
        if (morse_desc_check(desc_arg[d]) != MORSE_SUCCESS) {
            morse_error("MORSE_zgelqs_param_Tile", desc_msg[d]);
            return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
        }
    }

    /* Check input arguments */
    if ((A->nb != A->mb) || (B->nb != B->mb)) {
        morse_error("MORSE_zgelqs_param_Tile", "only square tiles supported");
        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
    }

    /* No quick return on empty dimensions and no zeroing of the trailing rows
     * of B is performed here. */

    /* Triangular solve with the leading A->m-by-A->m block of A on the first A->m rows of B */
    rhs_top  = morse_desc_submatrix(B, 0, 0, A->m, B->n);
    tri_part = morse_desc_submatrix(A, 0, 0, A->m, A->m);
    morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, tri_part, rhs_top, sequence, request);
    free(tri_part);
    free(rhs_top);

    /* Apply the conjugate-transposed Q from the left to the whole of B */
    morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request);

    return MORSE_SUCCESS;
}
......@@ -401,24 +401,15 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans,
morse_pztile_zero(subB, sequence, request);
free(subB); */
if (morse->householder == MORSE_FLAT_HOUSEHOLDER) {
morse_pzgelqf(A, TS, sequence, request);
}
else {
morse_pzgelqfrh(A, TS, MORSE_RHBLK, sequence, request);
}
morse_pzgelqf_param(qrtree, A, TS, TT, sequence, request);
subB = morse_desc_submatrix(B, 0, 0, A->m, B->n);
subA = morse_desc_submatrix(A, 0, 0, A->m, A->m);
morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
free(subA);
free(subB);
if (morse->householder == MORSE_FLAT_HOUSEHOLDER) {
morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request);
}
else {
morse_pzunmlqrh(MorseLeft, MorseConjTrans, A, B, TS, MORSE_RHBLK, sequence, request);
}
morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request);
}
return MORSE_SUCCESS;
}
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file zunglq_param.c
*
* MORSE computational routines
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> s d c
*
**/
#include "control/common.h"
/**
*******************************************************************************
*
* @ingroup MORSE_Complex64_t