Mentions légales du service

Skip to content
Snippets Groups Projects
zgels_param.c 18.19 KiB
/**
 *
 * @file zgels_param.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon zgels_param wrappers
 *
 * @version 1.2.0
 * @author Raphael Boucherie
 * @author Mathieu Faverge
 * @author Alycia Lisito
 * @date 2022-02-22
 * @precisions normal z -> s d c
 *
 */
#include "control/common.h"
#include <stdlib.h>

/**
 *******************************************************************************
 *
 * @ingroup CHAMELEON_Complex64_t
 *
 *  CHAMELEON_zgels_param - solves overdetermined or underdetermined linear
 *  systems involving an M-by-N matrix A using the QR or the LQ factorization of
 *  A.  It is assumed that A has full rank.  The following options are provided:
 *
 *  # trans = ChamNoTrans and M >= N: find the least squares solution of an
 *    overdetermined system, i.e., solve the least squares problem: minimize
 *    || B - A*X ||.
 *
 *  # trans = ChamNoTrans and M < N: find the minimum norm solution of an
 *    underdetermined system A * X = B.
 *
 *  Several right hand side vectors B and solution vectors X can be handled in a
 *  single call; they are stored as the columns of the M-by-NRHS right hand side
 *  matrix B and the N-by-NRHS solution matrix X.
 *
 *******************************************************************************
 *
 * @param[in] qrtree
 *          The tree used for the factorization
 *
 * @param[in] trans
 *          Intended usage:
 *          = ChamNoTrans:   the linear system involves A;
 *          = ChamConjTrans: the linear system involves A^H.
 *
 * @param[in] M
 *          The number of rows of the matrix A. M >= 0.
 *
 * @param[in] N
 *          The number of columns of the matrix A. N >= 0.
 *
 * @param[in] NRHS
 *          The number of right hand sides, i.e., the number of columns of the
 *          matrices B and X.  NRHS >= 0.
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix A.
 *          On exit, if M >= N, A is overwritten by details of its QR
 *          factorization as returned by CHAMELEON_zgeqrf; if M < N, A is
 *          overwritten by details of its LQ factorization as returned by
 *          CHAMELEON_zgelqf.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,M).
 *
 * @param[out] descTS
 *          On exit, auxiliary factorization data.
 *
 * @param[out] descTT
 *          On exit, auxiliary factorization data.
 *
 * @param[in,out] B
 *          On entry, the M-by-NRHS matrix B of right hand side vectors, stored
 *          columnwise;
 *          On exit, if return value = 0, B is overwritten by the solution
 *          vectors, stored columnwise: if M >= N, rows 1 to N of B contain the
 *          least squares solution vectors; the residual sum of squares for the
 *          solution in each column is given by the sum of squares of the
 *          modulus of elements N+1 to M in that column; if M < N, rows 1 to N
 *          of B contain the minimum norm solution vectors;
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= MAX(1,M,N).
 *
 *******************************************************************************
 *
 * @retval CHAMELEON_SUCCESS successful exit
 * @retval <0 if -i, the i-th argument had an illegal value
 *
 *******************************************************************************
 *
 * @sa CHAMELEON_zgels_param_Tile
 * @sa CHAMELEON_zgels_param_Tile_Async
 * @sa CHAMELEON_cgels
 * @sa CHAMELEON_dgels
 * @sa CHAMELEON_sgels
 *
 */
int CHAMELEON_zgels_param( const libhqr_tree_t *qrtree, cham_trans_t trans, int M, int N, int NRHS,
                           CHAMELEON_Complex64_t *A, int LDA,
                           CHAM_desc_t *descTS, CHAM_desc_t *descTT,
                           CHAMELEON_Complex64_t *B, int LDB )
{
    int i, j;
    int NB;
    int status;
    CHAM_context_t *chamctxt;
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
    CHAM_desc_t descAl, descAt;
    CHAM_desc_t descBl, descBt;

    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgels_param", "CHAMELEON not initialized");
        return CHAMELEON_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ( (trans != ChamNoTrans) && (trans != ChamConjTrans) ) {
        chameleon_error("CHAMELEON_zgels_param", "illegal value of trans");
        return CHAMELEON_ERR_NOT_SUPPORTED;
    }
    if (M < 0) {
        chameleon_error("CHAMELEON_zgels_param", "illegal value of M");
        return -2;
    }
    if (N < 0) {
        chameleon_error("CHAMELEON_zgels_param", "illegal value of N");
        return -3;
    }
    if (NRHS < 0) {
        chameleon_error("CHAMELEON_zgels_param", "illegal value of NRHS");
        return -4;
    }
    if (LDA < chameleon_max(1, M)) {
        chameleon_error("CHAMELEON_zgels_param", "illegal value of LDA");
        return -6;
    }
    if (LDB < chameleon_max(1, chameleon_max(M, N))) {
        chameleon_error("CHAMELEON_zgels_param", "illegal value of LDB");
        return -9;
    }
    /* Quick return */
    if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) {
        for (i = 0; i < chameleon_max(M, N); i++)
            for (j = 0; j < NRHS; j++)
                B[j*LDB+i] = 0.0;
        return CHAMELEON_SUCCESS;
    }

    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
    status = chameleon_tune(CHAMELEON_FUNC_ZGELS, M, N, NRHS);
    if (status != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgels_param", "chameleon_tune() failed");
        return status;
    }

    /* Set NT */
    NB = CHAMELEON_NB;

    chameleon_sequence_create( chamctxt, &sequence );

    /* Submit the matrix conversion */
    int maxMN = chameleon_max( M, N );

    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
                         A, NB, NB, LDA, N,    M,     N,    sequence, &request );
    chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
                         B, NB, NB, LDB, NRHS, maxMN, NRHS, sequence, &request );

    /* Call the tile interface */
    CHAMELEON_zgels_param_Tile_Async( qrtree, trans, &descAt, descTS, descTT, &descBt, sequence, &request );

    /* Submit the matrix conversion back */
    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
                         ChamDescInout, ChamUpperLower, sequence, &request );
    chameleon_ztile2lap( chamctxt, &descBl, &descBt,
                         ChamDescInout, ChamUpperLower, sequence, &request );
    CHAMELEON_Desc_Flush( descTS, sequence );
    CHAMELEON_Desc_Flush( descTT, sequence );

    chameleon_sequence_wait( chamctxt, sequence );

    /* Cleanup the temporary data */
    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
    chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );

    status = sequence->status;
    chameleon_sequence_destroy( chamctxt, sequence );
    return status;
}

/**
 ********************************************************************************
 *
 * @ingroup CHAMELEON_Complex64_t_Tile
 *
 *  CHAMELEON_zgels_param_Tile - solves overdetermined or underdetermined linear
 *  systems involving an M-by-N matrix A using the QR or the LQ factorization of
 *  A.
 *  Tile equivalent of CHAMELEON_zgels_param().
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] trans
 *          Intended usage:
 *          = ChamNoTrans:   the linear system involves A;
 *          = ChamConjTrans: the linear system involves A^H.
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix A.
 *          On exit, if M >= N, A is overwritten by details of its QR
 *          factorization as returned by CHAMELEON_zgeqrf; if M < N, A is
 *          overwritten by details of its LQ factorization as returned by
 *          CHAMELEON_zgelqf.
 *
 * @param[out] TS
 *          On exit, auxiliary factorization data.
 *
 * @param[out] TT
 *          On exit, auxiliary factorization data.
 *
 * @param[in,out] B
 *          On entry, the M-by-NRHS matrix B of right hand side vectors, stored
 *          columnwise;
 *          On exit, if return value = 0, B is overwritten by the solution
 *          vectors, stored columnwise: if M >= N, rows 1 to N of B contain the
 *          least squares solution vectors; the residual sum of squares for the
 *          solution in each column is given by the sum of squares of the
 *          modulus of elements N+1 to M in that column; if M < N, rows 1 to N
 *          of B contain the minimum norm solution vectors;
 *
 *******************************************************************************
 *
 * @return CHAMELEON_SUCCESS successful exit
 *
 *******************************************************************************
 *
 * @sa CHAMELEON_zgels_param
 * @sa CHAMELEON_zgels_param_Tile_Async
 * @sa CHAMELEON_cgels_Tile
 * @sa CHAMELEON_dgels_Tile
 * @sa CHAMELEON_sgels_Tile
 *
 */
int CHAMELEON_zgels_param_Tile( const libhqr_tree_t *qrtree, cham_trans_t trans, CHAM_desc_t *A,
                                CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *B )
{
    CHAM_context_t *chamctxt;
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
    int status;

    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgels_param_Tile", "CHAMELEON not initialized");
        return CHAMELEON_ERR_NOT_INITIALIZED;
    }
    chameleon_sequence_create( chamctxt, &sequence );

    CHAMELEON_zgels_param_Tile_Async( qrtree, trans, A, TS, TT, B, sequence, &request );

    CHAMELEON_Desc_Flush( A, sequence );
    CHAMELEON_Desc_Flush( TS, sequence );
    CHAMELEON_Desc_Flush( TT, sequence );
    CHAMELEON_Desc_Flush( B, sequence );
    chameleon_sequence_wait( chamctxt, sequence );
    status = sequence->status;
    chameleon_sequence_destroy( chamctxt, sequence );
    return status;
}

/**
 ********************************************************************************
 *
 * @ingroup CHAMELEON_Complex64_t_Tile_Async
 *
 *  CHAMELEON_zgels_param_Tile_Async - Solves overdetermined or underdetermined linear
 *  system of equations using the tile QR or the tile LQ factorization.
 *  Non-blocking equivalent of CHAMELEON_zgels_param_Tile().
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
 * @sa CHAMELEON_zgels_param
 * @sa CHAMELEON_zgels_param_Tile
 * @sa CHAMELEON_cgels_Tile_Async
 * @sa CHAMELEON_dgels_Tile_Async
 * @sa CHAMELEON_sgels_Tile_Async
 *
 */
int CHAMELEON_zgels_param_Tile_Async( const libhqr_tree_t *qrtree, cham_trans_t trans, CHAM_desc_t *A,
                                      CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *B,
                                      RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
    CHAM_desc_t *subA;
    CHAM_desc_t *subB;
    CHAM_context_t *chamctxt;
    CHAM_desc_t D, *Dptr = NULL;

    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgels_param_Tile", "CHAMELEON not initialized");
        return CHAMELEON_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        chameleon_fatal_error("CHAMELEON_zgels_param_Tile", "NULL sequence");
        return CHAMELEON_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        chameleon_fatal_error("CHAMELEON_zgels_param_Tile", "NULL request");
        return CHAMELEON_ERR_UNALLOCATED;
    }
    /* Check sequence status */
    if (sequence->status == CHAMELEON_SUCCESS) {
        request->status = CHAMELEON_SUCCESS;
    }
    else {
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
    }

    /* Check descriptors for correctness */
    if ( (trans != ChamNoTrans) && (trans != ChamConjTrans) ) {
        chameleon_error("CHAMELEON_zgels_param_Tile_Async", "illegal value of trans");
        return CHAMELEON_ERR_NOT_SUPPORTED;
    }
    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgels_param_Tile", "invalid first descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
    }
    if (chameleon_desc_check(TS) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgels_param_Tile", "invalid second descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
    }
    if (chameleon_desc_check(TT) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgels_param_Tile", "invalid third descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
    }
    if (chameleon_desc_check(B) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgels_param_Tile", "invalid fourth descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
    }
    /* Check input arguments */
    if (A->nb != A->mb || B->nb != B->mb) {
        chameleon_error("CHAMELEON_zgels_param_Tile", "only square tiles supported");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
    }

    if (A->m >= A->n) {
#if defined(CHAMELEON_COPY_DIAG)
        {
            int n = chameleon_min( A->m, A->n );
            chameleon_zdesc_copy_and_restrict( A, &D, A->m, n );
            Dptr = &D;
        }
#endif
        /*
         * compute QR factorization of A
         */
        chameleon_pzgeqrf_param( 1, A->nt, qrtree, A,
                                 TS, TT, Dptr, sequence, request );

        /* Perform the solve part */
        if ( trans == ChamNoTrans ) {
            /*
             * Least-Squares Problem min || A * X - B ||
             *
             * B(1:M,1:NRHS) := Q**H * B(1:M,1:NRHS)
             */
            chameleon_pzunmqr_param( 0, qrtree, ChamLeft, ChamConjTrans, A, B, TS, TT, Dptr, sequence, request );

            /*
             * B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS)
             */
            subB = chameleon_desc_submatrix(B, 0, 0, A->n, B->n);
            subA = chameleon_desc_submatrix(A, 0, 0, A->n, A->n);
            chameleon_pztrsm( ChamLeft, ChamUpper, ChamNoTrans, ChamNonUnit, 1.0, subA, subB, sequence, request );
        }
        else {
            /*
             * Underdetermined system of equations A**H * X = B
             *
             * B(1:N,1:NRHS) := inv(R**H) * B(1:N,1:NRHS)
             */
            subB = chameleon_desc_submatrix(B, 0, 0, A->n, B->n);
            subA = chameleon_desc_submatrix(A, 0, 0, A->n, A->n);
            chameleon_pztrsm( ChamLeft, ChamUpper, ChamConjTrans, ChamNonUnit, 1.0, subA, subB, sequence, request );

            /*
             * B(M+1:N,1:NRHS) = 0
             */
            /* TODO: enable subdescriptor with non aligned starting point in laset */
            /* free(subB); */
            /* subB = chameleon_desc_submatrix(B, A->n, 0, A->m-A->n, B->n); */
            /* chameleon_pzlaset( ChamUpperLower, 0., 0., subB, sequence, request ); */
            /*
             * B(1:N,1:NRHS) := Q(1:N,:)**H * B(1:M,1:NRHS)
             */
            chameleon_pzunmqr_param( 0, qrtree, ChamLeft, ChamNoTrans, A, B, TS, TT, Dptr, sequence, request );
        }
    }
    else {
#if defined(CHAMELEON_COPY_DIAG)
        {
            int m = chameleon_min( A->m, A->n );
            chameleon_zdesc_copy_and_restrict( A, &D, m, A->n );
            Dptr = &D;
        }
#endif
        /*
         * Compute LQ factorization of A
         */
        chameleon_pzgelqf_param( 1, A->mt, qrtree, A, TS, TT, Dptr, sequence, request );

        /* Perform the solve part */
        if ( trans == ChamNoTrans ) {
            /*
             * Underdetermined system of equations A * X = B
             *
             * B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS)
             */
            subB = chameleon_desc_submatrix(B, 0, 0, A->m, B->n);
            subA = chameleon_desc_submatrix(A, 0, 0, A->m, A->m);
            chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamNonUnit, 1.0, subA, subB, sequence, request );

            /*
             * B(M+1:N,1:NRHS) = 0
             */
            /* TODO: enable subdescriptor with non aligned starting point in laset */
            /* free(subB); */
            /* subB = chameleon_desc_submatrix(B, A->m, 0, A->n-A->m, B->n); */
            /* chameleon_pzlaset( ChamUpperLower, 0., 0., subB, sequence, request ); */

            /*
             * B(1:N,1:NRHS) := Q(1:N,:)**H * B(1:M,1:NRHS)
             */
            chameleon_pzunmlq_param( 0, qrtree, ChamLeft, ChamConjTrans, A, B, TS, TT, Dptr, sequence, request );
        }
        else {
            /*
             * Overdetermined system min || A**H * X - B ||
             *
             * B(1:N,1:NRHS) := Q * B(1:N,1:NRHS)
             */
            chameleon_pzunmlq_param( 0, qrtree, ChamLeft, ChamNoTrans, A, B, TS, TT, Dptr, sequence, request );

            /*
             * B(1:M,1:NRHS) := inv(L**H) * B(1:M,1:NRHS)
             */
            subB = chameleon_desc_submatrix(B, 0, 0, A->m, B->n);
            subA = chameleon_desc_submatrix(A, 0, 0, A->m, A->m);
            chameleon_pztrsm( ChamLeft, ChamLower, ChamConjTrans, ChamNonUnit, 1.0, subA, subB, sequence, request );
        }
    }

    free(subA);
    free(subB);

    if (Dptr != NULL) {
        CHAMELEON_Desc_Flush( A, sequence );
        CHAMELEON_Desc_Flush( B, sequence );
        CHAMELEON_Desc_Flush( TS, sequence );
        CHAMELEON_Desc_Flush( TT, sequence );
        CHAMELEON_Desc_Flush( Dptr, sequence );
        chameleon_sequence_wait( chamctxt, sequence );
        chameleon_desc_destroy( Dptr );
    }
    (void)D;
    return CHAMELEON_SUCCESS;
}