diff --git a/compute/zunglq.c b/compute/zunglq.c index 6222017546d0b0731d896bf85599cde34f842b6d..0e7591030eb42538f7a4ad3dc1bf5eb271646652 100644 --- a/compute/zunglq.c +++ b/compute/zunglq.c @@ -12,10 +12,6 @@ * @brief Chameleon zunglq wrappers * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 - * @author Hatem Ltaief - * @author Jakub Kurzak * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede @@ -30,8 +26,9 @@ * * @ingroup CHAMELEON_Complex64_t * - * CHAMELEON_zunglq - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the - * first M rows of a product of the elementary reflectors returned by CHAMELEON_zgelqf. + * @brief Generates an M-by-N matrix Q with orthonormal rows, which is defined + * as the first M rows of a product of the elementary reflectors returned by + * CHAMELEON_zgelqf(). * ******************************************************************************* * @@ -46,13 +43,14 @@ * M >= K >= 0. * * @param[in] A - * Details of the LQ factorization of the original matrix A as returned by CHAMELEON_zgelqf. + * Details of the LQ factorization of the original matrix A as returned by + * CHAMELEON_zgelqf(). * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[in] descT - * Auxiliary factorization data, computed by CHAMELEON_zgelqf. + * Auxiliary factorization data, computed by CHAMELEON_zgelqf(). * * @param[out] Q * On exit, the M-by-N matrix Q. 
@@ -63,7 +61,7 @@ ******************************************************************************* * * @retval CHAMELEON_SUCCESS successful exit - * @retval CHAMELEON_SUCCESS <0 if -i, the i-th argument had an illegal value + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -75,54 +73,56 @@ * @sa CHAMELEON_zgelqf * */ -int CHAMELEON_zunglq( int M, int N, int K, - CHAMELEON_Complex64_t *A, int LDA, - CHAM_desc_t *descT, +int +CHAMELEON_zunglq( int M, int N, int K, + CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT, CHAMELEON_Complex64_t *Q, int LDQ ) { - int NB; - int status; - CHAM_context_t *chamctxt; + int NB; + int status; + CHAM_context_t * chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - CHAM_desc_t descAl, descAt; - CHAM_desc_t descQl, descQt; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + CHAM_desc_t descAl, descAt; + CHAM_desc_t descQl, descQt; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zunglq", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zunglq", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } + /* Check input arguments */ - if (M < 0) { - chameleon_error("CHAMELEON_zunglq", "illegal value of M"); + if ( M < 0 ) { + chameleon_error( "CHAMELEON_zunglq", "illegal value of M" ); return -1; } - if (N < M) { - chameleon_error("CHAMELEON_zunglq", "illegal value of N"); + if ( N < M ) { + chameleon_error( "CHAMELEON_zunglq", "illegal value of N" ); return -2; } - if (K < 0 || K > M) { - chameleon_error("CHAMELEON_zunglq", "illegal value of K"); + if ( ( K < 0 ) || ( K > M ) ) { + chameleon_error( "CHAMELEON_zunglq", "illegal value of K" ); return -3; } - if (LDA < chameleon_max(1, M)) { - chameleon_error("CHAMELEON_zunglq", "illegal value of LDA"); + if ( LDA < 
chameleon_max( 1, M ) ) { + chameleon_error( "CHAMELEON_zunglq", "illegal value of LDA" ); return -5; } - if (LDQ < chameleon_max(1, M)) { - chameleon_error("CHAMELEON_zunglq", "illegal value of LDQ"); + if ( LDQ < chameleon_max( 1, M ) ) { + chameleon_error( "CHAMELEON_zunglq", "illegal value of LDQ" ); return -8; } - /* Quick return - currently NOT equivalent to LAPACK's: - * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ ) */ - if (chameleon_min(M, chameleon_min(N, K)) == 0) + + /* Quick return */ + if ( M <= 0 ) { return CHAMELEON_SUCCESS; + } - /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ - status = chameleon_tune(CHAMELEON_FUNC_ZGELS, M, N, 0); - if (status != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zunglq", "chameleon_tune() failed"); + /* Tune NB & IB depending on M & N; Set NBNB */ + status = chameleon_tune( CHAMELEON_FUNC_ZGELS, M, N, 0 ); + if ( status != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zunglq", "chameleon_tune() failed" ); return status; } @@ -133,18 +133,18 @@ int CHAMELEON_zunglq( int M, int N, int K, /* Submit the matrix conversion */ chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpper, - A, NB, NB, LDA, N, K, N, sequence, &request ); + A, NB, NB, LDA, N, K, N, sequence, &request ); chameleon_zlap2tile( chamctxt, &descQl, &descQt, ChamDescInout, ChamUpperLower, - Q, NB, NB, LDQ, N, M, N, sequence, &request ); + Q, NB, NB, LDQ, N, M, N, sequence, &request ); /* Call the tile interface */ CHAMELEON_zunglq_Tile_Async( &descAt, descT, &descQt, sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, - ChamDescInput, ChamUpper, sequence, &request ); + ChamDescInput, ChamUpper, sequence, &request ); chameleon_ztile2lap( chamctxt, &descQl, &descQt, - ChamDescInout, ChamUpperLower, sequence, &request ); + ChamDescInout, ChamUpperLower, sequence, &request ); CHAMELEON_Desc_Flush( descT, sequence ); chameleon_sequence_wait( chamctxt, 
sequence ); @@ -163,17 +163,19 @@ int CHAMELEON_zunglq( int M, int N, int K, * * @ingroup CHAMELEON_Complex64_t_Tile * - * CHAMELEON_zunglq_Tile - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the - * first M rows of a product of the elementary reflectors returned by CHAMELEON_zgelqf. - * All matrices are passed through descriptors. All dimensions are taken from the descriptors. + * @brief Generates an M-by-N matrix Q with orthonormal rows, which is defined + * as the first M rows of a product of the elementary reflectors returned by + * CHAMELEON_zgelqf(). All matrices are passed through descriptors. All + * dimensions are taken from the descriptors. * ******************************************************************************* * * @param[in] A - * Details of the LQ factorization of the original matrix A as returned by CHAMELEON_zgelqf. + * Details of the LQ factorization of the original matrix A as returned by + * CHAMELEON_zgelqf(). * * @param[in] T - * Auxiliary factorization data, computed by CHAMELEON_zgelqf. + * Auxiliary factorization data, computed by CHAMELEON_zgelqf(). * * @param[out] Q * On exit, the M-by-N matrix Q. 
@@ -192,16 +194,17 @@ int CHAMELEON_zunglq( int M, int N, int K, * @sa CHAMELEON_zgelqf_Tile * */ -int CHAMELEON_zunglq_Tile( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q ) +int +CHAMELEON_zunglq_Tile( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q ) { - CHAM_context_t *chamctxt; + CHAM_context_t * chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zunglq_Tile", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zunglq_Tile", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } chameleon_sequence_create( chamctxt, &sequence ); @@ -223,9 +226,10 @@ int CHAMELEON_zunglq_Tile( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q ) * * @ingroup CHAMELEON_Complex64_t_Tile_Async * - * Non-blocking equivalent of CHAMELEON_zunglq_Tile(). - * May return before the computation is finished. - * Allows for pipelining of operations at runtime. + * @brief Non-blocking equivalent of CHAMELEON_zunglq_Tile(). + * + * This function may return before the computation is finished. + * Allows for pipelining of operations at runtime. 
* ******************************************************************************* * @@ -246,75 +250,88 @@ int CHAMELEON_zunglq_Tile( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q ) * @sa CHAMELEON_zgelqf_Tile_Async * */ -int CHAMELEON_zunglq_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q, +int +CHAMELEON_zunglq_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { CHAM_context_t *chamctxt; - CHAM_desc_t D, *Dptr = NULL; + CHAM_desc_t D, *Dptr = NULL; + int M, N, K; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zunglq_Tile", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zunglq_Tile_Async", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } - if (sequence == NULL) { - chameleon_fatal_error("CHAMELEON_zunglq_Tile", "NULL sequence"); + if ( sequence == NULL ) { + chameleon_fatal_error( "CHAMELEON_zunglq_Tile_Async", "NULL sequence" ); return CHAMELEON_ERR_UNALLOCATED; } - if (request == NULL) { - chameleon_fatal_error("CHAMELEON_zunglq_Tile", "NULL request"); + if ( request == NULL ) { + chameleon_fatal_error( "CHAMELEON_zunglq_Tile_Async", "NULL request" ); return CHAMELEON_ERR_UNALLOCATED; } /* Check sequence status */ - if (sequence->status == CHAMELEON_SUCCESS) { + if ( sequence->status == CHAMELEON_SUCCESS ) { request->status = CHAMELEON_SUCCESS; } else { - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED ); } /* Check descriptors for correctness */ - if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zunglq_Tile", "invalid first descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) { + chameleon_error( 
"CHAMELEON_zunglq_Tile_Async", "invalid first descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } + if ( chameleon_desc_check( T ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zunglq_Tile_Async", "invalid second descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if (chameleon_desc_check(T) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zunglq_Tile", "invalid second descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check( Q ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zunglq_Tile_Async", "invalid third descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if (chameleon_desc_check(Q) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zunglq_Tile", "invalid third descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( ( A->nb != A->mb ) || ( Q->nb != Q->mb ) ) { + chameleon_error( "CHAMELEON_zunglq_Tile_Async", "only square tiles supported" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } + + M = Q->m; + N = Q->n; + K = chameleon_min( A->m, A->n ); + /* Check input arguments */ - if (A->nb != A->mb || Q->nb != Q->mb) { - chameleon_error("CHAMELEON_zunglq_Tile", "only square tiles supported"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( N < M ) { + chameleon_error( "CHAMELEON_zunglq_Tile_Async", "Incorrect value of N" ); + return chameleon_request_fail( sequence, request, -2 ); + } + if ( K > M ) { + chameleon_error( "CHAMELEON_zunglq_Tile_Async", "Incorrect value of K" ); + return chameleon_request_fail( sequence, request, -3 ); + } + + /* Quick return */ + if ( M == 0 ) { + return CHAMELEON_SUCCESS; } - /* Quick return - currently NOT equivalent to LAPACK's: - * CALL DLASET( 'Full', MAX( 
M, N ), NRHS, ZERO, ZERO, Q, LDQ ) */ - /* - if (chameleon_min(M, N) == 0) - return CHAMELEON_SUCCESS; - */ -#if defined(CHAMELEON_COPY_DIAG) +#if defined( CHAMELEON_COPY_DIAG ) { - int m = chameleon_min( A->m, A->n ); - chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, K, A->n ); Dptr = &D; } #endif chameleon_pzlaset( ChamUpperLower, 0., 1., Q, sequence, request ); - if (chamctxt->householder == ChamFlatHouseholder) { + if ( chamctxt->householder == ChamFlatHouseholder ) { chameleon_pzunglq( 1, A, Q, T, Dptr, sequence, request ); } else { chameleon_pzunglqrh( 1, CHAMELEON_RHBLK, A, Q, T, Dptr, sequence, request ); } - if (Dptr != NULL) { + if ( Dptr != NULL ) { CHAMELEON_Desc_Flush( A, sequence ); CHAMELEON_Desc_Flush( Q, sequence ); CHAMELEON_Desc_Flush( T, sequence ); diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c index d381ee71678dc1e8ee1bbeeb23e3cd94f7a2cbc5..6485e0e686735100840111783fb15e4e984a9c17 100644 --- a/compute/zunglq_param.c +++ b/compute/zunglq_param.c @@ -21,12 +21,13 @@ #include "control/common.h" /** - ******************************************************************************* + ******************************************************************************** * * @ingroup CHAMELEON_Complex64_t * - * CHAMELEON_zunglq_param - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the - * first M rows of a product of the elementary reflectors returned by CHAMELEON_zgelqf. + * @brief Generates an M-by-N matrix Q with orthonormal rows, which is defined + * as the first M rows of a product of the elementary reflectors returned by + * CHAMELEON_zgelqf_param(). * ******************************************************************************* * @@ -44,13 +45,17 @@ * M >= K >= 0. * * @param[in] A - * Details of the LQ factorization of the original matrix A as returned by CHAMELEON_zgelqf. 
+ * Details of the LQ factorization of the original matrix A as returned by + * CHAMELEON_zgelqf_param(). * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * - * @param[in] descT - * Auxiliary factorization data, computed by CHAMELEON_zgelqf. + * @param[in] descTS + * Auxiliary factorization data, computed by CHAMELEON_zgelqf_param(). + * + * @param[in] descTT + * Auxiliary factorization data, computed by CHAMELEON_zgelqf_param(). * * @param[out] Q * On exit, the M-by-N matrix Q. @@ -61,7 +66,7 @@ ******************************************************************************* * * @retval CHAMELEON_SUCCESS successful exit - * @retval CHAMELEON_SUCCESS <0 if -i, the i-th argument had an illegal value + * @retval <0 if -i, the i-th argument had an illegal value * ******************************************************************************* * @@ -73,54 +78,57 @@ * @sa CHAMELEON_zgelqf * */ -int CHAMELEON_zunglq_param( const libhqr_tree_t *qrtree, int M, int N, int K, +int +CHAMELEON_zunglq_param( const libhqr_tree_t *qrtree, int M, int N, int K, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descTS, CHAM_desc_t *descTT, CHAMELEON_Complex64_t *Q, int LDQ ) { - int NB; - int status; - CHAM_context_t *chamctxt; + int NB; + int status; + CHAM_context_t * chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - CHAM_desc_t descAl, descAt; - CHAM_desc_t descQl, descQt; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + CHAM_desc_t descAl, descAt; + CHAM_desc_t descQl, descQt; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zunglq_param", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zunglq_param", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } + /* Check input arguments */ - if (M < 0) { - chameleon_error("CHAMELEON_zunglq_param", "illegal value of M"); + if ( M < 0 
) { + chameleon_error( "CHAMELEON_zunglq_param", "illegal value of M" ); return -1; } - if (N < M) { - chameleon_error("CHAMELEON_zunglq_param", "illegal value of N"); + if ( N < M ) { + chameleon_error( "CHAMELEON_zunglq_param", "illegal value of N" ); return -2; } - if (K < 0 || K > M) { - chameleon_error("CHAMELEON_zunglq_param", "illegal value of K"); + if ( ( K < 0 ) || ( K > M ) ) { + chameleon_error( "CHAMELEON_zunglq_param", "illegal value of K" ); return -3; } - if (LDA < chameleon_max(1, M)) { - chameleon_error("CHAMELEON_zunglq_param", "illegal value of LDA"); + if ( LDA < chameleon_max( 1, M ) ) { + chameleon_error( "CHAMELEON_zunglq_param", "illegal value of LDA" ); return -5; } - if (LDQ < chameleon_max(1, M)) { - chameleon_error("CHAMELEON_zunglq_param", "illegal value of LDQ"); + if ( LDQ < chameleon_max( 1, M ) ) { + chameleon_error( "CHAMELEON_zunglq_param", "illegal value of LDQ" ); return -8; } - /* Quick return - currently NOT equivalent to LAPACK's: - * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ ) */ - if (chameleon_min(M, chameleon_min(N, K)) == 0) + + /* Quick return */ + if ( M <= 0 ) { return CHAMELEON_SUCCESS; + } - /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ - status = chameleon_tune(CHAMELEON_FUNC_ZGELS, M, N, 0); - if (status != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zunglq_param", "chameleon_tune() failed"); + /* Tune NB & IB depending on M & N; Set NBNB */ + status = chameleon_tune( CHAMELEON_FUNC_ZGELS, M, N, 0 ); + if ( status != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zunglq_param", "chameleon_tune() failed" ); return status; } @@ -131,18 +139,20 @@ int CHAMELEON_zunglq_param( const libhqr_tree_t *qrtree, int M, int N, int K, /* Submit the matrix conversion */ chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpper, - A, NB, NB, LDA, N, K, N, sequence, &request ); + A, NB, NB, LDA, N, K, N, sequence, &request ); chameleon_zlap2tile( chamctxt, &descQl, &descQt, 
ChamDescInout, ChamUpperLower, - Q, NB, NB, LDQ, N, M, N, sequence, &request ); + Q, NB, NB, LDQ, N, M, N, sequence, &request ); /* Call the tile interface */ - CHAMELEON_zunglq_param_Tile_Async( qrtree, &descAt, descTS, descTT, &descQt, sequence, &request ); + CHAMELEON_zunglq_param_Tile_Async( qrtree, &descAt, descTS, descTT, &descQt, + sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, - ChamDescInput, ChamUpper, sequence, &request ); + ChamDescInput, ChamUpper, sequence, &request ); chameleon_ztile2lap( chamctxt, &descQl, &descQt, - ChamDescInout, ChamUpperLower, sequence, &request ); + ChamDescInout, ChamUpperLower, sequence, &request ); + CHAMELEON_Desc_Flush( descTS, sequence ); CHAMELEON_Desc_Flush( descTT, sequence ); @@ -158,21 +168,26 @@ int CHAMELEON_zunglq_param( const libhqr_tree_t *qrtree, int M, int N, int K, } /** - ******************************************************************************* + ******************************************************************************** * * @ingroup CHAMELEON_Complex64_t_Tile * - * CHAMELEON_zunglq_param_Tile - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the - * first M rows of a product of the elementary reflectors returned by CHAMELEON_zgelqf. - * All matrices are passed through descriptors. All dimensions are taken from the descriptors. + * @brief Generates an M-by-N matrix Q with orthonormal rows, which is defined + * as the first M rows of a product of the elementary reflectors returned by + * CHAMELEON_zgelqf_param(). All matrices are passed through descriptors. All + * dimensions are taken from the descriptors. * ******************************************************************************* * * @param[in] A - * Details of the LQ factorization of the original matrix A as returned by CHAMELEON_zgelqf. + * Details of the LQ factorization of the original matrix A as returned by + * CHAMELEON_zgelqf_param(). 
* - * @param[in] T - * Auxiliary factorization data, computed by CHAMELEON_zgelqf. + * @param[in] TS + * Auxiliary factorization data, computed by CHAMELEON_zgelqf_param(). + * + * @param[in] TT + * Auxiliary factorization data, computed by CHAMELEON_zgelqf_param(). * * @param[out] Q * On exit, the M-by-N matrix Q. @@ -191,16 +206,18 @@ int CHAMELEON_zunglq_param( const libhqr_tree_t *qrtree, int M, int N, int K, * @sa CHAMELEON_zgelqf_Tile * */ -int CHAMELEON_zunglq_param_Tile( const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *Q ) +int +CHAMELEON_zunglq_param_Tile( const libhqr_tree_t *qrtree, CHAM_desc_t *A, + CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *Q ) { - CHAM_context_t *chamctxt; + CHAM_context_t * chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zunglq_param_Tile", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zunglq_param_Tile", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } chameleon_sequence_create( chamctxt, &sequence ); @@ -219,13 +236,14 @@ int CHAMELEON_zunglq_param_Tile( const libhqr_tree_t *qrtree, CHAM_desc_t *A, CH } /** - ******************************************************************************* + ******************************************************************************** * * @ingroup CHAMELEON_Complex64_t_Tile_Async * - * Non-blocking equivalent of CHAMELEON_zunglq_param_Tile(). - * May return before the computation is finished. - * Allows for pipelining of operations at runtime. + * @brief Non-blocking equivalent of CHAMELEON_zunglq_param_Tile(). + * + * This function may return before the computation is finished. + * Allows for pipelining of operations at runtime. 
* ******************************************************************************* * @@ -246,65 +264,80 @@ int CHAMELEON_zunglq_param_Tile( const libhqr_tree_t *qrtree, CHAM_desc_t *A, CH * @sa CHAMELEON_zgelqf_Tile_Async * */ -int CHAMELEON_zunglq_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *Q, +int +CHAMELEON_zunglq_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t *A, + CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *Q, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { CHAM_context_t *chamctxt; - CHAM_desc_t D, *Dptr = NULL; + CHAM_desc_t D, *Dptr = NULL; + int M, N, K; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zunglq_param_Tile", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zunglq_param_Tile_Async", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } - if (sequence == NULL) { - chameleon_fatal_error("CHAMELEON_zunglq_param_Tile", "NULL sequence"); + if ( sequence == NULL ) { + chameleon_fatal_error( "CHAMELEON_zunglq_param_Tile_Async", "NULL sequence" ); return CHAMELEON_ERR_UNALLOCATED; } - if (request == NULL) { - chameleon_fatal_error("CHAMELEON_zunglq_param_Tile", "NULL request"); + if ( request == NULL ) { + chameleon_fatal_error( "CHAMELEON_zunglq_param_Tile_Async", "NULL request" ); return CHAMELEON_ERR_UNALLOCATED; } /* Check sequence status */ - if (sequence->status == CHAMELEON_SUCCESS) { + if ( sequence->status == CHAMELEON_SUCCESS ) { request->status = CHAMELEON_SUCCESS; } else { - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED ); } /* Check descriptors for correctness */ - if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zunglq_param_Tile", "invalid first descriptor"); - return 
chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zunglq_param_Tile_Async", "invalid first descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } + if ( chameleon_desc_check( TS ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zunglq_param_Tile_Async", "invalid second descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if (chameleon_desc_check(TS) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zunglq_param_Tile", "invalid second descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check( TT ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zunglq_param_Tile_Async", "invalid third descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if (chameleon_desc_check(TT) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zunglq_param_Tile", "invalid third descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check( Q ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zunglq_param_Tile_Async", "invalid fourth descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if (chameleon_desc_check(Q) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zunglq_param_Tile", "invalid fourth descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( ( A->nb != A->mb ) || ( Q->nb != Q->mb ) ) { + chameleon_error( "CHAMELEON_zunglq_param_Tile_Async", "only square tiles supported" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } + + M = Q->m; + N = Q->n; + K = chameleon_min( A->m, A->n ); + /* Check input arguments */ - if (A->nb != A->mb || Q->nb != Q->mb) { - 
chameleon_error("CHAMELEON_zunglq_param_Tile", "only square tiles supported"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( N < M ) { + chameleon_error( "CHAMELEON_zunglq_param_Tile_Async", "Incorrect value of N" ); + return chameleon_request_fail( sequence, request, -2 ); } - /* Quick return - currently NOT equivalent to LAPACK's: - * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, Q, LDQ ) */ - /* - if (chameleon_min(M, N) == 0) - return CHAMELEON_SUCCESS; - */ -#if defined(CHAMELEON_COPY_DIAG) + if ( K > M ) { + chameleon_error( "CHAMELEON_zunglq_param_Tile_Async", "Incorrect value of K" ); + return chameleon_request_fail( sequence, request, -3 ); + } + + /* Quick return */ + if ( M == 0 ) { + return CHAMELEON_SUCCESS; + } + +#if defined( CHAMELEON_COPY_DIAG ) { - int m = chameleon_min( A->m, A->n ); - chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, K, A->n ); Dptr = &D; } #endif @@ -312,7 +345,7 @@ int CHAMELEON_zunglq_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t chameleon_pzlaset( ChamUpperLower, 0., 1., Q, sequence, request ); chameleon_pzunglq_param( 1, qrtree, A, Q, TS, TT, Dptr, sequence, request ); - if (Dptr != NULL) { + if ( Dptr != NULL ) { CHAMELEON_Desc_Flush( A, sequence ); CHAMELEON_Desc_Flush( Q, sequence ); CHAMELEON_Desc_Flush( TS, sequence ); diff --git a/compute/zungqr.c b/compute/zungqr.c index 213afe017cf2d9b81a61cb1b5f0b24211c2b4356..924617eadf21b77e402ae6aba63927306d161946 100644 --- a/compute/zungqr.c +++ b/compute/zungqr.c @@ -12,10 +12,6 @@ * @brief Chameleon zungqr wrappers * * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.5.0 for CHAMELEON 0.9.2 - * @author Hatem Ltaief - * @author Jakub Kurzak * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede @@ -30,8 +26,9 @@ * * @ingroup CHAMELEON_Complex64_t * - * CHAMELEON_zungqr - Generates an M-by-N matrix Q with 
orthonormal columns, which is defined as the - * first N columns of a product of the elementary reflectors returned by CHAMELEON_zgeqrf. + * @brief Generates an M-by-N matrix Q with orthonormal columns, which is defined + * as the first N columns of a product of the elementary reflectors returned by + * CHAMELEON_zgeqrf(). * ******************************************************************************* * @@ -46,13 +43,14 @@ * M >= K >= 0. * * @param[in] A - * Details of the QR factorization of the original matrix A as returned by CHAMELEON_zgeqrf. + * Details of the QR factorization of the original matrix A as returned by + * CHAMELEON_zgeqrf(). * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * * @param[in] descT - * Auxiliary factorization data, computed by CHAMELEON_zgeqrf. + * Auxiliary factorization data, computed by CHAMELEON_zgeqrf(). * * @param[out] Q * On exit, the M-by-N matrix Q. @@ -75,53 +73,56 @@ * @sa CHAMELEON_zgeqrf * */ -int CHAMELEON_zungqr( int M, int N, int K, - CHAMELEON_Complex64_t *A, int LDA, - CHAM_desc_t *descT, +int +CHAMELEON_zungqr( int M, int N, int K, + CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT, CHAMELEON_Complex64_t *Q, int LDQ ) { - int NB; - int status; - CHAM_context_t *chamctxt; + int NB; + int status; + CHAM_context_t * chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - CHAM_desc_t descAl, descAt; - CHAM_desc_t descQl, descQt; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + CHAM_desc_t descAl, descAt; + CHAM_desc_t descQl, descQt; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zungqr", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zungqr", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } /* Check input arguments */ - if (M < 0) { - chameleon_error("CHAMELEON_zungqr", "illegal value of M"); + if ( M < 
0 ) { + chameleon_error( "CHAMELEON_zungqr", "illegal value of M" ); return -1; } - if (N < 0 || N > M) { - chameleon_error("CHAMELEON_zungqr", "illegal value of N"); + if ( ( N < 0 ) || ( N > M ) ) { + chameleon_error( "CHAMELEON_zungqr", "illegal value of N" ); return -2; } - if (K < 0 || K > N) { - chameleon_error("CHAMELEON_zungqr", "illegal value of K"); + if ( ( K < 0 ) || ( K > N ) ) { + chameleon_error( "CHAMELEON_zungqr", "illegal value of K" ); return -3; } - if (LDA < chameleon_max(1, M)) { - chameleon_error("CHAMELEON_zungqr", "illegal value of LDA"); + if ( LDA < chameleon_max( 1, M ) ) { + chameleon_error( "CHAMELEON_zungqr", "illegal value of LDA" ); return -5; } - if (LDQ < chameleon_max(1, M)) { - chameleon_error("CHAMELEON_zungqr", "illegal value of LDQ"); + if ( LDQ < chameleon_max( 1, M ) ) { + chameleon_error( "CHAMELEON_zungqr", "illegal value of LDQ" ); return -8; } - if (chameleon_min(M, chameleon_min(N, K)) == 0) + + /* Quick return */ + if ( N <= 0 ) { return CHAMELEON_SUCCESS; + } /* Tune NB & IB depending on M & N; Set NBNB */ - status = chameleon_tune(CHAMELEON_FUNC_ZGELS, M, N, 0); - if (status != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zungqr", "chameleon_tune() failed"); + status = chameleon_tune( CHAMELEON_FUNC_ZGELS, M, N, 0 ); + if ( status != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zungqr", "chameleon_tune() failed" ); return status; } @@ -132,18 +133,18 @@ int CHAMELEON_zungqr( int M, int N, int K, /* Submit the matrix conversion */ chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamLower, - A, NB, NB, LDA, N, M, K, sequence, &request ); + A, NB, NB, LDA, N, M, K, sequence, &request ); chameleon_zlap2tile( chamctxt, &descQl, &descQt, ChamDescInout, ChamUpperLower, - Q, NB, NB, LDQ, N, M, N, sequence, &request ); + Q, NB, NB, LDQ, N, M, N, sequence, &request ); /* Call the tile interface */ CHAMELEON_zungqr_Tile_Async( &descAt, descT, &descQt, sequence, &request ); /* Submit the matrix 
conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, - ChamDescInput, ChamLower, sequence, &request ); + ChamDescInput, ChamLower, sequence, &request ); chameleon_ztile2lap( chamctxt, &descQl, &descQt, - ChamDescInout, ChamUpperLower, sequence, &request ); + ChamDescInout, ChamUpperLower, sequence, &request ); CHAMELEON_Desc_Flush( descT, sequence ); chameleon_sequence_wait( chamctxt, sequence ); @@ -162,17 +163,19 @@ int CHAMELEON_zungqr( int M, int N, int K, * * @ingroup CHAMELEON_Complex64_t_Tile * - * CHAMELEON_zungqr_Tile - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the - * first N columns of a product of the elementary reflectors returned by CHAMELEON_zgeqrf. - * All matrices are passed through descriptors. All dimensions are taken from the descriptors. + * @brief Generates an M-by-N matrix Q with orthonormal columns, which is defined + * as the first N columns of a product of the elementary reflectors returned by + * CHAMELEON_zgeqrf(). All matrices are passed through descriptors. All + * dimensions are taken from the descriptors. * ******************************************************************************* * * @param[in] A - * Details of the QR factorization of the original matrix A as returned by CHAMELEON_zgeqrf. + * Details of the QR factorization of the original matrix A as returned by + * CHAMELEON_zgeqrf(). * * @param[in] T - * Auxiliary factorization data, computed by CHAMELEON_zgeqrf. + * Auxiliary factorization data, computed by CHAMELEON_zgeqrf(). * * @param[out] Q * On exit, the M-by-N matrix Q. 
@@ -191,16 +194,17 @@ int CHAMELEON_zungqr( int M, int N, int K, * @sa CHAMELEON_zgeqrf_Tile * */ -int CHAMELEON_zungqr_Tile( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q ) +int +CHAMELEON_zungqr_Tile( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q ) { - CHAM_context_t *chamctxt; + CHAM_context_t * chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zungqr_Tile", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zungqr_Tile", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } chameleon_sequence_create( chamctxt, &sequence ); @@ -222,9 +226,10 @@ int CHAMELEON_zungqr_Tile( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q ) * * @ingroup CHAMELEON_Complex64_t_Tile_Async * - * Non-blocking equivalent of CHAMELEON_zungqr_Tile(). - * May return before the computation is finished. - * Allows for pipelining of operations at runtime. + * @brief Non-blocking equivalent of CHAMELEON_zungqr_Tile(). + * + * This function may return before the computation is finished. + * Allows for pipelining of operations at runtime. 
* ******************************************************************************* * @@ -245,73 +250,88 @@ int CHAMELEON_zungqr_Tile( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q ) * @sa CHAMELEON_zgeqrf_Tile_Async * */ -int CHAMELEON_zungqr_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q, +int +CHAMELEON_zungqr_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { CHAM_context_t *chamctxt; - CHAM_desc_t D, *Dptr = NULL; + CHAM_desc_t D, *Dptr = NULL; + int M, N, K; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zungqr_Tile", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zungqr_Tile_Async", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } - if (sequence == NULL) { - chameleon_fatal_error("CHAMELEON_zungqr_Tile", "NULL sequence"); + if ( sequence == NULL ) { + chameleon_fatal_error( "CHAMELEON_zungqr_Tile_Async", "NULL sequence" ); return CHAMELEON_ERR_UNALLOCATED; } - if (request == NULL) { - chameleon_fatal_error("CHAMELEON_zungqr_Tile", "NULL request"); + if ( request == NULL ) { + chameleon_fatal_error( "CHAMELEON_zungqr_Tile_Async", "NULL request" ); return CHAMELEON_ERR_UNALLOCATED; } /* Check sequence status */ - if (sequence->status == CHAMELEON_SUCCESS) { + if ( sequence->status == CHAMELEON_SUCCESS ) { request->status = CHAMELEON_SUCCESS; } else { - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED ); } /* Check descriptors for correctness */ - if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zungqr_Tile", "invalid first descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) { + chameleon_error( 
"CHAMELEON_zungqr_Tile_Async", "invalid first descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } + if ( chameleon_desc_check( T ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zungqr_Tile_Async", "invalid second descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if (chameleon_desc_check(T) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zungqr_Tile", "invalid second descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check( Q ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zungqr_Tile_Async", "invalid third descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if (chameleon_desc_check(Q) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zungqr_Tile", "invalid third descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( ( A->nb != A->mb ) || ( Q->nb != Q->mb ) ) { + chameleon_error( "CHAMELEON_zungqr_Tile_Async", "only square tiles supported" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } + + M = Q->m; + N = Q->n; + K = chameleon_min( A->m, A->n ); + /* Check input arguments */ - if (A->nb != A->mb || Q->nb != Q->mb) { - chameleon_error("CHAMELEON_zungqr_Tile", "only square tiles supported"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( N > M ) { + chameleon_error( "CHAMELEON_zungqr_Tile_Async", "Incorrect value of N" ); + return chameleon_request_fail( sequence, request, -2 ); } + if ( K > N ) { + chameleon_error( "CHAMELEON_zungqr_Tile_Async", "Incorrect value of K" ); + return chameleon_request_fail( sequence, request, -3 ); + } + /* Quick return */ - /* - if (N <= 0) - return CHAMELEON_SUCCESS; - */ -#if defined(CHAMELEON_COPY_DIAG) + if ( N == 0 ) { + return CHAMELEON_SUCCESS; + } + 
+#if defined( CHAMELEON_COPY_DIAG ) { - int n = chameleon_min( A->m, A->n ); - chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, K ); Dptr = &D; } #endif chameleon_pzlaset( ChamUpperLower, 0., 1., Q, sequence, request ); - if (chamctxt->householder == ChamFlatHouseholder) { + if ( chamctxt->householder == ChamFlatHouseholder ) { chameleon_pzungqr( 1, A, Q, T, Dptr, sequence, request ); } else { chameleon_pzungqrrh( 1, CHAMELEON_RHBLK, A, Q, T, Dptr, sequence, request ); } - if (Dptr != NULL) { + if ( Dptr != NULL ) { CHAMELEON_Desc_Flush( A, sequence ); CHAMELEON_Desc_Flush( Q, sequence ); CHAMELEON_Desc_Flush( T, sequence ); diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c index 47785cc20631235af9c998425804501153cebecc..2389befd3ce810f0277d35a64af072f450cfb3c0 100644 --- a/compute/zungqr_param.c +++ b/compute/zungqr_param.c @@ -21,12 +21,13 @@ #include "control/common.h" /** - ******************************************************************************* + ******************************************************************************** * * @ingroup CHAMELEON_Complex64_t * - * CHAMELEON_zungqr_param - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the - * first N columns of a product of the elementary reflectors returned by CHAMELEON_zgeqrf. + * @brief Generates an M-by-N matrix Q with orthonormal columns, which is defined + * as the first N columns of a product of the elementary reflectors returned by + * CHAMELEON_zgeqrf_param(). * ******************************************************************************* * @@ -44,13 +45,17 @@ * M >= K >= 0. * * @param[in] A - * Details of the QR factorization of the original matrix A as returned by CHAMELEON_zgeqrf. + * Details of the QR factorization of the original matrix A as returned by + * CHAMELEON_zgeqrf_param(). * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). 
* - * @param[in] descT - * Auxiliary factorization data, computed by CHAMELEON_zgeqrf. + * @param[in] descTS + * Auxiliary factorization data, computed by CHAMELEON_zgeqrf_param(). + * + * @param[in] descTT + * Auxiliary factorization data, computed by CHAMELEON_zgeqrf_param(). * * @param[out] Q * On exit, the M-by-N matrix Q. @@ -73,55 +78,57 @@ * @sa CHAMELEON_zgeqrf * */ -int CHAMELEON_zungqr_param( const libhqr_tree_t *qrtree, - int M, int N, int K, - CHAMELEON_Complex64_t *A, int LDA, - CHAM_desc_t *descTS, - CHAM_desc_t *descTT, - CHAMELEON_Complex64_t *Q, int LDQ ) +int +CHAMELEON_zungqr_param( const libhqr_tree_t *qrtree, int M, int N, int K, + CHAMELEON_Complex64_t *A, int LDA, + CHAM_desc_t *descTS, CHAM_desc_t *descTT, + CHAMELEON_Complex64_t *Q, int LDQ ) { - int NB; - int status; - CHAM_context_t *chamctxt; + int NB; + int status; + CHAM_context_t * chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - CHAM_desc_t descAl, descAt; - CHAM_desc_t descQl, descQt; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + CHAM_desc_t descAl, descAt; + CHAM_desc_t descQl, descQt; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zungqr_param", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zungqr_param", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } /* Check input arguments */ - if (M < 0) { - chameleon_error("CHAMELEON_zungqr_param", "illegal value of M"); + if ( M < 0 ) { + chameleon_error( "CHAMELEON_zungqr_param", "illegal value of M" ); return -1; } - if (N < 0 || N > M) { - chameleon_error("CHAMELEON_zungqr_param", "illegal value of N"); + if ( ( N < 0 ) || ( N > M ) ) { + chameleon_error( "CHAMELEON_zungqr_param", "illegal value of N" ); return -2; } - if (K < 0 || K > N) { - chameleon_error("CHAMELEON_zungqr_param", "illegal value of K"); + if ( ( K < 0 ) || ( K > N ) ) { + 
chameleon_error( "CHAMELEON_zungqr_param", "illegal value of K" ); return -3; } - if (LDA < chameleon_max(1, M)) { - chameleon_error("CHAMELEON_zungqr_param", "illegal value of LDA"); + if ( LDA < chameleon_max( 1, M ) ) { + chameleon_error( "CHAMELEON_zungqr_param", "illegal value of LDA" ); return -5; } - if (LDQ < chameleon_max(1, M)) { - chameleon_error("CHAMELEON_zungqr_param", "illegal value of LDQ"); + if ( LDQ < chameleon_max( 1, M ) ) { + chameleon_error( "CHAMELEON_zungqr_param", "illegal value of LDQ" ); return -8; } - if (chameleon_min(M, chameleon_min(N, K)) == 0) + + /* Quick return */ + if ( N <= 0 ) { return CHAMELEON_SUCCESS; + } /* Tune NB & IB depending on M & N; Set NBNB */ - status = chameleon_tune(CHAMELEON_FUNC_ZGELS, M, N, 0); - if (status != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zungqr_param", "chameleon_tune() failed"); + status = chameleon_tune( CHAMELEON_FUNC_ZGELS, M, N, 0 ); + if ( status != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zungqr_param", "chameleon_tune() failed" ); return status; } @@ -132,18 +139,20 @@ int CHAMELEON_zungqr_param( const libhqr_tree_t *qrtree, /* Submit the matrix conversion */ chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamLower, - A, NB, NB, LDA, N, M, K, sequence, &request ); + A, NB, NB, LDA, N, M, K, sequence, &request ); chameleon_zlap2tile( chamctxt, &descQl, &descQt, ChamDescInout, ChamUpperLower, - Q, NB, NB, LDQ, N, M, N, sequence, &request ); + Q, NB, NB, LDQ, N, M, N, sequence, &request ); /* Call the tile interface */ - CHAMELEON_zungqr_param_Tile_Async( qrtree, &descAt, descTS, descTT, &descQt, sequence, &request ); + CHAMELEON_zungqr_param_Tile_Async( qrtree, &descAt, descTS, descTT, &descQt, + sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, - ChamDescInput, ChamLower, sequence, &request ); + ChamDescInput, ChamLower, sequence, &request ); chameleon_ztile2lap( chamctxt, &descQl, &descQt, - 
ChamDescInout, ChamUpperLower, sequence, &request ); + ChamDescInout, ChamUpperLower, sequence, &request ); + CHAMELEON_Desc_Flush( descTS, sequence ); CHAMELEON_Desc_Flush( descTT, sequence ); @@ -159,21 +168,26 @@ int CHAMELEON_zungqr_param( const libhqr_tree_t *qrtree, } /** - ******************************************************************************* + ******************************************************************************** * * @ingroup CHAMELEON_Complex64_t_Tile * - * CHAMELEON_zungqr_param_Tile - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the - * first N columns of a product of the elementary reflectors returned by CHAMELEON_zgeqrf. - * All matrices are passed through descriptors. All dimensions are taken from the descriptors. + * @brief Generates an M-by-N matrix Q with orthonormal columns, which is defined + * as the first N columns of a product of the elementary reflectors returned by + * CHAMELEON_zgeqrf_param(). All matrices are passed through descriptors. All + * dimensions are taken from the descriptors. * ******************************************************************************* * * @param[in] A - * Details of the QR factorization of the original matrix A as returned by CHAMELEON_zgeqrf. + * Details of the QR factorization of the original matrix A as returned by + * CHAMELEON_zgeqrf_param(). + * + * @param[in] TS + * Auxiliary factorization data, computed by CHAMELEON_zgeqrf_param(). * - * @param[in] T - * Auxiliary factorization data, computed by CHAMELEON_zgeqrf. + * @param[in] TT + * Auxiliary factorization data, computed by CHAMELEON_zgeqrf_param(). * * @param[out] Q * On exit, the M-by-N matrix Q. 
@@ -192,16 +206,18 @@ int CHAMELEON_zungqr_param( const libhqr_tree_t *qrtree, * @sa CHAMELEON_zgeqrf_Tile * */ -int CHAMELEON_zungqr_param_Tile( const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *Q ) +int +CHAMELEON_zungqr_param_Tile( const libhqr_tree_t *qrtree, CHAM_desc_t *A, + CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *Q ) { - CHAM_context_t *chamctxt; + CHAM_context_t * chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zungqr_param_Tile", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zungqr_param_Tile", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } chameleon_sequence_create( chamctxt, &sequence ); @@ -220,13 +236,14 @@ int CHAMELEON_zungqr_param_Tile( const libhqr_tree_t *qrtree, CHAM_desc_t *A, CH } /** - ******************************************************************************* + ******************************************************************************** * * @ingroup CHAMELEON_Complex64_t_Tile_Async * - * Non-blocking equivalent of CHAMELEON_zungqr_param_Tile(). - * May return before the computation is finished. - * Allows for pipelining of operations at runtime. + * @brief Non-blocking equivalent of CHAMELEON_zungqr_param_Tile(). + * + * This function may return before the computation is finished. + * Allows for pipelining of operations at runtime. 
* ******************************************************************************* * @@ -247,79 +264,93 @@ int CHAMELEON_zungqr_param_Tile( const libhqr_tree_t *qrtree, CHAM_desc_t *A, CH * @sa CHAMELEON_zgeqrf_Tile_Async * */ -int CHAMELEON_zungqr_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *Q, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +int +CHAMELEON_zungqr_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t *A, + CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *Q, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { CHAM_context_t *chamctxt; - CHAM_desc_t D, *Dptr = NULL; - int KT; + CHAM_desc_t D, *Dptr = NULL; + int M, N, K; chamctxt = chameleon_context_self(); - if (chamctxt == NULL) { - chameleon_fatal_error("CHAMELEON_zungqr_param_Tile", "CHAMELEON not initialized"); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zungqr_param_Tile_Async", "CHAMELEON not initialized" ); return CHAMELEON_ERR_NOT_INITIALIZED; } - if (sequence == NULL) { - chameleon_fatal_error("CHAMELEON_zungqr_param_Tile", "NULL sequence"); + if ( sequence == NULL ) { + chameleon_fatal_error( "CHAMELEON_zungqr_param_Tile_Async", "NULL sequence" ); return CHAMELEON_ERR_UNALLOCATED; } - if (request == NULL) { - chameleon_fatal_error("CHAMELEON_zungqr_param_Tile", "NULL request"); + if ( request == NULL ) { + chameleon_fatal_error( "CHAMELEON_zungqr_param_Tile_Async", "NULL request" ); return CHAMELEON_ERR_UNALLOCATED; } /* Check sequence status */ - if (sequence->status == CHAMELEON_SUCCESS) { + if ( sequence->status == CHAMELEON_SUCCESS ) { request->status = CHAMELEON_SUCCESS; } else { - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED ); } /* Check descriptors for correctness */ - if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) { - 
chameleon_error("CHAMELEON_zungqr_param_Tile", "invalid first descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zungqr_param_Tile_Async", "invalid first descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } + if ( chameleon_desc_check( TS ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zungqr_param_Tile_Async", "invalid second descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if (chameleon_desc_check(TS) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zungqr_param_Tile", "invalid second descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check( TT ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zungqr_param_Tile_Async", "invalid third descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if (chameleon_desc_check(TT) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zungqr_param_Tile", "invalid third descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( chameleon_desc_check( Q ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zungqr_param_Tile_Async", "invalid fourth descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } - if (chameleon_desc_check(Q) != CHAMELEON_SUCCESS) { - chameleon_error("CHAMELEON_zungqr_param_Tile", "invalid fourth descriptor"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( ( A->nb != A->mb ) || ( Q->nb != Q->mb ) ) { + chameleon_error( "CHAMELEON_zungqr_param_Tile_Async", "only square tiles supported" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); } + + M = Q->m; + N = Q->n; + K = chameleon_min( 
A->m, A->n ); + /* Check input arguments */ - if (A->nb != A->mb || Q->nb != Q->mb) { - chameleon_error("CHAMELEON_zungqr_param_Tile", "only square tiles supported"); - return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + if ( M < 0 ) { + chameleon_error( "CHAMELEON_zungqr_param_Tile_Async", "Incorrect value of M" ); + return chameleon_request_fail( sequence, request, -1 ); } - /* Quick return */ - /* - if (N <= 0) - return CHAMELEON_SUCCESS; - */ - if ( A->m < A->n ) { - KT = A->mt; + if ( N > M ) { + chameleon_error( "CHAMELEON_zungqr_param_Tile_Async", "Incorrect value of N" ); + return chameleon_request_fail( sequence, request, -2 ); } - else { - KT = A->nt; + if ( K > N ) { + chameleon_error( "CHAMELEON_zungqr_param_Tile_Async", "Incorrect value of K" ); + return chameleon_request_fail( sequence, request, -3 ); + } + + /* Quick return */ + if ( N == 0 ) { + return CHAMELEON_SUCCESS; } -#if defined(CHAMELEON_COPY_DIAG) +#if defined( CHAMELEON_COPY_DIAG ) { - int n = chameleon_min( A->m, A->n ); - chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, K ); Dptr = &D; } #endif chameleon_pzlaset( ChamUpperLower, 0., 1., Q, sequence, request ); - chameleon_pzungqr_param( 1, KT, qrtree, A, Q, TS, TT, Dptr, sequence, request ); + chameleon_pzungqr_param( 1, chameleon_min( A->mt, A->nt ), qrtree, + A, Q, TS, TT, Dptr, sequence, request ); - if (Dptr != NULL) { + if ( Dptr != NULL ) { CHAMELEON_Desc_Flush( A, sequence ); CHAMELEON_Desc_Flush( Q, sequence ); CHAMELEON_Desc_Flush( TS, sequence );