Mentions légales du service

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • solverstack/chameleon
  • lvilleve/chameleon-toto
  • jcletort/chameleon
  • thibault/chameleon
  • tcojean/chameleon
  • sylvand/chameleon
  • viroulea/chameleon
  • x-ltac/chameleon
  • agullo/chameleon
  • glucas/chameleon
  • pswartva/chameleon
  • aguermou1/chameleon
  • eyrauddu/chameleon
  • mverite/chameleon
  • alisito/chameleon
  • furmento/chameleon
  • fpruvost/chameleon
  • ahourcau/chameleon
  • bnicolas/chameleon
  • pesterie/chameleon
  • mmarcos/chameleon
21 results
Show changes
Showing with 1098 additions and 99 deletions
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
* @copyright 2016-2020 KAUST. All rights reserved.
*
......@@ -12,9 +12,9 @@
*
* @brief Chameleon zgenm2 wrappers
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -2,7 +2,7 @@
*
* @file zgepdf_qdwh.c
*
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
* @copyright 2016-2020 KAUST. All rights reserved.
*
......@@ -10,10 +10,10 @@
*
* @brief Chameleon zgepdf_qdwh wrappers
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @author Hatem Ltaief
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -2,7 +2,7 @@
*
* @file zgepdf_qr.c
*
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
* @copyright 2016-2020 KAUST. All rights reserved.
*
......@@ -14,9 +14,9 @@
* timer/testing fot this subroutine of the QDWH/Zolo algorithms. That is why
* only the Tile version is available.
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @date 2022-02-22
* @date 2024-12-17
* @precisions normal z -> s d c
*
*/
......@@ -82,10 +82,15 @@ int CHAMELEON_zgepdf_qr_Tile( int doqr, int optid,
CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *Q1,
CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *Q2 )
{
CHAM_context_t *chamctxt;
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
int status;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
#if defined(CHAMELEON_COPY_DIAG)
CHAM_desc_t D1, D2;
#endif
CHAM_desc_t *D1ptr = NULL;
CHAM_desc_t *D2ptr = NULL;
int status;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
......@@ -94,16 +99,36 @@ int CHAMELEON_zgepdf_qr_Tile( int doqr, int optid,
}
chameleon_sequence_create( chamctxt, &sequence );
#if defined(CHAMELEON_COPY_DIAG)
{
int n = A1->n;
chameleon_zdesc_copy_and_restrict( A1, &D1, A1->m, n );
D1ptr = &D1;
chameleon_zdesc_copy_and_restrict( A2, &D2, A2->m, n );
D2ptr = &D2;
}
#endif
chameleon_pzgepdf_qr( 1, doqr, optid, qrtreeT, qrtreeB,
A1, TS1, TT1, NULL, Q1,
A2, TS2, TT2, NULL, Q2,
A1, TS1, TT1, D1ptr, Q1,
A2, TS2, TT2, D2ptr, Q2,
sequence, &request );
CHAMELEON_Desc_Flush( Q1, sequence );
CHAMELEON_Desc_Flush( Q2, sequence );
if ( D1ptr != NULL ) {
CHAMELEON_Desc_Flush( D1ptr, sequence );
CHAMELEON_Desc_Flush( D2ptr, sequence );
}
chameleon_sequence_wait( chamctxt, sequence );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
if ( D1ptr != NULL ) {
chameleon_desc_destroy( D1ptr );
chameleon_desc_destroy( D2ptr );
}
return status;
}
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgeqrf wrappers
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -20,7 +20,7 @@
* @author Cedric Castagnede
* @author Florent Pruvost
* @author Raphael Boucherie
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,17 +4,17 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgeqrf_param wrappers
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgeqrs wrappers
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -20,7 +20,7 @@
* @author Cedric Castagnede
* @author Florent Pruvost
* @author Raphael Boucherie
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,17 +4,17 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgeqrs_param wrappers
*
* @version 1.2.0
* @version 1.3.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......
......@@ -2,7 +2,7 @@
*
* @file zgerst.c
*
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -12,7 +12,7 @@
* @version 1.3.0
* @author Mathieu Faverge
* @author Yuxi Hong
* @date 2023-07-06
* @date 2024-02-18
* @precisions normal z -> d
*
*/
......
/**
*
* @file zgesv.c
*
* @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgesv wrappers
*
* @version 1.3.0
* @author Matteo Marcos
* @date 2025-03-24
* @precisions normal z -> s d c
*
*/
#include "control/common.h"
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t
*
* @brief Computes the solution to a system of linear equations A * X = B,
* where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
*
* The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A.
* The factored form of A is then used to solve the system of equations A * X = B.
*
*******************************************************************************
*
* @param[in] N
* The number of linear equations, i.e., the order of the matrix A. N >= 0.
*
* @param[in] NRHS
* The number of right hand sides, i.e., the number of columns of the matrix B.
* NRHS >= 0.
*
* @param[in,out] A
* On entry, the N-by-N coefficient matrix A.
* On exit, the tile L and U factors from the factorization (not equivalent to LAPACK).
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,N).
*
* @param[out] IPIV
* On exit, the pivot indices that define the permutations (not equivalent to LAPACK).
*
* @param[in,out] B
* On entry, the N-by-NRHS matrix of right hand side matrix B.
* On exit, if return value = 0, the N-by-NRHS solution matrix X.
*
* @param[in] LDB
* The leading dimension of the array B. LDB >= max(1,N).
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval <0 if -i, the i-th argument had an illegal value
* @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
* but the factor U is exactly singular, so the solution could not be computed.
*
*******************************************************************************
*
* @sa CHAMELEON_zgesv_Tile
* @sa CHAMELEON_zgesv_Tile_Async
* @sa CHAMELEON_cgesv
* @sa CHAMELEON_dgesv
* @sa CHAMELEON_sgesv
*
*/
int CHAMELEON_zgesv( int N, int NRHS,
                     CHAMELEON_Complex64_t *A, int LDA,
                     int *IPIV,
                     CHAMELEON_Complex64_t *B, int LDB )
{
    int                         NB;
    int                         status;
    int                         use_ipiv;
    CHAM_context_t             *chamctxt;
    CHAM_ipiv_t                 descIPIV;
    RUNTIME_sequence_t         *sequence = NULL;
    RUNTIME_request_t           request  = RUNTIME_REQUEST_INITIALIZER;
    CHAM_desc_t                 descAl, descAt;
    CHAM_desc_t                 descBl, descBt;
    struct chameleon_pzgetrf_s *wsA, *wsB;
    int                         P, Q;

    chamctxt = chameleon_context_self();
    if ( chamctxt == NULL ) {
        chameleon_error( "CHAMELEON_zgesv", "CHAMELEON not initialized" );
        return CHAMELEON_ERR_NOT_INITIALIZED;
    }

    /*
     * Check input arguments.
     * A negative return value -i designates the i-th argument of this
     * function: N=1, NRHS=2, A=3, LDA=4, IPIV=5, B=6, LDB=7.
     */
    if ( N < 0 ) {
        chameleon_error( "CHAMELEON_zgesv", "illegal value of N" );
        return -1;
    }
    if ( NRHS < 0 ) {
        chameleon_error( "CHAMELEON_zgesv", "illegal value of NRHS" );
        return -2;
    }
    if ( LDA < chameleon_max( 1, N ) ) {
        chameleon_error( "CHAMELEON_zgesv", "illegal value of LDA" );
        return -4;
    }
    if ( LDB < chameleon_max( 1, N ) ) {
        chameleon_error( "CHAMELEON_zgesv", "illegal value of LDB" );
        /* LDB is the 7th argument (was wrongly reported as the 8th) */
        return -7;
    }

    /* Quick return: nothing to solve when N or NRHS is zero */
    if ( chameleon_min( N, NRHS ) == 0 ) {
        return CHAMELEON_SUCCESS;
    }

    /* Tune the tile parameters for an N-by-N system with NRHS right hand sides */
    status = chameleon_tune( CHAMELEON_FUNC_ZGESV, N, N, NRHS );
    if ( status != CHAMELEON_SUCCESS ) {
        chameleon_error( "CHAMELEON_zgesv", "chameleon_tune() failed" );
        return status;
    }

    /* Tile size used for both A and B */
    NB = CHAMELEON_NB;

    chameleon_sequence_create( chamctxt, &sequence );

    /* Submit the matrix conversion (LAPACK layout -> tile layout) */
    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
                         A, NB, NB, LDA, N, N, N, sequence, &request );
    chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
                         B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request );

    /* First two data distribution parameters of the tiled A
     * (presumably the P x Q process grid -- TODO confirm) */
    P = chameleon_desc_datadist_get_iparam( &descAt, 0 );
    Q = chameleon_desc_datadist_get_iparam( &descAt, 1 );

    /* Allocate the getrf workspaces associated with A and B */
    wsA = CHAMELEON_zgetrf_WS_Alloc( &descAt );
    wsB = CHAMELEON_zgetrf_WS_Alloc( &descBt );

    /* The IPIV descriptor is only set up, gathered and destroyed for the
     * partial pivoting variants selected in the workspace */
    use_ipiv = ( wsA->alg == ChamGetrfPPivPerColumn ) ||
               ( wsA->alg == ChamGetrfPPiv );

    if ( use_ipiv ) {
        chameleon_ipiv_init( &descIPIV, ChamLeft, descAt.mb, N, P, P*Q, IPIV, chameleon_getrankof_ipiv_2d_diag );
    }

    /* Call the tile interface */
    CHAMELEON_zgesv_Tile_Async( &descAt, &descIPIV, &descBt, wsA, wsB, sequence, &request );

    /* Submit the matrix conversion back (tile layout -> LAPACK layout) */
    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
                         ChamDescInout, ChamUpperLower, sequence, &request );
    chameleon_ztile2lap( chamctxt, &descBl, &descBt,
                         ChamDescInout, ChamUpperLower, sequence, &request );

    /* Submit the gather of the pivots into the user IPIV array */
    if ( use_ipiv ) {
        RUNTIME_ipiv_gather( sequence, &descIPIV, IPIV, 0 );
    }

    /* Everything above was only submitted: wait before releasing anything */
    chameleon_sequence_wait( chamctxt, sequence );

    /* Cleanup the pivot descriptor */
    if ( use_ipiv ) {
        chameleon_ipiv_destroy( &descIPIV );
    }

    /* Cleanup the workspaces and the temporary tile descriptors */
    CHAMELEON_zgetrf_WS_Free( wsA );
    CHAMELEON_zgetrf_WS_Free( wsB );
    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
    chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );

    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    chameleon_sequence_destroy( chamctxt, sequence );
    return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile
*
* @brief Solves a system of linear equations using the tile LU factorization.
* Tile equivalent of CHAMELEON_zgesv().
*
* Operates on matrices stored by tiles.
* All matrices are passed through descriptors.
* All dimensions are taken from the descriptors.
*
*******************************************************************************
*
* @param[in,out] A
* On entry, the N-by-N coefficient matrix A.
* On exit, the tile L and U factors from the factorization (not equivalent to LAPACK).
*
* @param[in,out] IPIV
* On entry, ipiv descriptor associated to A and created with
* CHAMELEON_Ipiv_Create().
* On exit, it contains the pivot indices associated to the PLU
* factorization of A.
*
* @param[in,out] B
* On entry, the N-by-NRHS matrix of right hand side matrix B.
* On exit, if return value = 0, the N-by-NRHS solution matrix X.
*
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
* but the factor U is exactly singular, so the solution could not be computed.
*
*******************************************************************************
*
* @sa CHAMELEON_zgesv
* @sa CHAMELEON_zgesv_Tile_Async
* @sa CHAMELEON_cgesv_Tile
* @sa CHAMELEON_dgesv_Tile
* @sa CHAMELEON_sgesv_Tile
* @sa CHAMELEON_zcgesv_Tile
*
*/
int CHAMELEON_zgesv_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B )
{
    RUNTIME_request_t   req = RUNTIME_REQUEST_INITIALIZER;
    RUNTIME_sequence_t *seq = NULL;
    void               *ws_for_A;
    void               *ws_for_B;
    int                 rc;

    /* The library must be initialized before any tile call */
    CHAM_context_t *ctxt = chameleon_context_self();
    if ( ctxt == NULL ) {
        chameleon_fatal_error( "CHAMELEON_zgesv_Tile", "CHAMELEON not initialized" );
        return CHAMELEON_ERR_NOT_INITIALIZED;
    }

    /* Private sequence: this wrapper submits, then blocks until completion */
    chameleon_sequence_create( ctxt, &seq );

    /* One getrf workspace per descriptor, handed to the asynchronous variant */
    ws_for_A = CHAMELEON_zgetrf_WS_Alloc( A );
    ws_for_B = CHAMELEON_zgetrf_WS_Alloc( B );

    CHAMELEON_zgesv_Tile_Async( A, IPIV, B, ws_for_A, ws_for_B, seq, &req );

    /* Flush both descriptors, then wait for every submitted task */
    CHAMELEON_Desc_Flush( A, seq );
    CHAMELEON_Desc_Flush( B, seq );
    chameleon_sequence_wait( ctxt, seq );

    /* The workspaces are released only once the sequence has completed */
    CHAMELEON_zgetrf_WS_Free( ws_for_A );
    CHAMELEON_zgetrf_WS_Free( ws_for_B );

    /* Save the status before the sequence goes away */
    rc = seq->status;
    chameleon_sequence_destroy( ctxt, seq );

    return rc;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile_Async
*
* @brief Solves a system of linear equations using the tile LU factorization.
*
* Non-blocking equivalent of CHAMELEON_zgesv_Tile().
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in,out] A
* On entry, the N-by-N coefficient matrix A.
* On exit, the tile factors L and U from the factorization.
*
* @param[in,out] IPIV
* On entry, ipiv descriptor associated to A and created with
* CHAMELEON_Ipiv_Create().
* On exit, it contains the pivot indices associated to the PLU
* factorization of A.
*
* @param[in,out] B
* On entry, the N-by-NRHS matrix of right hand side matrix B.
* On exit, the N-by-NRHS solution matrix X.
*
* @param[in,out] user_wsA
* The opaque pointer to the getrf workspace pre-allocated through
* CHAMELEON_zgetrf_WS_Alloc() for A. If user_wsA is NULL, it is automatically
* allocated, but BE CAREFUL as it switches the call from an asynchronous
* to a synchronous call.
*
* @param[in,out] user_wsB
* The opaque pointer to the getrf workspace pre-allocated through
* CHAMELEON_zgetrf_WS_Alloc() for B. If user_wsB is NULL, it is automatically
* allocated, but BE CAREFUL as it switches the call from an asynchronous
* to a synchronous call.
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa CHAMELEON_zgesv
* @sa CHAMELEON_zgesv_Tile
* @sa CHAMELEON_cgesv_Tile_Async
* @sa CHAMELEON_dgesv_Tile_Async
* @sa CHAMELEON_sgesv_Tile_Async
* @sa CHAMELEON_zcgesv_Tile_Async
*
*/
int CHAMELEON_zgesv_Tile_Async( CHAM_desc_t        *A,
                                CHAM_ipiv_t        *IPIV,
                                CHAM_desc_t        *B,
                                void               *user_wsA,
                                void               *user_wsB,
                                RUNTIME_sequence_t *sequence,
                                RUNTIME_request_t  *request )
{
    CHAM_context_t             *chamctxt;
    struct chameleon_pzgetrf_s *wsA, *wsB;

    chamctxt = chameleon_context_self();
    if ( chamctxt == NULL ) {
        chameleon_fatal_error( "CHAMELEON_zgesv_Tile_Async", "CHAMELEON not initialized" );
        return CHAMELEON_ERR_NOT_INITIALIZED;
    }
    if ( sequence == NULL ) {
        chameleon_fatal_error( "CHAMELEON_zgesv_Tile_Async", "NULL sequence" );
        return CHAMELEON_ERR_UNALLOCATED;
    }
    if ( request == NULL ) {
        chameleon_fatal_error( "CHAMELEON_zgesv_Tile_Async", "NULL request" );
        return CHAMELEON_ERR_UNALLOCATED;
    }

    /* Check sequence status: do not submit on an already failed sequence */
    if ( sequence->status == CHAMELEON_SUCCESS ) {
        request->status = CHAMELEON_SUCCESS;
    }
    else {
        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED );
    }

    /* Check descriptors for correctness */
    if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) {
        chameleon_error( "CHAMELEON_zgesv_Tile_Async", "invalid first descriptor" );
        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
    }
    if ( chameleon_desc_check( B ) != CHAMELEON_SUCCESS ) {
        chameleon_error( "CHAMELEON_zgesv_Tile_Async", "invalid third descriptor" );
        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
    }

    /* Check input arguments */
    if ( A->nb != A->mb || B->nb != B->mb ) {
        chameleon_error( "CHAMELEON_zgesv_Tile_Async", "only square tiles supported" );
        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
    }

    /* Use the caller's workspaces when provided, allocate private ones otherwise */
    if ( user_wsA == NULL ) {
        wsA = CHAMELEON_zgetrf_WS_Alloc( A );
    }
    else {
        wsA = user_wsA;
    }
    if ( user_wsB == NULL ) {
        wsB = CHAMELEON_zgetrf_WS_Alloc( B );
    }
    else {
        wsB = user_wsB;
    }

    /* The rank mapping of the pivot descriptor is overridden here */
    IPIV->get_rankof = chameleon_getrankof_ipiv_2d_diag;

    /* Submit the factorization of A, then the solve on B */
    chameleon_pzgetrf( wsA, A, IPIV, sequence, request );
    CHAMELEON_zgetrs_Tile_Async( ChamNoTrans, A, IPIV, B, wsB, sequence, request );

    /*
     * A workspace allocated in this function must outlive the tasks that were
     * just submitted. As soon as one of them is private, the call becomes
     * synchronous: wait for the sequence first, then free only what was
     * allocated here. Previously a private wsB was freed without any wait
     * when the caller had provided user_wsA.
     */
    if ( ( user_wsA == NULL ) || ( user_wsB == NULL ) ) {
        CHAMELEON_Desc_Flush( A, sequence );
        CHAMELEON_Desc_Flush( B, sequence );
        chameleon_sequence_wait( chamctxt, sequence );

        if ( user_wsA == NULL ) {
            CHAMELEON_zgetrf_WS_Free( wsA );
        }
        if ( user_wsB == NULL ) {
            CHAMELEON_zgetrf_WS_Free( wsB );
        }
    }

    return CHAMELEON_SUCCESS;
}
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -12,7 +12,7 @@
* @brief Chameleon zgesv_incpiv wrappers
* Release Date: November, 15th 2009
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -20,7 +20,7 @@
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Florent Pruvost
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgesv_nopiv wrappers
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -19,7 +19,8 @@
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Florent Pruvost
* @date 2022-02-22
* @author Matthieu Kuhn
* @date 2024-10-17
* @precisions normal z -> s d c
*
*/
......@@ -82,8 +83,8 @@
*
*/
int CHAMELEON_zgesv_nopiv( int N, int NRHS,
CHAMELEON_Complex64_t *A, int LDA,
CHAMELEON_Complex64_t *B, int LDB )
CHAMELEON_Complex64_t *A, int LDA,
CHAMELEON_Complex64_t *B, int LDB )
{
int NB;
int status;
......@@ -92,6 +93,7 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS,
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
CHAM_desc_t descAl, descAt;
CHAM_desc_t descBl, descBt;
void *ws = NULL;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
......@@ -138,7 +140,8 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS,
B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request );
/* Call the tile interface */
CHAMELEON_zgesv_nopiv_Tile_Async( &descAt, &descBt, sequence, &request );
ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( &descAt );
CHAMELEON_zgesv_nopiv_Tile_Async( &descAt, &descBt, ws, sequence, &request );
/* Submit the matrix conversion back */
chameleon_ztile2lap( chamctxt, &descAl, &descAt,
......@@ -149,6 +152,7 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS,
chameleon_sequence_wait( chamctxt, sequence );
/* Cleanup the temporary data */
CHAMELEON_zgetrf_nopiv_WS_Free( ws );
chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );
......@@ -195,10 +199,11 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS,
*/
int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B )
{
CHAM_context_t *chamctxt;
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
int status;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
int status;
void *ws;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
......@@ -207,12 +212,15 @@ int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B )
}
chameleon_sequence_create( chamctxt, &sequence );
CHAMELEON_zgesv_nopiv_Tile_Async( A, B, sequence, &request );
ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A );
CHAMELEON_zgesv_nopiv_Tile_Async( A, B, ws, sequence, &request );
CHAMELEON_Desc_Flush( A, sequence );
CHAMELEON_Desc_Flush( B, sequence );
chameleon_sequence_wait( chamctxt, sequence );
CHAMELEON_zgetrf_nopiv_WS_Free( ws );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
return status;
......@@ -248,10 +256,14 @@ int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B )
* @sa CHAMELEON_zcgesv_Tile_Async
*
*/
int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *B,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A,
CHAM_desc_t *B,
void *user_ws,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
CHAM_context_t *chamctxt;
struct chameleon_pzgetrf_nopiv_s *ws;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
......@@ -294,11 +306,23 @@ int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *B,
return CHAMELEON_SUCCESS;
*/
chameleon_pzgetrf_nopiv( A, sequence, request );
if ( user_ws == NULL ) {
ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A );
}
else {
ws = user_ws;
}
chameleon_pzgetrf_nopiv( ws, A, sequence, request );
chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
chameleon_pztrsm( ChamLeft, ChamUpper, ChamNoTrans, ChamNonUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
if ( user_ws == NULL ) {
CHAMELEON_Desc_Flush( A, sequence );
CHAMELEON_Desc_Flush( B, sequence );
chameleon_sequence_wait( chamctxt, sequence );
CHAMELEON_zgetrf_nopiv_WS_Free( ws );
}
return CHAMELEON_SUCCESS;
}
......@@ -4,19 +4,19 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgesvd wrappers
*
* @version 1.2.0
* @version 1.3.0
* @author Gregoire Pichon
* @author Mathieu Faverge
* @author Raphael Boucherie
* @author Alycia Lisito
* @date 2022-02-22
* @date 2025-01-29
* @precisions normal z -> s d c
*
*/
......@@ -472,13 +472,13 @@ int CHAMELEON_zgesvd_Tile_Async( cham_job_t jobu, cham_job_t jobvt,
}
#endif
E = malloc( MINMN * sizeof(double) );
E = malloc( sizeof(double) * MINMN );
if ( E == NULL ) {
chameleon_error( "CHAMELEON_zgesvd_Tile_Async", "malloc(E) failed" );
free( E );
return CHAMELEON_ERR_OUT_OF_RESOURCES;
}
memset( E, 0, MINMN * sizeof(double) );
memset( E, 0, sizeof(double) * MINMN );
/* Reduction to band + bidiagonal */
......
......@@ -4,7 +4,7 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -19,12 +19,16 @@
* @author Florent Pruvost
* @author Matthieu Kuhn
* @author Lionel Eyraud-Dubois
* @date 2024-03-16
* @author Alycia Lisito
* @author Xavier Lacoste
* @author Pierre Esterie
* @date 2024-12-09
*
* @precisions normal z -> s d c
*
*/
#include "control/common.h"
#include <limits.h>
/**
********************************************************************************
......@@ -55,6 +59,9 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
{
CHAM_context_t *chamctxt;
struct chameleon_pzgetrf_s *ws;
int lookahead, batch_size;
int P = chameleon_desc_datadist_get_iparam( A, 0 );
int Q = chameleon_desc_datadist_get_iparam( A, 1 );
chamctxt = chameleon_context_self();
if ( chamctxt == NULL ) {
......@@ -65,6 +72,9 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
ws->alg = ChamGetrfPPiv;
ws->ib = CHAMELEON_IB;
ws->laswp = CHAMELEON_zlaswp_WS_Alloc( ChamLeft, A );
ws->laswp->allreduce = 1;
{
char *algostr = chameleon_getenv( "CHAMELEON_GETRF_ALGO" );
......@@ -88,22 +98,27 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
chameleon_cleanenv( algostr );
}
ws->batch_size = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE", 1 );
if ( ws->batch_size > CHAMELEON_BATCH_SIZE ) {
batch_size = chameleon_getenv_get_value_int( "CHAMELEON_BATCH_SIZE", 0 );
batch_size = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE", batch_size );
if ( batch_size > CHAMELEON_BATCH_SIZE ) {
chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE must be smaller than CHAMELEON_BATCH_SIZE, please recompile with the right CHAMELEON_BATCH_SIZE, or reduce the CHAMELEON_GETRF_BATCH_SIZE value\n" );
ws->batch_size = CHAMELEON_BATCH_SIZE;
}
if ( (ws->batch_size > 1) && (CHAMELEON_Comm_rank() > 1) ) {
chameleon_warning( "CHAMELEON_BATCH_SIZE", "CHAMELEON_GETRF_BATCH_SIZE is unavailable in distributed, value forced to 1\n" );
ws->batch_size = 1;
}
ws->batch_size_blas2 = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE_BLAS2", batch_size );
ws->batch_size_blas2 = ( ws->batch_size_blas2 > CHAMELEON_BATCH_SIZE ) ? CHAMELEON_BATCH_SIZE : ws->batch_size_blas2;
ws->batch_size_blas3 = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_BATCH_SIZE_BLAS3", batch_size );
ws->batch_size_blas3 = ( ws->batch_size_blas3 > CHAMELEON_BATCH_SIZE ) ? CHAMELEON_BATCH_SIZE : ws->batch_size_blas3;
ws->laswp->batch_size_swap = ( ws->laswp->batch_size_swap == 0 ) ? batch_size : ws->laswp->batch_size_swap;
ws->laswp->batch_size_swap = ( ws->laswp->batch_size_swap > CHAMELEON_BATCH_SIZE ) ? CHAMELEON_BATCH_SIZE : ws->laswp->batch_size_swap;
ws->ringswitch = chameleon_getenv_get_value_int( "CHAMELEON_GETRF_RINGSWITCH", INT_MAX );
/* Allocation of U for permutation of the panels */
if ( ws->alg == ChamGetrfNoPivPerColumn ) {
chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, 1, A->nb, A->nb,
A->mt, A->nt * A->nb, 0, 0,
A->mt, A->nt * A->nb, A->p, A->q,
A->mt, A->nt * A->nb, P, Q,
NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
}
else if ( ( ws->alg == ChamGetrfPPiv ) ||
......@@ -112,7 +127,13 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, A->mb, A->nb, A->mb*A->nb,
A->m, A->n, 0, 0,
A->m, A->n, A->p, A->q,
A->m, A->n, P, Q,
NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
lookahead = chamctxt->lookahead;
chameleon_desc_init( &(ws->Wl), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb),
A->mt * A->mb, A->nb * Q * lookahead, 0, 0,
A->mt * A->mb, A->nb * Q * lookahead, P, Q,
NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
}
......@@ -131,7 +152,7 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
chameleon_desc_init( &(ws->Up), CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, ws->ib, A->nb, ws->ib * A->nb,
A->mt * ws->ib, A->nt * A->nb, 0, 0,
A->mt * ws->ib, A->nt * A->nb, A->p, A->q,
A->mt * ws->ib, A->nt * A->nb, P, Q,
NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
}
......@@ -162,6 +183,8 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws )
{
struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)user_ws;
CHAMELEON_zlaswp_WS_Free( ws->laswp );
if ( ( ws->alg == ChamGetrfNoPivPerColumn ) ||
( ws->alg == ChamGetrfPPiv ) ||
( ws->alg == ChamGetrfPPivPerColumn ) )
......@@ -172,6 +195,11 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws )
{
chameleon_desc_destroy( &(ws->Up) );
}
if ( ( ws->alg == ChamGetrfPPiv ) ||
( ws->alg == ChamGetrfPPivPerColumn ) )
{
chameleon_desc_destroy( &(ws->Wl) );
}
free( ws );
}
......@@ -227,13 +255,14 @@ int
CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
{
int NB;
int status;
CHAM_desc_t descAl, descAt;
CHAM_ipiv_t descIPIV;
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
int status;
CHAM_desc_t descAl, descAt;
CHAM_ipiv_t descIPIV;
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
struct chameleon_pzgetrf_s *ws;
int P, Q;
chamctxt = chameleon_context_self();
if ( chamctxt == NULL ) {
......@@ -273,13 +302,16 @@ CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV )
chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
A, NB, NB, LDA, N, M, N, sequence, &request );
P = chameleon_desc_datadist_get_iparam( &descAt, 0 );
Q = chameleon_desc_datadist_get_iparam( &descAt, 1 );
/* Allocate workspace for partial pivoting */
ws = CHAMELEON_zgetrf_WS_Alloc( &descAt );
if ( ( ws->alg == ChamGetrfPPivPerColumn ) ||
( ws->alg == ChamGetrfPPiv ) )
{
chameleon_ipiv_init( &descIPIV, &descAt, IPIV );
chameleon_ipiv_init( &descIPIV, ChamLeft, descAt.mb, chameleon_min( M, N ), P, P*Q, IPIV, chameleon_getrankof_ipiv_2d_diag);
}
/* Call the tile interface */
......@@ -482,6 +514,8 @@ CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A,
ws = user_ws;
}
IPIV->get_rankof = chameleon_getrankof_ipiv_2d_diag;
chameleon_pzgetrf( ws, A, IPIV, sequence, request );
if ( user_ws == NULL ) {
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgetrf_incpiv wrappers
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -19,7 +19,7 @@
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Florent Pruvost
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,26 +4,121 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgetrf_nopiv wrappers
*
* @version 1.2.0
* @version 1.3.0
* @author Omar Zenati
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Florent Pruvost
* @author Alycia Lisito
* @date 2022-02-22
* @author Matthieu Kuhn
* @author Pierre Esterie
* @date 2024-11-13
*
* @precisions normal z -> s d c
*
*/
#include "control/common.h"
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t
*
* @brief Allocate the required workspaces for asynchronous getrf
*
*******************************************************************************
*
* @param[in] A
* The descriptor of the matrix A.
*
*******************************************************************************
*
* @retval An allocated opaque pointer to use in CHAMELEON_zgetrf_nopiv_Tile_Async()
* and to free with CHAMELEON_zgetrf_nopiv_WS_Free().
*
*******************************************************************************
*
* @sa CHAMELEON_zgetrf_nopiv_Tile_Async
* @sa CHAMELEON_zgetrf_nopiv_WS_Free
*
*/
void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A )
{
    CHAM_context_t                   *chamctxt;
    struct chameleon_pzgetrf_nopiv_s *options;
    int                               P, Q;

    chamctxt = chameleon_context_self();
    if ( chamctxt == NULL ) {
        return NULL;
    }

    options = calloc( 1, sizeof(struct chameleon_pzgetrf_nopiv_s) );
    if ( options == NULL ) {
        /* Out of memory: report it the same way as a missing context */
        return NULL;
    }
    options->use_workspace = 0;

    /*
     * Data distribution parameters 0 and 1 of A (named P and Q in the other
     * wrappers of the library); they size the WL and WU workspaces below.
     */
    P = chameleon_desc_datadist_get_iparam( A, 0 );
    Q = chameleon_desc_datadist_get_iparam( A, 1 );

    /*
     * The workspaces are only allocated when one of the distribution
     * parameters is larger than 1, A uses the chameleon_getrankof_2d
     * mapping, and the generic algorithms are not enabled.
     */
    if ( ( ( P > 1 ) || ( Q > 1 ) ) &&
         ( A->get_rankof_init == chameleon_getrankof_2d ) &&
         ( chamctxt->generic_enabled != CHAMELEON_TRUE ) )
    {
        int lookahead = chamctxt->lookahead;

        options->use_workspace = 1;

        /* WL: A->mt tile rows by ( Q * lookahead ) tile columns */
        chameleon_desc_init( &(options->WL), CHAMELEON_MAT_ALLOC_TILE,
                             ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb),
                             A->mt * A->mb, A->nb * Q * lookahead, 0, 0,
                             A->mt * A->mb, A->nb * Q * lookahead,
                             P, Q,
                             NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );

        /* WU: ( P * lookahead ) tile rows by A->nt tile columns */
        chameleon_desc_init( &(options->WU), CHAMELEON_MAT_ALLOC_TILE,
                             ChamComplexDouble,
                             A->mb, A->nb, (A->mb * A->nb),
                             A->mb * P * lookahead, A->nt * A->nb, 0, 0,
                             A->mb * P * lookahead, A->nt * A->nb,
                             P, Q,
                             NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
    }

    return (void*)options;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t
*
* @brief Free the allocated workspaces for asynchronous getrf
*
*******************************************************************************
*
* @param[in,out] user_ws
* On entry, the opaque pointer allocated by CHAMELEON_zgetrf_nopiv_WS_Alloc()
* On exit, all data are freed.
*
*******************************************************************************
*
* @sa CHAMELEON_zgetrf_nopiv_Tile_Async
* @sa CHAMELEON_zgetrf_nopiv_WS_Alloc
*
*/
void CHAMELEON_zgetrf_nopiv_WS_Free( void *user_ws )
{
    struct chameleon_pzgetrf_nopiv_s *ws = (struct chameleon_pzgetrf_nopiv_s*)user_ws;

    /*
     * CHAMELEON_zgetrf_nopiv_WS_Alloc() returns NULL when no context is
     * available; accept that value here instead of dereferencing it.
     */
    if ( ws == NULL ) {
        return;
    }

    /* WL and WU are only initialized when the workspace is in use */
    if ( ws->use_workspace ) {
        chameleon_desc_destroy( &(ws->WL) );
        chameleon_desc_destroy( &(ws->WU) );
    }

    free( ws );
}
/**
********************************************************************************
......@@ -69,7 +164,7 @@
*
*/
int CHAMELEON_zgetrf_nopiv( int M, int N,
CHAMELEON_Complex64_t *A, int LDA )
CHAMELEON_Complex64_t *A, int LDA )
{
int NB;
int status;
......@@ -77,6 +172,7 @@ int CHAMELEON_zgetrf_nopiv( int M, int N,
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
void *ws = NULL;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
......@@ -117,7 +213,8 @@ int CHAMELEON_zgetrf_nopiv( int M, int N,
A, NB, NB, LDA, N, M, N, sequence, &request );
/* Call the tile interface */
CHAMELEON_zgetrf_nopiv_Tile_Async( &descAt, sequence, &request );
ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( &descAt );
CHAMELEON_zgetrf_nopiv_Tile_Async( &descAt, ws, sequence, &request );
/* Submit the matrix conversion back */
chameleon_ztile2lap( chamctxt, &descAl, &descAt,
......@@ -126,6 +223,7 @@ int CHAMELEON_zgetrf_nopiv( int M, int N,
chameleon_sequence_wait( chamctxt, sequence );
/* Cleanup the temporary data */
CHAMELEON_zgetrf_nopiv_WS_Free( ws );
chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
status = sequence->status;
......@@ -169,10 +267,11 @@ int CHAMELEON_zgetrf_nopiv( int M, int N,
*/
int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A )
{
CHAM_context_t *chamctxt;
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
int status;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
int status;
void *ws;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
......@@ -181,11 +280,14 @@ int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A )
}
chameleon_sequence_create( chamctxt, &sequence );
CHAMELEON_zgetrf_nopiv_Tile_Async( A, sequence, &request );
ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A );
CHAMELEON_zgetrf_nopiv_Tile_Async( A, ws, sequence, &request );
CHAMELEON_Desc_Flush( A, sequence );
chameleon_sequence_wait( chamctxt, sequence );
CHAMELEON_zgetrf_nopiv_WS_Free( ws );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
return status;
......@@ -224,11 +326,13 @@ int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A )
* @sa CHAMELEON_zgetrs_Tile_Async
*
*/
int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A,
void *user_ws,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
CHAM_context_t *chamctxt;
struct chameleon_pzgetrf_nopiv_s *ws;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
......@@ -263,7 +367,19 @@ int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A,
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
chameleon_pzgetrf_nopiv( A, sequence, request );
if ( user_ws == NULL ) {
ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A );
}
else {
ws = user_ws;
}
chameleon_pzgetrf_nopiv( ws, A, sequence, request );
if ( user_ws == NULL ) {
CHAMELEON_Desc_Flush( A, sequence );
chameleon_sequence_wait( chamctxt, sequence );
CHAMELEON_zgetrf_nopiv_WS_Free( ws );
}
return CHAMELEON_SUCCESS;
}
/**
*
* @file zgetrs.c
*
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgetrs wrappers
*
* @version 1.3.0
* @author Matteo Marcos
* @date 2025-03-24
* @precisions normal z -> s d c
*
*/
#include "control/common.h"
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t
*
* @brief Solves a system of linear equations A * X = B, with a general N-by-N matrix A
* using the tile LU factorization with partial pivoting computed by CHAMELEON_zgetrf.
*
*******************************************************************************
*
* @param[in] trans
* Intended to specify the form of the system of equations:
* = ChamNoTrans: A * X = B (No transpose)
* = ChamTrans: A^T * X = B (Transpose)
* = ChamConjTrans: A^H * X = B (Conjugate transpose)
* Only ChamNoTrans and ChamTrans are supported.
*
* @param[in] N
* The order of the matrix A. N >= 0.
*
* @param[in] NRHS
* The number of right hand sides, i.e., the number of columns of the matrix B.
* NRHS >= 0.
*
* @param[in] A
* The tile factors L and U from the factorization, computed by CHAMELEON_zgetrf.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,N).
*
* @param[in] IPIV
* The array of pivot indices associated to the PLU factorization of A,
* as computed by CHAMELEON_zgetrf(). It is wrapped internally into an
* ipiv descriptor with CHAMELEON_Ipiv_Create().
*
* @param[in,out] B
* On entry, the N-by-NRHS matrix of right hand side matrix B.
* On exit, the solution matrix X.
*
* @param[in] LDB
* The leading dimension of the array B. LDB >= max(1,N).
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @return <0 if -i, the i-th argument had an illegal value
*
*******************************************************************************
*
* @sa CHAMELEON_zgetrs_Tile
* @sa CHAMELEON_zgetrs_Tile_Async
* @sa CHAMELEON_cgetrs
* @sa CHAMELEON_dgetrs
* @sa CHAMELEON_sgetrs
* @sa CHAMELEON_zgetrf
*
*/
int CHAMELEON_zgetrs( cham_trans_t trans, int N, int NRHS,
                      CHAMELEON_Complex64_t *A, int LDA,
                      int *IPIV,
                      CHAMELEON_Complex64_t *B, int LDB )
{
    int                         NB;
    int                         status;
    CHAM_context_t             *chamctxt;
    CHAM_ipiv_t                *descIPIV;
    RUNTIME_sequence_t         *sequence = NULL;
    RUNTIME_request_t           request  = RUNTIME_REQUEST_INITIALIZER;
    CHAM_desc_t                 descAl, descAt;
    CHAM_desc_t                 descBl, descBt;
    struct chameleon_pzgetrf_s *ws;
    int                         P, Q;

    chamctxt = chameleon_context_self();
    if ( chamctxt == NULL ) {
        chameleon_fatal_error("CHAMELEON_zgetrs", "CHAMELEON not initialized");
        return CHAMELEON_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if ( ( trans != ChamTrans ) && ( trans != ChamNoTrans ) ) {
        chameleon_error("CHAMELEON_zgetrs", "Only ChamTrans and ChamNoTrans are supported");
        return CHAMELEON_ERR_ILLEGAL_VALUE;
    }
    if ( N < 0 ) {
        chameleon_error("CHAMELEON_zgetrs", "illegal value of N");
        return -2;
    }
    if ( NRHS < 0 ) {
        chameleon_error("CHAMELEON_zgetrs", "illegal value of NRHS");
        return -3;
    }
    if ( LDA < chameleon_max( 1, N ) ) {
        chameleon_error("CHAMELEON_zgetrs", "illegal value of LDA");
        return -5;
    }
    if ( LDB < chameleon_max( 1, N ) ) {
        chameleon_error("CHAMELEON_zgetrs", "illegal value of LDB");
        /* LDB is the 8th argument of this function: (trans, N, NRHS, A, LDA, IPIV, B, LDB) */
        return -8;
    }

    /* Quick return */
    if ( chameleon_min( N, NRHS ) == 0 ) {
        return CHAMELEON_SUCCESS;
    }

    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    status = chameleon_tune( CHAMELEON_FUNC_ZGESV, N, N, NRHS );
    if ( status != CHAMELEON_SUCCESS ) {
        chameleon_error("CHAMELEON_zgetrs", "chameleon_tune() failed");
        return status;
    }

    /* Set NB */
    NB = CHAMELEON_NB;

    chameleon_sequence_create( chamctxt, &sequence );

    /* Submit the matrix conversion (A is read only, B is overwritten by X) */
    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower,
                         A, NB, NB, LDA, N, N, N, sequence, &request );
    chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
                         B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request );

    /* Data distribution parameters of the tiled A, forwarded to the ipiv descriptor */
    P = chameleon_desc_datadist_get_iparam( &descAt, 0 );
    Q = chameleon_desc_datadist_get_iparam( &descAt, 1 );

    /* The workspace is allocated for B, as B is the matrix being permuted */
    ws = CHAMELEON_zgetrf_WS_Alloc( &descBt );

    /* Wrap the user pivot array into an ipiv descriptor */
    CHAMELEON_Ipiv_Create( &descIPIV, ChamLeft, descAt.mb, N, P, P*Q, IPIV );
    CHAMELEON_Ipiv_Init( descIPIV );

    /* Call the tile interface */
    CHAMELEON_zgetrs_Tile_Async( trans, &descAt, descIPIV, &descBt, ws, sequence, &request );

    /* Submit the matrix conversion back */
    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
                         ChamDescInput, ChamUpperLower, sequence, &request );
    chameleon_ztile2lap( chamctxt, &descBl, &descBt,
                         ChamDescInout, ChamUpperLower, sequence, &request );

    /* Everything submitted above must be complete before releasing the temporaries */
    chameleon_sequence_wait( chamctxt, sequence );

    /* Cleanup the temporary data */
    CHAMELEON_Ipiv_Destroy( &descIPIV );
    CHAMELEON_zgetrf_WS_Free( ws );
    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
    chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );

    status = sequence->status;
    chameleon_sequence_destroy( chamctxt, sequence );
    return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile
*
* @brief Solves a system of linear equations using previously
* computed LU factorization with partial pivoting.
* Tile equivalent of CHAMELEON_zgetrs().
* Operates on matrices stored by tiles.
* All matrices are passed through descriptors.
* All dimensions are taken from the descriptors.
*
*******************************************************************************
*
* @param[in] trans
* Intended to specify the form of the system of equations:
* = ChamNoTrans: A * X = B (No transpose)
* = ChamTrans: A^T * X = B (Transpose)
* = ChamConjTrans: A^H * X = B (Conjugate transpose)
* Only ChamNoTrans and ChamTrans are supported.
*
* @param[in] A
* The tile factors L and U from the factorization, computed by CHAMELEON_zgetrf.
*
* @param[in] IPIV
* On entry, ipiv descriptor associated to A and created with
* CHAMELEON_Ipiv_Create().
* On exit, it contains the pivot indices associated to the PLU
* factorization of A.
*
* @param[in,out] B
* On entry, the N-by-NRHS matrix of right hand side matrix B.
* On exit, the solution matrix X.
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
*
*******************************************************************************
*
* @sa CHAMELEON_zgetrs
* @sa CHAMELEON_zgetrs_Tile_Async
* @sa CHAMELEON_cgetrs_Tile
* @sa CHAMELEON_dgetrs_Tile
* @sa CHAMELEON_sgetrs_Tile
* @sa CHAMELEON_zgetrf_Tile
*
*/
int CHAMELEON_zgetrs_Tile( cham_trans_t trans,
                           CHAM_desc_t *A,
                           CHAM_ipiv_t *IPIV,
                           CHAM_desc_t *B )
{
    CHAM_context_t     *chamctxt;
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
    int                 status;
    void               *ws;

    chamctxt = chameleon_context_self();
    if ( ( trans != ChamTrans ) && ( trans != ChamNoTrans ) ) {
        chameleon_error("CHAMELEON_zgetrs_Tile", "Only ChamTrans and ChamNoTrans are supported");
        return CHAMELEON_ERR_ILLEGAL_VALUE;
    }
    if ( chamctxt == NULL ) {
        chameleon_fatal_error("CHAMELEON_zgetrs_Tile", "CHAMELEON not initialized");
        return CHAMELEON_ERR_NOT_INITIALIZED;
    }

    chameleon_sequence_create( chamctxt, &sequence );

    /* Provide the workspace ourselves so the asynchronous call does not allocate one */
    ws = CHAMELEON_zgetrf_WS_Alloc( B );
    CHAMELEON_zgetrs_Tile_Async( trans, A, IPIV, B, ws, sequence, &request );

    CHAMELEON_Desc_Flush( A, sequence );
    CHAMELEON_Desc_Flush( B, sequence );

    /*
     * Wait for the submitted operations before releasing the workspace:
     * the asynchronous call hands ws->laswp to the row-swap step, so
     * freeing it earlier would release memory that may still be in use.
     */
    chameleon_sequence_wait( chamctxt, sequence );
    CHAMELEON_zgetrf_WS_Free( ws );

    status = sequence->status;
    chameleon_sequence_destroy( chamctxt, sequence );
    return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile_Async
*
* @brief Solves a system of linear equations using previously
* computed LU factorization with partial pivoting.
* Non-blocking equivalent of CHAMELEON_zgetrs_Tile().
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in] trans
* Intended to specify the form of the system of equations:
* = ChamNoTrans: A * X = B (No transpose)
* = ChamTrans: A^T * X = B (Transpose)
* = ChamConjTrans: A^H * X = B (Conjugate transpose)
* Only ChamNoTrans and ChamTrans are supported.
*
* @param[in] A
* The tile factors L and U from the factorization, computed by CHAMELEON_zgetrf.
*
* @param[in] IPIV
* On entry, ipiv descriptor associated to A and created with
* CHAMELEON_Ipiv_Create().
* On exit, it contains the pivot indices associated to the PLU
* factorization of A.
*
* @param[in,out] B
* On entry, the N-by-NRHS matrix of right hand side matrix B.
* On exit, the N-by-NRHS solution matrix X.
*
* @param[in,out] user_ws
* The opaque pointer to pre-allocated getrf workspace through
* CHAMELEON_zgetrf_WS_Alloc() for B. If user_ws is NULL, it is automatically
* allocated, but BE CAREFUL as it switches the call from asynchronous
* to synchronous call.
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa CHAMELEON_zgetrs
* @sa CHAMELEON_zgetrs_Tile
* @sa CHAMELEON_cgetrs_Tile_Async
* @sa CHAMELEON_dgetrs_Tile_Async
* @sa CHAMELEON_sgetrs_Tile_Async
* @sa CHAMELEON_zgetrf_Tile_Async
*
*/
int CHAMELEON_zgetrs_Tile_Async( cham_trans_t        trans,
                                 CHAM_desc_t        *A,
                                 CHAM_ipiv_t        *IPIV,
                                 CHAM_desc_t        *B,
                                 void               *user_ws,
                                 RUNTIME_sequence_t *sequence,
                                 RUNTIME_request_t  *request )
{
    CHAM_context_t             *chamctxt;
    struct chameleon_pzgetrf_s *ws;
    RUNTIME_option_t            options;
    int                         k, tempkm;

    chamctxt = chameleon_context_self();
    if ( ( trans != ChamTrans ) && ( trans != ChamNoTrans ) ) {
        chameleon_error("CHAMELEON_zgetrs", "Only ChamTrans and ChamNoTrans are supported");
        return CHAMELEON_ERR_ILLEGAL_VALUE;
    }
    if ( chamctxt == NULL ) {
        chameleon_fatal_error("CHAMELEON_zgetrs_Tile", "CHAMELEON not initialized");
        return CHAMELEON_ERR_NOT_INITIALIZED;
    }
    if ( sequence == NULL ) {
        chameleon_fatal_error("CHAMELEON_zgetrs_Tile", "NULL sequence");
        return CHAMELEON_ERR_UNALLOCATED;
    }
    if ( request == NULL ) {
        chameleon_fatal_error("CHAMELEON_zgetrs_Tile", "NULL request");
        return CHAMELEON_ERR_UNALLOCATED;
    }

    /* Check sequence status */
    if ( sequence->status == CHAMELEON_SUCCESS ) {
        request->status = CHAMELEON_SUCCESS;
    }
    else {
        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED );
    }

    /* Check descriptors for correctness */
    if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) {
        chameleon_error("CHAMELEON_zgetrs_Tile", "invalid first descriptor");
        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
    }
    if ( chameleon_desc_check( B ) != CHAMELEON_SUCCESS ) {
        chameleon_error("CHAMELEON_zgetrs_Tile", "invalid third descriptor");
        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
    }

    /* Check input arguments */
    if ( ( A->nb != A->mb ) || ( B->nb != B->mb ) ) {
        chameleon_error("CHAMELEON_zgetrs_Tile", "only square tiles supported");
        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
    }

    /* Use the caller's workspace when given, otherwise allocate a private one */
    if ( user_ws == NULL ) {
        ws = CHAMELEON_zgetrf_WS_Alloc( B );
    }
    else {
        ws = user_ws;
    }

    /*
     * When the ipiv descriptor carries data, submit one ipiv_to_perm task per
     * tile row of A and wait for them before going further.
     * NOTE(review): no teardown of `options` is visible in this function;
     * confirm whether RUNTIME_options_init() needs a matching release call.
     */
    if ( IPIV->data != NULL ) {
        RUNTIME_options_init( &options, chamctxt, sequence, request );
        for ( k = 0; k < A->mt; k++ ) {
            tempkm = A->get_blkdim( A, k, DIM_m, A->m );
            INSERT_TASK_ipiv_to_perm( &options, k * A->mb, tempkm, tempkm, 0, A->m,
                                      IPIV, k );
        }
        chameleon_sequence_wait( chamctxt, sequence );
    }

    if ( trans == ChamNoTrans ) {
        /* A * X = B: apply the row swaps forward, then solve with L and with U */
        chameleon_pzlaswp( ws->laswp, ChamDirForward, B, IPIV, sequence, request );
        chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
        chameleon_pztrsm( ChamLeft, ChamUpper, ChamNoTrans, ChamNonUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
    }
    else {
        /* A^T * X = B: solve with U^T and with L^T, then apply the row swaps backward */
        chameleon_pztrsm( ChamLeft, ChamUpper, ChamTrans, ChamNonUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
        chameleon_pztrsm( ChamLeft, ChamLower, ChamTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
        chameleon_pzlaswp( ws->laswp, ChamDirBackward, B, IPIV, sequence, request );
    }

    if ( user_ws == NULL ) {
        /*
         * The private workspace was handed to the operations submitted above,
         * so the call becomes synchronous here: flush, wait for completion,
         * and only then release it.
         */
        CHAMELEON_Desc_Flush( A, sequence );
        CHAMELEON_Desc_Flush( B, sequence );
        chameleon_sequence_wait( chamctxt, sequence );
        CHAMELEON_zgetrf_WS_Free( ws );
    }
    return CHAMELEON_SUCCESS;
}
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgetrs_incpiv wrappers
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -19,7 +19,7 @@
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Florent Pruvost
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -4,14 +4,14 @@
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zgetrs_nopiv wrappers
*
* @version 1.2.0
* @version 1.3.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for CHAMELEON 0.9.2
* @author Jakub Kurzak
......@@ -19,7 +19,7 @@
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @author Florent Pruvost
* @date 2022-02-22
* @date 2024-02-18
* @precisions normal z -> s d c
*
*/
......
......@@ -2,7 +2,7 @@
*
* @file zgram.c
*
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
......@@ -14,7 +14,8 @@
* @author Florent Pruvost
* @author Philippe Swartvagher
* @author Lionel Eyraud-Dubois
* @date 2023-07-05
* @author Pierre Esterie
* @date 2024-11-13
* @precisions normal z -> s d c z
*
*/
......@@ -58,19 +59,23 @@ void *CHAMELEON_zgram_WS_Alloc( const CHAM_desc_t *A )
options = calloc( 1, sizeof(struct chameleon_pzgram_s) );
workmt = chameleon_max( A->mt, A->p );
worknt = chameleon_max( A->nt, A->q );
workmt = chameleon_max( A->mt, chameleon_desc_datadist_get_iparam(A, 0) );
worknt = chameleon_max( A->nt, chameleon_desc_datadist_get_iparam(A, 1) );
chameleon_desc_init( &(options->Wcol), CHAMELEON_MAT_ALLOC_TILE,
ChamRealDouble, 2, A->nb, 2*A->nb,
2*workmt, A->n, 0, 0,
2*workmt, A->n, A->p, A->q,
2*workmt, A->n,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
chameleon_desc_init( &(options->Welt), CHAMELEON_MAT_ALLOC_TILE,
ChamRealDouble, 2, 1, 2,
2, worknt, 0, 0,
2, worknt, A->p, A->q,
2, worknt,
chameleon_desc_datadist_get_iparam(A, 0),
chameleon_desc_datadist_get_iparam(A, 1),
NULL, NULL, NULL, NULL );
return (void*)options;
......