Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 04b62ec2 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Merge branch 'poinv' into 'master'

Poinv

See merge request !271
parents 9594f9af 3870fdd5
No related branches found
No related tags found
1 merge request!271Poinv
......@@ -166,6 +166,7 @@ set(ZSRC
zpotrf.c
zsytrf.c
zpotri.c
zpoinv.c
zpotrimm.c
zpotrs.c
zsytrs.c
......
/**
*
* @file zpoinv.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zpoinv wrappers
*
* @version 1.0.0
* @author Alycia Lisito
* @date 2022-02-02
* @precisions normal z -> s d c
*
*/
#include "control/common.h"
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t
*
* @brief Computes the inverse of a complex Hermitian positive definite
* matrix A using the Cholesky factorization A = U^H*U or A = L*L^H
* computed by CHAMELEON_zpotrf().
*
*******************************************************************************
*
* @param[in] uplo
* = ChamUpper: Upper triangle of A is stored;
* = ChamLower: Lower triangle of A is stored.
*
* @param[in] N
* The order of the matrix A. N >= 0.
*
* @param[in,out] A
* On entry, the symmetric positive definite (or Hermitian) matrix A.
* If uplo = ChamUpper, the leading N-by-N upper triangular part of A
* contains the upper triangular part of the matrix A, and the strictly lower triangular
* part of A is not referenced.
* If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower
* triangular part of the matrix A, and the strictly upper triangular part of A is not
* referenced.
* On exit, the upper or lower triangle of the (Hermitian)
* inverse of A, overwriting the input factor U or L.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,N).
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval <0 if -i, the i-th argument had an illegal value
* @retval >0 if i, the (i,i) element of the factor U or L is
* zero, and the inverse could not be computed.
*
*******************************************************************************
*
* @sa CHAMELEON_zpoinv_Tile
* @sa CHAMELEON_zpoinv_Tile_Async
* @sa CHAMELEON_cpoinv
* @sa CHAMELEON_dpoinv
* @sa CHAMELEON_spoinv
* @sa CHAMELEON_zpotrf
*
*/
int CHAMELEON_zpoinv( cham_uplo_t uplo, int N,
CHAMELEON_Complex64_t *A, int LDA )
{
int NB;
int status;
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
CHAM_desc_t descAl, descAt;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zpoinv", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
/* Check input arguments */
if ((uplo != ChamUpper) && (uplo != ChamLower)) {
chameleon_error("CHAMELEON_zpoinv", "illegal value of uplo");
return -1;
}
if (N < 0) {
chameleon_error("CHAMELEON_zpoinv", "illegal value of N");
return -2;
}
if (LDA < chameleon_max(1, N)) {
chameleon_error("CHAMELEON_zpoinv", "illegal value of LDA");
return -4;
}
/* Quick return */
if (chameleon_max(N, 0) == 0)
return CHAMELEON_SUCCESS;
/* Tune NB depending on M, N & NRHS; Set NBNB */
status = chameleon_tune(CHAMELEON_FUNC_ZPOSV, N, N, 0);
if (status != CHAMELEON_SUCCESS) {
chameleon_error("CHAMELEON_zpoinv", "chameleon_tune() failed");
return status;
}
/* Set NT */
NB = CHAMELEON_NB;
chameleon_sequence_create( chamctxt, &sequence );
/* Submit the matrix conversion */
chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, uplo,
A, NB, NB, LDA, N, N, N, sequence, &request );
/* Call the tile interface */
CHAMELEON_zpoinv_Tile_Async( uplo, &descAt, sequence, &request );
/* Submit the matrix conversion back */
chameleon_ztile2lap( chamctxt, &descAl, &descAt,
ChamDescInout, uplo, sequence, &request );
chameleon_sequence_wait( chamctxt, sequence );
/* Cleanup the temporary data */
chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile
*
* @brief Tile equivalent of CHAMELEON_zpoinv()
*
* Computes the inverse of a complex Hermitian
* positive definite matrix A using the Cholesky factorization
* A = U^H*U or A = L*L^H computed by CHAMELEON_zpotrf().
*
* Operates on matrices stored by tiles.
* All matrices are passed through descriptors.
* All dimensions are taken from the descriptors.
*
*******************************************************************************
*
* @param[in] uplo
* = ChamUpper: Upper triangle of A is stored;
* = ChamLower: Lower triangle of A is stored.
*
* @param[in] A
* On entry, the symmetric positive definite (or Hermitian) matrix A.
* If uplo = ChamUpper, the leading N-by-N upper triangular part of A
* contains the upper triangular part of the matrix A, and the strictly lower triangular
* part of A is not referenced.
* If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower
* triangular part of the matrix A, and the strictly upper triangular part of A is not
* referenced.
* On exit, the upper or lower triangle of the (Hermitian)
* inverse of A, overwriting the input factor U or L.
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval >0 if i, the leading minor of order i of A is not
* positive definite, so the factorization could not be
* completed, and the solution has not been computed.
*
*******************************************************************************
*
* @sa CHAMELEON_zpoinv
* @sa CHAMELEON_zpoinv_Tile_Async
* @sa CHAMELEON_cpoinv_Tile
* @sa CHAMELEON_dpoinv_Tile
* @sa CHAMELEON_spoinv_Tile
* @sa CHAMELEON_zpotrf_Tile
*
*/
int CHAMELEON_zpoinv_Tile( cham_uplo_t uplo, CHAM_desc_t *A )
{
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
int status;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zpoinv_Tile", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
chameleon_sequence_create( chamctxt, &sequence );
CHAMELEON_zpoinv_Tile_Async( uplo, A, sequence, &request );
CHAMELEON_Desc_Flush( A, sequence );
chameleon_sequence_wait( chamctxt, sequence );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile_Async
*
* @brief Non-blocking equivalent of CHAMELEON_zpoinv_Tile().
*
* Submit the tasks to compute the inverse of a complex Hermitian
* positive definite matrix A using the Cholesky factorization A = U^H*U
* or A = L*L^H computed by CHAMELEON_zpotrf.
*
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa CHAMELEON_zpoinv
* @sa CHAMELEON_zpoinv_Tile
* @sa CHAMELEON_cpoinv_Tile_Async
* @sa CHAMELEON_dpoinv_Tile_Async
* @sa CHAMELEON_spoinv_Tile_Async
* @sa CHAMELEON_zpotrf_Tile_Async
*
*/
int CHAMELEON_zpoinv_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zpoinv_Tile_Async", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
if (sequence == NULL) {
chameleon_fatal_error("CHAMELEON_zpoinv_Tile_Async", "NULL sequence");
return CHAMELEON_ERR_UNALLOCATED;
}
if (request == NULL) {
chameleon_fatal_error("CHAMELEON_zpoinv_Tile_Async", "NULL request");
return CHAMELEON_ERR_UNALLOCATED;
}
/* Check sequence status */
if (sequence->status == CHAMELEON_SUCCESS) {
request->status = CHAMELEON_SUCCESS;
}
else {
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
}
/* Check descriptors for correctness */
if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
chameleon_error("CHAMELEON_zpoinv_Tile_Async", "invalid descriptor");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
/* Check input arguments */
if (A->nb != A->mb) {
chameleon_error("CHAMELEON_zpoinv_Tile_Async", "only square tiles supported");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
if ((uplo != ChamUpper) && (uplo != ChamLower)) {
chameleon_error("CHAMELEON_zpoinv_Tile_Async", "illegal value of uplo");
return chameleon_request_fail(sequence, request, -1);
}
/* Quick return */
if ( chameleon_max( A->n, 0 ) == 0 ) {
return CHAMELEON_SUCCESS;
}
chameleon_pzpotrf( uplo, A, sequence, request );
chameleon_pztrtri( uplo, ChamNonUnit, A, sequence, request );
chameleon_pzlauum( uplo, A, sequence, request );
return CHAMELEON_SUCCESS;
}
......@@ -80,6 +80,7 @@ int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_
int CHAMELEON_zplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed );
int CHAMELEON_zplrnt( int M, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed );
int CHAMELEON_zplrnk( int M, int N, int K, CHAMELEON_Complex64_t *C, int LDC, unsigned long long int seedA, unsigned long long int seedB );
int CHAMELEON_zpoinv(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA);
int CHAMELEON_zposv(cham_uplo_t uplo, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB);
int CHAMELEON_zpotrf(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA);
int CHAMELEON_zsytrf(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA);
......@@ -158,6 +159,7 @@ int CHAMELEON_zplghe_Tile(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigne
int CHAMELEON_zplgsy_Tile(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed );
int CHAMELEON_zplrnt_Tile(CHAM_desc_t *A, unsigned long long int seed );
int CHAMELEON_zplrnk_Tile(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB );
int CHAMELEON_zpoinv_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
int CHAMELEON_zposv_Tile(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B);
int CHAMELEON_zpotrf_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
int CHAMELEON_zsytrf_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
......@@ -232,6 +234,7 @@ int CHAMELEON_zplghe_Tile_Async(double bump, cham_uplo_t uplo, CHAM_desc_t *A, u
int CHAMELEON_zplgsy_Tile_Async(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
int CHAMELEON_zplrnt_Tile_Async(CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
int CHAMELEON_zplrnk_Tile_Async(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
int CHAMELEON_zpoinv_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
int CHAMELEON_zposv_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
int CHAMELEON_zpotrf_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
int CHAMELEON_zsytrf_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
......
......@@ -73,6 +73,7 @@ set(ZSRC
testing_ztrtri.c
testing_zlauum.c
testing_zpotri.c
testing_zpoinv.c
testing_zsytrf.c
testing_zsytrs.c
testing_zsysv.c
......
......@@ -40,7 +40,7 @@ if (NOT CHAMELEON_SIMULATION)
set( TESTS ${TESTS}
potrf potrs posv trtri lauum )
if ( NOT CHAMELEON_SCHED_PARSEC )
set( TESTS ${TESTS} potri )
set( TESTS ${TESTS} potri poinv)
endif()
if ( ${prec} STREQUAL c OR ${prec} STREQUAL z )
set( TESTS ${TESTS}
......
# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step]
# Not given parameters will receive default values
# POINV
# nb: Tile size
# ib: Inner tile size
# n: Order of the matrix A
# lda: Leading dimension of matrix A
# uplo: Matrix part to be considered (0: Upper, 1: Lower)
op = poinv
nb = 16, 17
ib = 8
n = 15, 19, 37
lda = 41
uplo = 0,1
/**
*
* @file testing_zpoinv.c
*
* @copyright 2019-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zpoinv testing
*
* @version 1.1.0
* @author Lucas Barros de Assis
* @author Florent Pruvost
* @author Mathieu Faverge
* @author Alycia Lisito
* @date 2022-02-02
* @precisions normal z -> c d s
*
*/
#include <chameleon.h>
#include <assert.h>
#include "testings.h"
#include "testing_zcheck.h"
#include <chameleon/flops.h>
static cham_fixdbl_t
flops_zpoinv( int N )
{
cham_fixdbl_t flops = flops_zpotrf( N ) + flops_zpotri( N );
return flops;
}
int
testing_zpoinv( run_arg_list_t *args, int check )
{
testdata_t test_data = { .args = args };
int hres = 0;
/* Read arguments */
int async = parameters_getvalue_int( "async" );
intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" );
int nb = run_arg_get_int( args, "nb", 320 );
int P = parameters_getvalue_int( "P" );
cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper );
int N = run_arg_get_int( args, "N", 1000 );
int LDA = run_arg_get_int( args, "LDA", N );
int seedA = run_arg_get_int( args, "seedA", random() );
int Q = parameters_compute_q( P );
/* Descriptors */
CHAM_desc_t *descA;
CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
/* Create the matrices */
CHAMELEON_Desc_Create(
&descA, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q );
/* Initialise the matrix with the random values */
CHAMELEON_zplghe_Tile( (double)N, uplo, descA, seedA );
/* Calculates the inversed matrix */
testing_start( &test_data );
if ( async ) {
hres = CHAMELEON_zpoinv_Tile_Async( uplo, descA, test_data.sequence, &test_data.request );
CHAMELEON_Desc_Flush( descA, test_data.sequence );
}
else {
hres = CHAMELEON_zpoinv_Tile( uplo, descA );
}
test_data.hres = hres;
testing_stop( &test_data, flops_zpoinv( N ) );
/* Check the inverse */
if ( check ) {
CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL );
CHAMELEON_zplghe_Tile( (double)N, uplo, descA0, seedA );
hres += check_ztrtri( args, ChamHermitian, uplo, ChamNonUnit, descA0, descA );
CHAMELEON_Desc_Destroy( &descA0 );
}
CHAMELEON_Desc_Destroy( &descA );
return hres;
}
testing_t test_zpoinv;
const char *zpoinv_params[] = { "mtxfmt", "nb", "uplo", "n", "lda", "seedA", NULL };
const char *zpoinv_output[] = { NULL };
const char *zpoinv_outchk[] = { "RETURN", NULL };
/**
* @brief Testing registration function
*/
void testing_zpoinv_init( void ) __attribute__( ( constructor ) );
void
testing_zpoinv_init( void )
{
test_zpoinv.name = "zpoinv";
test_zpoinv.helper = "Hermitian positive definite matrix inversion";
test_zpoinv.params = zpoinv_params;
test_zpoinv.output = zpoinv_output;
test_zpoinv.outchk = zpoinv_outchk;
test_zpoinv.fptr = testing_zpoinv;
test_zpoinv.next = NULL;
testing_register( &test_zpoinv );
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment