diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py index e4d809341c0fc7d2bc7fc18851f1f52653ec6529..a4cb1fc264233227fa012b5ce8f38907bdfb0a53 100644 --- a/cmake_modules/local_subs.py +++ b/cmake_modules/local_subs.py @@ -16,6 +16,10 @@ _extra_blas = [ ('', 'slatro', 'dlatro', 'clatro', 'zlatro' ), #=> Replace by getmo/gecmo as in essl ('', 'sbuild', 'dbuild', 'cbuild', 'zbuild' ), #=> Replace by map function ('', 'sgram', 'dgram', 'cgram', 'zgram' ), + ('', 'slaran', 'dlaran', 'claran', 'zlaran' ), + ('', 'slaran', 'dlaran', 'slaran', 'dlaran' ), + ('', 'slatm1', 'dlatm1', 'clatm1', 'zlatm1' ), + ('', 'slatm1', 'dlatm1', 'slatm1', 'dlatm1' ), ] _extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ] diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 9066b5e32a75fe48d53ca0fdd70e177c133b9210..cdf6d90212e850c6ccfb3444d8e2ae1dbf9de42b 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -118,6 +118,7 @@ set(ZSRC pzlansy.c pzlaset2.c pzlaset.c + pzlatms.c pzlauum.c pzplghe.c pzplgsy.c @@ -166,6 +167,7 @@ set(ZSRC zlansy.c zlantr.c zlaset.c + zlatms.c zlauum.c zplghe.c zplgsy.c diff --git a/compute/pzlatms.c b/compute/pzlatms.c new file mode 100644 index 0000000000000000000000000000000000000000..134a39ff24ce91e23b901ce2266f7c7ad7650d40 --- /dev/null +++ b/compute/pzlatms.c @@ -0,0 +1,330 @@ +/** + * + * @file pzlatms.c + * + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * @copyright 2016-2020 KAUST. All rights reserved. + * + *** + * + * @brief Chameleon zlatms parallel algorithm + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2020-10-06 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" +#include <stdlib.h> +#if !defined(CHAMELEON_SIMULATION) +#include <coreblas/random.h> +#include <coreblas.h> +#endif + +#define A(m, n) A, m, n + +/* + * Static variable to know how to handle the data within the kernel + * This assumes that only one runtime is enabled at a time. + */ +static RUNTIME_id_t zlatms_runtime_id = RUNTIME_SCHED_STARPU; + +static inline int +zlaset_diag( const CHAM_desc_t *descA, + cham_uplo_t uplo, int m, int n, + CHAM_tile_t *tileA, void *op_args ) +{ + CHAMELEON_Complex64_t *A; + const double *D = (const double *)op_args; + + int tempmm = m == descA->mt-1 ? descA->m-m*descA->mb : descA->mb; + int tempnn = n == descA->nt-1 ? descA->n-n*descA->nb : descA->nb; + int minmn = chameleon_min( tempmm, tempnn ); + int lda, i; + + if ( zlatms_runtime_id == RUNTIME_SCHED_PARSEC ) { + A = (CHAMELEON_Complex64_t*)tileA; + lda = descA->get_blkldd( descA, m ); + } + else { + A = tileA->mat; + lda = tileA->ld; + } + + assert( m == n ); + + /* Shift to the values corresponding to the tile */ + D += m * descA->mb; + + for( i=0; i<minmn; i++, D++ ) { + *A = *D; + A += lda + 1; + } + + return 0; +} + +/** + * Parallel scale of a matrix A + */ +void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, + double *D, int mode, double cond, double dmax, CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + RUNTIME_option_t options; + CHAM_desc_t descU, descV; + CHAM_desc_t *descDptr = NULL; + CHAM_desc_t descTS, descTT, descD; + libhqr_matrix_t mat; + libhqr_tree_t qrtree; + cham_trans_t trans = ChamConjTrans; + + int n, ib, alloc_d = 0; + int kt = chameleon_min( A->mt, A->nt ); + int minmn = chameleon_min( A->m, A->n ); + + chamctxt = chameleon_context_self(); + if (sequence->status != CHAMELEON_SUCCESS) { + return; + } + ib = CHAMELEON_IB; + zlatms_runtime_id = chamctxt->scheduler; + + RUNTIME_options_init(&options, chamctxt, sequence, request); + + /* Start initialiazing A */ + chameleon_pzlaset( ChamUpperLower, 0., 0., A, sequence, request ); + + /* Check if we need to perform UDV or UDU' */ + if ( sym == ChamNonsymPosv ) { + trans = ChamNoTrans; + } + else if ( sym == ChamSymPosv ) { + trans = ChamTrans; + } + +#if !defined(CHAMELEON_SIMULATION) + /* Compute the diagonal D */ + { + int irsign, rc; + + /* Check what to do with the sign in dlatm1 */ +#if defined(PRECISION_z) || defined(PRECISION_c) + irsign = ( sym == ChamHermGeev ); +#else + irsign = ( sym == ChamHermGeev ) || ( sym == ChamSymPosv ); +#endif + + if ( D == NULL ) { + D = malloc( minmn * sizeof(double) ); + alloc_d = 1; + } + rc = CORE_dlatm1( mode, cond, irsign, idist, seed, D, minmn ); + if ( rc != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zlatms", "Could not generated D. dlatm1 failed" ); + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Sequence_Wait( sequence ); + + if ( alloc_d ) { + free( D ); + } + return; + } + + /* Scale by dmax */ + if ( ( mode != 0 ) && ( abs(mode) != 6 ) ) + { + int imax = cblas_idamax( minmn, D, 1 ); + double temp = D[imax]; + + if ( temp > 0. ) { + cblas_dscal( minmn, dmax / temp, D, 1 ); + } + else { + chameleon_error( "CHAMELEON_zlatms", "Could not scale D (max sing. value is 0.)" ); + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Sequence_Wait( sequence ); + + if ( alloc_d ) { + free( D ); + } + } + } + } +#endif + + /* Copy D to the diagonal of A */ + for (n = 0; n < kt; n++) { + INSERT_TASK_map( + &options, + ChamUpperLower, A(n, n), + zlaset_diag, D ); + } + + /** + * Generate A = U D V + * + * U and V are random unitary matrices + * V = U^t or U^h if A is respectively symmetric or hermitian + * + */ + + /* Generate U and apply it */ + { + /* U is of size A->m by min(A->m, A->n) */ + chameleon_zdesc_copy_and_restrict( A, &descU, A->m, minmn ); + + /* Shift the seed to generate the random unitary matrix */ +#if !defined(CHAMELEON_SIMULATION) + seed = CORE_rnd64_jump( 2 * minmn, seed ); +#endif + chameleon_pzplrnt( &descU, seed, sequence, request ); +#if !defined(CHAMELEON_SIMULATION) + seed = CORE_rnd64_jump( 2 * minmn * A->n, seed ); +#endif + + /* Apply a QR factorization */ + mat.mt = descU.mt; + mat.nt = descU.nt; + mat.nodes = descU.p * descU.q; + mat.p = descU.p; + + libhqr_init_hqr( &qrtree, LIBHQR_QR, &mat, + -1, /*low level tree */ + -1, /* high level tree */ + -1, /* TS tree size */ + mat.p, /* High level size */ + -1, /* Domino */ + 0 /* TSRR (unstable) */ ); + +#if defined(CHAMELEON_COPY_DIAG) + chameleon_zdesc_copy_and_restrict( A, &descD, A->m, minmn ); + descDptr = &descD; +#endif + + chameleon_desc_init( &descTS, CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, ib, descU.nb, ib * descU.nb, + ib * descU.lmt, descU.nb * descU.lnt, 0, 0, + ib * descU.lmt, descU.nb * descU.lnt, descU.p, descU.q, + NULL, NULL, NULL ); + chameleon_desc_init( &descTT, CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, ib, descU.nb, ib * descU.nb, + ib * descU.lmt, descU.nb * descU.lnt, 0, 0, + ib * descU.lmt, descU.nb * descU.lnt, descU.p, descU.q, + NULL, NULL, NULL ); + + /* U <= qr(U) */ + chameleon_pzgeqrf_param( 1, kt, &qrtree, &descU, + &descTS, &descTT, descDptr, sequence, request ); + + /* A <= U * D */ + chameleon_pzunmqr_param( 0, &qrtree, ChamLeft, ChamNoTrans, + &descU, A, &descTS, &descTT, descDptr, sequence, request ); + + if ( trans != ChamNoTrans ) { + /* A <= (U * D) * U' */ + chameleon_pzunmqr_param( 0, &qrtree, ChamRight, trans, + &descU, A, &descTS, &descTT, descDptr, sequence, request ); + } + + CHAMELEON_Desc_Flush( &descU, sequence ); + CHAMELEON_Desc_Flush( &descTS, sequence ); + CHAMELEON_Desc_Flush( &descTT, sequence ); + if ( descDptr != NULL ) { + CHAMELEON_Desc_Flush( descDptr, sequence ); + } + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Sequence_Wait( sequence ); + + chameleon_desc_destroy( &descU ); + chameleon_desc_destroy( &descTS ); + chameleon_desc_destroy( &descTT ); + if ( descDptr != NULL ) { + chameleon_desc_destroy( descDptr ); + } + + libhqr_finalize( &qrtree ); + } + + /* Generate V and apply it */ + if ( trans == ChamNoTrans ) + { + /* V is of size min(A->m, A->n) by A->n */ + chameleon_zdesc_copy_and_restrict( A, &descV, minmn, A->n ); + + /* Shift the seed to generate the random unitary matrix */ +#if !defined(CHAMELEON_SIMULATION) + seed = CORE_rnd64_jump( 2 * minmn, seed ); +#endif + chameleon_pzplrnt( &descV, seed, sequence, request ); +#if !defined(CHAMELEON_SIMULATION) + seed = CORE_rnd64_jump( 2 * minmn * A->n, seed ); +#endif + + /* Apply a QR factorization */ + mat.mt = descV.mt; + mat.nt = descV.nt; + mat.nodes = descV.p * descV.q; + mat.p = descV.q; + + libhqr_init_hqr( &qrtree, LIBHQR_LQ, &mat, + -1, /*low level tree */ + -1, /* high level tree */ + -1, /* TS tree size */ + mat.p, /* High level size */ + -1, /* Domino */ + 0 /* TSRR (unstable) */ ); + +#if defined(CHAMELEON_COPY_DIAG) + chameleon_zdesc_copy_and_restrict( A, &descD, minmn, A->n ); + descDptr = &descD; +#endif + + chameleon_desc_init( &descTS, CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, ib, descV.nb, ib * descV.nb, + ib * descV.lmt, descV.nb * descV.lnt, 0, 0, + ib * descV.lmt, descV.nb * descV.lnt, descV.p, descV.q, + NULL, NULL, NULL ); + chameleon_desc_init( &descTT, CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, ib, descV.nb, ib * descV.nb, + ib * descV.lmt, descV.nb * descV.lnt, 0, 0, + ib * descV.lmt, descV.nb * descV.lnt, descV.p, descV.q, + NULL, NULL, NULL ); + + /* V <= qr(V) */ + chameleon_pzgelqf_param( 1, &qrtree, &descV, + &descTS, &descTT, descDptr, sequence, request ); + + /* A <= (U * D) * V */ + chameleon_pzunmlq_param( 0, &qrtree, ChamRight, ChamNoTrans, + &descV, A, &descTS, &descTT, descDptr, sequence, request ); + + CHAMELEON_Desc_Flush( &descV, sequence ); + CHAMELEON_Desc_Flush( &descTS, sequence ); + CHAMELEON_Desc_Flush( &descTT, sequence ); + if ( descDptr != NULL ) { + CHAMELEON_Desc_Flush( descDptr, sequence ); + } + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Sequence_Wait( sequence ); + + chameleon_desc_destroy( &descV ); + chameleon_desc_destroy( &descTS ); + chameleon_desc_destroy( &descTT ); + if ( descDptr != NULL ) { + chameleon_desc_destroy( descDptr ); + } + + libhqr_finalize( &qrtree ); + } + + RUNTIME_options_finalize(&options, chamctxt); + + if ( alloc_d ) { + free( D ); + } + (void)descD; +} diff --git a/compute/zlatms.c b/compute/zlatms.c new file mode 100644 index 0000000000000000000000000000000000000000..adffaa263b0aef1af43984f6bb6ff427e2b575e5 --- /dev/null +++ b/compute/zlatms.c @@ -0,0 +1,324 @@ +/** + * + * @file zlatms.c + * + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * @copyright 2016-2020 KAUST. All rights reserved. + * + *** + * + * @brief Chameleon zlatms wrappers + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2020-10-06 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * @brief Generates random matrices with specified singular values + * (or hermitian with specified eigenvalues) + * for testing programs (See LAPACK). + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. When M = 0, + * the returned value is set to zero. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. When N = 0, + * the returned value is set to zero. + * + * @param[in] idist + * On entry, idist specifies the type of distribution to be used + * to generate the random eigen-/singular values. + * ChamDistUniform => UNIFORM( 0, 1 ) ( 'U' for uniform ) + * ChamDistSymmetric => UNIFORM( -1, 1 ) ( 'S' for symmetric ) + * ChamDistNormal => NORMAL( 0, 1 ) ( 'N' for normal ) + * + * @param[in] seed + * The seed used in the random generation. + * + * @param[in] sym + * - ChamHermGeev, the generated matrix is hermitian, with + * eigenvalues specified by D, cond, mode, and dmax; they + * may be positive, negative, or zero. + * - ChamHermPoev, the generated matrix is hermitian, with + * eigenvalues (= singular values) specified by D, cond, + * mode, and dmax; they will not be negative. + * - ChamNonsymPosv, the generated matrix is nonsymmetric, with + * singular values specified by D, cond, mode, and dmax; + * they will not be negative. + * - ChamSymPosv, the generated matrix is (complex) symmetric, + * with singular values specified by D, cond, mode, and + * dmax; they will not be negative. + * + * @param[in,out] D + * Array of dimension ( MIN( M, N ) ) + * This array is used to specify the singular values or + * eigenvalues of A (see SYM, above.) If mode=0, then D is + * assumed to contain the singular/eigenvalues, otherwise + * they will be computed according to mode, cond, and dmax, + * and placed in D. + * Modified if mode is nonzero. + * + * @param[in] mode + * On entry this describes how the singular/eigenvalues are to + * be specified: + * mode = 0 means use D as input + * mode = 1 sets D(1)=1 and D(2:N)=1.0/cond + * mode = 2 sets D(1:N-1)=1 and D(N)=1.0/cond + * mode = 3 sets D(I)=cond**(-(I-1)/(N-1)) + * mode = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/cond) + * mode = 5 sets D to random numbers in the range + * ( 1/cond , 1 ) such that their logarithms + * are uniformly distributed. + * mode = 6 set D to random numbers from same distribution + * as the rest of the matrix. + * mode < 0 has the same meaning as ABS(mode), except that + * the order of the elements of D is reversed. + * Thus if mode is positive, D has entries ranging from + * 1 to 1/cond, if negative, from 1/cond to 1, + * If sym == ChamHermPoev, and mode is neither 0, 6, nor -6, then + * the elements of D will also be multiplied by a random + * sign (i.e., +1 or -1.) + * + * @param[in] cond + * On entry, this is used as described under mode above. + * If used, it must be >= 1. + * + * @param[in] dmax + * If mode is neither -6, 0 nor 6, the contents of D, as + * computed according to mode and cond, will be scaled by + * dmax / max(abs(D(i))); thus, the maximum absolute eigen- or + * singular value (which is to say the norm) will be abs(dmax). + * Note that dmax need not be positive: if dmax is negative + * (or zero), D will be scaled by a negative number (or zero). + * Not modified. + * + * @param[in] A + * The M-by-N matrix A. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + ******************************************************************************* + * + * @retval the two-norm estimate. + * + ******************************************************************************* + * + * @sa CHAMELEON_zlatms_Tile + * @sa CHAMELEON_zlatms_Tile_Async + * @sa CHAMELEON_clange + * @sa CHAMELEON_dlange + * @sa CHAMELEON_slange + * + */ +int CHAMELEON_zlatms( int M, int N, cham_dist_t idist, + unsigned long long int seed, cham_sym_t sym, + double *D, int mode, double cond, double dmax, + CHAMELEON_Complex64_t *A, int LDA ) +{ + int NB; + int status; + double value = -1.; + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + CHAM_desc_t descAl, descAt; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zlatms", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + /* Check input arguments */ + if (M < 0) { + chameleon_error("CHAMELEON_zlatms", "illegal value of M"); + return -1; + } + if (N < 0) { + chameleon_error("CHAMELEON_zlatms", "illegal value of N"); + return -2; + } + if (LDA < chameleon_max(1, M)) { + chameleon_error("CHAMELEON_zlatms", "illegal value of LDA"); + return -4; + } + + /* Quick return */ + if (chameleon_min(N, M) == 0) { + return (double)0.0; + } + + /* Tune NB depending on M, N & NRHS; Set NBNB */ + status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0); + if (status != CHAMELEON_SUCCESS) { + chameleon_error("CHAMELEON_zlatms", "chameleon_tune() failed"); + return status; + } + + /* Set NT */ + NB = CHAMELEON_NB; + + chameleon_sequence_create( chamctxt, &sequence ); + + /* Submit the matrix conversion */ + chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower, + A, NB, NB, LDA, N, M, N, sequence, &request ); + + /* Call the tile interface */ + CHAMELEON_zlatms_Tile_Async( idist, seed, sym, D, mode, cond, dmax, + &descAt, sequence, &request ); + + /* Submit the matrix conversion back */ + chameleon_ztile2lap( chamctxt, &descAl, &descAt, + ChamDescInput, ChamUpperLower, sequence, &request ); + + chameleon_sequence_wait( chamctxt, sequence ); + + /* Cleanup the temporary data */ + chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); + + chameleon_sequence_destroy( chamctxt, sequence ); + return value; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile + * + * @brief Tile equivalent of CHAMELEON_zlatms(). + * + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] A + * On entry, the input matrix A. + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa CHAMELEON_zlatms + * @sa CHAMELEON_zlatms_Tile_Async + * @sa CHAMELEON_clange_Tile + * @sa CHAMELEON_dlange_Tile + * @sa CHAMELEON_slange_Tile + * + */ +int CHAMELEON_zlatms_Tile( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, + double *D, int mode, double cond, double dmax, CHAM_desc_t *A ) +{ + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zlatms_Tile", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + chameleon_sequence_create( chamctxt, &sequence ); + + CHAMELEON_zlatms_Tile_Async( idist, seed, sym, D, mode, cond, dmax, A, sequence, &request ); + + CHAMELEON_Desc_Flush( A, sequence ); + + chameleon_sequence_wait( chamctxt, sequence ); + status = sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + return status; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile_Async + * + * @brief Non-blocking equivalent of CHAMELEON_zlatms_Tile(). + * + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa CHAMELEON_zlatms + * @sa CHAMELEON_zlatms_Tile + * @sa CHAMELEON_clange_Tile_Async + * @sa CHAMELEON_dlange_Tile_Async + * @sa CHAMELEON_slange_Tile_Async + * + */ +int CHAMELEON_zlatms_Tile_Async( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, + double *D, int mode, double cond, double dmax, CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zlatms_Tile", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + chameleon_fatal_error("CHAMELEON_zlatms_Tile", "NULL sequence"); + return CHAMELEON_ERR_UNALLOCATED; + } + if (request == NULL) { + chameleon_fatal_error("CHAMELEON_zlatms_Tile", "NULL request"); + return CHAMELEON_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == CHAMELEON_SUCCESS) { + request->status = CHAMELEON_SUCCESS; + } + else { + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED); + } + + /* Check descriptors for correctness */ + if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) { + chameleon_error("CHAMELEON_zlatms_Tile_Async", "invalid descriptor"); + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb) { + chameleon_error("CHAMELEON_zlatms_Tile_Async", "only square tiles supported"); + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + } + + /* Quick return */ + if (chameleon_min(A->m, A->n) == 0) { + return CHAMELEON_SUCCESS; + } + + chameleon_pzlatms( idist, seed, sym, D, mode, cond, dmax, A, sequence, request ); + + return CHAMELEON_SUCCESS; +} diff --git a/control/compute_z.h b/control/compute_z.h index 4a3c385b48dbed52f7ea3004bb8bb39376ec5748..ae8596e16e6462080cac77ecd46e7314e8f1296f 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -63,6 +63,7 @@ void chameleon_pzlaset( cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON void chameleon_pzlaset2(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzlaswp(CHAM_desc_t *B, int *IPIV, int inc, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzlaswpc(CHAM_desc_t *B, int *IPIV, int inc, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +void chameleon_pzlatms( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzplghe(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzplgsy(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt index 120c4ff72cf1ee545489a4864079e1735fd55f69..b774d7dad0cef277d2d6029963a7974c61089dda 100644 --- a/coreblas/compute/CMakeLists.txt +++ b/coreblas/compute/CMakeLists.txt @@ -34,6 +34,7 @@ set(ZSRC core_zaxpy.c core_zgeadd.c core_zlascal.c + core_zlatm1.c core_zgelqt.c core_zgemm.c core_zgeqrt.c diff --git a/coreblas/compute/core_zlatm1.c b/coreblas/compute/core_zlatm1.c new file mode 100644 index 0000000000000000000000000000000000000000..8ca67eade1b15514b2e255593decea68b4311dba --- /dev/null +++ b/coreblas/compute/core_zlatm1.c @@ -0,0 +1,259 @@ +/** + * + * @file core_zlatm1.c + * + * @copyright 2009-2016 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon core_dlatm1 CPU kernel + * + * @version 1.0.0 + * @author Hatem Ltaief + * @author Mathieu Faverge + * @date 2020-10-06 + * @precisions normal z -> c d s + * + */ +#include "coreblas/lapacke.h" +#include "coreblas.h" +#include "coreblas/random.h" + +/** + * + * @ingroup CORE_CHAMELEON_Double_t + * + * @brief Computes the entries of D(1..N) as specified by + * MODE, COND and IRSIGN, and as specifief in LAPACK dlatm1.f + * + * @warning This prototype may evolve + * + ******************************************************************************* + * + * @param[in] MODE + * On entry describes how D is to be computed: + * MODE = 0 means do not change D. + * MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND + * MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND + * MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) + * MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) + * MODE = 5 sets D to random numbers in the range + * ( 1/COND , 1 ) such that their logarithms + * are uniformly distributed. + * MODE = 6 set D to random numbers from same distribution + * as the rest of the matrix. + * MODE < 0 has the same meaning as ABS(MODE), except that + * the order of the elements of D is reversed. + * Thus if MODE is positive, D has entries ranging from + * 1 to 1/COND, if negative, from 1/COND to 1, + * Not modified. + * + * @param[in] COND + * COND is DOUBLE PRECISION + * On entry, used as described under MODE above. + * If used, it must be >= 1. Not modified. + * + * @param[in] IRSIGN + * IRSIGN is INTEGER + * On entry, if MODE neither -6, 0 nor 6, determines sign of + * entries of D + * 0 => leave entries of D unchanged + * 1 => multiply each entry of D by 1 or -1 with probability .5 + * + * @param[in] DIST + * DIST is INTEGER + * On entry, DIST specifies the type of distribution to be + * used to generate a random matrix. Only for MODE 6 and -6. + * ChamDistUniform => UNIFORM( 0, 1 ) + * ChamDistSymmetric => UNIFORM( -1, 1 ) + * ChamDistNormal => NORMAL( 0, 1 ) + * ChamDistNormal is not supported for now. + * + * @param[in] ISEED + * Specifies the seed of the random number + * generator (see coreblas/random.h). + * + * @param[in,out] D + * D is DOUBLE PRECISION array, dimension ( N ) + * Array to be computed according to MODE, COND and IRSIGN. + * May be changed on exit if MODE is nonzero. + * + * @param[in] N + * N is INTEGER + * Number of entries of D. Not modified. + * + ******************************************************************************* + * + * @retval 0 => normal termination + * @retval -1 => if MODE not in range -6 to 6 + * @retval -2 => if MODE neither -6, 0 nor 6, and IRSIGN neither 0 nor 1 + * @retval -3 => if MODE neither -6, 0 nor 6 and COND less than 1 + * @retval -4 => if MODE equals 6 or -6 and DIST not in range 1 to 3 + * @retval -7 => if N negative + * + */ +int CORE_zlatm1( int MODE, double COND, int IRSIGN, cham_dist_t DIST, + unsigned long long int seed, + CHAMELEON_Complex64_t *D, int N ) +{ + unsigned long long int ran; + int i; + double alpha, temp; + + /* + * Quick return if possible + */ + if ( N == 0 ) { + return CHAMELEON_SUCCESS; + } + + /* + * Check parameters + */ + if ( (MODE < -6) || (MODE > 6) ) { + coreblas_error(1, "illegal value of MODE"); + return -1; + } + if ( ( (MODE != -6) && (MODE != 0) && (MODE != 6) ) && + ( (IRSIGN != 0) && (IRSIGN != 1) ) ) + { + coreblas_error(2, "illegal value of IRSIGN"); + return -2; + } + if ( ( (MODE != -6) && (MODE != 0) && (MODE != 6) ) && + ( COND < 1. ) ) + { + coreblas_error(3, "illegal value of COND"); + return -3; + } + if ( ( (MODE != -6) || (MODE != 6) ) && + ( (DIST < ChamDistUniform) || (DIST > ChamDistSymmetric) ) ) + { + coreblas_error(4, "illegal value of DIST"); + return -4; + } + if ( N < 0 ) { + coreblas_error(7, "illegal value of N"); + return -7; + } + + /* Initialize the random value */ + ran = CORE_rnd64_jump( 1, seed ); + + switch( abs(MODE) ) { + /* + * One large D value + */ + case 1: + D[0] = 1.; + alpha = 1. / COND; + for( i=1; i<N; i++ ) { + D[i] = alpha; + } + break; + + /* + * One small D value + */ + case 2: + for( i=0; i<N-1; i++ ) { + D[i] = 1.; + } + D[N-1] = 1. / COND; + break; + + /* + * Exponentially distributed D values: + */ + case 3: + D[0] = 1.; + if ( N > 1 ) { + alpha = pow( COND, -1. / ((double)(N-1)) ); + + for( i=1; i<N; i++ ) { + D[i] = pow( alpha, i ); + } + } + break; + + /* + * Arithmetically distributed D values + */ + case 4: + D[0] = 1.; + if ( N > 1 ) { + temp = 1. / COND; + alpha = ( 1. - temp ) / ((double)(N-1)); + + for( i=1; i<N; i++ ) { + D[i] = ((double)(N-i)) * alpha + temp; + } + } + break; + + /* + * Randomly distributed D values on ( 1/COND , 1) + */ + case 5: + alpha = log( 1. / COND ); + for( i=0; i<N; i++ ) { + temp = CORE_dlaran( &ran ); + D[i] = exp( alpha * temp ); + } + break; + + /* + * Randomly distributed D values from DIST + */ + case 6: + CORE_zplrnt( N, 1, D, N, N, 1, 0, seed ); + + if ( DIST == ChamDistSymmetric ) { + for( i=0; i<N; i++ ) { + D[i] = 2. * D[i] - 1.; + } + } + break; + case 0: + default: + /* Nothing to be done */ + ; + } + + /* + * If MODE neither -6 nor 0 nor 6, and IRSIGN = 1, assign + * random signs to D + */ + if ( ( (abs(MODE) != -6) && (MODE != 0) ) && + (IRSIGN == 1) ) + { + for( i=0; i<N; i++ ) { +#if defined(PRECISION_z) || defined(PRECISION_c) + double t1 = CORE_dlaran( &ran ); + double t2 = CORE_dlaran( &ran ); + temp = sqrt( -2 * log( t1 ) ) * exp( I * 2. * M_PI * t2 ); + D[i] = D[i] * ( temp / cabs(temp) ); +#else + if ( CORE_dlaran( &ran ) > .5 ) { + D[i] = -D[i]; + } +#endif + } + } + + /* + * Reverse if MODE < 0 + */ + if ( MODE < 0 ) { + for( i=0; i<N/2; i++ ) { + temp = D[i]; + D[i] = D[N-1-i]; + D[N-1-i] = temp; + } + } + + return CHAMELEON_SUCCESS; +} diff --git a/coreblas/compute/core_zplghe.c b/coreblas/compute/core_zplghe.c index aa864d13a587973bf248584797300622efd86fb4..24925ae1b0e5e0a2fec2fe132ae8621b759bf38c 100644 --- a/coreblas/compute/core_zplghe.c +++ b/coreblas/compute/core_zplghe.c @@ -24,19 +24,7 @@ * */ #include "coreblas.h" - -/* - Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix - generating threads only read Rnd64seed. It is safe to set Rnd64seed before - and after any calls to create_tile(). The only problem can be caused if - Rnd64seed is changed during the matrix generation time. - */ - -//static unsigned long long int Rnd64seed = 100; -#define Rnd64_A 6364136223846793005ULL -#define Rnd64_C 1ULL -#define RndF_Mul 5.4210108624275222e-20f -#define RndD_Mul 5.4210108624275222e-20 +#include "coreblas/random.h" #if defined(PRECISION_z) || defined(PRECISION_c) #define NBELEM 2 @@ -44,27 +32,7 @@ #define NBELEM 1 #endif -static unsigned long long int -Rnd64_jump(unsigned long long int n, unsigned long long int seed ) { - unsigned long long int a_k, c_k, ran; - int i; - - a_k = Rnd64_A; - c_k = Rnd64_C; - - ran = seed; - for (i = 0; n; n >>= 1, i++) { - if (n & 1) - ran = a_k * ran + c_k; - c_k *= (a_k + 1); - a_k *= a_k; - } - - return ran; -} - // CORE_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. - void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, int bigM, int m0, int n0, unsigned long long int seed ) { @@ -82,23 +50,14 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, /* Lower part */ for (j = 0; j < minmn; j++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - ran = Rnd64_A * ran + Rnd64_C; -#endif - *tmp = creal( *tmp + bump ); + *tmp = CORE_zlaran( &ran ); + *tmp = creal( *tmp + bump ); /* Take only the real part */ tmp++; for (i = j+1; i < m; i++) { - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); tmp++; } tmp += (lda - i + j + 1); @@ -109,15 +68,10 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)bigM; for (i = 0; i < minmn; i++) { - ran = Rnd64_jump( NBELEM * (jump+i+1), seed ); + ran = CORE_rnd64_jump( NBELEM * (jump+i+1), seed ); for (j = i+1; j < n; j++) { - A[j*lda+i] = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - A[j*lda+i] -= I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + A[j*lda+i] = conj( CORE_zlaran( &ran ) ); } jump += bigM; } @@ -127,15 +81,10 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, */ else if ( m0 > n0 ) { for (j = 0; j < n; j++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); for (i = 0; i < m; i++) { - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); tmp++; } tmp += (lda - i); @@ -150,15 +99,10 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, jump = (unsigned long long int)n0 + (unsigned long long int)m0 * (unsigned long long int)bigM; for (i = 0; i < m; i++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); for (j = 0; j < n; j++) { - A[j*lda+i] = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - A[j*lda+i] -= I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + A[j*lda+i] = conj( CORE_zlaran( &ran ) ); } jump += bigM; } diff --git a/coreblas/compute/core_zplgsy.c b/coreblas/compute/core_zplgsy.c index 995dd20c75633f6fe2b57eb0215513efbd1f31bc..37f8dd0126355dbd989d1d486e485df4af1087d1 100644 --- a/coreblas/compute/core_zplgsy.c +++ b/coreblas/compute/core_zplgsy.c @@ -24,19 +24,7 @@ * */ #include "coreblas.h" - -/* - Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix - generating threads only read Rnd64seed. It is safe to set Rnd64seed before - and after any calls to create_tile(). The only problem can be caused if - Rnd64seed is changed during the matrix generation time. - */ - -//static unsigned long long int Rnd64seed = 100; -#define Rnd64_A 6364136223846793005ULL -#define Rnd64_C 1ULL -#define RndF_Mul 5.4210108624275222e-20f -#define RndD_Mul 5.4210108624275222e-20 +#include "coreblas/random.h" #if defined(PRECISION_z) || defined(PRECISION_c) #define NBELEM 2 @@ -44,27 +32,7 @@ #define NBELEM 1 #endif -static unsigned long long int -Rnd64_jump(unsigned long long int n, unsigned long long int seed ) { - unsigned long long int a_k, c_k, ran; - int i; - - a_k = Rnd64_A; - c_k = Rnd64_C; - - ran = seed; - for (i = 0; n; n >>= 1, i++) { - if (n & 1) - ran = a_k * ran + c_k; - c_k *= (a_k + 1); - a_k *= a_k; - } - - return ran; -} - // CORE_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. - void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, int bigM, int m0, int n0, unsigned long long int seed ) { @@ -82,24 +50,14 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_ /* Lower part */ for (j = 0; j < minmn; j++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); *tmp = *tmp + bump; tmp++; for (i = j+1; i < m; i++) { - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); tmp++; } tmp += (lda - i + j + 1); @@ -110,15 +68,10 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_ jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)bigM; for (i = 0; i < minmn; i++) { - ran = Rnd64_jump( NBELEM * (jump+i+1), seed ); + ran = CORE_rnd64_jump( NBELEM * (jump+i+1), seed ); for (j = i+1; j < n; j++) { - A[j*lda+i] = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - A[j*lda+i] += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + A[j*lda+i] = CORE_zlaran( &ran ); } jump += bigM; } @@ -128,15 +81,10 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_ */ else if ( m0 > n0 ) { for (j = 0; j < n; j++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); for (i = 0; i < m; i++) { - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); tmp++; } tmp += (lda - i); @@ -151,15 +99,10 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_ jump = (unsigned long long int)n0 + (unsigned long long int)m0 * (unsigned long long int)bigM; for (i = 0; i < m; i++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); for (j = 0; j < n; j++) { - A[j*lda+i] = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - A[j*lda+i] += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + A[j*lda+i] = CORE_zlaran( &ran ); } jump += bigM; } diff --git a/coreblas/compute/core_zplrnt.c b/coreblas/compute/core_zplrnt.c index b3ef3fd336719fbed9fb6e35daa6a74ed95fba1f..00f06bd03e4698de473c03ba50a8871ce0c943e4 100644 --- a/coreblas/compute/core_zplrnt.c +++ b/coreblas/compute/core_zplrnt.c @@ -24,19 +24,7 @@ * */ #include "coreblas.h" - -/* - Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix - generating threads only read Rnd64seed. It is safe to set Rnd64seed before - and after any calls to create_tile(). The only problem can be caused if - Rnd64seed is changed during the matrix generation time. - */ - -//static unsigned long long int Rnd64seed = 100; -#define Rnd64_A 6364136223846793005ULL -#define Rnd64_C 1ULL -#define RndF_Mul 5.4210108624275222e-20f -#define RndD_Mul 5.4210108624275222e-20 +#include "coreblas/random.h" #if defined(PRECISION_z) || defined(PRECISION_c) #define NBELEM 2 @@ -44,27 +32,6 @@ #define NBELEM 1 #endif -static unsigned long long int -Rnd64_jump(unsigned long long int n, unsigned long long int seed ) { - unsigned long long int a_k, c_k, ran; - int i; - - a_k = Rnd64_A; - c_k = Rnd64_C; - - ran = seed; - for (i = 0; n; n >>= 1, ++i) { - if (n & 1) - ran = a_k * ran + c_k; - c_k *= (a_k + 1); - a_k *= a_k; - } - - return ran; -} - -// CORE_zplrnt - Generate a tile for random matrix. - void CORE_zplrnt( int m, int n, CHAMELEON_Complex64_t *A, int lda, int bigM, int m0, int n0, unsigned long long int seed ) { @@ -75,20 +42,12 @@ void CORE_zplrnt( int m, int n, CHAMELEON_Complex64_t *A, int lda, jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)bigM; for (j=0; j<n; ++j ) { - ran = Rnd64_jump( NBELEM*jump, seed ); + ran = CORE_rnd64_jump( NBELEM*jump, seed ); for (i = 0; i < m; ++i) { - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); tmp++; } tmp += lda-i; jump += bigM; } } - - - diff --git a/coreblas/include/CMakeLists.txt b/coreblas/include/CMakeLists.txt index 414b4fe82214424d329b8ae6c90ffde147ea07cf..3d9e2252333ede0973b08beea008ff802a574081 100644 --- a/coreblas/include/CMakeLists.txt +++ b/coreblas/include/CMakeLists.txt @@ -46,6 +46,7 @@ set(COREBLAS_HDRS coreblas/lapacke.h coreblas/lapacke_config.h coreblas/lapacke_mangling.h + coreblas/random.h ) # Add generated headers diff --git a/coreblas/include/coreblas/coreblas_z.h b/coreblas/include/coreblas/coreblas_z.h index 4db2c94f130367fab69d7429eee4ef384da8a1c9..111e0b544259f045ddd7785379d7f8127ae483f8 100644 --- a/coreblas/include/coreblas/coreblas_z.h +++ b/coreblas/include/coreblas/coreblas_z.h @@ -168,6 +168,9 @@ int CORE_zlatro(cham_uplo_t uplo, cham_trans_t trans, int M, int N, const CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB); +int CORE_zlatm1( int MODE, double COND, int IRSIGN, cham_dist_t DIST, + unsigned long long int seed, + CHAMELEON_Complex64_t *D, int N ); void CORE_zlauum(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA); int CORE_zpamm(int op, cham_side_t side, cham_store_t storev, int M, int N, int K, int L, diff --git a/coreblas/include/coreblas/random.h b/coreblas/include/coreblas/random.h new file mode 100644 index 0000000000000000000000000000000000000000..b74d89d07b6e8e4469101e9154d66ecab6515540 --- /dev/null +++ b/coreblas/include/coreblas/random.h @@ -0,0 +1,98 @@ +/** + * + * @file random.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon coreblas random number generator + * + * @version 1.0.0 + * @author Piotr Luszczek + * @author Mathieu Faverge + * @date 2020-10-06 + * @precisions normal z -> c d s + * + */ +/* + Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix + generating threads only read Rnd64seed. It is safe to set Rnd64seed before + and after any calls to create_tile(). The only problem can be caused if + Rnd64seed is changed during the matrix generation time. + */ + +//static unsigned long long int Rnd64seed = 100; +#define Rnd64_A 6364136223846793005ULL +#define Rnd64_C 1ULL +#define RndF_Mul 5.4210108624275222e-20f +#define RndD_Mul 5.4210108624275222e-20 + +static inline unsigned long long int +CORE_rnd64_jump(unsigned long long int n, unsigned long long int seed ) { + unsigned long long int a_k, c_k, ran; + int i; + + a_k = Rnd64_A; + c_k = Rnd64_C; + + ran = seed; + for (i = 0; n; n >>= 1, ++i) { + if (n & 1) { + ran = a_k * ran + c_k; + } + c_k *= (a_k + 1); + a_k *= a_k; + } + + return ran; +} + +static inline double +CORE_slaran( unsigned long long int *ran ) +{ + float value = 0.5 - (*ran) * RndF_Mul; + *ran = Rnd64_A * (*ran) + Rnd64_C; + + return value; +} + +static inline double +CORE_dlaran( unsigned long long int *ran ) +{ + double value = 0.5 - (*ran) * RndD_Mul; + *ran = Rnd64_A * (*ran) + Rnd64_C; + + return value; +} + +static inline CHAMELEON_Complex32_t +CORE_claran( unsigned long long int *ran ) +{ + CHAMELEON_Complex32_t value; + + value = 0.5 - (*ran) * RndF_Mul; + *ran = Rnd64_A * (*ran) + Rnd64_C; + + value += I * (0.5 - (*ran) * RndF_Mul); + *ran = Rnd64_A * (*ran) + Rnd64_C; + + return value; +} + +static inline CHAMELEON_Complex64_t +CORE_zlaran( unsigned long long int *ran ) +{ + CHAMELEON_Complex64_t value; + + value = 0.5 - (*ran) * RndD_Mul; + *ran = Rnd64_A * (*ran) + Rnd64_C; + + value += I * (0.5 - (*ran) * RndD_Mul); + *ran = Rnd64_A * (*ran) + Rnd64_C; + + return value; +} diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index 1ec6c7f5cb0fb2d6db33bbe7a9574aa2ea0b9f71..1b7ca622e2309503e526520b3a9a2efb1ba8b02a 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -71,6 +71,7 @@ int CHAMELEON_zlascal(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alph int CHAMELEON_zlaset(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *A, int LDA); //int CHAMELEON_zlaswp(int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX); //int CHAMELEON_zlaswpc(int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX); +int CHAMELEON_zlatms( int M, int N, cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAMELEON_Complex64_t *A, int LDA ); int CHAMELEON_zlauum(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA); int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed ); int CHAMELEON_zplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed ); @@ -145,6 +146,7 @@ int CHAMELEON_zlascal_Tile(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_d int CHAMELEON_zlaset_Tile(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A); //int CHAMELEON_zlaswp_Tile(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX); //int CHAMELEON_zlaswpc_Tile(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX); +int CHAMELEON_zlatms_Tile( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A ); int CHAMELEON_zlauum_Tile(cham_uplo_t uplo, CHAM_desc_t *A); int CHAMELEON_zplghe_Tile(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed ); int CHAMELEON_zplgsy_Tile(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed ); @@ -216,6 +218,7 @@ int CHAMELEON_zlascal_Tile_Async(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, int CHAMELEON_zlaset_Tile_Async(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zlaswp_Tile_Async(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zlaswpc_Tile_Async(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int CHAMELEON_zlatms_Tile_Async( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); int CHAMELEON_zlauum_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zplghe_Tile_Async(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); int CHAMELEON_zplgsy_Tile_Async(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h index a75a907066287231407832def05eeaf1290aefe6..962e7270f16e9a512b99167c2dc2416f131d4f7a 100644 --- a/include/chameleon/constants.h +++ b/include/chameleon/constants.h @@ -135,10 +135,12 @@ typedef enum chameleon_mtxtype_e { /** * @brief Eigen and singular values generator format */ -#define ChamHermGeev 241 -#define ChamHermPoev 242 -#define ChamNonsymPosv 243 -#define ChamSymPosv 244 +typedef enum chameleon_sym_e { + ChamHermGeev = 241, + ChamHermPoev = 242, + ChamNonsymPosv = 243, + ChamSymPosv = 244 +} cham_sym_t; #define ChamNoPacking 291 #define ChamPackSubdiag 292