diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py index e4d809341c0fc7d2bc7fc18851f1f52653ec6529..f1943d81d82306a6dff98e6d1c99d655a43485ca 100644 --- a/cmake_modules/local_subs.py +++ b/cmake_modules/local_subs.py @@ -16,6 +16,8 @@ _extra_blas = [ ('', 'slatro', 'dlatro', 'clatro', 'zlatro' ), #=> Replace by getmo/gecmo as in essl ('', 'sbuild', 'dbuild', 'cbuild', 'zbuild' ), #=> Replace by map function ('', 'sgram', 'dgram', 'cgram', 'zgram' ), + ('', 'slaran', 'dlaran', 'claran', 'zlaran' ), + ('', 'slaran', 'dlaran', 'slaran', 'dlaran' ), ] _extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ] diff --git a/coreblas/compute/core_zplghe.c b/coreblas/compute/core_zplghe.c index aa864d13a587973bf248584797300622efd86fb4..24925ae1b0e5e0a2fec2fe132ae8621b759bf38c 100644 --- a/coreblas/compute/core_zplghe.c +++ b/coreblas/compute/core_zplghe.c @@ -24,19 +24,7 @@ * */ #include "coreblas.h" - -/* - Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix - generating threads only read Rnd64seed. It is safe to set Rnd64seed before - and after any calls to create_tile(). The only problem can be caused if - Rnd64seed is changed during the matrix generation time. - */ - -//static unsigned long long int Rnd64seed = 100; -#define Rnd64_A 6364136223846793005ULL -#define Rnd64_C 1ULL -#define RndF_Mul 5.4210108624275222e-20f -#define RndD_Mul 5.4210108624275222e-20 +#include "coreblas/random.h" #if defined(PRECISION_z) || defined(PRECISION_c) #define NBELEM 2 @@ -44,27 +32,7 @@ #define NBELEM 1 #endif -static unsigned long long int -Rnd64_jump(unsigned long long int n, unsigned long long int seed ) { - unsigned long long int a_k, c_k, ran; - int i; - - a_k = Rnd64_A; - c_k = Rnd64_C; - - ran = seed; - for (i = 0; n; n >>= 1, i++) { - if (n & 1) - ran = a_k * ran + c_k; - c_k *= (a_k + 1); - a_k *= a_k; - } - - return ran; -} - // CORE_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. - void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, int bigM, int m0, int n0, unsigned long long int seed ) { @@ -82,23 +50,14 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, /* Lower part */ for (j = 0; j < minmn; j++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - ran = Rnd64_A * ran + Rnd64_C; -#endif - *tmp = creal( *tmp + bump ); + *tmp = CORE_zlaran( &ran ); + *tmp = creal( *tmp + bump ); /* Take only the real part */ tmp++; for (i = j+1; i < m; i++) { - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); tmp++; } tmp += (lda - i + j + 1); @@ -109,15 +68,10 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)bigM; for (i = 0; i < minmn; i++) { - ran = Rnd64_jump( NBELEM * (jump+i+1), seed ); + ran = CORE_rnd64_jump( NBELEM * (jump+i+1), seed ); for (j = i+1; j < n; j++) { - A[j*lda+i] = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - A[j*lda+i] -= I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + A[j*lda+i] = conj( CORE_zlaran( &ran ) ); } jump += bigM; } @@ -127,15 +81,10 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, */ else if ( m0 > n0 ) { for (j = 0; j < n; j++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); for (i = 0; i < m; i++) { - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); tmp++; } tmp += (lda - i); @@ -150,15 +99,10 @@ void CORE_zplghe( double bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, jump = (unsigned long long int)n0 + (unsigned long long int)m0 * (unsigned long long int)bigM; for (i = 0; i < m; i++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); for (j = 0; j < n; j++) { - A[j*lda+i] = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - A[j*lda+i] -= I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + A[j*lda+i] = conj( CORE_zlaran( &ran ) ); } jump += bigM; } diff --git a/coreblas/compute/core_zplgsy.c b/coreblas/compute/core_zplgsy.c index 995dd20c75633f6fe2b57eb0215513efbd1f31bc..37f8dd0126355dbd989d1d486e485df4af1087d1 100644 --- a/coreblas/compute/core_zplgsy.c +++ b/coreblas/compute/core_zplgsy.c @@ -24,19 +24,7 @@ * */ #include "coreblas.h" - -/* - Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix - generating threads only read Rnd64seed. It is safe to set Rnd64seed before - and after any calls to create_tile(). The only problem can be caused if - Rnd64seed is changed during the matrix generation time. - */ - -//static unsigned long long int Rnd64seed = 100; -#define Rnd64_A 6364136223846793005ULL -#define Rnd64_C 1ULL -#define RndF_Mul 5.4210108624275222e-20f -#define RndD_Mul 5.4210108624275222e-20 +#include "coreblas/random.h" #if defined(PRECISION_z) || defined(PRECISION_c) #define NBELEM 2 @@ -44,27 +32,7 @@ #define NBELEM 1 #endif -static unsigned long long int -Rnd64_jump(unsigned long long int n, unsigned long long int seed ) { - unsigned long long int a_k, c_k, ran; - int i; - - a_k = Rnd64_A; - c_k = Rnd64_C; - - ran = seed; - for (i = 0; n; n >>= 1, i++) { - if (n & 1) - ran = a_k * ran + c_k; - c_k *= (a_k + 1); - a_k *= a_k; - } - - return ran; -} - // CORE_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. - void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_t *A, int lda, int bigM, int m0, int n0, unsigned long long int seed ) { @@ -82,24 +50,14 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_ /* Lower part */ for (j = 0; j < minmn; j++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); *tmp = *tmp + bump; tmp++; for (i = j+1; i < m; i++) { - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); tmp++; } tmp += (lda - i + j + 1); @@ -110,15 +68,10 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_ jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)bigM; for (i = 0; i < minmn; i++) { - ran = Rnd64_jump( NBELEM * (jump+i+1), seed ); + ran = CORE_rnd64_jump( NBELEM * (jump+i+1), seed ); for (j = i+1; j < n; j++) { - A[j*lda+i] = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - A[j*lda+i] += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + A[j*lda+i] = CORE_zlaran( &ran ); } jump += bigM; } @@ -128,15 +81,10 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_ */ else if ( m0 > n0 ) { for (j = 0; j < n; j++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); for (i = 0; i < m; i++) { - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); tmp++; } tmp += (lda - i); @@ -151,15 +99,10 @@ void CORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAMELEON_Complex64_ jump = (unsigned long long int)n0 + (unsigned long long int)m0 * (unsigned long long int)bigM; for (i = 0; i < m; i++) { - ran = Rnd64_jump( NBELEM * jump, seed ); + ran = CORE_rnd64_jump( NBELEM * jump, seed ); for (j = 0; j < n; j++) { - A[j*lda+i] = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - A[j*lda+i] += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + A[j*lda+i] = CORE_zlaran( &ran ); } jump += bigM; } diff --git a/coreblas/compute/core_zplrnt.c b/coreblas/compute/core_zplrnt.c index b3ef3fd336719fbed9fb6e35daa6a74ed95fba1f..00f06bd03e4698de473c03ba50a8871ce0c943e4 100644 --- a/coreblas/compute/core_zplrnt.c +++ b/coreblas/compute/core_zplrnt.c @@ -24,19 +24,7 @@ * */ #include "coreblas.h" - -/* - Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix - generating threads only read Rnd64seed. It is safe to set Rnd64seed before - and after any calls to create_tile(). The only problem can be caused if - Rnd64seed is changed during the matrix generation time. - */ - -//static unsigned long long int Rnd64seed = 100; -#define Rnd64_A 6364136223846793005ULL -#define Rnd64_C 1ULL -#define RndF_Mul 5.4210108624275222e-20f -#define RndD_Mul 5.4210108624275222e-20 +#include "coreblas/random.h" #if defined(PRECISION_z) || defined(PRECISION_c) #define NBELEM 2 @@ -44,27 +32,6 @@ #define NBELEM 1 #endif -static unsigned long long int -Rnd64_jump(unsigned long long int n, unsigned long long int seed ) { - unsigned long long int a_k, c_k, ran; - int i; - - a_k = Rnd64_A; - c_k = Rnd64_C; - - ran = seed; - for (i = 0; n; n >>= 1, ++i) { - if (n & 1) - ran = a_k * ran + c_k; - c_k *= (a_k + 1); - a_k *= a_k; - } - - return ran; -} - -// CORE_zplrnt - Generate a tile for random matrix. - void CORE_zplrnt( int m, int n, CHAMELEON_Complex64_t *A, int lda, int bigM, int m0, int n0, unsigned long long int seed ) { @@ -75,20 +42,12 @@ void CORE_zplrnt( int m, int n, CHAMELEON_Complex64_t *A, int lda, jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)bigM; for (j=0; j<n; ++j ) { - ran = Rnd64_jump( NBELEM*jump, seed ); + ran = CORE_rnd64_jump( NBELEM*jump, seed ); for (i = 0; i < m; ++i) { - *tmp = 0.5f - ran * RndF_Mul; - ran = Rnd64_A * ran + Rnd64_C; -#if defined(PRECISION_z) || defined(PRECISION_c) - *tmp += I*(0.5f - ran * RndF_Mul); - ran = Rnd64_A * ran + Rnd64_C; -#endif + *tmp = CORE_zlaran( &ran ); tmp++; } tmp += lda-i; jump += bigM; } } - - - diff --git a/coreblas/include/CMakeLists.txt b/coreblas/include/CMakeLists.txt index 414b4fe82214424d329b8ae6c90ffde147ea07cf..3d9e2252333ede0973b08beea008ff802a574081 100644 --- a/coreblas/include/CMakeLists.txt +++ b/coreblas/include/CMakeLists.txt @@ -46,6 +46,7 @@ set(COREBLAS_HDRS coreblas/lapacke.h coreblas/lapacke_config.h coreblas/lapacke_mangling.h + coreblas/random.h ) # Add generated headers diff --git a/coreblas/include/coreblas/random.h b/coreblas/include/coreblas/random.h new file mode 100644 index 0000000000000000000000000000000000000000..b74d89d07b6e8e4469101e9154d66ecab6515540 --- /dev/null +++ b/coreblas/include/coreblas/random.h @@ -0,0 +1,98 @@ +/** + * + * @file random.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon coreblas random number generator + * + * @version 1.0.0 + * @author Piotr Luszczek + * @author Mathieu Faverge + * @date 2020-10-06 + * @precisions normal z -> c d s + * + */ +/* + Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix + generating threads only read Rnd64seed. It is safe to set Rnd64seed before + and after any calls to create_tile(). The only problem can be caused if + Rnd64seed is changed during the matrix generation time. + */ + +//static unsigned long long int Rnd64seed = 100; +#define Rnd64_A 6364136223846793005ULL +#define Rnd64_C 1ULL +#define RndF_Mul 5.4210108624275222e-20f +#define RndD_Mul 5.4210108624275222e-20 + +static inline unsigned long long int +CORE_rnd64_jump(unsigned long long int n, unsigned long long int seed ) { + unsigned long long int a_k, c_k, ran; + int i; + + a_k = Rnd64_A; + c_k = Rnd64_C; + + ran = seed; + for (i = 0; n; n >>= 1, ++i) { + if (n & 1) { + ran = a_k * ran + c_k; + } + c_k *= (a_k + 1); + a_k *= a_k; + } + + return ran; +} + +static inline double +CORE_slaran( unsigned long long int *ran ) +{ + float value = 0.5 - (*ran) * RndF_Mul; + *ran = Rnd64_A * (*ran) + Rnd64_C; + + return value; +} + +static inline double +CORE_dlaran( unsigned long long int *ran ) +{ + double value = 0.5 - (*ran) * RndD_Mul; + *ran = Rnd64_A * (*ran) + Rnd64_C; + + return value; +} + +static inline CHAMELEON_Complex32_t +CORE_claran( unsigned long long int *ran ) +{ + CHAMELEON_Complex32_t value; + + value = 0.5 - (*ran) * RndF_Mul; + *ran = Rnd64_A * (*ran) + Rnd64_C; + + value += I * (0.5 - (*ran) * RndF_Mul); + *ran = Rnd64_A * (*ran) + Rnd64_C; + + return value; +} + +static inline CHAMELEON_Complex64_t +CORE_zlaran( unsigned long long int *ran ) +{ + CHAMELEON_Complex64_t value; + + value = 0.5 - (*ran) * RndD_Mul; + *ran = Rnd64_A * (*ran) + Rnd64_C; + + value += I * (0.5 - (*ran) * RndD_Mul); + *ran = Rnd64_A * (*ran) + Rnd64_C; + + return value; +}