diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py index 8fec8ccde1c44496ba92cb681c3239b2481ec870..e4d809341c0fc7d2bc7fc18851f1f52653ec6529 100644 --- a/cmake_modules/local_subs.py +++ b/cmake_modules/local_subs.py @@ -9,6 +9,7 @@ _extra_blas = [ ('', 'spocon', 'dpocon', 'cpocon', 'zpocon' ), ('', 'strasm', 'dtrasm', 'ctrasm', 'ztrasm' ), ('', 'sgecfi', 'dgecfi', 'cgecfi', 'zgecfi' ), + ('', 'splrnk', 'dplrnk', 'cplrnk', 'zplrnk' ), ('', 'splssq', 'dplssq', 'cplssq', 'zplssq' ), ('', 'sy2sb', 'sy2sb' , 'he2hb', 'he2hb' ), ('', 'she2ge', 'dhe2ge', 'che2ge', 'zhe2ge' ), diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 08b57c03a143511d510eef47e012c413dd2ecf00..9066b5e32a75fe48d53ca0fdd70e177c133b9210 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -122,6 +122,7 @@ set(ZSRC pzplghe.c pzplgsy.c pzplrnt.c + pzplrnk.c pzpotrf.c pzsytrf.c pztrtri.c @@ -169,6 +170,7 @@ set(ZSRC zplghe.c zplgsy.c zplrnt.c + zplrnk.c zposv.c zsysv.c zpotrf.c diff --git a/compute/pzplrnk.c b/compute/pzplrnk.c new file mode 100644 index 0000000000000000000000000000000000000000..a8919e2251330d6ae5f5a6f1ef540c8e6ff832e1 --- /dev/null +++ b/compute/pzplrnk.c @@ -0,0 +1,109 @@ +/** + * + * @file pzplrnk.c + * + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zplrnk parallel algorithm + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2020-03-05 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" + +#define C(m, n) C, m, n +#define WA(m, n) &WA, m, n +#define WB(m, n) &WB, m, n + +/** + * chameleon_pzplrnk - Generate a random rank-k matrix by tiles. + */ +void chameleon_pzplrnk( int K, CHAM_desc_t *C, + unsigned long long int seedA, + unsigned long long int seedB, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + RUNTIME_option_t options; + int m, n, k, KT; + int tempmm, tempnn, tempkk; + int lookahead, myp, myq; + CHAMELEON_Complex64_t zbeta; + CHAM_desc_t WA, WB; + + chamctxt = chameleon_context_self(); + if (sequence->status != CHAMELEON_SUCCESS) { + return; + } + RUNTIME_options_init( &options, chamctxt, sequence, request ); + + chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), + C->mt * C->mb, C->nb * C->q, 0, 0, + C->mt * C->mb, C->nb * C->q, C->p, C->q, + NULL, NULL, NULL ); + chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), + C->mb * C->p, C->nt * C->nb, 0, 0, + C->mb * C->p, C->nt * C->nb, C->p, C->q, + NULL, NULL, NULL ); + + KT = (K + C->mb - 1) / C->mb; + myp = C->myrank / C->q; + myq = C->myrank % C->q; + + for (k = 0; k < KT; k++) { + tempkk = k == KT-1 ? K - k * WA.nb : WA.nb; + zbeta = k == 0 ? 0. : 1.; + + for (n = myq; n < C->nt; n+=C->q) { + tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + + INSERT_TASK_zplrnt( + &options, + tempkk, tempnn, WB(myp, n), + WB.m, k * WB.mb, n * WB.nb, seedB ); + } + + for (m = myp; m < C->mt; m+=C->p) { + tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; + + INSERT_TASK_zplrnt( + &options, + tempmm, tempkk, WA(m, myq), + WA.m, m * WA.mb, k * WA.nb, seedA ); + + for (n = myq; n < C->nt; n+=C->q) { + tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; + + INSERT_TASK_zgemm( + &options, + ChamNoTrans, ChamNoTrans, + tempmm, tempnn, tempkk, C->mb, + 1., WA(m, myq), + WB(myp, n), + zbeta, C(m, n)); + } + RUNTIME_data_flush( sequence, WA(m, 0) ); + } + for (n = myq; n < C->nt; n+=C->q) { + RUNTIME_data_flush( sequence, WB(0, n) ); + } + } + + RUNTIME_desc_flush( &WA, sequence ); + RUNTIME_desc_flush( &WB, sequence ); + RUNTIME_desc_flush( C, sequence ); + chameleon_sequence_wait( chamctxt, sequence ); + chameleon_desc_destroy( &WA ); + chameleon_desc_destroy( &WB ); + + RUNTIME_options_finalize( &options, chamctxt ); +} diff --git a/compute/zplrnk.c b/compute/zplrnk.c new file mode 100644 index 0000000000000000000000000000000000000000..d2069f9a2b9eebb026ac813d2dd838e2e6573296 --- /dev/null +++ b/compute/zplrnk.c @@ -0,0 +1,289 @@ +/** + * + * @file zplrnk.c + * + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zplrnk wrappers + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Florent Pruvost + * @date 2020-03-05 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * CHAMELEON_zplrnk - Generate a random rank-k matrix by tiles. + * + * Use the specific rank-k property of the following matrices multiplication + * + * \f[ C = A \times B \f], + * + * with A a random matrix of size m-by-k and B a random matrix of size k-by-n. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of C. + * + * @param[in] N + * The order of the matrix C. N >= 0. + * + * @param[in] K + * The rank of C. + * + * @param[out] C + * On exit, The random rank-k matrix C generated. + * + * @param[in] LDC + * The leading dimension of the array C. LDC >= max(1,M). + * + * @param[in] seedA + * The seed used in the random generation of matrix A. + * + * @param[in] seedB + * The seed used in the random generation of matrix B. + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa CHAMELEON_zplrnk_Tile + * @sa CHAMELEON_zplrnk_Tile_Async + * @sa CHAMELEON_cplrnk + * @sa CHAMELEON_dplrnk + * @sa CHAMELEON_splrnk + * + */ +int CHAMELEON_zplrnk( int M, int N, int K, + CHAMELEON_Complex64_t *C, int LDC, + unsigned long long int seedA, + unsigned long long int seedB ) +{ + int NB; + int status; + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + CHAM_desc_t descCl, descCt; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zplrnk", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + /* Check input arguments */ + if (M < 0) { + chameleon_error("CHAMELEON_zplrnk", "illegal value of M"); + return -1; + } + if (N < 0) { + chameleon_error("CHAMELEON_zplrnk", "illegal value of N"); + return -2; + } + if (K < 0) { + chameleon_error("CHAMELEON_zplrnk", "illegal value of K"); + return -3; + } + if (LDC < chameleon_max(1, M)) { + chameleon_error("CHAMELEON_zplrnk", "illegal value of LDC"); + return -4; + } + /* Quick return */ + if (chameleon_min(M, N) == 0) + return CHAMELEON_SUCCESS; + + /* Set NT */ + NB = CHAMELEON_NB; + chameleon_sequence_create( chamctxt, &sequence ); + + /* Submit the matrix conversion */ + chameleon_zlap2tile( chamctxt, &descCl, &descCt, ChamDescOutput, ChamUpperLower, + C, NB, NB, LDC, N, M, N, sequence, &request ); + + /* Call the tile interface */ + CHAMELEON_zplrnk_Tile_Async( K, &descCt, seedA, seedB, sequence, &request ); + + /* Submit the matrix conversion back */ + chameleon_ztile2lap( chamctxt, &descCl, &descCt, + ChamDescOutput, ChamUpperLower, sequence, &request ); + + chameleon_sequence_wait( chamctxt, sequence ); + + /* Cleanup the temporary data */ + chameleon_ztile2lap_cleanup( chamctxt, &descCl, &descCt ); + + status = sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + return status; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile + * + * CHAMELEON_zplrnk_Tile - Generate a random rank-k matrix by tiles. + * Tile equivalent of CHAMELEON_zplrnk(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] K + * The rank of C. + * + * @param[in] C + * On exit, The random rank-k matrix C generated. + * + * @param[in] seedA + * The seed used in the random generation of matrix A. + * + * @param[in] seedB + * The seed used in the random generation of matrix B. + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa CHAMELEON_zplrnk + * @sa CHAMELEON_zplrnk_Tile_Async + * @sa CHAMELEON_cplrnk_Tile + * @sa CHAMELEON_dplrnk_Tile + * @sa CHAMELEON_splrnk_Tile + * @sa CHAMELEON_zplghe_Tile + * @sa CHAMELEON_zplgsy_Tile + * + */ +int CHAMELEON_zplrnk_Tile( int K, CHAM_desc_t *C, + unsigned long long int seedA, + unsigned long long int seedB) +{ + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zplrnk_Tile", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + chameleon_sequence_create( chamctxt, &sequence ); + + CHAMELEON_zplrnk_Tile_Async( K, C, seedA, seedB, sequence, &request ); + + CHAMELEON_Desc_Flush( C, sequence ); + + chameleon_sequence_wait( chamctxt, sequence ); + status = sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + return status; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile_Async + * + * CHAMELEON_zplrnk_Tile_Async - Generate a random rank-k matrix by tiles. + * Non-blocking equivalent of CHAMELEON_zplrnk_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] K + * The rank of C. + * + * @param[in] C + * On exit, The random rank-k matrix C generated. + * + * @param[in] seedA + * The seed used in the random generation of matrix A. + * + * @param[in] seedB + * The seed used in the random generation of matrix B. + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa CHAMELEON_zplrnk + * @sa CHAMELEON_zplrnk_Tile + * @sa CHAMELEON_cplrnk_Tile_Async + * @sa CHAMELEON_dplrnk_Tile_Async + * @sa CHAMELEON_splrnk_Tile_Async + * @sa CHAMELEON_zplghe_Tile_Async + * @sa CHAMELEON_zplgsy_Tile_Async + * + */ +int CHAMELEON_zplrnk_Tile_Async( int K, CHAM_desc_t *C, + unsigned long long int seedA, + unsigned long long int seedB, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zplrnk_Tile", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + chameleon_fatal_error("CHAMELEON_zplrnk_Tile", "NULL sequence"); + return CHAMELEON_ERR_UNALLOCATED; + } + if (request == NULL) { + chameleon_fatal_error("CHAMELEON_zplrnk_Tile", "NULL request"); + return CHAMELEON_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == CHAMELEON_SUCCESS) { + request->status = CHAMELEON_SUCCESS; + } + else { + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED); + } + + /* Check descriptors for correctness */ + if (chameleon_desc_check(C) != CHAMELEON_SUCCESS) { + chameleon_error("CHAMELEON_zplrnk_Tile", "invalid descriptor"); + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (C->nb != C->mb) { + chameleon_error("CHAMELEON_zplrnk_Tile", "only square tiles supported"); + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + } + + /* Quick return */ + if (chameleon_min( C->m, C->n ) == 0) + return CHAMELEON_SUCCESS; + + chameleon_pzplrnk( K, C, seedA, seedB, sequence, request ); + + return CHAMELEON_SUCCESS; +} diff --git a/control/chameleon_zf77.c b/control/chameleon_zf77.c index 08f079d7f7dc5fc7b603c63083e279293aaffad2..ab4d3c74483b39574f13bfbfb9656ee0020be69a 100644 --- a/control/chameleon_zf77.c +++ b/control/chameleon_zf77.c @@ -86,6 +86,7 @@ #endif #define CHAMELEON_ZPLGSY CHAMELEON_FNAME(zplgsy , ZPLGSY ) #define CHAMELEON_ZPLRNT CHAMELEON_FNAME(zplrnt , ZPLRNT ) +#define CHAMELEON_ZPLRNK CHAMELEON_FNAME(zplrnk , ZPLRNK ) #define CHAMELEON_ZPOSV CHAMELEON_FNAME(zposv , ZPOSV ) #define CHAMELEON_ZPOTRF CHAMELEON_FNAME(zpotrf , ZPOTRF ) #define CHAMELEON_ZPOTRI CHAMELEON_FNAME(zpotri , ZPOTRI ) @@ -159,6 +160,7 @@ #endif #define CHAMELEON_ZPLGSY_TILE CHAMELEON_TILE_FNAME(zplgsy , ZPLGSY ) #define CHAMELEON_ZPLRNT_TILE CHAMELEON_TILE_FNAME(zplrnt , ZPLRNT ) +#define CHAMELEON_ZPLRNK_TILE CHAMELEON_TILE_FNAME(zplrnk , ZPLRNK ) #define CHAMELEON_ZPOSV_TILE CHAMELEON_TILE_FNAME(zposv , ZPOSV ) #define CHAMELEON_ZPOTRF_TILE CHAMELEON_TILE_FNAME(zpotrf , ZPOTRF ) #define CHAMELEON_ZPOTRI_TILE CHAMELEON_TILE_FNAME(zpotri , ZPOTRI ) @@ -229,6 +231,7 @@ #endif #define CHAMELEON_ZPLGSY_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zplgsy , ZPLGSY ) #define CHAMELEON_ZPLRNT_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zplrnt , ZPLRNT ) +#define CHAMELEON_ZPLRNK_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zplrnk , ZPLRNK ) #define CHAMELEON_ZPOSV_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zposv , ZPOSV ) #define CHAMELEON_ZPOTRF_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zpotrf , ZPOTRF ) #define CHAMELEON_ZPOTRI_TILE_ASYNC CHAMELEON_ASYNC_FNAME(zpotri , ZPOTRI ) @@ -425,6 +428,9 @@ void CHAMELEON_ZPLGSY(CHAMELEON_Complex64_t *bump, cham_uplo_t *uplo, int *N, CH void CHAMELEON_ZPLRNT(int *M, int *N, CHAMELEON_Complex64_t *A, int *LDA, unsigned long long int *seed, int *info) { *info = CHAMELEON_zplrnt(*M, *N, A, *LDA, *seed); } +void CHAMELEON_ZPLRNK(int *M, int *N, int *K, CHAMELEON_Complex64_t *C, int *LDC, unsigned long long int *seedA, unsigned long long int *seedB, int *info) +{ *info = CHAMELEON_zplrnk(*M, *N, *K, C, *LDC, *seedA, *seedB); } + void CHAMELEON_ZPOSV(cham_uplo_t *uplo, int *N, int *NRHS, CHAMELEON_Complex64_t *A, int *LDA, CHAMELEON_Complex64_t *B, int *LDB, int *info) { *info = CHAMELEON_zposv(*uplo, *N, *NRHS, A, *LDA, B, *LDB); } @@ -615,6 +621,9 @@ void CHAMELEON_ZPLGSY_TILE(CHAMELEON_Complex64_t *bump, cham_uplo_t *uplo, CHAM_ void CHAMELEON_ZPLRNT_TILE(CHAM_desc_t *A, unsigned long long int *seed, int *info) { *info = CHAMELEON_zplrnt_Tile(A, *seed); } +void CHAMELEON_ZPLRNK_TILE(int *K, CHAM_desc_t *C, unsigned long long int *seedA, unsigned long long int *seedB, int *info) +{ *info = CHAMELEON_zplrnk_Tile(*K, C, *seedA, *seedB); } + void CHAMELEON_ZPOSV_TILE(cham_uplo_t *uplo, CHAM_desc_t *A, CHAM_desc_t *B, int *info) { *info = CHAMELEON_zposv_Tile(*uplo, A, B); } @@ -805,6 +814,9 @@ void CHAMELEON_ZPLGSY_TILE_ASYNC(CHAMELEON_Complex64_t *bump, cham_uplo_t *uplo, void CHAMELEON_ZPLRNT_TILE_ASYNC(CHAM_desc_t *A, unsigned long long int *seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t * *request, int *info) { *info = CHAMELEON_zplrnt_Tile_Async(A, *seed, sequence, *request); } +void CHAMELEON_ZPLRNK_TILE_ASYNC(int *K, CHAM_desc_t *C, unsigned long long int *seedA, unsigned long long int *seedB, RUNTIME_sequence_t *sequence, RUNTIME_request_t * *request, int *info) +{ *info = CHAMELEON_zplrnk_Tile_Async(*K, C, *seedA, *seedB, sequence, *request); } + void CHAMELEON_ZPOSV_TILE_ASYNC(cham_uplo_t *uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request, int *info) { *info = CHAMELEON_zposv_Tile_Async(*uplo, A, B, sequence, request); } diff --git a/control/chameleon_zf90.F90 b/control/chameleon_zf90.F90 index 447c82a9723c9a3fef4d0cfbe0c778f0c278c88b..3c890304c56f830d3bb9da29621d891960af1f2d 100644 --- a/control/chameleon_zf90.F90 +++ b/control/chameleon_zf90.F90 @@ -792,6 +792,22 @@ module chameleon_z end function CHAMELEON_zplrnt_c end interface + interface + function CHAMELEON_zplrnk_c(M,N,K,C,LDC,seedA,seedB) & + & bind(c, name='CHAMELEON_zplrnk') + use iso_c_binding + implicit none + integer(kind=c_int) :: CHAMELEON_zplrnk_c + integer(kind=c_int), value :: M + integer(kind=c_int), value :: N + integer(kind=c_int), value :: K + type(c_ptr), value :: C + integer(kind=c_int), value :: LDC + integer(kind=c_long_long), value :: seedA + integer(kind=c_long_long), value :: seedB + end function CHAMELEON_zplrnk_c + end interface + interface function CHAMELEON_zposv_c(uplo,N,NRHS,A,LDA,B,LDB) & & bind(c, name='CHAMELEON_zposv') @@ -1613,6 +1629,19 @@ module chameleon_z end function CHAMELEON_zplrnt_Tile_c end interface + interface + function CHAMELEON_zplrnk_Tile_c(K,C,seedA,seedB) & + & bind(c, name='CHAMELEON_zplrnk_Tile') + use iso_c_binding + implicit none + integer(kind=c_int) :: CHAMELEON_zplrnk_Tile_c + integer(kind=c_int), value :: K + type(c_ptr), value :: C + integer(kind=c_long_long), value :: seedA + integer(kind=c_long_long), value :: seedB + end function CHAMELEON_zplrnk_Tile_c + end interface + interface function CHAMELEON_zposv_Tile_c(uplo,A,B) & & bind(c, name='CHAMELEON_zposv_Tile') @@ -2436,6 +2465,21 @@ module chameleon_z end function CHAMELEON_zplrnt_Tile_Async_c end interface + interface + function CHAMELEON_zplrnk_Tile_Async_c(K,C,seedA,seedB,sequence,request) & + & bind(c, name='CHAMELEON_zplrnk_Tile_Async') + use iso_c_binding + implicit none + integer(kind=c_int) :: CHAMELEON_zplrnk_Tile_Async_c + integer(kind=c_int), value :: K + type(c_ptr), value :: C + integer(kind=c_long_long), value :: seedA + integer(kind=c_long_long), value :: seedB + type(c_ptr), value :: sequence + type(c_ptr), value :: request + end function CHAMELEON_zplrnk_Tile_Async_c + end interface + interface function CHAMELEON_zposv_Tile_Async_c(uplo,A,B,sequence,request) & & bind(c, name='CHAMELEON_zposv_Tile_Async') @@ -3491,6 +3535,20 @@ contains info = CHAMELEON_zplrnt_c(M,N,c_loc(A),LDA,seed) end subroutine CHAMELEON_zplrnt + subroutine CHAMELEON_zplrnk(M,N,K,C,LDC,seedA,seedB,info) + use iso_c_binding + implicit none + integer(kind=c_int), intent(out) :: info + integer(kind=c_int), intent(in) :: M + integer(kind=c_int), intent(in) :: N + integer(kind=c_int), intent(in) :: K + integer(kind=c_int), intent(in) :: LDC + integer(kind=c_long_long), intent(in) :: seedA + integer(kind=c_long_long), intent(in) :: seedB + complex(kind=c_double_complex), intent(out), target :: C(LDC,*) + info = CHAMELEON_zplrnk_c(M,N,K,c_loc(C),LDC,seedA,seedB) + end subroutine CHAMELEON_zplrnk + subroutine CHAMELEON_zposv(uplo,N,NRHS,A,LDA,B,LDB,info) use iso_c_binding implicit none @@ -4221,6 +4279,17 @@ contains info = CHAMELEON_zplrnt_Tile_c(A,seed) end subroutine CHAMELEON_zplrnt_Tile + subroutine CHAMELEON_zplrnk_Tile(K,C,seedA,seedB,info) + use iso_c_binding + implicit none + integer(kind=c_int), intent(out) :: info + integer(kind=c_int), intent(in) :: K + integer(kind=c_long_long), intent(in) :: seedA + integer(kind=c_long_long), intent(in) :: seedB + type(c_ptr), value :: C ! Arg managed by CHAMELEON: opaque to Fortran + info = CHAMELEON_zplrnk_Tile_c(K,C,seedA,seedB) + end subroutine CHAMELEON_zplrnk_Tile + subroutine CHAMELEON_zposv_Tile(uplo,A,B,info) use iso_c_binding implicit none @@ -4953,6 +5022,19 @@ contains info = CHAMELEON_zplrnt_Tile_Async_c(A,seed,sequence,request) end subroutine CHAMELEON_zplrnt_Tile_Async + subroutine CHAMELEON_zplrnk_Tile_Async(K,C,seedA,seedB,sequence,request,info) + use iso_c_binding + implicit none + integer(kind=c_int), intent(out) :: info + integer(kind=c_int), intent(in) :: K + integer(kind=c_long_long), intent(in) :: seedA + integer(kind=c_long_long), intent(in) :: seedB + type(c_ptr), value :: C ! Arg managed by CHAMELEON: opaque to Fortran + type(c_ptr), value :: sequence ! Arg managed by CHAMELEON: opaque to Fortran + type(c_ptr), value :: request ! Arg managed by CHAMELEON: opaque to Fortran + info = CHAMELEON_zplrnk_Tile_Async_c(K,C,seedA,seedB,sequence,request) + end subroutine CHAMELEON_zplrnk_Tile_Async + subroutine CHAMELEON_zposv_Tile_Async(uplo,A,B,sequence,request,info) use iso_c_binding implicit none diff --git a/control/compute_z.h b/control/compute_z.h index 06b023fb20aba468ef8e5cd40c9b504bcc10a1d9..4a3c385b48dbed52f7ea3004bb8bb39376ec5748 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -67,6 +67,7 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *seq void chameleon_pzplghe(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzplgsy(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzplrnt(CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +void chameleon_pzplrnk(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *C, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzshift(int, int, int, CHAMELEON_Complex64_t *, int *, int, int, int, RUNTIME_sequence_t*, RUNTIME_request_t*); diff --git a/doc/orgmode/chapters/using.org b/doc/orgmode/chapters/using.org index aa222ba94f621151d4253e63b3c0b5fbfb293e73..f0cb97f5dc6265761c961cbbbefc44fa93b43ee0 100644 --- a/doc/orgmode/chapters/using.org +++ b/doc/orgmode/chapters/using.org @@ -778,6 +778,7 @@ * plghe: generate a random Hermitian matrix * plgsy: generate a random symmetrix matrix * plrnt: generate a random matrix + * plrnk: generate a random matrix of rank K with K <= min(M,N) * *Others* * geadd: general matrix matrix addition * lacpy: copy matrix into another diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index 03bae9adf27acdb8787be3b8204fe98e5403b267..1ec6c7f5cb0fb2d6db33bbe7a9574aa2ea0b9f71 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -75,6 +75,7 @@ int CHAMELEON_zlauum(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA) int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed ); int CHAMELEON_zplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed ); int CHAMELEON_zplrnt( int M, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed ); +int CHAMELEON_zplrnk( int M, int N, int K, CHAMELEON_Complex64_t *C, int LDC, unsigned long long int seedA, unsigned long long int seedB ); int CHAMELEON_zposv(cham_uplo_t uplo, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB); int CHAMELEON_zpotrf(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA); int CHAMELEON_zsytrf(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA); @@ -148,6 +149,7 @@ int CHAMELEON_zlauum_Tile(cham_uplo_t uplo, CHAM_desc_t *A); int CHAMELEON_zplghe_Tile(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed ); int CHAMELEON_zplgsy_Tile(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed ); int CHAMELEON_zplrnt_Tile(CHAM_desc_t *A, unsigned long long int seed ); +int CHAMELEON_zplrnk_Tile(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB ); int CHAMELEON_zposv_Tile(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B); int CHAMELEON_zpotrf_Tile(cham_uplo_t uplo, CHAM_desc_t *A); int CHAMELEON_zsytrf_Tile(cham_uplo_t uplo, CHAM_desc_t *A); @@ -218,6 +220,7 @@ int CHAMELEON_zlauum_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequen int CHAMELEON_zplghe_Tile_Async(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); int CHAMELEON_zplgsy_Tile_Async(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); int CHAMELEON_zplrnt_Tile_Async(CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +int CHAMELEON_zplrnk_Tile_Async(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); int CHAMELEON_zposv_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zpotrf_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zsytrf_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 67c4a29a9ac563d27fa7b37c63ac7347ddfc9783..f6591433f46729ac378b23e979cb3d9ba2feb8c6 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -95,6 +95,7 @@ set(ZSRC # testing_zgeqrs_hqr.c # testing_zgelqs_hqr.c testing_zgels_hqr.c + testing_zplrnk.c ) # Add include and link directories diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index 598da53926c6e9ef8d5e2d08aef6c97c30c25703..986fd4faf5974bd2be4e62e8c80f632465f266af 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -26,7 +26,7 @@ if (NOT CHAMELEON_SIMULATION) # # Create the list of test based on precision and runtime # - set( TESTS lacpy lange lantr lansy ) + set( TESTS lacpy lange lantr lansy plrnk ) if ( ${prec} STREQUAL c OR ${prec} STREQUAL z ) set( TESTS ${TESTS} lanhe ) endif() diff --git a/testing/input/plrnk.in b/testing/input/plrnk.in new file mode 100644 index 0000000000000000000000000000000000000000..3c5c9c3c8d4d89bb5295ad0765c70a0066118906 --- /dev/null +++ b/testing/input/plrnk.in @@ -0,0 +1,22 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# PLRNK +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrix C +# N: Number of columns of matrix C +# K: Rank of matrix C +# LDC: Leading dimension of matrix C +# seedA: seed used to generate random matrix A +# seedB: seed used to generate random matrix B + +op = plrnk +nb = 16 +ib = 8 +m = 37 +n = 23 +k = 11 +ldc = 41 +seedA = 1 +seedB = 2 diff --git a/testing/testing_zcheck.c b/testing/testing_zcheck.c index e0d4c1964c4ef857b78d3d79f1f9829a84ae039f..65b0ec3dddb7631085af86949e48840b237135a2 100644 --- a/testing/testing_zcheck.c +++ b/testing/testing_zcheck.c @@ -42,7 +42,7 @@ /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Compares two matrices by their norms. * @@ -137,7 +137,7 @@ int check_zmatrices( run_arg_list_t *args, cham_uplo_t uplo, CHAM_desc_t *descA, /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Compares the Chameleon computed norm with a Lapack computed one. * @@ -249,7 +249,7 @@ int check_znorm( run_arg_list_t *args, cham_mtxtype_t matrix_type, cham_normtype /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Compares a Chameleon computed sum with a core function computed one. * @@ -381,7 +381,7 @@ int check_zsum ( run_arg_list_t *args, cham_uplo_t uplo, cham_trans_t trans, /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Compares a Chameleon computed scale with a core function computed one. * @@ -444,7 +444,7 @@ int check_zscale( run_arg_list_t *args, cham_uplo_t uplo, CHAMELEON_Complex64_t /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Compares a Chameleon computed product with a core function computed one. * @@ -579,7 +579,7 @@ int check_zgemm( run_arg_list_t *args, cham_trans_t transA, cham_trans_t transB, /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Compares a Chameleon computed hermitian product with a core function computed one. * @@ -735,7 +735,7 @@ int check_zsymm( run_arg_list_t *args, cham_mtxtype_t matrix_type, cham_side_t s /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Compares a Chameleon computed matrix rank k operation with a core function computed one. * @@ -917,7 +917,7 @@ int check_zsyrk( run_arg_list_t *args, cham_mtxtype_t matrix_type, cham_uplo_t u /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Compares a Chameleon computed matrix triangular product with a core function computed one. * @@ -1048,7 +1048,7 @@ int check_ztrmm( run_arg_list_t *args, int check_func, cham_side_t side, cham_up /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Compares a Chameleon computed product U*U' or L'*L result with a core function computed one. * @@ -1125,7 +1125,7 @@ int check_zlauum( run_arg_list_t *args, cham_uplo_t uplo, CHAM_desc_t *descA, CH /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Checks if a Chameleon computed factorization is correct. * @@ -1252,7 +1252,7 @@ int check_zxxtrf( run_arg_list_t *args, cham_mtxtype_t mtxtype, cham_uplo_t uplo /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Checks if the linear solution of op(A) * x = b is correct. * @@ -1340,7 +1340,7 @@ int check_zsolve( run_arg_list_t *args, cham_mtxtype_t mtxtype, cham_trans_t tra /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Checks if the A1 matrix is the inverse of A2. * @@ -1544,7 +1544,7 @@ int check_zortho( run_arg_list_t *args, CHAM_desc_t *descQ ) /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Checks if a linear solution is correct. * @@ -1640,7 +1640,7 @@ int check_zgelqf( run_arg_list_t *args, CHAM_desc_t *descA, CHAM_desc_t *descAF, /** ******************************************************************************** * - * @ingroup new_testing + * @ingroup testing * * @brief Checks if a linear solution is correct. * @@ -1920,4 +1920,87 @@ int check_zgels( run_arg_list_t *args, cham_trans_t trans, CHAM_desc_t *descA, C return info_solution; } +/** + ******************************************************************************** + * + * @ingroup testing + * + * @brief Check matrix is rank K + * + ******************************************************************************* + * + * @param[in] K + * Rank of the matrix + * + * @param[in] descA + * The matrix descriptor. + * + * @retval 0 success, else failure + * + ******************************************************************************* + */ +int check_zrankk( run_arg_list_t *args, int K, CHAM_desc_t *descA ) +{ + int info_solution = 0; + int M = descA->m; + int N = descA->n; + int minMN = chameleon_min(M, N); + int LDA = descA->m; + int rank = CHAMELEON_Comm_rank(); + double Anorm, Rnorm, result; + double eps = LAPACKE_dlamch_work('e'); + + Anorm = CHAMELEON_zlange_Tile( ChamFrobeniusNorm, descA ); + + /* Converts the matrices to LAPACK layout in order to check values on the main process */ + CHAMELEON_Complex64_t *A = NULL; + if ( rank == 0 ) { + A = malloc( M * N * sizeof(CHAMELEON_Complex64_t) ); + } + CHAMELEON_Desc2Lap( ChamUpperLower, descA, A, LDA ); + + /* check rank of A using SVD, value K+1 of Sigma must be small enough */ + if ( rank == 0 ) { + CHAMELEON_Complex64_t *U = malloc( M * M * sizeof(CHAMELEON_Complex64_t) ); + CHAMELEON_Complex64_t *VT = malloc( N * N * sizeof(CHAMELEON_Complex64_t) ); + double *S = malloc( minMN * sizeof(double) ); + double *work = malloc( minMN * sizeof(double) ); + + LAPACKE_zgesvd( LAPACK_COL_MAJOR, 'A', 'A', M, N, A, LDA, S, U, M, VT, N, work ); + + /* Computes the residual's norm */ + if ( K >= minMN ) { + Rnorm = 0.; + } else { + Rnorm = S[K]; + } + + run_arg_add_double( args, "||A||", Anorm ); + run_arg_add_double( args, "||R||", Rnorm ); + + result = Rnorm / (Anorm * eps); + + /* Verifies if the result is inside a threshold */ + if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { + info_solution = 1; + } + else { + info_solution = 0; + } + + free(A); + free(S); + free(U); + free(VT); + free(work); + } + + /* Broadcasts the result from the main processus */ +#if defined(CHAMELEON_USE_MPI) + MPI_Bcast( &info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD ); +#endif + + return info_solution; +} + #endif /* defined(CHAMELEON_SIMULATION) */ diff --git a/testing/testing_zcheck.h b/testing/testing_zcheck.h index 312e3f5e3a783fd4e9bcb6602bac91ff1a66515a..e5864b1ff7224f9b0f1c6cb3b5f8888901ce540f 100644 --- a/testing/testing_zcheck.h +++ b/testing/testing_zcheck.h @@ -62,6 +62,9 @@ static inline int check_zgeqrs ( run_arg_list_t *args, cham_trans_t trans static inline int check_zgelqs ( run_arg_list_t *args, cham_trans_t trans, CHAM_desc_t *descA, double Bnorm, CHAM_desc_t *descR ) { return 0; } static inline int check_zqc ( run_arg_list_t *args, cham_side_t side, cham_trans_t trans, CHAM_desc_t *descC, CHAM_desc_t *descQ, CHAM_desc_t *descCC ) { return 0; } +/* Matrix Generators */ +static inline int check_zrankk ( run_arg_list_t *args, int K, CHAM_desc_t *descA ) { return 0; } + #else /* !defined(CHAMELEON_SIMULATION) */ int check_zmatrices ( run_arg_list_t *args, cham_uplo_t uplo, CHAM_desc_t *descA, CHAM_desc_t *descB ); @@ -94,6 +97,9 @@ int check_zgeqrs ( run_arg_list_t *args, cham_trans_t trans, CHAM_desc_t int check_zgelqs ( run_arg_list_t *args, cham_trans_t trans, CHAM_desc_t *descA, double Bnorm, CHAM_desc_t *descR ); int check_zqc ( run_arg_list_t *args, cham_side_t side, cham_trans_t trans, CHAM_desc_t *descC, CHAM_desc_t *descQ, CHAM_desc_t *descCC ); +/* Matrix Generators */ +int check_zrankk ( run_arg_list_t *args, int K, CHAM_desc_t *descA ); + #endif /* defined(CHAMELEON_SIMULATION) */ #endif /* _testing_zcheck_h_ */ diff --git a/testing/testing_zplrnk.c b/testing/testing_zplrnk.c new file mode 100644 index 0000000000000000000000000000000000000000..355df294e0830a36b3ba56ba3f7c2d70054652f9 --- /dev/null +++ b/testing/testing_zplrnk.c @@ -0,0 +1,93 @@ +/** + * + * @file testing_zplrnk.c + * + * @copyright 2019-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zplrnk testing + * + * @version 1.0.0 + * @author Lucas Barros de Assis + * @author Florent Pruvost + * @date 2020-03-05 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testings.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zplrnk( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + double norm; + CHAM_desc_t *descC; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_normtype_t norm_type = run_arg_get_ntype( args, "norm", ChamMaxNorm ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = run_arg_get_int( args, "K", N ); + int LDC = run_arg_get_int( args, "LDC", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + /* We consider the gemm cost used in this operation as the cost */ + cham_fixdbl_t flops = flops_zgemm( M, N, K ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix */ + CHAMELEON_Desc_Create( + &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q ); + + /* Calculates the random rank-k matrix */ + START_TIMING( t ); + hres = CHAMELEON_zplrnk_Tile( K, descC, seedA, seedB ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. ); + + /* Checks the solution */ + if ( check ) { + hres = check_zrankk( args, K, descC ); + } + + CHAMELEON_Desc_Destroy( &descC ); + + run_id++; + return hres; +} + +testing_t test_zplrnk; +const char *zplrnk_params[] = { "nb", "m", "n", "k", "ldc", "seedA", "seedB", NULL }; +const char *zplrnk_output[] = { NULL }; +const char *zplrnk_outchk[] = { "||A||", "||R||", "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zplrnk_init( void ) __attribute__( ( constructor ) ); +void +testing_zplrnk_init( void ) +{ + test_zplrnk.name = "zplrnk"; + test_zplrnk.helper = "General rank-k matrix generation"; + test_zplrnk.params = zplrnk_params; + test_zplrnk.output = zplrnk_output; + test_zplrnk.outchk = zplrnk_outchk; + test_zplrnk.fptr = testing_zplrnk; + test_zplrnk.next = NULL; + + testing_register( &test_zplrnk ); +}