diff --git a/.gitmodules b/.gitmodules index b4e9149f8b1be3e5fbdb5efd6e9cbc834ab63687..86c9e98e1c62cb763852e13071db5ba0797bdec9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "cmake_modules/morse_cmake"] path = cmake_modules/morse_cmake url = https://gitlab.inria.fr/solverstack/morse_cmake.git +[submodule "hqr"] + path = hqr + url = https://gitlab.inria.fr/solverstack/hqr.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 139ba4e7952a85aec022a3f00854395603c24f8a..5f8a1c32ab1b96a7ee3be8fa2113ee83c80b2f52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -255,7 +255,7 @@ set(CHAMELEON_DEP "") set(CMAKE_THREAD_PREFER_PTHREAD TRUE) find_package(Threads REQUIRED) if( THREADS_FOUND ) - list(APPEND EXTRA_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) + list(APPEND EXTRA_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) endif () # Add math library to the list of extra @@ -263,16 +263,16 @@ endif () set(M_LIBRARIES "") if(UNIX OR WIN32) find_library( - M_m_LIBRARY - NAMES m - ) + M_m_LIBRARY + NAMES m + ) mark_as_advanced(M_m_LIBRARY) if (M_m_LIBRARY) - list(APPEND M_LIBRARIES "${M_m_LIBRARY}") - list(APPEND EXTRA_LIBRARIES "${M_m_LIBRARY}") + list(APPEND M_LIBRARIES "${M_m_LIBRARY}") + list(APPEND EXTRA_LIBRARIES "${M_m_LIBRARY}") else() - message(FATAL_ERROR "Could NOT find libm on your system." - " Are you sure to a have a C compiler installed?") + message(FATAL_ERROR "Could NOT find libm on your system." + " Are you sure to a have a C compiler installed?") endif() endif() @@ -1011,6 +1011,12 @@ if(CHAMELEON_USE_CUDA) endif() #------------------------------------------------------------------------------ +############################################################################### +# Add HQR library # +################### +add_subdirectory(hqr) +include_directories(hqr/include) +#------------------------------------------------------------------------------ ############################################################################### # Main library # diff --git a/cmake_modules/FindLIBHQR.cmake b/cmake_modules/FindLIBHQR.cmake new file mode 100644 index 0000000000000000000000000000000000000000..0599401515348720702703d49b659194eefb5f2d --- /dev/null +++ b/cmake_modules/FindLIBHQR.cmake @@ -0,0 +1,41 @@ +# - Try to find LibHQR +# Once done this will define +# LIBHQR_FOUND - System has LibHQR +# LIBHQR_INCLUDE_DIRS - The LibHQR include directories +# LIBHQR_LIBRARIES - The libraries needed to use LibHQR +# LIBHQR_DEFINITIONS - Compiler switches required for using LIBHQR + +find_package(PkgConfig) +if(PKG_CONFIG_FOUND) + pkg_check_modules(PC_LIBHQR QUIET libhqr) +endif() + +find_path( + LIBHQR_INCLUDE_DIR + libhqr.h + HINTS ${LIBHQR_DIR} ${PC_LIBHQR_INCLUDEDIR} ${PC_LIBHQR_INCLUDE_DIRS} + PATH_SUFFIXES include include/libhqr + ) + +find_library( + LIBHQR_LIBRARY + NAMES hqr + HINTS ${LIBHQR_DIR} ${PC_LIBHQR_LIBDIR} ${PC_LIBHQR_LIBRARY_DIRS} + PATH_SUFFIXES lib lib32 lib64 lib/libhqr lib32/libhqr lib64/libhqr + ) + +set(LIBHQR_DIR "" CACHE PATH "Path where LIBHQR was installed") + + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments +# and set LIBHQR_FOUND to TRUE +# if all listed variables are TRUE +find_package_handle_standard_args( + LIBHQR DEFAULT_MSG LIBHQR_LIBRARY LIBHQR_INCLUDE_DIR) + +mark_as_advanced(LIBHQR_INCLUDE_DIR LIBHQR_LIBRARY ) + +set(LIBHQR_LIBRARIES ${LIBHQR_LIBRARY} ) +set(LIBHQR_INCLUDE_DIRS ${LIBHQR_INCLUDE_DIR} ) +set(LIBHQR_DEFINITIONS ${PC_LIBHQR_CFLAGS_OTHER} ) diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake index 0a974775b7192227b887dcba77515305083c1f13..41120c395d72ac2d7602e3d5722bcfe033be1790 160000 --- a/cmake_modules/morse_cmake +++ b/cmake_modules/morse_cmake @@ -1 +1 @@ -Subproject commit 0a974775b7192227b887dcba77515305083c1f13 +Subproject commit 41120c395d72ac2d7602e3d5722bcfe033be1790 diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 9dcc70224ccba1d5c1647b4169cdaafd15bbbae7..9a98f52459b9d01b391e2d3869442d1f688543dd 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -102,9 +102,11 @@ set(ZSRC # LAPACK ################## pzgelqf.c + pzgelqf_param.c pzgelqfrh.c pzgeqrf.c pzgeqrfrh.c + pzgeqrf_param.c pzgetrf_incpiv.c pzgetrf_nopiv.c pzlacpy.c @@ -123,21 +125,30 @@ set(ZSRC pztrtri.c pzpotrimm.c pzunglq.c + pzunglq_param.c pzunglqrh.c pzungqr.c + pzungqr_param.c pzungqrrh.c pzunmlq.c + pzunmlq_param.c pzunmlqrh.c pzunmqr.c + pzunmqr_param.c pzunmqrrh.c pztpgqrt.c pztpqrt.c ### zgels.c + zgels_param.c zgelqf.c + zgelqf_param.c zgelqs.c + zgelqs_param.c zgeqrf.c + zgeqrf_param.c zgeqrs.c + zgeqrs_param.c #zgesv.c zgesv_incpiv.c zgesv_nopiv.c @@ -165,12 +176,16 @@ set(ZSRC zpotrs.c zsytrs.c ztrtri.c + ztpgqrt.c + ztpqrt.c zunglq.c + zunglq_param.c zungqr.c + zungqr_param.c zunmlq.c + zunmlq_param.c zunmqr.c - ztpgqrt.c - ztpqrt.c + zunmqr_param.c ################## # MIXED PRECISION ################## @@ -283,6 +298,7 @@ endif() if (NOT CHAMELEON_SIMULATION) target_link_libraries(chameleon coreblas) endif() +target_link_libraries(chameleon hqr) list(INSERT CHAMELEON_DEP 0 -lchameleon) add_dependencies(chameleon diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c new file mode 100644 index 0000000000000000000000000000000000000000..d82f451d375615d8bef17c657b93fc9c692f327b --- /dev/null +++ b/compute/pzgelqf_param.c @@ -0,0 +1,204 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file pzgelqf_param.c + * + * MORSE auxiliary routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 0.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" +#include "libhqr.h" + +#define A(m,n) A, (m), (n) +#define TS(m,n) TS, (m), (n) +#define TT(m,n) TT, (m), (n) +#if defined(CHAMELEON_COPY_DIAG) +#define D(m,n) D, (m), (n) +#else +#define D(m,n) A, (m), (n) +#endif + +/* + * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling + */ +void morse_pzgelqf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_option_t options; + size_t ws_worker = 0; + size_t ws_host = 0; + + int k, m, n, i, p; + int K; + int ldak, ldam; + int tempkmin, tempkm, tempnn, tempmm, temppn; + int ib; + int *tiles; + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + ib = MORSE_IB; + + /* + * zgelqt = A->nb * (ib+1) + * zunmlq = A->nb * ib + * ztslqt = A->nb * (ib+1) + * zttlqt = A->nb * (ib+1) + * ztsmlq = A->nb * ib + * zttmlq = A->nb * ib + */ + ws_worker = A->nb * (ib+1); + + /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmlq = A->nb * ib + * ztsmlq = 2 * A->nb * ib + */ + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); +#endif + + /* Initialisation of tiles */ + + tiles = (int*)malloc((qrtree->mt)*sizeof(int)); + memset( tiles, 0, (qrtree->mt)*sizeof(int) ); + + ws_worker *= sizeof(MORSE_Complex64_t); + ws_host *= sizeof(MORSE_Complex64_t); + + RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); + + K = chameleon_min(A->mt, A->nt); + + /* The number of the factorization */ + for (k = 0; k < K; k++) { + RUNTIME_iteration_push(morse, k); + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + ldak = BLKLDD(A, k); + /* The number of geqrt to apply */ + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + p = qrtree->getm(qrtree, k, i); + temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + tempkmin = chameleon_min(tempkm, temppn); + + MORSE_TASK_zgelqt( + &options, + tempkm, temppn, ib, TS->nb, + A( k, p), ldak, + TS(k, p), TS->mb); + if ( k < (A->mt-1) ) { +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseUpper, tempkm, temppn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseLower, tempkm, temppn, + 0., 1., + D(k, p), ldak ); +#endif +#endif + } + for (m = k+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + ldam = BLKLDD(A, m); + MORSE_TASK_zunmlq( + &options, + MorseRight, MorseConjTrans, + tempmm, temppn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + A( m, p), ldam); + } + } + + /* Setting the order of the tiles */ + libhqr_treewalk(qrtree, k, tiles); + + for (i = k; i < A->nt-1; i++) { + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); + + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + + /* Tiles killed is a TS */ + if(qrtree->gettype(qrtree, k, n) == 0){ + MORSE_TASK_ztslqt( + &options, + tempkm, tempnn, ib, TS->nb, + A( k, p), ldak, + A( k, n), ldak, + TS(k, n), TS->mb); + + for (m = k+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + ldam = BLKLDD(A, m); + MORSE_TASK_ztsmlq( + &options, + MorseRight, MorseConjTrans, + tempmm, A->nb, tempmm, tempnn, tempkm, ib, TS->nb, + A( m, p), ldam, + A( m, n), ldam, + A( k, n), ldak, + TS(k, n), TS->mb); + } + } + + /* Tiles killed is a TT */ + else { + MORSE_TASK_zttlqt( + &options, + tempkm, tempnn, ib, TT->nb, + A( k, p), ldak, + A( k, n), ldak, + TT(k, n), TT->mb); + + for (m = k+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + ldam = BLKLDD(A, m); + MORSE_TASK_zttmlq( + &options, + MorseRight, MorseConjTrans, + tempmm, A->nb, tempmm, tempnn, tempkm, ib, TT->nb, + A( m, p), ldam, + A( m, n), ldam, + A( k, n), ldak, + TT(k, n), TT->mb); + } + } + } + RUNTIME_iteration_pop(morse); + } + + free(tiles); + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); + (void)D; +} diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c index 57b1d613cd415ba91dee83c99dfc968d8d15bc58..0fee71961a000b184ea3723471914f464e085546 100644 --- a/compute/pzgelqfrh.c +++ b/compute/pzgelqfrh.c @@ -1,10 +1,9 @@ /** * - * @copyright (c) 2009-2014 The University of Tennessee and The University - * of Tennessee Research Foundation. - * All rights reserved. - * @copyright (c) 2012-2016 Inria. All rights reserved. - * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * @copyright (c) 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * **/ @@ -29,12 +28,10 @@ * @precisions normal z -> s d c * **/ -//ALLOC_WS : A->nb + ib*T->nb -//WS_ADD : A->nb + ib*T->nb #include "control/common.h" -#define A(m,n) A, (m), (n) -#define T(m,n) T, (m), (n) +#define A(m,n) A, (m), (n) +#define T(m,n) T, (m), (n) #define T2(m,n) T, (m), (n)+A->nt #if defined(CHAMELEON_COPY_DIAG) #define DIAG(m,n) DIAG, ((n)/BS), 0 @@ -42,9 +39,9 @@ #define DIAG(m,n) A, (m), (n) #endif -/***************************************************************************//** +/* * Parallel tile LQ factorization (reduction Householder) - dynamic scheduling - **/ + */ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request) { @@ -55,7 +52,7 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MORSE_desc_t *DIAG = NULL; int k, m, n; - int N, RD; + int K, N, RD; int ldak, ldam; int tempkmin, tempkm, tempNn, tempnn, tempmm, tempNRDn; int ib; @@ -101,7 +98,10 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, } #endif - for (k = 0; k < chameleon_min(A->mt, A->nt); k++) { + K = chameleon_min(A->mt, A->nt); + + /* The number of the factorization */ + for (k = 0; k < K; k++) { RUNTIME_iteration_push(morse, k); tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; @@ -114,22 +114,22 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, tempkm, tempNn, ib, T->nb, A(k, N), ldak, T(k, N), T->mb); - if ( k < (A->mt-1) ) { + if ( k < (A->mt-1) ) { #if defined(CHAMELEON_COPY_DIAG) - MORSE_TASK_zlacpy( - &options, - MorseUpper, tempkm, tempNn, A->nb, - A(k, N), ldak, - DIAG(k, N), ldak ); + MORSE_TASK_zlacpy( + &options, + MorseUpper, tempkm, tempNn, A->nb, + A(k, N), ldak, + DIAG(k, N), ldak ); #if defined(CHAMELEON_USE_CUDA) - MORSE_TASK_zlaset( - &options, - MorseLower, tempkm, tempNn, - 0., 1., - DIAG(k, N), ldak ); + MORSE_TASK_zlaset( + &options, + MorseLower, tempkm, tempNn, + 0., 1., + DIAG(k, N), ldak ); #endif #endif - } + } for (m = k+1; m < A->mt; m++) { tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; ldam = BLKLDD(A, m); @@ -188,7 +188,6 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, } } } - RUNTIME_iteration_pop(morse); } RUNTIME_options_ws_free(&options); diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c new file mode 100644 index 0000000000000000000000000000000000000000..89585c40824ec25e09c02a4a83f2b0ade2f1a478 --- /dev/null +++ b/compute/pzgeqrf_param.c @@ -0,0 +1,203 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file pzgeqrf_param.c + * + * MORSE auxiliary routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" +#include "libhqr.h" + +#define A(m,n) A, (m), (n) +#define TS(m,n) TS, (m), (n) +#define TT(m,n) TT, (m), (n) +#if defined(CHAMELEON_COPY_DIAG) +#define D(m,n) D, (m), (n) +#else +#define D(m,n) A, (m), (n) +#endif + +/** + * Parallel tile QR factorization (reduction Householder) - dynamic scheduling + */ +void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_option_t options; + size_t ws_worker = 0; + size_t ws_host = 0; + + int k, m, n, i, p; + int K; + int ldap, ldam; + int tempkmin, tempkn, tempnn, tempmm; + int ib; + int *tiles; + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + ib = MORSE_IB; + + /* + * zgeqrt = A->nb * (ib+1) + * zunmqr = A->nb * ib + * ztsqrt = A->nb * (ib+1) + * zttqrt = A->nb * (ib+1) + * ztsmqr = A->nb * ib + * zttmqr = A->nb * ib + */ + ws_worker = A->nb * (ib+1); + + /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmqr = A->nb * ib + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); +#endif + + /* Initialisation of tiles */ + + tiles = (int*)malloc((qrtree->mt)*sizeof(int)); + memset( tiles, 0, (qrtree->mt)*sizeof(int) ); + + ws_worker *= sizeof(MORSE_Complex64_t); + ws_host *= sizeof(MORSE_Complex64_t); + + RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); + + K = chameleon_min(A->mt, A->nt); + + /* The number of the factorization */ + for (k = 0; k < K; k++) { + RUNTIME_iteration_push(morse, k); + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + + /* The number of geqrt to apply */ + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + m = qrtree->getm(qrtree, k, i); + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempkmin = chameleon_min(tempmm, tempkn); + ldam = BLKLDD(A, m); + + MORSE_TASK_zgeqrt( + &options, + tempmm, tempkn, ib, TS->nb, + A( m, k), ldam, + TS(m, k), TS->mb); + if ( k < (A->nt-1) ) { +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseLower, tempmm, A->nb, A->nb, + A(m, k), ldam, + D(m, k), ldam ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseUpper, tempmm, A->nb, + 0., 1., + D(m, k), ldam ); +#endif +#endif + } + for (n = k+1; n < A->nt; n++) { + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + MORSE_TASK_zunmqr( + &options, + MorseLeft, MorseConjTrans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D( m, k), ldam, + TS(m, k), TS->mb, + A( m, n), ldam); + } + } + + /* Setting the order of the tiles */ + libhqr_treewalk(qrtree, k, tiles); + + for (i = k; i < A->mt-1; i++) { + m = tiles[i]; + p = qrtree->currpiv(qrtree, k, m); + + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + ldap = BLKLDD(A, p); + ldam = BLKLDD(A, m); + + /* Tiles killed is a TS */ + if(qrtree->gettype(qrtree, k, m) == 0){ + MORSE_TASK_ztsqrt( + &options, + tempmm, tempkn, ib, TS->nb, + A( p, k), ldap, + A( m, k), ldam, + TS(m, k), TS->mb); + + for (n = k+1; n < A->nt; n++) { + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + MORSE_TASK_ztsmqr( + &options, + MorseLeft, MorseConjTrans, + A->nb, tempnn, tempmm, tempnn, A->nb, ib, TS->nb, + A( p, n), ldap, + A( m, n), ldam, + A( m, k), ldam, + TS(m, k), TS->mb); + } + } + + /* Tiles killed is a TT */ + else { + MORSE_TASK_zttqrt( + &options, + tempmm, tempkn, ib, TT->nb, + A( p, k), ldap, + A( m, k), ldam, + TT(m, k), TT->mb); + + for (n = k+1; n < A->nt; n++) { + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + MORSE_TASK_zttmqr( + &options, + MorseLeft, MorseConjTrans, + A->mb, tempnn, tempmm, tempnn, A->nb, ib, TT->nb, + A( p, n), ldap, + A( m, n), ldam, + A( m, k), ldam, + TT(m, k), TT->mb); + } + } + } + RUNTIME_iteration_pop(morse); + } + + free(tiles); + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); + (void)D; +} diff --git a/compute/pztpgqrt.c b/compute/pztpgqrt.c index 3bd8d9e7c8f686602db7b4233a557fbc44c30014..4f8b5e8b0d09ec1489912fd59518b51782511539 100644 --- a/compute/pztpgqrt.c +++ b/compute/pztpgqrt.c @@ -184,5 +184,5 @@ void morse_pztpgqrt( int L, morse_desc_mat_free(DIAG); free(DIAG); #endif - (void)DIAG; + (void)DIAG; (void)minMT; } diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c new file mode 100644 index 0000000000000000000000000000000000000000..0bca71880c0936abb5a394b729a802e10af7017f --- /dev/null +++ b/compute/pzunglq_param.c @@ -0,0 +1,177 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file pzunglq_pram.c + * + * MORSE auxiliary routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +#define A(m,n) A, (m), (n) +#define Q(m,n) Q, (m), (n) +#define TS(m,n) TS, (m), (n) +#define TT(m,n) TT, (m), (n) +#if defined(CHAMELEON_COPY_DIAG) +#define D(m,n) D, (m), (n) +#else +#define D(m,n) A, (m), (n) +#endif + +/** + * Parallel construction of Q using tile V - dynamic scheduling + */ +void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_option_t options; + size_t ws_worker = 0; + size_t ws_host = 0; + + int k, m, n, i, p; + int K; + int ldak, ldqm; + int tempkm, tempkmin, temppn, tempnn, tempmm; + int ib; + int *tiles; + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + ib = MORSE_IB; + + /* + * zunmqr = A->nb * ib + * ztsmqr = A->nb * ib + * zttmqr = A->nb * ib + */ + ws_worker = A->nb * ib; + +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmqr = A->nb * ib + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); +#endif + + /* Initialisation of tiles */ + + tiles = (int*)malloc((qrtree->mt)*sizeof(int)); + memset( tiles, 0, (qrtree->mt)*sizeof(int) ); + + ws_worker *= sizeof(MORSE_Complex64_t); + ws_host *= sizeof(MORSE_Complex64_t); + + RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); + + K = chameleon_min(A->mt, A->nt); + + for (k = K-1; k >= 0; k--) { + RUNTIME_iteration_push(morse, k); + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + ldak = BLKLDD(A, k); + + /* Setting the order of the tiles*/ + libhqr_treewalk(qrtree, k, tiles); + + for (i = A->nt-2; i >= k; i--) { + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); + + tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + + /* TT or TS */ + + if(qrtree->gettype(qrtree, k, n) == 0){ + for (m = k; m < Q->mt; m++) { + tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + ldqm = BLKLDD(Q, m); + MORSE_TASK_ztsmlq( + &options, + MorseRight, MorseNoTrans, + tempmm, Q->nb, tempmm, tempnn, tempkm, ib, TS->nb, + Q( m, p), ldqm, + Q( m, n), ldqm, + A( k, n), ldak, + TS(k, n), TS->mb); + } + } + else { + for (m = k; m < Q->mt; m++) { + tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + ldqm = BLKLDD(Q, m); + MORSE_TASK_zttmlq( + &options, + MorseRight, MorseNoTrans, + tempmm, Q->nb, tempmm, tempnn, tempkm, ib, TT->nb, + Q( m, p), ldqm, + Q( m, n), ldqm, + A( k, n), ldak, + TT(k, n), TT->mb); + } + } + } + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + p = qrtree->getm(qrtree, k, i); + + temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + tempkmin = chameleon_min(tempkm, temppn); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseUpper, tempkmin, temppn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseLower, tempkmin, temppn, + 0., 1., + D(k, p), ldak ); +#endif +#endif + for (m = k; m < Q->mt; m++) { + tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + ldqm = BLKLDD(Q, m); + MORSE_TASK_zunmlq( + &options, + MorseRight, MorseNoTrans, + tempmm, temppn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + Q( m, p), ldqm); + } + } + RUNTIME_iteration_pop(morse); + } + + free(tiles); + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); + (void)D; +} diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c new file mode 100644 index 0000000000000000000000000000000000000000..32ab51de1ac869a407870f20bf1888c26a51365e --- /dev/null +++ b/compute/pzungqr_param.c @@ -0,0 +1,184 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file pzungqr_param.c + * + * MORSE auxiliary routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +#define A(m,n) A, m, n +#define Q(m,n) Q, m, n +#define TS(m,n) TS, m, n +#define TT(m,n) TT, m, n +#if defined(CHAMELEON_COPY_DIAG) +#define D(m,n) D, m, n +#else +#define D(m,n) A, m, n +#endif + +/** + * Parallel construction of Q using tile V (application to identity) - dynamic scheduling + */ +void morse_pzungqr_param(const libhqr_tree_t *qrtree, + MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_option_t options; + size_t ws_worker = 0; + size_t ws_host = 0; + + int k, m, n, i, p; + int ldam, ldqm, ldqp; + int tempmm, tempnn, tempkmin, tempkn; + int ib, minMT; + int *tiles; + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + ib = MORSE_IB; + + if (A->m > A->n) { + minMT = A->nt; + } else { + minMT = A->mt; + } + + /* + * zunmqr = A->nb * ib + * ztsmqr = A->nb * ib + */ + ws_worker = A->nb * ib; + + /* Allocation of temporary (scratch) working space */ +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmqr = A->nb * ib + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); +#endif + + /* Initialisation of tiles */ + + tiles = (int*)malloc((qrtree->mt)*sizeof(int)); + memset( tiles, 0, (qrtree->mt)*sizeof(int) ); + + ws_worker *= sizeof(MORSE_Complex64_t); + ws_host *= sizeof(MORSE_Complex64_t); + + RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); + + for (k = minMT-1; k >= 0; k--) { + RUNTIME_iteration_push(morse, k); + + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + + /* Setting the order of tiles */ + libhqr_treewalk(qrtree, k, tiles); + + for (i = Q->mt - 2; i >= k; i--) { + m = tiles[i]; + p = qrtree->currpiv(qrtree, k, m); + + tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; + ldam = BLKLDD(A, m); + ldqm = BLKLDD(Q, m); + ldqp = BLKLDD(Q, p); + + /* TT or TS */ + + if(qrtree->gettype(qrtree, k , m) == 0){ + for (n = k; n < Q->nt; n++) { + tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + MORSE_TASK_ztsmqr( + &options, + MorseLeft, MorseNoTrans, + Q->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb, + Q(p, n), ldqp, + Q(m, n), ldqm, + A(m, k), ldam, + TS(m, k), TS->mb); + } + } + else { + for (n = k; n < Q->nt; n++) { + tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + MORSE_TASK_zttmqr( + &options, + MorseLeft, MorseNoTrans, + Q->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb, + Q(p, n), ldqp, + Q(m, n), ldqm, + A(m, k), ldam, + TT(m, k), TT->mb); + } + } + } + + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + m = qrtree->getm(qrtree, k, i); + + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempkmin = chameleon_min(tempmm, tempkn); + ldam = BLKLDD(A, m); + ldqm = BLKLDD(Q, m); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseLower, tempmm, tempkmin, A->nb, + A(m, k), ldam, + D(m, k), ldam ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseUpper, tempmm, tempkmin, + 0., 1., + D(m, k), ldam ); +#endif +#endif + for (n = k; n < Q->nt; n++) { + tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb; + MORSE_TASK_zunmqr( + &options, + MorseLeft, MorseNoTrans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D(m, k), ldam, + TS(m, k), TS->mb, + Q(m, n), ldqm); + } + } + RUNTIME_iteration_pop(morse); + } + + free(tiles); + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); + + (void)D; +} diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c index 7217488af320fa546ce314717c09127e618da554..c5d2d9c0a024f5cb9b6399c5c355f356e52499e4 100644 --- a/compute/pzungqrrh.c +++ b/compute/pzungqrrh.c @@ -118,7 +118,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_TASK_zttmqr( &options, MorseLeft, MorseNoTrans, - A->nb, tempnn, tempMRDm, tempnn, + Q->mb, tempnn, tempMRDm, tempnn, tempkn, ib, T->nb, Q (M, n), ldqM, Q (M+RD, n), ldqMRD, @@ -142,7 +142,7 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_TASK_ztsmqr( &options, MorseLeft, MorseNoTrans, - A->nb, tempnn, tempmm, tempnn, + Q->mb, tempnn, tempmm, tempnn, tempkn, ib, T->nb, Q(M, n), ldqM, Q(m, n), ldqm, diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c new file mode 100644 index 0000000000000000000000000000000000000000..8838be440eafaf5f6f5aed7efa04f26b4f119152 --- /dev/null +++ b/compute/pzunmlq_param.c @@ -0,0 +1,441 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file pzunmlq_param.c + * + * MORSE auxiliary routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +#define A(m,n) A, m, n +#define B(m,n) B, m, n +#define TS(m,n) TS, m, n +#define TT(m,n) TT, m, n +#if defined(CHAMELEON_COPY_DIAG) +#define D(m,n) D, m, n +#else +#define D(m,n) A, m, n +#endif + +/** + * Parallel application of Q using tile V - LQ factorization - dynamic scheduling + */ +void morse_pzunmlq_param(const libhqr_tree_t *qrtree, + MORSE_enum side, MORSE_enum trans, + MORSE_desc_t *A, MORSE_desc_t *B, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_option_t options; + size_t ws_worker = 0; + size_t ws_host = 0; + + int k, m, n, i, p; + int ldbm, ldak, ldbp; + int tempnn, temppn, tempkmin, tempmm, tempkm; + int ib, K; + int *tiles; + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + ib = MORSE_IB; + + K = chameleon_min(A->mt, A->nt); + + /* + * zunmlq = A->nb * ib + * ztsmlq = A->nb * ib + * zttmlq = A->nb * ib + */ + ws_worker = A->nb * ib; + +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmlq = A->nb * ib + * ztsmlq = 2 * A->nb * ib + */ + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); +#endif + + /* Initialisation of tiles */ + tiles = (int*)malloc((qrtree->mt)*sizeof(int)); + memset( tiles, 0, (qrtree->mt)*sizeof(int) ); + + ws_worker *= sizeof(MORSE_Complex64_t); + ws_host *= sizeof(MORSE_Complex64_t); + + RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); + + if (side == MorseLeft ) { + if (trans == MorseNoTrans) { + /* + * MorseLeft / MorseNoTrans + */ + for (k = 0; k < K; k++) { + RUNTIME_iteration_push(morse, k); + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + ldak = BLKLDD(A, k); + + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + p = qrtree->getm(qrtree, k, i); + + temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + tempkmin = chameleon_min(tempkm, temppn); + ldbp = BLKLDD(B, p); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseUpper, tempkmin, temppn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseLower, tempkmin, temppn, + 0., 1., + D(k, p), ldak ); +#endif +#endif + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zunmlq( + &options, + side, trans, + temppn, tempnn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + B( p, n), ldbp); + } + } + + /* Setting the order of the tiles*/ + libhqr_treewalk(qrtree, k, tiles); + + for (i = k; i < A->nt-1; i++) { + m = tiles[i]; + p = qrtree->currpiv(qrtree, k, m); + + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbp = BLKLDD(B, p); + ldbm = BLKLDD(B, m); + + /* TT or TS */ + if(qrtree->gettype(qrtree, k, m) == 0){ + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + MORSE_TASK_ztsmlq( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( k, m), ldak, + TS(k, m), TS->mb); + } + } + else { + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + + MORSE_TASK_zttmlq( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( k, m), ldak, + TT(k, m), TS->mb); + } + } + } + RUNTIME_iteration_pop(morse); + } + } else { + /* + * MorseLeft / MorseConjTrans + */ + for (k = K-1; k >= 0; k--) { + RUNTIME_iteration_push(morse, k); + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + ldak = BLKLDD(A, k); + + /* Setting the order of the tiles*/ + libhqr_treewalk(qrtree, k, tiles); + + for (i = A->nt-2; i >= k; i--) { + m = tiles[i]; + p = qrtree->currpiv(qrtree, k, m); + + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbp = BLKLDD(B, p); + ldbm = BLKLDD(B, m); + + /* TT or TS */ + if(qrtree->gettype(qrtree, k, m) == 0){ + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_ztsmlq( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkm, ib, TS->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( k, m), ldak, + TS(k, m), TS->mb); + } + } + else { + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zttmlq( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkm, ib, TT->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( k, m), ldak, + TT(k, m), TT->mb); + } + } + } + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + p = qrtree->getm(qrtree, k, i); + + temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + tempkmin = chameleon_min(tempkm, temppn); + ldbp = BLKLDD(B, p); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseUpper, tempkmin, temppn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseLower, tempkmin, temppn, + 0., 1., + D(k, p), ldak ); +#endif +#endif + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zunmlq( + &options, + side, trans, + temppn, tempnn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + B( p, n), ldbp); + } + } + RUNTIME_iteration_pop(morse); + } + } + } else { + if (trans == MorseNoTrans) { + /* + * MorseRight / MorseNoTrans + */ + for (k = K-1; k >= 0; k--) { + RUNTIME_iteration_push(morse, k); + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + ldak = BLKLDD(A, k); + + /* Setting the order of the tiles*/ + libhqr_treewalk(qrtree, k, tiles); + + for (i = A->nt-2; i >= k; i--) { + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); + + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + ldbp = BLKLDD(B, p); + + /* TT or TS */ + + if(qrtree->gettype(qrtree, k, n) == 0){ + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_ztsmlq( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkm, ib, TS->nb, + B( m, p), ldbm, + B( m, n), ldbm, + A( k, n), ldak, + TS(k, n), TS->mb); + } + } + else { + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_zttmlq( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkm, ib, TT->nb, + B( m, p), ldbm, + B( m, n), ldbm, + A( k, n), ldak, + TT(k, n), TT->mb); + } + } + } + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + p = qrtree->getm(qrtree, k, i); + + temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + tempkmin = chameleon_min(tempkm, temppn); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseUpper, tempkmin, temppn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseLower, tempkmin, temppn, + 0., 1., + D(k, p), ldak ); +#endif +#endif + for (m = 0; m < B->mt; m++) { + ldbm = BLKLDD(B, m); + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + MORSE_TASK_zunmlq( + &options, + side, trans, + tempmm, temppn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + B( m, p), ldbm); + } + } + RUNTIME_iteration_pop(morse); + } + } else { + /* + * MorseRight / MorseConjTrans + */ + for (k = 0; k < K; k++) { + RUNTIME_iteration_push(morse, k); + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + ldak = BLKLDD(A, k); + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + p = qrtree->getm(qrtree, k, i); + + temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb; + tempkmin = chameleon_min(tempkm, temppn); + ldbp = BLKLDD(B, p); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseUpper, tempkmin, temppn, A->nb, + A(k, p), ldak, + D(k, p), ldak ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseLower, tempkmin, temppn, + 0., 1., + D(k, p), ldak ); +#endif +#endif + for (m = 0; m < B->mt; m++) { + ldbm = BLKLDD(B, m); + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + MORSE_TASK_zunmlq( + &options, + side, trans, + tempmm, temppn, tempkmin, ib, TS->nb, + D( k, p), ldak, + TS(k, p), TS->mb, + B( m, p), ldbm); + } + } + /* Setting the order of tiles */ + libhqr_treewalk(qrtree, k, tiles); + + for (i = k; i < A->nt-1; i++) { + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); + + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + ldbp = BLKLDD(B, p); + + if(qrtree->gettype(qrtree, k, n) == 0){ + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_ztsmlq( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkm, ib, TS->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( k, n), ldak, + TS(k, n), TS->mb); + } + } + else { + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_zttmlq( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkm, ib, TT->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( k, n), ldak, + TT(k, n), TT->mb); + } + } + } + + RUNTIME_iteration_pop(morse); + } + } + } + + free(tiles); + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); + + (void)D; +} diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c new file mode 100644 index 0000000000000000000000000000000000000000..cb5d4f59c2a547407afbe186613c4dbf01fbbb34 --- /dev/null +++ b/compute/pzunmqr_param.c @@ -0,0 +1,440 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file pzunmqr_param.c + * + * MORSE auxiliary routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +#define A(m,n) A, m, n +#define B(m,n) B, m, n +#define TS(m,n) TS, m, n +#define TT(m,n) TT, m, n +#if defined(CHAMELEON_COPY_DIAG) +#define D(m,n) D, m, n +#else +#define D(m,n) A, m, n +#endif + +/** + * Parallel application of Q using tile V - QR factorization - dynamic scheduling + */ +void morse_pzunmqr_param(const libhqr_tree_t *qrtree, + MORSE_enum side, MORSE_enum trans, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_option_t options; + size_t ws_worker = 0; + size_t ws_host = 0; + + int k, m, n, i, p; + int ldam, ldan, ldbm, ldbp; + int tempnn, tempkmin, tempmm, tempkn; + int ib, K; + int *tiles; + + morse = morse_context_self(); + if (sequence->status != MORSE_SUCCESS) + return; + RUNTIME_options_init(&options, morse, sequence, request); + + ib = MORSE_IB; + + K = chameleon_min(A->mt, A->nt); + + /* + * zunmqr = A->nb * ib + * ztsmqr = A->nb * ib + * zttmqr = A->nb * ib + */ + ws_worker = A->nb * ib; + +#if defined(CHAMELEON_USE_CUDA) + /* Worker space + * + * zunmqr = A->nb * ib + * ztsmqr = 2 * A->nb * ib + */ + ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); +#endif + + /* Initialisation of tiles */ + + tiles = (int*)malloc((qrtree->mt)*sizeof(int)); + memset( tiles, 0, (qrtree->mt)*sizeof(int) ); + + ws_worker *= sizeof(MORSE_Complex64_t); + ws_host *= sizeof(MORSE_Complex64_t); + + RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); + + if (side == MorseLeft ) { + if (trans == MorseConjTrans) { + /* + * MorseLeft / MorseConjTrans + */ + for (k = 0; k < K; k++) { + RUNTIME_iteration_push(morse, k); + + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + m = qrtree->getm(qrtree, k, i); + + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempkmin = chameleon_min(tempmm, tempkn); + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseLower, tempmm, tempkmin, A->nb, + A(m, k), ldam, + D(m, k), ldam ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseUpper, tempmm, tempkmin, + 0., 1., + D(m, k), ldam ); +#endif +#endif + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zunmqr( + &options, + side, trans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D( m, k), ldam, + TS(m, k), TS->mb, + B( m, n), ldbm); + } + } + /* Setting the order of the tiles*/ + libhqr_treewalk(qrtree, k, tiles); + + for (i = k; i < B->mt-1; i++) { + m = tiles[i]; + p = qrtree->currpiv(qrtree, k, m); + + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + ldbp = BLKLDD(B, p); + if(qrtree->gettype(qrtree, k, m) == 0){ + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_ztsmqr( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( m, k), ldam, + TS(m, k), TS->mb); + } + } + else { + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zttmqr( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( m, k), ldam, + TT(m, k), TT->mb); + } + } + } + RUNTIME_iteration_pop(morse); + } + } else { + /* + * MorseLeft / MorseNoTrans + */ + for (k = K-1; k >= 0; k--) { + RUNTIME_iteration_push(morse, k); + + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + + /* Setting the order of the tiles*/ + libhqr_treewalk(qrtree, k, tiles); + + for (i = B->mt-2; i >= k; i--) { + m = tiles[i]; + p = qrtree->currpiv(qrtree, k, m); + + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + ldbp = BLKLDD(B, p); + + /* TT or TS */ + + if(qrtree->gettype(qrtree, k, m) == 0){ + for (n = k; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_ztsmqr( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkn, ib, TS->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( m, k), ldam, + TS(m, k), TS->mb); + } + } + else { + for (n = k; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zttmqr( + &options, + side, trans, + B->mb, tempnn, tempmm, tempnn, tempkn, ib, TT->nb, + B( p, n), ldbp, + B( m, n), ldbm, + A( m, k), ldam, + TT(m, k), TT->mb); + } + } + } + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + m = qrtree->getm(qrtree, k, i); + + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + tempkmin = chameleon_min(tempmm, tempkn); + ldam = BLKLDD(A, m); + ldbm = BLKLDD(B, m); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseLower, tempmm, tempkmin, A->nb, + A(m, k), ldam, + D(m, k), ldam ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseUpper, tempmm, tempkmin, + 0., 1., + D(m, k), ldam ); +#endif +#endif + for (n = 0; n < B->nt; n++) { + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + MORSE_TASK_zunmqr( + &options, + side, trans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D( m, k), ldam, + TS(m, k), TS->mb, + B( m, n), ldbm); + } + } + RUNTIME_iteration_pop(morse); + } + } + } else { + if (trans == MorseConjTrans) { + /* + * MorseRight / MorseConjTrans + */ + for (k = K-1; k >= 0; k--) { + RUNTIME_iteration_push(morse, k); + + tempkn = k == A->nt-1 ? A->n - k*A->nb : A->nb; + + /* Setting the order of tiles */ + libhqr_treewalk(qrtree, k, tiles); + + for (i = B->nt-2; i >= k; i--) { + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); + + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + ldan = BLKLDD(A, n); + ldbp = BLKLDD(B, p); + + /* TS or TT */ + if(qrtree->gettype(qrtree, k, n) == 0){ + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_ztsmqr( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb, + B( m, p), ldbm, + B( m, n), ldbm, + A( n, k), ldan, + TS(n, k), TS->mb); + } + } + else{ + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_zttmqr( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb, + B( m, p), ldbm, + B( m, n), ldbm, + A( n, k), ldan, + TT(n, k), TT->mb); + } + } + } + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + n = qrtree->getm(qrtree, k, i); + + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempkmin = chameleon_min(tempnn, tempkn); + ldan = BLKLDD(A, n); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseLower, tempnn, tempkmin, A->nb, + A(n, k), ldan, + D(n, k), ldan ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseUpper, tempnn, tempkmin, + 0., 1., + D(n, k), ldan ); +#endif +#endif + for (m = 0; m < B->mt; m++) { + ldbm = BLKLDD(B, m); + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + MORSE_TASK_zunmqr( + &options, + side, trans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D( n, k), ldan, + TS(n, k), TS->mb, + B( m, n), ldbm); + } + } + + RUNTIME_iteration_pop(morse); + } + } else { + /* + * MorseRight / MorseNoTrans + */ + for (k = 0; k < K; k++) { + RUNTIME_iteration_push(morse, k); + + tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb; + + for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) { + n = qrtree->getm(qrtree, k, i); + + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + tempkmin = chameleon_min(tempnn, tempkn); + ldan = BLKLDD(A, n); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseLower, tempnn, tempkmin, A->nb, + A(n, k), ldan, + D(n, k), ldan ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseUpper, tempnn, tempkmin, + 0., 1., + D(n, k), ldan ); +#endif +#endif + for (m = 0; m < B->mt; m++) { + ldbm = BLKLDD(B, m); + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + MORSE_TASK_zunmqr( + &options, + side, trans, + tempmm, tempnn, tempkmin, ib, TS->nb, + D( n, k), ldan, + TS(n, k), TS->mb, + B( m, n), ldbm); + } + } + /* Setting the order of tiles */ + libhqr_treewalk(qrtree, k, tiles); + + for (i = k; i < B->nt-1; i++) { + n = tiles[i]; + p = qrtree->currpiv(qrtree, k, n); + + tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + ldan = BLKLDD(A, n); + ldbp = BLKLDD(B, p); + if(qrtree->gettype(qrtree, k, n) == 0){ + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_ztsmqr( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkn, ib, TS->nb, + B( m, p), ldbm, + B( m, n), ldbm, + A( n, k), ldan, + TS(n, k), TS->mb); + } + } + else { + for (m = 0; m < B->mt; m++) { + tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; + ldbm = BLKLDD(B, m); + MORSE_TASK_zttmqr( + &options, + side, trans, + tempmm, B->nb, tempmm, tempnn, tempkn, ib, TT->nb, + B( m, p), ldbm, + B( m, n), ldbm, + A( n, k), ldan, + TT(n, k), TT->mb); + } + } + } + + RUNTIME_iteration_pop(morse); + } + } + } + + free(tiles); + RUNTIME_options_ws_free(&options); + RUNTIME_options_finalize(&options, morse); + MORSE_TASK_dataflush_all(); + + (void)D; +} diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c index 1c6954f67911e9eec173bdaa6403060dff51ad71..777ec71ad5a96e12e3870131fdefa24611dec60a 100644 --- a/compute/pzunmqrrh.c +++ b/compute/pzunmqrrh.c @@ -134,8 +134,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MORSE_TASK_zunmqr( &options, side, trans, - tempMm, tempnn, - tempkmin, ib, T->nb, + tempMm, tempnn, tempkmin, ib, T->nb, DIAG(M, k), ldaM, T(M, k), T->mb, B(M, n), ldbM); @@ -260,7 +259,6 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, B(M, n), ldbM); } } - RUNTIME_iteration_pop(morse); } } @@ -337,8 +335,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MORSE_TASK_zunmqr( &options, side, trans, - tempmm, tempMm, - tempkmin, ib, T->nb, + tempmm, tempMm, tempkmin, ib, T->nb, DIAG(M, k), ldaM, T(M, k), T->mb, B(m, M), ldbm); @@ -379,8 +376,7 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MORSE_TASK_zunmqr( &options, side, trans, - tempmm, tempMm, - tempkmin, ib, T->nb, + tempmm, tempMm, tempkmin, ib, T->nb, DIAG(M, k), ldaM, T(M, k), T->mb, B(m, M), ldbm); diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c new file mode 100644 index 0000000000000000000000000000000000000000..2e46686816f4d5423371b8a120df9cce725c110a --- /dev/null +++ b/compute/zgelqf_param.c @@ -0,0 +1,299 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zgelqf_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zgelqf_param - Computes the tile LQ factorization of a complex M-by-N matrix A: A = L * Q. + * + ******************************************************************************* + * + * @param[in] qrtree + * The tree used for the factorization + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. + * + * @param[in,out] A + * On entry, the M-by-N matrix A. + * On exit, the elements on and below the diagonal of the array contain the m-by-min(M,N) + * lower trapezoidal matrix L (L is lower triangular if M <= N); the elements above the + * diagonal represent the unitary matrix Q as a product of elementary reflectors, stored + * by tiles. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[out] descT + * On exit, auxiliary factorization data, required by MORSE_zgelqs to solve the system + * of equations. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zgelqf_param_Tile + * @sa MORSE_zgelqf_param_Tile_Async + * @sa MORSE_cgelqf + * @sa MORSE_dgelqf + * @sa MORSE_sgelqf + * @sa MORSE_zgelqs + * + ******************************************************************************/ +int MORSE_zgelqf_param(const libhqr_tree_t *qrtree, int M, int N, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgelqf_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_zgelqf_param", "illegal value of M"); + return -1; + } + if (N < 0) { + morse_error("MORSE_zgelqf_param", "illegal value of N"); + return -2; + } + if (LDA < chameleon_max(1, M)) { + morse_error("MORSE_zgelqf_param", "illegal value of LDA"); + return -4; + } + + /* Quick return */ + if (chameleon_min(M, N) == 0) + return MORSE_SUCCESS; + + /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, 0); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zgelqf_param", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_zgelqf_param_Tile_Async(qrtree, &descA, descTS, descTT, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zgelqf_param_Tile - Computes the tile LQ factorization of a matrix. + * Tile equivalent of MORSE_zgelqf_param(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in,out] A + * On entry, the M-by-N matrix A. + * On exit, the elements on and below the diagonal of the array contain the m-by-min(M,N) + * lower trapezoidal matrix L (L is lower triangular if M <= N); the elements above the + * diagonal represent the unitary matrix Q as a product of elementary reflectors, stored + * by tiles. + * + * @param[out] T + * On exit, auxiliary factorization data, required by MORSE_zgelqs to solve the system + * of equations. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zgelqf_param + * @sa MORSE_zgelqf_param_Tile_Async + * @sa MORSE_cgelqf_Tile + * @sa MORSE_dgelqf_Tile + * @sa MORSE_sgelqf_Tile + * @sa MORSE_zgelqs_Tile + * + ******************************************************************************/ +int MORSE_zgelqf_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgelqf_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zgelqf_param_Tile_Async(qrtree, A, TS, TT, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_zgelqf_param_Tile_Async - Computes the tile LQ factorization of a matrix. + * Non-blocking equivalent of MORSE_zgelqf_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zgelqf_param + * @sa MORSE_zgelqf_param_Tile + * @sa MORSE_cgelqf_Tile_Async + * @sa MORSE_dgelqf_Tile_Async + * @sa MORSE_sgelqf_Tile_Async + * @sa MORSE_zgelqs_Tile_Async + * + ******************************************************************************/ +int MORSE_zgelqf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgelqf_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zgelqf_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zgelqf_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqf_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqf_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqf_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb) { + morse_error("MORSE_zgelqf_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Quick return */ +/* + if (chameleon_min(M, N) == 0) + return MORSE_SUCCESS; +*/ +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + + morse_pzgelqf_param(qrtree, A, TS, TT, Dptr, sequence, request); + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; + return MORSE_SUCCESS; +} diff --git a/compute/zgelqs.c b/compute/zgelqs.c index c78fb2d92736de7b3107ff9d8ba0044ddfcd08d5..b998694a910e89f1dd1ca315b35bad2a22bcdf36 100644 --- a/compute/zgelqs.c +++ b/compute/zgelqs.c @@ -1,10 +1,10 @@ /** * - * @copyright (c) 2009-2014 The University of Tennessee and The University - * of Tennessee Research Foundation. - * All rights reserved. - * @copyright (c) 2012-2014 Inria. All rights reserved. - * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * @copyright 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. * **/ @@ -29,7 +29,8 @@ **/ #include "control/common.h" -/***************************************************************************//** +/** + ******************************************************************************* * * @ingroup MORSE_Complex64_t * @@ -161,13 +162,14 @@ int MORSE_zgelqs(int M, int N, int NRHS, /* morse_ziptile2lap( descB, B, NB, NB, LDB, NRHS, sequence, &request);*/ /* morse_sequence_wait(morse, sequence);*/ /* }*/ - + status = sequence->status; morse_sequence_destroy(morse, sequence); return status; } -/***************************************************************************//** +/** + ******************************************************************************* * * @ingroup MORSE_Complex64_t_Tile * @@ -222,13 +224,14 @@ int MORSE_zgelqs_Tile(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B) morse_sequence_wait(morse, sequence); RUNTIME_desc_getoncpu(A); RUNTIME_desc_getoncpu(B); - + status = sequence->status; morse_sequence_destroy(morse, sequence); return status; } -/***************************************************************************//** +/** + ******************************************************************************* * * @ingroup MORSE_Complex64_t_Tile_Async * diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c new file mode 100644 index 0000000000000000000000000000000000000000..ef1cc2505a6d2b1a0cd676063bcbb0b4c3952bef --- /dev/null +++ b/compute/zgelqs_param.c @@ -0,0 +1,347 @@ +/** + * + * @copyright 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zgelqs_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zgelqs_param - Compute a minimum-norm solution min || A*X - B || using the LQ factorization + * A = L*Q computed by MORSE_zgelqf. + * + ******************************************************************************* + * + * @param[in] qrtree + * The tree used for the factorization + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= M >= 0. + * + * @param[in] NRHS + * The number of columns of B. NRHS >= 0. + * + * @param[in] A + * Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= M. + * + * @param[in] descTS + * Auxiliary factorization data, computed by MORSE_zgelqf. + * + * @param[in] descTT + * Auxiliary factorization data, computed by MORSE_zgelqf. + * + * @param[in,out] B + * On entry, the M-by-NRHS right hand side matrix B. + * On exit, the N-by-NRHS solution matrix X. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= N. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zgelqs_param_Tile + * @sa MORSE_zgelqs_param_Tile_Async + * @sa MORSE_cgelqs + * @sa MORSE_dgelqs + * @sa MORSE_sgelqs + * @sa MORSE_zgelqf + * + ******************************************************************************/ +int MORSE_zgelqs_param(const libhqr_tree_t *qrtree, int M, int N, int NRHS, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT, + MORSE_Complex64_t *B, int LDB) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descB; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgelqs_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_zgelqs_param", "illegal value of M"); + return -1; + } + if (N < 0 || M > N) { + morse_error("MORSE_zgelqs_param", "illegal value of N"); + return -2; + } + if (NRHS < 0) { + morse_error("MORSE_zgelqs_param", "illegal value of N"); + return -3; + } + if (LDA < chameleon_max(1, M)) { + morse_error("MORSE_zgelqs_param", "illegal value of LDA"); + return -5; + } + if (LDB < chameleon_max(1, chameleon_max(1, N))) { + morse_error("MORSE_zgelqs_param", "illegal value of LDB"); + return -8; + } + /* Quick return */ + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + return MORSE_SUCCESS; + } + + /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, NRHS); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zgelqs_param", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descB))); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, */ +/* sequence, &request);*/ +/* morse_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_zgelqs_param_Tile_Async(qrtree, &descA, descTS, descTT, &descB, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); + morse_zooptile2lap(descB, B, NB, NB, LDB, NRHS, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descB); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_ziptile2lap( descB, B, NB, NB, LDB, NRHS, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zgelqs_param_Tile - Computes a minimum-norm solution using previously computed + * LQ factorization. + * Tile equivalent of MORSE_zgelqs_param(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] A + * Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf. + * + * @param[in] TS + * Auxiliary factorization data, computed by MORSE_zgelqf. + * + * @param[in] TT + * Auxiliary factorization data, computed by MORSE_zgelqf. + * + * @param[in,out] B + * On entry, the M-by-NRHS right hand side matrix B. + * On exit, the N-by-NRHS solution matrix X. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zgelqs_param + * @sa MORSE_zgelqs_param_Tile_Async + * @sa MORSE_cgelqs_Tile + * @sa MORSE_dgelqs_Tile + * @sa MORSE_sgelqs_Tile + * @sa MORSE_zgelqf_Tile + * + ******************************************************************************/ +int MORSE_zgelqs_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgelqs_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zgelqs_param_Tile_Async(qrtree, A, TS, TT, B, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(B); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_zgelqs_param_Tile_Async - Computes a minimum-norm solution using previously + * computed LQ factorization. + * Non-blocking equivalent of MORSE_zgelqs_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zgelqs_param + * @sa MORSE_zgelqs_param_Tile + * @sa MORSE_cgelqs_Tile_Async + * @sa MORSE_dgelqs_Tile_Async + * @sa MORSE_sgelqs_Tile_Async + * @sa MORSE_zgelqf_Tile_Async + * + ******************************************************************************/ +int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_desc_t *subB; + MORSE_desc_t *subA; + MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgelqs_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zgelqs_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zgelqs_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqs_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqs_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqs_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(B) != MORSE_SUCCESS) { + morse_error("MORSE_zgelqs_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb || B->nb != B->mb) { + morse_error("MORSE_zgelqs_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Quick return */ +/* + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + return MORSE_SUCCESS; + } +*/ + /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n); + morse_pztile_zero(subB, sequence, request); + free(subB); */ + + subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); + morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + free(subA); + free(subB); + +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, Dptr, sequence, request); + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; + return MORSE_SUCCESS; +} diff --git a/compute/zgels_param.c b/compute/zgels_param.c new file mode 100644 index 0000000000000000000000000000000000000000..fac618e1e64da9be97f1638c13903a80f415e728 --- /dev/null +++ b/compute/zgels_param.c @@ -0,0 +1,435 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zgels_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zgels_param - solves overdetermined or underdetermined linear systems involving an M-by-N + * matrix A using the QR or the LQ factorization of A. It is assumed that A has full rank. + * The following options are provided: + * + * # trans = MorseNoTrans and M >= N: find the least squares solution of an overdetermined + * system, i.e., solve the least squares problem: minimize || B - A*X ||. + * + * # trans = MorseNoTrans and M < N: find the minimum norm solution of an underdetermined + * system A * X = B. + * + * Several right hand side vectors B and solution vectors X can be handled in a single call; + * they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS + * solution matrix X. + * + ******************************************************************************* + * + * @param[in] qrtree + * The tree used for the factorization + * + * @param[in] trans + * Intended usage: + * = MorseNoTrans: the linear system involves A; + * = MorseConjTrans: the linear system involves A**H. + * Currently only MorseNoTrans is supported. + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. + * + * @param[in] NRHS + * The number of right hand sides, i.e., the number of columns of the matrices B and X. + * NRHS >= 0. + * + * @param[in,out] A + * On entry, the M-by-N matrix A. + * On exit, + * if M >= N, A is overwritten by details of its QR factorization as returned by + * MORSE_zgeqrf; + * if M < N, A is overwritten by details of its LQ factorization as returned by + * MORSE_zgelqf. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[out] descTS + * On exit, auxiliary factorization data. + * + * @param[out] descTT + * On exit, auxiliary factorization data. + * + * @param[in,out] B + * On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise; + * On exit, if return value = 0, B is overwritten by the solution vectors, stored + * columnwise: + * if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual + * sum of squares for the solution in each column is given by the sum of squares of the + * modulus of elements N+1 to M in that column; + * if M < N, rows 1 to N of B contain the minimum norm solution vectors; + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= MAX(1,M,N). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zgels_param_Tile + * @sa MORSE_zgels_param_Tile_Async + * @sa MORSE_cgels + * @sa MORSE_dgels + * @sa MORSE_sgels + * + ******************************************************************************/ +int MORSE_zgels_param(const libhqr_tree_t *qrtree, MORSE_enum trans, int M, int N, int NRHS, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT, + MORSE_Complex64_t *B, int LDB) +{ + int i, j; + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descB; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgels_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (trans != MorseNoTrans) { + morse_error("MORSE_zgels_param", "only MorseNoTrans supported"); + return MORSE_ERR_NOT_SUPPORTED; + } + if (M < 0) { + morse_error("MORSE_zgels_param", "illegal value of M"); + return -2; + } + if (N < 0) { + morse_error("MORSE_zgels_param", "illegal value of N"); + return -3; + } + if (NRHS < 0) { + morse_error("MORSE_zgels_param", "illegal value of NRHS"); + return -4; + } + if (LDA < chameleon_max(1, M)) { + morse_error("MORSE_zgels_param", "illegal value of LDA"); + return -6; + } + if (LDB < chameleon_max(1, chameleon_max(M, N))) { + morse_error("MORSE_zgels_param", "illegal value of LDB"); + return -9; + } + /* Quick return */ + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + for (i = 0; i < chameleon_max(M, N); i++) + for (j = 0; j < NRHS; j++) + B[j*LDB+i] = 0.0; + return MORSE_SUCCESS; + } + + /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, NRHS); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zgels_param", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + + if ( M >= N ) { + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descB))); + /* } else {*/ + /* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/ + /* sequence, &request);*/ + /* morse_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS,*/ + /* sequence, &request);*/ + /* }*/ + } else { + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descB))); + /* } else {*/ + /* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/ + /* sequence, &request);*/ + /* morse_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS,*/ + /* sequence, &request);*/ + /* }*/ + } + + /* Call the tile interface */ + MORSE_zgels_param_Tile_Async(qrtree, MorseNoTrans, &descA, descTS, descTT, &descB, sequence, &request); + + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); + morse_zooptile2lap(descB, B, NB, NB, LDB, NRHS, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descB); + /* } else {*/ + /* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ + /* morse_ziptile2lap( descB, B, NB, NB, LDB, NRHS, sequence, &request);*/ + /* morse_sequence_wait(morse, sequence);*/ + /* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zgels_param_Tile - Solves overdetermined or underdetermined linear system of equations + * using the tile QR or the tile LQ factorization. + * Tile equivalent of MORSE_zgels_param(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] trans + * Intended usage: + * = MorseNoTrans: the linear system involves A; + * = MorseConjTrans: the linear system involves A**H. + * Currently only MorseNoTrans is supported. + * + * @param[in,out] A + * On entry, the M-by-N matrix A. + * On exit, + * if M >= N, A is overwritten by details of its QR factorization as returned by + * MORSE_zgeqrf; + * if M < N, A is overwritten by details of its LQ factorization as returned by + * MORSE_zgelqf. + * + * @param[out] TS + * On exit, auxiliary factorization data. + * + * @param[out] TT + * On exit, auxiliary factorization data. + * + * @param[in,out] B + * On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise; + * On exit, if return value = 0, B is overwritten by the solution vectors, stored + * columnwise: + * if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual + * sum of squares for the solution in each column is given by the sum of squares of the + * modulus of elements N+1 to M in that column; + * if M < N, rows 1 to N of B contain the minimum norm solution vectors; + * + ******************************************************************************* + * + * @return + * \return MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zgels_param + * @sa MORSE_zgels_param_Tile_Async + * @sa MORSE_cgels_Tile + * @sa MORSE_dgels_Tile + * @sa MORSE_sgels_Tile + * + ******************************************************************************/ +int MORSE_zgels_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum trans, MORSE_desc_t *A, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgels_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zgels_param_Tile_Async(qrtree, trans, A, TS, TT, B, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(B); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_zgels_param_Tile_Async - Solves overdetermined or underdetermined linear + * system of equations using the tile QR or the tile LQ factorization. + * Non-blocking equivalent of MORSE_zgels_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zgels_param + * @sa MORSE_zgels_param_Tile + * @sa MORSE_cgels_Tile_Async + * @sa MORSE_dgels_Tile_Async + * @sa MORSE_sgels_Tile_Async + * + ******************************************************************************/ +int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, MORSE_desc_t *A, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_desc_t *subA; + MORSE_desc_t *subB; + MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgels_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zgels_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zgels_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zgels_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zgels_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zgels_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(B) != MORSE_SUCCESS) { + morse_error("MORSE_zgels_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb || B->nb != B->mb) { + morse_error("MORSE_zgels_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (trans != MorseNoTrans) { + morse_error("MORSE_zgels_param_Tile", "only MorseNoTrans supported"); + return morse_request_fail(sequence, request, MORSE_ERR_NOT_SUPPORTED); + } + /* Quick return - currently NOT equivalent to LAPACK's: + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + for (i = 0; i < chameleon_max(M, N); i++) + for (j = 0; j < NRHS; j++) + B[j*LDB+i] = 0.0; + return MORSE_SUCCESS; + } + */ + if (A->m >= A->n) { + +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + + subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); + + morse_pzgeqrf_param(qrtree, A, TS, TT, Dptr, sequence, request); + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, Dptr, sequence, request); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + } + else { +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + + subB = morse_desc_submatrix(B, 0, 0, A->m, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->m, A->m); + + morse_pzgelqf_param(qrtree, A, TS, TT, Dptr, sequence, request); + morse_pztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, Dptr, sequence, request); + } + + free(subA); + free(subB); + + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; + return MORSE_SUCCESS; +} diff --git a/compute/zgeqrf.c b/compute/zgeqrf.c index 51182770c768571843efe8e89e990e2c4e6b2478..f17810f5cd0b0ee6a5abf7dc83c2b5e54f5d2b76 100644 --- a/compute/zgeqrf.c +++ b/compute/zgeqrf.c @@ -205,7 +205,8 @@ int MORSE_zgeqrf_Tile(MORSE_desc_t *A, MORSE_desc_t *T) return status; } -/***************************************************************************//** +/** + ***************************************************************************** * * @ingroup MORSE_Complex64_t_Tile_Async * diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c new file mode 100644 index 0000000000000000000000000000000000000000..8e84f2a2bc421df67aa4e4e1f4695b53cf2f29e1 --- /dev/null +++ b/compute/zgeqrf_param.c @@ -0,0 +1,309 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zgeqrf_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zgeqrf_param - Computes the tile QR factorization of a complex M-by-N + * matrix A: A = Q * R. + * + ******************************************************************************* + * + * @param[in] qrtree + * The tree used for the factorization + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. + * + * @param[in, out] A + * On entry, the M-by-N matrix A. + * On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N + * upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the + * diagonal represent the unitary matrix Q as a product of elementary reflectors stored + * by tiles. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[out] descTS + * On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system + * of equations. + * + * @param[out] descTT + * On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system + * of equations. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zgeqrf_param_Tile + * @sa MORSE_zgeqrf_param_Tile_Async + * @sa MORSE_cgeqrf + * @sa MORSE_dgeqrf + * @sa MORSE_sgeqrf + * @sa MORSE_zgeqrs + * + ******************************************************************************/ +int MORSE_zgeqrf_param(const libhqr_tree_t *qrtree, int M, int N, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgeqrf_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_zgeqrf_param", "illegal value of M"); + return -1; + } + if (N < 0) { + morse_error("MORSE_zgeqrf_param", "illegal value of N"); + return -2; + } + if (LDA < chameleon_max(1, M)) { + morse_error("MORSE_zgeqrf_param", "illegal value of LDA"); + return -4; + } + + /* Quick return */ + if (chameleon_min(M, N) == 0) + return MORSE_SUCCESS; + + /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, 0); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zgeqrf_param", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_zgeqrf_param_Tile_Async(qrtree, &descA, descTS, descTT, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zgeqrf_param_Tile - Computes the tile QR factorization of a matrix. + * Tile equivalent of MORSE_zgeqrf_param(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in,out] A + * On entry, the M-by-N matrix A. + * On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N + * upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the + * diagonal represent the unitary matrix Q as a product of elementary reflectors stored + * by tiles. + * + * @param[out] TS + * On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system + * of equations. + * + * @param[out] TT + * On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system + * of equations. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zgeqrf_param + * @sa MORSE_zgeqrf_param_Tile_Async + * @sa MORSE_cgeqrf_param_Tile + * @sa MORSE_dgeqrf_param_Tile + * @sa MORSE_sgeqrf_param_Tile + * @sa MORSE_zgeqrs_param_Tile + * + ******************************************************************************/ +int MORSE_zgeqrf_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgeqrf_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zgeqrf_param_Tile_Async(qrtree, A, TS, TT, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ***************************************************************************** + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_zgeqrf_param_Tile_Async - Computes the tile QR factorization of a matrix. + * Non-blocking equivalent of MORSE_zgeqrf_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zgeqrf_param + * @sa MORSE_zgeqrf_param_Tile + * @sa MORSE_cgeqrf_param_Tile_Async + * @sa MORSE_dgeqrf_param_Tile_Async + * @sa MORSE_sgeqrf_param_Tile_Async + * @sa MORSE_zgeqrs_param_Tile_Async + * + ******************************************************************************/ +int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; + + morse = morse_context_self(); + if (morse == NULL) { + morse_error("MORSE_zgeqrf_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zgeqrf_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zgeqrf_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zgeqrf_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zgeqrf_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zgeqrf_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb) { + morse_error("MORSE_zgeqrf_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Quick return */ +/* + if (chameleon_min(M, N) == 0) + return MORSE_SUCCESS; +*/ +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + + morse_pzgeqrf_param(qrtree, A, TS, TT, Dptr, sequence, request); + + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; + return MORSE_SUCCESS; +} diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c new file mode 100644 index 0000000000000000000000000000000000000000..ed6aef62f7034a169d69cf304db82f9448c518a7 --- /dev/null +++ b/compute/zgeqrs_param.c @@ -0,0 +1,337 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zgeqrs_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zgeqrs_param - Compute a minimum-norm solution min || A*X - B || using the RQ factorization + * A = R*Q computed by MORSE_zgeqrf. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= M >= 0. + * + * @param[in] NRHS + * The number of columns of B. NRHS >= 0. + * + * @param[in,out] A + * Details of the QR factorization of the original matrix A as returned by MORSE_zgeqrf. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= M. + * + * @param[in] descT + * Auxiliary factorization data, computed by MORSE_zgeqrf. + * + * @param[in,out] B + * On entry, the m-by-nrhs right hand side matrix B. + * On exit, the n-by-nrhs solution matrix X. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= max(1,N). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zgeqrs_param_Tile + * @sa MORSE_zgeqrs_param_Tile_Async + * @sa MORSE_cgeqrs + * @sa MORSE_dgeqrs + * @sa MORSE_sgeqrs + * @sa MORSE_zgeqrf + * + *******************************************************************************/ +int MORSE_zgeqrs_param(const libhqr_tree_t *qrtree, int M, int N, int NRHS, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT, + MORSE_Complex64_t *B, int LDB) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descB; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgeqrs_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_zgeqrs_param", "illegal value of M"); + return -1; + } + if (N < 0 || N > M) { + morse_error("MORSE_zgeqrs_param", "illegal value of N"); + return -2; + } + if (NRHS < 0) { + morse_error("MORSE_zgeqrs_param", "illegal value of N"); + return -3; + } + if (LDA < chameleon_max(1, M)) { + morse_error("MORSE_zgeqrs_param", "illegal value of LDA"); + return -5; + } + if (LDB < chameleon_max(1, chameleon_max(1, M))) { + morse_error("MORSE_zgeqrs_param", "illegal value of LDB"); + return -8; + } + /* Quick return */ + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + return MORSE_SUCCESS; + } + + /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, NRHS); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zgeqrs_param", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descB))); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, */ +/* sequence, &request);*/ +/* morse_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_zgeqrs_param_Tile_Async(qrtree, &descA, descTS, descTT, &descB, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); + morse_zooptile2lap(descB, B, NB, NB, LDB, NRHS, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descB); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_ziptile2lap( descB, B, NB, NB, LDB, NRHS, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zgeqrs_param_Tile - Computes a minimum-norm solution using the tile QR factorization. + * Tile equivalent of MORSE_zgeqrf(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in,out] A + * Details of the QR factorization of the original matrix A as returned by MORSE_zgeqrf. + * + * @param[in] T + * Auxiliary factorization data, computed by MORSE_zgeqrf. + * + * @param[in,out] B + * On entry, the m-by-nrhs right hand side matrix B. + * On exit, the n-by-nrhs solution matrix X. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zgeqrs_param + * @sa MORSE_zgeqrs_param_Tile_Async + * @sa MORSE_cgeqrs_Tile + * @sa MORSE_dgeqrs_Tile + * @sa MORSE_sgeqrs_Tile + * @sa MORSE_zgeqrf_Tile + * + ******************************************************************************/ +int MORSE_zgeqrs_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgeqrs_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zgeqrs_param_Tile_Async(qrtree, A, TS, TT, B, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(TS); + RUNTIME_desc_getoncpu(TT); + RUNTIME_desc_getoncpu(B); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * MORSE_zgeqrs_param_Tile_Async - Computes a minimum-norm solution using the tile + * QR factorization. + * Non-blocking equivalent of MORSE_zgeqrs_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zgeqrs_param + * @sa MORSE_zgeqrs_param_Tile + * @sa MORSE_cgeqrs_Tile_Async + * @sa MORSE_dgeqrs_Tile_Async + * @sa MORSE_sgeqrs_Tile_Async + * @sa MORSE_zgeqrf_Tile_Async + * + ******************************************************************************/ +int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree, + MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_desc_t *subA; + MORSE_desc_t *subB; + MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zgeqrs_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zgeqrs_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zgeqrs_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zgeqrs_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zgeqrs_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zgeqrs_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(B) != MORSE_SUCCESS) { + morse_error("MORSE_zgeqrs_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb || B->nb != B->mb) { + morse_error("MORSE_zgeqrs_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Quick return */ + /* + if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) { + return MORSE_SUCCESS; + } + */ +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + + subB = morse_desc_submatrix(B, 0, 0, A->n, B->n); + subA = morse_desc_submatrix(A, 0, 0, A->n, A->n); + + morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, Dptr, sequence, request); + morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request); + + free(subA); + free(subB); + + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; + return MORSE_SUCCESS; +} diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c new file mode 100644 index 0000000000000000000000000000000000000000..d64930f4119ada76f12b9623e8230ed43378a1bd --- /dev/null +++ b/compute/zunglq_param.c @@ -0,0 +1,322 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zunglq_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zunglq_param - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the + * first M rows of a product of the elementary reflectors returned by MORSE_zgelqf. + * + ******************************************************************************* + * + * @param[in] qrtree + * The tree used for the factorization + * + * @param[in] M + * The number of rows of the matrix Q. M >= 0. + * + * @param[in] N + * The number of columns of the matrix Q. N >= M. + * + * @param[in] K + * The number of rows of elementary tile reflectors whose product defines the matrix Q. + * M >= K >= 0. + * + * @param[in] A + * Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[in] descT + * Auxiliary factorization data, computed by MORSE_zgelqf. + * + * @param[out] Q + * On exit, the M-by-N matrix Q. + * + * @param[in] LDQ + * The leading dimension of the array Q. LDQ >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval MORSE_SUCCESS <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zunglq_param_Tile + * @sa MORSE_zunglq_param_Tile_Async + * @sa MORSE_cunglq + * @sa MORSE_dorglq + * @sa MORSE_sorglq + * @sa MORSE_zgelqf + * + ******************************************************************************/ +int MORSE_zunglq_param(const libhqr_tree_t *qrtree, int M, int N, int K, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT, + MORSE_Complex64_t *Q, int LDQ) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descQ; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunglq_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_zunglq_param", "illegal value of M"); + return -1; + } + if (N < M) { + morse_error("MORSE_zunglq_param", "illegal value of N"); + return -2; + } + if (K < 0 || K > M) { + morse_error("MORSE_zunglq_param", "illegal value of K"); + return -3; + } + if (LDA < chameleon_max(1, M)) { + morse_error("MORSE_zunglq_param", "illegal value of LDA"); + return -5; + } + if (LDQ < chameleon_max(1, M)) { + morse_error("MORSE_zunglq_param", "illegal value of LDQ"); + return -8; + } + /* Quick return - currently NOT equivalent to LAPACK's: + * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ ) */ + if (chameleon_min(M, chameleon_min(N, K)) == 0) + return MORSE_SUCCESS; + + /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, 0); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zunglq_param", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descQ))); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N,*/ +/* sequence, &request);*/ +/* morse_ziplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_zunglq_param_Tile_Async(qrtree, &descA, descTS, descTT, &descQ, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descQ, Q, NB, NB, LDQ, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descQ); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ +/* morse_ziptile2lap( descQ, Q, NB, NB, LDQ, N, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zunglq_param_Tile - Generates an M-by-N matrix Q with orthonormal rows, which is defined as the + * first M rows of a product of the elementary reflectors returned by MORSE_zgelqf. + * All matrices are passed through descriptors. All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] A + * Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf. + * + * @param[in] T + * Auxiliary factorization data, computed by MORSE_zgelqf. + * + * @param[out] Q + * On exit, the M-by-N matrix Q. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zunglq_param + * @sa MORSE_zunglq_param_Tile_Async + * @sa MORSE_cunglq_Tile + * @sa MORSE_dorglq_Tile + * @sa MORSE_sorglq_Tile + * @sa MORSE_zgelqf_Tile + * + ******************************************************************************/ +int MORSE_zunglq_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunglq_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zunglq_param_Tile_Async(qrtree, A, TS, TT, Q, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(Q); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * Non-blocking equivalent of MORSE_zunglq_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zunglq_param + * @sa MORSE_zunglq_param_Tile + * @sa MORSE_cunglq_Tile_Async + * @sa MORSE_dorglq_Tile_Async + * @sa MORSE_sorglq_Tile_Async + * @sa MORSE_zgelqf_Tile_Async + * + ******************************************************************************/ +int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunglq_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zunglq_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zunglq_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zunglq_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zunglq_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zunglq_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(Q) != MORSE_SUCCESS) { + morse_error("MORSE_zunglq_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb || Q->nb != Q->mb) { + morse_error("MORSE_zunglq_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Quick return - currently NOT equivalent to LAPACK's: + * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, Q, LDQ ) */ +/* + if (chameleon_min(M, N) == 0) + return MORSE_SUCCESS; +*/ +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); + morse_pzunglq_param(qrtree, A, Q, TS, TT, Dptr, sequence, request); + + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; + return MORSE_SUCCESS; +} diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c new file mode 100644 index 0000000000000000000000000000000000000000..8b4f7e46103dcf0fd3cd92492ced8a764c2de654 --- /dev/null +++ b/compute/zungqr_param.c @@ -0,0 +1,320 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zungqr_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zungqr_param - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the + * first N columns of a product of the elementary reflectors returned by MORSE_zgeqrf. + * + ******************************************************************************* + * + * @param[in] qrtree + * The tree used for the factorization + * + * @param[in] M + * The number of rows of the matrix Q. M >= 0. + * + * @param[in] N + * The number of columns of the matrix Q. N >= M. + * + * @param[in] K + * The number of columns of elementary tile reflectors whose product defines the matrix Q. + * M >= K >= 0. + * + * @param[in] A + * Details of the QR factorization of the original matrix A as returned by MORSE_zgeqrf. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + * @param[in] descT + * Auxiliary factorization data, computed by MORSE_zgeqrf. + * + * @param[out] Q + * On exit, the M-by-N matrix Q. + * + * @param[in] LDQ + * The leading dimension of the array Q. LDQ >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zungqr_param_Tile + * @sa MORSE_zungqr_param_Tile_Async + * @sa MORSE_cungqr + * @sa MORSE_dorgqr + * @sa MORSE_sorgqr + * @sa MORSE_zgeqrf + * + ******************************************************************************/ +int MORSE_zungqr_param(const libhqr_tree_t *qrtree, + int M, int N, int K, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, + MORSE_desc_t *descTT, + MORSE_Complex64_t *Q, int LDQ) +{ + int NB; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descQ; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zungqr_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + /* Check input arguments */ + if (M < 0) { + morse_error("MORSE_zungqr_param", "illegal value of M"); + return -1; + } + if (N < 0 || N > M) { + morse_error("MORSE_zungqr_param", "illegal value of N"); + return -2; + } + if (K < 0 || K > N) { + morse_error("MORSE_zungqr_param", "illegal value of K"); + return -3; + } + if (LDA < chameleon_max(1, M)) { + morse_error("MORSE_zungqr_param", "illegal value of LDA"); + return -5; + } + if (LDQ < chameleon_max(1, M)) { + morse_error("MORSE_zungqr_param", "illegal value of LDQ"); + return -8; + } + if (chameleon_min(M, chameleon_min(N, K)) == 0) + return MORSE_SUCCESS; + + /* Tune NB & IB depending on M & N; Set NBNB */ + status = morse_tune(MORSE_FUNC_ZGELS, M, N, 0); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zungqr_param", "morse_tune() failed"); + return status; + } + + /* Set NT */ + NB = MORSE_NB; + + morse_sequence_create(morse, &sequence); + + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, K, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descQ))); + /* } else {*/ + /* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, K,*/ + /* sequence, &request);*/ + /* morse_ziplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N,*/ + /* sequence, &request);*/ + /* }*/ + + /* Call the tile interface */ + MORSE_zungqr_param_Tile_Async(qrtree, &descA, descTS, descTT, &descQ, sequence, &request); + + /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descQ, Q, NB, NB, LDQ, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descQ); + /* } else {*/ + /* morse_ziptile2lap( descA, A, NB, NB, LDA, K, sequence, &request);*/ + /* morse_ziptile2lap( descQ, Q, NB, NB, LDQ, N, sequence, &request);*/ + /* morse_sequence_wait(morse, sequence);*/ + /* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zungqr_param_Tile - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the + * first N columns of a product of the elementary reflectors returned by MORSE_zgeqrf. + * All matrices are passed through descriptors. All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] A + * Details of the QR factorization of the original matrix A as returned by MORSE_zgeqrf. + * + * @param[in] T + * Auxiliary factorization data, computed by MORSE_zgeqrf. + * + * @param[out] Q + * On exit, the M-by-N matrix Q. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zungqr_param + * @sa MORSE_zungqr_param_Tile_Async + * @sa MORSE_cungqr_Tile + * @sa MORSE_dorgqr_Tile + * @sa MORSE_sorgqr_Tile + * @sa MORSE_zgeqrf_Tile + * + ******************************************************************************/ +int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zungqr_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zungqr_param_Tile_Async(qrtree, A, TS, TT, Q, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(Q); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * Non-blocking equivalent of MORSE_zungqr_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zungqr_param + * @sa MORSE_zungqr_param_Tile + * @sa MORSE_cungqr_Tile_Async + * @sa MORSE_dorgqr_Tile_Async + * @sa MORSE_sorgqr_Tile_Async + * @sa MORSE_zgeqrf_Tile_Async + * + ******************************************************************************/ +int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q, MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zungqr_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zungqr_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zungqr_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zungqr_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zungqr_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zungqr_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(Q) != MORSE_SUCCESS) { + morse_error("MORSE_zungqr_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb || Q->nb != Q->mb) { + morse_error("MORSE_zungqr_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Quick return */ +/* + if (N <= 0) + return MORSE_SUCCESS; + */ +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + + morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request); + morse_pzungqr_param(qrtree, A, Q, TS, TT, Dptr, sequence, request); + + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; + return MORSE_SUCCESS; +} diff --git a/compute/zunmlq.c b/compute/zunmlq.c index ca6d7806ad610272cfae956b8a100f0cb7f080b4..1138f4aee3a86d9f12658ede2fc41f97587140a2 100644 --- a/compute/zunmlq.c +++ b/compute/zunmlq.c @@ -31,7 +31,8 @@ **/ #include "control/common.h" -/***************************************************************************//** +/** + ******************************************************************************* * * @ingroup MORSE_Complex64_t * @@ -204,7 +205,8 @@ int MORSE_zunmlq(MORSE_enum side, MORSE_enum trans, int M, int N, int K, return status; } -/***************************************************************************//** +/** + ******************************************************************************* * * @ingroup MORSE_Complex64_t_Tile * @@ -276,7 +278,8 @@ int MORSE_zunmlq_Tile(MORSE_enum side, MORSE_enum trans, return status; } -/***************************************************************************//** +/** + ******************************************************************************* * * @ingroup MORSE_Complex64_t_Tile_Async * diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c new file mode 100644 index 0000000000000000000000000000000000000000..cace1d697c507ea985807303f74c1b50846dc3b0 --- /dev/null +++ b/compute/zunmlq_param.c @@ -0,0 +1,384 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zunmlq_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zunmlq_param - Overwrites the general complex M-by-N matrix C with + * + * SIDE = 'L' SIDE = 'R' + * TRANS = 'N': Q * C C * Q + * TRANS = 'C': Q**H * C C * Q**H + * + * where Q is a complex unitary matrix defined as the product of k + * elementary reflectors + * + * Q = H(1) H(2) . . . H(k) + * + * as returned by MORSE_zgeqrf. Q is of order M if SIDE = MorseLeft + * and of order N if SIDE = MorseRight. + * + ******************************************************************************* + * + * @param[in] qrtree + * The tree used for the factorization + * + * @param[in] side + * Intended usage: + * = MorseLeft: apply Q or Q**H from the left; + * = MorseRight: apply Q or Q**H from the right. + * + * @param[in] trans + * Intended usage: + * = MorseNoTrans: no transpose, apply Q; + * = MorseConjTrans: conjugate transpose, apply Q**H. + * + * @param[in] M + * The number of rows of the matrix C. M >= 0. + * + * @param[in] N + * The number of columns of the matrix C. N >= 0. + * + * @param[in] K + * The number of rows of elementary tile reflectors whose product defines the matrix Q. + * If side == MorseLeft, M >= K >= 0. + * If side == MorseRight, N >= K >= 0. + * + * @param[in] A + * Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,K). + * + * @param[in] descTS + * Auxiliary factorization data, computed by MORSE_zgelqf. + * + * @param[in] descTT + * Auxiliary factorization data, computed by MORSE_zgelqf. + * + * @param[in,out] C + * On entry, the M-by-N matrix C. + * On exit, C is overwritten by Q*C or Q**H*C. + * + * @param[in] LDC + * The leading dimension of the array C. LDC >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zunmlq_param_Tile + * @sa MORSE_zunmlq_param_Tile_Async + * @sa MORSE_cunmlq + * @sa MORSE_dormlq + * @sa MORSE_sormlq + * @sa MORSE_zgelqf + * + ******************************************************************************/ +int MORSE_zunmlq_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, int M, int N, int K, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT, + MORSE_Complex64_t *C, int LDC) +{ + int NB, An; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descC; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunmlq_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + if (side == MorseLeft) + An = M; + else + An = N; + + /* Check input arguments */ + if ((side != MorseLeft) && (side != MorseRight)) { + morse_error("MORSE_zunmlq_param", "illegal value of side"); + return -1; + } + if ((trans != MorseConjTrans) && (trans != MorseNoTrans)){ + morse_error("MORSE_zunmlq_param", "illegal value of trans"); + return -2; + } + if (M < 0) { + morse_error("MORSE_zunmlq_param", "illegal value of M"); + return -3; + } + if (N < 0) { + morse_error("MORSE_zunmlq_param", "illegal value of N"); + return -4; + } + if ((K < 0) || (K > An)) { + morse_error("MORSE_zunmlq_param", "illegal value of K"); + return -5; + } + if (LDA < chameleon_max(1, K)) { + morse_error("MORSE_zunmlq_param", "illegal value of LDA"); + return -7; + } + if (LDC < chameleon_max(1, M)) { + morse_error("MORSE_zunmlq_param", "illegal value of LDC"); + return -10; + } + /* Quick return - currently NOT equivalent to LAPACK's: + * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, C, LDC ) */ + if (chameleon_min(M, chameleon_min(N, K)) == 0) + return MORSE_SUCCESS; + + /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ + status = morse_tune(MORSE_FUNC_ZGELS, M, K, N); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zunmlq_param", "morse_tune() failed"); + return status; + } + + /* Set MT, NT & NTRHS */ + NB = MORSE_NB; + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descC))); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, K, An,*/ +/* sequence, &request);*/ +/* morse_ziplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_zunmlq_param_Tile_Async( + qrtree, side, trans, &descA, descTS, descTT, &descC, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descC, C, NB, NB, LDC, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descC); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, An, sequence, &request);*/ +/* morse_ziptile2lap( descC, C, NB, NB, LDC, N, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zunmlq_param_Tile - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal + * matrix (unitary in the complex case) defined as the product of elementary reflectors returned + * by MORSE_zgelqf_Tile Q is of order M. + * All matrices are passed through descriptors. All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] side + * Intended usage: + * = MorseLeft: apply Q or Q**H from the left; + * = MorseRight: apply Q or Q**H from the right. + * Currently only MorseLeft is supported. + * + * @param[in] trans + * Intended usage: + * = MorseNoTrans: no transpose, apply Q; + * = MorseConjTrans: conjugate transpose, apply Q**H. + * Currently only MorseConjTrans is supported. + * + * @param[in] A + * Details of the LQ factorization of the original matrix A as returned by MORSE_zgelqf. + * + * @param[in] T + * Auxiliary factorization data, computed by MORSE_zgelqf. + * + * @param[in,out] C + * On entry, the M-by-N matrix C. + * On exit, C is overwritten by Q*C or Q**H*C. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zunmlq_param + * @sa MORSE_zunmlq_param_Tile_Async + * @sa MORSE_cunmlq_Tile + * @sa MORSE_dormlq_Tile + * @sa MORSE_sormlq_Tile + * @sa MORSE_zgelqf_Tile + * + ******************************************************************************/ +int MORSE_zunmlq_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, + MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *C) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunmlq_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zunmlq_param_Tile_Async(qrtree, side, trans, A, TS, TT, C, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(C); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * Non-blocking equivalent of MORSE_zunmlq_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zunmlq_param + * @sa MORSE_zunmlq_param_Tile + * @sa MORSE_cunmlq_Tile_Async + * @sa MORSE_dormlq_Tile_Async + * @sa MORSE_sormlq_Tile_Async + * @sa MORSE_zgelqf_Tile_Async + * + ******************************************************************************/ +int MORSE_zunmlq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, + MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *C, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunmlq_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zunmlq_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zunmlq_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zunmlq_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zunmlq_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zunmlq_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(C) != MORSE_SUCCESS) { + morse_error("MORSE_zunmlq_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb || C->nb != C->mb) { + morse_error("MORSE_zunmlq_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if ((side != MorseLeft) && (side != MorseRight)) { + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if ((trans != MorseConjTrans) && (trans != MorseNoTrans)){ + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Quick return - currently NOT equivalent to LAPACK's: + * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, C, LDC ) */ +/* + if (chameleon_min(M, chameleon_min(N, K)) == 0) + return MORSE_SUCCESS; +*/ + +#if defined(CHAMELEON_COPY_DIAG) + { + int m = chameleon_min(A->mt, A->nt) * A->mb; + morse_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + Dptr = &D; + } +#endif + + morse_pzunmlq_param(qrtree, side, trans, A, C, TS, TT, Dptr, sequence, request); + + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; + return MORSE_SUCCESS; +} diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c new file mode 100644 index 0000000000000000000000000000000000000000..2a101ace364e8b379d92255b11be078925c17670 --- /dev/null +++ b/compute/zunmqr_param.c @@ -0,0 +1,391 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file zunmqr_param.c + * + * MORSE computational routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-05-17 + * @precisions normal z -> s d c + * + **/ +#include "control/common.h" + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t + * + * MORSE_zunmqr_param - Overwrites the general complex M-by-N matrix C with + * + * SIDE = 'L' SIDE = 'R' + * TRANS = 'N': Q * C C * Q + * TRANS = 'C': Q**H * C C * Q**H + * + * where Q is a complex unitary matrix defined as the product of k + * elementary reflectors + * + * Q = H(1) H(2) . . . H(k) + * + * as returned by MORSE_zgeqrf. Q is of order M if SIDE = MorseLeft + * and of order N if SIDE = MorseRight. + * + ******************************************************************************* + * + * @param[in] qrtree + * The tree used for the factorization + * + * @param[in] side + * Intended usage: + * = MorseLeft: apply Q or Q**H from the left; + * = MorseRight: apply Q or Q**H from the right. + * + * @param[in] trans + * Intended usage: + * = MorseNoTrans: no transpose, apply Q; + * = MorseConjTrans: conjugate transpose, apply Q**H. + * + * @param[in] M + * The number of rows of the matrix C. M >= 0. + * + * @param[in] N + * The number of columns of the matrix C. N >= 0. + * + * @param[in] K + * The number of elementary reflectors whose product defines + * the matrix Q. + * If side == MorseLeft, M >= K >= 0. + * If side == MorseRight, N >= K >= 0. + * + * @param[in] A + * Details of the QR factorization of the original matrix A as returned by MORSE_zgeqrf. + * + * @param[in] LDA + * The leading dimension of the array A. + * If side == MorseLeft, LDA >= max(1,M). + * If side == MorseRight, LDA >= max(1,N). + * + * @param[in] descTS + * Auxiliary factorization data, computed by MORSE_zgeqrf. + * + * @param[in] descTT + * Auxiliary factorization data, computed by MORSE_zgeqrf. + * + * @param[in,out] C + * On entry, the M-by-N matrix C. + * On exit, C is overwritten by Q*C or Q**H*C. + * + * @param[in] LDC + * The leading dimension of the array C. LDC >= max(1,M). + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * \retval <0 if -i, the i-th argument had an illegal value + * + ******************************************************************************* + * + * @sa MORSE_zunmqr_param_Tile + * @sa MORSE_zunmqr_param_Tile_Async + * @sa MORSE_cunmqr + * @sa MORSE_dormqr + * @sa MORSE_sormqr + * @sa MORSE_zgeqrf + * + ******************************************************************************/ +int MORSE_zunmqr_param(const libhqr_tree_t *qrtree, + MORSE_enum side, MORSE_enum trans, int M, int N, int K, + MORSE_Complex64_t *A, int LDA, + MORSE_desc_t *descTS, MORSE_desc_t *descTT, + MORSE_Complex64_t *C, int LDC) +{ + int NB, Am; + int status; + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + MORSE_desc_t descA, descC; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunmqr_param", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + + if ( side == MorseLeft ) { + Am = M; + } else { + Am = N; + } + + /* Check input arguments */ + if ((side != MorseLeft) && (side != MorseRight)) { + morse_error("MORSE_zunmqr_param", "illegal value of side"); + return -1; + } + if ((trans != MorseConjTrans) && (trans != MorseNoTrans)){ + morse_error("MORSE_zunmqr_param", "illegal value of trans"); + return -2; + } + if (M < 0) { + morse_error("MORSE_zunmqr_param", "illegal value of M"); + return -3; + } + if (N < 0) { + morse_error("MORSE_zunmqr_param", "illegal value of N"); + return -4; + } + if ((K < 0) || (K > Am)) { + morse_error("MORSE_zunmqr_param", "illegal value of K"); + return -5; + } + if (LDA < chameleon_max(1, Am)) { + morse_error("MORSE_zunmqr_param", "illegal value of LDA"); + return -7; + } + if (LDC < chameleon_max(1, M)) { + morse_error("MORSE_zunmqr_param", "illegal value of LDC"); + return -10; + } + /* Quick return - currently NOT equivalent to LAPACK's: + * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, C, LDC ) */ + if (chameleon_min(M, chameleon_min(N, K)) == 0) + return MORSE_SUCCESS; + + /* Tune NB & IB depending on M, K & N; Set NBNB */ + status = morse_tune(MORSE_FUNC_ZGELS, M, K, N); + if (status != MORSE_SUCCESS) { + morse_error("MORSE_zunmqr_param", "morse_tune() failed"); + return status; + } + + /* Set MT, NT & NTRHS */ + NB = MORSE_NB; + morse_sequence_create(morse, &sequence); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K, sequence, &request, + morse_desc_mat_free(&(descA)) ); + morse_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N, sequence, &request, + morse_desc_mat_free(&(descA)); morse_desc_mat_free(&(descC))); +/* } else {*/ +/* morse_ziplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K,*/ +/* sequence, &request);*/ +/* morse_ziplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N,*/ +/* sequence, &request);*/ +/* }*/ + + /* Call the tile interface */ + MORSE_zunmqr_param_Tile_Async( + qrtree, side, trans, &descA, descTS, descTT, &descC, sequence, &request); + +/* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ + morse_zooptile2lap(descC, C, NB, NB, LDC, N, sequence, &request); + morse_sequence_wait(morse, sequence); + morse_desc_mat_free(&descA); + morse_desc_mat_free(&descC); +/* } else {*/ +/* morse_ziptile2lap( descA, A, NB, NB, LDA, K, sequence, &request);*/ +/* morse_ziptile2lap( descC, C, NB, NB, LDC, N, sequence, &request);*/ +/* morse_sequence_wait(morse, sequence);*/ +/* }*/ + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile + * + * MORSE_zunmqr_param_Tile - overwrites the general M-by-N matrix C with Q*C, where Q is an orthogonal + * matrix (unitary in the complex case) defined as the product of elementary reflectors returned + * by MORSE_zgeqrf_Tile Q is of order M. + * All matrices are passed through descriptors. All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] side + * Intended usage: + * = MorseLeft: apply Q or Q**H from the left; + * = MorseRight: apply Q or Q**H from the right. + * Currently only MorseLeft is supported. + * + * @param[in] trans + * Intended usage: + * = MorseNoTrans: no transpose, apply Q; + * = MorseConjTrans: conjugate transpose, apply Q**H. + * Currently only MorseConjTrans is supported. + * + * @param[in] A + * Details of the QR factorization of the original matrix A as returned by MORSE_zgeqrf. + * + * @param[in] T + * Auxiliary factorization data, computed by MORSE_zgeqrf. + * Can be obtained with MORSE_Alloc_Workspace_zgeqrf + * + * @param[in,out] C + * On entry, the M-by-N matrix C. + * On exit, C is overwritten by Q*C or Q**H*C. + * + ******************************************************************************* + * + * @return + * \retval MORSE_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa MORSE_zunmqr_param + * @sa MORSE_zunmqr_param_Tile_Async + * @sa MORSE_cunmqr_Tile + * @sa MORSE_dormqr_Tile + * @sa MORSE_sormqr_Tile + * @sa MORSE_zgeqrf_Tile + * + ******************************************************************************/ +int MORSE_zunmqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, + MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *C) +{ + MORSE_context_t *morse; + MORSE_sequence_t *sequence = NULL; + MORSE_request_t request = MORSE_REQUEST_INITIALIZER; + int status; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunmqr_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + morse_sequence_create(morse, &sequence); + MORSE_zunmqr_param_Tile_Async(qrtree, side, trans, A, TS, TT, C, sequence, &request); + morse_sequence_wait(morse, sequence); + RUNTIME_desc_getoncpu(A); + RUNTIME_desc_getoncpu(C); + + status = sequence->status; + morse_sequence_destroy(morse, sequence); + return status; +} + +/** + ******************************************************************************* + * + * @ingroup MORSE_Complex64_t_Tile_Async + * + * Non-blocking equivalent of MORSE_zunmqr_param_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa MORSE_zunmqr_param + * @sa MORSE_zunmqr_param_Tile + * @sa MORSE_cunmqr_Tile_Async + * @sa MORSE_dormqr_Tile_Async + * @sa MORSE_sormqr_Tile_Async + * @sa MORSE_zgeqrf_Tile_Async + * + ******************************************************************************/ +int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, + MORSE_enum side, MORSE_enum trans, + MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *C, + MORSE_sequence_t *sequence, MORSE_request_t *request) +{ + MORSE_context_t *morse; + MORSE_desc_t D, *Dptr = NULL; + + morse = morse_context_self(); + if (morse == NULL) { + morse_fatal_error("MORSE_zunmqr_param_Tile", "MORSE not initialized"); + return MORSE_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + morse_fatal_error("MORSE_zunmqr_param_Tile", "NULL sequence"); + return MORSE_ERR_UNALLOCATED; + } + if (request == NULL) { + morse_fatal_error("MORSE_zunmqr_param_Tile", "NULL request"); + return MORSE_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == MORSE_SUCCESS) + request->status = MORSE_SUCCESS; + else + return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); + + /* Check descriptors for correctness */ + if (morse_desc_check(A) != MORSE_SUCCESS) { + morse_error("MORSE_zunmqr_param_Tile", "invalid first descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TS) != MORSE_SUCCESS) { + morse_error("MORSE_zunmqr_param_Tile", "invalid second descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(TT) != MORSE_SUCCESS) { + morse_error("MORSE_zunmqr_param_Tile", "invalid third descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(C) != MORSE_SUCCESS) { + morse_error("MORSE_zunmqr_param_Tile", "invalid fourth descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb || C->nb != C->mb) { + morse_error("MORSE_zunmqr_param_Tile", "only square tiles supported"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if ((side != MorseLeft) && (side != MorseRight)) { + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if ((trans != MorseConjTrans) && (trans != MorseNoTrans)){ + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + /* Quick return - currently NOT equivalent to LAPACK's: + * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, C, LDC ) */ +/* + if (chameleon_min(M, chameleon_min(N, K)) == 0) + return MORSE_SUCCESS; +*/ + +#if defined(CHAMELEON_COPY_DIAG) + { + int n = chameleon_min(A->mt, A->nt) * A->nb; + morse_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + Dptr = &D; + } +#endif + + morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, Dptr, sequence, request); + + if (Dptr != NULL) { + morse_desc_mat_free(Dptr); + } + (void)D; + return MORSE_SUCCESS; +} diff --git a/control/compute_z.h b/control/compute_z.h index bb930ba0ff84322f11b7c5969ea268ae7a91b831..f9608bdb219ae7800a2e1c590fc1d7f29e695c2f 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -37,14 +37,14 @@ (m), (n), (i), (j), (m), (n), p, q); \ morse_desc_mat_alloc( &(descA) ); -#define morse_zdesc_alloc( descA, mb, nb, lm, ln, i, j, m, n, free) \ - descA = morse_desc_init( \ - MorseComplexDouble, (mb), (nb), ((mb)*(nb)), \ - (m), (n), (i), (j), (m), (n), 1, 1); \ - if ( morse_desc_mat_alloc( &(descA) ) ) { \ - morse_error( __func__, "morse_desc_mat_alloc() failed"); \ - {free;}; \ - return MORSE_ERR_OUT_OF_RESOURCES; \ +#define morse_zdesc_alloc( descA, mb, nb, lm, ln, i, j, m, n, free) \ + descA = morse_desc_init( \ + MorseComplexDouble, (mb), (nb), ((mb)*(nb)), \ + (m), (n), (i), (j), (m), (n), 1, 1); \ + if ( morse_desc_mat_alloc( &(descA) ) ) { \ + morse_error( __func__, "morse_desc_mat_alloc() failed"); \ + {free;}; \ + return MORSE_ERR_OUT_OF_RESOURCES; \ } #define morse_zooplap2tile( descA, A, mb, nb, lm, ln, i, j, m, n, seq, req, free) \ @@ -154,3 +154,20 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_d void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzbuild( MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* user_build_callback, MORSE_sequence_t *sequence, MORSE_request_t *request ); + +void morse_pzgelqf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzgeqrf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunmlq_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, + MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request); +void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, + MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D, + MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/coreblas/compute/core_zttmlq.c b/coreblas/compute/core_zttmlq.c index 24d5dfa796c126d4f50a02912ebf3b2b05400be1..0edbcc62fb836fc711e0f25bd8d2521b4768801c 100644 --- a/coreblas/compute/core_zttmlq.c +++ b/coreblas/compute/core_zttmlq.c @@ -133,7 +133,7 @@ int CORE_zttmlq(MORSE_enum side, MORSE_enum trans, MORSE_Complex64_t *WORK, int LDWORK) { int i, i1, i3, l; - int NQ, NW; + int NW; int kb; int ic = 0; int jc = 0; @@ -150,11 +150,9 @@ int CORE_zttmlq(MORSE_enum side, MORSE_enum trans, /* NQ is the order of Q */ if (side == MorseLeft) { - NQ = N2; NW = IB; } else { - NQ = M2; NW = N1; } @@ -198,7 +196,7 @@ int CORE_zttmlq(MORSE_enum side, MORSE_enum trans, coreblas_error(12, "Illegal value of LDA2"); return -12; } - if (LDV < chameleon_max(1,NQ)){ + if (LDV < chameleon_max(1,K)){ coreblas_error(14, "Illegal value of LDV"); return -14; } diff --git a/coreblas/include/coreblas.h b/coreblas/include/coreblas.h index f67d51a969cee3221da0ff62303d7ab2e939d595..590e48d62d730e41fd3a4f99300c2a55941decaf 100644 --- a/coreblas/include/coreblas.h +++ b/coreblas/include/coreblas.h @@ -58,6 +58,7 @@ #include "coreblas/include/coreblas_s.h" #include "coreblas/include/coreblas_zc.h" #include "coreblas/include/coreblas_ds.h" +#include <assert.h> #ifdef __cplusplus extern "C" { @@ -66,7 +67,10 @@ extern "C" { /** **************************************************************************** * Coreblas Error **/ -#define coreblas_error(k, str) fprintf(stderr, "%s: Parameter %d / %s\n", __func__, k, str) +#define coreblas_error(k, str) do { \ + fprintf(stderr, "%s: Parameter %d / %s\n", __func__, k, str) ; \ + assert(0); \ + } while(0) /** **************************************************************************** * CBlas enum diff --git a/hqr b/hqr new file mode 160000 index 0000000000000000000000000000000000000000..2bc36789f26039bc6eefe03f6825a383847724f5 --- /dev/null +++ b/hqr @@ -0,0 +1 @@ +Subproject commit 2bc36789f26039bc6eefe03f6825a383847724f5 diff --git a/include/chameleon_config.h.in b/include/chameleon_config.h.in index 83350ba3a9532ab721de01357d3bddb647e13abc..d6c819fc6c6b3b25af2c667a2ed939ac36564c03 100644 --- a/include/chameleon_config.h.in +++ b/include/chameleon_config.h.in @@ -45,5 +45,4 @@ /* Simulating */ #cmakedefine CHAMELEON_SIMULATION - #endif /* CHAMELEON_CONFIG_H_HAS_BEEN_INCLUDED */ diff --git a/include/morse.h.in b/include/morse.h.in index b0460b6950bfbf8c3f5967742c730421143cec7b..0a3f3ae0a23d3076c1a51c95f5c61449a5db279d 100644 --- a/include/morse.h.in +++ b/include/morse.h.in @@ -32,7 +32,6 @@ #define MORSE_VERSION_MINOR @MORSE_VERSION_MINOR@ #define MORSE_VERSION_MICRO @MORSE_VERSION_MICRO@ - /* **************************************************************************** * MORSE types and constants */ @@ -122,6 +121,7 @@ int MORSE_Sequence_Wait (MORSE_sequence_t *sequence); } #endif +#include "libhqr.h" #include "morse_z.h" #include "morse_c.h" #include "morse_d.h" diff --git a/include/morse_z.h b/include/morse_z.h index 65f911f00a7f257fea0d897da102c1c8294ef9fa..cd41b2fac3cbd2214efd1ae8398755d08f60e4f5 100644 --- a/include/morse_z.h +++ b/include/morse_z.h @@ -273,9 +273,51 @@ int MORSE_zunmqr_Tile_Async(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, //int MORSE_zgecfi_Async(int m, int n, MORSE_Complex64_t *A, MORSE_enum f_in, int imb, int inb, MORSE_enum f_out, int omb, int onb, MORSE_sequence_t *sequence, MORSE_request_t *request); //int MORSE_zgetmi_Async(int m, int n, MORSE_Complex64_t *A, MORSE_enum f_in, int mb, int inb, MORSE_sequence_t *sequence, MORSE_request_t *request); +/** + * Declarations of libhqr dependent functions. + */ /** **************************************************************************** - * Declarations of workspace allocation functions (tile layout) - alphabetical order + * Declarations of math functions (LAPACK layout) - alphabetical order + **/ +int MORSE_zgels_param(const libhqr_tree_t *qrtree, MORSE_enum trans, int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB); +int MORSE_zgelqf_param(const libhqr_tree_t *qrtree, int M, int N, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT); +int MORSE_zgelqs_param(const libhqr_tree_t *qrtree, int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB); +int MORSE_zgeqrf_param(const libhqr_tree_t *qrtree, int M, int N, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT); +int MORSE_zgeqrs_param(const libhqr_tree_t *qrtree, int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *TT, MORSE_Complex64_t *B, int LDB); +int MORSE_zunglq_param(const libhqr_tree_t *qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *TT, MORSE_Complex64_t *B, int LDB); +int MORSE_zungqr_param(const libhqr_tree_t *qrtree, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *descTT, MORSE_Complex64_t *B, int LDB); +int MORSE_zunmlq_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descT, MORSE_desc_t *TT, MORSE_Complex64_t *B, int LDB); +int MORSE_zunmlq_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descT, MORSE_desc_t *TT, MORSE_Complex64_t *B, int LDB); +int MORSE_zunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, int M, int N, int K, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descTS, MORSE_desc_t *TT, MORSE_Complex64_t *B, int LDB); +/** **************************************************************************** + * Declarations of math functions (tile layout) - alphabetical order **/ +int MORSE_zgels_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zgelqf_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT); +int MORSE_zgelqs_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zgeqrf_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT); +int MORSE_zgeqrs_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zunglq_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zunmlq_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +int MORSE_zunmqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B); +/** **************************************************************************** + * Declarations of math functions (tile layout, asynchronous execution) - alphabetical order + **/ +int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zgelqf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zgelqs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zunmlq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); +int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); + +/** + * Declarations of workspace allocation functions (tile layout) - alphabetical order + */ int MORSE_Alloc_Workspace_zgesv_incpiv( int N, MORSE_desc_t **descL, int **IPIV, int p, int q); int MORSE_Alloc_Workspace_zgetrf_incpiv(int M, int N, MORSE_desc_t **descL, int **IPIV, int p, int q); diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 0bd42f7ce230e7afe08e49f33f6e4b94ff32ef9d..beb55c62bf9fefc860da43203051f72475ccfde5 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -64,6 +64,8 @@ set(ZSRC # LAPACK ################## testing_zgels.c + testing_zgels_hqr.c + testing_zgels_systolic.c #testing_zgesv.c testing_zgesv_incpiv.c #testing_zgetri.c diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index ccb9f5b675e919c9059a6fcc3ef343d671d045fd..c68d4e5dd53a2d76c5196a40bc71acee6cbf62b7 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -39,6 +39,16 @@ foreach(cat ${TEST_CATEGORIES}) add_test(test_${cat}_${prec}gels_lq ./${prec}${TEST_CMD_${cat}} GELS 0 400 800 825 25 810) add_test(test_${cat}_${prec}gels_hlq ./${prec}${TEST_CMD_${cat}} GELS 1 400 800 825 25 810 4) add_test(test_${cat}_${prec}gesv_incpiv ./${prec}${TEST_CMD_${cat}} GESV_INCPIV 800 825 25 810) + + add_test(test_${cat}_${prec}gels_hqr_greedy ./${prec}${TEST_CMD_${cat}} GELS_HQR 1000 1000 1000 10 1000 4 -1 1 -1 0) + add_test(test_${cat}_${prec}gels_hqr_fibonacci ./${prec}${TEST_CMD_${cat}} GELS_HQR 1000 1000 1000 10 1000 4 -1 2 -1 0) + add_test(test_${cat}_${prec}gels_hqr_binary ./${prec}${TEST_CMD_${cat}} GELS_HQR 1000 1000 1000 10 1000 4 -1 3 -1 0) + add_test(test_${cat}_${prec}gels_hlq_greedy ./${prec}${TEST_CMD_${cat}} GELS_HQR 1000 1500 1000 10 1000 4 -1 1 -1 0) + add_test(test_${cat}_${prec}gels_hlq_fibonacci ./${prec}${TEST_CMD_${cat}} GELS_HQR 1000 1500 1000 10 1000 4 -1 2 -1 0) + add_test(test_${cat}_${prec}gels_hlq_binary ./${prec}${TEST_CMD_${cat}} GELS_HQR 1000 1500 1000 10 1000 4 -1 3 -1 0) + + add_test(test_${cat}_${prec}gels_rq_systolic ./${prec}${TEST_CMD_${cat}} GELS_SYSTOLIC 1000 1000 1000 10 1000 3 2) + add_test(test_${cat}_${prec}gels_lq_systolic ./${prec}${TEST_CMD_${cat}} GELS_SYSTOLIC 1000 1500 1000 10 1000 3 2) endif() endforeach() diff --git a/testing/testing_zauxiliary.c b/testing/testing_zauxiliary.c index 60be56b932ffbfba7ad210f7d9d709ee795f9f0a..748c56694aac865dceb215c7d6579ba300d46b67 100644 --- a/testing/testing_zauxiliary.c +++ b/testing/testing_zauxiliary.c @@ -185,8 +185,8 @@ int main (int argc, char **argv) else*/ MORSE_Init( ncores, ngpus); MORSE_Disable(MORSE_AUTOTUNING); - MORSE_Set(MORSE_TILE_SIZE, 320 ); - MORSE_Set(MORSE_INNER_BLOCK_SIZE, 48 ); + MORSE_Set(MORSE_TILE_SIZE, 320 ); + MORSE_Set(MORSE_INNER_BLOCK_SIZE, 48 ); argc -= 4; argv += 4; @@ -248,6 +248,12 @@ int main (int argc, char **argv) else if ( strcmp(func, "GESV_INCPIV") == 0 ) { info += testing_zgesv_incpiv( argc, argv ); } + else if ( strcmp(func, "GELS_HQR") == 0 ) { + info += testing_zgels_hqr( argc, argv ); + } + else if ( strcmp(func, "GELS_SYSTOLIC") == 0 ) { + info += testing_zgels_systolic( argc, argv ); + } /* else if ( strcmp(func, "GESV") == 0 ) { */ /* info += testing_zgesv( argc, argv ); */ /* } */ diff --git a/testing/testing_zauxiliary.h b/testing/testing_zauxiliary.h index 1fc57362bef19393d323fe6c61e3eaef7468ce2e..2285df339ecd93a5bd4b7c09b25948bf0c21e6aa 100644 --- a/testing/testing_zauxiliary.h +++ b/testing/testing_zauxiliary.h @@ -92,6 +92,8 @@ int testing_zgeadd(int argc, char **argv); int testing_zposv(int argc, char **argv); int testing_zgels(int argc, char **argv); +int testing_zgels_hqr(int argc, char **argv); +int testing_zgels_systolic(int argc, char **argv); int testing_zgesv(int argc, char **argv); int testing_zgesv_incpiv(int argc, char **argv); diff --git a/testing/testing_zgels.c b/testing/testing_zgels.c index 10ca88cd11174a9eb9ced06e8fabf166175f4de1..d745f30bf06b17b970f257d796692ce4515ecc0e 100644 --- a/testing/testing_zgels.c +++ b/testing/testing_zgels.c @@ -280,16 +280,12 @@ int testing_zgels(int argc, char **argv) /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); - for (i = 0; i < M; i++) - for (j = 0; j < N; j++) - A2[LDA*j+i] = A1[LDA*j+i] ; + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA ); /* Initialize B1 and B2 */ memset(B2, 0, LDB*NRHS*sizeof(MORSE_Complex64_t)); LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); - for (i = 0; i < M; i++) - for (j = 0; j < NRHS; j++) - B2[LDB*j+i] = B1[LDB*j+i] ; + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, NRHS, B1, LDB, B2, LDB ); /* MORSE ZGELS */ MORSE_zgels(MorseNoTrans, M, N, NRHS, A2, LDA, T, B2, LDB); diff --git a/testing/testing_zgels_hqr.c b/testing/testing_zgels_hqr.c new file mode 100644 index 0000000000000000000000000000000000000000..ded459e296149f94163a5544d5e03c9318c34625 --- /dev/null +++ b/testing/testing_zgels_hqr.c @@ -0,0 +1,508 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file testing_zgels_hqr.c + * + * MORSE testing routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Boucherie Raphael + * @date 2017-05-17 + * @precisions normal z -> c d s + * + **/ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <assert.h> + +#include <morse.h> +#include <coreblas/include/cblas.h> +#include <coreblas/include/lapacke.h> +#include <coreblas/include/coreblas.h> +#include "testing_zauxiliary.h" + +#undef REAL +#define COMPLEX + +static int check_orthogonality(int, int, int, MORSE_Complex64_t*, double); +static int check_factorization(int, int, MORSE_Complex64_t*, MORSE_Complex64_t*, int, MORSE_Complex64_t*, double); +static int check_solution(int, int, int, MORSE_Complex64_t*, int, MORSE_Complex64_t*, MORSE_Complex64_t*, int, double); + +int testing_zgels_hqr(int argc, char **argv) +{ + int hres = 0; + + if ( argc < 1 ){ + goto usage; + } + + /* Check for number of arguments*/ + if ( argc != 10 ) { + usage: + USAGE("GELS_HQR", "M N LDA NRHS LDB", + " - M : number of rows of the matrix A\n" + " - N : number of columns of the matrix A\n" + " - LDA : leading dimension of the matrix A\n" + " - NRHS : number of RHS\n" + " - LDB : leading dimension of the matrix B\n" + " - QR_A : Size of TS domain\n" + " - QR_P : Size of high level tree for distributed mode (defaut: -1)\n" + " - LLVL : tree used for low level reduction insides nodes (defaut: -1)\n" + " - HLVL : tree used for high level reduction between nodes, only if qr_p > 1(default: -1)\n" + " - DOMINO : Enable/Disable the domino between upper and lower trees (defaut: -1)\n" + ); + return -1; + } + + int M = atoi(argv[0]); + int N = atoi(argv[1]); + int LDA = chameleon_max( atoi(argv[2]), M ); + int NRHS = atoi(argv[3]); + int LDB = chameleon_max( chameleon_max( atoi(argv[4]), M ), N ); + int qr_a = atoi(argv[5]); + int qr_p = atoi(argv[6]); + int llvl = atoi(argv[7]); + int hlvl = atoi(argv[8]); + int domino = atoi(argv[9]); + libhqr_tree_t qrtree; + libhqr_tiledesc_t matrix; + + int K = min(M, N); + double eps; + int info_ortho, info_solution, info_factorization; + int LDAxN = LDA*N; + int LDBxNRHS = LDB*NRHS; + + MORSE_Complex64_t *A1 = (MORSE_Complex64_t *)malloc(LDA*N*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *A2 = (MORSE_Complex64_t *)malloc(LDA*N*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *B1 = (MORSE_Complex64_t *)malloc(LDB*NRHS*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *B2 = (MORSE_Complex64_t *)malloc(LDB*NRHS*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *Q = (MORSE_Complex64_t *)malloc(LDA*N*sizeof(MORSE_Complex64_t)); + MORSE_desc_t *TS; + MORSE_desc_t *TT = NULL; + + /* Check if unable to allocate memory */ + if ((!A1)||(!A2)||(!B1)||(!B2)||(!Q)){ + printf("Out of Memory \n "); + return -2; + } + + MORSE_Alloc_Workspace_zgels(M, N, &TS, 1, 1); + MORSE_Alloc_Workspace_zgels(M, N, &TT, 1, 1); + memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(MORSE_Complex64_t)); + memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(MORSE_Complex64_t)); + + eps = LAPACKE_dlamch_work( 'e' ); + + /*---------------------------------------------------------- + * TESTING ZGEQRF_PARAM + */ + + /* Initialize matrix */ + matrix.mt = TS->mt; + matrix.nt = TS->nt; + matrix.nodes = 1; + matrix.p = 1; + + libhqr_hqr_init( &qrtree, + ( M >= N ) ? LIBHQR_QR : LIBHQR_LQ, + &matrix, llvl, hlvl, qr_a, qr_p, domino, 0); + + /* Initialize A1 and A2 */ + LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA ); + + /* Initialize B1 and B2 */ + memset(B2, 0, LDB*NRHS*sizeof(MORSE_Complex64_t)); + LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, NRHS, B1, LDB, B2, LDB ); + + /* MORSE ZGELS */ + MORSE_zgels_param(&qrtree, MorseNoTrans, M, N, NRHS, A2, LDA, TS, TT, B2, LDB); + + /* MORSE ZGELS */ + if (M >= N) + /* Building the economy-size Q */ + MORSE_zungqr_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + else + /* Building the economy-size Q */ + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGELS_HQR ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n",eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + /* Check the orthogonality, factorization and the solution */ + info_ortho = check_orthogonality(M, N, LDA, Q, eps); + info_factorization = check_factorization(M, N, A1, A2, LDA, Q, eps); + info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps); + + if ((info_solution == 0)&(info_factorization == 0)&(info_ortho == 0)) { + printf("***************************************************\n"); + printf(" ---- TESTING ZGELS_HQR ............... PASSED !\n"); + printf("***************************************************\n"); + } + else { + printf("************************************************\n"); + printf(" - TESTING ZGELS_HQR ... FAILED !\n"); hres++; + printf("************************************************\n"); + } + + /*------------------------------------------------------------- + * TESTING ZGEQRF + ZGEQRS or ZGELQF + ZGELQS + */ + /* Initialize A1 and A2 */ + LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA ); + + /* Initialize B1 and B2 */ + memset(B2, 0, LDB*NRHS*sizeof(MORSE_Complex64_t)); + LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, NRHS, B1, LDB, B2, LDB ); + + if (M >= N) { + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGEQRF + ZGEQRS ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n", eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + /* Morse routines */ + MORSE_zgeqrf_param( &qrtree, M, N, A2, LDA, TS, TT ); + MORSE_zungqr_param( &qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + MORSE_zgeqrs_param( &qrtree, M, N, NRHS, A2, LDA, TS,TT, B2, LDB); + + /* Check the orthogonality, factorization and the solution */ + info_ortho = check_orthogonality(M, N, LDA, Q, eps); + info_factorization = check_factorization(M, N, A1, A2, LDA, Q, eps); + info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps); + + if ((info_solution == 0)&(info_factorization == 0)&(info_ortho == 0)) { + printf("***************************************************\n"); + printf(" ---- TESTING ZGEQRF + ZGEQRS ............ PASSED !\n"); + printf("***************************************************\n"); + } + else{ + printf("***************************************************\n"); + printf(" - TESTING ZGEQRF + ZGEQRS ... FAILED !\n"); + printf("***************************************************\n"); + } + } + else { + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGELQF + ZGELQS ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n", eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + /* Morse routines */ + MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT); + //MORSE_zgelqf(M, N, A2, LDA, TS); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + //MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zgelqs_param(&qrtree, M, N, NRHS, A2, LDA, TS, TT, B2, LDB); + //MORSE_zgelqs(M, N, NRHS, A2, LDA, TS, B2, LDB); + + /* Check the orthogonality, factorization and the solution */ + info_ortho = check_orthogonality(M, N, LDA, Q, eps); + info_factorization = check_factorization(M, N, A1, A2, LDA, Q, eps); + info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps); + + if ( (info_solution == 0) & (info_factorization == 0) & (info_ortho == 0) ) { + printf("***************************************************\n"); + printf(" ---- TESTING ZGELQF + ZGELQS ............ PASSED !\n"); + printf("***************************************************\n"); + } + else { + printf("***************************************************\n"); + printf(" - TESTING ZGELQF + ZGELQS ... FAILED !\n"); + printf("***************************************************\n"); + } + } + + /*---------------------------------------------------------- + * TESTING ZGEQRF + ZORMQR + ZTRSM + */ + + /* Initialize A1 and A2 */ + LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA ); + + /* Initialize B1 and B2 */ + memset(B2, 0, LDB*NRHS*sizeof(MORSE_Complex64_t)); + LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, NRHS, B1, LDB, B2, LDB ); + + /* Initialize Q */ + LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', LDA, N, 0., 1., Q, LDA ); + + /* MORSE ZGEQRF+ ZUNMQR + ZTRSM */ + if (M >= N) { + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGEQRF + ZUNMQR + ZTRSM ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n",eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + MORSE_zgeqrf_param( &qrtree, M, N, A2, LDA, TS, TT ); + MORSE_zungqr_param( &qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + MORSE_zunmqr_param( &qrtree, MorseLeft, MorseConjTrans, M, NRHS, N, A2, LDA, TS, TT, B2, LDB); + MORSE_ztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, N, NRHS, 1.0, A2, LDA, B2, LDB); + } + else { + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGELQF + ZUNMLQ + ZTRSM ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n",eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT); + MORSE_ztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, M, NRHS, 1.0, A2, LDA, B2, LDB); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + MORSE_zunmlq_param(&qrtree, MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, TT, B2, LDB); + } + + /* Check the orthogonality, factorization and the solution */ + info_ortho = check_orthogonality(M, N, LDA, Q, eps); + info_factorization = check_factorization(M, N, A1, A2, LDA, Q, eps); + info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps); + + if ( (info_solution == 0) & (info_factorization == 0) & (info_ortho == 0) ) { + if (M >= N) { + printf("***************************************************\n"); + printf(" ---- TESTING ZGEQRF + ZUNMQR + ZTRSM .... PASSED !\n"); + printf("***************************************************\n"); + } + else { + printf("***************************************************\n"); + printf(" ---- TESTING ZGELQF + ZTRSM + ZUNMLQ .... PASSED !\n"); + printf("***************************************************\n"); + } + } + else { + if (M >= N) { + printf("***************************************************\n"); + printf(" - TESTING ZGEQRF + ZUNMQR + ZTRSM ... FAILED !\n"); + printf("***************************************************\n"); + } + else { + printf("***************************************************\n"); + printf(" - TESTING ZGELQF + ZTRSM + ZUNMLQ ... FAILED !\n"); + printf("***************************************************\n"); + } + } + + libhqr_matrix_finalize( &qrtree ); + + free(A1); free(A2); free(B1); free(B2); free(Q); + MORSE_Dealloc_Workspace( &TS ); + MORSE_Dealloc_Workspace( &TT ); + + return hres; +} + +/*------------------------------------------------------------------- + * Check the orthogonality of Q + */ + +static int check_orthogonality(int M, int N, int LDQ, MORSE_Complex64_t *Q, double eps) +{ + double alpha, beta; + double normQ; + int info_ortho; + int minMN = min(M, N); + + double *work = (double *)malloc(minMN*sizeof(double)); + + alpha = 1.0; + beta = -1.0; + + /* Build the idendity matrix USE DLASET?*/ + MORSE_Complex64_t *Id = (MORSE_Complex64_t *) malloc(minMN*minMN*sizeof(MORSE_Complex64_t)); + LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', minMN, minMN, 0., 1., Id, minMN ); + + /* Perform Id - Q'Q */ + if (M >= N) + cblas_zherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, alpha, Q, LDQ, beta, Id, N); + else + cblas_zherk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M); + + normQ = LAPACKE_zlansy( LAPACK_COL_MAJOR, 'I', 'U', minMN, Id, minMN ); + + printf("============\n"); + printf("Checking the orthogonality of Q \n"); + printf("||Id-Q'*Q||_oo / (N*eps) = %e \n", normQ/(minMN*eps)); + + if ( isnan(normQ / (minMN * eps)) || isinf(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 60.0) ) { + printf("-- Orthogonality is suspicious ! \n"); + info_ortho=1; + } + else { + printf("-- Orthogonality is CORRECT ! \n"); + info_ortho=0; + } + + free(work); free(Id); + + return info_ortho; +} + +/*------------------------------------------------------------ + * Check the factorization QR + */ + +static int check_factorization(int M, int N, MORSE_Complex64_t *A1, MORSE_Complex64_t *A2, int LDA, MORSE_Complex64_t *Q, double eps ) +{ + double Anorm, Rnorm; + MORSE_Complex64_t alpha, beta; + int info_factorization; + int i,j; + + MORSE_Complex64_t *Ql = (MORSE_Complex64_t *)malloc(M*N*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *Residual = (MORSE_Complex64_t *)malloc(M*N*sizeof(MORSE_Complex64_t)); + double *work = (double *)malloc(max(M,N)*sizeof(double)); + + alpha=1.0; + beta=0.0; + + if (M >= N) { + /* Extract the R */ + MORSE_Complex64_t *R = (MORSE_Complex64_t *)malloc(N*N*sizeof(MORSE_Complex64_t)); + memset((void*)R, 0, N*N*sizeof(MORSE_Complex64_t)); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N); + + /* Perform Ql=Q*R */ + memset((void*)Ql, 0, M*N*sizeof(MORSE_Complex64_t)); + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, CBLAS_SADDR(alpha), Q, LDA, R, N, CBLAS_SADDR(beta), Ql, M); + free(R); + } + else { + /* Extract the L */ + MORSE_Complex64_t *L = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t)); + memset((void*)L, 0, M*M*sizeof(MORSE_Complex64_t)); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M); + + /* Perform Ql=LQ */ + memset((void*)Ql, 0, M*N*sizeof(MORSE_Complex64_t)); + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, CBLAS_SADDR(alpha), L, M, Q, LDA, CBLAS_SADDR(beta), Ql, M); + free(L); + } + + /* Compute the Residual */ + for (i = 0; i < M; i++) + for (j = 0 ; j < N; j++) + Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i]; + + Rnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, N, Residual, M ); + Anorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, N, A2, LDA ); + + if (M >= N) { + printf("============\n"); + printf("Checking the QR Factorization \n"); + printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); + } + else { + printf("============\n"); + printf("Checking the LQ Factorization \n"); + printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); + } + + if (isnan(Rnorm / (Anorm * N *eps)) || isinf(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 60.0) ) { + printf("-- Factorization is suspicious ! \n"); + info_factorization = 1; + } + else { + printf("-- Factorization is CORRECT ! \n"); + info_factorization = 0; + } + + free(work); free(Ql); free(Residual); + + return info_factorization; +} + +/*-------------------------------------------------------------- + * Check the solution + */ + +static int check_solution(int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, MORSE_Complex64_t *X, int LDB, double eps) +{ + int info_solution; + double Rnorm, Anorm, Xnorm, Bnorm; + MORSE_Complex64_t alpha, beta; + double result; + double *work = (double *)malloc(max(M, N)* sizeof(double)); + + alpha = 1.0; + beta = -1.0; + + Anorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, N, A, LDA ); + Bnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', N, NRHS, B, LDB ); + Xnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, NRHS, X, LDB ); + + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(alpha), A, LDA, X, LDB, CBLAS_SADDR(beta), B, LDB); + + if (M >= N) { + MORSE_Complex64_t *Residual = (MORSE_Complex64_t *)malloc(M*NRHS*sizeof(MORSE_Complex64_t)); + memset((void*)Residual, 0, M*NRHS*sizeof(MORSE_Complex64_t)); + cblas_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Residual, M); + Rnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, NRHS, Residual, M ); + free(Residual); + } + else { + MORSE_Complex64_t *Residual = (MORSE_Complex64_t *)malloc(N*NRHS*sizeof(MORSE_Complex64_t)); + memset((void*)Residual, 0, N*NRHS*sizeof(MORSE_Complex64_t)); + cblas_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Residual, N); + Rnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', N, NRHS, Residual, N ); + free(Residual); + } + + if (getenv("MORSE_TESTING_VERBOSE")) + printf( "||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm ); + + result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ; + printf("============\n"); + printf("Checking the Residual of the solution \n"); + printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result); + + if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) { + printf("-- The solution is suspicious ! \n"); + info_solution = 1; + } + else{ + printf("-- The solution is CORRECT ! \n"); + info_solution = 0; + } + free(work); + return info_solution; +} diff --git a/testing/testing_zgels_systolic.c b/testing/testing_zgels_systolic.c new file mode 100644 index 0000000000000000000000000000000000000000..fce16bfc8fa637f8616d1da1b2475345567ca569 --- /dev/null +++ b/testing/testing_zgels_systolic.c @@ -0,0 +1,506 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file testing_zgels_systolic.c + * + * MORSE testing routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Boucherie Raphael + * @date 2017-05-17 + * @precisions normal z -> c d s + * + **/ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <assert.h> + +#include <morse.h> +#include <coreblas/include/cblas.h> +#include <coreblas/include/lapacke.h> +#include <coreblas/include/coreblas.h> +#include "testing_zauxiliary.h" + +#undef REAL +#define COMPLEX + +static int check_orthogonality(int, int, int, MORSE_Complex64_t*, double); +static int check_factorization(int, int, MORSE_Complex64_t*, MORSE_Complex64_t*, int, MORSE_Complex64_t*, double); +static int check_solution(int, int, int, MORSE_Complex64_t*, int, MORSE_Complex64_t*, MORSE_Complex64_t*, int, double); + +int testing_zgels_systolic(int argc, char **argv) +{ + int hres = 0; + + if ( argc < 1 ){ + goto usage; + } + + /* Check for number of arguments*/ + if ( argc != 7 ) { + usage: + USAGE("GELS_SYSTOLIC", "M N LDA NRHS LDB", + " - M : number of rows of the matrix A\n" + " - N : number of columns of the matrix A\n" + " - LDA : leading dimension of the matrix A\n" + " - NRHS : number of RHS\n" + " - LDB : leading dimension of the matrix B\n" + " - P : size of the highest level reduction tree\n" + " - Q : size of the middle reduction trees\n" + ); + return -1; + } + + int M = atoi(argv[0]); + int N = atoi(argv[1]); + int LDA = max( atoi(argv[2]), M ); + int NRHS = atoi(argv[3]); + int LDB = max( max( atoi(argv[4]), M ), N ); + int p = atoi(argv[5]); + int q = atoi(argv[6]); + libhqr_tree_t qrtree; + libhqr_tiledesc_t matrix; + + int K = min(M, N); + double eps; + int info_ortho, info_solution, info_factorization; + int LDAxN = LDA*N; + int LDBxNRHS = LDB*NRHS; + + MORSE_Complex64_t *A1 = (MORSE_Complex64_t *)malloc(LDA*N*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *A2 = (MORSE_Complex64_t *)malloc(LDA*N*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *B1 = (MORSE_Complex64_t *)malloc(LDB*NRHS*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *B2 = (MORSE_Complex64_t *)malloc(LDB*NRHS*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *Q = (MORSE_Complex64_t *)malloc(LDA*N*sizeof(MORSE_Complex64_t)); + MORSE_desc_t *TS; + MORSE_desc_t *TT = NULL; + + /* Check if unable to allocate memory */ + if ((!A1)||(!A2)||(!B1)||(!B2)||(!Q)){ + printf("Out of Memory \n "); + return -2; + } + + MORSE_Alloc_Workspace_zgels(M, N, &TS, 1, 1); + MORSE_Alloc_Workspace_zgels(M, N, &TT, 1, 1); + memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(MORSE_Complex64_t)); + memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(MORSE_Complex64_t)); + + eps = LAPACKE_dlamch_work( 'e' ); + + /*---------------------------------------------------------- + * TESTING ZGEQRF_PARAM + */ + + /* Initialize matrix */ + matrix.mt = TS->mt; + matrix.nt = TS->nt; + matrix.nodes = 1; + matrix.p = 1; + + libhqr_systolic_init( &qrtree, + ( M >= N ) ? LIBHQR_QR : LIBHQR_LQ, + &matrix, p, q ); + + /* Initialize A1 and A2 */ + LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA ); + + /* Initialize B1 and B2 */ + memset(B2, 0, LDB*NRHS*sizeof(MORSE_Complex64_t)); + LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, NRHS, B1, LDB, B2, LDB ); + + /* MORSE ZGELS */ + MORSE_zgels_param(&qrtree, MorseNoTrans, M, N, NRHS, A2, LDA, TS, TT, B2, LDB); + //MORSE_zgels(MorseNoTrans, M, N, NRHS, A2, LDA, TS, B2, LDB); + + /* MORSE ZGELS */ + if (M >= N) + /* Building the economy-size Q */ + MORSE_zungqr_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + else + /* Building the economy-size Q */ + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + //MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); + + + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGELS_SYSTOLIC ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n",eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + /* Check the orthogonality, factorization and the solution */ + info_ortho = check_orthogonality(M, N, LDA, Q, eps); + info_factorization = check_factorization(M, N, A1, A2, LDA, Q, eps); + info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps); + + if ((info_solution == 0)&(info_factorization == 0)&(info_ortho == 0)) { + printf("***************************************************\n"); + printf(" ---- TESTING ZGELS_SYSTOLIC ............... PASSED !\n"); + printf("***************************************************\n"); + } + else { + printf("************************************************\n"); + printf(" - TESTING ZGELS_SYSTOLIC ... FAILED !\n"); hres++; + printf("************************************************\n"); + } + + /*------------------------------------------------------------- + * TESTING ZGEQRF + ZGEQRS or ZGELQF + ZGELQS + */ + /* Initialize A1 and A2 */ + LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA ); + + /* Initialize B1 and B2 */ + memset(B2, 0, LDB*NRHS*sizeof(MORSE_Complex64_t)); + LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, NRHS, B1, LDB, B2, LDB ); + + if (M >= N) { + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGEQRF + ZGEQRS ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n", eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + /* Morse routines */ + MORSE_zgeqrf_param( &qrtree, M, N, A2, LDA, TS, TT ); + MORSE_zungqr_param( &qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + MORSE_zgeqrs_param( &qrtree, M, N, NRHS, A2, LDA, TS,TT, B2, LDB); + + /* Check the orthogonality, factorization and the solution */ + info_ortho = check_orthogonality(M, N, LDA, Q, eps); + info_factorization = check_factorization(M, N, A1, A2, LDA, Q, eps); + info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps); + + if ((info_solution == 0)&(info_factorization == 0)&(info_ortho == 0)) { + printf("***************************************************\n"); + printf(" ---- TESTING ZGEQRF + ZGEQRS ............ PASSED !\n"); + printf("***************************************************\n"); + } + else{ + printf("***************************************************\n"); + printf(" - TESTING ZGEQRF + ZGEQRS ... FAILED !\n"); + printf("***************************************************\n"); + } + } + else { + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGELQF + ZGELQS ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n", eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + /* Morse routines */ + MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT); + //MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + MORSE_zgelqs_param(&qrtree, M, N, NRHS, A2, LDA, TS, TT, B2, LDB); + //MORSE_zgelqs(M, N, NRHS, A2, LDA, TS, B2, LDB); + + /* Check the orthogonality, factorization and the solution */ + info_ortho = check_orthogonality(M, N, LDA, Q, eps); + info_factorization = check_factorization(M, N, A1, A2, LDA, Q, eps); + info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps); + + if ( (info_solution == 0) & (info_factorization == 0) & (info_ortho == 0) ) { + printf("***************************************************\n"); + printf(" ---- TESTING ZGELQF + ZGELQS ............ PASSED !\n"); + printf("***************************************************\n"); + } + else { + printf("***************************************************\n"); + printf(" - TESTING ZGELQF + ZGELQS ... FAILED !\n"); + printf("***************************************************\n"); + } + } + + /*---------------------------------------------------------- + * TESTING ZGEQRF + ZORMQR + ZTRSM + */ + + /* Initialize A1 and A2 */ + LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA ); + + /* Initialize B1 and B2 */ + memset(B2, 0, LDB*NRHS*sizeof(MORSE_Complex64_t)); + LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, NRHS, B1, LDB, B2, LDB ); + + /* Initialize Q */ + LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', LDA, N, 0., 1., Q, LDA ); + + /* MORSE ZGEQRF+ ZUNMQR + ZTRSM */ + if (M >= N) { + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGEQRF + ZUNMQR + ZTRSM ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n",eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + MORSE_zgeqrf_param( &qrtree, M, N, A2, LDA, TS, TT ); + MORSE_zungqr_param( &qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + MORSE_zunmqr_param( &qrtree, MorseLeft, MorseConjTrans, M, NRHS, N, A2, LDA, TS, TT, B2, LDB); + MORSE_ztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, N, NRHS, 1.0, A2, LDA, B2, LDB); + } + else { + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGELQF + ZUNMLQ + ZTRSM ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, N); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n",eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + MORSE_zgelqf_param(&qrtree, M, N, A2, LDA, TS, TT); + MORSE_ztrsm(MorseLeft, MorseLower, MorseNoTrans, MorseNonUnit, M, NRHS, 1.0, A2, LDA, B2, LDB); + MORSE_zunglq_param(&qrtree, M, N, K, A2, LDA, TS, TT, Q, LDA); + //MORSE_zunglq(M, N, K, A2, LDA, TS, Q, LDA); + MORSE_zunmlq_param(&qrtree, MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, TT, B2, LDB); + //MORSE_zunmlq(MorseLeft, MorseConjTrans, N, NRHS, M, A2, LDA, TS, B2, LDB); + } + + /* Check the orthogonality, factorization and the solution */ + info_ortho = check_orthogonality(M, N, LDA, Q, eps); + info_factorization = check_factorization(M, N, A1, A2, LDA, Q, eps); + info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps); + + if ( (info_solution == 0) & (info_factorization == 0) & (info_ortho == 0) ) { + if (M >= N) { + printf("***************************************************\n"); + printf(" ---- TESTING ZGEQRF + ZUNMQR + ZTRSM .... PASSED !\n"); + printf("***************************************************\n"); + } + else { + printf("***************************************************\n"); + printf(" ---- TESTING ZGELQF + ZTRSM + ZUNMLQ .... PASSED !\n"); + printf("***************************************************\n"); + } + } + else { + if (M >= N) { + printf("***************************************************\n"); + printf(" - TESTING ZGEQRF + ZUNMQR + ZTRSM ... FAILED !\n"); + printf("***************************************************\n"); + } + else { + printf("***************************************************\n"); + printf(" - TESTING ZGELQF + ZTRSM + ZUNMLQ ... FAILED !\n"); + printf("***************************************************\n"); + } + } + + libhqr_matrix_finalize( &qrtree ); + + free(A1); free(A2); free(B1); free(B2); free(Q); + MORSE_Dealloc_Workspace( &TS ); + MORSE_Dealloc_Workspace( &TT ); + + return hres; +} + +/*------------------------------------------------------------------- + * Check the orthogonality of Q + */ + +static int check_orthogonality(int M, int N, int LDQ, MORSE_Complex64_t *Q, double eps) +{ + double alpha, beta; + double normQ; + int info_ortho; + int minMN = min(M, N); + + double *work = (double *)malloc(minMN*sizeof(double)); + + alpha = 1.0; + beta = -1.0; + + /* Build the idendity matrix USE DLASET?*/ + MORSE_Complex64_t *Id = (MORSE_Complex64_t *) malloc(minMN*minMN*sizeof(MORSE_Complex64_t)); + LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', minMN, minMN, 0., 1., Id, minMN ); + + /* Perform Id - Q'Q */ + if (M >= N) + cblas_zherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, alpha, Q, LDQ, beta, Id, N); + else + cblas_zherk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M); + + normQ = LAPACKE_zlansy( LAPACK_COL_MAJOR, 'I', 'U', minMN, Id, minMN ); + + printf("============\n"); + printf("Checking the orthogonality of Q \n"); + printf("||Id-Q'*Q||_oo / (N*eps) = %e \n", normQ/(minMN*eps)); + + if ( isnan(normQ / (minMN * eps)) || isinf(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 60.0) ) { + printf("-- Orthogonality is suspicious ! \n"); + info_ortho=1; + } + else { + printf("-- Orthogonality is CORRECT ! \n"); + info_ortho=0; + } + + free(work); free(Id); + + return info_ortho; +} + +/*------------------------------------------------------------ + * Check the factorization QR + */ + +static int check_factorization(int M, int N, MORSE_Complex64_t *A1, MORSE_Complex64_t *A2, int LDA, MORSE_Complex64_t *Q, double eps ) +{ + double Anorm, Rnorm; + MORSE_Complex64_t alpha, beta; + int info_factorization; + int i,j; + + MORSE_Complex64_t *Ql = (MORSE_Complex64_t *)malloc(M*N*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *Residual = (MORSE_Complex64_t *)malloc(M*N*sizeof(MORSE_Complex64_t)); + double *work = (double *)malloc(max(M,N)*sizeof(double)); + + alpha=1.0; + beta=0.0; + + if (M >= N) { + /* Extract the R */ + MORSE_Complex64_t *R = (MORSE_Complex64_t *)malloc(N*N*sizeof(MORSE_Complex64_t)); + memset((void*)R, 0, N*N*sizeof(MORSE_Complex64_t)); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N); + + /* Perform Ql=Q*R */ + memset((void*)Ql, 0, M*N*sizeof(MORSE_Complex64_t)); + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, CBLAS_SADDR(alpha), Q, LDA, R, N, CBLAS_SADDR(beta), Ql, M); + free(R); + } + else { + /* Extract the L */ + MORSE_Complex64_t *L = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t)); + memset((void*)L, 0, M*M*sizeof(MORSE_Complex64_t)); + LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M); + + /* Perform Ql=LQ */ + memset((void*)Ql, 0, M*N*sizeof(MORSE_Complex64_t)); + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, CBLAS_SADDR(alpha), L, M, Q, LDA, CBLAS_SADDR(beta), Ql, M); + free(L); + } + + /* Compute the Residual */ + for (i = 0; i < M; i++) + for (j = 0 ; j < N; j++) + Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i]; + + Rnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, N, Residual, M ); + Anorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, N, A2, LDA ); + + if (M >= N) { + printf("============\n"); + printf("Checking the QR Factorization \n"); + printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); + } + else { + printf("============\n"); + printf("Checking the LQ Factorization \n"); + printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); + } + + if (isnan(Rnorm / (Anorm * N *eps)) || isinf(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 60.0) ) { + printf("-- Factorization is suspicious ! \n"); + info_factorization = 1; + } + else { + printf("-- Factorization is CORRECT ! \n"); + info_factorization = 0; + } + + free(work); free(Ql); free(Residual); + + return info_factorization; +} + +/*-------------------------------------------------------------- + * Check the solution + */ + +static int check_solution(int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, MORSE_Complex64_t *X, int LDB, double eps) +{ + int info_solution; + double Rnorm, Anorm, Xnorm, Bnorm; + MORSE_Complex64_t alpha, beta; + double result; + double *work = (double *)malloc(max(M, N)* sizeof(double)); + + alpha = 1.0; + beta = -1.0; + + Anorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, N, A, LDA ); + Bnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', N, NRHS, B, LDB ); + Xnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, NRHS, X, LDB ); + + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(alpha), A, LDA, X, LDB, CBLAS_SADDR(beta), B, LDB); + + if (M >= N) { + MORSE_Complex64_t *Residual = (MORSE_Complex64_t *)malloc(M*NRHS*sizeof(MORSE_Complex64_t)); + memset((void*)Residual, 0, M*NRHS*sizeof(MORSE_Complex64_t)); + cblas_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Residual, M); + Rnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', M, NRHS, Residual, M ); + free(Residual); + } + else { + MORSE_Complex64_t *Residual = (MORSE_Complex64_t *)malloc(N*NRHS*sizeof(MORSE_Complex64_t)); + memset((void*)Residual, 0, N*NRHS*sizeof(MORSE_Complex64_t)); + cblas_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Residual, N); + Rnorm = LAPACKE_zlange( LAPACK_COL_MAJOR, 'I', N, NRHS, Residual, N ); + free(Residual); + } + + if (getenv("MORSE_TESTING_VERBOSE")) + printf( "||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm ); + + result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ; + printf("============\n"); + printf("Checking the Residual of the solution \n"); + printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result); + + if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) { + printf("-- The solution is suspicious ! \n"); + info_solution = 1; + } + else{ + printf("-- The solution is CORRECT ! \n"); + info_solution = 0; + } + free(work); + return info_solution; +} diff --git a/timing/CMakeLists.txt b/timing/CMakeLists.txt index 61334fa4209b3e44a91e7381968027af4fe67b1e..c61405a71ab37b16b003fd4394255a8d1a2c16a6 100644 --- a/timing/CMakeLists.txt +++ b/timing/CMakeLists.txt @@ -85,6 +85,8 @@ if (NOT CHAMELEON_SIMULATION) time_zgels.c time_zgels_tile.c time_zgeqrf.c + time_zgeqrf_hqr.c + time_zgeqrf_hqr_tile.c time_zgeqrf_tile.c time_zgelqf.c time_zgelqf_tile.c @@ -237,6 +239,7 @@ if(NOT CHAMELEON_SIMULATION) ${CBLAS_LIBRARIES} ${LAPACK_SEQ_LIBRARIES} ${BLAS_SEQ_LIBRARIES} + ${LIBHQR_LIBRARIES} ${HWLOC_LIBRARIES} ${EXTRA_LIBRARIES} ) @@ -246,6 +249,7 @@ if(NOT CHAMELEON_SIMULATION) link_directories(${LAPACK_LIBRARY_DIRS}) link_directories(${CBLAS_LIBRARY_DIRS}) link_directories(${BLAS_LIBRARY_DIRS}) + link_directories(${LIBHQR_LIBRARY_DIRS}) else() diff --git a/timing/time_zgeqrf_hqr.c b/timing/time_zgeqrf_hqr.c new file mode 100644 index 0000000000000000000000000000000000000000..2b7eca98722fdbc2888bb6475ae4ceebc62b1acb --- /dev/null +++ b/timing/time_zgeqrf_hqr.c @@ -0,0 +1,103 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file time_zgeqrf_hqr.c + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-06-08 + * @precisions normal z -> c d s + * + **/ +#define _TYPE MORSE_Complex64_t +#define _PREC double +#define _LAMCH LAPACKE_dlamch_work + +#define _NAME "MORSE_zgeqrf_param" +/* See Lawn 41 page 120 */ +#define _FMULS FMULS_GEQRF(M, N) +#define _FADDS FADDS_GEQRF(M, N) + +#include "./timing.c" +#include "timing_zauxiliary.h" + +static int +RunTest(int *iparam, double *dparam, morse_time_t *t_) +{ + MORSE_desc_t *TS; + MORSE_desc_t *TT; + libhqr_tree_t qrtree; + libhqr_tiledesc_t matrix; + + PASTE_CODE_IPARAM_LOCALS( iparam ); + + if ( M != N && check ) { + fprintf(stderr, "Check cannot be perfomed with M != N\n"); + check = 0; + } + + /* Allocate Data */ + PASTE_CODE_ALLOCATE_MATRIX( A, 1, MORSE_Complex64_t, LDA, N ); + + /* Initialize Data */ + MORSE_zplrnt(M, N, A, LDA, 3456); + + /* Allocate Workspace */ + MORSE_Alloc_Workspace_zgels(M, N, &TS, P, Q); + memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(MorseComplexDouble)); + MORSE_Alloc_Workspace_zgels(M, N, &TT, P, Q); + memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(MorseComplexDouble)); + + /* Save AT in lapack layout for check */ + PASTE_CODE_ALLOCATE_COPY( Acpy, check, MORSE_Complex64_t, A, LDA, N ); + + /* Initialize matrix */ + matrix.mt = TS->mt; + matrix.nt = TS->nt; + matrix.nodes = 1; + matrix.p = 1; + + /* Initialize qrtree */ + libhqr_hqr_init( &qrtree, + ( matrix.mt >= matrix.nt ) ? LIBHQR_QR : LIBHQR_LQ, + &matrix, -1, -1, -1, P, 0, 0); + + START_TIMING(); + MORSE_zgeqrf_param(&qrtree, M, N, A, LDA, TS, TT ); + STOP_TIMING(); + + /* Check the solution */ + if ( check ) + { + PASTE_CODE_ALLOCATE_MATRIX( X, 1, MORSE_Complex64_t, LDB, NRHS ); + MORSE_zplrnt( N, NRHS, X, LDB, 5673 ); + PASTE_CODE_ALLOCATE_COPY( B, 1, MORSE_Complex64_t, X, LDB, NRHS ); + + MORSE_zgeqrs_param(&qrtree, M, N, NRHS, A, LDA, TS, TT, X, LDB); + + dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, Acpy, LDA, B, X, LDB, + &(dparam[IPARAM_ANORM]), + &(dparam[IPARAM_BNORM]), + &(dparam[IPARAM_XNORM])); + + free( Acpy ); + free( B ); + free( X ); + } + + /* Free Workspace */ + MORSE_Dealloc_Workspace( &TS ); + MORSE_Dealloc_Workspace( &TT ); + free( A ); + + return 0; +} diff --git a/timing/time_zgeqrf_hqr_tile.c b/timing/time_zgeqrf_hqr_tile.c new file mode 100644 index 0000000000000000000000000000000000000000..87418d8d36fe64f1b73e7d9fb2f1a242d82d4448 --- /dev/null +++ b/timing/time_zgeqrf_hqr_tile.c @@ -0,0 +1,109 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file time_zgeqrf_hqr_tile.c + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Raphael Boucherie + * @date 2017-06-08 + * @precisions normal z -> c d s + * + **/ +#define _TYPE MORSE_Complex64_t +#define _PREC double +#define _LAMCH LAPACKE_dlamch_work + +#define _NAME "MORSE_zgeqrf_param" +/* See Lawn 41 page 120 */ +#define _FMULS FMULS_GEQRF(M, N) +#define _FADDS FADDS_GEQRF(M, N) + +#include "./timing.c" +#include "timing_zauxiliary.h" + +static int +RunTest(int *iparam, double *dparam, morse_time_t *t_) +{ + MORSE_desc_t *TS; + MORSE_desc_t *TT; + libhqr_tree_t qrtree; + libhqr_tiledesc_t matrix; + + PASTE_CODE_IPARAM_LOCALS( iparam ); + + if ( M != N && check ) { + fprintf(stderr, "Check cannot be perfomed with M != N\n"); + check = 0; + } + + /* Allocate Data */ + PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, M, N ); + PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, ( check && M == N ), MORSE_Complex64_t, MorseComplexDouble, LDB, M, NRHS ); + PASTE_CODE_ALLOCATE_MATRIX_TILE( descA0, ( check && M == N ), MORSE_Complex64_t, MorseComplexDouble, LDA, M, N ); + PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, ( check && M == N ), MORSE_Complex64_t, MorseComplexDouble, LDB, M, NRHS ); + + MORSE_zplrnt_Tile( descA, 5373 ); + + /* Save A for check */ + if (check == 1 && M == N){ + MORSE_zlacpy_Tile(MorseUpperLower, descA, descA0); + } + + /* Allocate Workspace */ + MORSE_Alloc_Workspace_zgels(M, N, &TS, P, Q); + memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(MorseComplexDouble)); + MORSE_Alloc_Workspace_zgels(M, N, &TT, P, Q); + memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(MorseComplexDouble)); + + /* Initialize matrix */ + matrix.mt = TS->mt; + matrix.nt = TS->nt; + matrix.nodes = 1; + matrix.p = 1; + + /* Initialize qrtree */ + libhqr_hqr_init( &qrtree, + ( M >= N ) ? LIBHQR_QR : LIBHQR_LQ, + &matrix, -1, -1, -1, P, 0, 0); + + START_TIMING(); + MORSE_zgeqrf_param_Tile(&qrtree, descA, TS, TT ); + STOP_TIMING(); + + /* Check the solution */ + if ( check && M == N) + { + /* Initialize and save B */ + MORSE_zplrnt_Tile( descX, 2264 ); + MORSE_zlacpy_Tile(MorseUpperLower, descX, descB); + + /* Compute the solution */ + MORSE_zgeqrs_param_Tile(&qrtree, descA, TS, TT, descX ); + + /* Check solution */ + dparam[IPARAM_ANORM] = MORSE_zlange_Tile(MorseInfNorm, descA0); + dparam[IPARAM_BNORM] = MORSE_zlange_Tile(MorseInfNorm, descB); + dparam[IPARAM_XNORM] = MORSE_zlange_Tile(MorseInfNorm, descX); + MORSE_zgemm_Tile( MorseNoTrans, MorseNoTrans, 1.0, descA0, descX, -1.0, descB ); + dparam[IPARAM_RES] = MORSE_zlange_Tile(MorseInfNorm, descB); + PASTE_CODE_FREE_MATRIX( descX ) + PASTE_CODE_FREE_MATRIX( descA0 ) + PASTE_CODE_FREE_MATRIX( descB ) + } + + /* Free Workspace */ + MORSE_Dealloc_Workspace( &TS ); + MORSE_Dealloc_Workspace( &TT ); + free( descA ); + + return 0; +} diff --git a/timing/time_zgesv_nopiv.c b/timing/time_zgesv_nopiv.c index 675a5efbe3f5282b5c63d37c9cc22ea5daea6d5f..a0d0e4505ab015949b3fdb3ff6e26ecf4c04e66a 100644 --- a/timing/time_zgesv_nopiv.c +++ b/timing/time_zgesv_nopiv.c @@ -26,19 +26,19 @@ #include "timing_zauxiliary.h" static int -RunTest(int *iparam, double *dparam, morse_time_t *t_) +RunTest(int *iparam, double *dparam, morse_time_t *t_) { PASTE_CODE_IPARAM_LOCALS( iparam ); - + if ( M != N ) { fprintf(stderr, "This timing works only with M == N\n"); return -1; } - + /* Allocate Data */ PASTE_CODE_ALLOCATE_MATRIX( A, 1, MORSE_Complex64_t, LDA, N ); PASTE_CODE_ALLOCATE_MATRIX( X, 1, MORSE_Complex64_t, LDB, NRHS ); - + /* Initialiaze Data */ MORSE_zplrnt( N, N, A, LDA, 51 ); MORSE_zplrnt( N, NRHS, X, LDB, 5673 ); @@ -50,13 +50,13 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) START_TIMING(); MORSE_zgesv_nopiv( N, NRHS, A, N, X, LDB ); STOP_TIMING(); - + /* Check the solution */ if (check) { dparam[IPARAM_RES] = z_check_solution(N, N, NRHS, Acpy, LDA, B, X, LDB, - &(dparam[IPARAM_ANORM]), - &(dparam[IPARAM_BNORM]), + &(dparam[IPARAM_ANORM]), + &(dparam[IPARAM_BNORM]), &(dparam[IPARAM_XNORM])); free(Acpy); free(B); } diff --git a/timing/timing.h b/timing/timing.h index 561fed157cdd380d077c4b6c4a52c5eda194fd71..a8f08224647372f9f99d8d5e2b984867446f3141 100644 --- a/timing/timing.h +++ b/timing/timing.h @@ -63,6 +63,14 @@ enum iparam_timing { IPARAM_BOUNDDEPS, IPARAM_BOUNDDEPSPRIO, /* End */ + /* Added for libhqr version */ + IPARAM_LOWLVL_TREE, /* Tree used for reduction inside nodes */ + IPARAM_HIGHLVL_TREE, /* Tree used for reduction between nodes */ + IPARAM_QR_TS_SZE, /* Size of TS domain */ + IPARAM_QR_HLVL_SZE, /* Size of the high level tree */ + IPARAM_QR_DOMINO, /* Enable/disable the domino tree */ + IPARAM_QR_TSRR, /* Enable/disable the round-robin on TS domain */ + /* End */ IPARAM_SIZEOF };