diff --git a/CMakeLists.txt b/CMakeLists.txt
index 19f96ba3cc8c2641e4fad84fd79982794b874991..5f8a1c32ab1b96a7ee3be8fa2113ee83c80b2f52 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -250,11 +250,6 @@ endif()
 #########################
 set(CHAMELEON_DEP "")
 
-set(CHAMELEON_USE_LIBHQR 1)
-find_package(LIBHQR REQUIRED)
-list(INSERT CHAMELEON_DEP 0 ${LIBHQR_LIBRARIES})
-include_directories(${LIBHQR_INCLUDE_DIRS})
-
 # Check for Thread library
 # ------------------------
 set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
@@ -1016,6 +1011,12 @@ if(CHAMELEON_USE_CUDA)
 endif()
 #------------------------------------------------------------------------------
 
+###############################################################################
+# Add HQR library #
+###################
+add_subdirectory(hqr)
+include_directories(hqr/include)
+#------------------------------------------------------------------------------
 
 ###############################################################################
 # Main library #
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 246c7ea3e905a2268ada2bd1a747d5aacacdb336..079fdf0e476d0f1dc7f459a02cde962d73708067 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -298,6 +298,7 @@ endif()
 if (NOT CHAMELEON_SIMULATION)
     target_link_libraries(chameleon coreblas)
 endif()
+target_link_libraries(chameleon hqr)
 
 list(INSERT CHAMELEON_DEP 0 -lchameleon)
 add_dependencies(chameleon
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
index 927dfbe088b0c276686d125de76ba39668cc9958..3bcec90f391e6024ffab81cb2ac2dcbf6a09b2f5 100644
--- a/compute/pzunmlq_param.c
+++ b/compute/pzunmlq_param.c
@@ -322,7 +322,6 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
 
                 temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb;
                 tempkmin = chameleon_min(tempkm, temppn);
-                ldbp = BLKLDD(B, p);
 
 #if defined(CHAMELEON_COPY_DIAG)
                 MORSE_TASK_zlacpy(
@@ -339,6 +338,7 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
 #endif
 #endif
                 for (m = 0; m < B->mt; m++) {
+                    ldbm = BLKLDD(B, m);
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                     MORSE_TASK_zunmlq(
                         &options,
diff --git a/compute/zgels_param.c b/compute/zgels_param.c
index 6c3c11da8d2fa3f33aa28f605b62db4c739c80c6..e237a89baf7a0eefd15f6ca484909029efe7dc97 100644
--- a/compute/zgels_param.c
+++ b/compute/zgels_param.c
@@ -336,6 +336,7 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans,
     MORSE_desc_t *subB;
     MORSE_context_t *morse;
     MORSE_desc_t D;
+
     morse = morse_context_self();
     if (morse == NULL) {
         morse_fatal_error("MORSE_zgels_param_Tile", "MORSE not initialized");
@@ -423,5 +424,6 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans,
         //morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request);
         morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request);
     }
+    (void)D; /* presumably silences an unused-variable warning for D - TODO confirm */
     return MORSE_SUCCESS;
 }
diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c
index 86cf56f697b435968b56fcdca9a6eeeda2035b40..d90f300463bfa32d94958e24edbfe52f581f3ebc 100644
--- a/compute/zgeqrs_param.c
+++ b/compute/zgeqrs_param.c
@@ -325,6 +325,6 @@ int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree,
     morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
     free(subA);
     free(subB);
-
+    (void)D; /* presumably silences an unused-variable warning for D - TODO confirm */
     return MORSE_SUCCESS;
 }
diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c
index 469c73cd4f97283e0cf8454daf1218dbf20484cb..16d94802bd0bbd05ad5dfd08efa733119322ee76 100644
--- a/compute/zungqr_param.c
+++ b/compute/zungqr_param.c
@@ -310,5 +310,6 @@ int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
     morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request);
     morse_pzungqr_param(qrtree, A, Q, TS, TT, NULL, sequence, request);
 #endif
+    (void)D; /* the call just above passes NULL instead of D; presumably keeps D referenced - TODO confirm */
     return MORSE_SUCCESS;
 }
diff --git a/control/compute_z.h b/control/compute_z.h
index 65a8547b0bbc6d16fa14208106bd724ee1f8d335..e1c5b024fc3e593736c0adaadd30f8c4a3b1c70a 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -155,7 +155,6 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_des
 void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzbuild( MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* user_build_callback, MORSE_sequence_t *sequence, MORSE_request_t *request );
 
-#if defined(CHAMELEON_USE_LIBHQR)
 void morse_pzgelqf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT,
                          MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgeqrf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
@@ -172,4 +171,3 @@ void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_des
 void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
                          MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
                          MORSE_sequence_t *sequence, MORSE_request_t *request);
-#endif /* defined(CHAMELEON_USE_LIBHQR) */
diff --git a/include/chameleon_config.h.in b/include/chameleon_config.h.in
index 5da2e208ddbe8fef68080361b82289de6025e0da..d6c819fc6c6b3b25af2c667a2ed939ac36564c03 100644
--- a/include/chameleon_config.h.in
+++ b/include/chameleon_config.h.in
@@ -37,9 +37,6 @@
 /* Communication engine */
 #cmakedefine CHAMELEON_USE_MPI
 
-/* Householder reduction trees for QR like operations */
-#cmakedefine CHAMELEON_USE_LIBHQR
-
 /* GPU Support */
 #cmakedefine CHAMELEON_USE_CUDA
 #cmakedefine CHAMELEON_USE_CUBLAS
diff --git a/include/morse.h.in b/include/morse.h.in
index c972d81a6c7223b03d6432d1660c2a7cf9a142a7..0a3f3ae0a23d3076c1a51c95f5c61449a5db279d 100644
--- a/include/morse.h.in
+++ b/include/morse.h.in
@@ -121,10 +121,7 @@ int MORSE_Sequence_Wait (MORSE_sequence_t *sequence);
 }
 #endif
 
-#if defined(CHAMELEON_USE_LIBHQR)
 #include "libhqr.h"
-#endif /* defined(CHAMELEON_USE_LIBHQR) */
-
 #include "morse_z.h"
 #include "morse_c.h"
 #include "morse_d.h"
diff --git a/include/morse_z.h b/include/morse_z.h
index cf18bd2dc739654d96e335175a7d64e5273a1eac..cd41b2fac3cbd2214efd1ae8398755d08f60e4f5 100644
--- a/include/morse_z.h
+++ b/include/morse_z.h
@@ -276,7 +276,6 @@ int MORSE_zunmqr_Tile_Async(MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A,
 /**
  *  Declarations of libhqr dependent functions.
  */
-#if defined(CHAMELEON_USE_LIBHQR)
 /** ****************************************************************************
  *  Declarations of math functions (LAPACK layout) - alphabetical order
  **/
@@ -315,7 +314,7 @@ int MORSE_zunglq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
 int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
 int MORSE_zunmlq_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
 int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
-#endif /* defined(CHAMELEON_USE_LIBHQR) */
+
 /**
  *  Declarations of workspace allocation functions (tile layout) - alphabetical order
  */
diff --git a/timing/CMakeLists.txt b/timing/CMakeLists.txt
index 500dda7f9120fc74ccf8af11bc7f3507d40f7744..c61405a71ab37b16b003fd4394255a8d1a2c16a6 100644
--- a/timing/CMakeLists.txt
+++ b/timing/CMakeLists.txt
@@ -86,6 +86,7 @@ if (NOT CHAMELEON_SIMULATION)
         time_zgels_tile.c
         time_zgeqrf.c
         time_zgeqrf_hqr.c
+        time_zgeqrf_hqr_tile.c
         time_zgeqrf_tile.c
         time_zgelqf.c
         time_zgelqf_tile.c
diff --git a/timing/time_zgeqrf_hqr.c b/timing/time_zgeqrf_hqr.c
index a0236f6ec90e496de85b594a88f41bb31c7c7e61..d77173c3a72882b563fcfcce5920231ad5dcd390 100644
--- a/timing/time_zgeqrf_hqr.c
+++ b/timing/time_zgeqrf_hqr.c
@@ -9,6 +9,12 @@
 
 /**
  *
+ * @file time_zgeqrf_hqr.c
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @author Raphael Boucherie
+ * @date 2017-06-08
  * @precisions normal z -> c d s
  *
  **/
@@ -40,12 +46,10 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
     }
 
     /* Allocate Data */
-    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, LDA, M, N );
-    PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, ( check && M == N ), MORSE_Complex64_t, MorseComplexDouble, LDB, M, NRHS );
-    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA0, ( check && M == N ), MORSE_Complex64_t, MorseComplexDouble, LDA, M, N );
-    PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, ( check && M == N ), MORSE_Complex64_t, MorseComplexDouble, LDB, M, NRHS );
+    PASTE_CODE_ALLOCATE_MATRIX( A, 1, MORSE_Complex64_t, LDA, N );
 
-    MORSE_zplrnt_Tile( descA, 5373 );
+    /* Initialize Data */
+    MORSE_zplrnt(M, N, A, LDA, 3456);
 
 
     /* Allocate Workspace */
@@ -54,10 +58,8 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
     MORSE_Alloc_Workspace_zgels(M, N, &TT, P, Q);
     memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(MorseComplexDouble));
 
-    /* Save A for check */
-    if (check == 1 && M == N){
-        MORSE_zlacpy_Tile(MorseUpperLower, descA, descA0);
-    }
+    /* Save AT in lapack layout for check */
+    PASTE_CODE_ALLOCATE_COPY( Acpy, check, MORSE_Complex64_t, A, LDA, N );
 
     /* Initialize matrix */
     matrix.mt = TS->mt;
@@ -71,7 +73,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
                      &matrix, -1, -1, 1, -1, 0, 0);
 
     START_TIMING();
-    MORSE_zgeqrf_param(&qrtree, M, N, descA, LDA, TS, TT );
+    MORSE_zgeqrf_param(&qrtree, M, N, A, LDA, TS, TT );
     STOP_TIMING();
 
     /* Check the solution */
@@ -96,7 +98,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
     /* Free Workspace */
     MORSE_Dealloc_Workspace( &TS );
     MORSE_Dealloc_Workspace( &TT );
-    free( descA );
+    free( A );
 
     return 0;
 }
diff --git a/timing/time_zgeqrf_hqr_tile.c b/timing/time_zgeqrf_hqr_tile.c
new file mode 100644
index 0000000000000000000000000000000000000000..0cf391ce7647661f9d16e4721f43cdd8006011a6
--- /dev/null
+++ b/timing/time_zgeqrf_hqr_tile.c
@@ -0,0 +1,133 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file time_zgeqrf_hqr_tile.c
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @author Raphael Boucherie
+ * @date 2017-06-08
+ * @precisions normal z -> c d s
+ *
+ **/
+#define _TYPE  MORSE_Complex64_t
+#define _PREC  double
+#define _LAMCH LAPACKE_dlamch_work
+
+#define _NAME  "MORSE_zgeqrf_param"
+/* See Lawn 41 page 120 */
+#define _FMULS FMULS_GEQRF(M, N)
+#define _FADDS FADDS_GEQRF(M, N)
+
+#include "./timing.c"
+#include "timing_zauxiliary.h"
+
+/**
+ * Time one tile-layout QR factorization driven by a libhqr reduction tree.
+ *
+ * Fills descA with MORSE_zplrnt_Tile, builds the tree with libhqr_hqr_init and
+ * times MORSE_zgeqrf_param_Tile between START_TIMING() and STOP_TIMING(). When
+ * check is set and M == N, MORSE_zgeqrs_param_Tile is then used to solve a
+ * system and the norms and residual are stored in dparam.
+ *
+ * @param[in]  iparam  Integer run parameters, consumed by
+ *                     PASTE_CODE_IPARAM_LOCALS.
+ * @param[out] dparam  Receives IPARAM_ANORM, IPARAM_BNORM, IPARAM_XNORM and
+ *                     IPARAM_RES when the check is performed.
+ * @param[out] t_      Not referenced directly in this body; presumably
+ *                     written by the timing macros (TODO confirm).
+ *
+ * @return 0 once the run completes.
+ */
+static int
+RunTest(int *iparam, double *dparam, morse_time_t *t_)
+{
+    MORSE_desc_t *TS;
+    MORSE_desc_t *TT;
+    libhqr_tree_t qrtree;
+    libhqr_tiledesc_t matrix;
+
+    PASTE_CODE_IPARAM_LOCALS( iparam );
+
+    if ( M != N && check ) {
+        fprintf(stderr, "Check cannot be perfomed with M != N\n");
+        check = 0;
+    }
+
+    /* Allocate Data */
+    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, M, N );
+    PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, ( check && M == N ), MORSE_Complex64_t, MorseComplexDouble, LDB, M, NRHS );
+    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA0, ( check && M == N ), MORSE_Complex64_t, MorseComplexDouble, LDA, M, N );
+    PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, ( check && M == N ), MORSE_Complex64_t, MorseComplexDouble, LDB, M, NRHS );
+
+    MORSE_zplrnt_Tile( descA, 5373 );
+
+    /* Save A for check */
+    if (check == 1 && M == N){
+        MORSE_zlacpy_Tile(MorseUpperLower, descA, descA0);
+    }
+
+    /* Allocate Workspace */
+    /* Clear with the element type's size: MorseComplexDouble is the precision
+     * tag handed to the allocation macros above, not the element type. */
+    MORSE_Alloc_Workspace_zgels(M, N, &TS, P, Q);
+    memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(MORSE_Complex64_t));
+    MORSE_Alloc_Workspace_zgels(M, N, &TT, P, Q);
+    memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(MORSE_Complex64_t));
+
+
+    /* Initialize matrix */
+    matrix.mt = TS->mt;
+    matrix.nt = TS->nt;
+    matrix.nodes = 1;
+    matrix.p = 1;
+
+    /* Initialize qrtree  */
+    libhqr_hqr_init( &qrtree,
+                     ( M >= N ) ? LIBHQR_QR : LIBHQR_LQ,
+                     &matrix, -1, -1, -1, 0, 0, 0);
+
+    START_TIMING();
+    MORSE_zgeqrf_param_Tile(&qrtree, descA, TS, TT );
+    STOP_TIMING();
+
+    /* Check the solution */
+    if ( check && M == N)
+    {
+        /* Initialize and save B */
+        MORSE_zplrnt_Tile( descX, 2264 );
+        MORSE_zlacpy_Tile(MorseUpperLower, descX, descB);
+
+        /* Compute the solution */
+        MORSE_zgeqrs_param_Tile(&qrtree, descA, TS, TT, descX );
+
+        /* Check solution */
+        dparam[IPARAM_ANORM] = MORSE_zlange_Tile(MorseInfNorm, descA0);
+        dparam[IPARAM_BNORM] = MORSE_zlange_Tile(MorseInfNorm, descB);
+        dparam[IPARAM_XNORM] = MORSE_zlange_Tile(MorseInfNorm, descX);
+        MORSE_zgemm_Tile( MorseNoTrans, MorseNoTrans, 1.0, descA0, descX, -1.0, descB );
+        dparam[IPARAM_RES] = MORSE_zlange_Tile(MorseInfNorm, descB);
+        PASTE_CODE_FREE_MATRIX( descX )
+        PASTE_CODE_FREE_MATRIX( descA0 )
+        PASTE_CODE_FREE_MATRIX( descB )
+    }
+
+    /* Free Workspace */
+    MORSE_Dealloc_Workspace( &TS );
+    MORSE_Dealloc_Workspace( &TT );
+    /* descA comes from PASTE_CODE_ALLOCATE_MATRIX_TILE like descX/descA0/descB,
+     * so release it through the same macro rather than a bare free(). */
+    PASTE_CODE_FREE_MATRIX( descA );
+    /* NOTE(review): qrtree is initialised but never finalised here; check
+     * whether libhqr provides a matching teardown call - TODO confirm. */
+
+    return 0;
+}