diff --git a/CMakeLists.txt b/CMakeLists.txt index 766390a783946fc8e069b7fce9dd82fdad3f40b3..020837b2501b366b8ec12184a3a9a2eeb714ff46 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,10 @@ project (HQR C) # Check if compiled independtly or within another project if ( ${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) set( BUILD_SUBPROJECT OFF ) + set( INSTALL_HDR_DIR include/hqr ) else() set( BUILD_SUBPROJECT ON ) + set( INSTALL_HDR_DIR include/chameleon ) endif() list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") @@ -44,12 +46,25 @@ set(hdrs include/queue.h ) set(srcs - src/libhqr.c - src/libhqr_dbg.c - src/libhqr_systolic.c + src/low_flat.c + src/low_binary.c + src/low_fibonacci.c + src/low_greedy.c + src/low_greedy1p.c + src/low_adaptiv.c + src/high_flat.c + src/high_binary.c + src/high_fibonacci.c + src/high_greedy.c + src/systolic.c + src/svd.c + src/mtxtree.c + src/hqr.c + src/check.c + src/gendot.c + src/gensvg.c src/queue.c src/treedraw.c - src/treewalk.c ) include_directories(include) @@ -62,7 +77,7 @@ install(FILES include/libhqr.h include/libhqr_common.h # include/libhqr_dbg.h - DESTINATION include ) + DESTINATION ${INSTALL_HDR_DIR} ) install(TARGETS hqr DESTINATION lib) diff --git a/include/libhqr.h b/include/libhqr.h index f255fb55d0c3336e9a01e22a096e5b3c2096a49b..e56fb0ed36539ed26a7e7c736f5b2c0509bb9b2c 100644 --- a/include/libhqr.h +++ b/include/libhqr.h @@ -18,23 +18,24 @@ #ifndef _LIBHQR_H_ #define _LIBHQR_H_ -#include <stdio.h> -#include "libhqr_common.h" +#ifdef BEGIN_C_DECLS +#undef BEGIN_C_DECLS +#endif + +#ifdef END_C_DECLS +#undef END_C_DECLS +#endif + +#if defined(c_plusplus) || defined(__cplusplus) +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else +# define BEGIN_C_DECLS /* empty */ +# define END_C_DECLS /* empty */ +#endif BEGIN_C_DECLS -static inline int libhqr_imin(int a, int b){ - return (a > b) ? b : a; -} - -static inline int libhqr_imax(int a, int b){ - return (a > b) ? a : b; -} - -static inline int libhqr_iceil(int a, int b){ - return (a + b - 1) / b; -} - /** * @brief Define which tree level the tile belongs to. * @@ -74,44 +75,35 @@ typedef enum libhqr_tree_ { LIBHQR_FIBONACCI_TREE = 2, LIBHQR_BINARY_TREE = 3, LIBHQR_GREEDY1P_TREE = 4, -} libqr_tree_e; +} libhqr_tree_e; /** * @brief Define the type of factorization to apply: QR or LQ. */ -typedef enum libhqr_typefacto_ { +typedef enum libhqr_facto_ { LIBHQR_QR = 0, /**< QR factorization will be performed, and A shape is considered */ LIBHQR_LQ = 1, /**< LQ factorization will be performed, and A^t shape is considered */ -} libhqr_typefacto_e; +} libhqr_facto_e; /** - * @brief Minimal structure to define the shape of the matrix to factorize. + * @brief Minimal matrix description stucture. + * + * This structure describes the dimension of the matrix to factorize in number + * of tiles: mt-by-nt. When provided, nodes and p provides the total number of + * nodes involved in the computation, and the P parameter of 2D bloc-cyclic + * distributions of the tiles. LibHQR doesn't support any other distribtions for + * now. */ -typedef struct libhqr_tiledesc_s{ - int mt; /**< The number of rows of tiles */ - int nt; /**< The number of columns of tiles */ +typedef struct libhqr_matrix_s { + int mt; /**< The number of rows of tiles in the matrix */ + int nt; /**< The number of columns of tiles in the matrix */ int nodes; /**< The number of nodes involved in the data distribution */ int p; /**< The number of nodes per column in the data distribution */ -} libhqr_tiledesc_t; - - -/** - * @brief Minimal structure to stock the information for each tile - */ -typedef struct libhqr_tileinfo_s{ - int type; /**< The type of the tile */ - int currpiv; /**< Number of the time annihilating the current tile */ - int nextpiv; /**< The next tile killed by the tile */ - int prevpiv; /**< The previous tile killed by the tile */ - int first_nextpiv; /**< The first next tile killed */ - int first_prevpiv; /**< The first previous tile killed */ -} libhqr_tileinfo_t; +} libhqr_matrix_t; struct libhqr_tree_s; typedef struct libhqr_tree_s libhqr_tree_t; -typedef struct libhqr_context_s libhqr_context_t; - /** * @brief Hierarchical tree structure for QR/LQ factorization like kernels. */ @@ -186,6 +178,7 @@ struct libhqr_tree_s { */ int (*prevpiv)(const libhqr_tree_t *qrtree, int k, int p, int m); + int init; /**< Internal variable */ int mt; /**< Number of rows of tile A if QR factorization, and in A^t if LQ factorization */ int nt; /**< Number of tile reflectors to compute. This is equal to @@ -199,53 +192,41 @@ struct libhqr_tree_s { tree: hqr, systolic, svd, ...) */ }; -int libhqr_systolic_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, - int p, int q ); -void libhqr_systolic_finalize( libhqr_tree_t *qrtree ); +int libhqr_init_sys( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int p, int q ); -int libhqr_svd_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, +int libhqr_init_svd( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, int type_hlvl, int p, int nbcores_per_node, int ratio ); -int libhqr_hqr_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, +int libhqr_init_hqr( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, int type_llvl, int type_hlvl, int a, int p, int domino, int tsrr ); -void libhqr_hqr_finalize( libhqr_tree_t *qrtree ); +void libhqr_finalize( libhqr_tree_t *qrtree ); - -void libhqr_matrix_init(libhqr_tree_t *qrtree, const libhqr_tree_t *qrtree_init); -int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m); -int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m); -int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m); -int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m); -void libhqr_matrix_finalize(libhqr_tree_t *qrtree); +void libhqr_walk_stepk( const libhqr_tree_t *qrtree, int k, int *tiles ); /* - * function for drawing the tree + * Function for drawing the tree */ - -void libhqr_treewalk(const libhqr_tree_t *qrtree, int k, int *tiles); -void draw_rectangle(int k, int p, int m, int step_m, FILE *file); -void draw_lines(const libhqr_tree_t *qrtree, int k, int *tiles, int *step, FILE *file); -void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *file); -void draw_vertical_line(int k, int p, int m, int step_m, FILE *file); -void libhqr_print_tree(const libhqr_tree_t *qrtree, libhqr_tiledesc_t *matrix); +void libhqr_print_dag( const libhqr_tree_t *qrtree, + const char *filename ); +void libhqr_print_svg( const libhqr_tree_t *qrtree, + const char *filename ); /* * Debugging functions */ - -int libhqr_tree_check ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); -void libhqr_tree_print_dag ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, char *filename ); -void libhqr_tree_print_type ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); -void libhqr_tree_print_pivot ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); -void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); -void libhqr_tree_print_perm ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *perm ); -void libhqr_tree_print_next_k ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ); -void libhqr_tree_print_prev_k ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ); -void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ); +int libhqr_check ( libhqr_matrix_t *A, libhqr_tree_t *qrtree ); +void libhqr_print_type ( libhqr_matrix_t *A, libhqr_tree_t *qrtree ); +void libhqr_print_pivot ( libhqr_matrix_t *A, libhqr_tree_t *qrtree ); +void libhqr_print_nbgeqrt( libhqr_matrix_t *A, libhqr_tree_t *qrtree ); +void libhqr_print_perm ( libhqr_matrix_t *A, libhqr_tree_t *qrtree, int *perm ); +void libhqr_print_next_k ( libhqr_matrix_t *A, libhqr_tree_t *qrtree, int k ); +void libhqr_print_prev_k ( libhqr_matrix_t *A, libhqr_tree_t *qrtree, int k ); +void libhqr_print_geqrt_k( libhqr_matrix_t *A, libhqr_tree_t *qrtree, int k ); END_C_DECLS diff --git a/include/libhqr_internal.h b/include/libhqr_internal.h index 0b7dccadcfb69dee13e4cb93e2669a238b5988dd..a51c60701b640203764f255cc9e1ab8a04f8d43c 100644 --- a/include/libhqr_internal.h +++ b/include/libhqr_internal.h @@ -150,6 +150,10 @@ void draw_lines(const libhqr_tree_t *qrtree, int k, int *tiles, int *step, FILE void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *file); void draw_vertical_line( int k, int p, int m, int step_m, FILE *file); +/** + * @name Low level trees + * @{ + */ void hqr_low_flat_init ( hqr_subpiv_t *arg ); void hqr_low_binary_init ( hqr_subpiv_t *arg ); void hqr_low_fibonacci_init( hqr_subpiv_t *arg, int minMN ); @@ -157,10 +161,21 @@ void hqr_low_greedy_init ( hqr_subpiv_t *arg, int minMN ); void hqr_low_greedy1p_init ( hqr_subpiv_t *arg, int minMN ); void svd_low_adaptiv_init ( hqr_subpiv_t *arg, int gmt, int gnt, int nbcores, int ratio ); +/** + * @} + * + * @name High level trees + * @{ + */ void hqr_high_flat_init ( hqr_subpiv_t *arg ); void hqr_high_binary_init ( hqr_subpiv_t *arg ); void hqr_high_fibonacci_init( hqr_subpiv_t *arg ); void hqr_high_greedy_init ( hqr_subpiv_t *arg, int minMN ); void hqr_high_greedy1p_init ( hqr_subpiv_t *arg ); +/** + * @} + */ + +void libhqr_fct_to_mtx( const libhqr_tree_t *in, libhqr_tree_t *out ); #endif /* _LIBHQR_INTERNAL_H_ */ diff --git a/src/check.c b/src/check.c index cc333f41fbafe64b3061bd13356f7dfc53c8f959..9a477f27d941d5765ddcf075a429dfacf66c6978 100644 --- a/src/check.c +++ b/src/check.c @@ -1,6 +1,6 @@ /** * - * @file libhqr_dbg.c + * @file check.c * * @copyright 2010-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -15,7 +15,7 @@ * @date 2017-03-21 * */ -#include "libhqr.h" +#include "libhqr_internal.h" #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -30,7 +30,7 @@ return ret; \ } -int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) +int libhqr_check( libhqr_matrix_t *A, libhqr_tree_t *qrtree) { int minMN = libhqr_imin(A->mt, A->nt ); int i, m, k, nb; @@ -43,8 +43,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) * Check Formula for NB geqrt */ { - /* libhqr_tree_print_type( A, qrtree ); */ - /* libhqr_tree_print_nbgeqrt( A, qrtree ); */ + /* libhqr_print_type( A, qrtree ); */ + /* libhqr_print_nbgeqrt( A, qrtree ); */ check = 1; for (k=0; k<minMN; k++) { nb = 0; @@ -73,7 +73,7 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) int prevm = -1; check = 1; for (k=0; k<minMN; k++) { - /* libhqr_tree_print_geqrt_k( A, qrtree, k ); */ + /* libhqr_print_geqrt_k( A, qrtree, k ); */ nb = qrtree->getnbgeqrf( qrtree, k ); prevm = -1; for (i=0; i < nb; i++) { @@ -134,8 +134,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) nb++; } if ( nb > 1 ) { - libhqr_tree_print_next_k( A, qrtree, k); - libhqr_tree_print_prev_k( A, qrtree, k); + libhqr_print_next_k( A, qrtree, k); + libhqr_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -146,8 +146,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) return 3; } else if ( nb == 0 ) { - libhqr_tree_print_next_k( A, qrtree, k); - libhqr_tree_print_prev_k( A, qrtree, k); + libhqr_print_next_k( A, qrtree, k); + libhqr_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -177,8 +177,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) nb++; } if ( nb > 1 ) { - libhqr_tree_print_next_k( A, qrtree, k); - libhqr_tree_print_prev_k( A, qrtree, k); + libhqr_print_next_k( A, qrtree, k); + libhqr_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -189,8 +189,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) return 3; } else if ( nb == 0 ) { - libhqr_tree_print_next_k( A, qrtree, k); - libhqr_tree_print_prev_k( A, qrtree, k); + libhqr_print_next_k( A, qrtree, k); + libhqr_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -224,8 +224,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) prev = qrtree->prevpiv(qrtree, k, m, next); if ( start != prev ) { - libhqr_tree_print_next_k( A, qrtree, k); - libhqr_tree_print_prev_k( A, qrtree, k); + libhqr_print_next_k( A, qrtree, k); + libhqr_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -246,7 +246,7 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) return 0; } -void libhqr_tree_print_type( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) +void libhqr_print_type( libhqr_matrix_t *A, libhqr_tree_t *qrtree ) { int minMN = libhqr_imin(A->mt, A->nt ); int m, k; @@ -282,7 +282,7 @@ void libhqr_tree_print_type( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) } } -void libhqr_tree_print_pivot( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) +void libhqr_print_pivot( libhqr_matrix_t *A, libhqr_tree_t *qrtree ) { int minMN = libhqr_imin(A->mt, A->nt ); int m, k; @@ -317,7 +317,7 @@ void libhqr_tree_print_pivot( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) } } -void libhqr_tree_print_next_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ) +void libhqr_print_next_k( libhqr_matrix_t *A, libhqr_tree_t *qrtree, int k ) { int m, s; printf("\n------------ Next (k = %d)--------------\n", k); @@ -336,7 +336,7 @@ void libhqr_tree_print_next_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int } } -void libhqr_tree_print_prev_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ) +void libhqr_print_prev_k( libhqr_matrix_t *A, libhqr_tree_t *qrtree, int k ) { int m, s; printf("\n------------ Prev (k = %d)--------------\n", k); @@ -355,7 +355,7 @@ void libhqr_tree_print_prev_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int } } -void libhqr_tree_print_perm( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *perm ) +void libhqr_print_perm( libhqr_matrix_t *A, libhqr_tree_t *qrtree, int *perm ) { int minMN = libhqr_imin(A->mt, A->nt ); int m, k; @@ -380,7 +380,7 @@ void libhqr_tree_print_perm( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *p printf( "\n" ); } -void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) +void libhqr_print_nbgeqrt( libhqr_matrix_t *A, libhqr_tree_t *qrtree ) { int minMN = libhqr_imin(A->mt, A->nt ); int m, k, nb; @@ -408,7 +408,7 @@ void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) printf( "\n" ); } -void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ) +void libhqr_print_geqrt_k( libhqr_matrix_t *A, libhqr_tree_t *qrtree, int k ) { int i, m, nb; (void)A; @@ -426,125 +426,3 @@ void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int } printf( "\n" ); } - - -/* static int libhqr_tree_getinon0( const libhqr_tree_t *qrtree, */ -/* const int k, int i, int mt ) */ -/* { */ -/* int j; */ -/* for(j=k; j<mt; j++) { */ -/* if ( libhqr_tree_gettype( qrtree, k, j ) != 0 ) */ -/* i--; */ -/* if ( i == -1 ) */ -/* break; */ -/* } */ -/* return qrtree->perm[k*(qrtree->desc->mt+1) + j]; */ -/* } */ - -#define DAG_HEADER "digraph G { orientation=portrait; \n" -#define DAG_FOOTER "} // close graph\n" -#define DAG_LABELNODE "%d [label=\"%d\",color=white,pos=\"-1.,-%d.!\"]\n" -#define DAG_LENGTHNODE "l%d [label=\"%d\",color=white,pos=\"%d.,0.5!\"]\n" -#define DAG_INVISEDGE "%d->%d [style=\"invis\"];\n" -#define DAG_STARTNODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n" -#define DAG_NODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n" -#define DAG_FIRSTEDGE_PIV "%d->p%d_m%d_k0\n" -#define DAG_FIRSTEDGE_TS "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n" -#define DAG_FIRSTEDGE_TT "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n" -#define DAG_EDGE_PIV "p%d_m%d_k%d->p%d_m%d_k%d [width=0.1,color=\"%s\"]\n" -#define DAG_EDGE_TS "p%d_m%d_k%d->p%d_m%d_k%d [style=dotted, width=0.1,color=\"%s\"]\n" -#define DAG_EDGE_TT "p%d_m%d_k%d->p%d_m%d_k%d [style=dashed, width=0.1,color=\"%s\"]\n" - -char *color[] = { - "red", - "blue", - "green", - "orange", - "cyan", - "purple", - "yellow", -}; -#define DAG_NBCOLORS 7 - -void libhqr_tree_print_dag( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, char *filename ) -{ - int *pos, *next, *done; - int k, m, n, lpos, prev, length; - int minMN = libhqr_imin( A->mt, A->nt ); - FILE *f = fopen( filename, "w" ); - - done = (int*)malloc( A->mt * sizeof(int) ); - pos = (int*)malloc( A->mt * sizeof(int) ); - next = (int*)malloc( A->mt * sizeof(int) ); - memset(pos, 0, A->mt * sizeof(int) ); - memset(next, 0, A->mt * sizeof(int) ); - - /* Print header */ - fprintf(f, DAG_HEADER ); /*, A->mt+2, minMN+2 );*/ - for(m=0; m < A->mt; m++) { - fprintf(f, DAG_LABELNODE, m, m, m); - } - - for(k=0; k<minMN; k++ ) { - int nb2reduce = A->mt - k - 1; - - for(m=k; m < A->mt; m++) { - fprintf(f, DAG_STARTNODE, m, A->mt, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]); - next[m] = qrtree->nextpiv( qrtree, k, m, A->mt); - } - - while( nb2reduce > 0 ) { - memset(done, 0, A->mt * sizeof(int) ); - for(m=A->mt-1; m > (k-1); m--) { - n = next[m]; - if ( next[n] != A->mt ) - continue; - if ( n != A->mt ) { - lpos = libhqr_imax( pos[m], pos[n] ); - lpos++; - pos[m] = lpos; - pos[n] = lpos; - - fprintf(f, DAG_NODE, m, n, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]); - - prev = qrtree->prevpiv( qrtree, k, m, n ); - fprintf(f, DAG_EDGE_PIV, - m, prev, k, - m, n, k, - color[ (m%qrtree->p) % DAG_NBCOLORS ]); - - prev = qrtree->prevpiv( qrtree, k, n, n ); - if ( qrtree->gettype(qrtree, k, n) == 0 ) - fprintf(f, DAG_EDGE_TS, - n, prev, k, - m, n, k, - color[ (m%qrtree->p) % DAG_NBCOLORS ]); - else - fprintf(f, DAG_EDGE_TT, - n, prev, k, - m, n, k, - color[ (m%qrtree->p) % DAG_NBCOLORS ]); - - next[m] = qrtree->nextpiv( qrtree, k, m, n); - done[m] = done[n] = 1; - nb2reduce--; - } - } - } - } - - length = 0; - for(m=0; m < A->mt; m++) { - length = libhqr_imax(length, pos[m]); - } - length++; - for(k=0; k<length; k++) - fprintf(f, DAG_LENGTHNODE, k, k, k); - fprintf(f, DAG_FOOTER); - - printf("Tic Max = %d\n", length-1); - - fclose( f ); - free(pos); - free(next); -} diff --git a/src/gensvg.c b/src/gensvg.c index 81a74e732c16bb0e3f070d26fa5a6350dc4a7bb8..63c1f361f2d4e46b21869d0a9f73548cdbca84e1 100644 --- a/src/gensvg.c +++ b/src/gensvg.c @@ -1,6 +1,6 @@ /** * - * @file treewalk.c + * @file gensvg.c * * File for algorithm of treewalking. * @@ -13,8 +13,7 @@ * @date 2017-03-21 * */ - -#include "libhqr.h" +#include "libhqr_internal.h" #include "libhqr_draw.h" #include "libhqr_queue.h" #include <stdio.h> @@ -23,15 +22,124 @@ #include <string.h> /* - * Common prameters to the 3 following functions: - * k - Factorization step for the color - * p - annihilator - * m - tile eliminated - * file - File where the lines are drawn - * tiles - table stocking the tiles + * Global array for color + */ + +char *colortree[4] = {"red", "blue", "green", "purple"}; + +/* + * functions writing in the svg file */ +static void +drawsvg_header( FILE *file ) +{ + int rc; + + rc = fprintf(file, + "<?xml version=\"1.0\" standalone=\"no\"?>\n" + "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\" \n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n" + "<svg width=\"2000\" height=\"2000\" version=\"1.1\" \n xmlns=\"http://www.w3.org/2000/svg\">\n"); + + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_header)\n"); + } + return; +} + +static void +drawsvg_top_TS( FILE *file, int k, int x, int y, int w, int h ) +{ + int rc; + rc = fprintf(file,"<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\" fill=\"%s\" /> \n", x, y, w, h, colortree[k%4]); + + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_top_TS)\n"); + } + return; +} + +static void +drawsvg_bot_TS( FILE *file, int k, int x, int y, int w, int h ) +{ + int rc, x2, y2, w2, h2; -void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *file){ + rc = fprintf(file,"<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\" fill=\"%s\" /> \n", x, y, w, h, colortree[k%4]); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TS)\n"); + return; + } + + x2 = x + (w / 4); + y2 = y + (h / 4); + w2 = (w / 2); + h2 = (h / 2); + + rc = fprintf(file,"<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\" fill =\"white\"/> \n", x2, y2, w2, h2); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TS)\n"); + } + return; +} + +static void +drawsvg_top_TT( FILE *file, int k, int x, int y, int w, int h ) +{ + int rc; + rc = fprintf( file,"<circle cx=\"%d\" cy=\"%d\" r=\"%d\" fill=\"%s\" /> \n", + x + w / 2, y + h / 2, w / 2, colortree[k%4] ); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_top_TT)\n"); + } + return; +} + +static void +drawsvg_bot_TT( FILE *file, int k, int x, int y, int w, int h ) +{ + int rc; + rc = fprintf( file,"<circle cx=\"%d\" cy=\"%d\" r=\"%d\" fill=\"%s\" /> \n", + x + w / 2, y + h / 2, w / 2, colortree[k%4] ); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TT)\n"); + return; + } + + rc = fprintf( file,"<circle cx=\"%d\" cy=\"%d\" r=\"%d\" fill=\"white\" /> \n", + x + w / 2, y + h / 2, w / 4 ); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TT)\n"); + } + return; +} + +static void +drawsvg_line( FILE *file, int k, int x1, int y1, int x2, int y2 ) +{ + int rc; + rc = fprintf(file,"<line x1=\"%d\" y1=\"%d\" x2=\"%d\" y2=\"%d\" style=\"fill:none;stroke:%s;stroke-width:2px;\"/> \n", x1, y1, x2, y2, colortree[k%4]); + + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_line)\n"); + } + return; +} + +static void +drawsvg_footer( FILE *file ) +{ + int rc; + rc = fprintf(file, "</svg>\n"); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_footer)\n"); + } + return; +} + + +static void +drawsvg_lines_rowm( FILE *file, int k, + int p, int m, int beg_p, int beg_m, int end ) +{ int yp, ym; int x, xp, xm; @@ -40,153 +148,93 @@ void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *fil yp = SIZE + SIZE * p; /* Starting position of the tiles */ - xm = (SIZE + (SIZE / 4)) + SIZE * step_m; - xp = (SIZE + (SIZE / 4)) + SIZE * step_p; + xm = (SIZE + (SIZE / 4)) + SIZE * beg_m; + xp = (SIZE + (SIZE / 4)) + SIZE * beg_p; /* Final position of the tiles */ - x = libhqr_imax(step_m, step_p) + 1; - x = (SIZE + (SIZE / 4)) + SIZE * x; - - libhqr_drawline( xm, ym, x, ym, k, file ); - libhqr_drawline( xp, yp, x, yp, k, file ); -} + x = SIZE + SIZE * end; -void draw_vertical_line(int k, int p, int m, int step_m, FILE *file){ - int absciss, ym, yp; - absciss = SIZE + SIZE * step_m; - ym = SIZE + SIZE * m; - yp = SIZE + SIZE * p; - libhqr_drawline(absciss, ym, absciss, yp, k, file); -} + /* Horizontal lines */ + drawsvg_line( file, k, xm, ym, x + (SIZE / 4), ym ); + drawsvg_line( file, k, xp, yp, x + (SIZE / 4), yp ); -void draw_rectangle(int k, int p, int m, int step_m, FILE *file){ - int absciss, ordinate; - absciss = ((SIZE * 3) /4) + SIZE * step_m; - ordinate = ((SIZE * 3) /4) + SIZE * m; - libhqr_drawTS(absciss, ordinate, WIDTH, HEIGHT, k, file); - ordinate = ((SIZE * 3) /4) + SIZE * p; - libhqr_drawTT(absciss, ordinate, WIDTH, HEIGHT, k, file); + /* Vertical line */ + drawsvg_line( file, k, x, ym, x, yp ); } -void draw_lines(const libhqr_tree_t *qrtree, int k, int *tiles, int *step, FILE *file){ - int i, m, p, tmp; +static void +drawsvg_lines_stepk( const libhqr_tree_t *qrtree, FILE *file, + int k, int *tiles, int *step ) +{ + int i, m, p, end; /* Get order for step k */ - libhqr_treewalk(qrtree, k, tiles); + libhqr_walk_stepk( qrtree, k, tiles+(k+1) ); - /* Draw lines */ - for(i = k; i < (qrtree->mt-1); i++){ + for(i = k+1; i < qrtree->mt; i++){ m = tiles[i]; p = qrtree->currpiv(qrtree, k, m); - draw_horizontal_line(k, p, m, step[p], step[m], file); + end = libhqr_imax( step[p], step[m] ) + 1; + + /* Draw horizontal lines for rows p and m */ + drawsvg_lines_rowm( file, k, p, m, step[p], step[m], end ); - tmp = libhqr_imax( step[p], step[m] ) + 1; - step[m] = tmp; - step[p] = tmp; - draw_vertical_line(k, p, m, tmp, file); + /* Update last time the rows p and m have been modified for future lines */ + step[m] = end; + step[p] = end; } } -/**************************************************** - * LIBHQR_TREEWALK - ***************************************************/ - -/** - * libhqr_treewalk - * - * @param[in] arg - * Arguments specific to the reduction tree used - * - * @param[in] k - * Factorization step - * - * @param[in] tiles - * Table stocking the tiles and their step in the order they are killed - * - */ -void libhqr_treewalk(const libhqr_tree_t *qrtree, int k, int *tiles) +static void +drawsvg_nodes_rowm( FILE *file, int k, + int type, int p, int m, int step_m ) { - int tsid = -1, ttid = -1; - int p = k; - int pivot = k; - int m = k; - libhqr_queue_tile_t *tt = libhqr_queue_tile_new(); - libhqr_queue_tile_t *ts = libhqr_queue_tile_new(); - libhqr_queue_tile_push(&tt, k); - - while( tt ) - { - /* Stack all elements killed on the way down */ - p = qrtree->prevpiv(qrtree, k, pivot, p); - while( p != qrtree->mt ) { - /* If it's a TS tile, or a TT at the bottom of the tree that kills noone */ - if( (qrtree->gettype(qrtree, k, p) == 0) || - (qrtree->prevpiv(qrtree, k, p, p) != qrtree->mt) ) - { - libhqr_queue_tile_push(&tt,p); - /* printf("PUSH TT: %d\n", p); */ - } - libhqr_queue_tile_push(&ts, p); - /* printf("PUSH TS: %d\n", p); */ - p = qrtree->prevpiv(qrtree, k, pivot, p); - assert( p != -1 ); - } - - tsid = libhqr_queue_tile_head(&ts); - ttid = libhqr_queue_tile_pop(&tt); - - /* Unstack all element killed by TS, or by TT and already discovered */ - while(tsid != -1 && tsid != ttid) { - /* printf( "TS=%d, TT=%d\n", tsid, ttid ); */ - - tsid = libhqr_queue_tile_pop(&ts); - /* printf("POP TS: %d\n", tsid); */ - /* printf("Call function on (%d, %d)\n", - qrtree->currpiv(qrtree, k, tsid), tsid ); */ - assert(m < (qrtree->mt-1)); - tiles[m] = tsid; - m++; - tsid = libhqr_queue_tile_head(&ts); - } - pivot = p = ttid; + int x, yp, ym; + x = ((SIZE * 3) / 4) + SIZE * step_m; + ym = ((SIZE * 3) / 4) + SIZE * m; + yp = ((SIZE * 3) / 4) + SIZE * p; + + if ( type == LIBHQR_KILLED_BY_TS ) { + drawsvg_top_TS(file, k, x, yp, WIDTH, HEIGHT ); + drawsvg_bot_TS(file, k, x, ym, WIDTH, HEIGHT ); + } + else { + drawsvg_top_TT(file, k, x, yp, WIDTH, HEIGHT ); + drawsvg_bot_TT(file, k, x, ym, WIDTH, HEIGHT ); } - - assert(m == (qrtree->mt-1)); - assert(ts == NULL); - assert(tt == NULL); } -/**************************************************** - * LIBHQR_PRINT_TREE - ***************************************************/ - -void libhqr_print_tree(const libhqr_tree_t *qrtree, libhqr_tiledesc_t *matrix){ - int maxMN, minMN, k, i; - int *tiles; - int *step; +void +libhqr_print_svg( const libhqr_tree_t *qrtree, + const char *filename ) +{ FILE *file; + int *tiles, *steps; + int minMN, k, i; - maxMN = libhqr_imax(matrix->mt, matrix->nt ); - minMN = libhqr_imin(matrix->mt, matrix->nt ); + minMN = libhqr_imin( qrtree->mt, qrtree->nt ); - tiles = (int*)malloc(maxMN*sizeof(int)); - memset( tiles, 0, maxMN*sizeof(int) ); + tiles = (int*)calloc( qrtree->mt, sizeof(int)); + steps = (int*)calloc( qrtree->mt, sizeof(int)); - step = (int*) malloc( qrtree->mt * sizeof(int) ); - memset( step, 0, qrtree->mt * sizeof(int) ); + file = fopen(filename,"w+"); - file = fopen("tree.svg","w+"); - libhqr_writeheader(file); - for (k = 0; k < minMN; k++){ + drawsvg_header(file); + for (k = 0; k < minMN; k++) { /* Drawing the lines */ - draw_lines(qrtree, k, tiles, step, file); + drawsvg_lines_stepk( qrtree, file, k, tiles, steps ); + /* Drawing the rectangles */ - for (i = k + 1; i < maxMN; i++){ - draw_rectangle(k, qrtree->currpiv(qrtree, k, i), i, step[i], file); + for (i = k+1; i < qrtree->mt; i++) { + drawsvg_nodes_rowm( file, k, + qrtree->gettype(qrtree, k, i), + qrtree->currpiv(qrtree, k, i), i, steps[i] ); } } - libhqr_writeend(file); + drawsvg_footer(file); fclose(file); + free(tiles); + free(steps); } diff --git a/src/hqr.c b/src/hqr.c index f064e31feeef814ee6132201bb7b97eeae902bbc..ee6bce47ab7a80c7604ef67e98894a92060f02ad 100644 --- a/src/hqr.c +++ b/src/hqr.c @@ -1,6 +1,6 @@ /** * - * @file libhqr.c + * @file hqr.c * * @copyright 2010-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -77,166 +77,22 @@ * high level tree to reduce communications. * These lines are defined by (i-k)/p = 0. */ -#include "libhqr.h" -#include <assert.h> +#include "libhqr_internal.h" +#include "libhqr_queue.h" #include <stdlib.h> -#include <stdio.h> -#include <math.h> -#include <string.h> - -#define PRINT_PIVGEN 0 -#ifdef PRINT_PIVGEN -#define myassert( test ) {if ( ! (test) ) return -1;} -#else -#define myassert(test) {assert((test)); return -1;} -#endif - -struct hqr_args_s; -typedef struct hqr_args_s hqr_args_t; - -struct hqr_subpiv_s; -typedef struct hqr_subpiv_s hqr_subpiv_t; - -/** - * @brief jhj - */ -struct hqr_args_s { - int domino; /**< Switch to enable/disable the domino tree linking high and lw level reduction trees */ - int tsrr; /**< Switch to enable/disable round-robin on TS to optimise pipelining between TS and local tree */ - hqr_subpiv_t *llvl; - hqr_subpiv_t *hlvl; - int *perm; -}; - -struct hqr_subpiv_s { - /** - * currpiv - * @param[in] arg pointer to the qr_piv structure - * @param[in] k step in the factorization - * @param[in] m line you want to eliminate - * - * @return the annihilator p used with m at step k - */ - int (*currpiv)(const hqr_subpiv_t *arg, int k, int m); - /* - * nextpiv - * @param[in] arg pointer to the qr_piv structure - * @param[in] k step in the factorization - * @param[in] p line currently used as an annihilator - * @param[in] m line actually annihilated. - * m = MT to find the first time p is used as an annihilator during step k - * - * @return the next line m' using the line p as annihilator during step k - * mt if p will never be used again as an annihilator. - */ - int (*nextpiv)(const hqr_subpiv_t *arg, int k, int p, int m); - /* - * prevpiv - * @param[in] arg pointer to the qr_piv structure - * @param[in] k step in the factorization - * @param[in] p line currently used as an annihilator - * @param[in] m line actually annihilated. - * m = p to find the last time p has been used as an annihilator during step k - * - * @return the previous line m' using the line p as annihilator during step k - * mt if p has never been used before that as an annihilator. - */ - int (*prevpiv)(const hqr_subpiv_t *arg, int k, int p, int m); - int *ipiv; - int minMN; - int ldd; - int a; - int p; - int domino; -}; /* * Common functions */ -static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); -static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ); -static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ); -static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ); +static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); +static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ); +static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ); +static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ); /* Permutation */ static void hqr_genperm ( libhqr_tree_t *qrtree ); static int hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m ); -/* - * Subtree for low-level - */ -static void hqr_low_flat_init( hqr_subpiv_t *arg); -static void hqr_low_greedy_init( hqr_subpiv_t *arg, int minMN); -static void hqr_low_binary_init( hqr_subpiv_t *arg); -static void hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN); - - -/**************************************************** - * Reading functions - ************************************************** - * - * Common parameters for the following functions - * qrtree - tree used for the factorization - * k - Step k of the QR factorization - * m - line anhilated - */ - -int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m){ - int id; - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - id = (k * qrtree->mt) + m; - return arg[id].type; -} - -int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m){ - int id, perm_m, p, a; - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - perm_m = m; - p = qrtree->p; - a = qrtree->a; - myassert( (p==1) || (perm_m / (p*a)) == (m / (p*a)) ); - myassert( (p==1) || (perm_m % p) == (m % p) ); - id = (k * qrtree->mt) + m; - return arg[id].currpiv; -} - -/* - * Extra parameter: - * p - line used as pivot - */ - -int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ - int id; - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - int gmt = qrtree->mt; - myassert( m > p && p >= k ); - myassert( m == gmt || p == rdmtx_currpiv( qrtree, k, m ) ); - if(m == qrtree->mt){ - id = (k * qrtree->mt) + p; - return arg[id].first_nextpiv; - } - else{ - id = (k * qrtree->mt) + m; - return arg[id].nextpiv; - } -} - -int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ - int id; - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - int gmt = qrtree->mt; - myassert( m >= p && p >= k && m < gmt ); - myassert( m == p || p == rdmtx_currpiv( qrtree, k, m ) ); - - if(m == p){ - id = (k * qrtree->mt) + p; - return arg[id].first_prevpiv; - } - else{ - id = (k * qrtree->mt) + m; - return arg[id].prevpiv; - } -} /**************************************************** * Common ipiv *************************************************** @@ -247,7 +103,6 @@ int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ * k - Step k of the QR factorization * */ -#define nbextra1_formula ( (k % pa) > (pa - p) ) ? (-k)%pa + pa : 0 /* * Extra parameter: @@ -255,7 +110,9 @@ int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ * Return: * The number of geqrt to execute in the panel k */ -static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { +static int +hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) +{ int a = qrtree->a; int p = qrtree->p; int gmt = qrtree->mt; @@ -275,7 +132,7 @@ static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { nb_11 = ( ( (p * (k+1)) + pa - 1 ) / pa ) * pa; } else { - /* Number of extra tile of type 1 between the the tile of type 3 and the first of nb11 */ + /* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */ nb_2 = nbextra1_formula; /* First multiple of p*a under the diagonal of step 1 */ @@ -300,7 +157,8 @@ static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { * Return: * The global indice m of the i th geqrt in the panel k */ -static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ) +static int +hqr_getm( const libhqr_tree_t *qrtree, int k, int i ) { int a = qrtree->a; int p = qrtree->p; @@ -331,7 +189,8 @@ static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ) * Return: * The index i of the geqrt in the panel k */ -static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ) +static int +hqr_geti( const libhqr_tree_t *qrtree, int k, int m ) { int a = qrtree->a; int p = qrtree->p; @@ -367,7 +226,9 @@ static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ) * 2 - if m is reduced thanks to the bubble tree * 3 - if m is reduced in distributed */ -static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ) { +static int +hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ) +{ int a = qrtree->a; int p = qrtree->p; int domino = qrtree->domino; @@ -392,2040 +253,204 @@ static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ) { } /**************************************************** - * HQR_LOW_FLAT_TREE + * + * Generic functions currpiv,prevpiv,nextpiv + * ***************************************************/ -static int hqr_low_flat_currpiv(const hqr_subpiv_t *arg, int k, int m) +static int +hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m) { - (void)m; - if ( arg->domino ) - return k / arg->a; - else - return (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a ; -}; + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, tmpk; + int lm, rank; + int a = qrtree->a; + int p = qrtree->p; + int domino = qrtree->domino; + int gmt = qrtree->mt; -static int hqr_low_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) -{ - int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; - int p_pa = (p / arg->p ) / arg->a; - -#ifdef FLAT_UP - if ( ( p_pa == k_a ) && (start_pa > k_a+1 ) ) - return start_pa-1; - else -#else /* FLAT_DOWN */ - if ( start_pa <= p_pa ) - return arg->ldd; - - if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) { - if ( start_pa == arg->ldd ) - return p_pa+1; - else if ( start_pa < arg->ldd ) - return start_pa+1; - } -#endif - return arg->ldd; -} + lm = m / p; /* Local index in the distribution over p domains */ + rank = m % p; /* Staring index in this distribution */ -static int hqr_low_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) -{ - int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; - int p_pa = (p / arg->p ) / arg->a; - -#ifdef FLAT_UP - if ( p_pa == k_a && (start_pa+1 < arg->ldd) ) - return start_pa+1; - else -#else - if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) { - if ( start_pa == p_pa ) - return arg->ldd - 1; - else if ( start_pa > p_pa + 1 ) - return start_pa-1; + /* TS level common to every case */ + if ( domino ) { + switch( hqr_gettype( qrtree, k, m ) ) + { + case 0: + tmp = lm / a; + if ( tmp == k / a ) + return k * p + rank ; /* Below to the first bloc including the diagonal */ + else + return tmp * a * p + rank ; + break; + case 1: + tmp = arg->llvl->currpiv(arg->llvl, k, m); + return ( tmp == k / a ) ? k * p + rank : tmp * a * p + rank; + break; + case 2: + return m - p; + break; + case 3: + if ( arg->hlvl != NULL ) + return arg->hlvl->currpiv(arg->hlvl, k, m); + default: + return gmt; + } } -#endif - return arg->ldd; -}; - -static void hqr_low_flat_init(hqr_subpiv_t *arg){ - arg->currpiv = hqr_low_flat_currpiv; - arg->nextpiv = hqr_low_flat_nextpiv; - arg->prevpiv = hqr_low_flat_prevpiv; - arg->ipiv = NULL; -}; - -/**************************************************** - * HQR_LOW_BINARY_TREE - ***************************************************/ -static int hqr_low_binary_currpiv(const hqr_subpiv_t *arg, int k, int m) -{ - int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a; - int m_pa = (m / arg->p ) / arg->a; - - int tmp1 = m_pa - k_a; - int tmp2 = 1; - (void)arg; - - if ( tmp1 == 0) - return 0; - while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) { - tmp1 = tmp1 >> 1; - tmp2 = tmp2 << 1; + else { + switch( hqr_gettype( qrtree, k, m ) ) + { + case 0: + tmp = lm / a; + /* tmpk = (k + p - 1 - m%p) / p / a; */ + tmpk = k / (p * a); + return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ; + break; + case 1: + tmp = arg->llvl->currpiv(arg->llvl, k, m); + /* tmpk = (k + p - 1 - m%p) / p / a; */ + tmpk = k / (p * a); + return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ; + break; + case 2: + return m - p; + break; + case 3: + if ( arg->hlvl != NULL ) + return arg->hlvl->currpiv(arg->hlvl, k, m); + default: + return gmt; + } } - assert( m_pa - tmp2 >= k_a ); - return m_pa - tmp2; }; -static int hqr_low_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) +/** + * hqr_nextpiv - Computes the next row killed by the row p, after + * it has kill the row start. + * + * @param[in] k + * Factorization step + * + * @param[in] pivot + * Line used as killer + * + * @param[in] start + * Starting point to search the next line killed by p after start + * start must be equal to A.mt to find the first row killed by p. + * if start != A.mt, start must be killed by p + * + * @return: + * - -1 if start doesn't respect the previous conditions + * - m, the following row killed by p if it exists, A->mt otherwise + */ +static int +hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { - int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; - int p_pa = (p / arg->p ) / arg->a; + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, ls, lp, nextp; + int lpivot, rpivot, lstart; + int a = qrtree->a; + int p = qrtree->p; + int gmt = qrtree->mt; - int tmpp, bit; - myassert( (start_pa == arg->ldd) || (hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa || !arg->domino) ); + lpivot = pivot / p; /* Local index in the distribution over p domains */ + rpivot = pivot % p; /* Staring index in this distribution */ - if ( start_pa <= p_pa ) - return arg->ldd; + /* Local index in the distribution over p domains */ + lstart = ( start == gmt ) ? arg->llvl->ldd * a : start / p; - int offset = p_pa-k_a; - bit = 0; - if (start_pa != arg->ldd) { - while( ( (start_pa-k_a) & (1 << bit ) ) == 0 ) - bit++; - bit++; - } + myassert( start > pivot && pivot >= k ); + myassert( start == gmt || pivot == hqr_currpiv( qrtree, k, start ) ); - tmpp = offset | (1 << bit); - if ( ( tmpp != offset ) && ( tmpp+k_a < arg->ldd ) ) - return tmpp + k_a; - else - return arg->ldd; -}; + /* TS level common to every case */ + ls = (start < gmt) ? hqr_gettype( qrtree, k, start ) : -1; + lp = hqr_gettype( qrtree, k, pivot ); -static int hqr_low_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) -{ - int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; - int p_pa = (p / arg->p ) / arg->a; - int offset = p_pa - k_a; - - myassert( start_pa >= p_pa && ( start_pa == p_pa || !arg->domino || - hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa ) ); - - if ( (start_pa == p_pa) && ( offset%2 == 0 ) ) { - int i, bit, tmp; - if ((p_pa - k_a) == 0) - bit = (int)( log( (double)(arg->ldd - k_a) ) / log( 2. ) ); - else { - bit = 0; - while( (offset & (1 << bit )) == 0 ) - bit++; - } - for( i=bit; i>-1; i--){ - tmp = offset | (1 << i); - if ( ( offset != tmp ) && ( tmp+k_a < arg->ldd ) ) - return tmp+k_a; - } - return arg->ldd; - } + switch( ls ) + { + case -1: - if ( (start_pa - p_pa) > 1 ) - return p_pa + ( (start_pa - p_pa) >> 1 ); - else { - return arg->ldd; - } -}; + if ( lp == LIBHQR_KILLED_BY_TS ) { + myassert( start == gmt ); + return gmt; + } -static void hqr_low_binary_init(hqr_subpiv_t *arg){ - arg->currpiv = hqr_low_binary_currpiv; - arg->nextpiv = hqr_low_binary_nextpiv; - arg->prevpiv = hqr_low_binary_prevpiv; - arg->ipiv = NULL; -}; + case LIBHQR_KILLED_BY_TS: -/**************************************************** - * HQR_LOW_FIBONACCI_TREE - ***************************************************/ -/* Return the pivot to use for the row m at step k */ -inline static int hqr_low_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { - int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - m%(qrpiv->p)) / qrpiv->p / qrpiv->a; - return (qrpiv->ipiv)[ k_a * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; -} + /* If the tile is over the diagonal of step k, skip directly to type 2 */ + if ( arg->domino && lpivot < k ) + goto next_2; -/* Return the last row which has used the row m as a pivot in step k before the row start */ -inline static int hqr_low_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; - int p_pa = (p / qrpiv->p ) / qrpiv->a; + if ( start == gmt ) + nextp = pivot + p; + else + nextp = start + p; - for( i=start_pa+1; i<(qrpiv->ldd); i++ ) - if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa ) - return i; - return i; - } + if ( ( nextp < gmt ) && + ( nextp < pivot + a*p ) && + ( (nextp/p)%a != 0 ) ) + return nextp; + start = gmt; + lstart = arg->llvl->ldd * a; -/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ -inline static int hqr_low_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; - int p_pa = (p / qrpiv->p ) / qrpiv->a; + case LIBHQR_KILLED_BY_LOCALTREE: - for( i=start_pa-1; i>k_a; i-- ) - if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa ) - return i; - return (qrpiv->ldd); -} + /* If the tile is over the diagonal of step k, skip directly to type 2 */ + if ( arg->domino && lpivot < k ) + goto next_2; -static void hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN){ - int *ipiv; - int mt; + /* Get the next pivot for the low level tree */ + tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a ); - arg->currpiv = hqr_low_fibonacci_currpiv; - arg->nextpiv = hqr_low_fibonacci_nextpiv; - arg->prevpiv = hqr_low_fibonacci_prevpiv; + if ( (tmp * a * p + rpivot >= gmt) + && (tmp == arg->llvl->ldd-1) ) + tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp); - mt = arg->ldd; + if ( tmp != arg->llvl->ldd ) + return tmp * a * p + rpivot; - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, mt*minMN*sizeof(int)); + next_2: + /* no next of type 1, we reset start to search the next 2 */ + start = gmt; + lstart = arg->llvl->ldd * a; - /* - * Fibonacci of order 1: - * u_(n+1) = u_(n) + 1 - */ - { - int f1, k, m; + case LIBHQR_KILLED_BY_DOMINO: - /* Fill in the first column */ - f1 = 1; - for (m=1; m < mt; ) { - for (k=0; (k < f1) && (m < mt); k++, m++) { - ipiv[m] = m - f1; + if ( lp < LIBHQR_KILLED_BY_DOMINO ) { + return gmt; } - f1++; - } - for( k=1; k<minMN; k++) { - for(m=k+1; m < mt; m++) { - ipiv[ k * mt + m ] = ipiv[ (k-1) * mt + m - 1 ] + 1; + /* Type 2 are killed only once if they are strictly in the band */ + if ( arg->domino && + (start == gmt) && + (lpivot < k) && + (pivot+p < gmt) ) { + return pivot+p; } - } - } -}; - -/**************************************************** - * HQR_LOW_GREEDY_TREE - ***************************************************/ -/* Return the pivot to use for the row m at step k */ -inline static int hqr_low_greedy_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { - if (qrpiv->domino) - return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; - else - return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd) - + ( ( m / qrpiv->p ) / qrpiv->a ) ]; -} - -/* Return the last row which has used the row m as a pivot in step k before the row start */ -inline static int hqr_low_greedy_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int p_pa = p / qrpiv->p / qrpiv->a; - int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; - - for( i=start_pa+1; i<(qrpiv->ldd); i++ ) - if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) - return i; - return i; - } - -/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ -inline static int hqr_low_greedy_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int pa = qrpiv->p * qrpiv->a; - int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; - int p_pa = p / pa; - int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; - - for( i=start_pa-1; i> k_a; i-- ) - if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) - return i; - - return (qrpiv->ldd); -} -static void hqr_low_greedy_init(hqr_subpiv_t *arg, int minMN){ - int *ipiv; - int mt, a, p, pa, domino; - - arg->currpiv = hqr_low_greedy_currpiv; - arg->nextpiv = hqr_low_greedy_nextpiv; - arg->prevpiv = hqr_low_greedy_prevpiv; + /* no next of type 2, we reset start to search the next 3 */ + start = gmt; + lstart = arg->llvl->ldd * a; - mt = arg->ldd; - a = arg->a; - p = arg->p; - pa = p * a; - domino = arg->domino; + case LIBHQR_KILLED_BY_DISTTREE: - if ( domino ) - { - int j, k, height, start, end, firstk = 0; - int *nT, *nZ; - - arg->minMN = libhqr_imin( minMN, mt*a ); - minMN = arg->minMN; - - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, mt*minMN*sizeof(int)); - - nT = (int*)malloc(minMN*sizeof(int)); - nZ = (int*)malloc(minMN*sizeof(int)); - - memset( nT, 0, minMN*sizeof(int)); - memset( nZ, 0, minMN*sizeof(int)); - - nT[0] = mt; - - k = 0; - while ( (!( ( nT[minMN-1] == mt - ( (minMN - 1) / a ) ) && - ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) - && ( firstk < minMN ) ) { - height = (nT[k] - nZ[k]) / 2; - if ( height == 0 ) { - while ( (firstk < minMN) && - ( nT[firstk] == mt - ( firstk / a ) ) && - ( nZ[firstk]+1 == nT[firstk] ) ) { - if ( (( firstk % a) != a-1 ) - && ( firstk < minMN-1 ) ) - nT[firstk+1]++; - firstk++; - } - k = firstk; - continue; + if ( lp < LIBHQR_KILLED_BY_DISTTREE ) { + return gmt; } - if (k < minMN-1) nT[k+1] += height; - start = mt - nZ[k] - 1; - end = start - height; - nZ[k] += height; - - for( j=start; j > end; j-- ) { - ipiv[ k*mt + j ] = (j - height); + if( arg->hlvl != NULL ) { + tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start ); + if ( tmp != gmt ) + return tmp; } - k++; - if (k > minMN-1) k = firstk; + default: + return gmt; } - - free(nT); - free(nZ); - } - else - { - int j, k, myrank, height, start, end, firstk = 0; - int *nT2DO = (int*)malloc(minMN*sizeof(int)); - int *nT = (int*)malloc(minMN*sizeof(int)); - int *nZ = (int*)malloc(minMN*sizeof(int)); - - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) * p ); - ipiv = arg->ipiv; - - memset( ipiv, 0, minMN*sizeof(int)*mt*p); - - for ( myrank=0; myrank<p; myrank++ ) { - int lminMN = minMN; - - memset( nT2DO, 0, minMN*sizeof(int)); - memset( nT, 0, minMN*sizeof(int)); - memset( nZ, 0, minMN*sizeof(int)); - - nT[0] = mt; - - for(k=0; k<lminMN; k++) { - nT2DO[k] = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); - if ( nT2DO[k] == 0 ) { - lminMN = k; - break; - } - } - - k = 0; firstk = 0; - while ( (!( ( nT[lminMN-1] == nT2DO[lminMN-1] ) && - ( nZ[lminMN-1]+1 == nT[lminMN-1] ) ) ) - && ( firstk < lminMN ) ) { - height = (nT[k] - nZ[k]) / 2; - if ( height == 0 ) { - while ( (firstk < lminMN) && - ( nT[firstk] == nT2DO[firstk] ) && - ( nZ[firstk]+1 == nT[firstk] ) ) { - if ( ( firstk < lminMN-1 ) && - (( (firstk) % pa) != ((a-1)*p+myrank) ) ) - nT[firstk+1]++; - firstk++; - } - k = firstk; - continue; - } - - if (k < lminMN-1) nT[k+1] += height; - start = mt - nZ[k] - 1; - end = start - height; - nZ[k] += height; - - for( j=start; j > end; j-- ) { - ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height); - } - - k++; - if (k > lminMN-1) k = firstk; - } - } - - free(nT2DO); - free(nT); - free(nZ); - } - -#if 0 - { - int m, k; - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - printf( "%3d ", ipiv[k*mt + m] ); - } - printf("\n"); - } - } - if (!arg->domino) { - int m, k, myrank; - for ( myrank=1; myrank<p; myrank++ ) { - ipiv += mt*minMN; - printf("-------- rank %d ---------\n", myrank ); - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - int k_a = (k + p - 1 - myrank) / p / a; - if ( m >= k_a ) - printf( "%3d ", ipiv[k*mt + m] ); - else - printf( "--- " ); - } - printf("\n"); - } - } - } -#endif -}; - -/**************************************************** - * HQR_LOW_GREEDY1P_TREE - ***************************************************/ -/* Return the pivot to use for the row m at step k */ -inline static int hqr_low_greedy1p_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { - if (qrpiv->domino) - return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; - else - return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd) - + ( ( m / qrpiv->p ) / qrpiv->a ) ]; -} - -/* Return the last row which has used the row m as a pivot in step k before the row start */ -inline static int hqr_low_greedy1p_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int p_pa = p / qrpiv->p / qrpiv->a; - int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; - - for( i=start_pa+1; i<(qrpiv->ldd); i++ ) - if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) - return i; - return i; - } - -/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ -inline static int hqr_low_greedy1p_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int pa = qrpiv->p * qrpiv->a; - int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; - int p_pa = p / pa; - int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; - - for( i=start_pa-1; i> k_a; i-- ) - if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) - return i; - - return (qrpiv->ldd); -} - -static void hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN){ - int *ipiv; - int mt, a, p, pa, domino; - int j, k, height, start, end, nT, nZ; - - arg->currpiv = hqr_low_greedy1p_currpiv; - arg->nextpiv = hqr_low_greedy1p_nextpiv; - arg->prevpiv = hqr_low_greedy1p_prevpiv; - - mt = arg->ldd; - a = arg->a; - p = arg->p; - pa = p * a; - domino = arg->domino; - - /* This section has not been coded yet, and will perform a classic greedy */ - if ( domino ) - { - arg->minMN = libhqr_imin( minMN, mt*a ); - minMN = arg->minMN; - - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, mt*minMN*sizeof(int)); - - /** - * Compute the local greedy tree of each column, on each node - */ - for(k=0; k<minMN; k++) { - /* Number of tiles to factorized in this column on this rank */ - nT = libhqr_imax( mt - (k / a), 0 ); - /* Number of tiles already killed */ - nZ = 0; - - while( nZ < (nT-1) ) { - height = (nT - nZ) / 2; - start = mt - nZ - 1; - end = start - height; - nZ += height; - - for( j=start; j > end; j-- ) { - ipiv[ k*mt + j ] = (j - height); - } - } - assert( nZ+1 == nT ); - } - } - else - { - int myrank; - end = 0; - - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) * p ); - ipiv = arg->ipiv; - - memset( ipiv, 0, minMN*sizeof(int)*mt*p); - - for ( myrank=0; myrank<p; myrank++ ) { - - /** - * Compute the local greedy tree of each column, on each node - */ - for(k=0; k<minMN; k++) { - /* Number of tiles to factorized in this column on this rank */ - nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); - /* Number of tiles already killed */ - nZ = 0; - - /* No more computations on this node */ - if ( nT == 0 ) { - break; - } - - while( nZ < (nT-1) ) { - height = (nT - nZ) / 2; - start = mt - nZ - 1; - end = start - height; - nZ += height; - - for( j=start; j > end; j-- ) { - ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height); - } - } - assert( nZ+1 == nT ); - } - } - } - -#if 0 - { - int m, k; - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - printf( "%3d ", ipiv[k*mt + m] ); - } - printf("\n"); - } - } - if (!arg->domino) { - int m, k, myrank; - for ( myrank=1; myrank<p; myrank++ ) { - ipiv += mt*minMN; - printf("-------- rank %d ---------\n", myrank ); - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - int k_a = (k + p - 1 - myrank) / p / a; - if ( m >= k_a ) - printf( "%3d ", ipiv[k*mt + m] ); - else - printf( "--- " ); - } - printf("\n"); - } - } - } -#endif -}; - -/**************************************************** - * HQR_HIGH_FLAT_TREE - ***************************************************/ -static int hqr_high_flat_currpiv(const hqr_subpiv_t *arg, int k, int m) -{ - (void)arg; - (void)m; - return k; -}; - -static int hqr_high_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - if ( p == k && arg->ldd > 1 ) { - if ( start == arg->ldd ) - return p+1; - else if ( start < arg->ldd && (start-k < arg->p-1) ) - return start+1; - } - return arg->ldd; -}; - -static int hqr_high_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - assert( arg->p > 1 ); - if ( p == k && arg->ldd > 1 ) { - if ( start == p && p != arg->ldd-1 ) - return libhqr_imin( p + arg->p - 1, arg->ldd - 1 ); - else if ( start > p + 1 && (start-k < arg->p)) - return start-1; - } - return arg->ldd; -}; - -static void hqr_high_flat_init(hqr_subpiv_t *arg){ - arg->currpiv = hqr_high_flat_currpiv; - arg->nextpiv = hqr_high_flat_nextpiv; - arg->prevpiv = hqr_high_flat_prevpiv; - arg->ipiv = NULL; -}; - -/**************************************************** - * HQR_HIGH_BINARY_TREE - ***************************************************/ -static int hqr_high_binary_currpiv(const hqr_subpiv_t *arg, int k, int m) -{ - int tmp1 = m - k; - int tmp2 = 1; - (void)arg; - - if ( tmp1 == 0) - return 0; - while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) { - tmp1 = tmp1 >> 1; - tmp2 = tmp2 << 1; - } - assert( m - tmp2 >= k ); - return m - tmp2; -}; - -static int hqr_high_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - int tmpp, bit; - myassert( (start == arg->ldd) || (hqr_high_binary_currpiv( arg, k, start ) == p) ); - - if ( start <= p ) - return arg->ldd; - - int offset = p - k; - bit = 0; - if (start != arg->ldd) { - while( ( (start - k) & (1 << bit ) ) == 0 ) - bit++; - bit++; - } - - tmpp = offset | (1 << bit); - if ( ( tmpp != offset ) && (tmpp < arg->p) && ( tmpp+k < arg->ldd ) ) - return tmpp + k; - else - return arg->ldd; -}; - - -static int hqr_high_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - int offset = p - k; - - myassert( start >= p && ( start == p || hqr_high_binary_currpiv( arg, k, start ) == p ) ); - - if ( (start == p) && ( offset%2 == 0 ) ) { - int i, bit, tmp; - if ( offset == 0 ) - bit = (int)( log( (double)( libhqr_imin(arg->p, arg->ldd - k) ) ) / log( 2. ) ); - else { - bit = 0; - while( (offset & (1 << bit )) == 0 ) - bit++; - } - for( i=bit; i>-1; i--){ - tmp = offset | (1 << i); - if ( ( offset != tmp ) && ( tmp < arg->p ) && (tmp+k < arg->ldd) ) - return tmp+k; - } - return arg->ldd; - } - - if ( (start - p) > 1 ) - return p + ( (start - p) >> 1 ); - else { - return arg->ldd; - } -}; - -static void hqr_high_binary_init(hqr_subpiv_t *arg){ - arg->currpiv = hqr_high_binary_currpiv; - arg->nextpiv = hqr_high_binary_nextpiv; - arg->prevpiv = hqr_high_binary_prevpiv; - arg->ipiv = NULL; -}; - -/**************************************************** - * HQR_HIGH_FIBONACCI_TREE - ***************************************************/ -/* Return the pivot to use for the row m at step k */ -inline static int hqr_high_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { - return (qrpiv->ipiv)[ m-k ] + k; -} - -/* Return the last row which has used the row m as a pivot in step k before the row start */ -inline static int hqr_high_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) { - int i; - myassert( p >= k && start >= p && start-k <= qrpiv->p); - - int lp = p - k; - int lstart= start - k; - int end = libhqr_imin(qrpiv->ldd-k, qrpiv->p); - for( i=lstart+1; i<end; i++ ) - if ( (qrpiv->ipiv)[i] == lp ) - return i+k; - return qrpiv->ldd; -} - -/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ -inline static int hqr_high_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) { - int i; - myassert( p>=k && (start == qrpiv->ldd || start-k <= qrpiv->p) ); - - for( i=libhqr_imin(start-k-1, qrpiv->p-1); i>0; i-- ) - if ( (qrpiv->ipiv)[i] == (p-k) ) - return i + k; - return (qrpiv->ldd); -} - -static void hqr_high_fibonacci_init(hqr_subpiv_t *arg){ - int *ipiv; - int p; - - arg->currpiv = hqr_high_fibonacci_currpiv; - arg->nextpiv = hqr_high_fibonacci_nextpiv; - arg->prevpiv = hqr_high_fibonacci_prevpiv; - - p = arg->p; - - arg->ipiv = (int*)malloc( p * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, p*sizeof(int)); - - /* - * Fibonacci of order 1: - * u_(n+1) = u_(n) + 1 - */ - { - int f1, k, m; - - /* Fill in the first column */ - f1 = 1; - for (m=1; m < p; ) { - for (k=0; (k < f1) && (m < p); k++, m++) { - ipiv[m] = m - f1; - } - f1++; - } - } -}; - -/**************************************************** - * HQR_HIGH_GREEDY_TREE (1 panel duplicated) - ***************************************************/ -static void hqr_high_greedy1p_init(hqr_subpiv_t *arg){ - int *ipiv; - int mt, p; - - arg->currpiv = hqr_high_fibonacci_currpiv; - arg->nextpiv = hqr_high_fibonacci_nextpiv; - arg->prevpiv = hqr_high_fibonacci_prevpiv; - - mt = arg->ldd; - p = arg->p; - - arg->ipiv = (int*)malloc( p * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, p*sizeof(int)); - - { - int minMN = 1; - int j, k, height, start, end, firstk = 0; - int *nT = (int*)malloc(minMN*sizeof(int)); - int *nZ = (int*)malloc(minMN*sizeof(int)); - memset( nT, 0, minMN*sizeof(int)); - memset( nZ, 0, minMN*sizeof(int)); - - nT[0] = mt; - nZ[0] = libhqr_imax( mt - p, 0 ); - for(k=1; k<minMN; k++) { - height = libhqr_imax(mt-k-p, 0); - nT[k] = height; - nZ[k] = height; - } - - k = 0; - while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) && - ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) - && ( firstk < minMN ) ) { - height = (nT[k] - nZ[k]) / 2; - if ( height == 0 ) { - while ( (firstk < minMN) && - ( nT[firstk] == mt - firstk ) && - ( nZ[firstk]+1 == nT[firstk] ) ) { - firstk++; - } - k = firstk; - continue; - } - - start = mt - nZ[k] - 1; - end = start - height; - nZ[k] += height; - if (k < minMN-1) nT[k+1] = nZ[k]; - - for( j=start; j > end; j-- ) { - ipiv[ k*p + j-k ] = (j - height); - } - - k++; - if (k > minMN-1) k = firstk; - } - - free(nT); - free(nZ); - } -}; - -/**************************************************** - * HQR_HIGH_GREEDY_TREE - ***************************************************/ -static int hqr_high_greedy_currpiv(const hqr_subpiv_t *arg, int k, int m) -{ - myassert( m >= k && m < k+arg->p ); - return (arg->ipiv)[ k * (arg->p) + (m - k) ]; -}; - -static int hqr_high_greedy_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - int i; - myassert( (start >= k && start < k+arg->p) || start == arg->ldd ); - for( i=libhqr_imin(start-1, k+arg->p-1); i > k; i-- ) - if ( (arg->ipiv)[i-k + k* (arg->p)] == p ) - return i; - return (arg->ldd); -}; - -static int hqr_high_greedy_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - int i; - myassert( (start >= k && start < k+arg->p) || start == p ); - for( i=start-k+1; i<arg->p; i++ ) - if ( (arg->ipiv)[i + k * (arg->p)] == p ) - return k+i; - return arg->ldd; -}; - -static void hqr_high_greedy_init(hqr_subpiv_t *arg, int minMN){ - int *ipiv; - int mt, p; - - arg->currpiv = hqr_high_greedy_currpiv; - arg->nextpiv = hqr_high_greedy_nextpiv; - arg->prevpiv = hqr_high_greedy_prevpiv; - - mt = arg->ldd; - p = arg->p; - - arg->ipiv = (int*)malloc( p * minMN * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, p*minMN*sizeof(int)); - - { - int j, k, height, start, end, firstk = 0; - int *nT = (int*)malloc(minMN*sizeof(int)); - int *nZ = (int*)malloc(minMN*sizeof(int)); - memset( nT, 0, minMN*sizeof(int)); - memset( nZ, 0, minMN*sizeof(int)); - - nT[0] = mt; - nZ[0] = libhqr_imax( mt - p, 0 ); - for(k=1; k<minMN; k++) { - height = libhqr_imax(mt-k-p, 0); - nT[k] = height; - nZ[k] = height; - } - - k = 0; - while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) && - ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) - && ( firstk < minMN ) ) { - height = (nT[k] - nZ[k]) / 2; - if ( height == 0 ) { - while ( (firstk < minMN) && - ( nT[firstk] == mt - firstk ) && - ( nZ[firstk]+1 == nT[firstk] ) ) { - firstk++; - } - k = firstk; - continue; - } - - start = mt - nZ[k] - 1; - end = start - height; - nZ[k] += height; - if (k < minMN-1) nT[k+1] = nZ[k]; - - for( j=start; j > end; j-- ) { - ipiv[ k*p + j-k ] = (j - height); - } - - k++; - if (k > minMN-1) k = firstk; - } - - free(nT); - free(nZ); - } -}; - -/**************************************************** - * - * Generic functions currpiv,prevpiv,nextpiv - * - ***************************************************/ -static int hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, tmpk; - int lm, rank; - int a = qrtree->a; - int p = qrtree->p; - int domino = qrtree->domino; - int gmt = qrtree->mt; - - lm = m / p; /* Local index in the distribution over p domains */ - rank = m % p; /* Staring index in this distribution */ - - /* TS level common to every case */ - if ( domino ) { - switch( hqr_gettype( qrtree, k, m ) ) - { - case 0: - tmp = lm / a; - if ( tmp == k / a ) - return k * p + rank ; /* Below to the first bloc including the diagonal */ - else - return tmp * a * p + rank ; - break; - case 1: - tmp = arg->llvl->currpiv(arg->llvl, k, m); - return ( tmp == k / a ) ? k * p + rank : tmp * a * p + rank; - break; - case 2: - return m - p; - break; - case 3: - if ( arg->hlvl != NULL ) - return arg->hlvl->currpiv(arg->hlvl, k, m); - default: - return gmt; - } - } - else { - switch( hqr_gettype( qrtree, k, m ) ) - { - case 0: - tmp = lm / a; - /* tmpk = (k + p - 1 - m%p) / p / a; */ - tmpk = k / (p * a); - return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ; - break; - case 1: - tmp = arg->llvl->currpiv(arg->llvl, k, m); - /* tmpk = (k + p - 1 - m%p) / p / a; */ - tmpk = k / (p * a); - return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ; - break; - case 2: - return m - p; - break; - case 3: - if ( arg->hlvl != NULL ) - return arg->hlvl->currpiv(arg->hlvl, k, m); - default: - return gmt; - } - } -}; - -/** - * hqr_nextpiv - Computes the next row killed by the row p, after - * it has kill the row start. - * - * @param[in] k - * Factorization step - * - * @param[in] pivot - * Line used as killer - * - * @param[in] start - * Starting point to search the next line killed by p after start - * start must be equal to A.mt to find the first row killed by p. - * if start != A.mt, start must be killed by p - * - * @return: - * - -1 if start doesn't respect the previous conditions - * - m, the following row killed by p if it exists, A->mt otherwise - */ -static int hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, ls, lp, nextp; - int lpivot, rpivot, lstart; - int a = qrtree->a; - int p = qrtree->p; - int gmt = qrtree->mt; - - lpivot = pivot / p; /* Local index in the distribution over p domains */ - rpivot = pivot % p; /* Staring index in this distribution */ - - /* Local index in the distribution over p domains */ - lstart = ( start == gmt ) ? arg->llvl->ldd * a : start / p; - - myassert( start > pivot && pivot >= k ); - myassert( start == gmt || pivot == hqr_currpiv( qrtree, k, start ) ); - - /* TS level common to every case */ - ls = (start < gmt) ? hqr_gettype( qrtree, k, start ) : -1; - lp = hqr_gettype( qrtree, k, pivot ); - - switch( ls ) - { - case -1: - - if ( lp == LIBHQR_KILLED_BY_TS ) { - myassert( start == gmt ); - return gmt; - } - - case LIBHQR_KILLED_BY_TS: - - /* If the tile is over the diagonal of step k, skip directly to type 2 */ - if ( arg->domino && lpivot < k ) - goto next_2; - - if ( start == gmt ) - nextp = pivot + p; - else - nextp = start + p; - - if ( ( nextp < gmt ) && - ( nextp < pivot + a*p ) && - ( (nextp/p)%a != 0 ) ) - return nextp; - start = gmt; - lstart = arg->llvl->ldd * a; - - case LIBHQR_KILLED_BY_LOCALTREE: - - /* If the tile is over the diagonal of step k, skip directly to type 2 */ - if ( arg->domino && lpivot < k ) - goto next_2; - - /* Get the next pivot for the low level tree */ - tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a ); - - if ( (tmp * a * p + rpivot >= gmt) - && (tmp == arg->llvl->ldd-1) ) - tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp); - - if ( tmp != arg->llvl->ldd ) - return tmp * a * p + rpivot; - - next_2: - /* no next of type 1, we reset start to search the next 2 */ - start = gmt; - lstart = arg->llvl->ldd * a; - - case LIBHQR_KILLED_BY_DOMINO: - - if ( lp < LIBHQR_KILLED_BY_DOMINO ) { - return gmt; - } - - /* Type 2 are killed only once if they are strictly in the band */ - if ( arg->domino && - (start == gmt) && - (lpivot < k) && - (pivot+p < gmt) ) { - return pivot+p; - } - - /* no next of type 2, we reset start to search the next 3 */ - start = gmt; - lstart = arg->llvl->ldd * a; - - case LIBHQR_KILLED_BY_DISTTREE: - - if ( lp < LIBHQR_KILLED_BY_DISTTREE ) { - return gmt; - } - - if( arg->hlvl != NULL ) { - tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start ); - if ( tmp != gmt ) - return tmp; - } - - default: - return gmt; - } -} - -/** - * hqr_prevpiv - Computes the previous row killed by the row p, before - * to kill the row start. - * - * @param[in] k - * Factorization step - * - * @param[in] pivot - * Line used as killer - * - * @param[in] start - * Starting point to search the previous line killed by p before start - * start must be killed by p, and start must be greater or equal to p - * - * @return: - * - -1 if start doesn't respect the previous conditions - * - m, the previous row killed by p if it exists, A->mt otherwise - */ -static int -hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, ls, lp, nextp; - int lpivot, rpivot, lstart; - int a = qrtree->a; - int p = qrtree->p; - int gmt = qrtree->mt; - - lpivot = pivot / p; /* Local index in the distribution over p domains */ - rpivot = pivot % p; /* Staring index in this distribution */ - lstart = start / p; /* Local index in the distribution over p domains */ - - myassert( start >= pivot && pivot >= k && start < gmt ); - myassert( start == pivot || pivot == hqr_currpiv( qrtree, k, start ) ); - - /* T Slevel common to every case */ - ls = hqr_gettype( qrtree, k, start ); - lp = hqr_gettype( qrtree, k, pivot ); - - if ( lp == LIBHQR_KILLED_BY_TS ) - return gmt; - - myassert( lp >= ls ); - switch( ls ) - { - case LIBHQR_KILLED_BY_DISTTREE: - if( arg->hlvl != NULL ) { - tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); - if ( tmp != gmt ) - return tmp; - } - - start = pivot; - lstart = pivot / p; - - case LIBHQR_KILLED_BY_DOMINO: - /* If the tile is over the diagonal of step k, process it as type 2 */ - if ( arg->domino && lpivot < k ) { - - if ( ( start == pivot ) && - (start+p < gmt ) ) - return start+p; - - if ( lp > LIBHQR_KILLED_BY_LOCALTREE ) - return gmt; - } - - start = pivot; - lstart = pivot / p; - - /* If it is the 'local' diagonal block, we go to 1 */ - - case LIBHQR_KILLED_BY_LOCALTREE: - /* If the tile is over the diagonal of step k and is of type 2, - it cannot annihilate type 0 or 1 */ - if ( arg->domino && lpivot < k ) - return gmt; - - tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a); - - if ( (tmp * a * p + rpivot >= gmt) - && (tmp == arg->llvl->ldd-1) ) - tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp); - - if ( tmp != arg->llvl->ldd ) - return tmp * a * p + rpivot; - - start = pivot; - - case LIBHQR_KILLED_BY_TS: - /* Search for predecessor in TS tree */ - /* if ( ( start+p < gmt ) && */ - /* ( (((start+p) / p) % a) != 0 ) ) */ - /* return start + p; */ - - if ( start == pivot ) { - tmp = lpivot + a - 1 - lpivot%a; - nextp = tmp * p + rpivot; - - while( pivot < nextp && nextp >= gmt ) - nextp -= p; - } else { - nextp = start - p; /*(lstart - 1) * p + rpivot;*/ - } - assert(nextp < gmt); - if ( pivot < nextp ) - return nextp; - - default: - return gmt; - } -}; - -/**************************************************** - * - * Generate the permutation required for the round-robin on TS - * - ***************************************************/ -static void -hqr_genperm( libhqr_tree_t *qrtree ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int m = qrtree->mt; - int n = qrtree->nt; - int a = qrtree->a; - int p = qrtree->p; - int domino = arg->domino; - int minMN = libhqr_imin( m, n ); - int pa = p * a; - int i, j, k; - int nbextra1; - int end2; - int mpa = m % pa; - int endpa = m - mpa; - int *perm; - - arg->perm = (int*)malloc( (m+1) * minMN * sizeof(int) ); - perm = arg->perm; - - if ( arg->tsrr ) { - for(k=0; k<minMN; k++) { - for( i=0; i<m+1; i++) { - perm[i] = -1; - } - perm += m+1; - } - perm = arg->perm; - for(k=0; k<minMN; k++) { - nbextra1 = nbextra1_formula; - - end2 = p + ( domino ? k*p : k + nbextra1 ); - end2 = (( end2 + pa - 1 ) / pa ) * pa; - end2 = libhqr_imin( end2, m ); - - /* - * All tiles of type 3, 2 and: - * - 1 when domino is disabled - * - 0 before the first multiple of pa under the considered diagonal - */ - for( i=k; i<end2; i++) { - perm[i] = i; - } - - /* All permutations in groups of pa tiles */ - assert( i%pa == 0 || i>=endpa); - for( ; i<endpa; i+=pa ) { - for(j=0; j<pa; j++) { - perm[i+j] = i + ( j + p * (k%a) )%pa; - } - } - - /* Last group of tiles */ - for( ; i<m; i++) { - perm[i] = i; - } - perm[m] = m; - perm += m+1; - } - } - else { - for(k=0; k<minMN; k++) { - for( i=0; i<m+1; i++) { - perm[i] = i; - } - perm += m+1; - } - } -} - -static inline int -hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m ) -{ - int gmt = qrtree->mt + 1; - int a = qrtree->a; - int p = qrtree->p; - int pa = p * a; - int start = m / pa * pa; - int stop = libhqr_imin( start + pa, gmt ) - start; - int i; - - if (a == 1) - return m; - - for ( i=m%p; i < stop; i+=p ) { - //if( perm[i] == m ) - if( i == m ) - return i+start; - } - - /* We should never arrive here */ - myassert( 0 ); -} - -/** - ******************************************************************************* - * - * @ingroup dplasma - * - * libhqr_hqr_init - Creates the tree structure that will describes the - * operation performed during QR/LQ factorization with parameterized QR/LQ - * algorithms family. - * - * Trees available parameters are described below. It is recommended to: - * - set p to the same value than the P-by-Q process grid used to distribute - * the data. (P for QR factorization, Q for LQ factorization). - * - set the low level tree to LIBHQR_GREEDY_TREE. - * - set the high level tree to: - * 1) LIBHQR_FLAT_TREE when the problem is square, because it divides - * by two the volume of communication of any other tree. - * 2) LIBHQR_FIBONACCI_TREE when the problem is tall and skinny (QR) or - * small and fat (LQ), because it reduces the critical path length. - * - Disable the domino effect when problem is square, to keep high efficiency - * kernel proportion high. - * - Enable the domino effect when problem is tall and skinny (QR) or - * small and fat (LQ) to increase parallelism and reduce critical path length. - * - Round-robin on TS domain (tsrr) option should be disabled. It is - * experimental and is not safe. - * - * These are the default when a parameter is set to -1; - * - * See http://www.netlib.org/lapack/lawnspdf/lawn257.pdf - * - ******************************************************************************* - * - * @param[inout] qrtree - * On entry, an allocated structure uninitialized. - * On exit, the structure initialized according to the parameter given. - * - * @param[in] trans - * @arg QR: Structure is initialized for QR factorization. - * @arg LQ: Structure is initialized for LQ factorization. - * - * @param[in] A - * Descriptor of the distributed matrix A to be factorized, on which - * QR/LQ factorization will be performed. - * The descriptor is untouched and only mt/nt/P parameters are used. - * - * @param[in] type_llvl - * Defines the tree used to reduce the main tiles of each local domain - * together. The matrix of those tiles has a lower triangular structure - * with a diagonal by step a. - * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the local - * tiles. - * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the local - * tiles. - * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the - * local tiles. - * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the local - * tiles. - * @arg -1: The default is used (LIBHQR_GREEDY_TREE) - * - * @param[in] type_hlvl - * Defines the tree used to reduce the main tiles of each domain. This - * is a band lower diagonal matrix of width p. - * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the tiles. - * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the tiles. - * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the - * tiles. - * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the tiles. - * @arg LIBHQR_GREEDY1P_TREE: A Greedy tree is computed for the first - * column and then duplicated on all others. - * @arg -1: The default is used (LIBHQR_FIBONACCI_TREE) - * - * @param[in] a - * Defines the size of the local domains on which a classic flat TS - * tree is performed. If a==1, then all local tiles are reduced by the - * type_lllvl tree. If a is larger than mt/p, then no local reduction - * tree is performed and type_llvl is ignored. - * If a == -1, it is set to 4 by default. - * - * @param[in] p - * Defines the number of distributed domains, ie the width of the high - * level reduction tree. If p == 1, no high level reduction tree is - * used. If p == mt, a and type_llvl are ignored since only high level - * reduction are performed. - * By default, it is recommended to set p to P if trans == - * PlasmaNoTrans, Q otherwise, where P-by-Q is the process grid used to - * distributed the data. (p > 0) - * - * @param[in] domino - * Enable/disable the domino effect that connects the high and low - * level reduction trees. Enabling the domino increases the proportion - * of TT (Triangle on top of Triangle) kernels that are less efficient, - * but increase the pipeline effect between independent factorization - * steps, reducin the critical path length. - * If disabled, it keeps the proprotion of more efficient TS (Triangle - * on top of Square) kernels high, but deteriorates the pipeline - * effect, and the critical path length. - * If domino == -1, it is enable when ration between M and N is lower - * than 1/2, and disabled otherwise. - * - * @param[in] tsrr - * Enable/Disable a round robin selection of the killer in local - * domains reduced by TS kernels. Enabling a round-robin selection of - * the killer allows to take benefit of the good pipelining of the flat - * trees and the high efficient TS kernels, while having other trees on - * top of it to reduce critical path length. - * WARNING: This option is under development and should not be enabled - * due to problem in corner cases. - * - ******************************************************************************* - * - * @retval -i if the ith parameters is incorrect. - * @retval 0 on success. - * - ******************************************************************************* - * - * @sa libhqr_hqr_finalize - * @sa libhqr_systolic_init - * - ******************************************************************************/ -int -libhqr_hqr_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, - int type_llvl, int type_hlvl, - int a, int p, - int domino, int tsrr ) -{ - libhqr_tree_t qrtree_init; - double ratio = 0.0; - int low_mt, minMN; - hqr_args_t *arg; - - if (qrtree == NULL) { - fprintf(stderr, "libhqr_hqr_init, illegal value of qrtree"); - return -1; - } - if ((trans != LIBHQR_QR) && - (trans != LIBHQR_LQ)) { - fprintf(stderr, "libhqr_hqr_init, illegal value of trans"); - return -2; - } - if (A == NULL) { - fprintf(stderr, "libhqr_hqr_init, illegal value of A"); - return -3; - } - - /* Compute parameters */ - a = (a == -1) ? 4 : libhqr_imax( a, 1 ); - p = libhqr_imax( p, 1 ); - - /* Domino */ - if ( domino >= 0 ) { - domino = domino ? 1 : 0; - } - else { - if (trans == LIBHQR_QR) { - ratio = ((double)(A->nt) / (double)(A->mt)); - } else { - ratio = ((double)(A->mt) / (double)(A->nt)); - } - if ( ratio >= 0.5 ) { - domino = 0; - } else { - domino = 1; - } - } - - minMN = libhqr_imin(A->mt, A->nt); - - /* Create a temporary qrtree structure based on the functions */ - { - qrtree_init.domino = domino; - qrtree_init.getnbgeqrf = hqr_getnbgeqrf; - qrtree_init.getm = hqr_getm; - qrtree_init.geti = hqr_geti; - qrtree_init.gettype = hqr_gettype; - qrtree_init.currpiv = hqr_currpiv; - qrtree_init.nextpiv = hqr_nextpiv; - qrtree_init.prevpiv = hqr_prevpiv; - - qrtree_init.mt = (trans == LIBHQR_QR) ? A->mt : A->nt; - qrtree_init.nt = minMN; - - a = libhqr_imin( a, qrtree_init.mt ); - - qrtree_init.a = a; - qrtree_init.p = p; - qrtree_init.args = NULL; - - arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) ); - arg->domino = domino; - arg->tsrr = tsrr; - arg->perm = NULL; - - arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); - arg->hlvl = NULL; - - low_mt = (qrtree_init.mt + p * a - 1) / ( p * a ); - - arg->llvl->minMN = minMN; - arg->llvl->ldd = low_mt; - arg->llvl->a = a; - arg->llvl->p = p; - arg->llvl->domino = domino; - - switch( type_llvl ) { - case LIBHQR_FLAT_TREE : - hqr_low_flat_init(arg->llvl); - break; - case LIBHQR_FIBONACCI_TREE : - hqr_low_fibonacci_init(arg->llvl, minMN); - break; - case LIBHQR_BINARY_TREE : - hqr_low_binary_init(arg->llvl); - break; - case LIBHQR_GREEDY1P_TREE : - hqr_low_greedy1p_init(arg->llvl, minMN); - break; - case LIBHQR_GREEDY_TREE : - default: - hqr_low_greedy_init(arg->llvl, minMN); - } - - if ( p > 1 ) { - arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); - - arg->llvl->minMN = minMN; - arg->hlvl->ldd = qrtree_init.mt; - arg->hlvl->a = a; - arg->hlvl->p = p; - arg->hlvl->domino = domino; - - switch( type_hlvl ) { - case LIBHQR_FLAT_TREE : - hqr_high_flat_init(arg->hlvl); - break; - case LIBHQR_GREEDY_TREE : - hqr_high_greedy_init(arg->hlvl, minMN); - break; - case LIBHQR_GREEDY1P_TREE : - hqr_high_greedy1p_init(arg->hlvl); - break; - case LIBHQR_BINARY_TREE : - hqr_high_binary_init(arg->hlvl); - break; - case LIBHQR_FIBONACCI_TREE : - hqr_high_fibonacci_init(arg->hlvl); - break; - default: - if ( ratio >= 0.5 ) { - hqr_high_flat_init(arg->hlvl); - } else { - hqr_high_fibonacci_init(arg->hlvl); - } - } - } - - qrtree_init.args = (void*)arg; - hqr_genperm( &qrtree_init ); - } - - /* Initialize the final QR tree */ - memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) ); - qrtree->getnbgeqrf = hqr_getnbgeqrf; - qrtree->getm = hqr_getm; - qrtree->geti = hqr_geti; - qrtree->gettype = rdmtx_gettype; - qrtree->currpiv = rdmtx_currpiv; - qrtree->nextpiv = rdmtx_nextpiv; - qrtree->prevpiv = rdmtx_prevpiv; - qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tileinfo_t) ); - - /* Initialize the matrix */ - libhqr_matrix_init(qrtree, &qrtree_init); - - /* Free the initial qrtree */ - libhqr_hqr_finalize( &qrtree_init ); - - return 0; -} - -/** - ******************************************************************************* - * - * @ingroup dplasma - * - * libhqr_hqr_finalize - Cleans the qrtree data structure allocated by call to - * libhqr_hqr_init(). - * - ******************************************************************************* - * - * @param[in,out] qrtree - * On entry, an allocated structure to destroy. - * On exit, the structure is destroy and cannot be used. - * - ******************************************************************************* - * - * @sa libhqr_hqr_init - * - ******************************************************************************/ -void -libhqr_hqr_finalize( libhqr_tree_t *qrtree ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - - if (arg != NULL) { - if ( arg->llvl != NULL) { - if ( arg->llvl->ipiv != NULL ) - free( arg->llvl->ipiv ); - free( arg->llvl ); - } - - if ( arg->hlvl != NULL) { - if ( arg->hlvl->ipiv != NULL ) - free( arg->hlvl->ipiv ); - free( arg->hlvl ); - } - - if ( arg->perm != NULL ) - free(arg->perm); - - free(arg); - } -} - - -/* - * Common functions - */ -static int svd_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); -static int svd_getm( const libhqr_tree_t *qrtree, int k, int i ); -static int svd_geti( const libhqr_tree_t *qrtree, int k, int m ); -static int svd_gettype( const libhqr_tree_t *qrtree, int k, int m ); - -#define svd_getipiv( __qrtree, _k ) ((__qrtree)->llvl->ipiv + ((__qrtree)->llvl->ldd) * (_k) ) -#define svd_geta( __qrtree, _k ) ( (svd_getipiv( (__qrtree), (_k) ))[0] ) - -/* - * Extra parameter: - * gmt - Global number of tiles in a column of the complete distributed matrix - * Return: - * The number of geqrt to execute in the panel k - */ -static int -svd_getnbgeqrf( const libhqr_tree_t *qrtree, - int k ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int p = qrtree->p; - int gmt = qrtree->mt; - int a = svd_geta(arg, k); - int pa = p * a; - int nb_1, nb_2, nb_3; - int nb_11, nb_12; - - /* Number of tasks of type 3 */ - nb_3 = p; - - /* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */ - nb_2 = nbextra1_formula; - - /* First multiple of p*a under the diagonal of step 1 */ - nb_11 = ( (k + p + pa - 1 ) / pa ) * pa; - - /* Last multiple of p*a lower than A->mt */ - nb_12 = ( gmt / pa ) * pa; - - /* Number of tasks of type 1 between nb_11 and nb_12 */ - nb_1 = (nb_12 - nb_11) / a; - - /* Add leftover */ - nb_1 += libhqr_imin( p, gmt - nb_12 ); - - return libhqr_imin( nb_1 + nb_2 + nb_3, gmt - k); -} - -/* - * Extra parameter: - * i - indice of the geqrt in the continuous space - * Return: - * The global indice m of the i th geqrt in the panel k - */ -static int -svd_getm( const libhqr_tree_t *qrtree, - int k, int i ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int p = qrtree->p; - int a = svd_geta(arg, k); - - int pos1, j, pa = p * a; - int nbextra1 = nbextra1_formula; - int nb23 = p + nbextra1; - - /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ - if ( i < nb23 ) - return k+i; - /* Tile of type 1 */ - else { - j = i - nb23; - pa = p * a; - pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; - return pos1 + (j/p) * pa + j%p; - } -} - -/* - * Extra parameter: - * m - The global indice m of a geqrt in the panel k - * Return: - * The index i of the geqrt in the panel k - */ -static int -svd_geti( const libhqr_tree_t *qrtree, - int k, int m ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int p = qrtree->p; - int a = svd_geta(arg, k); - - int pos1, j, pa = p * a; - int nbextra1 = nbextra1_formula; - int nb23 = p + nbextra1; - int end2 = p + k + nbextra1; - - /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ - if ( m < end2 ) - return m-k; - /* Tile of type 1 */ - else { - pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; - j = m - pos1; - return nb23 + (j / pa) * p + j%pa; - } -} - -/* - * Extra parameter: - * m - The global indice m of the row in the panel k - * Return - * -1 - Error - * 0 - if m is reduced thanks to a TS kernel - * 1 - if m is reduced thanks to the low level tree - * 2 - if m is reduced thanks to the bubble tree - * 3 - if m is reduced in distributed - */ -static int -svd_gettype( const libhqr_tree_t *qrtree, - int k, int m ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int p = qrtree->p; - int a = svd_geta(arg, k); - - /* Element to be reduce in distributed */ - if (m < k + p) { - return 3; - } - /* Lower triangle of the matrix */ - else { - if( (m / p) % a == 0 ) - return 1; - else - return 0; - } -} - -/**************************************************** - * SVD_LOW_ADAPTIV_TREE - ***************************************************/ -/* Return the pivot to use for the row m at step k */ -inline static int -svd_low_adaptiv_currpiv( const hqr_subpiv_t *arg, - int k, int m ) -{ - int *ipiv = arg->ipiv + (m%arg->p * arg->minMN + k) * arg->ldd; - int a = ipiv[0]; - - ipiv+=2; - return ipiv[ ( m / arg->p ) / a ]; -} - -/* Return the last row which has used the row m as a pivot in step k before the row start */ -inline static int -svd_low_adaptiv_prevpiv( const hqr_subpiv_t *arg, - int k, int p, int start_pa ) -{ - int i; - int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k) * arg->ldd; - int a = ipiv[0]; - int ldd = ipiv[1]; - int p_pa = p / arg->p / a; - - ipiv+=2; - for( i=start_pa+1; i<ldd; i++ ) - if ( ipiv[i] == p_pa ) - return i; - return i; -} - -/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ -inline static int -svd_low_adaptiv_nextpiv( const hqr_subpiv_t *arg, - int k, int p, int start_pa ) -{ - int i; - int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k ) * arg->ldd; - int a = ipiv[0]; - int ldd = ipiv[1]; - int pa = arg->p * a; - int k_a = (k + arg->p - 1 - p%(arg->p)) / arg->p / a; - int p_pa = p / pa; - - ipiv+=2; - for( i=start_pa-1; i> k_a; i-- ) - if ( ipiv[i] == p_pa ) - return i; - - return ldd; -} - -static void -svd_low_adaptiv_init(hqr_subpiv_t *arg, - int gmt, int gnt, int nbcores, int ratio) -{ - int *ipiv; - int mt, a, p, pa, maxmt, myrank; - int j, k, height, start, end, nT, nZ; - int minMN = libhqr_imin(gmt, gnt); - - arg->currpiv = svd_low_adaptiv_currpiv; - arg->nextpiv = svd_low_adaptiv_nextpiv; - arg->prevpiv = svd_low_adaptiv_prevpiv; - - p = arg->p; - - end = 0; - - /** - * Compute the local greedy tree of each column, on each node - */ - maxmt = 1; - for(k=0; k<minMN; k++) { - /** - * The objective is to have at least two columns of TS to reduce per - * core, so it must answer the following inequality: - * ((gmt-k) / p / a ) * (gnt-k) >= ( ratio * nbcores ); - * so, - * a <= mt * (gnt-k) / (ratio * nbcores ) - */ - height = libhqr_iceil( gmt-k, p ); - a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 ); - - /* Now let's make sure all sub-parts are equilibrate */ - j = libhqr_iceil( height, a ); - a = libhqr_iceil( gmt-k, j ); - - /* Compute max dimension of the tree */ - mt = libhqr_iceil( gmt, p * a ); - maxmt = libhqr_imax( mt, maxmt ); - } - - arg->ldd = maxmt + 2; - arg->ipiv = (int*)malloc( arg->ldd * minMN * sizeof(int) * p ); - ipiv = arg->ipiv; - - memset( ipiv, 0, minMN*sizeof(int)*arg->ldd*p ); - - for ( myrank=0; myrank<p; myrank++ ) { - - /** - * Compute the local greedy tree of each column, on each node - */ - for(k=0; k<minMN; k++, ipiv += arg->ldd) { - /** - * The objective is to have at least two columns of TS to reduce per - * core, so it must answer the following inequality: - * (ldd / a ) * (gnt-k) >= ( ratio * nbcores ); - * so, - * a <= mt * (gnt-k) / (ratio * nbcores ) - */ - height = libhqr_iceil( gmt-k, p ); - a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 ); - - /* Now let's make sure all sub-parts are equilibrate */ - j = libhqr_iceil( height, a ); - a = libhqr_iceil( gmt-k, j ); - - pa = p * a; - mt = libhqr_iceil( gmt, pa ); - ipiv[0] = a; - ipiv[1] = mt; - - assert( a > 0 ); - assert( mt < arg->ldd-1 ); - - /* Number of tiles to factorized in this column on this rank */ - nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); - /* Number of tiles already killed */ - nZ = 0; - - assert( nT <= mt ); - - /* No more computations on this node */ - if ( nT == 0 ) { - continue; - } - - while( nZ < (nT-1) ) { - height = (nT - nZ) / 2; - start = mt - nZ - 1; - end = start - height; - nZ += height; - - for( j=start; j > end; j-- ) { - ipiv[ j+2 ] = (j - height); - } - } - assert( nZ+1 == nT ); - } - } - -#if 0 - { - int m, k; - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - printf( "%3d ", ipiv[k*(arg->ldd) + m] ); - } - printf("\n"); - } - } - if (!arg->domino) { - int m, k, myrank; - for ( myrank=1; myrank<p; myrank++ ) { - ipiv += arg->ldd * minMN; - printf("-------- rank %d ---------\n", myrank ); - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - int k_a = (k + p - 1 - myrank) / p / a; - if ( m >= k_a ) - printf( "%3d ", ipiv[k * arg->ldd + m] ); - else - printf( "--- " ); - } - printf("\n"); - } - } - } -#endif -}; - -/**************************************************** - * - * Generic functions currpiv,prevpiv,nextpiv - * - ***************************************************/ -static int svd_currpiv(const libhqr_tree_t *qrtree, int k, int m) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, tmpk; - int lm, rank; - int a = svd_geta( arg, k ); - int p = qrtree->p; - int gmt = qrtree->mt; - - lm = m / p; /* Local index in the distribution over p domains */ - rank = m % p; /* Staring index in this distribution */ - - /* TS level common to every case */ - switch( svd_gettype( qrtree, k, m ) ) - { - case 0: - tmp = lm / a; - /* tmpk = (k + p - 1 - m%p) / p / a; */ - tmpk = k / (p * a); - return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank; - break; - case 1: - tmp = arg->llvl->currpiv(arg->llvl, k, m); - /* tmpk = (k + p - 1 - m%p) / p / a; */ - tmpk = k / (p * a); - return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank; - break; - case 2: - assert(0); - break; - case 3: - if ( arg->hlvl != NULL ) - return arg->hlvl->currpiv(arg->hlvl, k, m); - default: - return gmt; - } - return -1; -}; +} /** - * svd_nextpiv - Computes the next row killed by the row p, after - * it has kill the row start. + * hqr_prevpiv - Computes the previous row killed by the row p, before + * to kill the row start. * * @param[in] k * Factorization step @@ -2434,195 +459,216 @@ static int svd_currpiv(const libhqr_tree_t *qrtree, int k, int m) * Line used as killer * * @param[in] start - * Starting point to search the next line killed by p after start - * start must be equal to A.mt to find the first row killed by p. - * if start != A.mt, start must be killed by p + * Starting point to search the previous line killed by p before start + * start must be killed by p, and start must be greater or equal to p * * @return: * - -1 if start doesn't respect the previous conditions - * - m, the following row killed by p if it exists, A->mt otherwise + * - m, the previous row killed by p if it exists, A->mt otherwise */ -static int svd_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +static int +hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int tmp, ls, lp, nextp; - int rpivot, lstart; - int *ipiv = svd_getipiv( arg, k ); - int a = ipiv[0]; - int ldd = ipiv[1]; + int lpivot, rpivot, lstart; + int a = qrtree->a; int p = qrtree->p; int gmt = qrtree->mt; + lpivot = pivot / p; /* Local index in the distribution over p domains */ rpivot = pivot % p; /* Staring index in this distribution */ + lstart = start / p; /* Local index in the distribution over p domains */ - /* Local index in the distribution over p domains */ - lstart = ( start == gmt ) ? ldd * a : start / p; + myassert( start >= pivot && pivot >= k && start < gmt ); + myassert( start == pivot || pivot == hqr_currpiv( qrtree, k, start ) ); - myassert( start > pivot && pivot >= k ); - myassert( start == gmt || pivot == svd_currpiv( qrtree, k, start ) ); + /* T Slevel common to every case */ + ls = hqr_gettype( qrtree, k, start ); + lp = hqr_gettype( qrtree, k, pivot ); - /* TS level common to every case */ - ls = (start < gmt) ? svd_gettype( qrtree, k, start ) : -1; - lp = svd_gettype( qrtree, k, pivot ); + if ( lp == LIBHQR_KILLED_BY_TS ) + return gmt; + myassert( lp >= ls ); switch( ls ) { + case LIBHQR_KILLED_BY_DISTTREE: + if( arg->hlvl != NULL ) { + tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); + if ( tmp != gmt ) + return tmp; + } + + start = pivot; + lstart = pivot / p; + case LIBHQR_KILLED_BY_DOMINO: - assert(0); - case -1: + /* If the tile is over the diagonal of step k, process it as type 2 */ + if ( arg->domino && lpivot < k ) { - if ( lp == LIBHQR_KILLED_BY_TS ) { - myassert( start == gmt ); - return gmt; + if ( ( start == pivot ) && + (start+p < gmt ) ) + return start+p; + + if ( lp > LIBHQR_KILLED_BY_LOCALTREE ) + return gmt; } - case LIBHQR_KILLED_BY_TS: - if ( start == gmt ) - nextp = pivot + p; - else - nextp = start + p; + start = pivot; + lstart = pivot / p; - if ( ( nextp < gmt ) && - ( nextp < pivot + a*p ) && - ( (nextp/p)%a != 0 ) ) - return nextp; - start = gmt; - lstart = ldd * a; + /* If it is the 'local' diagonal block, we go to 1 */ case LIBHQR_KILLED_BY_LOCALTREE: - /* Get the next pivot for the low level tree */ - tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a ); + /* If the tile is over the diagonal of step k and is of type 2, + it cannot annihilate type 0 or 1 */ + if ( arg->domino && lpivot < k ) + return gmt; + + tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a); if ( (tmp * a * p + rpivot >= gmt) - && (tmp == ldd-1) ) - tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp); + && (tmp == arg->llvl->ldd-1) ) + tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp); - if ( tmp != ldd ) + if ( tmp != arg->llvl->ldd ) return tmp * a * p + rpivot; - /* no next of type 1, we reset start to search the next 2 */ - start = gmt; - lstart = ldd * a; + start = pivot; - case LIBHQR_KILLED_BY_DISTTREE: + case LIBHQR_KILLED_BY_TS: + /* Search for predecessor in TS tree */ + /* if ( ( start+p < gmt ) && */ + /* ( (((start+p) / p) % a) != 0 ) ) */ + /* return start + p; */ - if ( lp < LIBHQR_KILLED_BY_DISTTREE ) { - return gmt; - } + if ( start == pivot ) { + tmp = lpivot + a - 1 - lpivot%a; + nextp = tmp * p + rpivot; - if( arg->hlvl != NULL ) { - tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start ); - if ( tmp != gmt ) - return tmp; + while( pivot < nextp && nextp >= gmt ) + nextp -= p; + } else { + nextp = start - p; /*(lstart - 1) * p + rpivot;*/ } + assert(nextp < gmt); + if ( pivot < nextp ) + return nextp; default: return gmt; } -} +}; -/** - * svd_prevpiv - Computes the previous row killed by the row p, before - * to kill the row start. - * - * @param[in] k - * Factorization step - * - * @param[in] pivot - * Line used as killer +/**************************************************** * - * @param[in] start - * Starting point to search the previous line killed by p before start - * start must be killed by p, and start must be greater or equal to p + * Generate the permutation required for the round-robin on TS * - * @return: - * - -1 if start doesn't respect the previous conditions - * - m, the previous row killed by p if it exists, A->mt otherwise - */ -static int -svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) + ***************************************************/ +static void +hqr_genperm( libhqr_tree_t *qrtree ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, ls, lp, nextp; - int lpivot, rpivot, lstart; - int *ipiv = svd_getipiv( arg, k ); - int a = ipiv[0]; - int ldd = ipiv[1]; + int m = qrtree->mt; + int n = qrtree->nt; + int a = qrtree->a; int p = qrtree->p; - int gmt = qrtree->mt; - - lpivot = pivot / p; /* Local index in the distribution over p domains */ - rpivot = pivot % p; /* Staring index in this distribution */ - lstart = start / p; /* Local index in the distribution over p domains */ - - myassert( start >= pivot && pivot >= k && start < gmt ); - myassert( start == pivot || pivot == svd_currpiv( qrtree, k, start ) ); - - /* TS level common to every case */ - ls = svd_gettype( qrtree, k, start ); - lp = svd_gettype( qrtree, k, pivot ); + int domino = arg->domino; + int minMN = libhqr_imin( m, n ); + int pa = p * a; + int i, j, k; + int nbextra1; + int end2; + int mpa = m % pa; + int endpa = m - mpa; + int *perm; - if ( lp == LIBHQR_KILLED_BY_TS ) - return gmt; + arg->perm = (int*)malloc( (m+1) * minMN * sizeof(int) ); + perm = arg->perm; - myassert( lp >= ls ); - switch( ls ) - { - case LIBHQR_KILLED_BY_DOMINO: - assert(0); - case LIBHQR_KILLED_BY_DISTTREE: - if( arg->hlvl != NULL ) { - tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); - if ( tmp != gmt ) - return tmp; + if ( arg->tsrr ) { + for(k=0; k<minMN; k++) { + for( i=0; i<m+1; i++) { + perm[i] = -1; } + perm += m+1; + } + perm = arg->perm; + for(k=0; k<minMN; k++) { + nbextra1 = nbextra1_formula; - start = pivot; - lstart = pivot / p; - - case LIBHQR_KILLED_BY_LOCALTREE: - tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a); + end2 = p + ( domino ? k*p : k + nbextra1 ); + end2 = (( end2 + pa - 1 ) / pa ) * pa; + end2 = libhqr_imin( end2, m ); - if ( (tmp * a * p + rpivot >= gmt) - && (tmp == ldd-1) ) - tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp); + /* + * All tiles of type 3, 2 and: + * - 1 when domino is disabled + * - 0 before the first multiple of pa under the considered diagonal + */ + for( i=k; i<end2; i++) { + perm[i] = i; + } - if ( tmp != ldd ) - return tmp * a * p + rpivot; + /* All permutations in groups of pa tiles */ + assert( i%pa == 0 || i>=endpa); + for( ; i<endpa; i+=pa ) { + for(j=0; j<pa; j++) { + perm[i+j] = i + ( j + p * (k%a) )%pa; + } + } - start = pivot; + /* Last group of tiles */ + for( ; i<m; i++) { + perm[i] = i; + } + perm[m] = m; + perm += m+1; + } + } + else { + for(k=0; k<minMN; k++) { + for( i=0; i<m+1; i++) { + perm[i] = i; + } + perm += m+1; + } + } +} - case LIBHQR_KILLED_BY_TS: - /* Search for predecessor in TS tree */ - /* if ( ( start+p < gmt ) && */ - /* ( (((start+p) / p) % a) != 0 ) ) */ - /* return perm[start + p]; */ +static inline int +hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m ) +{ + int gmt = qrtree->mt + 1; + int a = qrtree->a; + int p = qrtree->p; + int pa = p * a; + int start = m / pa * pa; + int stop = libhqr_imin( start + pa, gmt ) - start; + int i; - if ( start == pivot ) { - tmp = lpivot + a - 1 - lpivot%a; - nextp = tmp * p + rpivot; + if (a == 1) + return m; - while( pivot < nextp && nextp >= gmt ) - nextp -= p; - } else { - nextp = start - p; /*(lstart - 1) * p + rpivot;*/ - } - assert(nextp < gmt); - if ( pivot < nextp ) - return nextp; + for ( i=m%p; i < stop; i+=p ) { + //if( perm[i] == m ) + if( i == m ) + return i+start; + } - default: - return gmt; - } -}; + /* We should never arrive here */ + myassert( 0 ); +} /** ******************************************************************************* * - * @ingroup libhqr + * @ingroup dplasma * - * libhqr_svd_init - Create the tree structures that will describes the - * operation performed during QR/LQ reduction step of the gebrd_ge2gb operation. + * libhqr_hqr_init - Creates the tree structure that will describes the + * operation performed during QR/LQ factorization with parameterized QR/LQ + * algorithms family. * * Trees available parameters are described below. It is recommended to: * - set p to the same value than the P-by-Q process grid used to distribute @@ -2646,21 +692,33 @@ svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) * ******************************************************************************* * - * @param[in,out] qrtree + * @param[inout] qrtree * On entry, an allocated structure uninitialized. - * On exit, the structure initialized according to the given parameters. + * On exit, the structure initialized according to the parameter given. * * @param[in] trans - * @arg LIBHQR_QR: Structure is initialized for the QR steps. - * @arg LIBHQR_LQ: Structure is initialized for the LQ steps. + * @arg QR: Structure is initialized for QR factorization. + * @arg LQ: Structure is initialized for LQ factorization. * - * @param[in,out] A + * @param[in] A * Descriptor of the distributed matrix A to be factorized, on which - * QR/LQ reduction steps will be performed. In case, of - * R-bidiagonalization, don't forget to provide the square submatrix - * that is concerned by those operations. + * QR/LQ factorization will be performed. * The descriptor is untouched and only mt/nt/P parameters are used. * + * @param[in] type_llvl + * Defines the tree used to reduce the main tiles of each local domain + * together. The matrix of those tiles has a lower triangular structure + * with a diagonal by step a. + * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the local + * tiles. + * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the local + * tiles. + * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the + * local tiles. + * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the local + * tiles. + * @arg -1: The default is used (LIBHQR_GREEDY_TREE) + * * @param[in] type_hlvl * Defines the tree used to reduce the main tiles of each domain. This * is a band lower diagonal matrix of width p. @@ -2673,184 +731,358 @@ svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) * column and then duplicated on all others. * @arg -1: The default is used (LIBHQR_FIBONACCI_TREE) * + * @param[in] a + * Defines the size of the local domains on which a classic flat TS + * tree is performed. If a==1, then all local tiles are reduced by the + * type_lllvl tree. If a is larger than mt/p, then no local reduction + * tree is performed and type_llvl is ignored. + * If a == -1, it is set to 4 by default. + * * @param[in] p * Defines the number of distributed domains, ie the width of the high * level reduction tree. If p == 1, no high level reduction tree is - * used. If p == mt, this enforce the high level reduction tree to be - * performed on the full matrix. + * used. If p == mt, a and type_llvl are ignored since only high level + * reduction are performed. * By default, it is recommended to set p to P if trans == - * LIBHQR_QR, and to Q otherwise, where P-by-Q is the process grid - * used to distributed the data. (p > 0) + * PlasmaNoTrans, Q otherwise, where P-by-Q is the process grid used to + * distributed the data. (p > 0) * - * @param[in] nbthread_per_node - * Define the number of working threads per node to configure the - * adaptativ local tree to provide at least (ratio * nbthread_per_node) - * tasks per step when possible by creating the right amount of TS and - * TT kernels. + * @param[in] domino + * Enable/disable the domino effect that connects the high and low + * level reduction trees. Enabling the domino increases the proportion + * of TT (Triangle on top of Triangle) kernels that are less efficient, + * but increase the pipeline effect between independent factorization + * steps, reducin the critical path length. + * If disabled, it keeps the proprotion of more efficient TS (Triangle + * on top of Square) kernels high, but deteriorates the pipeline + * effect, and the critical path length. + * If domino == -1, it is enable when ration between M and N is lower + * than 1/2, and disabled otherwise. * - * @param[in] ratio - * Define the minimal number of tasks per thread that the adaptiv tree - * must provide at the lowest level of the tree. + * @param[in] tsrr + * Enable/Disable a round robin selection of the killer in local + * domains reduced by TS kernels. Enabling a round-robin selection of + * the killer allows to take benefit of the good pipelining of the flat + * trees and the high efficient TS kernels, while having other trees on + * top of it to reduce critical path length. + * WARNING: This option is under development and should not be enabled + * due to problem in corner cases. * ******************************************************************************* * - * @return - * \retval -i if the ith parameters is incorrect. - * \retval 0 on success. + * @retval -i if the ith parameters is incorrect. + * @retval 0 on success. * ******************************************************************************* * * @sa libhqr_hqr_finalize - * @sa libhqr_hqr_init - * @sa dplasma_zgeqrf_param - * @sa dplasma_cgeqrf_param - * @sa dplasma_dgeqrf_param - * @sa dplasma_sgeqrf_param + * @sa libhqr_systolic_init * ******************************************************************************/ int -libhqr_svd_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, - int type_hlvl, int p, int nbthread_per_node, int ratio ) +libhqr_initfct_hqr( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_llvl, int type_hlvl, + int a, int p, + int domino, int tsrr ) { - int low_mt, minMN, a = -1; + double ratio = 0.0; + int low_mt, minMN; hqr_args_t *arg; - libhqr_tree_t qrtree_init; if (qrtree == NULL) { - fprintf(stderr,"libhqr_svd_init, illegal value of qrtree"); - return -1; + fprintf(stderr, "libhqr_hqr_init, illegal value of qrtree"); + return -1; } if ((trans != LIBHQR_QR) && (trans != LIBHQR_LQ)) { - fprintf(stderr, "libhqr_svd_init, illegal value of trans"); - return -2; + fprintf(stderr, "libhqr_hqr_init, illegal value of trans"); + return -2; } if (A == NULL) { - fprintf(stderr, "libhqr_svd_init, illegal value of A"); - return -3; + fprintf(stderr, "libhqr_hqr_init, illegal value of A"); + return -3; } /* Compute parameters */ + a = (a == -1) ? 4 : libhqr_imax( a, 1 ); p = libhqr_imax( p, 1 ); - /* Create a temporary qrtree structure based on the functions */ - { - qrtree_init.getnbgeqrf = svd_getnbgeqrf; - qrtree_init.getm = svd_getm; - qrtree_init.geti = svd_geti; - qrtree_init.gettype = svd_gettype; - qrtree_init.currpiv = svd_currpiv; - qrtree_init.nextpiv = svd_nextpiv; - qrtree_init.prevpiv = svd_prevpiv; + /* Domino */ + if ( domino >= 0 ) { + domino = domino ? 1 : 0; + } + else { + if (trans == LIBHQR_QR) { + ratio = ((double)(A->nt) / (double)(A->mt)); + } else { + ratio = ((double)(A->mt) / (double)(A->nt)); + } + if ( ratio >= 0.5 ) { + domino = 0; + } else { + domino = 1; + } + } + + minMN = libhqr_imin(A->mt, A->nt); - qrtree_init.mt = (trans == LIBHQR_QR) ? A->mt : A->nt; - qrtree_init.nt = (trans == LIBHQR_QR) ? A->nt : A->mt; + qrtree->domino = domino; + qrtree->getnbgeqrf = hqr_getnbgeqrf; + qrtree->getm = hqr_getm; + qrtree->geti = hqr_geti; + qrtree->gettype = hqr_gettype; + qrtree->currpiv = hqr_currpiv; + qrtree->nextpiv = hqr_nextpiv; + qrtree->prevpiv = hqr_prevpiv; - qrtree_init.a = a; - qrtree_init.p = p; - qrtree_init.args = NULL; + qrtree->init = LIBHQR_QRTREE_HQR; - arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) ); - arg->domino = 0; - arg->tsrr = 0; - arg->perm = NULL; + qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt; + qrtree->nt = minMN; - arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); - arg->hlvl = NULL; + a = libhqr_imin( a, qrtree->mt ); - minMN = libhqr_imin(A->mt, A->nt); - low_mt = (qrtree_init.mt + p - 1) / ( p ); + qrtree->a = a; + qrtree->p = p; + qrtree->args = NULL; - arg->llvl->minMN = minMN; - arg->llvl->ldd = low_mt; - arg->llvl->a = a; - arg->llvl->p = p; - arg->llvl->domino = 0; + arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) ); + arg->domino = domino; + arg->tsrr = tsrr; + arg->perm = NULL; - svd_low_adaptiv_init(arg->llvl, qrtree_init.mt, qrtree_init.nt, - nbthread_per_node * (A->nodes / p), ratio ); + arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + arg->hlvl = NULL; + + low_mt = (qrtree->mt + p * a - 1) / ( p * a ); + + arg->llvl->minMN = minMN; + arg->llvl->ldd = low_mt; + arg->llvl->a = a; + arg->llvl->p = p; + arg->llvl->domino = domino; + + switch( type_llvl ) { + case LIBHQR_FLAT_TREE : + hqr_low_flat_init(arg->llvl); + break; + case LIBHQR_FIBONACCI_TREE : + hqr_low_fibonacci_init(arg->llvl, minMN); + break; + case LIBHQR_BINARY_TREE : + hqr_low_binary_init(arg->llvl); + break; + case LIBHQR_GREEDY1P_TREE : + hqr_low_greedy1p_init(arg->llvl, minMN); + break; + case LIBHQR_GREEDY_TREE : + default: + hqr_low_greedy_init(arg->llvl, minMN); + } - if ( p > 1 ) { - arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + if ( p > 1 ) { + arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); - arg->llvl->minMN = minMN; - arg->hlvl->ldd = qrtree_init.mt; - arg->hlvl->a = a; - arg->hlvl->p = p; - arg->hlvl->domino = 0; + arg->llvl->minMN = minMN; + arg->hlvl->ldd = qrtree->mt; + arg->hlvl->a = a; + arg->hlvl->p = p; + arg->hlvl->domino = domino; - switch( type_hlvl ) { - case LIBHQR_FLAT_TREE : + switch( type_hlvl ) { + case LIBHQR_FLAT_TREE : + hqr_high_flat_init(arg->hlvl); + break; + case LIBHQR_GREEDY_TREE : + hqr_high_greedy_init(arg->hlvl, minMN); + break; + case LIBHQR_GREEDY1P_TREE : + hqr_high_greedy1p_init(arg->hlvl); + break; + case LIBHQR_BINARY_TREE : + hqr_high_binary_init(arg->hlvl); + break; + case LIBHQR_FIBONACCI_TREE : + hqr_high_fibonacci_init(arg->hlvl); + break; + default: + if ( ratio >= 0.5 ) { hqr_high_flat_init(arg->hlvl); - break; - case LIBHQR_GREEDY_TREE : - hqr_high_greedy_init(arg->hlvl, minMN); - break; - case LIBHQR_GREEDY1P_TREE : - hqr_high_greedy1p_init(arg->hlvl); - break; - case LIBHQR_BINARY_TREE : - hqr_high_binary_init(arg->hlvl); - break; - case LIBHQR_FIBONACCI_TREE : - hqr_high_fibonacci_init(arg->hlvl); - break; - default: + } else { hqr_high_fibonacci_init(arg->hlvl); } } - - qrtree_init.args = (void*)arg; - hqr_genperm( &qrtree_init ); } - /* Initialize the final QR tree */ - memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) ); - qrtree->getnbgeqrf = svd_getnbgeqrf; - qrtree->getm = svd_getm; - qrtree->geti = svd_geti; - qrtree->gettype = rdmtx_gettype; - qrtree->currpiv = rdmtx_currpiv; - qrtree->nextpiv = rdmtx_nextpiv; - qrtree->prevpiv = rdmtx_prevpiv; - qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tileinfo_t) ); + qrtree->args = (void*)arg; + hqr_genperm( qrtree ); + + return 0; +} + +int +libhqr_initmtx_hqr( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_llvl, int type_hlvl, + int a, int p, + int domino, int tsrr ) +{ + libhqr_tree_t qrtreefct; + int rc; + + rc = libhqr_initfct_hqr( &qrtreefct, trans, A, + type_llvl, type_hlvl, a, p, + domino, tsrr ); + if ( rc != 0 ) { + return rc; + } - /* Initialize the matrix */ - libhqr_matrix_init(qrtree, &qrtree_init); + libhqr_fct_to_mtx( &qrtreefct, qrtree ); /* Free the initial qrtree */ - libhqr_hqr_finalize( &qrtree_init ); + libhqr_finalize( &qrtreefct ); return 0; } -void libhqr_matrix_init(libhqr_tree_t *qrtree, const libhqr_tree_t *qrtree_init){ - int i, minMN, tile_id, p, k; - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - minMN = libhqr_imin(qrtree->nt, qrtree->mt); - for (k = 0; k < minMN; k++){ - for (i = 0; i < qrtree->mt; i++){ - tile_id = (k * qrtree->mt) + i; - - arg[tile_id].type = qrtree_init->gettype(qrtree_init, k, i); - assert( (i < k) || (arg[tile_id].type >= 0) ); - arg[tile_id].currpiv = qrtree_init->currpiv(qrtree_init, k, i); - p = arg[tile_id].currpiv; - arg[tile_id].first_nextpiv = qrtree_init->nextpiv(qrtree_init, k, i, qrtree->mt); - arg[tile_id].first_prevpiv = qrtree_init->prevpiv(qrtree_init, k, i, i); - arg[tile_id].nextpiv = qrtree_init->nextpiv(qrtree_init, k, p, i); - arg[tile_id].prevpiv = qrtree_init->prevpiv(qrtree_init, k, p, i); - assert( (arg[tile_id].nextpiv <= qrtree->mt && arg[tile_id].nextpiv >= k) || arg[tile_id].nextpiv==-1 ); - assert( (arg[tile_id].prevpiv <= qrtree->mt && arg[tile_id].prevpiv >= k) || arg[tile_id].prevpiv==-1 ); - assert( (arg[tile_id].first_nextpiv <= qrtree->mt && arg[tile_id].first_nextpiv >= k) || arg[tile_id].first_nextpiv==-1 ); - assert( (arg[tile_id].first_prevpiv <= qrtree->mt && arg[tile_id].first_prevpiv >= k) || arg[tile_id].first_prevpiv==-1 ); +int +libhqr_init_hqr( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_llvl, int type_hlvl, + int a, int p, + int domino, int tsrr ) +{ + return libhqr_initmtx_hqr( qrtree, trans, A, + type_llvl, type_hlvl, a, p, + domino, tsrr ); + +} + +/** + * libhqr_walk_stepk + * + * @param[in] arg + * Arguments specific to the reduction tree used + * + * @param[in] k + * Factorization step + * + * @param[in] tiles + * Array of size qrtree->mt that stores in the first entries the tiles + * to kill in the right order to walk through the tree. + * + */ +void +libhqr_walk_stepk( const libhqr_tree_t *qrtree, + int k, int *tiles ) +{ + int tsid = -1, ttid = -1; + int p = k; + int pivot = k; + int m = 0; + libhqr_queue_tile_t *tt = libhqr_queue_tile_new(); + libhqr_queue_tile_t *ts = libhqr_queue_tile_new(); + libhqr_queue_tile_push(&tt, k); + + while( tt ) + { + /* Stack all elements killed on the way down */ + p = qrtree->prevpiv(qrtree, k, pivot, p); + while( p != qrtree->mt ) { + /* If it's a TS tile, or a TT at the bottom of the tree that kills noone */ + if( (qrtree->gettype(qrtree, k, p) == 0) || + (qrtree->prevpiv(qrtree, k, p, p) != qrtree->mt) ) + { + libhqr_queue_tile_push(&tt,p); + /* printf("PUSH TT: %d\n", p); */ + } + libhqr_queue_tile_push(&ts, p); + /* printf("PUSH TS: %d\n", p); */ + p = qrtree->prevpiv(qrtree, k, pivot, p); + assert( p != -1 ); + } + + tsid = libhqr_queue_tile_head(&ts); + ttid = libhqr_queue_tile_pop(&tt); + + /* Unstack all element killed by TS, or by TT and already discovered */ + while( (tsid != -1) && + (tsid != ttid) ) + { + /* printf( "TS=%d, TT=%d\n", tsid, ttid ); */ + + tsid = libhqr_queue_tile_pop(&ts); + /* printf("POP TS: %d\n", tsid); */ + /* printf("Call function on (%d, %d)\n", + qrtree->currpiv(qrtree, k, tsid), tsid ); */ + assert(m < (qrtree->mt-1)); + tiles[m] = tsid; + m++; + tsid = libhqr_queue_tile_head(&ts); } + pivot = p = ttid; } - qrtree->args = (void*)arg; + + //assert(m == (qrtree->mt-1)); + assert(ts == NULL); + assert(tt == NULL); } -void libhqr_matrix_finalize(libhqr_tree_t *qrtree){ - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - free(arg); +/** + ******************************************************************************* + * + * @ingroup dplasma + * + * libhqr_hqr_finalize - Cleans the qrtree data structure allocated by call to + * libhqr_hqr_init(). + * + ******************************************************************************* + * + * @param[in,out] qrtree + * On entry, an allocated structure to destroy. + * On exit, the structure is destroy and cannot be used. + * + ******************************************************************************* + * + * @sa libhqr_hqr_init + * + ******************************************************************************/ +void +libhqr_finalize( libhqr_tree_t *qrtree ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + + if ( (qrtree->init != LIBHQR_QRTREE_HQR) && + (qrtree->init != LIBHQR_QRTREE_SVD) && + (qrtree->init != LIBHQR_QRTREE_SYS) && + (qrtree->init != LIBHQR_QRTREE_MTX) ) + { + fprintf(stderr, "libhqr_finalize: qrtree not initialized\n" ); + return; + } + + if (arg != NULL) { + + if ( (qrtree->init == LIBHQR_QRTREE_HQR) || + (qrtree->init == LIBHQR_QRTREE_HQR) ) + { + if ( arg->llvl != NULL) { + if ( arg->llvl->ipiv != NULL ) + free( arg->llvl->ipiv ); + free( arg->llvl ); + } + + if ( arg->hlvl != NULL) { + if ( arg->hlvl->ipiv != NULL ) + free( arg->hlvl->ipiv ); + free( arg->hlvl ); + } + + if ( arg->perm != NULL ) + free(arg->perm); + } + + free(arg); + } } diff --git a/src/low_adaptiv.c b/src/low_adaptiv.c index d8abfd5101a30440c81e4b29859b06efc86a2f6e..ba6f30f8ec88472445307e33743168d1cddf3563 100644 --- a/src/low_adaptiv.c +++ b/src/low_adaptiv.c @@ -17,6 +17,7 @@ * */ #include "libhqr_internal.h" +#include <stdlib.h> /* Return the pivot to use for the row m at step k */ inline static int @@ -111,11 +112,9 @@ svd_low_adaptiv_init( hqr_subpiv_t *arg, } arg->ldd = maxmt + 2; - arg->ipiv = (int*)malloc( arg->ldd * minMN * sizeof(int) * p ); + arg->ipiv = (int*)calloc( arg->ldd * minMN * p, sizeof(int) ); ipiv = arg->ipiv; - memset( ipiv, 0, minMN*sizeof(int)*arg->ldd*p ); - for ( myrank=0; myrank<p; myrank++ ) { /** diff --git a/src/low_greedy1p.c b/src/low_greedy1p.c index 7d6fa5beaa5ac88999eff8486c97cdf43f3385dc..7ec8692c8a8c70a92a9e95032b2f2855a0932e3f 100644 --- a/src/low_greedy1p.c +++ b/src/low_greedy1p.c @@ -17,6 +17,7 @@ * */ #include "libhqr_internal.h" +#include <stdlib.h> /* Return the pivot to use for the row m at step k */ static inline int @@ -79,9 +80,8 @@ hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN) { arg->minMN = libhqr_imin( minMN, mt*a ); minMN = arg->minMN; - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) ); + arg->ipiv = (int*)calloc( mt * minMN, sizeof(int) ); ipiv = arg->ipiv; - memset(ipiv, 0, mt*minMN*sizeof(int)); /** * Compute the local greedy tree of each column, on each node @@ -110,11 +110,9 @@ hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN) { int myrank; end = 0; - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) * p ); + arg->ipiv = (int*)calloc( mt * minMN * p, sizeof(int) ); ipiv = arg->ipiv; - memset( ipiv, 0, minMN*sizeof(int)*mt*p); - for ( myrank=0; myrank<p; myrank++ ) { /** diff --git a/src/svd.c b/src/svd.c index f39bbfb15b7e775cae63d3f0040a6a6142270435..a7151a6ac0b292e8d2e6b4603d97af965339356d 100644 --- a/src/svd.c +++ b/src/svd.c @@ -78,11 +78,7 @@ * These lines are defined by (i-k)/p = 0. */ #include "libhqr_internal.h" -#include <assert.h> #include <stdlib.h> -#include <stdio.h> -#include <math.h> -#include <string.h> /* * Common functions @@ -558,16 +554,16 @@ libhqr_initfct_svd( libhqr_tree_t *qrtree, hqr_args_t *arg; if (qrtree == NULL) { - fprintf(stderr,"libhqr_svd_init, illegal value of qrtree"); + fprintf(stderr,"libhqr_initfct_svd: illegal value of qrtree"); return -1; } if ((trans != LIBHQR_QR) && (trans != LIBHQR_LQ)) { - fprintf(stderr, "libhqr_svd_init, illegal value of trans"); + fprintf(stderr, "libhqr_initfct_svd: illegal value of trans"); return -2; } if (A == NULL) { - fprintf(stderr, "libhqr_svd_init, illegal value of A"); + fprintf(stderr, "libhqr_initfct_svd: illegal value of A"); return -3; } @@ -651,28 +647,18 @@ libhqr_initmtx_svd( libhqr_tree_t *qrtree, libhqr_facto_e trans, libhqr_matrix_t *A, int type_hlvl, int p, int nbthread_per_node, int ratio ) { - libhqr_tree_t qrtree_init; + libhqr_tree_t qrtreefct; int rc; - rc = libhqr_initfct_svd( &qrtree_init, trans, A, type_hlvl, p, nbthread_per_node, ratio ); + rc = libhqr_initfct_svd( &qrtreefct, trans, A, type_hlvl, p, nbthread_per_node, ratio ); if ( rc != 0 ) { return rc; } - /* Initialize the final QR tree */ - memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) ); - qrtree->init = LIBHQR_QRTREE_MTX; - qrtree->gettype = rdmtx_gettype; - qrtree->currpiv = rdmtx_currpiv; - qrtree->nextpiv = rdmtx_nextpiv; - qrtree->prevpiv = rdmtx_prevpiv; - qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tile_info_t) ); - - /* Initialize the matrix */ - libhqr_matrix_init(qrtree, &qrtree_init); + libhqr_fct_to_mtx( &qrtreefct, qrtree ); /* Free the initial qrtree */ - libhqr_finalize( &qrtree_init ); + libhqr_finalize( &qrtreefct ); return 0; } diff --git a/src/systolic.c b/src/systolic.c index d66d3bb9666b06bd142ad055f2e7f84d1b665248..8e20a6994af552fe90df9b6f9256350fd0b5e57f 100644 --- a/src/systolic.c +++ b/src/systolic.c @@ -15,34 +15,24 @@ * @date 2017-03-21 * */ -#include "libhqr.h" -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#define PRINT_PIVGEN 0 -#ifdef PRINT_PIVGEN -#define myassert( test ) {if ( ! (test) ) return -1;} -#else -#define myassert(test) {assert((test)); return -1;} -#endif - -#define nbextra1_formula ( (k % pa) > (pa - p) ) ? (-k)%pa + pa : 0 - -static int systolic_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) +#include "libhqr_internal.h" + +static int +systolic_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { int pq = qrtree->p * qrtree->a; return libhqr_imin( pq, qrtree->mt - k); } -static int systolic_getm( const libhqr_tree_t *qrtree, int k, int i ) +static int +systolic_getm( const libhqr_tree_t *qrtree, int k, int i ) { (void)qrtree; return k+i; } -static int systolic_geti( const libhqr_tree_t *qrtree, int k, int m ) +static int +systolic_geti( const libhqr_tree_t *qrtree, int k, int m ) { (void)qrtree; return m-k; @@ -57,7 +47,8 @@ static int systolic_geti( const libhqr_tree_t *qrtree, int k, int m ) * 1 - if m is reduced thanks to the 2nd coordinate flat tree * 3 - if m is reduced thanks to the 1st coordinate flat tree */ -static int systolic_gettype( const libhqr_tree_t *qrtree, int k, int m ) { +static int +systolic_gettype( const libhqr_tree_t *qrtree, int k, int m ) { int p = qrtree->p; int q = qrtree->a; int pq = p * q; @@ -80,7 +71,8 @@ static int systolic_gettype( const libhqr_tree_t *qrtree, int k, int m ) { * Generic functions currpiv,prevpiv,nextpiv * ***************************************************/ -static int systolic_currpiv(const libhqr_tree_t *qrtree, int k, int m) +static int +systolic_currpiv(const libhqr_tree_t *qrtree, int k, int m) { int p = qrtree->p; int q = qrtree->a; @@ -100,7 +92,7 @@ static int systolic_currpiv(const libhqr_tree_t *qrtree, int k, int m) default: return qrtree->mt; } -}; +} /** * systolic_nextpiv - Computes the next row killed by the row p, after @@ -121,7 +113,8 @@ static int systolic_currpiv(const libhqr_tree_t *qrtree, int k, int m) * - -1 if start doesn't respect the previous conditions * - m, the following row killed by p if it exists, A->mt otherwise */ -static int systolic_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +static int +systolic_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { int ls, lp, nextp; int q = qrtree->a; @@ -210,7 +203,8 @@ static int systolic_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int s * - -1 if start doesn't respect the previous conditions * - m, the previous row killed by p if it exists, A->mt otherwise */ -static int systolic_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +static int +systolic_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { int ls, lp, nextp; int rpivot; @@ -290,7 +284,7 @@ static int systolic_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int s default: return mt; } -}; +} /** ******************************************************************************* @@ -349,94 +343,78 @@ static int systolic_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int s * ******************************************************************************/ int -libhqr_systolic_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, - int p, int q ) +libhqr_initfct_sys( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int p, int q ) { - libhqr_tree_t qrtree_init; - if (qrtree == NULL) { - fprintf(stderr, "libhqr_systolic_init, illegal value of qrtree"); + fprintf(stderr, "libhqr_initfct_sys: illegal value of qrtree"); return -1; } + if ((trans != LIBHQR_QR) && (trans != LIBHQR_LQ)) { - fprintf(stderr, "libhqr_systolic_init, illegal value of trans"); + fprintf(stderr, "libhqr_initfct_sys: illegal value of trans"); return -2; } if (A == NULL) { - fprintf(stderr, "libhqr_systolic_init, illegal value of A"); + fprintf(stderr, "libhqr_initfct_sys: illegal value of A"); return -3; } if ( p < 0 ) { - fprintf(stderr, "libhqr_systolic_init, illegal value of p"); + fprintf(stderr, "libhqr_initfct_sys: illegal value of p"); return -4; } if ( q < -1 ) { - fprintf(stderr, "libhqr_systolic_init, illegal value of q"); + fprintf(stderr, "libhqr_initfct_sys: illegal value of q"); return -5; } - /* Create a temporary qrtree structure based on the functions */ - { - qrtree_init.getnbgeqrf = systolic_getnbgeqrf; - qrtree_init.getm = systolic_getm; - qrtree_init.geti = systolic_geti; - qrtree_init.gettype = systolic_gettype; - qrtree_init.currpiv = systolic_currpiv; - qrtree_init.nextpiv = systolic_nextpiv; - qrtree_init.prevpiv = systolic_prevpiv; - - qrtree_init.mt = (trans == LIBHQR_QR) ? A->mt : A->nt; - qrtree_init.nt = (trans == LIBHQR_QR) ? A->nt : A->mt; - - qrtree_init.a = libhqr_imax( q, 1 ); - qrtree_init.p = libhqr_imax( p, 1 ); - qrtree_init.args = NULL; - } - - memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) ); qrtree->getnbgeqrf = systolic_getnbgeqrf; qrtree->getm = systolic_getm; qrtree->geti = systolic_geti; - qrtree->gettype = rdmtx_gettype; - qrtree->currpiv = rdmtx_currpiv; - qrtree->nextpiv = rdmtx_nextpiv; - qrtree->prevpiv = rdmtx_prevpiv; - qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tileinfo_t) ); + qrtree->gettype = systolic_gettype; + qrtree->currpiv = systolic_currpiv; + qrtree->nextpiv = systolic_nextpiv; + qrtree->prevpiv = systolic_prevpiv; - /*Initialize the matrix */ - libhqr_matrix_init(qrtree, &qrtree_init); + qrtree->init = LIBHQR_QRTREE_SYS; + qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt; + qrtree->nt = (trans == LIBHQR_QR) ? A->nt : A->mt; - /* Free the initial qrtree */ - libhqr_systolic_finalize( &qrtree_init ); + qrtree->a = libhqr_imax( q, 1 ); + qrtree->p = libhqr_imax( p, 1 ); + qrtree->args = NULL; return 0; } -/** - ******************************************************************************* - * - * @ingroup dplasma - * - * libhqr_systolic_finalize - Cleans the qrtree data structure allocated by - * call to libhqr_systolic_init(). - * - ******************************************************************************* - * - * @param[in,out] qrtree - * On entry, an allocated structure to destroy. - * On exit, the structure is destroy and cannot be used. - * - ******************************************************************************* - * - * @sa libhqr_systolic_init - * - ******************************************************************************/ -void -libhqr_systolic_finalize( libhqr_tree_t *qrtree ) + +int +libhqr_initmtx_sys( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int p, int q ) { - if ( qrtree->args != NULL) { - free( qrtree->args ); + libhqr_tree_t qrtreefct; + int rc; + + rc = libhqr_initfct_sys( &qrtreefct, trans, A, p, q ); + if ( rc != 0 ) { + return rc; } + + libhqr_fct_to_mtx( &qrtreefct, qrtree ); + + /* Free the initial qrtree */ + libhqr_finalize( &qrtreefct ); + + return 0; +} + +int +libhqr_init_sys( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int p, int q ) +{ + return libhqr_initmtx_sys( qrtree, trans, A, p, q ); } diff --git a/src/treedraw.c b/src/treedraw.c index 4a22317d6fd35609769721190dc67005035fb4e6..35836b2052d4d848271aa0031bc694e655e952b8 100644 --- a/src/treedraw.c +++ b/src/treedraw.c @@ -13,7 +13,6 @@ * @date 2017-04-04 * */ - #include <string.h> #include <stdio.h> #include "libhqr_draw.h" diff --git a/testings/CMakeLists.txt b/testings/CMakeLists.txt index 1a184dbcc280da64b5503b5eb43b43f9c1bbcc03..a0bf66c7da76e5e89297b81a7428c99feaf4e111 100644 --- a/testings/CMakeLists.txt +++ b/testings/CMakeLists.txt @@ -16,7 +16,6 @@ set(TESTINGS draw_systolic.c testing_hqr.c testing_systolic.c - testing_tileinit.c ) foreach (_file ${TESTINGS}) diff --git a/testings/draw_hqr.c b/testings/draw_hqr.c index 18b1bc9b8c6fb1f84434b895bf7bc338fd2b1577..8054276f7f348304b56f2fd0b55b98d9ab237c09 100644 --- a/testings/draw_hqr.c +++ b/testings/draw_hqr.c @@ -24,7 +24,7 @@ int main(int argc, char ** argv) { libhqr_tree_t qrtree; - libhqr_tiledesc_t matrix; + libhqr_matrix_t matrix; int iparam[IPARAM_SIZEOF]; /* Get options */ @@ -36,9 +36,9 @@ main(int argc, char ** argv) matrix.mt = MT; matrix.nt = NT; - libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); - libhqr_print_tree( &qrtree, &matrix ); - libhqr_matrix_finalize( &qrtree ); + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); + libhqr_print_svg( &qrtree, "hqr.svg" ); + libhqr_finalize( &qrtree ); return 1; } diff --git a/testings/draw_systolic.c b/testings/draw_systolic.c index 31252f86341e69c09b7b192a87294b68def835c8..2efcb796bb1c0b3ecf4158158bd737d451be3b5e 100644 --- a/testings/draw_systolic.c +++ b/testings/draw_systolic.c @@ -24,7 +24,7 @@ int main(int argc, char ** argv) { libhqr_tree_t qrtree; - libhqr_tiledesc_t matrix; + libhqr_matrix_t matrix; int iparam[IPARAM_SIZEOF]; /* Get options */ @@ -36,9 +36,9 @@ main(int argc, char ** argv) matrix.mt = MT; matrix.nt = NT; - libhqr_systolic_init( &qrtree, LIBHQR_QR, &matrix, P, Q ); - libhqr_print_tree( &qrtree, &matrix ); - libhqr_matrix_finalize( &qrtree ); + libhqr_init_sys( &qrtree, LIBHQR_QR, &matrix, P, Q ); + libhqr_print_svg( &qrtree, "systolic.svg" ); + libhqr_finalize( &qrtree ); return 1; } diff --git a/testings/testing_hqr.c b/testings/testing_hqr.c index e13f5529a5675f7e76954b932ff8bb92070c4064..15797e4b7c3a1798b81f817d83a332e687ce2db5 100644 --- a/testings/testing_hqr.c +++ b/testings/testing_hqr.c @@ -26,7 +26,7 @@ int main(int argc, char ** argv) { libhqr_tree_t qrtree; - libhqr_tiledesc_t matrix; + libhqr_matrix_t matrix; int iparam[IPARAM_SIZEOF]; int rc, ret = 0; @@ -90,11 +90,11 @@ main(int argc, char ** argv) if (tsrr==1 && a==1) continue; - libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); - rc = libhqr_tree_check( &matrix, &qrtree ); - libhqr_matrix_finalize( &qrtree ); + rc = libhqr_check( &matrix, &qrtree ); + libhqr_finalize( &qrtree ); if (rc != 0) { fprintf(stderr, "%s -M %d -N %d --treel=%d --qr_a=%d %s FAILED(%d)\n", @@ -150,11 +150,11 @@ main(int argc, char ** argv) if (tsrr==1 && a==1) continue; - libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); - rc = libhqr_tree_check( &matrix, &qrtree ); - libhqr_matrix_finalize( &qrtree ); + rc = libhqr_check( &matrix, &qrtree ); + libhqr_finalize( &qrtree ); if (rc != 0) { fprintf(stderr, "%s -M %d -N %d --treel=%d --treeh=%d --qr_a=%d --qr_p=%d -d=%d %s FAILED(%d)\n", @@ -180,21 +180,24 @@ main(int argc, char ** argv) matrix.mt = MT; matrix.nt = NT; - libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); - rc = libhqr_tree_check( &matrix, &qrtree ); - libhqr_matrix_finalize( &qrtree ); + rc = libhqr_check( &matrix, &qrtree ); + libhqr_finalize( &qrtree ); if (rc != 0) { fprintf(stderr, "%s -M %d -N %d --treel=%d --treeh=%d --qr_a=%d --qr_p=%d -d=%d %s FAILED(%d)\n", argv[0], MT, NT, llvl, hlvl, qr_a, qr_p, domino, (tsrr ? "-r" : " "), rc); ret |= 1; } + else { + fprintf(stderr, "%s -M %d -N %d --treel=%d --treeh=%d --qr_a=%d --qr_p=%d -d=%d %s SUCCESS\n", + argv[0], MT, NT, llvl, hlvl, qr_a, qr_p, domino, (tsrr ? "-r" : " ")); + } } if ( ret == 0 ) { - printf("\n"); return EXIT_SUCCESS; } else { diff --git a/testings/testing_systolic.c b/testings/testing_systolic.c index e39173f56ea2c4b40b9946b97e971993d9bcbdba..0291f84360f8e372861af75aba63e6ce852d03d1 100644 --- a/testings/testing_systolic.c +++ b/testings/testing_systolic.c @@ -26,7 +26,7 @@ int main(int argc, char ** argv) { libhqr_tree_t qrtree; - libhqr_tiledesc_t matrix; + libhqr_matrix_t matrix; int iparam[IPARAM_SIZEOF]; int rc, ret = 0; @@ -74,9 +74,9 @@ main(int argc, char ** argv) NT = all_nt[n]; matrix.nt = NT; - libhqr_systolic_init( &qrtree, LIBHQR_QR, &matrix, P, Q ); - rc = libhqr_tree_check( &matrix, &qrtree ); - libhqr_matrix_finalize( &qrtree ); + libhqr_init_sys( &qrtree, LIBHQR_QR, &matrix, P, Q ); + rc = libhqr_check( &matrix, &qrtree ); + libhqr_finalize( &qrtree ); if (rc != 0) { fprintf(stderr, "%s -M %d -N %d --qr_p=%d --qr_a=%d FAILED(%d)\n", @@ -98,21 +98,24 @@ main(int argc, char ** argv) matrix.mt = MT; matrix.nt = NT; - libhqr_systolic_init( &qrtree, LIBHQR_QR, &matrix, - qr_p, qr_a ); + libhqr_init_sys( &qrtree, LIBHQR_QR, &matrix, + qr_p, qr_a ); - rc = libhqr_tree_check( &matrix, &qrtree ); - libhqr_matrix_finalize( &qrtree ); + rc = libhqr_check( &matrix, &qrtree ); + libhqr_finalize( &qrtree ); if (rc != 0) { fprintf(stderr, "%s -M %d -N %d --qr_p=%d --qr_a=%d FAILED(%d)\n", argv[0], MT, NT, qr_p, qr_a, rc); ret |= 1; } + else { + fprintf(stderr, "%s -M %d -N %d --qr_p=%d --qr_a=%d SUCCESS\n", + argv[0], MT, NT, qr_p, qr_a); + } } if ( ret == 0 ) { - printf("\n"); return EXIT_SUCCESS; } else { diff --git a/testings/testing_tileinit.c b/testings/testing_tileinit.c index f75f8900c996ce9148aabc5fbd3a7924f52fa8cd..2e070add4017ebc385bb362d9de413b0e1f0b4ec 100644 --- a/testings/testing_tileinit.c +++ b/testings/testing_tileinit.c @@ -23,15 +23,15 @@ int main(int argc, char ** argv) { libhqr_tree_t qrtree; - libhqr_tiledesc_t matrix; + libhqr_matrix_t matrix; memset( &qrtree, 0xdeadbeef, sizeof(libhqr_tree_t) ); matrix.nodes = 1; matrix.p = 3; matrix.mt = 1; matrix.nt = 1; - libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, 0, 0, 1, 3, 0, 0); - if(libhqr_tree_check( &matrix, &qrtree )) return 0; - libhqr_matrix_finalize( &qrtree ); + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, 0, 0, 1, 3, 0, 0); + if(libhqr_check( &matrix, &qrtree )) return 0; + libhqr_finalize( &qrtree ); return 1; }