From 224bee6ed61a1cbd426eff5344978371903ba35b Mon Sep 17 00:00:00 2001 From: Raphael Boucherie <rboucher@zimbra.inria.fr> Date: Wed, 15 Mar 2017 13:08:22 +0100 Subject: [PATCH] Made libhqr independent, all test work --- include/{dplasma_qr_param.h => libhqr.h} | 130 ++++--- src/{dplasma_hqr.c => libhqr.c} | 343 +++++++++--------- src/{dplasma_hqr_dbg.c => libhqr_dbg.c} | 94 +++-- ...plasma_systolic_qr.c => libhqr_systolic.c} | 92 ++--- testings/testing_pivgen.c | 333 +++++++---------- 5 files changed, 477 insertions(+), 515 deletions(-) rename include/{dplasma_qr_param.h => libhqr.h} (54%) rename src/{dplasma_hqr.c => libhqr.c} (89%) rename src/{dplasma_hqr_dbg.c => libhqr_dbg.c} (86%) rename src/{dplasma_systolic_qr.c => libhqr_systolic.c} (79%) diff --git a/include/dplasma_qr_param.h b/include/libhqr.h similarity index 54% rename from include/dplasma_qr_param.h rename to include/libhqr.h index b487899..49b3f42 100644 --- a/include/dplasma_qr_param.h +++ b/include/libhqr.h @@ -6,34 +6,70 @@ * @precisions normal z -> z c d s * */ -#ifndef _DPLASMA_QR_PARAM_H_ -#define _DPLASMA_QR_PARAM_H_ +#ifndef _LIBHQR_H_ +#define _LIBHQR_H_ + +#undef BEGIN_C_DECLS +#undef END_C_DECLS +#if defined(c_plusplus) || defined(__cplusplus) +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else +#define BEGIN_C_DECLS /* empty */ +#define END_C_DECLS /* empty */ +#endif BEGIN_C_DECLS +static inline int libhqr_imin(int a, int b){ + return (a > b) ? b : a; +} + +static inline int libhqr_imax(int a, int b){ + return (a > b) ? a : b; +} + +static inline int libhqr_iceil(int a, int b){ + return (a + b - 1) / b; +} + /* - * DPLASMA_QR_KILLED_BY_TS needs to be set to 0 for all variant of QR + * LIBHQR_KILLED_BY_TS needs to be set to 0 for all variant of QR * factorization to distinguish TT kernels from TS kernels in jdf */ -typedef enum dplasma_qr_type_ { - DPLASMA_QR_KILLED_BY_TS = 0, - DPLASMA_QR_KILLED_BY_LOCALTREE = 1, - DPLASMA_QR_KILLED_BY_DOMINO = 2, - DPLASMA_QR_KILLED_BY_DISTTREE = 3, -} dplasma_qr_type_e; - -typedef enum dplasma_qr_tree_ { - DPLASMA_FLAT_TREE = 0, - DPLASMA_GREEDY_TREE = 1, - DPLASMA_FIBONACCI_TREE = 2, - DPLASMA_BINARY_TREE = 3, - DPLASMA_GREEDY1P_TREE = 4, -} dplasma_qr_tree_e; - -struct dplasma_qrtree_s; -typedef struct dplasma_qrtree_s dplasma_qrtree_t; - -struct dplasma_qrtree_s { +typedef enum libhqr_type_ { + LIBHQR_KILLED_BY_TS = 0, + LIBHQR_KILLED_BY_LOCALTREE = 1, + LIBHQR_KILLED_BY_DOMINO = 2, + LIBHQR_KILLED_BY_DISTTREE = 3, +} libhqr_type_e; + +typedef enum libhqr_tree_ { + LIBHQR_FLAT_TREE = 0, + LIBHQR_GREEDY_TREE = 1, + LIBHQR_FIBONACCI_TREE = 2, + LIBHQR_BINARY_TREE = 3, + LIBHQR_GREEDY1P_TREE = 4, +} libqr_tree_e; + +typedef enum libhqr_typefacto_ { + LIBHQR_QR = 0, + LIBHQR_LQ = 1, +} libhqr_typefacto_e; + +typedef struct libhqr_tiledesc_s{ + int mt; + int nt; + int nodes; + int p; +} libhqr_tiledesc_t; + +struct libhqr_tree_s; +typedef struct libhqr_tree_s libhqr_tree_t; + +typedef struct libhqr_context_s libhqr_context_t; + +struct libhqr_tree_s { /** * getnbgeqrf * @param[in] arg arguments specific to the reduction tree used @@ -41,7 +77,7 @@ struct dplasma_qrtree_s { * * @return The number of geqrt applied to the panel k */ - int (*getnbgeqrf)( const dplasma_qrtree_t *arg, int k ); + int (*getnbgeqrf)( const libhqr_tree_t *arg, int k ); /** * getm: @@ -51,7 +87,7 @@ struct dplasma_qrtree_s { * * @return The row index of the i-th geqrt applied on the panel k */ - int (*getm)( const dplasma_qrtree_t *arg, int k, int i ); + int (*getm)( const libhqr_tree_t *arg, int k, int i ); /** * geti: @@ -61,7 +97,7 @@ struct dplasma_qrtree_s { * * @returns the index in the list of geqrt applied to panel k */ - int (*geti)( const dplasma_qrtree_t *qrtree, int k, int m ); + int (*geti)( const libhqr_tree_t *qrtree, int k, int m ); /** * gettype: * @param[in] arg arguments specific to the reduction tree used @@ -72,7 +108,7 @@ struct dplasma_qrtree_s { * - 0 if it is a TS kernel * - >0 otherwise. (TT kernel) */ - int (*gettype)( const dplasma_qrtree_t *qrtree, int k, int m ); + int (*gettype)( const libhqr_tree_t *qrtree, int k, int m ); /** * currpiv * @param[in] arg arguments specific to the reduction tree used @@ -81,7 +117,7 @@ struct dplasma_qrtree_s { * * @return The index of the row annihilating the row m at step k */ - int (*currpiv)( const dplasma_qrtree_t *qrtree, int k, int m ); + int (*currpiv)( const libhqr_tree_t *qrtree, int k, int m ); /** * nextpiv * @param[in] arg arguments specific to the reduction tree used @@ -93,7 +129,7 @@ struct dplasma_qrtree_s { * @return the next line that the row p will kill during step k * desc->mt if p will never be used again as an annihilator. */ - int (*nextpiv)(const dplasma_qrtree_t *qrtree, int k, int p, int m); + int (*nextpiv)(const libhqr_tree_t *qrtree, int k, int p, int m); /** * prevpiv * @param[in] arg arguments specific to the reduction tree used @@ -105,7 +141,7 @@ struct dplasma_qrtree_s { * @return the previous line killed by the row p during step k * desc->mt if p has never been used before as an annihilator. */ - int (*prevpiv)(const dplasma_qrtree_t *qrtree, int k, int p, int m); + int (*prevpiv)(const libhqr_tree_t *qrtree, int k, int p, int m); /** Descriptor infos associated to the factorization */ int mt; @@ -117,34 +153,34 @@ struct dplasma_qrtree_s { void *args; }; -int dplasma_systolic_init( dplasma_qrtree_t *qrtree, - PLASMA_enum trans, tiled_matrix_desc_t *A, +int libhqr_systolic_init( libhqr_tree_t *qrtree, + libhqr_typefacto_e trans, libhqr_tiledesc_t *A, int p, int q ); -void dplasma_systolic_finalize( dplasma_qrtree_t *qrtree ); +void libhqr_systolic_finalize( libhqr_tree_t *qrtree ); -int dplasma_svd_init( dplasma_qrtree_t *qrtree, - PLASMA_enum trans, tiled_matrix_desc_t *A, +int libhqr_svd_init( libhqr_tree_t *qrtree, + libhqr_typefacto_e trans, libhqr_tiledesc_t *A, int type_hlvl, int p, int nbcores_per_node, int ratio ); -int dplasma_hqr_init( dplasma_qrtree_t *qrtree, - PLASMA_enum trans, tiled_matrix_desc_t *A, +int libhqr_hqr_init( libhqr_tree_t *qrtree, + libhqr_typefacto_e trans, libhqr_tiledesc_t *A, int type_llvl, int type_hlvl, int a, int p, int domino, int tsrr ); -void dplasma_hqr_finalize( dplasma_qrtree_t *qrtree ); +void libhqr_hqr_finalize( libhqr_tree_t *qrtree ); /* * Debugging functions */ -int dplasma_qrtree_check ( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree ); -void dplasma_qrtree_print_dag ( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, char *filename ); -void dplasma_qrtree_print_type ( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree ); -void dplasma_qrtree_print_pivot ( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree ); -void dplasma_qrtree_print_nbgeqrt( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree ); -void dplasma_qrtree_print_perm ( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, int *perm ); -void dplasma_qrtree_print_next_k ( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, int k ); -void dplasma_qrtree_print_prev_k ( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, int k ); -void dplasma_qrtree_print_geqrt_k( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, int k ); +int libhqr_tree_check ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); +void libhqr_tree_print_dag ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, char *filename ); +void libhqr_tree_print_type ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); +void libhqr_tree_print_pivot ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); +void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); +void libhqr_tree_print_perm ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *perm ); +void libhqr_tree_print_next_k ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ); +void libhqr_tree_print_prev_k ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ); +void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ); END_C_DECLS -#endif /* _DPLASMA_QR_PARAM_H_ */ +#endif /* _LIBHQR_H_ */ diff --git a/src/dplasma_hqr.c b/src/libhqr.c similarity index 89% rename from src/dplasma_hqr.c rename to src/libhqr.c index f91d4d2..413a4ca 100644 --- a/src/dplasma_hqr.c +++ b/src/libhqr.c @@ -70,14 +70,14 @@ * high level tree to reduce communications. * These lines are defined by (i-k)/p = 0. */ -#include "parsec.h" -#include "dplasma.h" -#include "dplasma_qr_param.h" - +#include "libhqr.h" +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> #include <math.h> -#if defined(PARSEC_HAVE_STRING_H) +//#if defined(LIBHQR_HAVE_STRING_H) #include <string.h> -#endif /* defined(PARSEC_HAVE_STRING_H) */ +//#endif /* defined(PARSEC_HAVE_STRING_H) */ #define PRINT_PIVGEN 0 #ifdef PRINT_PIVGEN @@ -145,14 +145,14 @@ struct hqr_subpiv_s { /* * Common functions */ -static int hqr_getnbgeqrf( const dplasma_qrtree_t *qrtree, int k ); -static int hqr_getm( const dplasma_qrtree_t *qrtree, int k, int i ); -static int hqr_geti( const dplasma_qrtree_t *qrtree, int k, int m ); -static int hqr_gettype( const dplasma_qrtree_t *qrtree, int k, int m ); +static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); +static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ); +static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ); +static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ); /* Permutation */ -static void hqr_genperm ( dplasma_qrtree_t *qrtree ); -static int hqr_getinvperm( const dplasma_qrtree_t *qrtree, int k, int m ); +static void hqr_genperm ( libhqr_tree_t *qrtree ); +static int hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m ); /* * Subtree for low-level @@ -181,7 +181,7 @@ static void hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN); * Return: * The number of geqrt to execute in the panel k */ -static int hqr_getnbgeqrf( const dplasma_qrtree_t *qrtree, int k ) { +static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int a = qrtree->a; int p = qrtree->p; @@ -216,9 +216,9 @@ static int hqr_getnbgeqrf( const dplasma_qrtree_t *qrtree, int k ) { nb_1 = (nb_12 - nb_11) / a; /* Add leftover */ - nb_1 += dplasma_imin( p, gmt - nb_12 ); + nb_1 += libhqr_imin( p, gmt - nb_12 ); - return dplasma_imin( nb_1 + nb_2 + nb_3, gmt - k); + return libhqr_imin( nb_1 + nb_2 + nb_3, gmt - k); } /* @@ -227,7 +227,7 @@ static int hqr_getnbgeqrf( const dplasma_qrtree_t *qrtree, int k ) { * Return: * The global indice m of the i th geqrt in the panel k */ -static int hqr_getm( const dplasma_qrtree_t *qrtree, int k, int i ) +static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int a = qrtree->a; @@ -261,7 +261,7 @@ static int hqr_getm( const dplasma_qrtree_t *qrtree, int k, int i ) * Return: * The index i of the geqrt in the panel k */ -static int hqr_geti( const dplasma_qrtree_t *qrtree, int k, int m ) +static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int a = qrtree->a; @@ -298,7 +298,7 @@ static int hqr_geti( const dplasma_qrtree_t *qrtree, int k, int m ) * 2 - if m is reduced thanks to the bubble tree * 3 - if m is reduced in distributed */ -static int hqr_gettype( const dplasma_qrtree_t *qrtree, int k, int m ) { +static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int a = qrtree->a; int p = qrtree->p; @@ -602,7 +602,7 @@ static void hqr_low_greedy_init(hqr_subpiv_t *arg, int minMN){ int j, k, height, start, end, firstk = 0; int *nT, *nZ; - arg->minMN = dplasma_imin( minMN, mt*a ); + arg->minMN = libhqr_imin( minMN, mt*a ); minMN = arg->minMN; arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) ); @@ -673,7 +673,7 @@ static void hqr_low_greedy_init(hqr_subpiv_t *arg, int minMN){ nT[0] = mt; for(k=0; k<lminMN; k++) { - nT2DO[k] = dplasma_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); + nT2DO[k] = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); if ( nT2DO[k] == 0 ) { lminMN = k; break; @@ -806,7 +806,7 @@ static void hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN){ /* This section has not been coded yet, and will perform a classic greedy */ if ( domino ) { - arg->minMN = dplasma_imin( minMN, mt*a ); + arg->minMN = libhqr_imin( minMN, mt*a ); minMN = arg->minMN; arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) ); @@ -818,7 +818,7 @@ static void hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN){ */ for(k=0; k<minMN; k++) { /* Number of tiles to factorized in this column on this rank */ - nT = dplasma_imax( mt - (k / a), 0 ); + nT = libhqr_imax( mt - (k / a), 0 ); /* Number of tiles already killed */ nZ = 0; @@ -852,7 +852,7 @@ static void hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN){ */ for(k=0; k<minMN; k++) { /* Number of tiles to factorized in this column on this rank */ - nT = dplasma_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); + nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); /* Number of tiles already killed */ nZ = 0; @@ -934,7 +934,7 @@ static int hqr_high_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int star assert( arg->p > 1 ); if ( p == k && arg->ldd > 1 ) { if ( start == p && p != arg->ldd-1 ) - return dplasma_imin( p + arg->p - 1, arg->ldd - 1 ); + return libhqr_imin( p + arg->p - 1, arg->ldd - 1 ); else if ( start > p + 1 && (start-k < arg->p)) return start-1; } @@ -1000,7 +1000,7 @@ static int hqr_high_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int st if ( (start == p) && ( offset%2 == 0 ) ) { int i, bit, tmp; if ( offset == 0 ) - bit = (int)( log( (double)( dplasma_imin(arg->p, arg->ldd - k) ) ) / log( 2. ) ); + bit = (int)( log( (double)( libhqr_imin(arg->p, arg->ldd - k) ) ) / log( 2. ) ); else { bit = 0; while( (offset & (1 << bit )) == 0 ) @@ -1043,7 +1043,7 @@ inline static int hqr_high_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int lp = p - k; int lstart= start - k; - int end = dplasma_imin(qrpiv->ldd-k, qrpiv->p); + int end = libhqr_imin(qrpiv->ldd-k, qrpiv->p); for( i=lstart+1; i<end; i++ ) if ( (qrpiv->ipiv)[i] == lp ) return i+k; @@ -1055,7 +1055,7 @@ inline static int hqr_high_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int i; myassert( p>=k && (start == qrpiv->ldd || start-k <= qrpiv->p) ); - for( i=dplasma_imin(start-k-1, qrpiv->p-1); i>0; i-- ) + for( i=libhqr_imin(start-k-1, qrpiv->p-1); i>0; i-- ) if ( (qrpiv->ipiv)[i] == (p-k) ) return i + k; return (qrpiv->ldd); @@ -1120,9 +1120,9 @@ static void hqr_high_greedy1p_init(hqr_subpiv_t *arg){ memset( nZ, 0, minMN*sizeof(int)); nT[0] = mt; - nZ[0] = dplasma_imax( mt - p, 0 ); + nZ[0] = libhqr_imax( mt - p, 0 ); for(k=1; k<minMN; k++) { - height = dplasma_imax(mt-k-p, 0); + height = libhqr_imax(mt-k-p, 0); nT[k] = height; nZ[k] = height; } @@ -1173,7 +1173,7 @@ static int hqr_high_greedy_nextpiv(const hqr_subpiv_t *arg, int k, int p, int st { int i; myassert( (start >= k && start < k+arg->p) || start == arg->ldd ); - for( i=dplasma_imin(start-1, k+arg->p-1); i > k; i-- ) + for( i=libhqr_imin(start-1, k+arg->p-1); i > k; i-- ) if ( (arg->ipiv)[i-k + k* (arg->p)] == p ) return i; return (arg->ldd); @@ -1212,9 +1212,9 @@ static void hqr_high_greedy_init(hqr_subpiv_t *arg, int minMN){ memset( nZ, 0, minMN*sizeof(int)); nT[0] = mt; - nZ[0] = dplasma_imax( mt - p, 0 ); + nZ[0] = libhqr_imax( mt - p, 0 ); for(k=1; k<minMN; k++) { - height = dplasma_imax(mt-k-p, 0); + height = libhqr_imax(mt-k-p, 0); nT[k] = height; nZ[k] = height; } @@ -1257,7 +1257,7 @@ static void hqr_high_greedy_init(hqr_subpiv_t *arg, int minMN){ * Generic functions currpiv,prevpiv,nextpiv * ***************************************************/ -static int hqr_currpiv(const dplasma_qrtree_t *qrtree, int k, int m) +static int hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int tmp, tmpk, perm_m; @@ -1346,7 +1346,7 @@ static int hqr_currpiv(const dplasma_qrtree_t *qrtree, int k, int m) * - -1 if start doesn't respect the previous conditions * - m, the following row killed by p if it exists, A->mt otherwise */ -static int hqr_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) +static int hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int tmp, ls, lp, nextp; @@ -1380,12 +1380,12 @@ static int hqr_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int sta { case -1: - if ( lp == DPLASMA_QR_KILLED_BY_TS ) { + if ( lp == LIBHQR_KILLED_BY_TS ) { myassert( start == gmt ); return gmt; } - case DPLASMA_QR_KILLED_BY_TS: + case LIBHQR_KILLED_BY_TS: /* If the tile is over the diagonal of step k, skip directly to type 2 */ if ( arg->domino && lpivot < k ) @@ -1403,7 +1403,7 @@ static int hqr_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int sta start = gmt; lstart = arg->llvl->ldd * a; - case DPLASMA_QR_KILLED_BY_LOCALTREE: + case LIBHQR_KILLED_BY_LOCALTREE: /* If the tile is over the diagonal of step k, skip directly to type 2 */ if ( arg->domino && lpivot < k ) @@ -1424,9 +1424,9 @@ static int hqr_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int sta start = gmt; lstart = arg->llvl->ldd * a; - case DPLASMA_QR_KILLED_BY_DOMINO: + case LIBHQR_KILLED_BY_DOMINO: - if ( lp < DPLASMA_QR_KILLED_BY_DOMINO ) { + if ( lp < LIBHQR_KILLED_BY_DOMINO ) { return gmt; } @@ -1442,9 +1442,9 @@ static int hqr_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int sta start = gmt; lstart = arg->llvl->ldd * a; - case DPLASMA_QR_KILLED_BY_DISTTREE: + case LIBHQR_KILLED_BY_DISTTREE: - if ( lp < DPLASMA_QR_KILLED_BY_DISTTREE ) { + if ( lp < LIBHQR_KILLED_BY_DISTTREE ) { return gmt; } @@ -1478,7 +1478,7 @@ static int hqr_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int sta * - m, the previous row killed by p if it exists, A->mt otherwise */ static int -hqr_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) +hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int tmp, ls, lp, nextp; @@ -1505,13 +1505,13 @@ hqr_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) ls = hqr_gettype( qrtree, k, ostart ); lp = hqr_gettype( qrtree, k, opivot ); - if ( lp == DPLASMA_QR_KILLED_BY_TS ) + if ( lp == LIBHQR_KILLED_BY_TS ) return gmt; myassert( lp >= ls ); switch( ls ) { - case DPLASMA_QR_KILLED_BY_DISTTREE: + case LIBHQR_KILLED_BY_DISTTREE: if( arg->hlvl != NULL ) { tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); if ( tmp != gmt ) @@ -1521,7 +1521,7 @@ hqr_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) start = pivot; lstart = pivot / p; - case DPLASMA_QR_KILLED_BY_DOMINO: + case LIBHQR_KILLED_BY_DOMINO: /* If the tile is over the diagonal of step k, process it as type 2 */ if ( arg->domino && lpivot < k ) { @@ -1529,7 +1529,7 @@ hqr_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) (start+p < gmt ) ) return perm[start+p]; - if ( lp > DPLASMA_QR_KILLED_BY_LOCALTREE ) + if ( lp > LIBHQR_KILLED_BY_LOCALTREE ) return gmt; } @@ -1538,7 +1538,7 @@ hqr_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) /* If it is the 'local' diagonal block, we go to 1 */ - case DPLASMA_QR_KILLED_BY_LOCALTREE: + case LIBHQR_KILLED_BY_LOCALTREE: /* If the tile is over the diagonal of step k and is of type 2, it cannot annihilate type 0 or 1 */ if ( arg->domino && lpivot < k ) @@ -1555,7 +1555,7 @@ hqr_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) start = pivot; - case DPLASMA_QR_KILLED_BY_TS: + case LIBHQR_KILLED_BY_TS: /* Search for predecessor in TS tree */ /* if ( ( start+p < gmt ) && */ /* ( (((start+p) / p) % a) != 0 ) ) */ @@ -1585,7 +1585,7 @@ hqr_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) * ***************************************************/ static void -hqr_genperm( dplasma_qrtree_t *qrtree ) +hqr_genperm( libhqr_tree_t *qrtree ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int m = qrtree->mt; @@ -1593,7 +1593,7 @@ hqr_genperm( dplasma_qrtree_t *qrtree ) int a = qrtree->a; int p = qrtree->p; int domino = arg->domino; - int minMN = dplasma_imin( m, n ); + int minMN = libhqr_imin( m, n ); int pa = p * a; int i, j, k; int nbextra1; @@ -1618,7 +1618,7 @@ hqr_genperm( dplasma_qrtree_t *qrtree ) end2 = p + ( domino ? k*p : k + nbextra1 ); end2 = (( end2 + pa - 1 ) / pa ) * pa; - end2 = dplasma_imin( end2, m ); + end2 = libhqr_imin( end2, m ); /* * All tiles of type 3, 2 and: @@ -1656,7 +1656,7 @@ hqr_genperm( dplasma_qrtree_t *qrtree ) } static int -hqr_getinvperm( const dplasma_qrtree_t *qrtree, int k, int m ) +hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int gmt = qrtree->mt + 1; @@ -1664,7 +1664,7 @@ hqr_getinvperm( const dplasma_qrtree_t *qrtree, int k, int m ) int p = qrtree->p; int pa = p * a; int start = m / pa * pa; - int stop = dplasma_imin( start + pa, gmt ) - start; + int stop = libhqr_imin( start + pa, gmt ) - start; int *perm = arg->perm + gmt * k + start; int i; @@ -1685,18 +1685,18 @@ hqr_getinvperm( const dplasma_qrtree_t *qrtree, int k, int m ) * * @ingroup dplasma * - * dplasma_hqr_init - Creates the tree structure that will describes the + * libhqr_hqr_init - Creates the tree structure that will describes the * operation performed during QR/LQ factorization with parameterized QR/LQ * algorithms family. * * Trees available parameters are described below. It is recommended to: * - set p to the same value than the P-by-Q process grid used to distribute * the data. (P for QR factorization, Q for LQ factorization). - * - set the low level tree to DPLASMA_GREEDY_TREE. + * - set the low level tree to LIBHQR_GREEDY_TREE. * - set the high level tree to: - * 1) DPLASMA_FLAT_TREE when the problem is square, because it divides + * 1) LIBHQR_FLAT_TREE when the problem is square, because it divides * by two the volume of communication of any other tree. - * 2) DPLASMA_FIBONACCI_TREE when the problem is tall and skinny (QR) or + * 2) LIBHQR_FIBONACCI_TREE when the problem is tall and skinny (QR) or * small and fat (LQ), because it reduces the critical path length. * - Disable the domino effect when problem is square, to keep high efficiency * kernel proportion high. @@ -1729,27 +1729,27 @@ hqr_getinvperm( const dplasma_qrtree_t *qrtree, int k, int m ) * Defines the tree used to reduce the main tiles of each local domain * together. The matrix of those tiles has a lower triangular structure * with a diagonal by step a. - * @arg DPLASMA_FLAT_TREE: A Flat tree is used to reduce the local + * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the local * tiles. - * @arg DPLASMA_GREEDY_TREE: A Greedy tree is used to reduce the local + * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the local * tiles. - * @arg DPLASMA_FIBONACCI_TREE: A Fibonacci tree is used to reduce the + * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the * local tiles. - * @arg DPLASMA_BINARY_TREE: A Binary tree is used to reduce the local + * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the local * tiles. - * @arg -1: The default is used (DPLASMA_GREEDY_TREE) + * @arg -1: The default is used (LIBHQR_GREEDY_TREE) * * @param[in] type_hlvl * Defines the tree used to reduce the main tiles of each domain. This * is a band lower diagonal matrix of width p. - * @arg DPLASMA_FLAT_TREE: A Flat tree is used to reduce the tiles. - * @arg DPLASMA_GREEDY_TREE: A Greedy tree is used to reduce the tiles. - * @arg DPLASMA_FIBONACCI_TREE: A Fibonacci tree is used to reduce the + * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the tiles. + * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the tiles. + * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the * tiles. - * @arg DPLASMA_BINARY_TREE: A Binary tree is used to reduce the tiles. - * @arg DPLASMA_GREEDY1P_TREE: A Greedy tree is computed for the first + * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the tiles. + * @arg LIBHQR_GREEDY1P_TREE: A Greedy tree is computed for the first * column and then duplicated on all others. - * @arg -1: The default is used (DPLASMA_FIBONACCI_TREE) + * @arg -1: The default is used (LIBHQR_FIBONACCI_TREE) * * @param[in] a * Defines the size of the local domains on which a classic flat TS @@ -1796,8 +1796,8 @@ hqr_getinvperm( const dplasma_qrtree_t *qrtree, int k, int m ) * ******************************************************************************* * - * @sa dplasma_hqr_finalize - * @sa dplasma_systolic_init + * @sa libhqr_hqr_finalize + * @sa libhqr_systolic_init * @sa dplasma_zgeqrf_param * @sa dplasma_cgeqrf_param * @sa dplasma_dgeqrf_param @@ -1805,8 +1805,8 @@ hqr_getinvperm( const dplasma_qrtree_t *qrtree, int k, int m ) * ******************************************************************************/ int -dplasma_hqr_init( dplasma_qrtree_t *qrtree, - PLASMA_enum trans, tiled_matrix_desc_t *A, +libhqr_hqr_init( libhqr_tree_t *qrtree, + libhqr_typefacto_e trans, libhqr_tiledesc_t *A, int type_llvl, int type_hlvl, int a, int p, int domino, int tsrr ) @@ -1816,30 +1816,29 @@ dplasma_hqr_init( dplasma_qrtree_t *qrtree, hqr_args_t *arg; if (qrtree == NULL) { - dplasma_error("dplasma_hqr_init", "illegal value of qrtree"); - return -1; + fprintf(stderr, "libhqr_hqr_init, illegal value of qrtree"); + return -1; } - if ((trans != PlasmaNoTrans) && - (trans != PlasmaTrans) && - (trans != PlasmaConjTrans)) { - dplasma_error("dplasma_hqr_init", "illegal value of trans"); - return -2; + if ((trans != LIBHQR_QR) && + (trans != LIBHQR_LQ)) { + fprintf(stderr, "libhqr_hqr_ini, illegal value of trans"); + return -2; } if (A == NULL) { - dplasma_error("dplasma_hqr_init", "illegal value of A"); - return -3; + fprintf(stderr, "libhqr_hqr_init, illegal value of A"); + return -3; } /* Compute parameters */ - a = (a == -1) ? 4 : dplasma_imax( a, 1 ); - p = dplasma_imax( p, 1 ); + a = (a == -1) ? 4 : libhqr_imax( a, 1 ); + p = libhqr_imax( p, 1 ); /* Domino */ if ( domino >= 0 ) { domino = domino ? 1 : 0; } else { - if (trans == PlasmaNoTrans) { + if (trans == LIBHQR_QR) { ratio = ((double)(A->nt) / (double)(A->mt)); } else { ratio = ((double)(A->mt) / (double)(A->nt)); @@ -1859,10 +1858,10 @@ dplasma_hqr_init( dplasma_qrtree_t *qrtree, qrtree->nextpiv = hqr_nextpiv; qrtree->prevpiv = hqr_prevpiv; - qrtree->mt = (trans == PlasmaNoTrans) ? A->mt : A->nt; - qrtree->nt = (trans == PlasmaNoTrans) ? A->nt : A->mt; + qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt; + qrtree->nt = (trans == LIBHQR_QR) ? A->nt : A->mt; - a = dplasma_imin( a, qrtree->mt ); + a = libhqr_imin( a, qrtree->mt ); qrtree->a = a; qrtree->p = p; @@ -1876,7 +1875,7 @@ dplasma_hqr_init( dplasma_qrtree_t *qrtree, arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); arg->hlvl = NULL; - minMN = dplasma_imin(A->mt, A->nt); + minMN = libhqr_imin(A->mt, A->nt); low_mt = (qrtree->mt + p * a - 1) / ( p * a ); arg->llvl->minMN = minMN; @@ -1886,19 +1885,19 @@ dplasma_hqr_init( dplasma_qrtree_t *qrtree, arg->llvl->domino = domino; switch( type_llvl ) { - case DPLASMA_FLAT_TREE : + case LIBHQR_FLAT_TREE : hqr_low_flat_init(arg->llvl); break; - case DPLASMA_FIBONACCI_TREE : + case LIBHQR_FIBONACCI_TREE : hqr_low_fibonacci_init(arg->llvl, minMN); break; - case DPLASMA_BINARY_TREE : + case LIBHQR_BINARY_TREE : hqr_low_binary_init(arg->llvl); break; - case DPLASMA_GREEDY1P_TREE : + case LIBHQR_GREEDY1P_TREE : hqr_low_greedy1p_init(arg->llvl, minMN); break; - case DPLASMA_GREEDY_TREE : + case LIBHQR_GREEDY_TREE : default: hqr_low_greedy_init(arg->llvl, minMN); } @@ -1913,19 +1912,19 @@ dplasma_hqr_init( dplasma_qrtree_t *qrtree, arg->hlvl->domino = domino; switch( type_hlvl ) { - case DPLASMA_FLAT_TREE : + case LIBHQR_FLAT_TREE : hqr_high_flat_init(arg->hlvl); break; - case DPLASMA_GREEDY_TREE : + case LIBHQR_GREEDY_TREE : hqr_high_greedy_init(arg->hlvl, minMN); break; - case DPLASMA_GREEDY1P_TREE : + case LIBHQR_GREEDY1P_TREE : hqr_high_greedy1p_init(arg->hlvl); break; - case DPLASMA_BINARY_TREE : + case LIBHQR_BINARY_TREE : hqr_high_binary_init(arg->hlvl); break; - case DPLASMA_FIBONACCI_TREE : + case LIBHQR_FIBONACCI_TREE : hqr_high_fibonacci_init(arg->hlvl); break; default: @@ -1948,8 +1947,8 @@ dplasma_hqr_init( dplasma_qrtree_t *qrtree, * * @ingroup dplasma * - * dplasma_hqr_finalize - Cleans the qrtree data structure allocated by call to - * dplasma_hqr_init(). + * libhqr_hqr_finalize - Cleans the qrtree data structure allocated by call to + * libhqr_hqr_init(). * ******************************************************************************* * @@ -1959,11 +1958,11 @@ dplasma_hqr_init( dplasma_qrtree_t *qrtree, * ******************************************************************************* * - * @sa dplasma_hqr_init + * @sa libhqr_hqr_init * ******************************************************************************/ void -dplasma_hqr_finalize( dplasma_qrtree_t *qrtree ) +libhqr_hqr_finalize( libhqr_tree_t *qrtree ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); @@ -1991,10 +1990,10 @@ dplasma_hqr_finalize( dplasma_qrtree_t *qrtree ) /* * Common functions */ -static int svd_getnbgeqrf( const dplasma_qrtree_t *qrtree, int k ); -static int svd_getm( const dplasma_qrtree_t *qrtree, int k, int i ); -static int svd_geti( const dplasma_qrtree_t *qrtree, int k, int m ); -static int svd_gettype( const dplasma_qrtree_t *qrtree, int k, int m ); +static int svd_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); +static int svd_getm( const libhqr_tree_t *qrtree, int k, int i ); +static int svd_geti( const libhqr_tree_t *qrtree, int k, int m ); +static int svd_gettype( const libhqr_tree_t *qrtree, int k, int m ); #define svd_getipiv( __qrtree, _k ) ((__qrtree)->llvl->ipiv + ((__qrtree)->llvl->ldd) * (_k) ) #define svd_geta( __qrtree, _k ) ( (svd_getipiv( (__qrtree), (_k) ))[0] ) @@ -2006,7 +2005,7 @@ static int svd_gettype( const dplasma_qrtree_t *qrtree, int k, int m ); * The number of geqrt to execute in the panel k */ static int -svd_getnbgeqrf( const dplasma_qrtree_t *qrtree, +svd_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); @@ -2033,9 +2032,9 @@ svd_getnbgeqrf( const dplasma_qrtree_t *qrtree, nb_1 = (nb_12 - nb_11) / a; /* Add leftover */ - nb_1 += dplasma_imin( p, gmt - nb_12 ); + nb_1 += libhqr_imin( p, gmt - nb_12 ); - return dplasma_imin( nb_1 + nb_2 + nb_3, gmt - k); + return libhqr_imin( nb_1 + nb_2 + nb_3, gmt - k); } /* @@ -2045,7 +2044,7 @@ svd_getnbgeqrf( const dplasma_qrtree_t *qrtree, * The global indice m of the i th geqrt in the panel k */ static int -svd_getm( const dplasma_qrtree_t *qrtree, +svd_getm( const libhqr_tree_t *qrtree, int k, int i ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); @@ -2075,7 +2074,7 @@ svd_getm( const dplasma_qrtree_t *qrtree, * The index i of the geqrt in the panel k */ static int -svd_geti( const dplasma_qrtree_t *qrtree, +svd_geti( const libhqr_tree_t *qrtree, int k, int m ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); @@ -2109,7 +2108,7 @@ svd_geti( const dplasma_qrtree_t *qrtree, * 3 - if m is reduced in distributed */ static int -svd_gettype( const dplasma_qrtree_t *qrtree, +svd_gettype( const libhqr_tree_t *qrtree, int k, int m ) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); @@ -2190,7 +2189,7 @@ svd_low_adaptiv_init(hqr_subpiv_t *arg, int *ipiv; int mt, a, p, pa, maxmt, myrank; int j, k, height, start, end, nT, nZ; - int minMN = dplasma_imin(gmt, gnt); + int minMN = libhqr_imin(gmt, gnt); arg->currpiv = svd_low_adaptiv_currpiv; arg->nextpiv = svd_low_adaptiv_nextpiv; @@ -2212,16 +2211,16 @@ svd_low_adaptiv_init(hqr_subpiv_t *arg, * so, * a <= mt * (gnt-k) / (ratio * nbcores ) */ - height = dplasma_iceil( gmt-k, p ); - a = dplasma_imax( height * (gnt-k) / (ratio * nbcores), 1 ); + height = libhqr_iceil( gmt-k, p ); + a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 ); /* Now let's make sure all sub-parts are equilibrate */ - j = dplasma_iceil( height, a ); - a = dplasma_iceil( gmt-k, j ); + j = libhqr_iceil( height, a ); + a = libhqr_iceil( gmt-k, j ); /* Compute max dimension of the tree */ - mt = dplasma_iceil( gmt, p * a ); - maxmt = dplasma_imax( mt, maxmt ); + mt = libhqr_iceil( gmt, p * a ); + maxmt = libhqr_imax( mt, maxmt ); } arg->ldd = maxmt + 2; @@ -2243,15 +2242,15 @@ svd_low_adaptiv_init(hqr_subpiv_t *arg, * so, * a <= mt * (gnt-k) / (ratio * nbcores ) */ - height = dplasma_iceil( gmt-k, p ); - a = dplasma_imax( height * (gnt-k) / (ratio * nbcores), 1 ); + height = libhqr_iceil( gmt-k, p ); + a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 ); /* Now let's make sure all sub-parts are equilibrate */ - j = dplasma_iceil( height, a ); - a = dplasma_iceil( gmt-k, j ); + j = libhqr_iceil( height, a ); + a = libhqr_iceil( gmt-k, j ); pa = p * a; - mt = dplasma_iceil( gmt, pa ); + mt = libhqr_iceil( gmt, pa ); ipiv[0] = a; ipiv[1] = mt; @@ -2259,7 +2258,7 @@ svd_low_adaptiv_init(hqr_subpiv_t *arg, assert( mt < arg->ldd-1 ); /* Number of tiles to factorized in this column on this rank */ - nT = dplasma_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); + nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); /* Number of tiles already killed */ nZ = 0; @@ -2321,7 +2320,7 @@ svd_low_adaptiv_init(hqr_subpiv_t *arg, * Generic functions currpiv,prevpiv,nextpiv * ***************************************************/ -static int svd_currpiv(const dplasma_qrtree_t *qrtree, int k, int m) +static int svd_currpiv(const libhqr_tree_t *qrtree, int k, int m) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int tmp, tmpk; @@ -2379,7 +2378,7 @@ static int svd_currpiv(const dplasma_qrtree_t *qrtree, int k, int m) * - -1 if start doesn't respect the previous conditions * - m, the following row killed by p if it exists, A->mt otherwise */ -static int svd_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) +static int svd_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int tmp, ls, lp, nextp; @@ -2404,16 +2403,16 @@ static int svd_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int sta switch( ls ) { - case DPLASMA_QR_KILLED_BY_DOMINO: + case LIBHQR_KILLED_BY_DOMINO: assert(0); case -1: - if ( lp == DPLASMA_QR_KILLED_BY_TS ) { + if ( lp == LIBHQR_KILLED_BY_TS ) { myassert( start == gmt ); return gmt; } - case DPLASMA_QR_KILLED_BY_TS: + case LIBHQR_KILLED_BY_TS: if ( start == gmt ) nextp = pivot + p; else @@ -2426,7 +2425,7 @@ static int svd_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int sta start = gmt; lstart = ldd * a; - case DPLASMA_QR_KILLED_BY_LOCALTREE: + case LIBHQR_KILLED_BY_LOCALTREE: /* Get the next pivot for the low level tree */ tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a ); @@ -2441,9 +2440,9 @@ static int svd_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int sta start = gmt; lstart = ldd * a; - case DPLASMA_QR_KILLED_BY_DISTTREE: + case LIBHQR_KILLED_BY_DISTTREE: - if ( lp < DPLASMA_QR_KILLED_BY_DISTTREE ) { + if ( lp < LIBHQR_KILLED_BY_DISTTREE ) { return gmt; } @@ -2477,7 +2476,7 @@ static int svd_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int sta * - m, the previous row killed by p if it exists, A->mt otherwise */ static int -svd_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) +svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int tmp, ls, lp, nextp; @@ -2499,15 +2498,15 @@ svd_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) ls = svd_gettype( qrtree, k, start ); lp = svd_gettype( qrtree, k, pivot ); - if ( lp == DPLASMA_QR_KILLED_BY_TS ) + if ( lp == LIBHQR_KILLED_BY_TS ) return gmt; myassert( lp >= ls ); switch( ls ) { - case DPLASMA_QR_KILLED_BY_DOMINO: + case LIBHQR_KILLED_BY_DOMINO: assert(0); - case DPLASMA_QR_KILLED_BY_DISTTREE: + case LIBHQR_KILLED_BY_DISTTREE: if( arg->hlvl != NULL ) { tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); if ( tmp != gmt ) @@ -2517,7 +2516,7 @@ svd_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) start = pivot; lstart = pivot / p; - case DPLASMA_QR_KILLED_BY_LOCALTREE: + case LIBHQR_KILLED_BY_LOCALTREE: tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a); if ( (tmp * a * p + rpivot >= gmt) @@ -2529,7 +2528,7 @@ svd_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) start = pivot; - case DPLASMA_QR_KILLED_BY_TS: + case LIBHQR_KILLED_BY_TS: /* Search for predecessor in TS tree */ /* if ( ( start+p < gmt ) && */ /* ( (((start+p) / p) % a) != 0 ) ) */ @@ -2558,17 +2557,17 @@ svd_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) * * @ingroup dplasma * - * dplasma_svd_init - Create the tree structures that will describes the + * libhqr_svd_init - Create the tree structures that will describes the * operation performed during QR/LQ reduction step of the gebrd_ge2gb operation. * * Trees available parameters are described below. It is recommended to: * - set p to the same value than the P-by-Q process grid used to distribute * the data. (P for QR factorization, Q for LQ factorization). - * - set the low level tree to DPLASMA_GREEDY_TREE. + * - set the low level tree to LIBHQR_GREEDY_TREE. * - set the high level tree to: - * 1) DPLASMA_FLAT_TREE when the problem is square, because it divides + * 1) LIBHQR_FLAT_TREE when the problem is square, because it divides * by two the volume of communication of any other tree. - * 2) DPLASMA_FIBONACCI_TREE when the problem is tall and skinny (QR) or + * 2) LIBHQR_FIBONACCI_TREE when the problem is tall and skinny (QR) or * small and fat (LQ), because it reduces the critical path length. * - Disable the domino effect when problem is square, to keep high efficiency * kernel proportion high. @@ -2588,9 +2587,8 @@ svd_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) * On exit, the structure initialized according to the given parameters. * * @param[in] trans - * @arg PlasmaNoTrans: Structure is initialized for the QR steps. - * @arg PlasmaTrans: Structure is initialized for the LQ steps. - * @arg PlasmaConjTrans: Structure is initialized for the LQ steps. + * @arg LIBHQR_QR: Structure is initialized for the QR steps. + * @arg LIBHQR_LQ: Structure is initialized for the LQ steps. * * @param[in,out] A * Descriptor of the distributed matrix A to be factorized, on which @@ -2602,14 +2600,14 @@ svd_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) * @param[in] type_hlvl * Defines the tree used to reduce the main tiles of each domain. This * is a band lower diagonal matrix of width p. - * @arg DPLASMA_FLAT_TREE: A Flat tree is used to reduce the tiles. - * @arg DPLASMA_GREEDY_TREE: A Greedy tree is used to reduce the tiles. - * @arg DPLASMA_FIBONACCI_TREE: A Fibonacci tree is used to reduce the + * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the tiles. + * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the tiles. + * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the * tiles. - * @arg DPLASMA_BINARY_TREE: A Binary tree is used to reduce the tiles. - * @arg DPLASMA_GREEDY1P_TREE: A Greedy tree is computed for the first + * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the tiles. + * @arg LIBHQR_GREEDY1P_TREE: A Greedy tree is computed for the first * column and then duplicated on all others. - * @arg -1: The default is used (DPLASMA_FIBONACCI_TREE) + * @arg -1: The default is used (LIBHQR_FIBONACCI_TREE) * * @param[in] p * Defines the number of distributed domains, ie the width of the high @@ -2617,7 +2615,7 @@ svd_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) * used. If p == mt, this enforce the high level reduction tree to be * performed on the full matrix. * By default, it is recommended to set p to P if trans == - * PlasmaNoTrans, and to Q otherwise, where P-by-Q is the process grid + * LIBHQR_QR, and to Q otherwise, where P-by-Q is the process grid * used to distributed the data. (p > 0) * * @param[in] nbthread_per_node @@ -2638,8 +2636,8 @@ svd_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) * ******************************************************************************* * - * @sa dplasma_hqr_finalize - * @sa dplasma_hqr_init + * @sa libhqr_hqr_finalize + * @sa libhqr_hqr_init * @sa dplasma_zgeqrf_param * @sa dplasma_cgeqrf_param * @sa dplasma_dgeqrf_param @@ -2647,30 +2645,29 @@ svd_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) * ******************************************************************************/ int -dplasma_svd_init( dplasma_qrtree_t *qrtree, - PLASMA_enum trans, tiled_matrix_desc_t *A, +libhqr_svd_init( libhqr_tree_t *qrtree, + libhqr_typefacto_e trans, libhqr_tiledesc_t *A, int type_hlvl, int p, int nbthread_per_node, int ratio ) { int low_mt, minMN, a = -1; hqr_args_t *arg; if (qrtree == NULL) { - dplasma_error("dplasma_svd_init", "illegal value of qrtree"); + fprintf(stderr,"libhqr_svd_init, illegal value of qrtree"); return -1; } - if ((trans != PlasmaNoTrans) && - (trans != PlasmaTrans) && - (trans != PlasmaConjTrans)) { - dplasma_error("dplasma_svd_init", "illegal value of trans"); + if ((trans != LIBHQR_QR) && + (trans != LIBHQR_LQ)) { + fprintf(stderr, "libhqr_svd_init, illegal value of trans"); return -2; } if (A == NULL) { - dplasma_error("dplasma_svd_init", "illegal value of A"); + fprintf(stderr, "libhqr_svd_init, illegal value of A"); return -3; } /* Compute parameters */ - p = dplasma_imax( p, 1 ); + p = libhqr_imax( p, 1 ); qrtree->getnbgeqrf = svd_getnbgeqrf; qrtree->getm = svd_getm; @@ -2680,8 +2677,8 @@ dplasma_svd_init( dplasma_qrtree_t *qrtree, qrtree->nextpiv = svd_nextpiv; qrtree->prevpiv = svd_prevpiv; - qrtree->mt = (trans == PlasmaNoTrans) ? A->mt : A->nt; - qrtree->nt = (trans == PlasmaNoTrans) ? A->nt : A->mt; + qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt; + qrtree->nt = (trans == LIBHQR_QR) ? A->nt : A->mt; qrtree->a = a; qrtree->p = p; @@ -2695,7 +2692,7 @@ dplasma_svd_init( dplasma_qrtree_t *qrtree, arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); arg->hlvl = NULL; - minMN = dplasma_imin(A->mt, A->nt); + minMN = libhqr_imin(A->mt, A->nt); low_mt = (qrtree->mt + p - 1) / ( p ); arg->llvl->minMN = minMN; @@ -2705,7 +2702,7 @@ dplasma_svd_init( dplasma_qrtree_t *qrtree, arg->llvl->domino = 0; svd_low_adaptiv_init(arg->llvl, qrtree->mt, qrtree->nt, - nbthread_per_node * (A->super.nodes / p), ratio ); + nbthread_per_node * (A->nodes / p), ratio ); if ( p > 1 ) { arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); @@ -2717,19 +2714,19 @@ dplasma_svd_init( dplasma_qrtree_t *qrtree, arg->hlvl->domino = 0; switch( type_hlvl ) { - case DPLASMA_FLAT_TREE : + case LIBHQR_FLAT_TREE : hqr_high_flat_init(arg->hlvl); break; - case DPLASMA_GREEDY_TREE : + case LIBHQR_GREEDY_TREE : hqr_high_greedy_init(arg->hlvl, minMN); break; - case DPLASMA_GREEDY1P_TREE : + case LIBHQR_GREEDY1P_TREE : hqr_high_greedy1p_init(arg->hlvl); break; - case DPLASMA_BINARY_TREE : + case LIBHQR_BINARY_TREE : hqr_high_binary_init(arg->hlvl); break; - case DPLASMA_FIBONACCI_TREE : + case LIBHQR_FIBONACCI_TREE : hqr_high_fibonacci_init(arg->hlvl); break; default: diff --git a/src/dplasma_hqr_dbg.c b/src/libhqr_dbg.c similarity index 86% rename from src/dplasma_hqr_dbg.c rename to src/libhqr_dbg.c index 33b412f..023f1c0 100644 --- a/src/dplasma_hqr_dbg.c +++ b/src/libhqr_dbg.c @@ -70,15 +70,13 @@ * high level tree to reduce communications. * These lines are defined by (i-k)/p = 0. */ -#include "parsec.h" -#include "dplasma.h" -#include "dplasmatypes.h" -#include "dplasmaaux.h" - +#include "libhqr.h" +#include <stdio.h> +#include <stdlib.h> #include <math.h> -#if defined(PARSEC_HAVE_STRING_H) +//#if defined(LIBHQR_HAVE_STRING_H) #include <string.h> -#endif /* defined(PARSEC_HAVE_STRING_H) */ +//#endif /* defined(LIBHQR_HAVE_STRING_H) */ /* static int dplasma_qrtree_getinon0( const qr_piv_t *arg, */ /* const int k, int i, int mt ); */ @@ -87,9 +85,9 @@ if ( !test ) \ return ret; -int dplasma_qrtree_check( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree) +int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) { - int minMN = dplasma_imin(A->mt, A->nt ); + int minMN = libhqr_imin(A->mt, A->nt ); int i, m, k, nb; int check; @@ -100,8 +98,8 @@ int dplasma_qrtree_check( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree) * Check Formula for NB geqrt */ { - /* dplasma_qrtree_print_type( A, qrtree ); */ - /* dplasma_qrtree_print_nbgeqrt( A, qrtree ); */ + /* libhqr_tree_print_type( A, qrtree ); */ + /* libhqr_tree_print_nbgeqrt( A, qrtree ); */ check = 1; for (k=0; k<minMN; k++) { nb = 0; @@ -130,7 +128,7 @@ int dplasma_qrtree_check( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree) int prevm = -1; check = 1; for (k=0; k<minMN; k++) { - /* dplasma_qrtree_print_geqrt_k( A, qrtree, k ); */ + /* libhqr_tree_print_geqrt_k( A, qrtree, k ); */ nb = qrtree->getnbgeqrf( qrtree, k ); prevm = -1; for (i=0; i < nb; i++) { @@ -138,7 +136,7 @@ int dplasma_qrtree_check( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree) m = qrtree->getm( qrtree, k, i ); /* - * getm ahas to be the inverse of geti + * getm has to be the inverse of geti */ if ( i != qrtree->geti( qrtree, k, m) ) { check = 0; @@ -191,8 +189,8 @@ int dplasma_qrtree_check( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree) nb++; } if ( nb > 1 ) { - dplasma_qrtree_print_next_k( A, qrtree, k); - dplasma_qrtree_print_prev_k( A, qrtree, k); + libhqr_tree_print_next_k( A, qrtree, k); + libhqr_tree_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -203,8 +201,8 @@ int dplasma_qrtree_check( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree) return 3; } else if ( nb == 0 ) { - dplasma_qrtree_print_next_k( A, qrtree, k); - dplasma_qrtree_print_prev_k( A, qrtree, k); + libhqr_tree_print_next_k( A, qrtree, k); + libhqr_tree_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -234,8 +232,8 @@ int dplasma_qrtree_check( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree) nb++; } if ( nb > 1 ) { - dplasma_qrtree_print_next_k( A, qrtree, k); - dplasma_qrtree_print_prev_k( A, qrtree, k); + libhqr_tree_print_next_k( A, qrtree, k); + libhqr_tree_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -246,8 +244,8 @@ int dplasma_qrtree_check( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree) return 3; } else if ( nb == 0 ) { - dplasma_qrtree_print_next_k( A, qrtree, k); - dplasma_qrtree_print_prev_k( A, qrtree, k); + libhqr_tree_print_next_k( A, qrtree, k); + libhqr_tree_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -281,8 +279,8 @@ int dplasma_qrtree_check( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree) prev = qrtree->prevpiv(qrtree, k, m, next); if ( start != prev ) { - dplasma_qrtree_print_next_k( A, qrtree, k); - dplasma_qrtree_print_prev_k( A, qrtree, k); + libhqr_tree_print_next_k( A, qrtree, k); + libhqr_tree_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -303,9 +301,9 @@ int dplasma_qrtree_check( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree) return 0; } -void dplasma_qrtree_print_type( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree ) +void libhqr_tree_print_type( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) { - int minMN = dplasma_imin(A->mt, A->nt ); + int minMN = libhqr_imin(A->mt, A->nt ); int m, k; int lm = 0; int lmg = 0; @@ -314,19 +312,19 @@ void dplasma_qrtree_print_type( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree printf("\n------------ Localization = Type of pivot --------------\n"); for(m=0; m<A->mt; m++) { printf("%3d | ", m); - for (k=0; k<dplasma_imin(minMN, m+1); k++) { + for (k=0; k<libhqr_imin(minMN, m+1); k++) { printf( "%3d ", qrtree->gettype( qrtree, k, m ) ); } - for (k=dplasma_imin(minMN, m+1); k<minMN; k++) { + for (k=libhqr_imin(minMN, m+1); k<minMN; k++) { printf( " " ); } printf(" "); printf("%2d,%3d | ", rank, lmg); - for (k=0; k<dplasma_imin(minMN, lmg+1); k++) { + for (k=0; k<libhqr_imin(minMN, lmg+1); k++) { printf( "%3d ", qrtree->gettype( qrtree, k, lmg) ); } - for (k=dplasma_imin(minMN, lmg+1); k<minMN; k++) { + for (k=libhqr_imin(minMN, lmg+1); k<minMN; k++) { printf( " " ); } lm++; lmg+=qrtree->p; @@ -339,9 +337,9 @@ void dplasma_qrtree_print_type( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree } } -void dplasma_qrtree_print_pivot( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree ) +void libhqr_tree_print_pivot( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) { - int minMN = dplasma_imin(A->mt, A->nt ); + int minMN = libhqr_imin(A->mt, A->nt ); int m, k; int lm = 0; int lmg = 0; @@ -349,19 +347,19 @@ void dplasma_qrtree_print_pivot( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtre printf("\n------------ Current Pivot--------------\n"); for(m=0; m<A->mt; m++) { printf("%3d | ", m); - for (k=0; k<dplasma_imin(minMN, m+1); k++) { + for (k=0; k<libhqr_imin(minMN, m+1); k++) { printf( "%3d ", qrtree->currpiv(qrtree, k, m) ); } - for (k=dplasma_imin(minMN, m+1); k<minMN; k++) { + for (k=libhqr_imin(minMN, m+1); k<minMN; k++) { printf( " " ); } printf(" "); printf("%2d,%3d | ", rank, lmg); - for (k=0; k<dplasma_imin(minMN, lmg+1); k++) { + for (k=0; k<libhqr_imin(minMN, lmg+1); k++) { printf( "%3d ", qrtree->currpiv(qrtree, k, lmg) ); } - for (k=dplasma_imin(minMN, lmg+1); k<minMN; k++) { + for (k=libhqr_imin(minMN, lmg+1); k<minMN; k++) { printf( " " ); } lm++; lmg+=qrtree->p; @@ -374,7 +372,7 @@ void dplasma_qrtree_print_pivot( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtre } } -void dplasma_qrtree_print_next_k( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, int k ) +void libhqr_tree_print_next_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ) { int m, s; printf("\n------------ Next (k = %d)--------------\n", k); @@ -393,7 +391,7 @@ void dplasma_qrtree_print_next_k( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtr } } -void dplasma_qrtree_print_prev_k( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, int k ) +void libhqr_tree_print_prev_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ) { int m, s; printf("\n------------ Prev (k = %d)--------------\n", k); @@ -412,9 +410,9 @@ void dplasma_qrtree_print_prev_k( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtr } } -void dplasma_qrtree_print_perm( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, int *perm ) +void libhqr_tree_print_perm( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *perm ) { - int minMN = dplasma_imin(A->mt, A->nt ); + int minMN = libhqr_imin(A->mt, A->nt ); int m, k; (void)qrtree; @@ -437,9 +435,9 @@ void dplasma_qrtree_print_perm( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree printf( "\n" ); } -void dplasma_qrtree_print_nbgeqrt( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree ) +void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) { - int minMN = dplasma_imin(A->mt, A->nt ); + int minMN = libhqr_imin(A->mt, A->nt ); int m, k, nb; printf("\n------------ Nb GEQRT per k --------------\n"); @@ -465,7 +463,7 @@ void dplasma_qrtree_print_nbgeqrt( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrt printf( "\n" ); } -void dplasma_qrtree_print_geqrt_k( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, int k ) +void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ) { int i, m, nb; (void)A; @@ -485,12 +483,12 @@ void dplasma_qrtree_print_geqrt_k( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrt } -/* static int dplasma_qrtree_getinon0( const dplasma_qrtree_t *qrtree, */ +/* static int libhqr_tree_getinon0( const libhqr_tree_t *qrtree, */ /* const int k, int i, int mt ) */ /* { */ /* int j; */ /* for(j=k; j<mt; j++) { */ -/* if ( dplasma_qrtree_gettype( qrtree, k, j ) != 0 ) */ +/* if ( libhqr_tree_gettype( qrtree, k, j ) != 0 ) */ /* i--; */ /* if ( i == -1 ) */ /* break; */ @@ -523,11 +521,11 @@ char *color[] = { }; #define DAG_NBCOLORS 7 -void dplasma_qrtree_print_dag( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, char *filename ) +void libhqr_tree_print_dag( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, char *filename ) { int *pos, *next, *done; int k, m, n, lpos, prev, length; - int minMN = dplasma_imin( A->mt, A->nt ); + int minMN = libhqr_imin( A->mt, A->nt ); FILE *f = fopen( filename, "w" ); done = (int*)malloc( A->mt * sizeof(int) ); @@ -557,7 +555,7 @@ void dplasma_qrtree_print_dag( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, if ( next[n] != A->mt ) continue; if ( n != A->mt ) { - lpos = dplasma_imax( pos[m], pos[n] ); + lpos = libhqr_imax( pos[m], pos[n] ); lpos++; pos[m] = lpos; pos[n] = lpos; @@ -592,7 +590,7 @@ void dplasma_qrtree_print_dag( tiled_matrix_desc_t *A, dplasma_qrtree_t *qrtree, length = 0; for(m=0; m < A->mt; m++) { - length = dplasma_imax(length, pos[m]); + length = libhqr_imax(length, pos[m]); } length++; for(k=0; k<length; k++) diff --git a/src/dplasma_systolic_qr.c b/src/libhqr_systolic.c similarity index 79% rename from src/dplasma_systolic_qr.c rename to src/libhqr_systolic.c index fa10912..f76dbf8 100644 --- a/src/dplasma_systolic_qr.c +++ b/src/libhqr_systolic.c @@ -10,14 +10,15 @@ * used in the Xgeqrf_param.jdf file. * */ -#include "parsec.h" -#include "dplasma.h" -#include "dplasma_qr_param.h" +#include "libhqr.h" +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> #include <math.h> -#if defined(PARSEC_HAVE_STRING_H) +//#if defined(LIBHQR_HAVE_STRING_H) #include <string.h> -#endif /* defined(PARSEC_HAVE_STRING_H) */ +//#endif /* defined(LIBHQR_HAVE_STRING_H) */ #define PRINT_PIVGEN 0 #ifdef PRINT_PIVGEN @@ -28,19 +29,19 @@ #define nbextra1_formula ( (k % pa) > (pa - p) ) ? (-k)%pa + pa : 0 -static int systolic_getnbgeqrf( const dplasma_qrtree_t *qrtree, int k ) +static int systolic_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { int pq = qrtree->p * qrtree->a; - return dplasma_imin( pq, qrtree->mt - k); + return libhqr_imin( pq, qrtree->mt - k); } -static int systolic_getm( const dplasma_qrtree_t *qrtree, int k, int i ) +static int systolic_getm( const libhqr_tree_t *qrtree, int k, int i ) { (void)qrtree; return k+i; } -static int systolic_geti( const dplasma_qrtree_t *qrtree, int k, int m ) +static int systolic_geti( const libhqr_tree_t *qrtree, int k, int m ) { (void)qrtree; return m-k; @@ -55,7 +56,7 @@ static int systolic_geti( const dplasma_qrtree_t *qrtree, int k, int m ) * 1 - if m is reduced thanks to the 2nd coordinate flat tree * 3 - if m is reduced thanks to the 1st coordinate flat tree */ -static int systolic_gettype( const dplasma_qrtree_t *qrtree, int k, int m ) { +static int systolic_gettype( const libhqr_tree_t *qrtree, int k, int m ) { int p = qrtree->p; int q = qrtree->a; int pq = p * q; @@ -78,7 +79,7 @@ static int systolic_gettype( const dplasma_qrtree_t *qrtree, int k, int m ) { * Generic functions currpiv,prevpiv,nextpiv * ***************************************************/ -static int systolic_currpiv(const dplasma_qrtree_t *qrtree, int k, int m) +static int systolic_currpiv(const libhqr_tree_t *qrtree, int k, int m) { int p = qrtree->p; int q = qrtree->a; @@ -119,7 +120,7 @@ static int systolic_currpiv(const dplasma_qrtree_t *qrtree, int k, int m) * - -1 if start doesn't respect the previous conditions * - m, the following row killed by p if it exists, A->mt otherwise */ -static int systolic_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) +static int systolic_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { int ls, lp, nextp; int q = qrtree->a; @@ -138,12 +139,12 @@ static int systolic_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, in { case -1: - if ( lp == DPLASMA_QR_KILLED_BY_TS ) { + if ( lp == LIBHQR_KILLED_BY_TS ) { myassert( start == mt ); return mt; } - case DPLASMA_QR_KILLED_BY_TS: + case LIBHQR_KILLED_BY_TS: if ( start == mt ) nextp = pivot + pq; @@ -155,9 +156,9 @@ static int systolic_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, in start = mt; - case DPLASMA_QR_KILLED_BY_LOCALTREE: + case LIBHQR_KILLED_BY_LOCALTREE: - if (lp < DPLASMA_QR_KILLED_BY_DISTTREE) + if (lp < LIBHQR_KILLED_BY_DISTTREE) return mt; if ( start == mt ) @@ -172,7 +173,7 @@ static int systolic_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, in start = mt; - case DPLASMA_QR_KILLED_BY_DISTTREE: + case LIBHQR_KILLED_BY_DISTTREE: if (pivot > k) return mt; @@ -208,7 +209,7 @@ static int systolic_nextpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, in * - -1 if start doesn't respect the previous conditions * - m, the previous row killed by p if it exists, A->mt otherwise */ -static int systolic_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, int start) +static int systolic_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { int ls, lp, nextp; int rpivot; @@ -226,13 +227,13 @@ static int systolic_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, in ls = systolic_gettype( qrtree, k, start ); lp = systolic_gettype( qrtree, k, pivot ); - if ( lp == DPLASMA_QR_KILLED_BY_TS ) + if ( lp == LIBHQR_KILLED_BY_TS ) return mt; myassert( lp >= ls ); switch( ls ) { - case DPLASMA_QR_KILLED_BY_DISTTREE: + case LIBHQR_KILLED_BY_DISTTREE: if ( pivot == k ) { if ( start == pivot ) { @@ -250,9 +251,9 @@ static int systolic_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, in } start = pivot; - case DPLASMA_QR_KILLED_BY_LOCALTREE: + case LIBHQR_KILLED_BY_LOCALTREE: - if ( lp > DPLASMA_QR_KILLED_BY_LOCALTREE ) { + if ( lp > LIBHQR_KILLED_BY_LOCALTREE ) { if ( start == pivot ) { nextp = start + (q-1) * p; @@ -269,9 +270,9 @@ static int systolic_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, in } start = pivot; - case DPLASMA_QR_KILLED_BY_TS: + case LIBHQR_KILLED_BY_TS: /* Search for predecessor in TS tree */ - if ( lp > DPLASMA_QR_KILLED_BY_TS ) { + if ( lp > LIBHQR_KILLED_BY_TS ) { if ( start == pivot ) { nextp = mt - (mt - rpivot - 1)%pq - 1; @@ -293,9 +294,9 @@ static int systolic_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, in /** ******************************************************************************* * - * @ingroup dplasma + * @ingroup libhqr * - * dplasma_systolic_init - Creates the tree structure that will describes the + * libhqr_systolic_init - Creates the tree structure that will describes the * operation performed during QR/LQ factorization with parameterized QR/LQ * algorithms family. The trees created here correspond to systolic arrays with * 1, 2, or 3 dimensions of flat trees. @@ -338,8 +339,8 @@ static int systolic_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, in * ******************************************************************************* * - * @sa dplasma_systolic_finalize - * @sa dplasma_hqr_init + * @sa libhqr_systolic_finalize + * @sa libhqr_hqr_init * @sa dplasma_zgeqrf_param * @sa dplasma_cgeqrf_param * @sa dplasma_dgeqrf_param @@ -347,30 +348,29 @@ static int systolic_prevpiv(const dplasma_qrtree_t *qrtree, int k, int pivot, in * ******************************************************************************/ int -dplasma_systolic_init( dplasma_qrtree_t *qrtree, - PLASMA_enum trans, tiled_matrix_desc_t *A, +libhqr_systolic_init( libhqr_tree_t *qrtree, + libhqr_typefacto_e trans, libhqr_tiledesc_t *A, int p, int q ) { if (qrtree == NULL) { - dplasma_error("dplasma_systolic_init", "illegal value of qrtree"); + fprintf(stderr, "libhqr_systolic_init, illegal value of qrtree"); return -1; } - if ((trans != PlasmaNoTrans) && - (trans != PlasmaTrans) && - (trans != PlasmaConjTrans)) { - dplasma_error("dplasma_systolic_init", "illegal value of trans"); + if ((trans != LIBHQR_QR) && + (trans != LIBHQR_LQ)) { + fprintf(stderr, "libhqr_systolic_init, illegal value of trans"); return -2; } if (A == NULL) { - dplasma_error("dplasma_systolic_init", "illegal value of A"); + fprintf(stderr, "libhqr_systolic_init, illegal value of A"); return -3; } if ( p < 0 ) { - dplasma_error("dplasma_systolic_init", "illegal value of p"); + fprintf(stderr, "libhqr_systolic_init, illegal value of p"); return -4; } if ( q < -1 ) { - dplasma_error("dplasma_systolic_init", "illegal value of q"); + fprintf(stderr, "libhqr_systolic_init, illegal value of q"); return -5; } @@ -382,11 +382,11 @@ dplasma_systolic_init( dplasma_qrtree_t *qrtree, qrtree->nextpiv = systolic_nextpiv; qrtree->prevpiv = systolic_prevpiv; - qrtree->mt = (trans == PlasmaNoTrans) ? A->mt : A->nt; - qrtree->nt = (trans == PlasmaNoTrans) ? A->nt : A->mt; + qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt; + qrtree->nt = (trans == LIBHQR_QR) ? A->nt : A->mt; - qrtree->a = dplasma_imax( q, 1 ); - qrtree->p = dplasma_imax( p, 1 ); + qrtree->a = libhqr_imax( q, 1 ); + qrtree->p = libhqr_imax( p, 1 ); qrtree->args = NULL; return 0; @@ -397,8 +397,8 @@ dplasma_systolic_init( dplasma_qrtree_t *qrtree, * * @ingroup dplasma * - * dplasma_systolic_finalize - Cleans the qrtree data structure allocated by - * call to dplasma_systolic_init(). + * libhqr_systolic_finalize - Cleans the qrtree data structure allocated by + * call to libhqr_systolic_init(). * ******************************************************************************* * @@ -408,11 +408,11 @@ dplasma_systolic_init( dplasma_qrtree_t *qrtree, * ******************************************************************************* * - * @sa dplasma_systolic_init + * @sa libhqr_systolic_init * ******************************************************************************/ void -dplasma_systolic_finalize( dplasma_qrtree_t *qrtree ) +libhqr_systolic_finalize( libhqr_tree_t *qrtree ) { if ( qrtree->args != NULL) { free( qrtree->args ); diff --git a/testings/testing_pivgen.c b/testings/testing_pivgen.c index b86b441..135f61a 100644 --- a/testings/testing_pivgen.c +++ b/testings/testing_pivgen.c @@ -7,213 +7,144 @@ * */ -#include "common.h" -#include "data_dist/matrix/two_dim_rectangle_cyclic.h" +#include "libhqr.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> int main(int argc, char ** argv) { - parsec_context_t* parsec; - dplasma_qrtree_t qrtree; + libhqr_tree_t qrtree; + libhqr_tiledesc_t matrix; + int alltreel[] = { 0, 1, 2, 3, 4 }; + int alltreeh[] = { 0, 1, 2, 3, 4 }; + int allP[] = { 3, 5, 7, 8 }; + int allA[] = { 1, 2, 4, 7 }; + int allM[] = { 1, 3, 4, 10, 17, 25, 128 }; + int allN[] = { 1, 2, 5, 13, 26, 58 }; + int nbtreel = 4; + int nbtreeh = 5; + int nbP = 4; + int nbA = 4; + int nbM = 7; + int nbN = 6; int rc, ret = 0; - int iparam[IPARAM_SIZEOF]; - char *dot_filename; - - /* Set defaults for non argv iparams */ - iparam_default_facto(iparam); - iparam_default_ibnbmb(iparam, 1, 1, 1); - iparam[IPARAM_LDA] = -'m'; - iparam[IPARAM_LDB] = -'m'; - - /* Initialize PaRSEC */ - parsec = setup_parsec(argc, argv, iparam); - PASTE_CODE_IPARAM_LOCALS(iparam); - - if (check) { - tiled_matrix_desc_t *B; - int alltreel[] = { 0, 1, 2, 3, 4 }; - int alltreeh[] = { 0, 1, 2, 3, 4 }; - int allP[] = { 3, 5, 7, 8 }; - int allA[] = { 1, 2, 4, 7 }; - int allM[] = { 1, 3, 4, 10, 17, 25, 128 }; - int allN[] = { 1, 2, 5, 13, 26, 58 }; - int nbtreel = 4; - int nbtreeh = 5; - int nbP = 4; - int nbA = 4; - int nbM = 7; - int nbN = 6; - int l, h, p, a, m, n, d, r; - int done, todo; - todo = 0; - done = 0; - - /* HQR */ - todo += nbtreel * nbM * nbN * (2 * nbA - 1) * (1 + 2 * nbtreeh * nbP); - /* systolic */ - todo += nbM * nbN * nbA * nbP; - - LDA = max(allM[ nbM-1 ], LDA); - /* initializing matrix structure */ - PASTE_CODE_ALLOCATE_MATRIX(ddescA, 1, - two_dim_block_cyclic, (&ddescA, matrix_ComplexDouble, matrix_Tile, - nodes, rank, MB, NB, LDA, allN[ nbN-1 ], 0, 0, - allM[ nbM-1 ], allN[ nbN-1 ], SMB, SNB, P)); - - /* - * - * Tests for HQR code - * - */ - for( l=0; l<nbtreel; l++) { - /* No High Level */ - h = 0; d = 0; p = -1; - for( a=0; a<nbA; a++) { - for( m=0; m<nbM; m++) { - for( n=0; n<nbN; n++) { - for( r=0; r<2; r++) { - if (r==1 && a==1) - continue; - - B = tiled_matrix_submatrix((tiled_matrix_desc_t*)&ddescA, 0, 0, allM[m], allN[n] ); - dplasma_hqr_init( &qrtree, PlasmaNoTrans, B, alltreel[l], 0, allA[a], -1, 0, r ); - - rc = dplasma_qrtree_check( B, &qrtree ); - if (rc != 0) { - fprintf(stderr, "-M %d -N %d --treel=%d --qr_a=%d --tsrr=%d FAILED(%d)\n", - allM[m], allN[n], alltreel[l], allA[a], r, rc); - ret |= 1; - } - dplasma_hqr_finalize( &qrtree ); - free(B); - - done++; - printf("\r%6d / %6d", done, todo); - } - } - } - } - /* With High level */ - for( d=0; d<2; d++) { /* Domino */ - if (d == 1 && alltreel[l] == DPLASMA_GREEDY1P_TREE) - continue; - for( h=0; h<nbtreeh; h++) { - for( p=0; p<nbP; p++) { - for( a=0; a<nbA; a++) { - for( m=0; m<nbM; m++) { - for( n=0; n<nbN; n++) { - for( r=0; r<2; r++) { - if (r==1 && a==1) - continue; - - B = tiled_matrix_submatrix((tiled_matrix_desc_t*)&ddescA, 0, 0, allM[m], allN[n] ); - dplasma_hqr_init( &qrtree, PlasmaNoTrans, B, alltreel[l], alltreeh[h], allA[a], allP[p], d, r); - - rc = dplasma_qrtree_check( B, &qrtree ); - if (rc != 0) { - fprintf(stderr, "-M %d -N %d --treel=%d --qr_a=%d --tsrr=%d --qr_p=%d --treeh=%d --domino=%d FAILED(%d)\n", - allM[m], allN[n], alltreel[l], allA[a], r, allP[p], alltreeh[h], d, rc); - ret |= 1; - } - - dplasma_hqr_finalize( &qrtree ); - free(B); - - done++; - printf("\r%6d / %6d", done, todo); - } - } - } - } - } - } - } - } - - /* - * - * Tests for systolic code - * - */ - /* With High level */ - for( p=0; p<nbP; p++) { - for( a=0; a<nbA; a++) { - for( m=0; m<nbM; m++) { - for( n=0; n<nbN; n++) { - B = tiled_matrix_submatrix((tiled_matrix_desc_t*)&ddescA, 0, 0, allM[m], allN[n] ); - dplasma_systolic_init( &qrtree, PlasmaNoTrans, B, allA[a], allP[p]); - - rc = dplasma_qrtree_check( B, &qrtree ); - if (rc != 0) { - fprintf(stderr, "systolic: -M %d -N %d --qr_a=%d --qr_p=%d FAILED(%d)\n", - allM[m], allN[n], allA[a], allP[p], rc); - ret |= 1; - } - - dplasma_systolic_finalize( &qrtree ); - free(B); - - done++; - printf("\r%6d / %6d", done, todo); - } - } - } - } - - parsec_data_free(ddescA.mat); - tiled_matrix_desc_destroy( (tiled_matrix_desc_t*)&ddescA); - - } else { - - LDA = max(M, LDA); - /* initializing matrix structure */ - PASTE_CODE_ALLOCATE_MATRIX(ddescA, 1, - two_dim_block_cyclic, (&ddescA, matrix_ComplexDouble, matrix_Tile, - nodes, rank, MB, NB, LDA, N, 0, 0, - M, N, SMB, SNB, P)); - -#if defined(SYSTOLIC) - dplasma_systolic_init( &qrtree, - PlasmaNoTrans, (tiled_matrix_desc_t *)&ddescA, - iparam[IPARAM_QR_HLVL_SZE], - iparam[IPARAM_QR_TS_SZE] ); -#else - dplasma_hqr_init( &qrtree, - PlasmaNoTrans, (tiled_matrix_desc_t*)&ddescA, - iparam[IPARAM_LOWLVL_TREE], iparam[IPARAM_HIGHLVL_TREE], - iparam[IPARAM_QR_TS_SZE], iparam[IPARAM_QR_HLVL_SZE], - iparam[IPARAM_QR_DOMINO], iparam[IPARAM_QR_TSRR] ); -#endif - - asprintf(&dot_filename, "tree-%dx%d-a%d-p%d-l%d-h%d-d%d.dot", - M, N, - iparam[IPARAM_QR_TS_SZE], - iparam[IPARAM_QR_HLVL_SZE], - iparam[IPARAM_LOWLVL_TREE], - iparam[IPARAM_HIGHLVL_TREE], - iparam[IPARAM_QR_DOMINO]); - - /*dplasma_qrtree_print_dag( (tiled_matrix_desc_t*)&ddescA, &qrtree, dot_filename );*/ - ret = dplasma_qrtree_check( (tiled_matrix_desc_t*)&ddescA, &qrtree ); - - /* dplasma_qrtree_print_pivot( (tiled_matrix_desc_t*)&ddescA, &qrtree); */ - /* dplasma_qrtree_print_next_k( (tiled_matrix_desc_t*)&ddescA, &qrtree, 0); */ - /* dplasma_qrtree_print_prev_k( (tiled_matrix_desc_t*)&ddescA, &qrtree, 0); */ - /* dplasma_qrtree_print_nbgeqrt( (tiled_matrix_desc_t*)&ddescA, &qrtree ); */ - /* dplasma_qrtree_print_type ( (tiled_matrix_desc_t*)&ddescA, &qrtree ); */ - -#if defined(SYSTOLIC) - dplasma_systolic_finalize( &qrtree ); -#else - dplasma_hqr_finalize( &qrtree ); -#endif - - free(dot_filename); - - parsec_data_free(ddescA.mat); - tiled_matrix_desc_destroy( (tiled_matrix_desc_t*)&ddescA); + int nbnodes = 1; + int P = 1; + int l, h, p, a, m, n, d, r; + int done, todo; + todo = 0; + done = 0; + + /* HQR */ + todo += nbtreel * nbM * nbN * (2 * nbA - 1) * (1 + 2 * nbtreeh * nbP); + /* systolic */ + todo += nbM * nbN * nbA * nbP; + + /* + * + * Tests for HQR code + * + */ + matrix.nodes = nbnodes; + matrix.p = P; + for( l=0; l<nbtreel; l++) { + /* No High Level */ + h = 0; d = 0; p = -1; + for( a=0; a<nbA; a++) { + for( m=0; m<nbM; m++) { + matrix.mt = allM[m]; + for( n=0; n<nbN; n++) { + matrix.nt = allN[n]; + for( r=0; r<2; r++) { + if (r==1 && a==1) + continue; + + libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, alltreel[l], 0, allA[a], -1, 0, r ); + + rc = libhqr_tree_check( &matrix, &qrtree ); + if (rc != 0) { + fprintf(stderr, "-M %d -N %d --treel=%d --qr_a=%d --tsrr=%d FAILED(%d)\n", + allM[m], allN[n], alltreel[l], allA[a], r, rc); + ret |= 1; + } + libhqr_hqr_finalize( &qrtree ); + + done++; + printf("\r%6d / %6d", done, todo); + } + } + } + } + /* With High level */ + matrix.nodes = nbnodes; + for( d=0; d<2; d++) { /* Domino */ + if (d == 1 && alltreel[l] == LIBHQR_GREEDY1P_TREE) + continue; + for( h=0; h<nbtreeh; h++) { + for( p=0; p<nbP; p++) { + matrix.p = allP[p]; + for( a=0; a<nbA; a++) { + for( m=0; m<nbM; m++) { + matrix.mt = allM[m]; + for( n=0; n<nbN; n++) { + matrix.nt = allN[n]; + for( r=0; r<2; r++) { + if (r==1 && a==1) + continue; + libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, alltreel[l], alltreeh[h], allA[a], allP[p], d, r); + + rc = libhqr_tree_check( &matrix, &qrtree ); + if (rc != 0) { + fprintf(stderr, "-M %d -N %d --treel=%d --qr_a=%d --tsrr=%d --qr_p=%d --treeh=%d --domino=%d FAILED(%d)\n", + allM[m], allN[n], alltreel[l], allA[a], r, allP[p], alltreeh[h], d, rc); + ret |= 1; + } + + libhqr_hqr_finalize( &qrtree ); + + done++; + printf("\r%6d / %6d", done, todo); + } + } + } + } + } + } + } } - cleanup_parsec(parsec, iparam); + /* + * + * Tests for systolic code + * + */ + /* With High level */ + for( p=0; p<nbP; p++) { + matrix.p = allP[p]; + for( a=0; a<nbA; a++) { + for( m=0; m<nbM; m++) { + matrix.mt = allM[m]; + for( n=0; n<nbN; n++) { + matrix.nt = allN[n]; + libhqr_systolic_init( &qrtree, LIBHQR_QR, &matrix, allA[a], allP[p]); + + rc = libhqr_tree_check( &matrix, &qrtree ); + if (rc != 0) { + fprintf(stderr, "systolic: -M %d -N %d --qr_a=%d --qr_p=%d FAILED(%d)\n", + allM[m], allN[n], allA[a], allP[p], rc); + ret |= 1; + } + + libhqr_systolic_finalize( &qrtree ); + + done++; + printf("\r%6d / %6d", done, todo); + } + } + } + } if ( ret == 0 ) return EXIT_SUCCESS; -- GitLab