diff --git a/include/libhqr_internal.h b/include/libhqr_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..0b7dccadcfb69dee13e4cb93e2669a238b5988dd --- /dev/null +++ b/include/libhqr_internal.h @@ -0,0 +1,166 @@ +/** + * + * @file libhqr.h + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + */ +#ifndef _LIBHQR_INTERNAL_H_ +#define _LIBHQR_INTERNAL_H_ + +#include "libhqr.h" +#include <stdio.h> +#include <assert.h> + +#define PRINT_PIVGEN 0 +#ifdef PRINT_PIVGEN +#define myassert( test ) {if ( ! (test) ) return -1;} +#else +#define myassert(test) {assert((test)); return -1;} +#endif + +typedef enum qrtree_type_ { + LIBHQR_QRTREE_UNSET = 0, + LIBHQR_QRTREE_HQR, + LIBHQR_QRTREE_SVD, + LIBHQR_QRTREE_SYS, + LIBHQR_QRTREE_MTX, +} qrtree_type_e; + +struct hqr_args_s; +typedef struct hqr_args_s hqr_args_t; + +struct hqr_subpiv_s; +typedef struct hqr_subpiv_s hqr_subpiv_t; + +/** + * @brief jhj + */ +struct hqr_args_s { + int domino; /**< Switch to enable/disable the domino tree linking high and lw level reduction trees */ + int tsrr; /**< Switch to enable/disable round-robin on TS to optimise pipelining between TS and local tree */ + hqr_subpiv_t *llvl; + hqr_subpiv_t *hlvl; + int *perm; +}; + +struct hqr_subpiv_s { + /** + * currpiv + * @param[in] arg pointer to the qr_piv structure + * @param[in] k step in the factorization + * @param[in] m line you want to eliminate + * + * @return the annihilator p used with m at step k + */ + int (*currpiv)(const hqr_subpiv_t *arg, int k, int m); + /* + * nextpiv + * @param[in] arg pointer to the qr_piv structure + * @param[in] k step in the factorization + * @param[in] p line currently used as an annihilator + * @param[in] m line actually annihilated. + * m = MT to find the first time p is used as an annihilator during step k + * + * @return the next line m' using the line p as annihilator during step k + * mt if p will never be used again as an annihilator. + */ + int (*nextpiv)(const hqr_subpiv_t *arg, int k, int p, int m); + /* + * prevpiv + * @param[in] arg pointer to the qr_piv structure + * @param[in] k step in the factorization + * @param[in] p line currently used as an annihilator + * @param[in] m line actually annihilated. + * m = p to find the last time p has been used as an annihilator during step k + * + * @return the previous line m' using the line p as annihilator during step k + * mt if p has never been used before that as an annihilator. + */ + int (*prevpiv)(const hqr_subpiv_t *arg, int k, int p, int m); + int *ipiv; + int minMN; + int ldd; + int a; + int p; + int domino; +}; + +/** + * @brief Minimal structure to store the information related to each tile. + */ +typedef struct libhqr_tile_info_s { + int type; /**< The type of the reduction applied to the tile (@sa libhqr_type_e) */ + int currpiv; /**< The row index of the pivot for this tile */ + int nextpiv; /**< The next tile for which the currpiv is a pivot */ + int prevpiv; /**< The previous tile for which currpiv was a pivot */ + int first_nextpiv; /**< If type != 0, the first tile for which this tile is a pivot */ + int first_prevpiv; /**< If type != 0, the last tile for which this tile is a pivot */ +} libhqr_tile_info_t; + +static inline int libhqr_imin(int a, int b){ + return (a > b) ? b : a; +} + +static inline int libhqr_imax(int a, int b){ + return (a > b) ? a : b; +} + +static inline int libhqr_iceil(int a, int b){ + return (a + b - 1) / b; +} + + +/* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */ +#define nbextra1_formula (( (k % pa) > (pa - p) ) ? (-k)%pa + pa : 0) + +/* + * Common functions + */ +/* int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); */ +/* int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ); */ +/* int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ); */ +/* int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ); */ + +/* + * Subtree for low-level + */ +void libhqr_matrix_init(libhqr_tree_t *qrtree, const libhqr_tree_t *qrtree_init); +int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m); +int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m); +int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m); +int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m); +void libhqr_matrix_finalize(libhqr_tree_t *qrtree); + +/* + * function for drawing the tree + */ +void draw_rectangle(int k, int p, int m, int step_m, FILE *file); +void draw_lines(const libhqr_tree_t *qrtree, int k, int *tiles, int *step, FILE *file); +void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *file); +void draw_vertical_line( int k, int p, int m, int step_m, FILE *file); + +void hqr_low_flat_init ( hqr_subpiv_t *arg ); +void hqr_low_binary_init ( hqr_subpiv_t *arg ); +void hqr_low_fibonacci_init( hqr_subpiv_t *arg, int minMN ); +void hqr_low_greedy_init ( hqr_subpiv_t *arg, int minMN ); +void hqr_low_greedy1p_init ( hqr_subpiv_t *arg, int minMN ); +void svd_low_adaptiv_init ( hqr_subpiv_t *arg, int gmt, int gnt, int nbcores, int ratio ); + +void hqr_high_flat_init ( hqr_subpiv_t *arg ); +void hqr_high_binary_init ( hqr_subpiv_t *arg ); +void hqr_high_fibonacci_init( hqr_subpiv_t *arg ); +void hqr_high_greedy_init ( hqr_subpiv_t *arg, int minMN ); +void hqr_high_greedy1p_init ( hqr_subpiv_t *arg ); + +#endif /* _LIBHQR_INTERNAL_H_ */ diff --git a/src/libhqr_dbg.c b/src/check.c similarity index 100% rename from src/libhqr_dbg.c rename to src/check.c diff --git a/src/gendot.c b/src/gendot.c new file mode 100644 index 0000000000000000000000000000000000000000..50cd4cddfcf364bdbf6a92cbfe6b9fc8152f2e9e --- /dev/null +++ b/src/gendot.c @@ -0,0 +1,132 @@ +/** + * + * @file gendot.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + */ +#include "libhqr_internal.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#define DAG_HEADER "digraph G { orientation=portrait; \n" +#define DAG_FOOTER "} // close graph\n" +#define DAG_LABELNODE "%d [label=\"%d\",color=white,pos=\"-1.,-%d.!\"]\n" +#define DAG_LENGTHNODE "l%d [label=\"%d\",color=white,pos=\"%d.,0.5!\"]\n" +#define DAG_INVISEDGE "%d->%d [style=\"invis\"];\n" +#define DAG_STARTNODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n" +#define DAG_NODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n" +#define DAG_FIRSTEDGE_PIV "%d->p%d_m%d_k0\n" +#define DAG_FIRSTEDGE_TS "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n" +#define DAG_FIRSTEDGE_TT "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n" +#define DAG_EDGE_PIV "p%d_m%d_k%d->p%d_m%d_k%d [width=0.1,color=\"%s\"]\n" +#define DAG_EDGE_TS "p%d_m%d_k%d->p%d_m%d_k%d [style=dotted, width=0.1,color=\"%s\"]\n" +#define DAG_EDGE_TT "p%d_m%d_k%d->p%d_m%d_k%d [style=dashed, width=0.1,color=\"%s\"]\n" + +char *color[] = { + "red", + "blue", + "green", + "orange", + "cyan", + "purple", + "yellow", +}; +#define DAG_NBCOLORS 7 + +void +libhqr_print_dot( const libhqr_tree_t *qrtree, + const char *filename ) +{ + int *pos, *next, *done; + int k, m, n, lpos, prev, length; + int minMN = libhqr_imin( qrtree->mt, qrtree->nt ); + FILE *f = fopen( filename, "w" ); + + done = (int*)malloc( qrtree->mt * sizeof(int) ); + pos = (int*)malloc( qrtree->mt * sizeof(int) ); + next = (int*)malloc( qrtree->mt * sizeof(int) ); + memset(pos, 0, qrtree->mt * sizeof(int) ); + memset(next, 0, qrtree->mt * sizeof(int) ); + + /* Print header */ + fprintf(f, DAG_HEADER ); /*, A->mt+2, minMN+2 );*/ + for(m=0; m < qrtree->mt; m++) { + fprintf(f, DAG_LABELNODE, m, m, m); + } + + for(k=0; k<minMN; k++ ) { + int nb2reduce = qrtree->mt - k - 1; + + for(m=k; m < qrtree->mt; m++) { + fprintf(f, DAG_STARTNODE, m, qrtree->mt, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]); + next[m] = qrtree->nextpiv( qrtree, k, m, qrtree->mt); + } + + while( nb2reduce > 0 ) { + memset(done, 0, qrtree->mt * sizeof(int) ); + for(m=qrtree->mt-1; m > (k-1); m--) { + n = next[m]; + if ( next[n] != qrtree->mt ) + continue; + if ( n != qrtree->mt ) { + lpos = libhqr_imax( pos[m], pos[n] ); + lpos++; + pos[m] = lpos; + pos[n] = lpos; + + fprintf(f, DAG_NODE, m, n, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]); + + prev = qrtree->prevpiv( qrtree, k, m, n ); + fprintf(f, DAG_EDGE_PIV, + m, prev, k, + m, n, k, + color[ (m%qrtree->p) % DAG_NBCOLORS ]); + + prev = qrtree->prevpiv( qrtree, k, n, n ); + if ( qrtree->gettype(qrtree, k, n) == 0 ) + fprintf(f, DAG_EDGE_TS, + n, prev, k, + m, n, k, + color[ (m%qrtree->p) % DAG_NBCOLORS ]); + else + fprintf(f, DAG_EDGE_TT, + n, prev, k, + m, n, k, + color[ (m%qrtree->p) % DAG_NBCOLORS ]); + + next[m] = qrtree->nextpiv( qrtree, k, m, n); + done[m] = done[n] = 1; + nb2reduce--; + } + } + } + } + + length = 0; + for(m=0; m < qrtree->mt; m++) { + length = libhqr_imax(length, pos[m]); + } + length++; + for(k=0; k<length; k++) + fprintf(f, DAG_LENGTHNODE, k, k, k); + fprintf(f, DAG_FOOTER); + + printf("Tic Max = %d\n", length-1); + + fclose( f ); + free(pos); + free(next); +} diff --git a/src/treewalk.c b/src/gensvg.c similarity index 100% rename from src/treewalk.c rename to src/gensvg.c diff --git a/src/high_binary.c b/src/high_binary.c new file mode 100644 index 0000000000000000000000000000000000000000..ef9d20b570aea371ec010b2f5239f490151f4312 --- /dev/null +++ b/src/high_binary.c @@ -0,0 +1,101 @@ +/** + * + * @file high_binary.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for high level binary tree + * + */ +#include "libhqr_internal.h" +#include <math.h> + +static int +hqr_high_binary_currpiv(const hqr_subpiv_t *arg, int k, int m) +{ + int tmp1 = m - k; + int tmp2 = 1; + (void)arg; + + if ( tmp1 == 0) + return 0; + while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) { + tmp1 = tmp1 >> 1; + tmp2 = tmp2 << 1; + } + assert( m - tmp2 >= k ); + return m - tmp2; +} + +static int +hqr_high_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + int tmpp, bit; + myassert( (start == arg->ldd) || (hqr_high_binary_currpiv( arg, k, start ) == p) ); + + if ( start <= p ) + return arg->ldd; + + int offset = p - k; + bit = 0; + if (start != arg->ldd) { + while( ( (start - k) & (1 << bit ) ) == 0 ) + bit++; + bit++; + } + + tmpp = offset | (1 << bit); + if ( ( tmpp != offset ) && (tmpp < arg->p) && ( tmpp+k < arg->ldd ) ) + return tmpp + k; + else + return arg->ldd; +} + +static int +hqr_high_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + int offset = p - k; + + myassert( start >= p && ( start == p || hqr_high_binary_currpiv( arg, k, start ) == p ) ); + + if ( (start == p) && ( offset%2 == 0 ) ) { + int i, bit, tmp; + if ( offset == 0 ) + bit = (int)( log( (double)( libhqr_imin(arg->p, arg->ldd - k) ) ) / log( 2. ) ); + else { + bit = 0; + while( (offset & (1 << bit )) == 0 ) + bit++; + } + for( i=bit; i>-1; i--){ + tmp = offset | (1 << i); + if ( ( offset != tmp ) && ( tmp < arg->p ) && (tmp+k < arg->ldd) ) + return tmp+k; + } + return arg->ldd; + } + + if ( (start - p) > 1 ) + return p + ( (start - p) >> 1 ); + else { + return arg->ldd; + } +} + +void +hqr_high_binary_init(hqr_subpiv_t *arg) { + arg->currpiv = hqr_high_binary_currpiv; + arg->nextpiv = hqr_high_binary_nextpiv; + arg->prevpiv = hqr_high_binary_prevpiv; + arg->ipiv = NULL; +} + diff --git a/src/high_fibonacci.c b/src/high_fibonacci.c new file mode 100644 index 0000000000000000000000000000000000000000..a41ec888a85218bd3ac8ef958ae08beeac84fa18 --- /dev/null +++ b/src/high_fibonacci.c @@ -0,0 +1,152 @@ +/** + * + * @file high_fibonacci.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for high level fibonacci tree, and init for duplicated greedy. + * + */ +#include "libhqr_internal.h" +#include <stdlib.h> + +/**************************************************** + * HQR_HIGH_FIBONACCI_TREE + ***************************************************/ +/* Return the pivot to use for the row m at step k */ +static inline int +hqr_high_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { + return (qrpiv->ipiv)[ m-k ] + k; +} + +/* Return the last row which has used the row m as a pivot in step k before the row start */ +static inline int +hqr_high_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) { + int i; + myassert( p >= k && start >= p && start-k <= qrpiv->p); + + int lp = p - k; + int lstart= start - k; + int end = libhqr_imin(qrpiv->ldd-k, qrpiv->p); + for( i=lstart+1; i<end; i++ ) + if ( (qrpiv->ipiv)[i] == lp ) + return i+k; + return qrpiv->ldd; +} + +/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ +static inline int +hqr_high_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) { + int i; + myassert( p>=k && (start == qrpiv->ldd || start-k <= qrpiv->p) ); + + for( i=libhqr_imin(start-k-1, qrpiv->p-1); i>0; i-- ) + if ( (qrpiv->ipiv)[i] == (p-k) ) + return i + k; + return (qrpiv->ldd); +} + +void +hqr_high_fibonacci_init(hqr_subpiv_t *arg) { + int *ipiv; + int p; + + arg->currpiv = hqr_high_fibonacci_currpiv; + arg->nextpiv = hqr_high_fibonacci_nextpiv; + arg->prevpiv = hqr_high_fibonacci_prevpiv; + + p = arg->p; + + arg->ipiv = (int*)calloc( p, sizeof(int) ); + ipiv = arg->ipiv; + + /* + * Fibonacci of order 1: + * u_(n+1) = u_(n) + 1 + */ + { + int f1, k, m; + + /* Fill in the first column */ + f1 = 1; + for (m=1; m < p; ) { + for (k=0; (k < f1) && (m < p); k++, m++) { + ipiv[m] = m - f1; + } + f1++; + } + } +} + +/**************************************************** + * HQR_HIGH_GREEDY_TREE (1 panel duplicated) + ***************************************************/ +void hqr_high_greedy1p_init(hqr_subpiv_t *arg){ + int *ipiv; + int mt, p; + + arg->currpiv = hqr_high_fibonacci_currpiv; + arg->nextpiv = hqr_high_fibonacci_nextpiv; + arg->prevpiv = hqr_high_fibonacci_prevpiv; + + mt = arg->ldd; + p = arg->p; + + arg->ipiv = (int*)calloc( p, sizeof(int) ); + ipiv = arg->ipiv; + + { + int minMN = 1; + int j, k, height, start, end, firstk = 0; + int *nT = (int*)calloc(minMN, sizeof(int)); + int *nZ = (int*)calloc(minMN, sizeof(int)); + + nT[0] = mt; + nZ[0] = libhqr_imax( mt - p, 0 ); + for(k=1; k<minMN; k++) { + height = libhqr_imax(mt-k-p, 0); + nT[k] = height; + nZ[k] = height; + } + + k = 0; + while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) && + ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) + && ( firstk < minMN ) ) { + height = (nT[k] - nZ[k]) / 2; + if ( height == 0 ) { + while ( (firstk < minMN) && + ( nT[firstk] == mt - firstk ) && + ( nZ[firstk]+1 == nT[firstk] ) ) { + firstk++; + } + k = firstk; + continue; + } + + start = mt - nZ[k] - 1; + end = start - height; + nZ[k] += height; + if (k < minMN-1) nT[k+1] = nZ[k]; + + for( j=start; j > end; j-- ) { + ipiv[ k*p + j-k ] = (j - height); + } + + k++; + if (k > minMN-1) k = firstk; + } + + free(nT); + free(nZ); + } +} diff --git a/src/high_flat.c b/src/high_flat.c new file mode 100644 index 0000000000000000000000000000000000000000..29003463fa6a5573b7ad5865c0faed7cfb5d242e --- /dev/null +++ b/src/high_flat.c @@ -0,0 +1,61 @@ +/** + * + * @file high_flat.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for high level flat tree + * + */ +#include "libhqr_internal.h" + +static int +hqr_high_flat_currpiv(const hqr_subpiv_t *arg, int k, int m) +{ + (void)arg; + (void)m; + return k; +} + +static int +hqr_high_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + if ( p == k && arg->ldd > 1 ) { + if ( start == arg->ldd ) + return p+1; + else if ( start < arg->ldd && (start-k < arg->p-1) ) + return start+1; + } + return arg->ldd; +} + +static int +hqr_high_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + assert( arg->p > 1 ); + if ( p == k && arg->ldd > 1 ) { + if ( start == p && p != arg->ldd-1 ) + return libhqr_imin( p + arg->p - 1, arg->ldd - 1 ); + else if ( start > p + 1 && (start-k < arg->p)) + return start-1; + } + return arg->ldd; +} + +void +hqr_high_flat_init(hqr_subpiv_t *arg) { + arg->currpiv = hqr_high_flat_currpiv; + arg->nextpiv = hqr_high_flat_nextpiv; + arg->prevpiv = hqr_high_flat_prevpiv; + arg->ipiv = NULL; +} + diff --git a/src/high_greedy.c b/src/high_greedy.c new file mode 100644 index 0000000000000000000000000000000000000000..d16b4c5ce3802f972738312ad7eb836c3dd000b6 --- /dev/null +++ b/src/high_greedy.c @@ -0,0 +1,110 @@ +/** + * + * @file high_greedy.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for high level greedy tree + * + */ +#include "libhqr_internal.h" +#include <stdlib.h> + +static int +hqr_high_greedy_currpiv(const hqr_subpiv_t *arg, int k, int m) +{ + myassert( m >= k && m < k+arg->p ); + return (arg->ipiv)[ k * (arg->p) + (m - k) ]; +} + +static int +hqr_high_greedy_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + int i; + myassert( (start >= k && start < k+arg->p) || start == arg->ldd ); + for( i=libhqr_imin(start-1, k+arg->p-1); i > k; i-- ) + if ( (arg->ipiv)[i-k + k* (arg->p)] == p ) + return i; + return (arg->ldd); +} + +static int +hqr_high_greedy_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + int i; + myassert( (start >= k && start < k+arg->p) || start == p ); + for( i=start-k+1; i<arg->p; i++ ) + if ( (arg->ipiv)[i + k * (arg->p)] == p ) + return k+i; + return arg->ldd; +} + +void +hqr_high_greedy_init(hqr_subpiv_t *arg, int minMN) { + int *ipiv; + int mt, p; + + arg->currpiv = hqr_high_greedy_currpiv; + arg->nextpiv = hqr_high_greedy_nextpiv; + arg->prevpiv = hqr_high_greedy_prevpiv; + + mt = arg->ldd; + p = arg->p; + + arg->ipiv = (int*)calloc( p * minMN, sizeof(int) ); + ipiv = arg->ipiv; + + { + int j, k, height, start, end, firstk = 0; + int *nT = (int*)calloc(minMN, sizeof(int)); + int *nZ = (int*)calloc(minMN, sizeof(int)); + + nT[0] = mt; + nZ[0] = libhqr_imax( mt - p, 0 ); + for(k=1; k<minMN; k++) { + height = libhqr_imax(mt-k-p, 0); + nT[k] = height; + nZ[k] = height; + } + + k = 0; + while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) && + ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) + && ( firstk < minMN ) ) { + height = (nT[k] - nZ[k]) / 2; + if ( height == 0 ) { + while ( (firstk < minMN) && + ( nT[firstk] == mt - firstk ) && + ( nZ[firstk]+1 == nT[firstk] ) ) { + firstk++; + } + k = firstk; + continue; + } + + start = mt - nZ[k] - 1; + end = start - height; + nZ[k] += height; + if (k < minMN-1) nT[k+1] = nZ[k]; + + for( j=start; j > end; j-- ) { + ipiv[ k*p + j-k ] = (j - height); + } + + k++; + if (k > minMN-1) k = firstk; + } + + free(nT); + free(nZ); + } +} diff --git a/src/libhqr.c b/src/hqr.c similarity index 100% rename from src/libhqr.c rename to src/hqr.c diff --git a/src/low_adaptiv.c b/src/low_adaptiv.c new file mode 100644 index 0000000000000000000000000000000000000000..d8abfd5101a30440c81e4b29859b06efc86a2f6e --- /dev/null +++ b/src/low_adaptiv.c @@ -0,0 +1,203 @@ +/** + * + * @file low_adaptiv.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level adaptiv tree for SVD/EVD reduction to band. + * + */ +#include "libhqr_internal.h" + +/* Return the pivot to use for the row m at step k */ +inline static int +svd_low_adaptiv_currpiv( const hqr_subpiv_t *arg, + int k, int m ) +{ + int *ipiv = arg->ipiv + (m%arg->p * arg->minMN + k) * arg->ldd; + int a = ipiv[0]; + + ipiv+=2; + return ipiv[ ( m / arg->p ) / a ]; +} + +/* Return the last row which has used the row m as a pivot in step k before the row start */ +inline static int +svd_low_adaptiv_prevpiv( const hqr_subpiv_t *arg, + int k, int p, int start_pa ) +{ + int i; + int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k) * arg->ldd; + int a = ipiv[0]; + int ldd = ipiv[1]; + int p_pa = p / arg->p / a; + + ipiv+=2; + for( i=start_pa+1; i<ldd; i++ ) + if ( ipiv[i] == p_pa ) + return i; + return i; +} + +/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ +inline static int +svd_low_adaptiv_nextpiv( const hqr_subpiv_t *arg, + int k, int p, int start_pa ) +{ + int i; + int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k ) * arg->ldd; + int a = ipiv[0]; + int ldd = ipiv[1]; + int pa = arg->p * a; + int k_a = (k + arg->p - 1 - p%(arg->p)) / arg->p / a; + int p_pa = p / pa; + + ipiv+=2; + for( i=start_pa-1; i> k_a; i-- ) + if ( ipiv[i] == p_pa ) + return i; + + return ldd; +} + +void +svd_low_adaptiv_init( hqr_subpiv_t *arg, + int gmt, int gnt, int nbcores, int ratio ) +{ + int *ipiv; + int mt, a, p, pa, maxmt, myrank; + int j, k, height, start, end, nT, nZ; + int minMN = libhqr_imin(gmt, gnt); + + arg->currpiv = svd_low_adaptiv_currpiv; + arg->nextpiv = svd_low_adaptiv_nextpiv; + arg->prevpiv = svd_low_adaptiv_prevpiv; + + p = arg->p; + + end = 0; + + /** + * Compute the local greedy tree of each column, on each node + */ + maxmt = 1; + for(k=0; k<minMN; k++) { + /** + * The objective is to have at least two columns of TS to reduce per + * core, so it must answer the following inequality: + * ((gmt-k) / p / a ) * (gnt-k) >= ( ratio * nbcores ); + * so, + * a <= mt * (gnt-k) / (ratio * nbcores ) + */ + height = libhqr_iceil( gmt-k, p ); + a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 ); + + /* Now let's make sure all sub-parts are equilibrate */ + j = libhqr_iceil( height, a ); + a = libhqr_iceil( gmt-k, j ); + + /* Compute max dimension of the tree */ + mt = libhqr_iceil( gmt, p * a ); + maxmt = libhqr_imax( mt, maxmt ); + } + + arg->ldd = maxmt + 2; + arg->ipiv = (int*)malloc( arg->ldd * minMN * sizeof(int) * p ); + ipiv = arg->ipiv; + + memset( ipiv, 0, minMN*sizeof(int)*arg->ldd*p ); + + for ( myrank=0; myrank<p; myrank++ ) { + + /** + * Compute the local greedy tree of each column, on each node + */ + for(k=0; k<minMN; k++, ipiv += arg->ldd) { + /** + * The objective is to have at least two columns of TS to reduce per + * core, so it must answer the following inequality: + * (ldd / a ) * (gnt-k) >= ( ratio * nbcores ); + * so, + * a <= mt * (gnt-k) / (ratio * nbcores ) + */ + height = libhqr_iceil( gmt-k, p ); + a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 ); + + /* Now let's make sure all sub-parts are equilibrate */ + j = libhqr_iceil( height, a ); + a = libhqr_iceil( gmt-k, j ); + + pa = p * a; + mt = libhqr_iceil( gmt, pa ); + ipiv[0] = a; + ipiv[1] = mt; + + assert( a > 0 ); + assert( mt < arg->ldd-1 ); + + /* Number of tiles to factorized in this column on this rank */ + nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); + /* Number of tiles already killed */ + nZ = 0; + + assert( nT <= mt ); + + /* No more computations on this node */ + if ( nT == 0 ) { + continue; + } + + while( nZ < (nT-1) ) { + height = (nT - nZ) / 2; + start = mt - nZ - 1; + end = start - height; + nZ += height; + + for( j=start; j > end; j-- ) { + ipiv[ j+2 ] = (j - height); + } + } + assert( nZ+1 == nT ); + } + } + +#if 0 + { + int m, k; + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + printf( "%3d ", ipiv[k*(arg->ldd) + m] ); + } + printf("\n"); + } + } + if (!arg->domino) { + int m, k, myrank; + for ( myrank=1; myrank<p; myrank++ ) { + ipiv += arg->ldd * minMN; + printf("-------- rank %d ---------\n", myrank ); + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + int k_a = (k + p - 1 - myrank) / p / a; + if ( m >= k_a ) + printf( "%3d ", ipiv[k * arg->ldd + m] ); + else + printf( "--- " ); + } + printf("\n"); + } + } + } +#endif +} diff --git a/src/mtxtree.c b/src/mtxtree.c new file mode 100644 index 0000000000000000000000000000000000000000..9c68b92288233ae84b315690c3f3b1805eb82ce9 --- /dev/null +++ b/src/mtxtree.c @@ -0,0 +1,193 @@ +/** + * + * @file mtxtree.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * This file contains all the function to describe the dependencies + * used in the Xgeqrf_param.jdf file. + * The QR factorization done with this file relies on three levels: + * - the first one is using a flat tree with TS kernels. The + * height of this tree is defined by the parameter 'a'. If 'a' + * is set to A->mt, the factorization is identical to the one + * perform by PLASMA_zgeqrf. + * For all subdiagonal "macro-tiles", the line reduced is always + * the first. For all diagonal "macro-tiles", the factorization + * performed is identical to the one performed by PLASMA_zgeqrf. + * + * - the third level is using a reduction tree of size 'p'. By + * default, the parameter 'p' should be equal to the number of + * processors used for the computation, but can be set + * differently. (see further example). The type of tree used at + * this level is defined by the hlvl parameter. It can be flat + * or greedy. + * CODE DETAILS: This tree and all the function related to it + * are performing a QR factorization on a band matrix with 'p' + * the size of the band. All the functions take global indices + * as input and return global indices as output. + * + * - Finally, a second 'low' level of reduction tree is applied. + * The size of this tree is induced by the parameters 'a' and 'p' + * from the first and third levels and is A->mt / ( p * a ). This + * tree is reproduced p times for each subset of tiles + * S_k = {i in [0, A->mt-1] \ i%p*a = k } with k in [0, p-1]. + * The tree used for the reduction is defined by the llvl + * parameter and can be: flat, greedy, fibonacci or binary. + * CODE DETAILS: For commodity, the size of this tree is always + * ceil(A->mt / (p * a) ) inducing some extra tests in the code. + * All the functions related to this level of tree take as input + * the local indices in the A->mt / (p*a) matrix and the global + * k. They return the local index. The reductions are so + * performed on a trapezoidal matrices where the step is defined + * by a: + * <- min( lhlvl_mt, min( mt, nt ) ) -> + * __a__ a a + * | |_____ + * | |_____ + * | |_____ + * llvl_mt = ceil(MT/ (a*p)) | |_____ + * | |_____ + * |___________________________________| + * + * + * + * At each step of the factorization, the lines of tiles are divided + * in 4 types: + * - QRPARAM_TILE_TS: They are the lines annihilated by a TS + * kernel, these lines are never used as an annihilator. They are + * the lines i, with 1 < (i/p)%a < a and i > (k+1)*p + * - QRPARAM_TILE_LOCALTT: They are the lines used as annhilitor + * in the TS kernels annihiling the QRPARAM_TILE_TS lines. They + * are themselves annihilated by the TT kernel of the low level + * reduction tree. The roots of the local trees are the lines i, + * with i/p = k. + * - QRPARAM_TILE_DOMINO: These are the lines that are + * annhilihated with a domino effect in the band defined by (i/p) + * <= k and i >= k + * - QRPARAM_TILE_DISTTT: These are the lines annihilated by the + * high level tree to reduce communications. + * These lines are defined by (i-k)/p = 0. + */ +#include "libhqr_internal.h" +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> + +/**************************************************** + * Reading functions + ************************************************** + * + * Common parameters for the following functions + * qrtree - tree used for the factorization + * k - Step k of the QR factorization + * m - line anhilated + */ + +int mtxtree_gettype(const libhqr_tree_t *qrtree, int k, int m){ + int id; + libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args); + id = (k * qrtree->mt) + m; + return arg[id].type; +} + +int mtxtree_currpiv(const libhqr_tree_t *qrtree, int k, int m){ + int id, perm_m, p, a; + libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args); + perm_m = m; + p = qrtree->p; + a = qrtree->a; + myassert( (p==1) || (perm_m / (p*a)) == (m / (p*a)) ); + myassert( (p==1) || (perm_m % p) == (m % p) ); + id = (k * qrtree->mt) + m; + return arg[id].currpiv; +} + +/* + * Extra parameter: + * p - line used as pivot + */ + +int mtxtree_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ + int id; + libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args); + int gmt = qrtree->mt; + myassert( m > p && p >= k ); + myassert( m == gmt || p == mtxtree_currpiv( qrtree, k, m ) ); + if(m == qrtree->mt){ + id = (k * qrtree->mt) + p; + return arg[id].first_nextpiv; + } + else{ + id = (k * qrtree->mt) + m; + return arg[id].nextpiv; + } +} + +int mtxtree_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ + int id; + libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args); + int gmt = qrtree->mt; + myassert( m >= p && p >= k && m < gmt ); + myassert( m == p || p == mtxtree_currpiv( qrtree, k, m ) ); + + if(m == p){ + id = (k * qrtree->mt) + p; + return arg[id].first_prevpiv; + } + else{ + id = (k * qrtree->mt) + m; + return arg[id].prevpiv; + } +} + +void +libhqr_fct_to_mtx( const libhqr_tree_t *in, libhqr_tree_t *out ) +{ + libhqr_tile_info_t *tileinfo; + int i, minMN, p, k; + + minMN = libhqr_imin( in->mt, in->nt ); + + /* Copy the input tree to the output one */ + memcpy( out, in, sizeof(libhqr_tree_t) ); + + /* Switch to matrix storage format and functions */ + out->init = LIBHQR_QRTREE_MTX; + out->gettype = mtxtree_gettype; + out->currpiv = mtxtree_currpiv; + out->nextpiv = mtxtree_nextpiv; + out->prevpiv = mtxtree_prevpiv; + out->args = malloc( out->mt * minMN * sizeof(libhqr_tile_info_t) ); + + tileinfo = (libhqr_tile_info_t*)(out->args); + + /* Initialize the matrix */ + for (k=0; k<minMN; k++) + { + for (i=0; i<in->mt; i++, tileinfo++) + { + tileinfo->type = in->gettype(in, k, i); + tileinfo->currpiv = p = in->currpiv(in, k, i); + tileinfo->first_nextpiv = in->nextpiv(in, k, i, in->mt); + tileinfo->first_prevpiv = in->prevpiv(in, k, i, i); + tileinfo->nextpiv = in->nextpiv(in, k, p, i); + tileinfo->prevpiv = in->prevpiv(in, k, p, i); + + assert( ((tileinfo->nextpiv <= in->mt) && (tileinfo->nextpiv >= k)) || (tileinfo->nextpiv == -1) ); + assert( ((tileinfo->prevpiv <= in->mt) && (tileinfo->prevpiv >= k)) || (tileinfo->prevpiv == -1) ); + assert( ((tileinfo->first_nextpiv <= in->mt) && (tileinfo->first_nextpiv >= k)) || (tileinfo->first_nextpiv == -1) ); + assert( ((tileinfo->first_prevpiv <= in->mt) && (tileinfo->first_prevpiv >= k)) || (tileinfo->first_prevpiv == -1) ); + } + } +} diff --git a/src/svd.c b/src/svd.c new file mode 100644 index 0000000000000000000000000000000000000000..f39bbfb15b7e775cae63d3f0040a6a6142270435 --- /dev/null +++ b/src/svd.c @@ -0,0 +1,686 @@ +/** + * + * @file svd.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * This file contains all the function to describe the dependencies + * used in the Xgeqrf_param.jdf file. + * The QR factorization done with this file relies on three levels: + * - the first one is using a flat tree with TS kernels. The + * height of this tree is defined by the parameter 'a'. If 'a' + * is set to A->mt, the factorization is identical to the one + * perform by PLASMA_zgeqrf. + * For all subdiagonal "macro-tiles", the line reduced is always + * the first. For all diagonal "macro-tiles", the factorization + * performed is identical to the one performed by PLASMA_zgeqrf. + * + * - the third level is using a reduction tree of size 'p'. By + * default, the parameter 'p' should be equal to the number of + * processors used for the computation, but can be set + * differently. (see further example). The type of tree used at + * this level is defined by the hlvl parameter. It can be flat + * or greedy. + * CODE DETAILS: This tree and all the function related to it + * are performing a QR factorization on a band matrix with 'p' + * the size of the band. All the functions take global indices + * as input and return global indices as output. + * + * - Finally, a second 'low' level of reduction tree is applied. + * The size of this tree is induced by the parameters 'a' and 'p' + * from the first and third levels and is A->mt / ( p * a ). This + * tree is reproduced p times for each subset of tiles + * S_k = {i in [0, A->mt-1] \ i%p*a = k } with k in [0, p-1]. + * The tree used for the reduction is defined by the llvl + * parameter and can be: flat, greedy, fibonacci or binary. + * CODE DETAILS: For commodity, the size of this tree is always + * ceil(A->mt / (p * a) ) inducing some extra tests in the code. + * All the functions related to this level of tree take as input + * the local indices in the A->mt / (p*a) matrix and the global + * k. They return the local index. The reductions are so + * performed on a trapezoidal matrices where the step is defined + * by a: + * <- min( lhlvl_mt, min( mt, nt ) ) -> + * __a__ a a + * | |_____ + * | |_____ + * | |_____ + * llvl_mt = ceil(MT/ (a*p)) | |_____ + * | |_____ + * |___________________________________| + * + * + * + * At each step of the factorization, the lines of tiles are divided + * in 4 types: + * - QRPARAM_TILE_TS: They are the lines annihilated by a TS + * kernel, these lines are never used as an annihilator. They are + * the lines i, with 1 < (i/p)%a < a and i > (k+1)*p + * - QRPARAM_TILE_LOCALTT: They are the lines used as annhilitor + * in the TS kernels annihiling the QRPARAM_TILE_TS lines. They + * are themselves annihilated by the TT kernel of the low level + * reduction tree. The roots of the local trees are the lines i, + * with i/p = k. + * - QRPARAM_TILE_DOMINO: These are the lines that are + * annhilihated with a domino effect in the band defined by (i/p) + * <= k and i >= k + * - QRPARAM_TILE_DISTTT: These are the lines annihilated by the + * high level tree to reduce communications. + * These lines are defined by (i-k)/p = 0. + */ +#include "libhqr_internal.h" +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> + +/* + * Common functions + */ +static int svd_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); +static int svd_getm( const libhqr_tree_t *qrtree, int k, int i ); +static int svd_geti( const libhqr_tree_t *qrtree, int k, int m ); +static int svd_gettype( const libhqr_tree_t *qrtree, int k, int m ); + +#define svd_getipiv( __qrtree, _k ) ((__qrtree)->llvl->ipiv + ((__qrtree)->llvl->ldd) * (_k) ) +#define svd_geta( __qrtree, _k ) ( (svd_getipiv( (__qrtree), (_k) ))[0] ) + +/* + * Extra parameter: + * gmt - Global number of tiles in a column of the complete distributed matrix + * Return: + * The number of geqrt to execute in the panel k + */ +static int +svd_getnbgeqrf( const libhqr_tree_t *qrtree, + int k ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int p = qrtree->p; + int gmt = qrtree->mt; + int a = svd_geta(arg, k); + int pa = p * a; + int nb_1, nb_2, nb_3; + int nb_11, nb_12; + + /* Number of tasks of type 3 */ + nb_3 = p; + + /* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */ + nb_2 = nbextra1_formula; + + /* First multiple of p*a under the diagonal of step 1 */ + nb_11 = ( (k + p + pa - 1 ) / pa ) * pa; + + /* Last multiple of p*a lower than A->mt */ + nb_12 = ( gmt / pa ) * pa; + + /* Number of tasks of type 1 between nb_11 and nb_12 */ + nb_1 = (nb_12 - nb_11) / a; + + /* Add leftover */ + nb_1 += libhqr_imin( p, gmt - nb_12 ); + + return libhqr_imin( nb_1 + nb_2 + nb_3, gmt - k); +} + +/* + * Extra parameter: + * i - indice of the geqrt in the continuous space + * Return: + * The global indice m of the i th geqrt in the panel k + */ +static int +svd_getm( const libhqr_tree_t *qrtree, + int k, int i ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int p = qrtree->p; + int a = svd_geta(arg, k); + + int pos1, j, pa = p * a; + int nbextra1 = nbextra1_formula; + int nb23 = p + nbextra1; + + /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ + if ( i < nb23 ) + return k+i; + /* Tile of type 1 */ + else { + j = i - nb23; + pa = p * a; + pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; + return pos1 + (j/p) * pa + j%p; + } +} + +/* + * Extra parameter: + * m - The global indice m of a geqrt in the panel k + * Return: + * The index i of the geqrt in the panel k + */ +static int +svd_geti( const libhqr_tree_t *qrtree, + int k, int m ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int p = qrtree->p; + int a = svd_geta(arg, k); + + int pos1, j, pa = p * a; + int nbextra1 = nbextra1_formula; + int nb23 = p + nbextra1; + int end2 = p + k + nbextra1; + + /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ + if ( m < end2 ) + return m-k; + /* Tile of type 1 */ + else { + pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; + j = m - pos1; + return nb23 + (j / pa) * p + j%pa; + } +} + +/* + * Extra parameter: + * m - The global indice m of the row in the panel k + * Return + * -1 - Error + * 0 - if m is reduced thanks to a TS kernel + * 1 - if m is reduced thanks to the low level tree + * 2 - if m is reduced thanks to the bubble tree + * 3 - if m is reduced in distributed + */ +static int +svd_gettype( const libhqr_tree_t *qrtree, + int k, int m ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int p = qrtree->p; + int a = svd_geta(arg, k); + + /* Element to be reduce in distributed */ + if (m < k + p) { + return 3; + } + /* Lower triangle of the matrix */ + else { + if( (m / p) % a == 0 ) + return 1; + else + return 0; + } +} + +/**************************************************** + * + * Generic functions currpiv,prevpiv,nextpiv + * + ***************************************************/ +static int +svd_currpiv(const libhqr_tree_t *qrtree, int k, int m) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, tmpk; + int lm, rank; + int a = svd_geta( arg, k ); + int p = qrtree->p; + int gmt = qrtree->mt; + + lm = m / p; /* Local index in the distribution over p domains */ + rank = m % p; /* Staring index in this distribution */ + + /* TS level common to every case */ + switch( svd_gettype( qrtree, k, m ) ) + { + case 0: + tmp = lm / a; + /* tmpk = (k + p - 1 - m%p) / p / a; */ + tmpk = k / (p * a); + return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank; + break; + case 1: + tmp = arg->llvl->currpiv(arg->llvl, k, m); + /* tmpk = (k + p - 1 - m%p) / p / a; */ + tmpk = k / (p * a); + return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank; + break; + case 2: + assert(0); + break; + case 3: + if ( arg->hlvl != NULL ) + return arg->hlvl->currpiv(arg->hlvl, k, m); + default: + return gmt; + } + return -1; +}; + +/** + * svd_nextpiv - Computes the next row killed by the row p, after + * it has kill the row start. + * + * @param[in] k + * Factorization step + * + * @param[in] pivot + * Line used as killer + * + * @param[in] start + * Starting point to search the next line killed by p after start + * start must be equal to A.mt to find the first row killed by p. + * if start != A.mt, start must be killed by p + * + * @return: + * - -1 if start doesn't respect the previous conditions + * - m, the following row killed by p if it exists, A->mt otherwise + */ +static int svd_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, ls, lp, nextp; + int rpivot, lstart; + int *ipiv = svd_getipiv( arg, k ); + int a = ipiv[0]; + int ldd = ipiv[1]; + int p = qrtree->p; + int gmt = qrtree->mt; + + rpivot = pivot % p; /* Staring index in this distribution */ + + /* Local index in the distribution over p domains */ + lstart = ( start == gmt ) ? ldd * a : start / p; + + myassert( start > pivot && pivot >= k ); + myassert( start == gmt || pivot == svd_currpiv( qrtree, k, start ) ); + + /* TS level common to every case */ + ls = (start < gmt) ? svd_gettype( qrtree, k, start ) : -1; + lp = svd_gettype( qrtree, k, pivot ); + + switch( ls ) + { + case LIBHQR_KILLED_BY_DOMINO: + assert(0); + case -1: + + if ( lp == LIBHQR_KILLED_BY_TS ) { + myassert( start == gmt ); + return gmt; + } + + case LIBHQR_KILLED_BY_TS: + if ( start == gmt ) + nextp = pivot + p; + else + nextp = start + p; + + if ( ( nextp < gmt ) && + ( nextp < pivot + a*p ) && + ( (nextp/p)%a != 0 ) ) + return nextp; + start = gmt; + lstart = ldd * a; + + case LIBHQR_KILLED_BY_LOCALTREE: + /* Get the next pivot for the low level tree */ + tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a ); + + if ( (tmp * a * p + rpivot >= gmt) + && (tmp == ldd-1) ) + tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp); + + if ( tmp != ldd ) + return tmp * a * p + rpivot; + + /* no next of type 1, we reset start to search the next 2 */ + start = gmt; + lstart = ldd * a; + + case LIBHQR_KILLED_BY_DISTTREE: + + if ( lp < LIBHQR_KILLED_BY_DISTTREE ) { + return gmt; + } + + if( arg->hlvl != NULL ) { + tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start ); + if ( tmp != gmt ) + return tmp; + } + + default: + return gmt; + } +} + +/** + * svd_prevpiv - Computes the previous row killed by the row p, before + * to kill the row start. + * + * @param[in] k + * Factorization step + * + * @param[in] pivot + * Line used as killer + * + * @param[in] start + * Starting point to search the previous line killed by p before start + * start must be killed by p, and start must be greater or equal to p + * + * @return: + * - -1 if start doesn't respect the previous conditions + * - m, the previous row killed by p if it exists, A->mt otherwise + */ +static int +svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, ls, lp, nextp; + int lpivot, rpivot, lstart; + int *ipiv = svd_getipiv( arg, k ); + int a = ipiv[0]; + int ldd = ipiv[1]; + int p = qrtree->p; + int gmt = qrtree->mt; + + lpivot = pivot / p; /* Local index in the distribution over p domains */ + rpivot = pivot % p; /* Staring index in this distribution */ + lstart = start / p; /* Local index in the distribution over p domains */ + + myassert( start >= pivot && pivot >= k && start < gmt ); + myassert( start == pivot || pivot == svd_currpiv( qrtree, k, start ) ); + + /* TS level common to every case */ + ls = svd_gettype( qrtree, k, start ); + lp = svd_gettype( qrtree, k, pivot ); + + if ( lp == LIBHQR_KILLED_BY_TS ) + return gmt; + + myassert( lp >= ls ); + switch( ls ) + { + case LIBHQR_KILLED_BY_DOMINO: + assert(0); + case LIBHQR_KILLED_BY_DISTTREE: + if( arg->hlvl != NULL ) { + tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); + if ( tmp != gmt ) + return tmp; + } + + start = pivot; + lstart = pivot / p; + + case LIBHQR_KILLED_BY_LOCALTREE: + tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a); + + if ( (tmp * a * p + rpivot >= gmt) + && (tmp == ldd-1) ) + tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp); + + if ( tmp != ldd ) + return tmp * a * p + rpivot; + + start = pivot; + + case LIBHQR_KILLED_BY_TS: + if ( start == pivot ) { + tmp = lpivot + a - 1 - lpivot%a; + nextp = tmp * p + rpivot; + + while( pivot < nextp && nextp >= gmt ) + nextp -= p; + } else { + nextp = start - p; /*(lstart - 1) * p + rpivot;*/ + } + assert(nextp < gmt); + if ( pivot < nextp ) + return nextp; + + default: + return gmt; + } +}; + +/** + ******************************************************************************* + * + * @ingroup libhqr + * + * libhqr_svd_init - Create the tree structures that will describes the + * operation performed during QR/LQ reduction step of the gebrd_ge2gb operation. + * + * Trees available parameters are described below. It is recommended to: + * - set p to the same value than the P-by-Q process grid used to distribute + * the data. (P for QR factorization, Q for LQ factorization). + * - set the low level tree to LIBHQR_GREEDY_TREE. + * - set the high level tree to: + * 1) LIBHQR_FLAT_TREE when the problem is square, because it divides + * by two the volume of communication of any other tree. + * 2) LIBHQR_FIBONACCI_TREE when the problem is tall and skinny (QR) or + * small and fat (LQ), because it reduces the critical path length. + * - Disable the domino effect when problem is square, to keep high efficiency + * kernel proportion high. + * - Enable the domino effect when problem is tall and skinny (QR) or + * small and fat (LQ) to increase parallelism and reduce critical path length. + * - Round-robin on TS domain (tsrr) option should be disabled. It is + * experimental and is not safe. + * + * These are the default when a parameter is set to -1; + * + * See http://www.netlib.org/lapack/lawnspdf/lawn257.pdf + * + ******************************************************************************* + * + * @param[in,out] qrtree + * On entry, an allocated structure uninitialized. + * On exit, the structure initialized according to the given parameters. + * + * @param[in] trans + * @arg LIBHQR_QR: Structure is initialized for the QR steps. + * @arg LIBHQR_LQ: Structure is initialized for the LQ steps. + * + * @param[in,out] A + * Descriptor of the distributed matrix A to be factorized, on which + * QR/LQ reduction steps will be performed. In case, of + * R-bidiagonalization, don't forget to provide the square submatrix + * that is concerned by those operations. + * The descriptor is untouched and only mt/nt/P parameters are used. + * + * @param[in] type_hlvl + * Defines the tree used to reduce the main tiles of each domain. This + * is a band lower diagonal matrix of width p. + * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the tiles. + * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the tiles. + * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the + * tiles. + * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the tiles. + * @arg LIBHQR_GREEDY1P_TREE: A Greedy tree is computed for the first + * column and then duplicated on all others. + * @arg -1: The default is used (LIBHQR_FIBONACCI_TREE) + * + * @param[in] p + * Defines the number of distributed domains, ie the width of the high + * level reduction tree. If p == 1, no high level reduction tree is + * used. If p == mt, this enforce the high level reduction tree to be + * performed on the full matrix. + * By default, it is recommended to set p to P if trans == + * LIBHQR_QR, and to Q otherwise, where P-by-Q is the process grid + * used to distributed the data. (p > 0) + * + * @param[in] nbthread_per_node + * Define the number of working threads per node to configure the + * adaptativ local tree to provide at least (ratio * nbthread_per_node) + * tasks per step when possible by creating the right amount of TS and + * TT kernels. + * + * @param[in] ratio + * Define the minimal number of tasks per thread that the adaptiv tree + * must provide at the lowest level of the tree. + * + ******************************************************************************* + * + * @retval -i if the ith parameters is incorrect. + * @retval 0 on success. + * + ******************************************************************************* + * + * @sa libhqr_hqr_finalize + * @sa libhqr_hqr_init + * @sa dplasma_zgeqrf_param + * @sa dplasma_cgeqrf_param + * @sa dplasma_dgeqrf_param + * @sa dplasma_sgeqrf_param + * + ******************************************************************************/ +int +libhqr_initfct_svd( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_hlvl, int p, int nbthread_per_node, int ratio ) +{ + int low_mt, minMN, a = -1; + hqr_args_t *arg; + + if (qrtree == NULL) { + fprintf(stderr,"libhqr_svd_init, illegal value of qrtree"); + return -1; + } + if ((trans != LIBHQR_QR) && + (trans != LIBHQR_LQ)) { + fprintf(stderr, "libhqr_svd_init, illegal value of trans"); + return -2; + } + if (A == NULL) { + fprintf(stderr, "libhqr_svd_init, illegal value of A"); + return -3; + } + + /* Compute parameters */ + p = libhqr_imax( p, 1 ); + + qrtree->getnbgeqrf = svd_getnbgeqrf; + qrtree->getm = svd_getm; + qrtree->geti = svd_geti; + qrtree->gettype = svd_gettype; + qrtree->currpiv = svd_currpiv; + qrtree->nextpiv = svd_nextpiv; + qrtree->prevpiv = svd_prevpiv; + + qrtree->init = LIBHQR_QRTREE_SVD; + + qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt; + qrtree->nt = (trans == LIBHQR_QR) ? A->nt : A->mt; + + qrtree->a = a; + qrtree->p = p; + qrtree->args = NULL; + + arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) ); + arg->domino = 0; + arg->tsrr = 0; + arg->perm = NULL; + + arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + arg->hlvl = NULL; + + minMN = libhqr_imin(A->mt, A->nt); + low_mt = (qrtree->mt + p - 1) / ( p ); + + arg->llvl->minMN = minMN; + arg->llvl->ldd = low_mt; + arg->llvl->a = a; + arg->llvl->p = p; + arg->llvl->domino = 0; + + svd_low_adaptiv_init( arg->llvl, qrtree->mt, qrtree->nt, + nbthread_per_node * (A->nodes / p), ratio ); + + if ( p > 1 ) { + arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + + arg->llvl->minMN = minMN; + arg->hlvl->ldd = qrtree->mt; + arg->hlvl->a = a; + arg->hlvl->p = p; + arg->hlvl->domino = 0; + + switch( type_hlvl ) { + case LIBHQR_FLAT_TREE : + hqr_high_flat_init(arg->hlvl); + break; + case LIBHQR_GREEDY_TREE : + hqr_high_greedy_init(arg->hlvl, minMN); + break; + case LIBHQR_GREEDY1P_TREE : + hqr_high_greedy1p_init(arg->hlvl); + break; + case LIBHQR_BINARY_TREE : + hqr_high_binary_init(arg->hlvl); + break; + case LIBHQR_FIBONACCI_TREE : + hqr_high_fibonacci_init(arg->hlvl); + break; + default: + hqr_high_fibonacci_init(arg->hlvl); + } + } + + qrtree->args = (void*)arg; + + return 0; +} + +int +libhqr_initmtx_svd( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_hlvl, int p, int nbthread_per_node, int ratio ) +{ + libhqr_tree_t qrtree_init; + int rc; + + rc = libhqr_initfct_svd( &qrtree_init, trans, A, type_hlvl, p, nbthread_per_node, ratio ); + if ( rc != 0 ) { + return rc; + } + + /* Initialize the final QR tree */ + memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) ); + qrtree->init = LIBHQR_QRTREE_MTX; + qrtree->gettype = rdmtx_gettype; + qrtree->currpiv = rdmtx_currpiv; + qrtree->nextpiv = rdmtx_nextpiv; + qrtree->prevpiv = rdmtx_prevpiv; + qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tile_info_t) ); + + /* Initialize the matrix */ + libhqr_matrix_init(qrtree, &qrtree_init); + + /* Free the initial qrtree */ + libhqr_finalize( &qrtree_init ); + + return 0; +} + +int +libhqr_init_svd( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_hlvl, int p, int nbthread_per_node, int ratio ) +{ + return libhqr_initmtx_svd( qrtree, trans, A, type_hlvl, p, nbthread_per_node, ratio ); +} diff --git a/src/libhqr_systolic.c b/src/systolic.c similarity index 100% rename from src/libhqr_systolic.c rename to src/systolic.c