From 3da14c968fc3e297c329ae1c866df2cff8ddccdf Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Thu, 27 Jul 2017 11:47:26 +0200 Subject: [PATCH] Add low level trees functions inseparated files --- src/low_binary.c | 109 ++++++++++++++++++++++ src/low_fibonacci.c | 91 ++++++++++++++++++ src/low_flat.c | 81 ++++++++++++++++ src/low_greedy.c | 221 ++++++++++++++++++++++++++++++++++++++++++++ src/low_greedy1p.c | 179 +++++++++++++++++++++++++++++++++++ 5 files changed, 681 insertions(+) create mode 100644 src/low_binary.c create mode 100644 src/low_fibonacci.c create mode 100644 src/low_flat.c create mode 100644 src/low_greedy.c create mode 100644 src/low_greedy1p.c diff --git a/src/low_binary.c b/src/low_binary.c new file mode 100644 index 0000000..0b05a8f --- /dev/null +++ b/src/low_binary.c @@ -0,0 +1,109 @@ +/** + * + * @file low_binary.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level binary tree + * + */ +#include "libhqr_internal.h" +#include <math.h> + +static inline int +hqr_low_binary_currpiv(const hqr_subpiv_t *arg, int k, int m) +{ + int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a; + int m_pa = (m / arg->p ) / arg->a; + + int tmp1 = m_pa - k_a; + int tmp2 = 1; + (void)arg; + + if ( tmp1 == 0) + return 0; + while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) { + tmp1 = tmp1 >> 1; + tmp2 = tmp2 << 1; + } + assert( m_pa - tmp2 >= k_a ); + return m_pa - tmp2; +} + +static inline int +hqr_low_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) +{ + int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; + int p_pa = (p / arg->p ) / arg->a; + + int tmpp, bit; + myassert( (start_pa == arg->ldd) || (hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa || !arg->domino) ); + + if ( start_pa <= p_pa ) + return arg->ldd; + + int offset = p_pa-k_a; + bit = 0; + if (start_pa != arg->ldd) { + while( ( (start_pa-k_a) & (1 << bit ) ) == 0 ) + bit++; + bit++; + } + + tmpp = offset | (1 << bit); + if ( ( tmpp != offset ) && ( tmpp+k_a < arg->ldd ) ) + return tmpp + k_a; + else + return arg->ldd; +} + +static inline int +hqr_low_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) +{ + int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; + int p_pa = (p / arg->p ) / arg->a; + int offset = p_pa - k_a; + + myassert( start_pa >= p_pa && ( start_pa == p_pa || !arg->domino || + hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa ) ); + + if ( (start_pa == p_pa) && ( offset%2 == 0 ) ) { + int i, bit, tmp; + if ((p_pa - k_a) == 0) + bit = (int)( log( (double)(arg->ldd - k_a) ) / log( 2. ) ); + else { + bit = 0; + while( (offset & (1 << bit )) == 0 ) + bit++; + } + for( i=bit; i>-1; i--){ + tmp = offset | (1 << i); + if ( ( offset != tmp ) && ( tmp+k_a < arg->ldd ) ) + return tmp+k_a; + } + return arg->ldd; + } + + if ( (start_pa - p_pa) > 1 ) + return p_pa + ( (start_pa - p_pa) >> 1 ); + else { + return arg->ldd; + } +} + +void +hqr_low_binary_init(hqr_subpiv_t *arg) { + arg->currpiv = hqr_low_binary_currpiv; + arg->nextpiv = hqr_low_binary_nextpiv; + arg->prevpiv = hqr_low_binary_prevpiv; + arg->ipiv = NULL; +} diff --git a/src/low_fibonacci.c b/src/low_fibonacci.c new file mode 100644 index 0000000..da6a6b5 --- /dev/null +++ b/src/low_fibonacci.c @@ -0,0 +1,91 @@ +/** + * + * @file low_fibonacci.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level fibonacci tree + * + */ +#include "libhqr_internal.h" +#include <stdlib.h> + +/* Return the pivot to use for the row m at step k */ +static inline int +hqr_low_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { + int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - m%(qrpiv->p)) / qrpiv->p / qrpiv->a; + return (qrpiv->ipiv)[ k_a * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; +} + +/* Return the last row which has used the row m as a pivot in step k before the row start */ +static inline int +hqr_low_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; + int p_pa = (p / qrpiv->p ) / qrpiv->a; + + for( i=start_pa+1; i<(qrpiv->ldd); i++ ) + if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa ) + return i; + return i; +} + +/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ +static inline int +hqr_low_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; + int p_pa = (p / qrpiv->p ) / qrpiv->a; + + for( i=start_pa-1; i>k_a; i-- ) + if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa ) + return i; + return (qrpiv->ldd); +} + +void +hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN) { + int *ipiv; + int mt; + + arg->currpiv = hqr_low_fibonacci_currpiv; + arg->nextpiv = hqr_low_fibonacci_nextpiv; + arg->prevpiv = hqr_low_fibonacci_prevpiv; + + mt = arg->ldd; + + arg->ipiv = (int*)calloc( mt * minMN, sizeof(int) ); + ipiv = arg->ipiv; + + /* + * Fibonacci of order 1: + * u_(n+1) = u_(n) + 1 + */ + { + int f1, k, m; + + /* Fill in the first column */ + f1 = 1; + for (m=1; m < mt; ) { + for (k=0; (k < f1) && (m < mt); k++, m++) { + ipiv[m] = m - f1; + } + f1++; + } + + for( k=1; k<minMN; k++) { + for(m=k+1; m < mt; m++) { + ipiv[ k * mt + m ] = ipiv[ (k-1) * mt + m - 1 ] + 1; + } + } + } +} diff --git a/src/low_flat.c b/src/low_flat.c new file mode 100644 index 0000000..09b6987 --- /dev/null +++ b/src/low_flat.c @@ -0,0 +1,81 @@ +/** + * + * @file low_flat.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level flat tree + * + */ +#include "libhqr_internal.h" + +static inline int +hqr_low_flat_currpiv(const hqr_subpiv_t *arg, int k, int m) +{ + (void)m; + if ( arg->domino ) + return k / arg->a; + else + return (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a ; +} + +static inline int +hqr_low_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) +{ + int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; + int p_pa = (p / arg->p ) / arg->a; + +#ifdef FLAT_UP + if ( ( p_pa == k_a ) && (start_pa > k_a+1 ) ) + return start_pa-1; + else +#else /* FLAT_DOWN */ + if ( start_pa <= p_pa ) + return arg->ldd; + + if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) { + if ( start_pa == arg->ldd ) + return p_pa+1; + else if ( start_pa < arg->ldd ) + return start_pa+1; + } +#endif + return arg->ldd; +} + +static inline int +hqr_low_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) +{ + int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; + int p_pa = (p / arg->p ) / arg->a; + +#ifdef FLAT_UP + if ( p_pa == k_a && (start_pa+1 < arg->ldd) ) + return start_pa+1; + else +#else + if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) { + if ( start_pa == p_pa ) + return arg->ldd - 1; + else if ( start_pa > p_pa + 1 ) + return start_pa-1; + } +#endif + return arg->ldd; +} + +void hqr_low_flat_init(hqr_subpiv_t *arg){ + arg->currpiv = hqr_low_flat_currpiv; + arg->nextpiv = hqr_low_flat_nextpiv; + arg->prevpiv = hqr_low_flat_prevpiv; + arg->ipiv = NULL; +} diff --git a/src/low_greedy.c b/src/low_greedy.c new file mode 100644 index 0000000..497410b --- /dev/null +++ b/src/low_greedy.c @@ -0,0 +1,221 @@ +/** + * + * @file low_greedy.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level greedy tree + * + */ +#include "libhqr_internal.h" +#include <stdlib.h> +#include <string.h> + +/* Return the pivot to use for the row m at step k */ +static inline int +hqr_low_greedy_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { + if (qrpiv->domino) + return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; + else + return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd) + + ( ( m / qrpiv->p ) / qrpiv->a ) ]; +} + +/* Return the last row which has used the row m as a pivot in step k before the row start */ +static inline int +hqr_low_greedy_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int p_pa = p / qrpiv->p / qrpiv->a; + int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; + + for( i=start_pa+1; i<(qrpiv->ldd); i++ ) + if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) + return i; + return i; + } + +/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ +static inline int +hqr_low_greedy_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int pa = qrpiv->p * qrpiv->a; + int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; + int p_pa = p / pa; + int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; + + for( i=start_pa-1; i> k_a; i-- ) + if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) + return i; + + return (qrpiv->ldd); +} + +void +hqr_low_greedy_init(hqr_subpiv_t *arg, int minMN) { + int *ipiv; + int mt, a, p, pa, domino; + + arg->currpiv = hqr_low_greedy_currpiv; + arg->nextpiv = hqr_low_greedy_nextpiv; + arg->prevpiv = hqr_low_greedy_prevpiv; + + mt = arg->ldd; + a = arg->a; + p = arg->p; + pa = p * a; + domino = arg->domino; + + if ( domino ) + { + int j, k, height, start, end, firstk = 0; + int *nT, *nZ; + + arg->minMN = libhqr_imin( minMN, mt*a ); + minMN = arg->minMN; + + arg->ipiv = (int*)calloc( mt * minMN, sizeof(int) ); + ipiv = arg->ipiv; + + nT = (int*)calloc(minMN, sizeof(int)); + nZ = (int*)calloc(minMN, sizeof(int)); + + nT[0] = mt; + + k = 0; + while ( (!( ( nT[minMN-1] == mt - ( (minMN - 1) / a ) ) && + ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) + && ( firstk < minMN ) ) { + height = (nT[k] - nZ[k]) / 2; + if ( height == 0 ) { + while ( (firstk < minMN) && + ( nT[firstk] == mt - ( firstk / a ) ) && + ( nZ[firstk]+1 == nT[firstk] ) ) { + if ( (( firstk % a) != a-1 ) + && ( firstk < minMN-1 ) ) + nT[firstk+1]++; + firstk++; + } + k = firstk; + continue; + } + + if (k < minMN-1) nT[k+1] += height; + start = mt - nZ[k] - 1; + end = start - height; + nZ[k] += height; + + for( j=start; j > end; j-- ) { + ipiv[ k*mt + j ] = (j - height); + } + + k++; + if (k > minMN-1) k = firstk; + } + + free(nT); + free(nZ); + } + else + { + int j, k, myrank, height, start, end, firstk = 0; + int *nT2DO = (int*)malloc(minMN*sizeof(int)); + int *nT = (int*)malloc(minMN*sizeof(int)); + int *nZ = (int*)malloc(minMN*sizeof(int)); + + arg->ipiv = (int*)calloc( mt * minMN * p, sizeof(int) ); + ipiv = arg->ipiv; + + for ( myrank=0; myrank<p; myrank++ ) { + int lminMN = minMN; + + memset( nT2DO, 0, minMN*sizeof(int)); + memset( nT, 0, minMN*sizeof(int)); + memset( nZ, 0, minMN*sizeof(int)); + + nT[0] = mt; + + for(k=0; k<lminMN; k++) { + nT2DO[k] = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); + if ( nT2DO[k] == 0 ) { + lminMN = k; + break; + } + } + + k = 0; firstk = 0; + while ( (!( ( nT[lminMN-1] == nT2DO[lminMN-1] ) && + ( nZ[lminMN-1]+1 == nT[lminMN-1] ) ) ) + && ( firstk < lminMN ) ) { + height = (nT[k] - nZ[k]) / 2; + if ( height == 0 ) { + while ( (firstk < lminMN) && + ( nT[firstk] == nT2DO[firstk] ) && + ( nZ[firstk]+1 == nT[firstk] ) ) { + if ( ( firstk < lminMN-1 ) && + (( (firstk) % pa) != ((a-1)*p+myrank) ) ) + nT[firstk+1]++; + firstk++; + } + k = firstk; + continue; + } + + if (k < lminMN-1) nT[k+1] += height; + start = mt - nZ[k] - 1; + end = start - height; + nZ[k] += height; + + for( j=start; j > end; j-- ) { + ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height); + } + + k++; + if (k > lminMN-1) k = firstk; + } + } + + free(nT2DO); + free(nT); + free(nZ); + } + +#if 0 + { + int m, k; + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + printf( "%3d ", ipiv[k*mt + m] ); + } + printf("\n"); + } + } + if (!arg->domino) { + int m, k, myrank; + for ( myrank=1; myrank<p; myrank++ ) { + ipiv += mt*minMN; + printf("-------- rank %d ---------\n", myrank ); + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + int k_a = (k + p - 1 - myrank) / p / a; + if ( m >= k_a ) + printf( "%3d ", ipiv[k*mt + m] ); + else + printf( "--- " ); + } + printf("\n"); + } + } + } +#endif +} diff --git a/src/low_greedy1p.c b/src/low_greedy1p.c new file mode 100644 index 0000000..7d6fa5b --- /dev/null +++ b/src/low_greedy1p.c @@ -0,0 +1,179 @@ +/** + * + * @file low_greedy1p.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level duplicated greedy tree + * + */ +#include "libhqr_internal.h" + +/* Return the pivot to use for the row m at step k */ +static inline int +hqr_low_greedy1p_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { + if (qrpiv->domino) + return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; + else + return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd) + + ( ( m / qrpiv->p ) / qrpiv->a ) ]; +} + +/* Return the last row which has used the row m as a pivot in step k before the row start */ +static inline int +hqr_low_greedy1p_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int p_pa = p / qrpiv->p / qrpiv->a; + int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; + + for( i=start_pa+1; i<(qrpiv->ldd); i++ ) + if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) + return i; + return i; +} + +/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ +static inline int +hqr_low_greedy1p_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int pa = qrpiv->p * qrpiv->a; + int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; + int p_pa = p / pa; + int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; + + for( i=start_pa-1; i> k_a; i-- ) + if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) + return i; + + return (qrpiv->ldd); +} + +void +hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN) { + int *ipiv; + int mt, a, p, pa, domino; + int j, k, height, start, end, nT, nZ; + + arg->currpiv = hqr_low_greedy1p_currpiv; + arg->nextpiv = hqr_low_greedy1p_nextpiv; + arg->prevpiv = hqr_low_greedy1p_prevpiv; + + mt = arg->ldd; + a = arg->a; + p = arg->p; + pa = p * a; + domino = arg->domino; + + /* This section has not been coded yet, and will perform a classic greedy */ + if ( domino ) + { + arg->minMN = libhqr_imin( minMN, mt*a ); + minMN = arg->minMN; + + arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) ); + ipiv = arg->ipiv; + memset(ipiv, 0, mt*minMN*sizeof(int)); + + /** + * Compute the local greedy tree of each column, on each node + */ + for(k=0; k<minMN; k++) { + /* Number of tiles to factorized in this column on this rank */ + nT = libhqr_imax( mt - (k / a), 0 ); + /* Number of tiles already killed */ + nZ = 0; + + while( nZ < (nT-1) ) { + height = (nT - nZ) / 2; + start = mt - nZ - 1; + end = start - height; + nZ += height; + + for( j=start; j > end; j-- ) { + ipiv[ k*mt + j ] = (j - height); + } + } + assert( nZ+1 == nT ); + } + } + else + { + int myrank; + end = 0; + + arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) * p ); + ipiv = arg->ipiv; + + memset( ipiv, 0, minMN*sizeof(int)*mt*p); + + for ( myrank=0; myrank<p; myrank++ ) { + + /** + * Compute the local greedy tree of each column, on each node + */ + for(k=0; k<minMN; k++) { + /* Number of tiles to factorized in this column on this rank */ + nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); + /* Number of tiles already killed */ + nZ = 0; + + /* No more computations on this node */ + if ( nT == 0 ) { + break; + } + + while( nZ < (nT-1) ) { + height = (nT - nZ) / 2; + start = mt - nZ - 1; + end = start - height; + nZ += height; + + for( j=start; j > end; j-- ) { + ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height); + } + } + assert( nZ+1 == nT ); + } + } + } + +#if 0 + { + int m, k; + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + printf( "%3d ", ipiv[k*mt + m] ); + } + printf("\n"); + } + } + if (!arg->domino) { + int m, k, myrank; + for ( myrank=1; myrank<p; myrank++ ) { + ipiv += mt*minMN; + printf("-------- rank %d ---------\n", myrank ); + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + int k_a = (k + p - 1 - myrank) / p / a; + if ( m >= k_a ) + printf( "%3d ", ipiv[k*mt + m] ); + else + printf( "--- " ); + } + printf("\n"); + } + } + } +#endif +} -- GitLab