From 3da14c968fc3e297c329ae1c866df2cff8ddccdf Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Thu, 27 Jul 2017 11:47:26 +0200
Subject: [PATCH] Add low level trees functions inseparated files

---
 src/low_binary.c    | 109 ++++++++++++++++++++++
 src/low_fibonacci.c |  91 ++++++++++++++++++
 src/low_flat.c      |  81 ++++++++++++++++
 src/low_greedy.c    | 221 ++++++++++++++++++++++++++++++++++++++++++++
 src/low_greedy1p.c  | 179 +++++++++++++++++++++++++++++++++++
 5 files changed, 681 insertions(+)
 create mode 100644 src/low_binary.c
 create mode 100644 src/low_fibonacci.c
 create mode 100644 src/low_flat.c
 create mode 100644 src/low_greedy.c
 create mode 100644 src/low_greedy1p.c

diff --git a/src/low_binary.c b/src/low_binary.c
new file mode 100644
index 0000000..0b05a8f
--- /dev/null
+++ b/src/low_binary.c
@@ -0,0 +1,109 @@
+/**
+ *
+ * @file low_binary.c
+ *
+ * @copyright 2010-2017 The University of Tennessee and The University
+ *                      of Tennessee Research Foundation.  All rights
+ *                      reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ * @version 1.0.0
+ * @author Raphael Boucherie
+ * @author Mathieu Faverge
+ * @date 2017-03-21
+ *
+ * Functions for low level binary tree
+ *
+ */
+#include "libhqr_internal.h"
+#include <math.h>
+
+static inline int
+hqr_low_binary_currpiv(const hqr_subpiv_t *arg, int k, int m)
+{
+    int k_a = arg->domino ? k / arg->a :  (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a;
+    int m_pa = (m / arg->p ) / arg->a;
+
+    int tmp1 = m_pa - k_a;
+    int tmp2 = 1;
+    (void)arg;
+
+    if ( tmp1 == 0)
+        return 0;
+    while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) {
+        tmp1 = tmp1 >> 1;
+        tmp2 = tmp2 << 1;
+    }
+    assert( m_pa - tmp2 >= k_a );
+    return m_pa - tmp2;
+}
+
+static inline int
+hqr_low_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa)
+{
+    int k_a = arg->domino ? k / arg->a :  (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a;
+    int p_pa = (p / arg->p ) / arg->a;
+
+    int tmpp, bit;
+    myassert( (start_pa == arg->ldd) || (hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa || !arg->domino) );
+
+    if ( start_pa <= p_pa )
+        return arg->ldd;
+
+    int offset = p_pa-k_a;
+    bit = 0;
+    if (start_pa != arg->ldd) {
+        while( ( (start_pa-k_a) & (1 << bit ) ) == 0 )
+            bit++;
+        bit++;
+    }
+
+    tmpp = offset | (1 << bit);
+    if ( ( tmpp != offset ) && ( tmpp+k_a < arg->ldd ) )
+        return tmpp + k_a;
+    else
+        return arg->ldd;
+}
+
+static inline int
+hqr_low_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa)
+{
+    int k_a = arg->domino ? k / arg->a :  (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a;
+    int p_pa = (p / arg->p ) / arg->a;
+    int offset = p_pa - k_a;
+
+    myassert( start_pa >= p_pa && ( start_pa == p_pa || !arg->domino ||
+                                    hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa ) );
+
+    if ( (start_pa == p_pa) && ( offset%2 == 0 ) ) {
+        int i, bit, tmp;
+        if ((p_pa - k_a) == 0)
+            bit = (int)( log( (double)(arg->ldd - k_a) ) / log( 2. ) );
+        else {
+            bit = 0;
+            while( (offset & (1 << bit )) == 0 )
+                bit++;
+        }
+        for( i=bit; i>-1; i--){
+            tmp = offset | (1 << i);
+            if ( ( offset != tmp ) && ( tmp+k_a < arg->ldd ) )
+                return tmp+k_a;
+        }
+        return arg->ldd;
+    }
+
+    if ( (start_pa - p_pa) > 1 )
+        return p_pa + ( (start_pa - p_pa) >> 1 );
+    else {
+        return arg->ldd;
+    }
+}
+
+void
+hqr_low_binary_init(hqr_subpiv_t *arg) {
+    arg->currpiv = hqr_low_binary_currpiv;
+    arg->nextpiv = hqr_low_binary_nextpiv;
+    arg->prevpiv = hqr_low_binary_prevpiv;
+    arg->ipiv = NULL;
+}
diff --git a/src/low_fibonacci.c b/src/low_fibonacci.c
new file mode 100644
index 0000000..da6a6b5
--- /dev/null
+++ b/src/low_fibonacci.c
@@ -0,0 +1,91 @@
+/**
+ *
+ * @file low_fibonacci.c
+ *
+ * @copyright 2010-2017 The University of Tennessee and The University
+ *                      of Tennessee Research Foundation.  All rights
+ *                      reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ * @version 1.0.0
+ * @author Raphael Boucherie
+ * @author Mathieu Faverge
+ * @date 2017-03-21
+ *
+ * Functions for low level fibonacci tree
+ *
+ */
+#include "libhqr_internal.h"
+#include <stdlib.h>
+
+/* Return the pivot to use for the row m at step k */
+static inline int
+hqr_low_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) {
+    int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - m%(qrpiv->p)) / qrpiv->p / qrpiv->a;
+    return (qrpiv->ipiv)[ k_a * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ];
+}
+
+/* Return the last row which has used the row m as a pivot in step k before the row start */
+static inline int
+hqr_low_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
+    int i;
+    int k_a = qrpiv->domino ? k / qrpiv->a :  (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a;
+    int p_pa = (p / qrpiv->p ) / qrpiv->a;
+
+    for( i=start_pa+1; i<(qrpiv->ldd); i++ )
+        if ( (qrpiv->ipiv)[i +  k_a * (qrpiv->ldd)] == p_pa )
+            return i;
+    return i;
+}
+
+/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
+static inline int
+hqr_low_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
+    int i;
+    int k_a = qrpiv->domino ? k / qrpiv->a :  (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a;
+    int p_pa = (p / qrpiv->p ) / qrpiv->a;
+
+    for( i=start_pa-1; i>k_a; i-- )
+        if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa )
+            return i;
+    return (qrpiv->ldd);
+}
+
+void
+hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN) {
+    int *ipiv;
+    int mt;
+
+    arg->currpiv = hqr_low_fibonacci_currpiv;
+    arg->nextpiv = hqr_low_fibonacci_nextpiv;
+    arg->prevpiv = hqr_low_fibonacci_prevpiv;
+
+    mt = arg->ldd;
+
+    arg->ipiv = (int*)calloc( mt * minMN, sizeof(int) );
+    ipiv = arg->ipiv;
+
+    /*
+     * Fibonacci of order 1:
+     *    u_(n+1) = u_(n) + 1
+     */
+    {
+        int f1, k, m;
+
+        /* Fill in the first column */
+        f1 = 1;
+        for (m=1; m < mt; ) {
+            for (k=0; (k < f1) && (m < mt); k++, m++) {
+                ipiv[m] = m - f1;
+            }
+            f1++;
+        }
+
+        for( k=1; k<minMN; k++) {
+            for(m=k+1; m < mt; m++) {
+                ipiv[ k * mt + m ] = ipiv[ (k-1) * mt + m - 1 ] + 1;
+            }
+        }
+    }
+}
diff --git a/src/low_flat.c b/src/low_flat.c
new file mode 100644
index 0000000..09b6987
--- /dev/null
+++ b/src/low_flat.c
@@ -0,0 +1,81 @@
+/**
+ *
+ * @file low_flat.c
+ *
+ * @copyright 2010-2017 The University of Tennessee and The University
+ *                      of Tennessee Research Foundation.  All rights
+ *                      reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ * @version 1.0.0
+ * @author Raphael Boucherie
+ * @author Mathieu Faverge
+ * @date 2017-03-21
+ *
+ * Functions for low level flat tree
+ *
+ */
+#include "libhqr_internal.h"
+
+static inline int
+hqr_low_flat_currpiv(const hqr_subpiv_t *arg, int k, int m)
+{
+    (void)m;
+    if ( arg->domino )
+        return k / arg->a;
+    else
+        return (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a ;
+}
+
+static inline int
+hqr_low_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa)
+{
+    int k_a = arg->domino ? k / arg->a :  (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a;
+    int p_pa = (p / arg->p ) / arg->a;
+
+#ifdef FLAT_UP
+    if ( ( p_pa == k_a ) && (start_pa > k_a+1 ) )
+        return start_pa-1;
+    else
+#else /* FLAT_DOWN */
+        if ( start_pa <= p_pa )
+            return arg->ldd;
+
+    if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) {
+        if ( start_pa == arg->ldd )
+            return p_pa+1;
+        else if ( start_pa < arg->ldd )
+            return start_pa+1;
+    }
+#endif
+    return arg->ldd;
+}
+
+static inline int
+hqr_low_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa)
+{
+    int k_a = arg->domino ? k / arg->a :  (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a;
+    int p_pa = (p / arg->p ) / arg->a;
+
+#ifdef FLAT_UP
+    if ( p_pa == k_a && (start_pa+1 < arg->ldd) )
+        return start_pa+1;
+    else
+#else
+        if ( p_pa == k_a && ( arg->ldd - k_a ) > 1  ) {
+            if ( start_pa == p_pa )
+                return arg->ldd - 1;
+            else if ( start_pa > p_pa + 1 )
+                return start_pa-1;
+        }
+#endif
+    return arg->ldd;
+}
+
+void hqr_low_flat_init(hqr_subpiv_t *arg){
+    arg->currpiv = hqr_low_flat_currpiv;
+    arg->nextpiv = hqr_low_flat_nextpiv;
+    arg->prevpiv = hqr_low_flat_prevpiv;
+    arg->ipiv = NULL;
+}
diff --git a/src/low_greedy.c b/src/low_greedy.c
new file mode 100644
index 0000000..497410b
--- /dev/null
+++ b/src/low_greedy.c
@@ -0,0 +1,221 @@
+/**
+ *
+ * @file low_greedy.c
+ *
+ * @copyright 2010-2017 The University of Tennessee and The University
+ *                      of Tennessee Research Foundation.  All rights
+ *                      reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ * @version 1.0.0
+ * @author Raphael Boucherie
+ * @author Mathieu Faverge
+ * @date 2017-03-21
+ *
+ * Functions for low level greedy tree
+ *
+ */
+#include "libhqr_internal.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Return the pivot to use for the row m at step k */
+static inline int
+hqr_low_greedy_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) {
+    if (qrpiv->domino)
+        return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ];
+    else
+        return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd)
+                              + ( ( m  / qrpiv->p ) / qrpiv->a ) ];
+}
+
+/* Return the last row which has used the row m as a pivot in step k before the row start */
+static inline int
+hqr_low_greedy_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
+    int i;
+    int p_pa = p / qrpiv->p / qrpiv->a;
+    int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd;
+
+    for( i=start_pa+1; i<(qrpiv->ldd); i++ )
+        if ( ipiv[i +  k * (qrpiv->ldd)] == p_pa )
+            return i;
+    return i;
+ }
+
+/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
+static inline int
+hqr_low_greedy_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
+    int i;
+    int pa = qrpiv->p * qrpiv->a;
+    int k_a = qrpiv->domino ? k / qrpiv->a :  (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a;
+    int p_pa = p / pa;
+    int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd;
+
+    for( i=start_pa-1; i> k_a; i-- )
+        if ( ipiv[i + k * (qrpiv->ldd)] == p_pa )
+            return i;
+
+    return (qrpiv->ldd);
+}
+
+void
+hqr_low_greedy_init(hqr_subpiv_t *arg, int minMN) {
+    int *ipiv;
+    int mt, a, p, pa, domino;
+
+    arg->currpiv = hqr_low_greedy_currpiv;
+    arg->nextpiv = hqr_low_greedy_nextpiv;
+    arg->prevpiv = hqr_low_greedy_prevpiv;
+
+    mt = arg->ldd;
+    a = arg->a;
+    p = arg->p;
+    pa = p * a;
+    domino = arg->domino;
+
+    if ( domino )
+    {
+        int j, k, height, start, end, firstk = 0;
+        int *nT, *nZ;
+
+        arg->minMN =  libhqr_imin( minMN, mt*a );
+        minMN = arg->minMN;
+
+        arg->ipiv = (int*)calloc( mt * minMN, sizeof(int) );
+        ipiv = arg->ipiv;
+
+        nT = (int*)calloc(minMN, sizeof(int));
+        nZ = (int*)calloc(minMN, sizeof(int));
+
+        nT[0] = mt;
+
+        k = 0;
+        while ( (!( ( nT[minMN-1] == mt - ( (minMN - 1) / a ) ) &&
+                     ( nZ[minMN-1]+1 == nT[minMN-1] ) ) )
+                && ( firstk < minMN ) ) {
+            height = (nT[k] - nZ[k]) / 2;
+            if ( height == 0 ) {
+                while ( (firstk < minMN) &&
+                        ( nT[firstk] == mt - ( firstk / a ) ) &&
+                        ( nZ[firstk]+1 == nT[firstk] ) ) {
+                    if (  (( firstk % a) != a-1 )
+                          && ( firstk < minMN-1 ) )
+                        nT[firstk+1]++;
+                    firstk++;
+                }
+                k = firstk;
+                continue;
+            }
+
+            if (k < minMN-1) nT[k+1] += height;
+            start = mt - nZ[k] - 1;
+            end = start - height;
+            nZ[k] += height;
+
+            for( j=start; j > end; j-- ) {
+                ipiv[ k*mt + j ] = (j - height);
+            }
+
+            k++;
+            if (k > minMN-1) k = firstk;
+        }
+
+        free(nT);
+        free(nZ);
+    }
+    else
+    {
+        int j, k, myrank, height, start, end, firstk = 0;
+        int *nT2DO = (int*)malloc(minMN*sizeof(int));
+        int *nT = (int*)malloc(minMN*sizeof(int));
+        int *nZ = (int*)malloc(minMN*sizeof(int));
+
+        arg->ipiv = (int*)calloc( mt * minMN * p, sizeof(int) );
+        ipiv = arg->ipiv;
+
+        for ( myrank=0; myrank<p; myrank++ ) {
+            int lminMN = minMN;
+
+            memset( nT2DO, 0, minMN*sizeof(int));
+            memset( nT,    0, minMN*sizeof(int));
+            memset( nZ,    0, minMN*sizeof(int));
+
+            nT[0] = mt;
+
+            for(k=0; k<lminMN; k++) {
+                nT2DO[k] = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 );
+                if ( nT2DO[k] == 0 ) {
+                    lminMN = k;
+                    break;
+                }
+            }
+
+            k = 0; firstk = 0;
+            while ( (!( ( nT[lminMN-1] == nT2DO[lminMN-1] ) &&
+                        ( nZ[lminMN-1]+1 == nT[lminMN-1] ) ) )
+                    && ( firstk < lminMN ) ) {
+                height = (nT[k] - nZ[k]) / 2;
+                if ( height == 0 ) {
+                    while ( (firstk < lminMN) &&
+                            ( nT[firstk] == nT2DO[firstk] ) &&
+                            ( nZ[firstk]+1 == nT[firstk] ) ) {
+                        if (  ( firstk < lminMN-1 )  &&
+                              (( (firstk) % pa) != ((a-1)*p+myrank) ) )
+                            nT[firstk+1]++;
+                        firstk++;
+                    }
+                    k = firstk;
+                    continue;
+                }
+
+                if (k < lminMN-1) nT[k+1] += height;
+                start = mt - nZ[k] - 1;
+                end = start - height;
+                nZ[k] += height;
+
+                for( j=start; j > end; j-- ) {
+                    ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height);
+                }
+
+                k++;
+                if (k > lminMN-1) k = firstk;
+            }
+        }
+
+        free(nT2DO);
+        free(nT);
+        free(nZ);
+    }
+
+#if 0
+    {
+        int m, k;
+        for(m=0; m<mt; m++) {
+            printf("%3d | ", m);
+            for (k=0; k<minMN; k++) {
+                printf( "%3d ", ipiv[k*mt + m] );
+            }
+            printf("\n");
+        }
+    }
+    if (!arg->domino) {
+        int m, k, myrank;
+        for ( myrank=1; myrank<p; myrank++ ) {
+            ipiv += mt*minMN;
+            printf("-------- rank %d ---------\n", myrank );
+            for(m=0; m<mt; m++) {
+                printf("%3d | ", m);
+                for (k=0; k<minMN; k++) {
+                  int k_a = (k + p - 1 - myrank) / p / a;
+                  if ( m >= k_a )
+                    printf( "%3d ", ipiv[k*mt + m] );
+                  else
+                    printf( "--- " );
+                }
+                printf("\n");
+            }
+        }
+    }
+#endif
+}
diff --git a/src/low_greedy1p.c b/src/low_greedy1p.c
new file mode 100644
index 0000000..7d6fa5b
--- /dev/null
+++ b/src/low_greedy1p.c
@@ -0,0 +1,179 @@
+/**
+ *
+ * @file low_greedy1p.c
+ *
+ * @copyright 2010-2017 The University of Tennessee and The University
+ *                      of Tennessee Research Foundation.  All rights
+ *                      reserved.
+ * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ * @version 1.0.0
+ * @author Raphael Boucherie
+ * @author Mathieu Faverge
+ * @date 2017-03-21
+ *
+ * Functions for low level duplicated greedy tree
+ *
+ */
+#include "libhqr_internal.h"
+
+/* Return the pivot to use for the row m at step k */
+static inline int
+hqr_low_greedy1p_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) {
+    if (qrpiv->domino)
+        return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ];
+    else
+        return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd)
+                              + ( ( m  / qrpiv->p ) / qrpiv->a ) ];
+}
+
+/* Return the last row which has used the row m as a pivot in step k before the row start */
+static inline int
+hqr_low_greedy1p_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
+    int i;
+    int p_pa = p / qrpiv->p / qrpiv->a;
+    int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd;
+
+    for( i=start_pa+1; i<(qrpiv->ldd); i++ )
+        if ( ipiv[i +  k * (qrpiv->ldd)] == p_pa )
+            return i;
+    return i;
+}
+
+/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
+static inline int
+hqr_low_greedy1p_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
+    int i;
+    int pa = qrpiv->p * qrpiv->a;
+    int k_a = qrpiv->domino ? k / qrpiv->a :  (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a;
+    int p_pa = p / pa;
+    int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd;
+
+    for( i=start_pa-1; i> k_a; i-- )
+        if ( ipiv[i + k * (qrpiv->ldd)] == p_pa )
+            return i;
+
+    return (qrpiv->ldd);
+}
+
+void
+hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN) {
+    int *ipiv;
+    int mt, a, p, pa, domino;
+    int j, k, height, start, end, nT, nZ;
+
+    arg->currpiv = hqr_low_greedy1p_currpiv;
+    arg->nextpiv = hqr_low_greedy1p_nextpiv;
+    arg->prevpiv = hqr_low_greedy1p_prevpiv;
+
+    mt = arg->ldd;
+    a = arg->a;
+    p = arg->p;
+    pa = p * a;
+    domino = arg->domino;
+
+    /* This section has not been coded yet, and will perform a classic greedy */
+    if ( domino )
+    {
+        arg->minMN =  libhqr_imin( minMN, mt*a );
+        minMN = arg->minMN;
+
+        arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) );
+        ipiv = arg->ipiv;
+        memset(ipiv, 0, mt*minMN*sizeof(int));
+
+        /**
+         * Compute the local greedy tree of each column, on each node
+         */
+        for(k=0; k<minMN; k++) {
+            /* Number of tiles to factorized in this column on this rank */
+            nT = libhqr_imax( mt - (k / a), 0 );
+            /* Number of tiles already killed */
+            nZ = 0;
+
+            while( nZ < (nT-1) ) {
+                height = (nT - nZ) / 2;
+                start = mt - nZ - 1;
+                end = start - height;
+                nZ += height;
+
+                for( j=start; j > end; j-- ) {
+                    ipiv[ k*mt + j ] = (j - height);
+                }
+            }
+            assert( nZ+1 == nT );
+        }
+    }
+    else
+    {
+        int myrank;
+        end = 0;
+
+        arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) * p );
+        ipiv = arg->ipiv;
+
+        memset( ipiv,  0, minMN*sizeof(int)*mt*p);
+
+        for ( myrank=0; myrank<p; myrank++ ) {
+
+            /**
+             * Compute the local greedy tree of each column, on each node
+             */
+            for(k=0; k<minMN; k++) {
+                /* Number of tiles to factorized in this column on this rank */
+                nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 );
+                /* Number of tiles already killed */
+                nZ = 0;
+
+                /* No more computations on this node */
+                if ( nT == 0 ) {
+                    break;
+                }
+
+                while( nZ < (nT-1) ) {
+                    height = (nT - nZ) / 2;
+                    start = mt - nZ - 1;
+                    end = start - height;
+                    nZ += height;
+
+                    for( j=start; j > end; j-- ) {
+                        ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height);
+                    }
+                }
+                assert( nZ+1 == nT );
+            }
+        }
+    }
+
+#if 0
+    {
+        int m, k;
+        for(m=0; m<mt; m++) {
+            printf("%3d | ", m);
+            for (k=0; k<minMN; k++) {
+                printf( "%3d ", ipiv[k*mt + m] );
+            }
+            printf("\n");
+        }
+    }
+    if (!arg->domino) {
+        int m, k, myrank;
+        for ( myrank=1; myrank<p; myrank++ ) {
+            ipiv += mt*minMN;
+            printf("-------- rank %d ---------\n", myrank );
+            for(m=0; m<mt; m++) {
+                printf("%3d | ", m);
+                for (k=0; k<minMN; k++) {
+                    int k_a = (k + p - 1 - myrank) / p / a;
+                    if ( m >= k_a )
+                        printf( "%3d ", ipiv[k*mt + m] );
+                    else
+                        printf( "--- " );
+                }
+                printf("\n");
+            }
+        }
+    }
+#endif
+}
-- 
GitLab