Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 80d67e9b authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

A large bunch of cleanup

parent 3da14c96
No related branches found
No related tags found
1 merge request!11Cleanup
/**
*
* @file libhqr.h
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
*
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
*/
#ifndef _LIBHQR_INTERNAL_H_
#define _LIBHQR_INTERNAL_H_
#include "libhqr.h"
#include <stdio.h>
#include <assert.h>
#define PRINT_PIVGEN 0
#ifdef PRINT_PIVGEN
#define myassert( test ) {if ( ! (test) ) return -1;}
#else
#define myassert(test) {assert((test)); return -1;}
#endif
typedef enum qrtree_type_ {
LIBHQR_QRTREE_UNSET = 0,
LIBHQR_QRTREE_HQR,
LIBHQR_QRTREE_SVD,
LIBHQR_QRTREE_SYS,
LIBHQR_QRTREE_MTX,
} qrtree_type_e;
struct hqr_args_s;
typedef struct hqr_args_s hqr_args_t;
struct hqr_subpiv_s;
typedef struct hqr_subpiv_s hqr_subpiv_t;
/**
* @brief jhj
*/
struct hqr_args_s {
int domino; /**< Switch to enable/disable the domino tree linking high and lw level reduction trees */
int tsrr; /**< Switch to enable/disable round-robin on TS to optimise pipelining between TS and local tree */
hqr_subpiv_t *llvl;
hqr_subpiv_t *hlvl;
int *perm;
};
struct hqr_subpiv_s {
/**
* currpiv
* @param[in] arg pointer to the qr_piv structure
* @param[in] k step in the factorization
* @param[in] m line you want to eliminate
*
* @return the annihilator p used with m at step k
*/
int (*currpiv)(const hqr_subpiv_t *arg, int k, int m);
/*
* nextpiv
* @param[in] arg pointer to the qr_piv structure
* @param[in] k step in the factorization
* @param[in] p line currently used as an annihilator
* @param[in] m line actually annihilated.
* m = MT to find the first time p is used as an annihilator during step k
*
* @return the next line m' using the line p as annihilator during step k
* mt if p will never be used again as an annihilator.
*/
int (*nextpiv)(const hqr_subpiv_t *arg, int k, int p, int m);
/*
* prevpiv
* @param[in] arg pointer to the qr_piv structure
* @param[in] k step in the factorization
* @param[in] p line currently used as an annihilator
* @param[in] m line actually annihilated.
* m = p to find the last time p has been used as an annihilator during step k
*
* @return the previous line m' using the line p as annihilator during step k
* mt if p has never been used before that as an annihilator.
*/
int (*prevpiv)(const hqr_subpiv_t *arg, int k, int p, int m);
int *ipiv;
int minMN;
int ldd;
int a;
int p;
int domino;
};
/**
* @brief Minimal structure to store the information related to each tile.
*/
typedef struct libhqr_tile_info_s {
int type; /**< The type of the reduction applied to the tile (@sa libhqr_type_e) */
int currpiv; /**< The row index of the pivot for this tile */
int nextpiv; /**< The next tile for which the currpiv is a pivot */
int prevpiv; /**< The previous tile for which currpiv was a pivot */
int first_nextpiv; /**< If type != 0, the first tile for which this tile is a pivot */
int first_prevpiv; /**< If type != 0, the last tile for which this tile is a pivot */
} libhqr_tile_info_t;
static inline int libhqr_imin(int a, int b){
return (a > b) ? b : a;
}
static inline int libhqr_imax(int a, int b){
return (a > b) ? a : b;
}
static inline int libhqr_iceil(int a, int b){
return (a + b - 1) / b;
}
/* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */
#define nbextra1_formula (( (k % pa) > (pa - p) ) ? (-k)%pa + pa : 0)
/*
* Common functions
*/
/* int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); */
/* int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ); */
/* int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ); */
/* int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ); */
/*
* Subtree for low-level
*/
void libhqr_matrix_init(libhqr_tree_t *qrtree, const libhqr_tree_t *qrtree_init);
int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m);
int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m);
int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m);
int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m);
void libhqr_matrix_finalize(libhqr_tree_t *qrtree);
/*
* function for drawing the tree
*/
void draw_rectangle(int k, int p, int m, int step_m, FILE *file);
void draw_lines(const libhqr_tree_t *qrtree, int k, int *tiles, int *step, FILE *file);
void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *file);
void draw_vertical_line( int k, int p, int m, int step_m, FILE *file);
void hqr_low_flat_init ( hqr_subpiv_t *arg );
void hqr_low_binary_init ( hqr_subpiv_t *arg );
void hqr_low_fibonacci_init( hqr_subpiv_t *arg, int minMN );
void hqr_low_greedy_init ( hqr_subpiv_t *arg, int minMN );
void hqr_low_greedy1p_init ( hqr_subpiv_t *arg, int minMN );
void svd_low_adaptiv_init ( hqr_subpiv_t *arg, int gmt, int gnt, int nbcores, int ratio );
void hqr_high_flat_init ( hqr_subpiv_t *arg );
void hqr_high_binary_init ( hqr_subpiv_t *arg );
void hqr_high_fibonacci_init( hqr_subpiv_t *arg );
void hqr_high_greedy_init ( hqr_subpiv_t *arg, int minMN );
void hqr_high_greedy1p_init ( hqr_subpiv_t *arg );
#endif /* _LIBHQR_INTERNAL_H_ */
File moved
/**
*
* @file gendot.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
*
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
*/
#include "libhqr_internal.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define DAG_HEADER "digraph G { orientation=portrait; \n"
#define DAG_FOOTER "} // close graph\n"
#define DAG_LABELNODE "%d [label=\"%d\",color=white,pos=\"-1.,-%d.!\"]\n"
#define DAG_LENGTHNODE "l%d [label=\"%d\",color=white,pos=\"%d.,0.5!\"]\n"
#define DAG_INVISEDGE "%d->%d [style=\"invis\"];\n"
#define DAG_STARTNODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n"
#define DAG_NODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n"
#define DAG_FIRSTEDGE_PIV "%d->p%d_m%d_k0\n"
#define DAG_FIRSTEDGE_TS "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n"
#define DAG_FIRSTEDGE_TT "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n"
#define DAG_EDGE_PIV "p%d_m%d_k%d->p%d_m%d_k%d [width=0.1,color=\"%s\"]\n"
#define DAG_EDGE_TS "p%d_m%d_k%d->p%d_m%d_k%d [style=dotted, width=0.1,color=\"%s\"]\n"
#define DAG_EDGE_TT "p%d_m%d_k%d->p%d_m%d_k%d [style=dashed, width=0.1,color=\"%s\"]\n"
char *color[] = {
"red",
"blue",
"green",
"orange",
"cyan",
"purple",
"yellow",
};
#define DAG_NBCOLORS 7
void
libhqr_print_dot( const libhqr_tree_t *qrtree,
const char *filename )
{
int *pos, *next, *done;
int k, m, n, lpos, prev, length;
int minMN = libhqr_imin( qrtree->mt, qrtree->nt );
FILE *f = fopen( filename, "w" );
done = (int*)malloc( qrtree->mt * sizeof(int) );
pos = (int*)malloc( qrtree->mt * sizeof(int) );
next = (int*)malloc( qrtree->mt * sizeof(int) );
memset(pos, 0, qrtree->mt * sizeof(int) );
memset(next, 0, qrtree->mt * sizeof(int) );
/* Print header */
fprintf(f, DAG_HEADER ); /*, A->mt+2, minMN+2 );*/
for(m=0; m < qrtree->mt; m++) {
fprintf(f, DAG_LABELNODE, m, m, m);
}
for(k=0; k<minMN; k++ ) {
int nb2reduce = qrtree->mt - k - 1;
for(m=k; m < qrtree->mt; m++) {
fprintf(f, DAG_STARTNODE, m, qrtree->mt, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]);
next[m] = qrtree->nextpiv( qrtree, k, m, qrtree->mt);
}
while( nb2reduce > 0 ) {
memset(done, 0, qrtree->mt * sizeof(int) );
for(m=qrtree->mt-1; m > (k-1); m--) {
n = next[m];
if ( next[n] != qrtree->mt )
continue;
if ( n != qrtree->mt ) {
lpos = libhqr_imax( pos[m], pos[n] );
lpos++;
pos[m] = lpos;
pos[n] = lpos;
fprintf(f, DAG_NODE, m, n, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]);
prev = qrtree->prevpiv( qrtree, k, m, n );
fprintf(f, DAG_EDGE_PIV,
m, prev, k,
m, n, k,
color[ (m%qrtree->p) % DAG_NBCOLORS ]);
prev = qrtree->prevpiv( qrtree, k, n, n );
if ( qrtree->gettype(qrtree, k, n) == 0 )
fprintf(f, DAG_EDGE_TS,
n, prev, k,
m, n, k,
color[ (m%qrtree->p) % DAG_NBCOLORS ]);
else
fprintf(f, DAG_EDGE_TT,
n, prev, k,
m, n, k,
color[ (m%qrtree->p) % DAG_NBCOLORS ]);
next[m] = qrtree->nextpiv( qrtree, k, m, n);
done[m] = done[n] = 1;
nb2reduce--;
}
}
}
}
length = 0;
for(m=0; m < qrtree->mt; m++) {
length = libhqr_imax(length, pos[m]);
}
length++;
for(k=0; k<length; k++)
fprintf(f, DAG_LENGTHNODE, k, k, k);
fprintf(f, DAG_FOOTER);
printf("Tic Max = %d\n", length-1);
fclose( f );
free(pos);
free(next);
}
File moved
/**
*
* @file high_binary.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* Functions for high level binary tree
*
*/
#include "libhqr_internal.h"
#include <math.h>
static int
hqr_high_binary_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
int tmp1 = m - k;
int tmp2 = 1;
(void)arg;
if ( tmp1 == 0)
return 0;
while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) {
tmp1 = tmp1 >> 1;
tmp2 = tmp2 << 1;
}
assert( m - tmp2 >= k );
return m - tmp2;
}
static int
hqr_high_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int tmpp, bit;
myassert( (start == arg->ldd) || (hqr_high_binary_currpiv( arg, k, start ) == p) );
if ( start <= p )
return arg->ldd;
int offset = p - k;
bit = 0;
if (start != arg->ldd) {
while( ( (start - k) & (1 << bit ) ) == 0 )
bit++;
bit++;
}
tmpp = offset | (1 << bit);
if ( ( tmpp != offset ) && (tmpp < arg->p) && ( tmpp+k < arg->ldd ) )
return tmpp + k;
else
return arg->ldd;
}
static int
hqr_high_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int offset = p - k;
myassert( start >= p && ( start == p || hqr_high_binary_currpiv( arg, k, start ) == p ) );
if ( (start == p) && ( offset%2 == 0 ) ) {
int i, bit, tmp;
if ( offset == 0 )
bit = (int)( log( (double)( libhqr_imin(arg->p, arg->ldd - k) ) ) / log( 2. ) );
else {
bit = 0;
while( (offset & (1 << bit )) == 0 )
bit++;
}
for( i=bit; i>-1; i--){
tmp = offset | (1 << i);
if ( ( offset != tmp ) && ( tmp < arg->p ) && (tmp+k < arg->ldd) )
return tmp+k;
}
return arg->ldd;
}
if ( (start - p) > 1 )
return p + ( (start - p) >> 1 );
else {
return arg->ldd;
}
}
void
hqr_high_binary_init(hqr_subpiv_t *arg) {
arg->currpiv = hqr_high_binary_currpiv;
arg->nextpiv = hqr_high_binary_nextpiv;
arg->prevpiv = hqr_high_binary_prevpiv;
arg->ipiv = NULL;
}
/**
*
* @file high_fibonacci.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* Functions for high level fibonacci tree, and init for duplicated greedy.
*
*/
#include "libhqr_internal.h"
#include <stdlib.h>
/****************************************************
* HQR_HIGH_FIBONACCI_TREE
***************************************************/
/* Return the pivot to use for the row m at step k */
static inline int
hqr_high_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) {
return (qrpiv->ipiv)[ m-k ] + k;
}
/* Return the last row which has used the row m as a pivot in step k before the row start */
static inline int
hqr_high_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) {
int i;
myassert( p >= k && start >= p && start-k <= qrpiv->p);
int lp = p - k;
int lstart= start - k;
int end = libhqr_imin(qrpiv->ldd-k, qrpiv->p);
for( i=lstart+1; i<end; i++ )
if ( (qrpiv->ipiv)[i] == lp )
return i+k;
return qrpiv->ldd;
}
/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
static inline int
hqr_high_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) {
int i;
myassert( p>=k && (start == qrpiv->ldd || start-k <= qrpiv->p) );
for( i=libhqr_imin(start-k-1, qrpiv->p-1); i>0; i-- )
if ( (qrpiv->ipiv)[i] == (p-k) )
return i + k;
return (qrpiv->ldd);
}
void
hqr_high_fibonacci_init(hqr_subpiv_t *arg) {
int *ipiv;
int p;
arg->currpiv = hqr_high_fibonacci_currpiv;
arg->nextpiv = hqr_high_fibonacci_nextpiv;
arg->prevpiv = hqr_high_fibonacci_prevpiv;
p = arg->p;
arg->ipiv = (int*)calloc( p, sizeof(int) );
ipiv = arg->ipiv;
/*
* Fibonacci of order 1:
* u_(n+1) = u_(n) + 1
*/
{
int f1, k, m;
/* Fill in the first column */
f1 = 1;
for (m=1; m < p; ) {
for (k=0; (k < f1) && (m < p); k++, m++) {
ipiv[m] = m - f1;
}
f1++;
}
}
}
/****************************************************
* HQR_HIGH_GREEDY_TREE (1 panel duplicated)
***************************************************/
void hqr_high_greedy1p_init(hqr_subpiv_t *arg){
int *ipiv;
int mt, p;
arg->currpiv = hqr_high_fibonacci_currpiv;
arg->nextpiv = hqr_high_fibonacci_nextpiv;
arg->prevpiv = hqr_high_fibonacci_prevpiv;
mt = arg->ldd;
p = arg->p;
arg->ipiv = (int*)calloc( p, sizeof(int) );
ipiv = arg->ipiv;
{
int minMN = 1;
int j, k, height, start, end, firstk = 0;
int *nT = (int*)calloc(minMN, sizeof(int));
int *nZ = (int*)calloc(minMN, sizeof(int));
nT[0] = mt;
nZ[0] = libhqr_imax( mt - p, 0 );
for(k=1; k<minMN; k++) {
height = libhqr_imax(mt-k-p, 0);
nT[k] = height;
nZ[k] = height;
}
k = 0;
while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) &&
( nZ[minMN-1]+1 == nT[minMN-1] ) ) )
&& ( firstk < minMN ) ) {
height = (nT[k] - nZ[k]) / 2;
if ( height == 0 ) {
while ( (firstk < minMN) &&
( nT[firstk] == mt - firstk ) &&
( nZ[firstk]+1 == nT[firstk] ) ) {
firstk++;
}
k = firstk;
continue;
}
start = mt - nZ[k] - 1;
end = start - height;
nZ[k] += height;
if (k < minMN-1) nT[k+1] = nZ[k];
for( j=start; j > end; j-- ) {
ipiv[ k*p + j-k ] = (j - height);
}
k++;
if (k > minMN-1) k = firstk;
}
free(nT);
free(nZ);
}
}
/**
*
* @file high_flat.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* Functions for high level flat tree
*
*/
#include "libhqr_internal.h"
static int
hqr_high_flat_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
(void)arg;
(void)m;
return k;
}
static int
hqr_high_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
if ( p == k && arg->ldd > 1 ) {
if ( start == arg->ldd )
return p+1;
else if ( start < arg->ldd && (start-k < arg->p-1) )
return start+1;
}
return arg->ldd;
}
static int
hqr_high_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
assert( arg->p > 1 );
if ( p == k && arg->ldd > 1 ) {
if ( start == p && p != arg->ldd-1 )
return libhqr_imin( p + arg->p - 1, arg->ldd - 1 );
else if ( start > p + 1 && (start-k < arg->p))
return start-1;
}
return arg->ldd;
}
void
hqr_high_flat_init(hqr_subpiv_t *arg) {
arg->currpiv = hqr_high_flat_currpiv;
arg->nextpiv = hqr_high_flat_nextpiv;
arg->prevpiv = hqr_high_flat_prevpiv;
arg->ipiv = NULL;
}
/**
*
* @file high_greedy.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* Functions for high level greedy tree
*
*/
#include "libhqr_internal.h"
#include <stdlib.h>
static int
hqr_high_greedy_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
myassert( m >= k && m < k+arg->p );
return (arg->ipiv)[ k * (arg->p) + (m - k) ];
}
static int
hqr_high_greedy_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int i;
myassert( (start >= k && start < k+arg->p) || start == arg->ldd );
for( i=libhqr_imin(start-1, k+arg->p-1); i > k; i-- )
if ( (arg->ipiv)[i-k + k* (arg->p)] == p )
return i;
return (arg->ldd);
}
static int
hqr_high_greedy_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int i;
myassert( (start >= k && start < k+arg->p) || start == p );
for( i=start-k+1; i<arg->p; i++ )
if ( (arg->ipiv)[i + k * (arg->p)] == p )
return k+i;
return arg->ldd;
}
void
hqr_high_greedy_init(hqr_subpiv_t *arg, int minMN) {
int *ipiv;
int mt, p;
arg->currpiv = hqr_high_greedy_currpiv;
arg->nextpiv = hqr_high_greedy_nextpiv;
arg->prevpiv = hqr_high_greedy_prevpiv;
mt = arg->ldd;
p = arg->p;
arg->ipiv = (int*)calloc( p * minMN, sizeof(int) );
ipiv = arg->ipiv;
{
int j, k, height, start, end, firstk = 0;
int *nT = (int*)calloc(minMN, sizeof(int));
int *nZ = (int*)calloc(minMN, sizeof(int));
nT[0] = mt;
nZ[0] = libhqr_imax( mt - p, 0 );
for(k=1; k<minMN; k++) {
height = libhqr_imax(mt-k-p, 0);
nT[k] = height;
nZ[k] = height;
}
k = 0;
while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) &&
( nZ[minMN-1]+1 == nT[minMN-1] ) ) )
&& ( firstk < minMN ) ) {
height = (nT[k] - nZ[k]) / 2;
if ( height == 0 ) {
while ( (firstk < minMN) &&
( nT[firstk] == mt - firstk ) &&
( nZ[firstk]+1 == nT[firstk] ) ) {
firstk++;
}
k = firstk;
continue;
}
start = mt - nZ[k] - 1;
end = start - height;
nZ[k] += height;
if (k < minMN-1) nT[k+1] = nZ[k];
for( j=start; j > end; j-- ) {
ipiv[ k*p + j-k ] = (j - height);
}
k++;
if (k > minMN-1) k = firstk;
}
free(nT);
free(nZ);
}
}
File moved
/**
*
* @file low_adaptiv.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* Functions for low level adaptiv tree for SVD/EVD reduction to band.
*
*/
#include "libhqr_internal.h"
/* Return the pivot to use for the row m at step k */
inline static int
svd_low_adaptiv_currpiv( const hqr_subpiv_t *arg,
int k, int m )
{
int *ipiv = arg->ipiv + (m%arg->p * arg->minMN + k) * arg->ldd;
int a = ipiv[0];
ipiv+=2;
return ipiv[ ( m / arg->p ) / a ];
}
/* Return the last row which has used the row m as a pivot in step k before the row start */
inline static int
svd_low_adaptiv_prevpiv( const hqr_subpiv_t *arg,
int k, int p, int start_pa )
{
int i;
int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k) * arg->ldd;
int a = ipiv[0];
int ldd = ipiv[1];
int p_pa = p / arg->p / a;
ipiv+=2;
for( i=start_pa+1; i<ldd; i++ )
if ( ipiv[i] == p_pa )
return i;
return i;
}
/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
inline static int
svd_low_adaptiv_nextpiv( const hqr_subpiv_t *arg,
int k, int p, int start_pa )
{
int i;
int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k ) * arg->ldd;
int a = ipiv[0];
int ldd = ipiv[1];
int pa = arg->p * a;
int k_a = (k + arg->p - 1 - p%(arg->p)) / arg->p / a;
int p_pa = p / pa;
ipiv+=2;
for( i=start_pa-1; i> k_a; i-- )
if ( ipiv[i] == p_pa )
return i;
return ldd;
}
void
svd_low_adaptiv_init( hqr_subpiv_t *arg,
int gmt, int gnt, int nbcores, int ratio )
{
int *ipiv;
int mt, a, p, pa, maxmt, myrank;
int j, k, height, start, end, nT, nZ;
int minMN = libhqr_imin(gmt, gnt);
arg->currpiv = svd_low_adaptiv_currpiv;
arg->nextpiv = svd_low_adaptiv_nextpiv;
arg->prevpiv = svd_low_adaptiv_prevpiv;
p = arg->p;
end = 0;
/**
* Compute the local greedy tree of each column, on each node
*/
maxmt = 1;
for(k=0; k<minMN; k++) {
/**
* The objective is to have at least two columns of TS to reduce per
* core, so it must answer the following inequality:
* ((gmt-k) / p / a ) * (gnt-k) >= ( ratio * nbcores );
* so,
* a <= mt * (gnt-k) / (ratio * nbcores )
*/
height = libhqr_iceil( gmt-k, p );
a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 );
/* Now let's make sure all sub-parts are equilibrate */
j = libhqr_iceil( height, a );
a = libhqr_iceil( gmt-k, j );
/* Compute max dimension of the tree */
mt = libhqr_iceil( gmt, p * a );
maxmt = libhqr_imax( mt, maxmt );
}
arg->ldd = maxmt + 2;
arg->ipiv = (int*)malloc( arg->ldd * minMN * sizeof(int) * p );
ipiv = arg->ipiv;
memset( ipiv, 0, minMN*sizeof(int)*arg->ldd*p );
for ( myrank=0; myrank<p; myrank++ ) {
/**
* Compute the local greedy tree of each column, on each node
*/
for(k=0; k<minMN; k++, ipiv += arg->ldd) {
/**
* The objective is to have at least two columns of TS to reduce per
* core, so it must answer the following inequality:
* (ldd / a ) * (gnt-k) >= ( ratio * nbcores );
* so,
* a <= mt * (gnt-k) / (ratio * nbcores )
*/
height = libhqr_iceil( gmt-k, p );
a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 );
/* Now let's make sure all sub-parts are equilibrate */
j = libhqr_iceil( height, a );
a = libhqr_iceil( gmt-k, j );
pa = p * a;
mt = libhqr_iceil( gmt, pa );
ipiv[0] = a;
ipiv[1] = mt;
assert( a > 0 );
assert( mt < arg->ldd-1 );
/* Number of tiles to factorized in this column on this rank */
nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 );
/* Number of tiles already killed */
nZ = 0;
assert( nT <= mt );
/* No more computations on this node */
if ( nT == 0 ) {
continue;
}
while( nZ < (nT-1) ) {
height = (nT - nZ) / 2;
start = mt - nZ - 1;
end = start - height;
nZ += height;
for( j=start; j > end; j-- ) {
ipiv[ j+2 ] = (j - height);
}
}
assert( nZ+1 == nT );
}
}
#if 0
{
int m, k;
for(m=0; m<mt; m++) {
printf("%3d | ", m);
for (k=0; k<minMN; k++) {
printf( "%3d ", ipiv[k*(arg->ldd) + m] );
}
printf("\n");
}
}
if (!arg->domino) {
int m, k, myrank;
for ( myrank=1; myrank<p; myrank++ ) {
ipiv += arg->ldd * minMN;
printf("-------- rank %d ---------\n", myrank );
for(m=0; m<mt; m++) {
printf("%3d | ", m);
for (k=0; k<minMN; k++) {
int k_a = (k + p - 1 - myrank) / p / a;
if ( m >= k_a )
printf( "%3d ", ipiv[k * arg->ldd + m] );
else
printf( "--- " );
}
printf("\n");
}
}
}
#endif
}
/**
*
* @file mtxtree.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* This file contains all the function to describe the dependencies
* used in the Xgeqrf_param.jdf file.
* The QR factorization done with this file relies on three levels:
* - the first one is using a flat tree with TS kernels. The
* height of this tree is defined by the parameter 'a'. If 'a'
* is set to A->mt, the factorization is identical to the one
* perform by PLASMA_zgeqrf.
* For all subdiagonal "macro-tiles", the line reduced is always
* the first. For all diagonal "macro-tiles", the factorization
* performed is identical to the one performed by PLASMA_zgeqrf.
*
* - the third level is using a reduction tree of size 'p'. By
* default, the parameter 'p' should be equal to the number of
* processors used for the computation, but can be set
* differently. (see further example). The type of tree used at
* this level is defined by the hlvl parameter. It can be flat
* or greedy.
* CODE DETAILS: This tree and all the function related to it
* are performing a QR factorization on a band matrix with 'p'
* the size of the band. All the functions take global indices
* as input and return global indices as output.
*
* - Finally, a second 'low' level of reduction tree is applied.
* The size of this tree is induced by the parameters 'a' and 'p'
* from the first and third levels and is A->mt / ( p * a ). This
* tree is reproduced p times for each subset of tiles
* S_k = {i in [0, A->mt-1] \ i%p*a = k } with k in [0, p-1].
* The tree used for the reduction is defined by the llvl
* parameter and can be: flat, greedy, fibonacci or binary.
* CODE DETAILS: For commodity, the size of this tree is always
* ceil(A->mt / (p * a) ) inducing some extra tests in the code.
* All the functions related to this level of tree take as input
* the local indices in the A->mt / (p*a) matrix and the global
* k. They return the local index. The reductions are so
* performed on a trapezoidal matrices where the step is defined
* by a:
* <- min( lhlvl_mt, min( mt, nt ) ) ->
* __a__ a a
* | |_____
* | |_____
* | |_____
* llvl_mt = ceil(MT/ (a*p)) | |_____
* | |_____
* |___________________________________|
*
*
*
* At each step of the factorization, the lines of tiles are divided
* in 4 types:
* - QRPARAM_TILE_TS: They are the lines annihilated by a TS
* kernel, these lines are never used as an annihilator. They are
* the lines i, with 1 < (i/p)%a < a and i > (k+1)*p
* - QRPARAM_TILE_LOCALTT: They are the lines used as annhilitor
* in the TS kernels annihiling the QRPARAM_TILE_TS lines. They
* are themselves annihilated by the TT kernel of the low level
* reduction tree. The roots of the local trees are the lines i,
* with i/p = k.
* - QRPARAM_TILE_DOMINO: These are the lines that are
* annhilihated with a domino effect in the band defined by (i/p)
* <= k and i >= k
* - QRPARAM_TILE_DISTTT: These are the lines annihilated by the
* high level tree to reduce communications.
* These lines are defined by (i-k)/p = 0.
*/
#include "libhqr_internal.h"
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
/****************************************************
* Reading functions
**************************************************
*
* Common parameters for the following functions
* qrtree - tree used for the factorization
* k - Step k of the QR factorization
* m - line anhilated
*/
int mtxtree_gettype(const libhqr_tree_t *qrtree, int k, int m){
int id;
libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args);
id = (k * qrtree->mt) + m;
return arg[id].type;
}
int mtxtree_currpiv(const libhqr_tree_t *qrtree, int k, int m){
int id, perm_m, p, a;
libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args);
perm_m = m;
p = qrtree->p;
a = qrtree->a;
myassert( (p==1) || (perm_m / (p*a)) == (m / (p*a)) );
myassert( (p==1) || (perm_m % p) == (m % p) );
id = (k * qrtree->mt) + m;
return arg[id].currpiv;
}
/*
* Extra parameter:
* p - line used as pivot
*/
int mtxtree_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m){
int id;
libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args);
int gmt = qrtree->mt;
myassert( m > p && p >= k );
myassert( m == gmt || p == mtxtree_currpiv( qrtree, k, m ) );
if(m == qrtree->mt){
id = (k * qrtree->mt) + p;
return arg[id].first_nextpiv;
}
else{
id = (k * qrtree->mt) + m;
return arg[id].nextpiv;
}
}
int mtxtree_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){
int id;
libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args);
int gmt = qrtree->mt;
myassert( m >= p && p >= k && m < gmt );
myassert( m == p || p == mtxtree_currpiv( qrtree, k, m ) );
if(m == p){
id = (k * qrtree->mt) + p;
return arg[id].first_prevpiv;
}
else{
id = (k * qrtree->mt) + m;
return arg[id].prevpiv;
}
}
void
libhqr_fct_to_mtx( const libhqr_tree_t *in, libhqr_tree_t *out )
{
libhqr_tile_info_t *tileinfo;
int i, minMN, p, k;
minMN = libhqr_imin( in->mt, in->nt );
/* Copy the input tree to the output one */
memcpy( out, in, sizeof(libhqr_tree_t) );
/* Switch to matrix storage format and functions */
out->init = LIBHQR_QRTREE_MTX;
out->gettype = mtxtree_gettype;
out->currpiv = mtxtree_currpiv;
out->nextpiv = mtxtree_nextpiv;
out->prevpiv = mtxtree_prevpiv;
out->args = malloc( out->mt * minMN * sizeof(libhqr_tile_info_t) );
tileinfo = (libhqr_tile_info_t*)(out->args);
/* Initialize the matrix */
for (k=0; k<minMN; k++)
{
for (i=0; i<in->mt; i++, tileinfo++)
{
tileinfo->type = in->gettype(in, k, i);
tileinfo->currpiv = p = in->currpiv(in, k, i);
tileinfo->first_nextpiv = in->nextpiv(in, k, i, in->mt);
tileinfo->first_prevpiv = in->prevpiv(in, k, i, i);
tileinfo->nextpiv = in->nextpiv(in, k, p, i);
tileinfo->prevpiv = in->prevpiv(in, k, p, i);
assert( ((tileinfo->nextpiv <= in->mt) && (tileinfo->nextpiv >= k)) || (tileinfo->nextpiv == -1) );
assert( ((tileinfo->prevpiv <= in->mt) && (tileinfo->prevpiv >= k)) || (tileinfo->prevpiv == -1) );
assert( ((tileinfo->first_nextpiv <= in->mt) && (tileinfo->first_nextpiv >= k)) || (tileinfo->first_nextpiv == -1) );
assert( ((tileinfo->first_prevpiv <= in->mt) && (tileinfo->first_prevpiv >= k)) || (tileinfo->first_prevpiv == -1) );
}
}
}
src/svd.c 0 → 100644
/**
*
* @file svd.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* This file contains all the function to describe the dependencies
* used in the Xgeqrf_param.jdf file.
* The QR factorization done with this file relies on three levels:
* - the first one is using a flat tree with TS kernels. The
* height of this tree is defined by the parameter 'a'. If 'a'
* is set to A->mt, the factorization is identical to the one
* perform by PLASMA_zgeqrf.
* For all subdiagonal "macro-tiles", the line reduced is always
* the first. For all diagonal "macro-tiles", the factorization
* performed is identical to the one performed by PLASMA_zgeqrf.
*
* - the third level is using a reduction tree of size 'p'. By
* default, the parameter 'p' should be equal to the number of
* processors used for the computation, but can be set
* differently. (see further example). The type of tree used at
* this level is defined by the hlvl parameter. It can be flat
* or greedy.
* CODE DETAILS: This tree and all the function related to it
* are performing a QR factorization on a band matrix with 'p'
* the size of the band. All the functions take global indices
* as input and return global indices as output.
*
* - Finally, a second 'low' level of reduction tree is applied.
* The size of this tree is induced by the parameters 'a' and 'p'
* from the first and third levels and is A->mt / ( p * a ). This
* tree is reproduced p times for each subset of tiles
* S_k = {i in [0, A->mt-1] \ i%p*a = k } with k in [0, p-1].
* The tree used for the reduction is defined by the llvl
* parameter and can be: flat, greedy, fibonacci or binary.
* CODE DETAILS: For commodity, the size of this tree is always
* ceil(A->mt / (p * a) ) inducing some extra tests in the code.
* All the functions related to this level of tree take as input
* the local indices in the A->mt / (p*a) matrix and the global
* k. They return the local index. The reductions are so
* performed on a trapezoidal matrices where the step is defined
* by a:
* <- min( lhlvl_mt, min( mt, nt ) ) ->
* __a__ a a
* | |_____
* | |_____
* | |_____
* llvl_mt = ceil(MT/ (a*p)) | |_____
* | |_____
* |___________________________________|
*
*
*
* At each step of the factorization, the lines of tiles are divided
* in 4 types:
* - QRPARAM_TILE_TS: They are the lines annihilated by a TS
* kernel, these lines are never used as an annihilator. They are
* the lines i, with 1 < (i/p)%a < a and i > (k+1)*p
* - QRPARAM_TILE_LOCALTT: They are the lines used as annhilitor
* in the TS kernels annihiling the QRPARAM_TILE_TS lines. They
* are themselves annihilated by the TT kernel of the low level
* reduction tree. The roots of the local trees are the lines i,
* with i/p = k.
* - QRPARAM_TILE_DOMINO: These are the lines that are
* annhilihated with a domino effect in the band defined by (i/p)
* <= k and i >= k
* - QRPARAM_TILE_DISTTT: These are the lines annihilated by the
* high level tree to reduce communications.
* These lines are defined by (i-k)/p = 0.
*/
#include "libhqr_internal.h"
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
/*
* Common functions
*/
static int svd_getnbgeqrf( const libhqr_tree_t *qrtree, int k );
static int svd_getm( const libhqr_tree_t *qrtree, int k, int i );
static int svd_geti( const libhqr_tree_t *qrtree, int k, int m );
static int svd_gettype( const libhqr_tree_t *qrtree, int k, int m );
#define svd_getipiv( __qrtree, _k ) ((__qrtree)->llvl->ipiv + ((__qrtree)->llvl->ldd) * (_k) )
#define svd_geta( __qrtree, _k ) ( (svd_getipiv( (__qrtree), (_k) ))[0] )
/*
* Extra parameter:
* gmt - Global number of tiles in a column of the complete distributed matrix
* Return:
* The number of geqrt to execute in the panel k
*/
static int
svd_getnbgeqrf( const libhqr_tree_t *qrtree,
int k )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int p = qrtree->p;
int gmt = qrtree->mt;
int a = svd_geta(arg, k);
int pa = p * a;
int nb_1, nb_2, nb_3;
int nb_11, nb_12;
/* Number of tasks of type 3 */
nb_3 = p;
/* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */
nb_2 = nbextra1_formula;
/* First multiple of p*a under the diagonal of step 1 */
nb_11 = ( (k + p + pa - 1 ) / pa ) * pa;
/* Last multiple of p*a lower than A->mt */
nb_12 = ( gmt / pa ) * pa;
/* Number of tasks of type 1 between nb_11 and nb_12 */
nb_1 = (nb_12 - nb_11) / a;
/* Add leftover */
nb_1 += libhqr_imin( p, gmt - nb_12 );
return libhqr_imin( nb_1 + nb_2 + nb_3, gmt - k);
}
/*
* Extra parameter:
* i - indice of the geqrt in the continuous space
* Return:
* The global indice m of the i th geqrt in the panel k
*/
static int
svd_getm( const libhqr_tree_t *qrtree,
int k, int i )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int p = qrtree->p;
int a = svd_geta(arg, k);
int pos1, j, pa = p * a;
int nbextra1 = nbextra1_formula;
int nb23 = p + nbextra1;
/* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */
if ( i < nb23 )
return k+i;
/* Tile of type 1 */
else {
j = i - nb23;
pa = p * a;
pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa;
return pos1 + (j/p) * pa + j%p;
}
}
/*
* Extra parameter:
* m - The global indice m of a geqrt in the panel k
* Return:
* The index i of the geqrt in the panel k
*/
static int
svd_geti( const libhqr_tree_t *qrtree,
int k, int m )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int p = qrtree->p;
int a = svd_geta(arg, k);
int pos1, j, pa = p * a;
int nbextra1 = nbextra1_formula;
int nb23 = p + nbextra1;
int end2 = p + k + nbextra1;
/* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */
if ( m < end2 )
return m-k;
/* Tile of type 1 */
else {
pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa;
j = m - pos1;
return nb23 + (j / pa) * p + j%pa;
}
}
/*
* Extra parameter:
* m - The global indice m of the row in the panel k
* Return
* -1 - Error
* 0 - if m is reduced thanks to a TS kernel
* 1 - if m is reduced thanks to the low level tree
* 2 - if m is reduced thanks to the bubble tree
* 3 - if m is reduced in distributed
*/
static int
svd_gettype( const libhqr_tree_t *qrtree,
int k, int m )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int p = qrtree->p;
int a = svd_geta(arg, k);
/* Element to be reduce in distributed */
if (m < k + p) {
return 3;
}
/* Lower triangle of the matrix */
else {
if( (m / p) % a == 0 )
return 1;
else
return 0;
}
}
/****************************************************
*
* Generic functions currpiv,prevpiv,nextpiv
*
***************************************************/
static int
svd_currpiv(const libhqr_tree_t *qrtree, int k, int m)
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, tmpk;
int lm, rank;
int a = svd_geta( arg, k );
int p = qrtree->p;
int gmt = qrtree->mt;
lm = m / p; /* Local index in the distribution over p domains */
rank = m % p; /* Staring index in this distribution */
/* TS level common to every case */
switch( svd_gettype( qrtree, k, m ) )
{
case 0:
tmp = lm / a;
/* tmpk = (k + p - 1 - m%p) / p / a; */
tmpk = k / (p * a);
return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank;
break;
case 1:
tmp = arg->llvl->currpiv(arg->llvl, k, m);
/* tmpk = (k + p - 1 - m%p) / p / a; */
tmpk = k / (p * a);
return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank;
break;
case 2:
assert(0);
break;
case 3:
if ( arg->hlvl != NULL )
return arg->hlvl->currpiv(arg->hlvl, k, m);
default:
return gmt;
}
return -1;
};
/**
* svd_nextpiv - Computes the next row killed by the row p, after
* it has kill the row start.
*
* @param[in] k
* Factorization step
*
* @param[in] pivot
* Line used as killer
*
* @param[in] start
* Starting point to search the next line killed by p after start
* start must be equal to A.mt to find the first row killed by p.
* if start != A.mt, start must be killed by p
*
* @return:
* - -1 if start doesn't respect the previous conditions
* - m, the following row killed by p if it exists, A->mt otherwise
*/
static int svd_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start)
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, ls, lp, nextp;
int rpivot, lstart;
int *ipiv = svd_getipiv( arg, k );
int a = ipiv[0];
int ldd = ipiv[1];
int p = qrtree->p;
int gmt = qrtree->mt;
rpivot = pivot % p; /* Staring index in this distribution */
/* Local index in the distribution over p domains */
lstart = ( start == gmt ) ? ldd * a : start / p;
myassert( start > pivot && pivot >= k );
myassert( start == gmt || pivot == svd_currpiv( qrtree, k, start ) );
/* TS level common to every case */
ls = (start < gmt) ? svd_gettype( qrtree, k, start ) : -1;
lp = svd_gettype( qrtree, k, pivot );
switch( ls )
{
case LIBHQR_KILLED_BY_DOMINO:
assert(0);
case -1:
if ( lp == LIBHQR_KILLED_BY_TS ) {
myassert( start == gmt );
return gmt;
}
case LIBHQR_KILLED_BY_TS:
if ( start == gmt )
nextp = pivot + p;
else
nextp = start + p;
if ( ( nextp < gmt ) &&
( nextp < pivot + a*p ) &&
( (nextp/p)%a != 0 ) )
return nextp;
start = gmt;
lstart = ldd * a;
case LIBHQR_KILLED_BY_LOCALTREE:
/* Get the next pivot for the low level tree */
tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a );
if ( (tmp * a * p + rpivot >= gmt)
&& (tmp == ldd-1) )
tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp);
if ( tmp != ldd )
return tmp * a * p + rpivot;
/* no next of type 1, we reset start to search the next 2 */
start = gmt;
lstart = ldd * a;
case LIBHQR_KILLED_BY_DISTTREE:
if ( lp < LIBHQR_KILLED_BY_DISTTREE ) {
return gmt;
}
if( arg->hlvl != NULL ) {
tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start );
if ( tmp != gmt )
return tmp;
}
default:
return gmt;
}
}
/**
* svd_prevpiv - Computes the previous row killed by the row p, before
* to kill the row start.
*
* @param[in] k
* Factorization step
*
* @param[in] pivot
* Line used as killer
*
* @param[in] start
* Starting point to search the previous line killed by p before start
* start must be killed by p, and start must be greater or equal to p
*
* @return:
* - -1 if start doesn't respect the previous conditions
* - m, the previous row killed by p if it exists, A->mt otherwise
*/
static int
svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start)
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, ls, lp, nextp;
int lpivot, rpivot, lstart;
int *ipiv = svd_getipiv( arg, k );
int a = ipiv[0];
int ldd = ipiv[1];
int p = qrtree->p;
int gmt = qrtree->mt;
lpivot = pivot / p; /* Local index in the distribution over p domains */
rpivot = pivot % p; /* Staring index in this distribution */
lstart = start / p; /* Local index in the distribution over p domains */
myassert( start >= pivot && pivot >= k && start < gmt );
myassert( start == pivot || pivot == svd_currpiv( qrtree, k, start ) );
/* TS level common to every case */
ls = svd_gettype( qrtree, k, start );
lp = svd_gettype( qrtree, k, pivot );
if ( lp == LIBHQR_KILLED_BY_TS )
return gmt;
myassert( lp >= ls );
switch( ls )
{
case LIBHQR_KILLED_BY_DOMINO:
assert(0);
case LIBHQR_KILLED_BY_DISTTREE:
if( arg->hlvl != NULL ) {
tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start );
if ( tmp != gmt )
return tmp;
}
start = pivot;
lstart = pivot / p;
case LIBHQR_KILLED_BY_LOCALTREE:
tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a);
if ( (tmp * a * p + rpivot >= gmt)
&& (tmp == ldd-1) )
tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp);
if ( tmp != ldd )
return tmp * a * p + rpivot;
start = pivot;
case LIBHQR_KILLED_BY_TS:
if ( start == pivot ) {
tmp = lpivot + a - 1 - lpivot%a;
nextp = tmp * p + rpivot;
while( pivot < nextp && nextp >= gmt )
nextp -= p;
} else {
nextp = start - p; /*(lstart - 1) * p + rpivot;*/
}
assert(nextp < gmt);
if ( pivot < nextp )
return nextp;
default:
return gmt;
}
};
/**
*******************************************************************************
*
* @ingroup libhqr
*
* libhqr_svd_init - Create the tree structures that will describes the
* operation performed during QR/LQ reduction step of the gebrd_ge2gb operation.
*
* Trees available parameters are described below. It is recommended to:
* - set p to the same value than the P-by-Q process grid used to distribute
* the data. (P for QR factorization, Q for LQ factorization).
* - set the low level tree to LIBHQR_GREEDY_TREE.
* - set the high level tree to:
* 1) LIBHQR_FLAT_TREE when the problem is square, because it divides
* by two the volume of communication of any other tree.
* 2) LIBHQR_FIBONACCI_TREE when the problem is tall and skinny (QR) or
* small and fat (LQ), because it reduces the critical path length.
* - Disable the domino effect when problem is square, to keep high efficiency
* kernel proportion high.
* - Enable the domino effect when problem is tall and skinny (QR) or
* small and fat (LQ) to increase parallelism and reduce critical path length.
* - Round-robin on TS domain (tsrr) option should be disabled. It is
* experimental and is not safe.
*
* These are the default when a parameter is set to -1;
*
* See http://www.netlib.org/lapack/lawnspdf/lawn257.pdf
*
*******************************************************************************
*
* @param[in,out] qrtree
* On entry, an allocated structure uninitialized.
* On exit, the structure initialized according to the given parameters.
*
* @param[in] trans
* @arg LIBHQR_QR: Structure is initialized for the QR steps.
* @arg LIBHQR_LQ: Structure is initialized for the LQ steps.
*
* @param[in,out] A
* Descriptor of the distributed matrix A to be factorized, on which
* QR/LQ reduction steps will be performed. In case, of
* R-bidiagonalization, don't forget to provide the square submatrix
* that is concerned by those operations.
* The descriptor is untouched and only mt/nt/P parameters are used.
*
* @param[in] type_hlvl
* Defines the tree used to reduce the main tiles of each domain. This
* is a band lower diagonal matrix of width p.
* @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the tiles.
* @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the tiles.
* @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the
* tiles.
* @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the tiles.
* @arg LIBHQR_GREEDY1P_TREE: A Greedy tree is computed for the first
* column and then duplicated on all others.
* @arg -1: The default is used (LIBHQR_FIBONACCI_TREE)
*
* @param[in] p
* Defines the number of distributed domains, ie the width of the high
* level reduction tree. If p == 1, no high level reduction tree is
* used. If p == mt, this enforce the high level reduction tree to be
* performed on the full matrix.
* By default, it is recommended to set p to P if trans ==
* LIBHQR_QR, and to Q otherwise, where P-by-Q is the process grid
* used to distributed the data. (p > 0)
*
* @param[in] nbthread_per_node
* Define the number of working threads per node to configure the
* adaptativ local tree to provide at least (ratio * nbthread_per_node)
* tasks per step when possible by creating the right amount of TS and
* TT kernels.
*
* @param[in] ratio
* Define the minimal number of tasks per thread that the adaptiv tree
* must provide at the lowest level of the tree.
*
*******************************************************************************
*
* @retval -i if the ith parameters is incorrect.
* @retval 0 on success.
*
*******************************************************************************
*
* @sa libhqr_hqr_finalize
* @sa libhqr_hqr_init
* @sa dplasma_zgeqrf_param
* @sa dplasma_cgeqrf_param
* @sa dplasma_dgeqrf_param
* @sa dplasma_sgeqrf_param
*
******************************************************************************/
int
libhqr_initfct_svd( libhqr_tree_t *qrtree,
libhqr_facto_e trans, libhqr_matrix_t *A,
int type_hlvl, int p, int nbthread_per_node, int ratio )
{
int low_mt, minMN, a = -1;
hqr_args_t *arg;
if (qrtree == NULL) {
fprintf(stderr,"libhqr_svd_init, illegal value of qrtree");
return -1;
}
if ((trans != LIBHQR_QR) &&
(trans != LIBHQR_LQ)) {
fprintf(stderr, "libhqr_svd_init, illegal value of trans");
return -2;
}
if (A == NULL) {
fprintf(stderr, "libhqr_svd_init, illegal value of A");
return -3;
}
/* Compute parameters */
p = libhqr_imax( p, 1 );
qrtree->getnbgeqrf = svd_getnbgeqrf;
qrtree->getm = svd_getm;
qrtree->geti = svd_geti;
qrtree->gettype = svd_gettype;
qrtree->currpiv = svd_currpiv;
qrtree->nextpiv = svd_nextpiv;
qrtree->prevpiv = svd_prevpiv;
qrtree->init = LIBHQR_QRTREE_SVD;
qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt;
qrtree->nt = (trans == LIBHQR_QR) ? A->nt : A->mt;
qrtree->a = a;
qrtree->p = p;
qrtree->args = NULL;
arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) );
arg->domino = 0;
arg->tsrr = 0;
arg->perm = NULL;
arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) );
arg->hlvl = NULL;
minMN = libhqr_imin(A->mt, A->nt);
low_mt = (qrtree->mt + p - 1) / ( p );
arg->llvl->minMN = minMN;
arg->llvl->ldd = low_mt;
arg->llvl->a = a;
arg->llvl->p = p;
arg->llvl->domino = 0;
svd_low_adaptiv_init( arg->llvl, qrtree->mt, qrtree->nt,
nbthread_per_node * (A->nodes / p), ratio );
if ( p > 1 ) {
arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) );
arg->llvl->minMN = minMN;
arg->hlvl->ldd = qrtree->mt;
arg->hlvl->a = a;
arg->hlvl->p = p;
arg->hlvl->domino = 0;
switch( type_hlvl ) {
case LIBHQR_FLAT_TREE :
hqr_high_flat_init(arg->hlvl);
break;
case LIBHQR_GREEDY_TREE :
hqr_high_greedy_init(arg->hlvl, minMN);
break;
case LIBHQR_GREEDY1P_TREE :
hqr_high_greedy1p_init(arg->hlvl);
break;
case LIBHQR_BINARY_TREE :
hqr_high_binary_init(arg->hlvl);
break;
case LIBHQR_FIBONACCI_TREE :
hqr_high_fibonacci_init(arg->hlvl);
break;
default:
hqr_high_fibonacci_init(arg->hlvl);
}
}
qrtree->args = (void*)arg;
return 0;
}
int
libhqr_initmtx_svd( libhqr_tree_t *qrtree,
libhqr_facto_e trans, libhqr_matrix_t *A,
int type_hlvl, int p, int nbthread_per_node, int ratio )
{
libhqr_tree_t qrtree_init;
int rc;
rc = libhqr_initfct_svd( &qrtree_init, trans, A, type_hlvl, p, nbthread_per_node, ratio );
if ( rc != 0 ) {
return rc;
}
/* Initialize the final QR tree */
memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) );
qrtree->init = LIBHQR_QRTREE_MTX;
qrtree->gettype = rdmtx_gettype;
qrtree->currpiv = rdmtx_currpiv;
qrtree->nextpiv = rdmtx_nextpiv;
qrtree->prevpiv = rdmtx_prevpiv;
qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tile_info_t) );
/* Initialize the matrix */
libhqr_matrix_init(qrtree, &qrtree_init);
/* Free the initial qrtree */
libhqr_finalize( &qrtree_init );
return 0;
}
int
libhqr_init_svd( libhqr_tree_t *qrtree,
libhqr_facto_e trans, libhqr_matrix_t *A,
int type_hlvl, int p, int nbthread_per_node, int ratio )
{
return libhqr_initmtx_svd( qrtree, trans, A, type_hlvl, p, nbthread_per_node, ratio );
}
File moved
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment