diff --git a/Makefile b/Makefile index 3a7f943df664ce6aa46ab5052fcfd89e73a56649..4950cfd67bf05c7a4c1f844e5574284ebe587b6c 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ CFLAGS = -Iinclude -Wall -g3 LDFLAGS = -lm LIBHQR=src/libhqr.a -default: testing_pivgen testing_treedraw testing_tileinit +default: testing_pivgen testing_treedraw src/queue.o: include/queue.h include/common.h src/libhqr.o: include/common.h include/libhqr.h @@ -24,9 +24,6 @@ testing_pivgen : testings/testing_pivgen.o ${LIBHQR} testing_treedraw : testings/testing_treedraw.o ${LIBHQR} $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS) -testing_tileinit : testings/testing_tileinit.o ${LIBHQR} - $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS) - clean : rm -f include/*~ rm -f src/*.o src/*~ diff --git a/include/libhqr.h b/include/libhqr.h index 657f61c614f0096c9cd6d68171f7ed7ac0c2ddf5..579da90752a6b5a375db4c426a0f2437322cbcb1 100644 --- a/include/libhqr.h +++ b/include/libhqr.h @@ -89,19 +89,6 @@ typedef struct libhqr_tiledesc_s{ int p; /**< The number of nodes per column in the data distribution */ } libhqr_tiledesc_t; -/** - * @brief Minimal structure for stocking info of each tile - */ - -typedef struct libhqr_tileinfo_s{ - int type; - int currpiv; - int *nextpiv; - int *prevpiv; - int first_nextpiv; - int first_prevpiv; -} libhqr_tileinfo_t; - struct libhqr_tree_s; typedef struct libhqr_tree_s libhqr_tree_t; @@ -185,6 +172,7 @@ struct libhqr_tree_s { int a; /** Size of highest level tree (distributed one) */ int p; + int domino; void *args; }; @@ -213,12 +201,6 @@ void draw_horizontal_line(int k, int p, int m, int *tiles, FILE *file); void draw_vertical_line(int k, int p, int m, int *tiles, FILE *file); void libhqr_print_tree(const libhqr_tree_t *qrtree, libhqr_tiledesc_t *matrix); -/* - * functions for stocking the information of each tile - */ - -void libhqr_tile_init(const libhqr_tree_t *qrtree, int k, libhqr_tileinfo_t *tiles); - /* * Debugging functions */ diff --git a/src/libhqr.c b/src/libhqr.c index 74775c67a6c1813687024005a94847da92ea6b17..60b94fcecdb84dcfa5f9457aa808261883780893 100644 --- a/src/libhqr.c +++ b/src/libhqr.c @@ -108,6 +108,15 @@ struct hqr_args_s { int *perm; }; +typedef struct libhqr_tileinfo_s{ + int type; + int currpiv; + int nextpiv; + int prevpiv; + int first_nextpiv; + int first_prevpiv; +} libhqr_tileinfo_t; + struct hqr_subpiv_s { /** * currpiv @@ -170,6 +179,70 @@ static void hqr_low_greedy_init( hqr_subpiv_t *arg, int minMN); static void hqr_low_binary_init( hqr_subpiv_t *arg); static void hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN); +/* Stocking matrix info */ +void libhqr_matrix_init(libhqr_tree_t *qrtree); + +/* Function for getting the info on the matrix*/ +int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m); +int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m); +int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m); +int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m); + +/**************************************************** + * Reading functions + ************************************************** + * + * Common parameters for the following functions + * qrtree - tree used for the factorization + * k - Step k of the QR factorization + * m - line anhilated + * + */ + +int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m){ + int id; + libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); + id = (k * qrtree->mt) + m; + return arg[id].type; +} + +int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m){ + int id; + libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); + id = (k * qrtree->mt) + m; + return arg[id].currpiv; +} + +/* + * Extra parameter: + * p - line used as pivot + */ + +int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ + int id; + libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); + if(m == qrtree->mt){ + id = (k * qrtree->mt) + p; + return arg[id].first_nextpiv; + } + else{ + id = (k * qrtree->mt) + m; + return arg[id].nextpiv; + } +} + +int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ + int id; + libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); + if(m == p){ + id = (k * qrtree->mt) + p; + return arg[id].first_prevpiv; + } + else{ + id = (k * qrtree->mt) + m; + return arg[id].nextpiv; + } +} /**************************************************** * Common ipiv *************************************************** @@ -189,11 +262,10 @@ static void hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN); * The number of geqrt to execute in the panel k */ static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int a = qrtree->a; int p = qrtree->p; int gmt = qrtree->mt; - int domino = arg->domino; + int domino = qrtree->domino; int pa = p * a; int nb_1, nb_2, nb_3; int nb_11, nb_12; @@ -236,12 +308,9 @@ static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { */ static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ) { - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int a = qrtree->a; int p = qrtree->p; - int gmt = qrtree->mt + 1; - int domino = arg->domino; - int *perm = arg->perm + gmt * k; + int domino = qrtree->domino; int pos1, j, pa = p * a; int nbextra1 = nbextra1_formula; @@ -258,7 +327,7 @@ static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ) pos1 = ( ( (p * (k+1)) + pa - 1 ) / pa ) * pa; else pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; - return perm[ pos1 + (j/p) * pa + j%p ]; + return pos1 + (j/p) * pa + j%p; } } @@ -270,10 +339,9 @@ static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ) */ static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ) { - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int a = qrtree->a; int p = qrtree->p; - int domino = arg->domino; + int domino = qrtree->domino; int lm = hqr_getinvperm( qrtree, k, m ); int pos1, j, pa = p * a; @@ -1268,7 +1336,7 @@ static int hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m) { hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int tmp, tmpk, perm_m; - int lm, rank, *perm; + int lm, rank; int a = qrtree->a; int p = qrtree->p; int domino = arg->domino; @@ -1277,7 +1345,6 @@ static int hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m) perm_m = hqr_getinvperm( qrtree, k, m ); lm = perm_m / p; /* Local index in the distribution over p domains */ rank = perm_m % p; /* Staring index in this distribution */ - perm = arg->perm + (gmt+1) * k; myassert( (p==1) || (perm_m / (p*a)) == (m / (p*a)) ); myassert( (p==1) || (perm_m % p) == (m % p) ); @@ -1289,13 +1356,13 @@ static int hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m) case 0: tmp = lm / a; if ( tmp == k / a ) - return perm[ k * p + rank ]; /* Below to the first bloc including the diagonal */ + return k * p + rank ; /* Below to the first bloc including the diagonal */ else - return perm[ tmp * a * p + rank ]; + return tmp * a * p + rank ; break; case 1: tmp = arg->llvl->currpiv(arg->llvl, k, perm_m); - return perm[ ( tmp == k / a ) ? k * p + rank : tmp * a * p + rank ]; + return ( tmp == k / a ) ? k * p + rank : tmp * a * p + rank; break; case 2: return m - p; @@ -1314,20 +1381,20 @@ static int hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m) tmp = lm / a; /* tmpk = (k + p - 1 - m%p) / p / a; */ tmpk = k / (p * a); - return perm[ ( tmp == tmpk ) ? k + (perm_m-k)%p : tmp * a * p + rank ]; + return ( tmp == tmpk ) ? k + (perm_m-k)%p : tmp * a * p + rank ; break; case 1: tmp = arg->llvl->currpiv(arg->llvl, k, perm_m); /* tmpk = (k + p - 1 - m%p) / p / a; */ tmpk = k / (p * a); - return perm[ ( tmp == tmpk ) ? k + (perm_m-k)%p : tmp * a * p + rank ]; + return ( tmp == tmpk ) ? k + (perm_m-k)%p : tmp * a * p + rank ; break; case 2: - return perm[ perm_m - p]; + return perm_m - p; break; case 3: if ( arg->hlvl != NULL ) - return perm[arg->hlvl->currpiv(arg->hlvl, k, perm_m)]; + return arg->hlvl->currpiv(arg->hlvl, k, perm_m); default: return gmt; } @@ -1358,7 +1425,7 @@ static int hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int tmp, ls, lp, nextp; int opivot, ostart; /* original values before permutation */ - int lpivot, rpivot, lstart, *perm; + int lpivot, rpivot, lstart; int a = qrtree->a; int p = qrtree->p; int gmt = qrtree->mt; @@ -1374,8 +1441,6 @@ static int hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) /* Local index in the distribution over p domains */ lstart = ( start == gmt ) ? arg->llvl->ldd * a : start / p; - perm = arg->perm + (gmt+1) * k; - myassert( start > pivot && pivot >= k ); myassert( start == gmt || opivot == hqr_currpiv( qrtree, k, ostart ) ); @@ -1406,7 +1471,7 @@ static int hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) if ( ( nextp < gmt ) && ( nextp < pivot + a*p ) && ( (nextp/p)%a != 0 ) ) - return perm[nextp]; + return nextp; start = gmt; lstart = arg->llvl->ldd * a; @@ -1424,7 +1489,7 @@ static int hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp); if ( tmp != arg->llvl->ldd ) - return perm[tmp * a * p + rpivot]; + return tmp * a * p + rpivot; next_2: /* no next of type 1, we reset start to search the next 2 */ @@ -1442,7 +1507,7 @@ static int hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) (start == gmt) && (lpivot < k) && (pivot+p < gmt) ) { - return perm[pivot+p]; + return pivot+p; } /* no next of type 2, we reset start to search the next 3 */ @@ -1458,7 +1523,7 @@ static int hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) if( arg->hlvl != NULL ) { tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start ); if ( tmp != gmt ) - return perm[tmp]; + return tmp; } default: @@ -1490,7 +1555,7 @@ hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int tmp, ls, lp, nextp; int opivot, ostart; /* original values before permutation */ - int lpivot, rpivot, lstart, *perm; + int lpivot, rpivot, lstart; int a = qrtree->a; int p = qrtree->p; int gmt = qrtree->mt; @@ -1503,7 +1568,6 @@ hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) lpivot = pivot / p; /* Local index in the distribution over p domains */ rpivot = pivot % p; /* Staring index in this distribution */ lstart = start / p; /* Local index in the distribution over p domains */ - perm = arg->perm + (gmt+1) * k; myassert( start >= pivot && pivot >= k && start < gmt ); myassert( start == pivot || opivot == hqr_currpiv( qrtree, k, ostart ) ); @@ -1522,7 +1586,7 @@ hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) if( arg->hlvl != NULL ) { tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); if ( tmp != gmt ) - return perm[tmp]; + return tmp; } start = pivot; @@ -1534,7 +1598,7 @@ hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) if ( ( start == pivot ) && (start+p < gmt ) ) - return perm[start+p]; + return start+p; if ( lp > LIBHQR_KILLED_BY_LOCALTREE ) return gmt; @@ -1558,7 +1622,7 @@ hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp); if ( tmp != arg->llvl->ldd ) - return perm[tmp * a * p + rpivot]; + return tmp * a * p + rpivot; start = pivot; @@ -1566,7 +1630,7 @@ hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) /* Search for predecessor in TS tree */ /* if ( ( start+p < gmt ) && */ /* ( (((start+p) / p) % a) != 0 ) ) */ - /* return perm[start + p]; */ + /* return start + p; */ if ( start == pivot ) { tmp = lpivot + a - 1 - lpivot%a; @@ -1579,7 +1643,7 @@ hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) } assert(nextp < gmt); if ( pivot < nextp ) - return perm[nextp]; + return nextp; default: return gmt; @@ -1665,21 +1729,20 @@ hqr_genperm( libhqr_tree_t *qrtree ) static int hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m ) { - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); int gmt = qrtree->mt + 1; int a = qrtree->a; int p = qrtree->p; int pa = p * a; int start = m / pa * pa; int stop = libhqr_imin( start + pa, gmt ) - start; - int *perm = arg->perm + gmt * k + start; int i; if (a == 1) return m; for ( i=m%p; i < stop; i+=p ) { - if( perm[i] == m ) + //if( perm[i] == m ) + if( i == m ) return i+start; } @@ -1812,6 +1875,7 @@ libhqr_hqr_init( libhqr_tree_t *qrtree, int a, int p, int domino, int tsrr ) { + libhqr_tree_t qrtree_init; double ratio = 0.0; int low_mt, minMN; hqr_args_t *arg; @@ -1851,94 +1915,116 @@ libhqr_hqr_init( libhqr_tree_t *qrtree, } } - qrtree->getnbgeqrf = hqr_getnbgeqrf; - qrtree->getm = hqr_getm; - qrtree->geti = hqr_geti; - qrtree->gettype = hqr_gettype; - qrtree->currpiv = hqr_currpiv; - qrtree->nextpiv = hqr_nextpiv; - qrtree->prevpiv = hqr_prevpiv; - - qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt; - qrtree->nt = (trans == LIBHQR_QR) ? A->nt : A->mt; - - a = libhqr_imin( a, qrtree->mt ); + minMN = libhqr_imin(A->mt, A->nt); - qrtree->a = a; - qrtree->p = p; - qrtree->args = NULL; + /* Create a temporary qrtree structure based on the functions */ + { + qrtree_init.domino = domino; + qrtree_init.getnbgeqrf = hqr_getnbgeqrf; + qrtree_init.getm = hqr_getm; + qrtree_init.geti = hqr_geti; + qrtree_init.gettype = hqr_gettype; + qrtree_init.currpiv = hqr_currpiv; + qrtree_init.nextpiv = hqr_nextpiv; + qrtree_init.prevpiv = hqr_prevpiv; - arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) ); - arg->domino = domino; - arg->tsrr = tsrr; - arg->perm = NULL; + qrtree_init.mt = (trans == LIBHQR_QR) ? A->mt : A->nt; + qrtree_init.nt = minMN; - arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); - arg->hlvl = NULL; + a = libhqr_imin( a, qrtree_init.mt ); - minMN = libhqr_imin(A->mt, A->nt); - low_mt = (qrtree->mt + p * a - 1) / ( p * a ); + qrtree_init.a = a; + qrtree_init.p = p; + qrtree_init.args = NULL; - arg->llvl->minMN = minMN; - arg->llvl->ldd = low_mt; - arg->llvl->a = a; - arg->llvl->p = p; - arg->llvl->domino = domino; + arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) ); + arg->domino = domino; + arg->tsrr = tsrr; + arg->perm = NULL; - switch( type_llvl ) { - case LIBHQR_FLAT_TREE : - hqr_low_flat_init(arg->llvl); - break; - case LIBHQR_FIBONACCI_TREE : - hqr_low_fibonacci_init(arg->llvl, minMN); - break; - case LIBHQR_BINARY_TREE : - hqr_low_binary_init(arg->llvl); - break; - case LIBHQR_GREEDY1P_TREE : - hqr_low_greedy1p_init(arg->llvl, minMN); - break; - case LIBHQR_GREEDY_TREE : - default: - hqr_low_greedy_init(arg->llvl, minMN); - } + arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + arg->hlvl = NULL; - if ( p > 1 ) { - arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + low_mt = (qrtree_init.mt + p * a - 1) / ( p * a ); arg->llvl->minMN = minMN; - arg->hlvl->ldd = qrtree->mt; - arg->hlvl->a = a; - arg->hlvl->p = p; - arg->hlvl->domino = domino; + arg->llvl->ldd = low_mt; + arg->llvl->a = a; + arg->llvl->p = p; + arg->llvl->domino = domino; - switch( type_hlvl ) { + switch( type_llvl ) { case LIBHQR_FLAT_TREE : - hqr_high_flat_init(arg->hlvl); + hqr_low_flat_init(arg->llvl); break; - case LIBHQR_GREEDY_TREE : - hqr_high_greedy_init(arg->hlvl, minMN); - break; - case LIBHQR_GREEDY1P_TREE : - hqr_high_greedy1p_init(arg->hlvl); + case LIBHQR_FIBONACCI_TREE : + hqr_low_fibonacci_init(arg->llvl, minMN); break; case LIBHQR_BINARY_TREE : - hqr_high_binary_init(arg->hlvl); + hqr_low_binary_init(arg->llvl); break; - case LIBHQR_FIBONACCI_TREE : - hqr_high_fibonacci_init(arg->hlvl); + case LIBHQR_GREEDY1P_TREE : + hqr_low_greedy1p_init(arg->llvl, minMN); break; + case LIBHQR_GREEDY_TREE : default: - if ( ratio >= 0.5 ) { + hqr_low_greedy_init(arg->llvl, minMN); + } + + if ( p > 1 ) { + arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + + arg->llvl->minMN = minMN; + arg->hlvl->ldd = qrtree->mt; + arg->hlvl->a = a; + arg->hlvl->p = p; + arg->hlvl->domino = domino; + + switch( type_hlvl ) { + case LIBHQR_FLAT_TREE : hqr_high_flat_init(arg->hlvl); - } else { + break; + case LIBHQR_GREEDY_TREE : + hqr_high_greedy_init(arg->hlvl, minMN); + break; + case LIBHQR_GREEDY1P_TREE : + hqr_high_greedy1p_init(arg->hlvl); + break; + case LIBHQR_BINARY_TREE : + hqr_high_binary_init(arg->hlvl); + break; + case LIBHQR_FIBONACCI_TREE : hqr_high_fibonacci_init(arg->hlvl); + break; + default: + if ( ratio >= 0.5 ) { + hqr_high_flat_init(arg->hlvl); + } else { + hqr_high_fibonacci_init(arg->hlvl); + } } } + + qrtree_init.args = (void*)arg; + hqr_genperm( &qrtree_init ); } - qrtree->args = (void*)arg; - hqr_genperm( qrtree ); + /* Initialize the final QR tree */ + memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) ); + qrtree->getnbgeqrf = hqr_getnbgeqrf; + qrtree->getm = hqr_getm; + qrtree->geti = hqr_geti; + qrtree->gettype = rdmtx_gettype; + qrtree->currpiv = rdmtx_currpiv; + qrtree->nextpiv = rdmtx_nextpiv; + qrtree->prevpiv = rdmtx_prevpiv; + qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tileinfo_t) ); + + /* Initialize the matrix */ + libhqr_matrix_init(qrtree); + + /* Free the initial qrtree */ + libhqr_hqr_finalize( &qrtree_init ); return 0; } @@ -2740,15 +2826,21 @@ libhqr_svd_init( libhqr_tree_t *qrtree, return 0; } -void libhqr_tile_init(const libhqr_tree_t *qrtree, int k, libhqr_tileinfo_t *tiles){ - int i, maxMN; - maxMN = libhqr_imax(qrtree->nt, qrtree->mt); - for (i = 0; i < maxMN; i++){ - tiles[i].first_nextpiv = qrtree->nextpiv(qrtree, k, i, qrtree->mt); - tiles[i].first_prevpiv = qrtree->prevpiv(qrtree, k, i, i); - tiles[i].type = qrtree->gettype(qrtree, k, i); - tiles[i].currpiv = qrtree->currpiv(qrtree, k, i); - printf("Info on tiles %d on step %d :\n first_nextpiv = %d\n first_prevpiv = %d\n currpiv = %d\n", - i, k, tiles[i].first_nextpiv, tiles[i].first_prevpiv, tiles[i].currpiv); +void libhqr_matrix_init(libhqr_tree_t *qrtree){ + int i, minMN, tile_id, p, k; + libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); + minMN = libhqr_imin(qrtree->nt, qrtree->mt); + for (k = 0; k < minMN; k++){ + for (i = 0; i < minMN; i++){ + tile_id = (k * qrtree->mt) + i; + arg[tile_id].currpiv = qrtree->currpiv(qrtree, k, i); + p = arg[tile_id].currpiv; + arg[tile_id].first_nextpiv = qrtree->nextpiv(qrtree, k, i, qrtree->mt); + arg[tile_id].first_prevpiv = qrtree->prevpiv(qrtree, k, i, i); + arg[tile_id].type = qrtree->gettype(qrtree, k, i); + arg[tile_id].nextpiv = qrtree->nextpiv(qrtree, k, p, i); + arg[tile_id].prevpiv = qrtree->prevpiv(qrtree, k, p, i); + } } + qrtree->args = (void*)arg; }