/**
 *
 * @file sopalin_zhetrf.c
 *
 *  PaStiX factorization routines
 *  PaStiX is a software package provided by Inria Bordeaux - Sud-Ouest,
 *  LaBRI, University of Bordeaux 1 and IPB.
 *
 * @version 5.1.0
 * @author Pascal Henon
 * @author Xavier Lacoste
 * @author Pierre Ramet
 * @author Mathieu Faverge
 * @date 2013-06-24
 *
 * @precisions normal z -> c
 *
 **/
#include "common.h"
#include "isched.h"
#include "solver.h"
#include "sopalin_data.h"
#include "sopalin/coeftab_z.h"
#include "pastix_zcores.h"

#if defined(PASTIX_WITH_PARSEC)
#include <parsec.h>
#include <parsec/data.h>
#include <parsec/data_distribution.h>
#endif

void
33
34
sequential_zhetrf( pastix_data_t  *pastix_data,
                   sopalin_data_t *sopalin_data )
35
{
36
37
38
39
    SolverMatrix       *datacode = pastix_data->solvmatr;
    SolverCblk         *cblk;
    double              threshold = sopalin_data->diagthreshold;
    pastix_complex64_t *work1, *work2;
40
    pastix_int_t  N, i;
41
    (void)pastix_data;
42

43
44
    MALLOC_INTERN( work1, pastix_imax(datacode->gemmmax, datacode->diagmax),
                   pastix_complex64_t );
45
46
    MALLOC_INTERN( work2, pastix_imax(datacode->gemmmax, datacode->arftmax),
                   pastix_complex64_t );
47

48
49
    cblk = datacode->cblktab;
    for (i=0; i<datacode->cblknbr; i++, cblk++){
50
51
52
53

        if ( cblk->cblktype & CBLK_IN_SCHUR )
            break;

54
55
        N = cblk_colnbr( cblk );

56
        /* Compute */
57
        cpucblk_zhetrfsp1d( datacode, cblk, threshold,
58
59
60
61
62
63
                            /*
                             * Workspace size has been computed without the
                             * diagonal block, thus in order to work with generic
                             * TRSM and GEMM kernels, we must shift the DLh workspace
                             * by the diagonal block size
                             */
64
                            work1 - (N*N), work2 );
65
    }
66

67
68
    memFree_null( work1 );
    memFree_null( work2 );
69
}
70
71

void
RAMET Pierre's avatar
RAMET Pierre committed
72
thread_pzhetrf( isched_thread_t *ctx, void *args )
73
{
74
75
76
77
78
    sopalin_data_t     *sopalin_data = (sopalin_data_t*)args;
    SolverMatrix       *datacode = sopalin_data->solvmtx;
    SolverCblk         *cblk;
    Task               *t;
    pastix_complex64_t *work1, *work2;
79
    pastix_int_t  N, i, ii;
80
    pastix_int_t  tasknbr, *tasktab;
RAMET Pierre's avatar
RAMET Pierre committed
81
    int rank = ctx->rank;
82

83
84
85
86
    MALLOC_INTERN( work1, pastix_imax(datacode->gemmmax, datacode->diagmax),
                   pastix_complex64_t );
    MALLOC_INTERN( work2, datacode->gemmmax, pastix_complex64_t );

87
88
89
90
91
92
93
94
    tasknbr = datacode->ttsknbr[rank];
    tasktab = datacode->ttsktab[rank];

    for (ii=0; ii<tasknbr; ii++) {
        i = tasktab[ii];
        t = datacode->tasktab + i;
        cblk = datacode->cblktab + t->cblknum;

95
96
97
        if ( cblk->cblktype & CBLK_IN_SCHUR )
            continue;

98
99
        N = cblk_colnbr( cblk );

100
101
102
        /* Wait */
        do {} while( cblk->ctrbcnt );

103
        /* Compute */
104
        cpucblk_zhetrfsp1d( datacode, cblk, sopalin_data->diagthreshold,
105
106
107
108
109
110
                            /*
                             * Workspace size has been computed without the
                             * diagonal block, thus in order to work with generic
                             * TRSM and GEMM kernels, we must shift the DLh workspace
                             * by the diagonal block size
                             */
111
                            work1 - (N*N), work2 );
112
113
    }

114
115
    memFree_null( work1 );
    memFree_null( work2 );
116
117
118
}

void
119
120
thread_zhetrf( pastix_data_t  *pastix_data,
               sopalin_data_t *sopalin_data )
121
{
122
    isched_parallel_call( pastix_data->isched, thread_pzhetrf, sopalin_data );
123
124
}

125
#if defined(PASTIX_WITH_PARSEC) && 0
126
127
128
129
void
parsec_zhetrf( pastix_data_t  *pastix_data,
               sopalin_data_t *sopalin_data )
{
130
    parsec_context_t *ctx;
131
132
133

    /* Start PaRSEC */
    if (pastix_data->parsec == NULL) {
134
135
        int argc = 0;
        pastix_parsec_init( pastix_data, &argc, NULL );
136
137
138
139
    }
    ctx = pastix_data->parsec;

    /* Run the facto */
140
    dsparse_zhetrf_sp( ctx, sopalin_data->solvmtx->parsec_desc, sopalin_data );
141
142
143
144
}
#endif

static void (*zhetrf_table[4])(pastix_data_t *, sopalin_data_t *) = {
145
146
    sequential_zhetrf,
    thread_zhetrf,
147
#if defined(PASTIX_WITH_PARSEC) && 0
148
149
    parsec_zhetrf,
#else
150
    NULL,
151
#endif
152
153
154
155
    NULL
};

void
156
157
sopalin_zhetrf( pastix_data_t  *pastix_data,
                sopalin_data_t *sopalin_data )
158
{
159
160
    int sched = pastix_data->iparm[IPARM_SCHEDULER];
    void (*zhetrf)(pastix_data_t *, sopalin_data_t *) = zhetrf_table[ sched ];
161
162
163
164

    if (zhetrf == NULL) {
        zhetrf = thread_zhetrf;
    }
165
    zhetrf( pastix_data, sopalin_data );
166
167
168
169

#if defined(PASTIX_DEBUG_FACTO)
    coeftab_zdump( pastix_data, sopalin_data->solvmtx, "hetrf.txt" );
#endif
170
}