time_zgetri_tile.c 8.99 KB
Newer Older
1
/**
2 3
 *
 * @file time_zgetri_tile.c
4
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
5 6
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
PRUVOST Florent's avatar
PRUVOST Florent committed
7
 * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8
 *                      Univ. Bordeaux. All rights reserved.
9
 *
10
 ***
11
 *
PRUVOST Florent's avatar
PRUVOST Florent committed
12 13 14
 * @version 0.9.2
 * @author Mathieu Faverge
 * @date 2014-11-16
15 16
 * @precisions normal z -> c d s
 *
17
 */
18
#define _TYPE  CHAMELEON_Complex64_t
19 20 21
#define _PREC  double
#define _LAMCH LAPACKE_dlamch_work

22
#define _NAME  "CHAMELEON_zgetri_Tile"
23 24 25 26 27 28 29 30 31 32 33 34
/* See Lawn 41 page 120 */
#define _FMULS (FMULS_GETRF(M, N) + FMULS_GETRI( N ))
#define _FADDS (FADDS_GETRF(M, N) + FADDS_GETRI( N ))

//#define GETRI_SYNC

#include "./timing.c"

/*------------------------------------------------------------------------
 *  Check the factorization of the matrix A2
 */
#if 0
35
static int check_getri_factorization(CHAM_desc_t *descA1, CHAM_desc_t *descA2, int *IPIV)
36 37 38 39 40
{
    int info_factorization;
    double Rnorm, Anorm, Xnorm, Bnorm, result;
    double *work = (double *)malloc((descA1->m)*sizeof(double));
    double eps = LAPACKE_dlamch_work('e');
41 42 43
    CHAM_desc_t        *descB, *descX;
    CHAMELEON_Complex64_t *b = (CHAMELEON_Complex64_t *)malloc((descA1->m)*sizeof(CHAMELEON_Complex64_t));
    CHAMELEON_Complex64_t *x = (CHAMELEON_Complex64_t *)malloc((descA1->m)*sizeof(CHAMELEON_Complex64_t));
44

45
    CHAMELEON_Desc_Create(&descB, b, ChamComplexDouble, descA1->mb, descA1->nb, descA1->bsiz,
46
                      descA1->m, 1, 0, 0, descA1->m, 1, 1, 1);
47
    CHAMELEON_Desc_Create(&descX, x, ChamComplexDouble, descA1->mb, descA1->nb, descA1->bsiz,
48
                      descA1->m, 1, 0, 0, descA1->m, 1, 1, 1);
49

50 51
    CHAMELEON_zplrnt_Tile( descX, 537 );
    CHAMELEON_zlacpy_Tile( ChamUpperLower, descX, descB);
52

53
    CHAMELEON_zgetrs_Tile( ChamNoTrans, descA2, IPIV, descX );
54

55 56 57
    Xnorm = CHAMELEON_zlange_Tile(ChamInfNorm, descX,  work);
    Anorm = CHAMELEON_zlange_Tile(ChamInfNorm, descA1, work);
    Bnorm = CHAMELEON_zlange_Tile(ChamInfNorm, descB,  work);
58

59 60 61
    CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans,
                       (CHAMELEON_Complex64_t)1.,  descA1, descX,
                       (CHAMELEON_Complex64_t)-1., descB);
62

63
    Rnorm = CHAMELEON_zlange_Tile(ChamInfNorm, descB, work);
64

65
    if (getenv("CHAMELEON_TESTING_VERBOSE"))
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
      printf( "||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm );

    result = Rnorm / ( (Anorm*Xnorm+Bnorm)*(descA1->m)*eps ) ;
    printf("============\n");
    printf("Checking the Residual of the solution \n");
    printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result);

    if (  isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
        printf("-- The factorization is suspicious ! \n");
        info_factorization = 1;
     }
    else{
        printf("-- The factorization is CORRECT ! \n");
        info_factorization = 0;
    }
    free(x); free(b); free(work);
82 83
    CHAMELEON_Desc_Destroy(&descB);
    CHAMELEON_Desc_Destroy(&descX);
84 85 86 87 88 89 90 91 92

    return info_factorization;
}
#endif

/*------------------------------------------------------------------------
 *  Check the accuracy of the computed inverse
 */

93
static int check_getri_inverse(CHAM_desc_t *descA1, CHAM_desc_t *descA2, int *IPIV, double *dparam )
94 95 96
{
    double Rnorm, Anorm, Ainvnorm, result;
    double *W = (double *)malloc(descA1->n*sizeof(double));
97
    CHAMELEON_Complex64_t *work = (CHAMELEON_Complex64_t *)malloc(descA1->n*descA1->n*sizeof(CHAMELEON_Complex64_t));
98
    double eps = LAPACKE_dlamch_work('e');
99
    CHAM_desc_t        *descW;
100

101
    CHAMELEON_Desc_Create(&descW, work, ChamComplexDouble,  descA1->mb, descA1->nb, descA1->bsiz,
102
                       descA1->m, descA1->n, 0, 0, descA1->m, descA1->n);
103

104 105 106 107
    CHAMELEON_zlaset_Tile( ChamUpperLower, (CHAMELEON_Complex64_t)0., (CHAMELEON_Complex64_t)1., descW);
    CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans,
                       (CHAMELEON_Complex64_t)-1., descA2, descA1,
                       (CHAMELEON_Complex64_t)1.,  descW);
108

109 110 111
    Anorm    = CHAMELEON_zlange_Tile(ChamInfNorm, descA1, W);
    Ainvnorm = CHAMELEON_zlange_Tile(ChamInfNorm, descA2, W);
    Rnorm    = CHAMELEON_zlange_Tile(ChamInfNorm, descW,  W);
112 113 114

    dparam[IPARAM_ANORM] = Anorm;
    dparam[IPARAM_BNORM] = Ainvnorm;
115 116 117 118 119

    result = Rnorm / ( (Anorm*Ainvnorm)*descA1->m*eps ) ;
    dparam[IPARAM_RES] = Rnorm;

    if (  isnan(Ainvnorm) || isinf(Ainvnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
120
        dparam[IPARAM_XNORM] = -1.;
121 122 123 124 125
    }
    else{
        dparam[IPARAM_XNORM] = 0.;
    }

126
    CHAMELEON_Desc_Destroy(&descW);
127 128 129
    free(W);
    free(work);

130
    return CHAMELEON_SUCCESS;
131 132 133
}

static int
Mathieu Faverge's avatar
Mathieu Faverge committed
134
RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
135
{
136
    CHAM_desc_t descW;
137 138
    int ret = 0;
    PASTE_CODE_IPARAM_LOCALS( iparam );
139

140 141 142 143 144 145
    if ( M != N ) {
        fprintf(stderr, "This timing works only with M == N\n");
        return -1;
    }

    /* Allocate Data */
146 147
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA,      1, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N );
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA2, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N );
148 149
    PASTE_CODE_ALLOCATE_MATRIX( piv, 1, int, N, 1 );

150 151
    CHAMELEON_Alloc_Workspace_zgetri_Tile_Async(descA, &descW);
    CHAMELEON_zplrnt_Tile( descA, 3453 );
152 153

    if ( check ) {
154
        CHAMELEON_zlacpy_Tile( ChamUpperLower, descA, descA2 );
155 156
    }

157
    /* CHAMELEON ZGETRF / ZTRTRI / ZTRSMRV  */
158 159
    {
#if defined(TRACE_BY_SEQUENCE)
160 161 162 163 164
        RUNTIME_sequence_t *sequence;
        RUNTIME_request_t request[4] = { RUNTIME_REQUEST_INITIALIZER,
                                         RUNTIME_REQUEST_INITIALIZER,
                                         RUNTIME_REQUEST_INITIALIZER,
                                         RUNTIME_REQUEST_INITIALIZER };
165

166
        CHAMELEON_Sequence_Create(&sequence);
167 168

        if ( ! iparam[IPARAM_ASYNC] ) {
169

170
            START_TIMING();
171 172
            CHAMELEON_zgetrf_Tile_Async( descA, piv, sequence, &request[0] );
            CHAMELEON_Sequence_Wait(sequence);
173

174 175
            CHAMELEON_ztrtri_Tile_Async( ChamUpper, ChamNonUnit, descA, sequence, &request[1] );
            CHAMELEON_Sequence_Wait(sequence);
176

177 178
            CHAMELEON_ztrsmrv_Tile_Async( ChamRight, ChamLower, ChamNoTrans, ChamUnit,
                                      (CHAMELEON_Complex64_t) 1.0, descA, &descW,
179
                                      sequence, &request[2] );
180
            CHAMELEON_Sequence_Wait(sequence);
181

182
            CHAMELEON_zlaswpc_Tile_Async( descA, 1, descA->m, piv, -1,
183
                                      sequence, &request[3] );
184 185
            CHAMELEON_Desc_Flush( descA, sequence );
            CHAMELEON_Sequence_Wait(sequence);
186 187 188 189 190
            STOP_TIMING();

        } else {

            START_TIMING();
191 192
            CHAMELEON_zgetrf_Tile_Async( descA, piv, sequence, &request[0]);
            CHAMELEON_ztrtri_Tile_Async( ChamUpper, ChamNonUnit,
193
                                     descA, sequence, &request[1] );
194 195
            CHAMELEON_ztrsmrv_Tile_Async( ChamRight, ChamLower, ChamNoTrans, ChamUnit,
                                      (CHAMELEON_Complex64_t) 1.0,
196
                                      descA, &descW, sequence, &request[2] );
197
            CHAMELEON_zlaswpc_Tile_Async( descA, 1, descA->m, piv, -1,
198 199
                                      sequence, &request[3] );

200
            /* Wait for everything */
201 202
            CHAMELEON_Desc_Flush( descA, sequence );
            CHAMELEON_Sequence_Wait( sequence );
203
            STOP_TIMING();
204

205 206
        }

207 208 209 210
        CHAMELEON_Sequence_Destroy(sequence[0]);
        CHAMELEON_Sequence_Destroy(sequence[1]);
        CHAMELEON_Sequence_Destroy(sequence[2]);
        CHAMELEON_Sequence_Destroy(sequence[3]);
211

212 213 214 215
#else
        if ( ! iparam[IPARAM_ASYNC] ) {

            START_TIMING();
216 217 218 219 220
            CHAMELEON_zgetrf_Tile(descA, piv);
            CHAMELEON_ztrtri_Tile(ChamUpper, ChamNonUnit, descA);
            CHAMELEON_ztrsmrv_Tile(ChamRight, ChamLower, ChamNoTrans, ChamUnit,
                                (CHAMELEON_Complex64_t) 1.0, descA, &descW);
            CHAMELEON_zlaswpc_Tile(descA, 1, descA->m, piv, -1);
221 222 223 224
            STOP_TIMING();

        } else {

225 226 227
            RUNTIME_sequence_t *sequence;
            RUNTIME_request_t request[2] = { RUNTIME_REQUEST_INITIALIZER,
                                             RUNTIME_REQUEST_INITIALIZER };
228

229
            CHAMELEON_Sequence_Create(&sequence);
230

231
            START_TIMING();
232 233 234 235
            CHAMELEON_zgetrf_Tile_Async(descA, piv, sequence, &request[0]);
            CHAMELEON_zgetri_Tile_Async(descA, piv, &descW, sequence, &request[1]);
            CHAMELEON_Desc_Flush( descA, sequence );
            CHAMELEON_Sequence_Wait(sequence);
236
            STOP_TIMING();
237

238
            CHAMELEON_Sequence_Destroy(sequence);
239 240 241
        }
#endif
    }
242

243 244 245 246
    /* Check the solution */
    if ( check )
    {
        ret = check_getri_inverse(descA2, descA, piv, dparam);
247

248 249 250 251 252 253 254 255 256
        PASTE_CODE_FREE_MATRIX( descA2 );
    }

    PASTE_CODE_FREE_MATRIX( descA );
    free(descW.mat);
    free( piv );

    return ret;
}