time_zgetri_tile.c 8.66 KB
Newer Older
1
/**
2 3
 *
 * @file time_zgetri_tile.c
4
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
5 6
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
7 8
 * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
9
 *
10
 ***
11
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
12
 * @version 1.0.0
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
 * @precisions normal z -> c d s
 *
 **/
#define _TYPE  MORSE_Complex64_t
#define _PREC  double
#define _LAMCH LAPACKE_dlamch_work

#define _NAME  "MORSE_zgetri_Tile"
/* See Lawn 41 page 120 */
#define _FMULS (FMULS_GETRF(M, N) + FMULS_GETRI( N ))
#define _FADDS (FADDS_GETRF(M, N) + FADDS_GETRI( N ))

//#define GETRI_SYNC

#include "./timing.c"

/*------------------------------------------------------------------------
 *  Check the factorization of the matrix A2
 */
#if 0
static int check_getri_factorization(MORSE_desc_t *descA1, MORSE_desc_t *descA2, int *IPIV)
{
    int info_factorization;
    double Rnorm, Anorm, Xnorm, Bnorm, result;
    double *work = (double *)malloc((descA1->m)*sizeof(double));
    double eps = LAPACKE_dlamch_work('e');
    MORSE_desc_t        *descB, *descX;
    MORSE_Complex64_t *b = (MORSE_Complex64_t *)malloc((descA1->m)*sizeof(MORSE_Complex64_t));
    MORSE_Complex64_t *x = (MORSE_Complex64_t *)malloc((descA1->m)*sizeof(MORSE_Complex64_t));

43 44 45 46
    MORSE_Desc_Create(&descB, b, MorseComplexDouble, descA1->mb, descA1->nb, descA1->bsiz,
                      descA1->m, 1, 0, 0, descA1->m, 1, 1, 1);
    MORSE_Desc_Create(&descX, x, MorseComplexDouble, descA1->mb, descA1->nb, descA1->bsiz,
                      descA1->m, 1, 0, 0, descA1->m, 1, 1, 1);
47 48 49 50 51 52 53 54 55 56 57

    MORSE_zplrnt_Tile( descX, 537 );
    MORSE_zlacpy_Tile( MorseUpperLower, descX, descB);

    MORSE_zgetrs_Tile( MorseNoTrans, descA2, IPIV, descX );

    Xnorm = MORSE_zlange_Tile(MorseInfNorm, descX,  work);
    Anorm = MORSE_zlange_Tile(MorseInfNorm, descA1, work);
    Bnorm = MORSE_zlange_Tile(MorseInfNorm, descB,  work);

    MORSE_zgemm_Tile( MorseNoTrans, MorseNoTrans,
58
                       (MORSE_Complex64_t)1.,  descA1, descX,
59
                       (MORSE_Complex64_t)-1., descB);
60

61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
    Rnorm = MORSE_zlange_Tile(MorseInfNorm, descB, work);

    if (getenv("MORSE_TESTING_VERBOSE"))
      printf( "||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm );

    result = Rnorm / ( (Anorm*Xnorm+Bnorm)*(descA1->m)*eps ) ;
    printf("============\n");
    printf("Checking the Residual of the solution \n");
    printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result);

    if (  isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
        printf("-- The factorization is suspicious ! \n");
        info_factorization = 1;
     }
    else{
        printf("-- The factorization is CORRECT ! \n");
        info_factorization = 0;
    }
    free(x); free(b); free(work);
    MORSE_Desc_Destroy(&descB);
    MORSE_Desc_Destroy(&descX);

    return info_factorization;
}
#endif

/*------------------------------------------------------------------------
 *  Check the accuracy of the computed inverse
 */

static int check_getri_inverse(MORSE_desc_t *descA1, MORSE_desc_t *descA2, int *IPIV, double *dparam )
{
    double Rnorm, Anorm, Ainvnorm, result;
    double *W = (double *)malloc(descA1->n*sizeof(double));
    MORSE_Complex64_t *work = (MORSE_Complex64_t *)malloc(descA1->n*descA1->n*sizeof(MORSE_Complex64_t));
    double eps = LAPACKE_dlamch_work('e');
    MORSE_desc_t        *descW;

99
    MORSE_Desc_Create(&descW, work, MorseComplexDouble,  descA1->mb, descA1->nb, descA1->bsiz,
100
                       descA1->m, descA1->n, 0, 0, descA1->m, descA1->n);
101

102
    MORSE_zlaset_Tile( MorseUpperLower, (MORSE_Complex64_t)0., (MORSE_Complex64_t)1., descW);
103 104
    MORSE_zgemm_Tile( MorseNoTrans, MorseNoTrans,
                       (MORSE_Complex64_t)-1., descA2, descA1,
105 106 107 108 109
                       (MORSE_Complex64_t)1.,  descW);

    Anorm    = MORSE_zlange_Tile(MorseInfNorm, descA1, W);
    Ainvnorm = MORSE_zlange_Tile(MorseInfNorm, descA2, W);
    Rnorm    = MORSE_zlange_Tile(MorseInfNorm, descW,  W);
110 111 112

    dparam[IPARAM_ANORM] = Anorm;
    dparam[IPARAM_BNORM] = Ainvnorm;
113 114 115 116 117

    result = Rnorm / ( (Anorm*Ainvnorm)*descA1->m*eps ) ;
    dparam[IPARAM_RES] = Rnorm;

    if (  isnan(Ainvnorm) || isinf(Ainvnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
118
        dparam[IPARAM_XNORM] = -1.;
119 120 121 122 123 124 125 126 127 128 129 130 131
    }
    else{
        dparam[IPARAM_XNORM] = 0.;
    }

    MORSE_Desc_Destroy(&descW);
    free(W);
    free(work);

    return MORSE_SUCCESS;
}

static int
132
RunTest(int *iparam, double *dparam, morse_time_t *t_)
133 134 135 136
{
    MORSE_desc_t descW;
    int ret = 0;
    PASTE_CODE_IPARAM_LOCALS( iparam );
137

138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
    if ( M != N ) {
        fprintf(stderr, "This timing works only with M == N\n");
        return -1;
    }

    /* Allocate Data */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA,      1, MORSE_Complex64_t, MorseComplexDouble, LDA, N, N );
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA2, check, MORSE_Complex64_t, MorseComplexDouble, LDA, N, N );
    PASTE_CODE_ALLOCATE_MATRIX( piv, 1, int, N, 1 );

    MORSE_Alloc_Workspace_zgetri_Tile_Async(descA, &descW);
    MORSE_zplrnt_Tile( descA, 3453 );

    if ( check ) {
        MORSE_zlacpy_Tile( MorseUpperLower, descA, descA2 );
    }

    /* MORSE ZGETRF / ZTRTRI / ZTRSMRV  */
    {
#if defined(TRACE_BY_SEQUENCE)
158 159 160 161 162 163 164
        MORSE_sequence_t *sequence;
        MORSE_request_t request[4] = { MORSE_REQUEST_INITIALIZER,
                                       MORSE_REQUEST_INITIALIZER,
                                       MORSE_REQUEST_INITIALIZER,
                                       MORSE_REQUEST_INITIALIZER };

        MORSE_Sequence_Create(&sequence);
165 166

        if ( ! iparam[IPARAM_ASYNC] ) {
167

168
            START_TIMING();
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
            MORSE_zgetrf_Tile_Async( descA, piv, sequence, &request[0] );
            MORSE_Sequence_Wait(sequence);

            MORSE_ztrtri_Tile_Async( MorseUpper, MorseNonUnit, descA, sequence, &request[1] );
            MORSE_Sequence_Wait(sequence);

            MORSE_ztrsmrv_Tile_Async( MorseRight, MorseLower, MorseNoTrans, MorseUnit,
                                      (MORSE_Complex64_t) 1.0, descA, &descW,
                                      sequence, &request[2] );
            MORSE_Sequence_Wait(sequence);

            MORSE_zlaswpc_Tile_Async( descA, 1, descA->m, piv, -1,
                                      sequence, &request[3] );
            MORSE_Sequence_Wait(sequence);
            MORSE_Desc_Flush( descA, sequence );
184 185 186 187 188
            STOP_TIMING();

        } else {

            START_TIMING();
189 190 191 192 193 194 195 196 197
            MORSE_zgetrf_Tile_Async( descA, piv, sequence, &request[0]);
            MORSE_ztrtri_Tile_Async( MorseUpper, MorseNonUnit,
                                     descA, sequence, &request[1] );
            MORSE_ztrsmrv_Tile_Async( MorseRight, MorseLower, MorseNoTrans, MorseUnit,
                                      (MORSE_Complex64_t) 1.0,
                                      descA, &descW, sequence, &request[2] );
            MORSE_zlaswpc_Tile_Async( descA, 1, descA->m, piv, -1,
                                      sequence, &request[3] );

198
            /* Wait for everything */
199 200
            MORSE_Sequence_Wait( sequence );
            MORSE_Desc_Flush( descA, sequence );
201
            STOP_TIMING();
202

203 204 205 206 207 208
        }

        MORSE_Sequence_Destroy(sequence[0]);
        MORSE_Sequence_Destroy(sequence[1]);
        MORSE_Sequence_Destroy(sequence[2]);
        MORSE_Sequence_Destroy(sequence[3]);
209

210 211 212 213 214 215
#else
        if ( ! iparam[IPARAM_ASYNC] ) {

            START_TIMING();
            MORSE_zgetrf_Tile(descA, piv);
            MORSE_ztrtri_Tile(MorseUpper, MorseNonUnit, descA);
216
            MORSE_ztrsmrv_Tile(MorseRight, MorseLower, MorseNoTrans, MorseUnit,
217 218 219 220 221 222 223
                                (MORSE_Complex64_t) 1.0, descA, &descW);
            MORSE_zlaswpc_Tile(descA, 1, descA->m, piv, -1);
            STOP_TIMING();

        } else {

            MORSE_sequence_t *sequence;
224
            MORSE_request_t request[2] = { MORSE_REQUEST_INITIALIZER,
225 226 227
                                          MORSE_REQUEST_INITIALIZER };

            MORSE_Sequence_Create(&sequence);
228

229 230 231 232
            START_TIMING();
            MORSE_zgetrf_Tile_Async(descA, piv, sequence, &request[0]);
            MORSE_zgetri_Tile_Async(descA, piv, &descW, sequence, &request[1]);
            MORSE_Sequence_Wait(sequence);
233
            MORSE_Desc_Flush( descA, sequence );
234
            STOP_TIMING();
235 236

            MORSE_Sequence_Destroy(sequence);
237 238 239
        }
#endif
    }
240

241 242 243 244
    /* Check the solution */
    if ( check )
    {
        ret = check_getri_inverse(descA2, descA, piv, dparam);
245

246 247 248 249 250 251 252 253 254
        PASTE_CODE_FREE_MATRIX( descA2 );
    }

    PASTE_CODE_FREE_MATRIX( descA );
    free(descW.mat);
    free( piv );

    return ret;
}