/**
 *
 * @file time_zgetrf_nopiv_tile.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @version 0.9.2
 * @author Mathieu Faverge
 * @date 2014-11-16
 * @precisions normal z -> c d s
 *
 */
/*
 * Configuration macros for this timing driver. They are defined before
 * "./timing.c" is included, so presumably timing.c consumes them -- TODO
 * confirm against timing.c.
 */
#define _TYPE  CHAMELEON_Complex64_t
#define _PREC  double
#define _LAMCH LAPACKE_dlamch_work

/*
 * NOTE(review): this file times CHAMELEON_zgetrf_nopiv_Tile() but the label
 * below reads "CHAMELEON_zgetrf_Tile" -- confirm whether it should name the
 * nopiv variant before changing it (the string may be consumed by scripts).
 */
#define _NAME  "CHAMELEON_zgetrf_Tile"
/* See Lawn 41 page 120 */
#define _FMULS FMULS_GETRF(M, N)
#define _FADDS FADDS_GETRF(M, N)

#include "./timing.c"

static int
Mathieu Faverge's avatar
Mathieu Faverge committed
30
RunTest(int *iparam, double *dparam, chameleon_time_t *t_) 
31 32 33 34 35 36 37 38 39
{
    PASTE_CODE_IPARAM_LOCALS( iparam );

    if ( M != N && check ) {
        fprintf(stderr, "Check cannot be perfomed with M != N\n");
        check = 0;
    }

    /* Allocate Data */
Mathieu Faverge's avatar
Mathieu Faverge committed
40 41 42 43
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, M, N );
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descX,  check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, M, NRHS );
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descAC, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, M, N    );
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descB,  check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, M, NRHS );
44

Mathieu Faverge's avatar
Mathieu Faverge committed
45
    CHAMELEON_zplrnt_Tile(descA, 3456);
46 47 48

    /* Save A for check */
    if (check == 1){
Mathieu Faverge's avatar
Mathieu Faverge committed
49
        CHAMELEON_zlacpy_Tile(ChamUpperLower, descA, descAC);
50
    }
51

52 53 54 55 56 57
    /**
     * Consider this optimization on some heterogenous platforms and matrix sizes.
     * Often, TRSM kernel on GPU yields significantly less performance rate than GEMM,
     * while performances are similar on CPU. On this algorithm it is therefore
     * recommended to execute all TRSMs (~low amount) on CPU to increase GPU efficiency.
     */
Mathieu Faverge's avatar
Mathieu Faverge committed
58
    //RUNTIME_zlocality_onerestrict( CHAMELEON_TRSM, STARPU_CPU );
59

60
    START_TIMING();
Mathieu Faverge's avatar
Mathieu Faverge committed
61
    CHAMELEON_zgetrf_nopiv_Tile( descA );
62
    STOP_TIMING();
63

64 65 66
    /* Check the solution */
    if ( check )
    {
Mathieu Faverge's avatar
Mathieu Faverge committed
67 68
        CHAMELEON_zplrnt_Tile( descX, 7732 );
        CHAMELEON_zlacpy_Tile(ChamUpperLower, descX, descB);
69

Mathieu Faverge's avatar
Mathieu Faverge committed
70
        CHAMELEON_zgetrs_nopiv_Tile( descA, descX );
71

Mathieu Faverge's avatar
Mathieu Faverge committed
72 73 74 75 76
        dparam[IPARAM_ANORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descAC);
        dparam[IPARAM_BNORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descB);
        dparam[IPARAM_XNORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descX);
        CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, 1.0, descAC, descX, -1.0, descB );
        dparam[IPARAM_RES] = CHAMELEON_zlange_Tile(ChamInfNorm, descB);
77 78 79 80 81 82 83 84 85
        PASTE_CODE_FREE_MATRIX( descX  );
        PASTE_CODE_FREE_MATRIX( descAC );
        PASTE_CODE_FREE_MATRIX( descB  );
    }

    PASTE_CODE_FREE_MATRIX( descA );
 
    return 0;
}