timing_zauxiliary.c 12.8 KB
Newer Older
1
/**
2 3
 *
 * @file timing_zauxiliary.c
4
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
5 6
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
Mathieu Faverge's avatar
Mathieu Faverge committed
7
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8
 *                      Univ. Bordeaux. All rights reserved.
9
 *
10
 ***
11
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
12
 * @version 1.0.0
13 14
 * @precisions normal z -> c d s
 *
15
 */
16 17 18 19
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
20
#include <chameleon.h>
21 22 23
#include <coreblas/cblas.h>
#include <coreblas/lapacke.h>
#include <coreblas.h>
24
#include "timing_zauxiliary.h"
25 26 27 28 29

/*-------------------------------------------------------------------
 * Check the orthogonality of Q
 */

30
int z_check_orthogonality(int M, int N, int LDQ, CHAMELEON_Complex64_t *Q)
31 32 33 34 35
{
    double alpha, beta;
    double normQ;
    int info_ortho;
    int i;
36
    int minMN = chameleon_min(M, N);
37 38 39 40 41 42 43 44
    double eps;
    double *work = (double *)malloc(minMN*sizeof(double));

    eps = LAPACKE_dlamch_work('e');
    alpha = 1.0;
    beta  = -1.0;

    /* Build the idendity matrix USE DLASET?*/
45 46
    CHAMELEON_Complex64_t *Id = (CHAMELEON_Complex64_t *) malloc(minMN*minMN*sizeof(CHAMELEON_Complex64_t));
    memset((void*)Id, 0, minMN*minMN*sizeof(CHAMELEON_Complex64_t));
47
    for (i = 0; i < minMN; i++)
48
        Id[i*minMN+i] = (CHAMELEON_Complex64_t)1.0;
49 50 51 52 53 54 55

    /* Perform Id - Q'Q */
    if (M >= N)
        cblas_zherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, alpha, Q, LDQ, beta, Id, N);
    else
        cblas_zherk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M);

56
    normQ = LAPACKE_zlansy_work(LAPACK_COL_MAJOR, 'I', 'u', minMN, Id, minMN, work);
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79

    printf("============\n");
    printf("Checking the orthogonality of Q \n");
    printf("||Id-Q'*Q||_oo / (N*eps) = %e \n",normQ/(minMN*eps));

    if ( isnan(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 10.0) ) {
        printf("-- Orthogonality is suspicious ! \n");
        info_ortho=1;
    }
    else {
        printf("-- Orthogonality is CORRECT ! \n");
        info_ortho=0;
    }

    free(work); free(Id);

    return info_ortho;
}

/*------------------------------------------------------------
 *  Check the factorization QR
 */

80
int z_check_QRfactorization(int M, int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2, int LDA, CHAMELEON_Complex64_t *Q)
81 82
{
    double Anorm, Rnorm;
83
    CHAMELEON_Complex64_t alpha, beta;
84 85 86 87 88 89
    int info_factorization;
    int i,j;
    double eps;

    eps = LAPACKE_dlamch_work('e');

90 91
    CHAMELEON_Complex64_t *Ql       = (CHAMELEON_Complex64_t *)malloc(M*N*sizeof(CHAMELEON_Complex64_t));
    CHAMELEON_Complex64_t *Residual = (CHAMELEON_Complex64_t *)malloc(M*N*sizeof(CHAMELEON_Complex64_t));
92
    double *work              = (double *)malloc(chameleon_max(M,N)*sizeof(double));
93 94 95 96 97 98

    alpha=1.0;
    beta=0.0;

    if (M >= N) {
        /* Extract the R */
99 100
        CHAMELEON_Complex64_t *R = (CHAMELEON_Complex64_t *)malloc(N*N*sizeof(CHAMELEON_Complex64_t));
        memset((void*)R, 0, N*N*sizeof(CHAMELEON_Complex64_t));
101 102 103
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N);

        /* Perform Ql=Q*R */
104
        memset((void*)Ql, 0, M*N*sizeof(CHAMELEON_Complex64_t));
105 106 107 108 109
        cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, CBLAS_SADDR(alpha), Q, LDA, R, N, CBLAS_SADDR(beta), Ql, M);
        free(R);
    }
    else {
        /* Extract the L */
110 111
        CHAMELEON_Complex64_t *L = (CHAMELEON_Complex64_t *)malloc(M*M*sizeof(CHAMELEON_Complex64_t));
        memset((void*)L, 0, M*M*sizeof(CHAMELEON_Complex64_t));
112 113 114
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M);

    /* Perform Ql=LQ */
115
        memset((void*)Ql, 0, M*N*sizeof(CHAMELEON_Complex64_t));
116 117 118 119 120 121 122 123 124
        cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, CBLAS_SADDR(alpha), L, M, Q, LDA, CBLAS_SADDR(beta), Ql, M);
        free(L);
    }

    /* Compute the Residual */
    for (i = 0; i < M; i++)
        for (j = 0 ; j < N; j++)
            Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i];

125 126
    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, N, Residual, M, work);
    Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, N, A2, LDA, work);
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156

    if (M >= N) {
        printf("============\n");
        printf("Checking the QR Factorization \n");
        printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
    }
    else {
        printf("============\n");
        printf("Checking the LQ Factorization \n");
        printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
    }

    if (isnan(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 10.0) ) {
        printf("-- Factorization is suspicious ! \n");
        info_factorization = 1;
    }
    else {
        printf("-- Factorization is CORRECT ! \n");
        info_factorization = 0;
    }

    free(work); free(Ql); free(Residual);

    return info_factorization;
}

/*------------------------------------------------------------------------
 *  Check the factorization of the matrix A2
 */

157
int z_check_LLTfactorization(int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2, int LDA, cham_uplo_t uplo)
158 159
{
    double Anorm, Rnorm;
160
    CHAMELEON_Complex64_t alpha;
161 162 163 164 165 166
    int info_factorization;
    int i,j;
    double eps;

    eps = LAPACKE_dlamch_work('e');

167 168 169
    CHAMELEON_Complex64_t *Residual = (CHAMELEON_Complex64_t *)malloc(N*N*sizeof(CHAMELEON_Complex64_t));
    CHAMELEON_Complex64_t *L1       = (CHAMELEON_Complex64_t *)malloc(N*N*sizeof(CHAMELEON_Complex64_t));
    CHAMELEON_Complex64_t *L2       = (CHAMELEON_Complex64_t *)malloc(N*N*sizeof(CHAMELEON_Complex64_t));
170 171
    double *work              = (double *)malloc(N*sizeof(double));

172 173
    memset((void*)L1, 0, N*N*sizeof(CHAMELEON_Complex64_t));
    memset((void*)L2, 0, N*N*sizeof(CHAMELEON_Complex64_t));
174 175 176 177 178 179

    alpha= 1.0;

    LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);

    /* Dealing with L'L or U'U  */
180
    if (uplo == ChamUpper){
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N);
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N);
        cblas_ztrmm(CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N);
    }
    else{
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N);
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N);
        cblas_ztrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N);
    }

    /* Compute the Residual || A -L'L|| */
    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++)
           Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];

196 197
    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', N, N, Residual, N, work);
    Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', N, N, A1, LDA, work);
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219

    printf("============\n");
    printf("Checking the Cholesky Factorization \n");
    printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));

    if ( isnan(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 10.0) ){
        printf("-- Factorization is suspicious ! \n");
        info_factorization = 1;
    }
    else{
        printf("-- Factorization is CORRECT ! \n");
        info_factorization = 0;
    }

    free(Residual); free(L1); free(L2); free(work);

    return info_factorization;
}

/*--------------------------------------------------------------
 * Check the gemm
 */
220 221 222
double z_check_gemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int K,
                   CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *A, int LDA,
                   CHAMELEON_Complex64_t *B, int LDB,
Mathieu Faverge's avatar
Mathieu Faverge committed
223
                   CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *Ccham,
224
                   CHAMELEON_Complex64_t *Cref, int LDC,
Mathieu Faverge's avatar
Mathieu Faverge committed
225
                   double *Cinitnorm, double *Cchamnorm, double *Clapacknorm )
226
{
227
    CHAMELEON_Complex64_t beta_const = -1.0;
228
    double Rnorm;
229
    double *work = (double *)malloc(chameleon_max(K,chameleon_max(M, N))* sizeof(double));
230

231
    *Cinitnorm   = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cref,    LDC, work);
Mathieu Faverge's avatar
Mathieu Faverge committed
232
    *Cchamnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, N, Ccham, LDC, work);
233 234 235 236

    cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K,
                CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC);

237
    *Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, work);
238

Mathieu Faverge's avatar
Mathieu Faverge committed
239
    cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Ccham, 1, Cref, 1);
240

241
    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, work);
242 243 244 245 246 247 248 249 250

    free(work);

    return Rnorm;
}

/*--------------------------------------------------------------
 * Check the trsm
 */
251 252 253
double z_check_trsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, cham_diag_t diag,
                   int M, int NRHS, CHAMELEON_Complex64_t alpha,
                   CHAMELEON_Complex64_t *A, int LDA,
Mathieu Faverge's avatar
Mathieu Faverge committed
254 255
                   CHAMELEON_Complex64_t *Bcham, CHAMELEON_Complex64_t *Bref, int LDB,
                   double *Binitnorm, double *Bchamnorm, double *Blapacknorm )
256
{
257
    CHAMELEON_Complex64_t beta_const = -1.0;
258
    double Rnorm;
259
    double *work = (double *)malloc(chameleon_max(M, NRHS)* sizeof(double));
260 261
    /*double eps = LAPACKE_dlamch_work('e');*/

262 263
    *Binitnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'i', M, NRHS, Bref,  LDB, work );
    *Bchamnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'i', M, NRHS, Bcham, LDB, work );
264

265 266 267
    cblas_ztrsm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
                 (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, NRHS,
                 CBLAS_SADDR(alpha), A, LDA, Bref, LDB );
268

269
    *Blapacknorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'i', M, NRHS, Bref, LDB, work );
270

271
    cblas_zaxpy( LDB * NRHS, CBLAS_SADDR(beta_const), Bcham, 1, Bref, 1 );
272

273
    Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'i', M, NRHS, Bref, LDB, work );
274
    Rnorm = Rnorm / *Blapacknorm;
275
    /* chameleon_max(M,NRHS) * eps);*/
276 277 278 279 280 281 282 283 284 285

    free(work);

    return Rnorm;
}

/*--------------------------------------------------------------
 * Check the solution
 */

286 287
double z_check_solution(int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA,
                      CHAMELEON_Complex64_t *B,  CHAMELEON_Complex64_t *X, int LDB,
288 289 290 291
                      double *anorm, double *bnorm, double *xnorm )
{
/*     int info_solution; */
    double Rnorm = -1.00;
292 293
    CHAMELEON_Complex64_t zone  =  1.0;
    CHAMELEON_Complex64_t mzone = -1.0;
294
    double *work = (double *)malloc(chameleon_max(M, N)* sizeof(double));
295

296 297 298
    *anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, N,    A, LDA, work);
    *xnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', M, NRHS, X, LDB, work);
    *bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', N, NRHS, B, LDB, work);
299

300 301 302
    cblas_zgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N,
                 CBLAS_SADDR(zone),  A, LDA, X, LDB,
                 CBLAS_SADDR(mzone), B, LDB );
303

304
    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'I', N, NRHS, B, LDB, work);
305 306 307 308 309 310 311 312 313 314

    free(work);

    return Rnorm;
}

/*------------------------------------------------------------------------
 *  *  Check the accuracy of the computed inverse
 *   */

315 316
int zcheck_inverse(int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2, int LDA,
                        cham_uplo_t uplo, double *rnorm, double *anorm, double *ainvnorm )
317 318 319 320
{
    int info_inverse;
    int i, j;
    double result;
321 322
    CHAMELEON_Complex64_t alpha, beta, zone;
    CHAMELEON_Complex64_t *workz = (CHAMELEON_Complex64_t *)malloc(N*N*sizeof(CHAMELEON_Complex64_t));
323 324 325 326 327 328 329 330 331 332
    double *workd = (double *)malloc(N*sizeof(double));
    double eps;

    eps = LAPACKE_dlamch_work('e');

    alpha = -1.0;
    beta  = 0.0;
    zone = 1.0;

    /* Rebuild the other part of the inverse matrix */
333
    if(uplo == ChamUpper){
334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
       for(j=0; j<N; j++)
          for(i=0; i<j; i++)
             *(A2+j+i*LDA) = *(A2+i+j*LDA);
       cblas_zhemm(CblasColMajor, CblasLeft, CblasUpper, N, N, CBLAS_SADDR(alpha), A2, LDA, A1, LDA, CBLAS_SADDR(beta), workz, N);

    }
    else {
       for(j=0; j<N; j++)
          for(i=j; i<N; i++)
             *(A2+j+i*LDA) = *(A2+i+j*LDA);
       cblas_zhemm(CblasColMajor, CblasLeft, CblasLower, N, N, CBLAS_SADDR(alpha), A2, LDA, A1, LDA, CBLAS_SADDR(beta), workz, N);
    }

    /* Add the identity matrix to workz */
     for(i=0; i<N; i++)
        *(workz+i+i*N) = *(workz+i+i*N) + zone;


352 353 354
    *rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'O', N, N,    workz, N, workd);
    *anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'O', N, N, A1, LDA, workd);
    *ainvnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 'O', N, N, A2, LDA, workd);
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370


      result = *rnorm / ( ((*anorm) * (*ainvnorm))*N*eps ) ;

    if (  isnan(*ainvnorm) || isinf(*ainvnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
        info_inverse = 1;
     }
    else{
        info_inverse = 0;
    }

    free(workz);
    free(workd);

    return info_inverse;
}