pzpotrimm.c 16.6 KB
Newer Older
1
/**
2 3
 *
 * @file pzpotrimm.c
4
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
5 6
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
7 8
 * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
9
 *
10
 ***
11 12 13 14 15 16
 *
 *
 *  MORSE auxiliary routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
17
 * @version 1.0.0
18 19 20 21
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for MORSE 1.0.0
 * @author Hatem Ltaief
 * @author Mathieu Faverge
22
 * @author Ali M Charara
23 24 25 26
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
 **/
27
#include "control/common.h"
28 29 30 31

#define A(m,n) A,  m,  n
#define B(m,n) B,  m,  n
#define C(m,n) C,  m,  n
Mathieu Faverge's avatar
Mathieu Faverge committed
32
/*******************************************************************************
33 34 35
 *  Parallel tile Cholesky factorization - dynamic scheduling
 **/
void morse_pzpotrimm(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *C,
36
                     MORSE_sequence_t *sequence, MORSE_request_t *request)
37 38 39 40 41
{
    MORSE_context_t *morse;
    MORSE_option_t options;

    int k, m, n;
42
    int ldbm, ldcm;
43 44 45
    int ldak, ldam, ldan;
    int tempkm, tempmm, tempnn, tempkn;

46
    MORSE_Complex64_t alpha = (MORSE_Complex64_t) 1.0;
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
    MORSE_Complex64_t beta  = (MORSE_Complex64_t) 0.0;
    MORSE_Complex64_t zbeta;
    MORSE_Complex64_t zone  = (MORSE_Complex64_t) 1.0;
    MORSE_Complex64_t mzone = (MORSE_Complex64_t)-1.0;


    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    /*
     *  MorseLower
     */
    if (uplo == MorseLower) {
        /*
         *  ZPOTRF
         */
65
        for (k = 0; k < A->mt; k++) {
66
            RUNTIME_iteration_push(morse, k);
67

68 69
            tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
            ldak = BLKLDD(A, k);
70

71 72 73
            MORSE_TASK_zpotrf(
                &options,
                MorseLower, tempkm, A->mb,
74
                A(k, k), ldak, A->nb*k);
75

76 77 78 79 80 81 82 83 84 85
            for (m = k+1; m < A->mt; m++) {
                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                ldam = BLKLDD(A, m);
                MORSE_TASK_ztrsm(
                    &options,
                    MorseRight, MorseLower, MorseConjTrans, MorseNonUnit,
                    tempmm, A->mb, A->mb,
                    zone, A(k, k), ldak,
                          A(m, k), ldam);
            }
86
            RUNTIME_data_flush( sequence, A(k, k) );
87

88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
            for (n = k+1; n < A->nt; n++) {
                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                ldan = BLKLDD(A, n);
                MORSE_TASK_zherk(
                    &options,
                    MorseLower, MorseNoTrans,
                    tempnn, A->nb, A->mb,
                    -1.0, A(n, k), ldan,
                     1.0, A(n, n), ldan);

                for (m = n+1; m < A->mt; m++) {
                    tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb;
                    ldam = BLKLDD(A, m);
                    MORSE_TASK_zgemm(
                        &options,
                        MorseNoTrans, MorseConjTrans,
                        tempmm, tempnn, A->mb, A->mb,
                        mzone, A(m, k), ldam,
                               A(n, k), ldan,
                        zone,  A(m, n), ldam);
                }
109
                RUNTIME_data_flush( sequence, A(n, k) );
110
            }
111 112

            RUNTIME_iteration_pop(morse);
113
        }
114 115 116
        /*
         *  ZTRTRI
         */
117
        for (k = 0; k < A->nt; k++) {
118
            RUNTIME_iteration_push(morse, A->nt + k);
119

120 121 122
            tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
            ldak = BLKLDD(A, k);
            for (m = k+1; m < A->mt; m++) {
123 124 125 126 127
                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                ldam = BLKLDD(A, m);
                MORSE_TASK_ztrsm(
                    &options,
                    MorseRight, uplo, MorseNoTrans, MorseNonUnit,
128 129 130
                    tempmm, tempkn, A->mb,
                    mzone, A(k, k), ldak,
                           A(m, k), ldam);
131
            }
132
            for (m = k+1; m < A->mt; m++) {
133 134
                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                ldam = BLKLDD(A, m);
135
                for (n = 0; n < k; n++) {
136 137 138
                    MORSE_TASK_zgemm(
                        &options,
                        MorseNoTrans, MorseNoTrans,
139 140 141 142
                        tempmm, A->nb, tempkn, A->mb,
                        zone, A(m, k), ldam,
                              A(k, n), ldak,
                        zone, A(m, n), ldam);
143
                }
144
                RUNTIME_data_flush( sequence, A(m, k) );
145
            }
146
            for (n = 0; n < k; n++) {
147
                RUNTIME_data_flush( sequence, A(k, n) );
148 149 150
                MORSE_TASK_ztrsm(
                    &options,
                    MorseLeft, uplo, MorseNoTrans, MorseNonUnit,
151 152 153
                    tempkn, A->nb, A->mb,
                    zone, A(k, k), ldak,
                          A(k, n), ldak);
154
            }
155
            RUNTIME_data_flush( sequence, A(k, k) );
156 157 158
            MORSE_TASK_ztrtri(
                &options,
                uplo, MorseNonUnit,
159 160
                tempkn, A->mb,
                A(k, k), ldak, A->nb*k);
161 162

            RUNTIME_iteration_pop(morse);
163 164 165 166
        }
        /*
         *  ZLAUUM
         */
167
        for (k = 0; k < A->mt; k++) {
168
            RUNTIME_iteration_push(morse, 2*A->nt + k);
169

170 171 172 173
            tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
            ldak = BLKLDD(A, k);
            for(n = 0; n < k; n++) {
                ldan = BLKLDD(A, n);
174 175 176
                MORSE_TASK_zherk(
                    &options,
                    uplo, MorseConjTrans,
177 178 179
                    A->mb, tempkm, A->mb,
                    1.0, A(k, n), ldak,
                    1.0, A(n, n), ldan);
180

181 182
                for(m = n+1; m < k; m++) {
                    ldam = BLKLDD(A, m);
183 184 185
                    MORSE_TASK_zgemm(
                        &options,
                        MorseConjTrans, MorseNoTrans,
186 187 188 189
                        A->mb, A->nb, tempkm, A->mb,
                        1.0, A(k, m), ldak,
                             A(k, n), ldak,
                        1.0, A(m, n), ldam);
190 191
                }
            }
192
            for (n = 0; n < k; n++) {
193
                RUNTIME_data_flush( sequence, A(k, n) );
194 195 196
                MORSE_TASK_ztrmm(
                    &options,
                    MorseLeft, uplo, MorseConjTrans, MorseNonUnit,
197 198 199
                    tempkm, A->nb, A->mb,
                    1.0, A(k, k), ldak,
                         A(k, n), ldak);
200
            }
201
            RUNTIME_data_flush( sequence, A(k, k) );
202 203
            MORSE_TASK_zlauum(
                &options,
204 205
                uplo, tempkm, A->mb,
                A(k, k), ldak);
206 207

            RUNTIME_iteration_pop(morse);
208 209
        }
        /*
210
         *  ZSYMM Right / Lower
211
         */
212
        for (k = 0; k < C->nt; k++) {
213
            RUNTIME_iteration_push(morse, 3*A->nt + k);
214

215 216 217 218 219 220 221 222 223 224 225 226 227
            tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
            ldak = BLKLDD(A, k);
            zbeta = k == 0 ? beta : zone;

            for (m = 0; m < C->mt; m++) {
                tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                ldbm = BLKLDD(B, m);
                ldcm = BLKLDD(C, m);

                for (n = 0; n < C->nt; n++) {
                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                    ldan = BLKLDD(A, n);

228 229 230 231 232
                    if (k < n) {
                       MORSE_TASK_zgemm(
                           &options,
                           MorseNoTrans, MorseTrans,
                           tempmm, tempnn, tempkn, A->mb,
233 234 235
                           alpha, B(m, k), ldbm,  /* ldbm * K */
                                  A(n, k), ldan,  /* ldan * K */
                           zbeta, C(m, n), ldcm); /* ldcm * Y */
236 237 238 239 240 241 242
                    }
                    else {
                        if (k == n) {
                           MORSE_TASK_zsymm(
                               &options,
                               MorseRight, uplo,
                               tempmm, tempnn, A->mb,
243 244 245
                               alpha, A(k, k), ldak,  /* ldak * Y */
                                      B(m, k), ldbm,  /* ldbm * Y */
                               zbeta, C(m, n), ldcm); /* ldcm * Y */
246 247 248 249 250 251
                        }
                        else {
                            MORSE_TASK_zgemm(
                                &options,
                                MorseNoTrans, MorseNoTrans,
                                tempmm, tempnn, tempkn, A->mb,
252 253 254
                                alpha, B(m, k), ldbm,  /* ldbm * K */
                                       A(k, n), ldak,  /* ldak * Y */
                                zbeta, C(m, n), ldcm); /* ldcm * Y */
255 256 257
                        }
                    }
                }
258
                RUNTIME_data_flush( sequence, B(m, k) );
259 260
            }
            for (n = 0; n <= k; n++) {
261
                RUNTIME_data_flush( sequence, A(k, n) );
262
            }
263 264

            RUNTIME_iteration_pop(morse);
265 266 267 268 269 270 271 272 273 274
        }
    }
    /*
     *  MorseUpper
     */
    else {
        /*
         *  ZPOTRF
         */
        for (k = 0; k < A->nt; k++) {
275
            RUNTIME_iteration_push(morse, k);
276

277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
            tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb;
            ldak = BLKLDD(A, k);
            MORSE_TASK_zpotrf(
                &options,
                MorseUpper,
                tempkm, A->mb,
                A(k, k), ldak, A->nb*k);

            for (n = k+1; n < A->nt; n++) {
                tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb;
                MORSE_TASK_ztrsm(
                    &options,
                    MorseLeft, MorseUpper, MorseConjTrans, MorseNonUnit,
                    A->mb, tempnn, A->mb,
                    zone, A(k, k), ldak,
292
                          A(k, n), ldak);
293
            }
294
            RUNTIME_data_flush( sequence, A(k, k) );
295

296 297 298
            for (m = k+1; m < A->mt; m++) {
                tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb;
                ldam = BLKLDD(A, m);
299 300 301 302 303 304

                MORSE_TASK_zherk(
                    &options,
                    MorseUpper, MorseConjTrans,
                    tempmm, A->mb, A->mb,
                    -1.0, A(k, m), ldak,
305 306 307 308
                     1.0, A(m, m), ldam);

                for (n = m+1; n < A->nt; n++) {
                    tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
309 310 311 312 313 314

                    MORSE_TASK_zgemm(
                        &options,
                        MorseConjTrans, MorseNoTrans,
                        tempmm, tempnn, A->mb, A->mb,
                        mzone, A(k, m), ldak,
315
                               A(k, n), ldak,
316
                        zone,  A(m, n), ldam);
317
                }
318
                RUNTIME_data_flush( sequence, A(k, m) );
319
            }
320 321

            RUNTIME_iteration_pop(morse);
322
        }
323 324 325
        /*
         *  ZTRTRI
         */
326
        for (k = 0; k < A->mt; k++) {
327
            RUNTIME_iteration_push(morse, A->nt + k);
328

329 330 331
            tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
            ldak = BLKLDD(A, k);
            for (n = k+1; n < A->nt; n++) {
332 333 334 335
                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                MORSE_TASK_ztrsm(
                    &options,
                    MorseLeft, uplo, MorseNoTrans, MorseNonUnit,
336 337 338
                    tempkm, tempnn, A->mb,
                    mzone, A(k, k), ldak,
                           A(k, n), ldak);
339
            }
340
            for (n = k+1; n < A->nt; n++) {
341
                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
342 343
                for (m = 0; m < k; m++) {
                    ldam = BLKLDD(A, m);
344 345 346
                    MORSE_TASK_zgemm(
                        &options,
                        MorseNoTrans, MorseNoTrans,
347 348 349 350
                        A->mb, tempnn, tempkm, A->mb,
                        zone, A(m, k), ldam,
                              A(k, n), ldak,
                        zone, A(m, n), ldam);
351
                }
352
                RUNTIME_data_flush( sequence, A(k, n) );
353 354 355
            }
            for (m = 0; m < k; m++) {
                ldam = BLKLDD(A, m);
356
                RUNTIME_data_flush( sequence, A(m, k) );
357 358 359
                MORSE_TASK_ztrsm(
                    &options,
                    MorseRight, uplo, MorseNoTrans, MorseNonUnit,
360 361 362
                    A->mb, tempkm, A->mb,
                    zone, A(k, k), ldak,
                          A(m, k), ldam);
363
            }
364
            RUNTIME_data_flush( sequence, A(k, k) );
365 366 367
            MORSE_TASK_ztrtri(
                &options,
                uplo, MorseNonUnit,
368 369
                tempkm, A->mb,
                A(k, k), ldak, A->mb*k);
370 371

            RUNTIME_iteration_pop(morse);
372 373 374 375
        }
        /*
         *  ZLAUUM
         */
376
        for (k = 0; k < A->mt; k++) {
377
            RUNTIME_iteration_push(morse, 2*A->nt + k);
378

379 380 381 382 383
            tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
            ldak = BLKLDD(A, k);

            for (m = 0; m < k; m++) {
                ldam = BLKLDD(A, m);
384 385 386
                MORSE_TASK_zherk(
                    &options,
                    uplo, MorseNoTrans,
387 388 389
                    A->mb, tempkn, A->mb,
                    1.0, A(m, k), ldam,
                    1.0, A(m, m), ldam);
390

391 392
                for (n = m+1; n < k; n++){
                    ldan = BLKLDD(A, n);
393 394 395
                    MORSE_TASK_zgemm(
                        &options,
                        MorseNoTrans, MorseConjTrans,
396 397 398 399
                        A->mb, A->nb, tempkn, A->mb,
                        1.0, A(m, k), ldam,
                             A(n, k), ldan,
                        1.0, A(m, n), ldam);
400 401
                }
            }
402 403
            for (m = 0; m < k; m++) {
                ldam = BLKLDD(A, m);
404
                RUNTIME_data_flush( sequence, A(m, k) );
405 406 407
                MORSE_TASK_ztrmm(
                    &options,
                    MorseRight, uplo, MorseConjTrans, MorseNonUnit,
408 409 410
                    A->mb, tempkn, A->mb,
                    1.0, A(k, k), ldak,
                         A(m, k), ldam);
411
            }
412
            RUNTIME_data_flush( sequence, A(k, k) );
413 414
            MORSE_TASK_zlauum(
                &options,
415 416
                uplo, tempkn, A->mb,
                A(k, k), ldak);
417 418

            RUNTIME_iteration_pop(morse);
419 420
        }
        /*
421
         *  ZSYMM Right / Upper
422
         */
423
        for (k = 0; k < C->nt; k++) {
424
            RUNTIME_iteration_push(morse, 3*A->nt + k);
425

426 427 428 429 430 431 432 433 434 435 436 437 438
            tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
            ldak = BLKLDD(A, k);
            zbeta = k == 0 ? beta : zone;

            for (m = 0; m < C->mt; m++) {
                tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                ldbm = BLKLDD(B, m);
                ldcm = BLKLDD(C, m);

                for (n = 0; n < C->nt; n++) {
                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                    ldan = BLKLDD(A, n);

439 440 441 442 443
                    if (k < n) {
                        MORSE_TASK_zgemm(
                            &options,
                            MorseNoTrans, MorseNoTrans,
                            tempmm, tempnn, tempkn, A->mb,
444 445 446
                            alpha, B(m, k), ldbm,  /* ldbm * K */
                                   A(k, n), ldak,  /* ldak * Y */
                            zbeta, C(m, n), ldcm); /* ldcm * Y */
447 448 449 450 451 452 453
                    }
                    else {
                        if (k == n) {
                            MORSE_TASK_zsymm(
                                &options,
                                MorseRight, uplo,
                                tempmm, tempnn, A->mb,
454 455 456
                                alpha, A(k, k), ldak,  /* ldak * Y */
                                       B(m, k), ldbm,  /* ldbm * Y */
                                zbeta, C(m, n), ldcm); /* ldcm * Y */
457 458 459 460 461 462
                        }
                        else {
                            MORSE_TASK_zgemm(
                                &options,
                                MorseNoTrans, MorseTrans,
                                tempmm, tempnn, tempkn, A->mb,
463 464 465
                                alpha, B(m, k), ldbm,  /* ldbm * K */
                                       A(n, k), ldan,  /* ldan * K */
                                zbeta, C(m, n), ldcm); /* ldcm * Y */
466 467 468
                        }
                    }
                }
469
                RUNTIME_data_flush( sequence, B(m, k) );
470 471
            }
            for (m = 0; m <= k; m++) {
472
                RUNTIME_data_flush( sequence, A(m, k) );
473
            }
474 475

            RUNTIME_iteration_pop(morse);
476 477 478 479 480
        }
    }

    RUNTIME_options_finalize(&options, morse);
}