pztpgqrt.c 4.92 KB
Newer Older
1 2
/**
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
3 4
 * @copyright 2009-2016 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
5 6 7
 * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                          Univ. Bordeaux. All rights reserved.
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
8
 ***
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 *
 * @file pztpgqrt.c
 *
 *  MORSE computational routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 0.9.0
 * @author Mathieu Faverge
 * @date 2016-12-15
 * @precisions normal z -> s d c
 *
 **/
#include "control/common.h"

24 25 26 27 28 29 30
#define V1(m,n) V1,  m,  n
#define T1(m,n) T1,  m,  n
#define V2(m,n) V2,  m,  n
#define T2(m,n) T2,  m,  n
#define Q1(m,n) Q1,  m,  n
#define Q2(m,n) Q2,  m,  n
#if defined(CHAMELEON_COPY_DIAG)
31
#define D(k)    D, k, 0
32
#else
33
#define D(k)    V1, k, k
34
#endif
35

Mathieu Faverge's avatar
Mathieu Faverge committed
36
/*******************************************************************************
37 38
 *  Parallel tile QR factorization - dynamic scheduling
 **/
39 40 41 42
void morse_pztpgqrt( int L,
                     MORSE_desc_t *V1, MORSE_desc_t *T1,
                     MORSE_desc_t *V2, MORSE_desc_t *T2,
                     MORSE_desc_t *Q1, MORSE_desc_t *Q2,
43
                     MORSE_desc_t *D,
44 45 46 47 48 49 50 51
                     MORSE_sequence_t *sequence, MORSE_request_t *request )
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n;
52 53 54
    int ldvk, ldvm;
    int ldqk, ldqm;
    int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
55
    int ib;
56 57

    /* Dimension of the first column */
58
    int maxm  = chameleon_max( Q2->m - L, 1 );
59
    int maxmt = (maxm % Q2->mb == 0) ? (maxm / Q2->mb) : (maxm / Q2->mb + 1);
60 61 62 63 64 65 66 67 68
    int maxmtk;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;
    /*
69
     * ztpmqrt = Q1->nb * ib
70
     */
71
    ws_worker = Q1->nb * ib;
72 73 74 75 76

    /* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
77
     * ztpmqrt = 2 * Q1->nb * ib
78
     */
79
    ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
80 81 82 83 84 85 86
#endif

    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

87
    for (k = V1->nt-1; k >= 0; k--) {
88
        RUNTIME_iteration_push(morse, k);
89

90 91 92 93 94
        tempkm = k == V1->mt-1 ? V1->m-k*V1->mb : V1->mb;
        tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb;
        tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
        ldvk = BLKLDD(V1, k);
        ldqk = BLKLDD(Q1, k);
95

96 97
        /* Equivalent to the tsmqr step on Q1,Q2 */
        maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
98
        for (m = maxmtk; m > -1; m--) {
99 100 101 102 103 104 105
            tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb;
            templm = m == maxmtk ? tempmm : 0;
            ldvm = BLKLDD(V2, m);
            ldqm = BLKLDD(Q2, m);

            for (n = k; n < Q2->nt; n++) {
                tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb;
106
                /* TT kernel */
107 108 109 110 111 112 113 114 115 116
                MORSE_TASK_ztpmqrt(
                    &options,
                    MorseLeft, MorseNoTrans,
                    tempmm, tempnn, tempkn, templm, ib, T2->nb,
                    V2(m, k), ldvm,
                    T2(m, k), T2->mb,
                    Q1(k, n), ldqk,
                    Q2(m, n), ldqm );
            }
        }
117

118 119 120 121 122 123
        for (m = Q1->mt - 1; m > k; m--) {
            tempmm = m == Q1->mt-1 ? Q1->m-m*Q1->mb : Q1->mb;
            ldvm = BLKLDD(V1, m);
            ldqm = BLKLDD(Q1, m);
            for (n = k; n < Q1->nt; n++) {
                tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
124
                /* TS kernel */
125 126
                MORSE_TASK_ztpmqrt(
                    &options,
127 128 129 130 131 132
                    MorseLeft, MorseNoTrans,
                    tempmm, tempnn, tempkn, 0, ib, T1->nb,
                    V1(m, k), ldvm,
                    T1(m, k), T1->mb,
                    Q1(k, n), ldqk,
                    Q1(m, n), ldqm );
133 134
            }
        }
135 136 137 138 139 140

#if defined(CHAMELEON_COPY_DIAG)
        MORSE_TASK_zlacpy(
            &options,
            MorseLower, tempkm, tempkk, V1->nb,
            V1(k, k), ldvk,
141
            D(k), ldvk );
142 143 144 145 146
#if defined(CHAMELEON_USE_CUDA)
        MORSE_TASK_zlaset(
            &options,
            MorseUpper, tempkm, tempkk,
            0., 1.,
147
            D(k), ldvk );
148 149 150 151 152 153 154 155
#endif
#endif
        for (n = k; n < Q1->nt; n++) {
            tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
            MORSE_TASK_zunmqr(
                &options,
                MorseLeft, MorseNoTrans,
                tempkm, tempnn, tempkk, ib, T1->nb,
156
                D(k), ldvk,
157 158 159
                T1(k, k), T1->mb,
                Q1(k, n), ldqk);
        }
160 161

        RUNTIME_iteration_pop(morse);
162
    }
163

164 165
    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
Mathieu Faverge's avatar
Mathieu Faverge committed
166
    (void)D;
167
}