pztpgqrt.c 4.96 KB
Newer Older
1
/**
2 3
 *
 * @file pztpgqrt.c
4
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
5 6
 * @copyright 2009-2016 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
7 8 9
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 * @copyright 2016-2018 KAUST. All rights reserved.
10
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
11
 ***
12 13 14 15 16
 *
 *  MORSE computational routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
17
 * @version 1.0.0
18 19 20 21 22 23 24
 * @author Mathieu Faverge
 * @date 2016-12-15
 * @precisions normal z -> s d c
 *
 **/
#include "control/common.h"

25 26 27 28 29 30 31
#define V1(m,n) V1,  m,  n
#define T1(m,n) T1,  m,  n
#define V2(m,n) V2,  m,  n
#define T2(m,n) T2,  m,  n
#define Q1(m,n) Q1,  m,  n
#define Q2(m,n) Q2,  m,  n
#if defined(CHAMELEON_COPY_DIAG)
32
#define D(k)    D, k, 0
33
#else
34
#define D(k)    V1, k, k
35
#endif
36

Mathieu Faverge's avatar
Mathieu Faverge committed
37
/*******************************************************************************
38 39
 *  Parallel tile QR factorization - dynamic scheduling
 **/
40 41 42 43
void morse_pztpgqrt( int L,
                     MORSE_desc_t *V1, MORSE_desc_t *T1,
                     MORSE_desc_t *V2, MORSE_desc_t *T2,
                     MORSE_desc_t *Q1, MORSE_desc_t *Q2,
44
                     MORSE_desc_t *D,
45 46 47 48 49 50 51 52
                     MORSE_sequence_t *sequence, MORSE_request_t *request )
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n;
53 54 55
    int ldvk, ldvm;
    int ldqk, ldqm;
    int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
56
    int ib;
57 58

    /* Dimension of the first column */
59
    int maxm  = chameleon_max( Q2->m - L, 1 );
60
    int maxmt = (maxm % Q2->mb == 0) ? (maxm / Q2->mb) : (maxm / Q2->mb + 1);
61 62 63 64 65 66 67 68 69
    int maxmtk;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;
    /*
70
     * ztpmqrt = Q1->nb * ib
71
     */
72
    ws_worker = Q1->nb * ib;
73 74 75 76 77

    /* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
78
     * ztpmqrt = 2 * Q1->nb * ib
79
     */
80
    ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
81 82 83 84 85 86 87
#endif

    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

88
    for (k = V1->nt-1; k >= 0; k--) {
89
        RUNTIME_iteration_push(morse, k);
90

91 92 93 94 95
        tempkm = k == V1->mt-1 ? V1->m-k*V1->mb : V1->mb;
        tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb;
        tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
        ldvk = BLKLDD(V1, k);
        ldqk = BLKLDD(Q1, k);
96

97 98
        /* Equivalent to the tsmqr step on Q1,Q2 */
        maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
99
        for (m = maxmtk; m > -1; m--) {
100 101 102 103 104 105 106
            tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb;
            templm = m == maxmtk ? tempmm : 0;
            ldvm = BLKLDD(V2, m);
            ldqm = BLKLDD(Q2, m);

            for (n = k; n < Q2->nt; n++) {
                tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb;
107
                /* TT kernel */
108 109 110 111 112 113 114 115 116 117
                MORSE_TASK_ztpmqrt(
                    &options,
                    MorseLeft, MorseNoTrans,
                    tempmm, tempnn, tempkn, templm, ib, T2->nb,
                    V2(m, k), ldvm,
                    T2(m, k), T2->mb,
                    Q1(k, n), ldqk,
                    Q2(m, n), ldqm );
            }
        }
118

119 120 121 122 123 124
        for (m = Q1->mt - 1; m > k; m--) {
            tempmm = m == Q1->mt-1 ? Q1->m-m*Q1->mb : Q1->mb;
            ldvm = BLKLDD(V1, m);
            ldqm = BLKLDD(Q1, m);
            for (n = k; n < Q1->nt; n++) {
                tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
125
                /* TS kernel */
126 127
                MORSE_TASK_ztpmqrt(
                    &options,
128 129 130 131 132 133
                    MorseLeft, MorseNoTrans,
                    tempmm, tempnn, tempkn, 0, ib, T1->nb,
                    V1(m, k), ldvm,
                    T1(m, k), T1->mb,
                    Q1(k, n), ldqk,
                    Q1(m, n), ldqm );
134 135
            }
        }
136 137 138 139 140 141

#if defined(CHAMELEON_COPY_DIAG)
        MORSE_TASK_zlacpy(
            &options,
            MorseLower, tempkm, tempkk, V1->nb,
            V1(k, k), ldvk,
142
            D(k), ldvk );
143 144 145 146 147
#if defined(CHAMELEON_USE_CUDA)
        MORSE_TASK_zlaset(
            &options,
            MorseUpper, tempkm, tempkk,
            0., 1.,
148
            D(k), ldvk );
149 150 151 152 153 154 155 156
#endif
#endif
        for (n = k; n < Q1->nt; n++) {
            tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
            MORSE_TASK_zunmqr(
                &options,
                MorseLeft, MorseNoTrans,
                tempkm, tempnn, tempkk, ib, T1->nb,
157
                D(k), ldvk,
158 159 160
                T1(k, k), T1->mb,
                Q1(k, n), ldqk);
        }
161 162

        RUNTIME_iteration_pop(morse);
163
    }
164

165 166
    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
Mathieu Faverge's avatar
Mathieu Faverge committed
167
    (void)D;
168
}