pztpgqrt.c 4.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
/**
 *
 * @copyright (c) 2009-2016 The University of Tennessee and The University
 *                          of Tennessee Research Foundation.
 *                          All rights reserved.
 * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                          Univ. Bordeaux. All rights reserved.
 *
 **/

/**
 *
 * @file pztpgqrt.c
 *
 *  MORSE computational routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 0.9.0
 * @author Mathieu Faverge
 * @date 2016-12-15
 * @precisions normal z -> s d c
 *
 **/
#include "control/common.h"

27 28 29 30 31 32 33
#define V1(m,n) V1,  m,  n
#define T1(m,n) T1,  m,  n
#define V2(m,n) V2,  m,  n
#define T2(m,n) T2,  m,  n
#define Q1(m,n) Q1,  m,  n
#define Q2(m,n) Q2,  m,  n
#if defined(CHAMELEON_COPY_DIAG)
34
#define D(k)    D, k, 0
35
#else
36
#define D(k)    V1, k, k
37
#endif
38 39 40 41

/***************************************************************************//**
 *  Parallel tile QR factorization - dynamic scheduling
 **/
42 43 44 45
void morse_pztpgqrt( int L,
                     MORSE_desc_t *V1, MORSE_desc_t *T1,
                     MORSE_desc_t *V2, MORSE_desc_t *T2,
                     MORSE_desc_t *Q1, MORSE_desc_t *Q2,
46
                     MORSE_desc_t *D,
47 48 49 50 51 52 53 54
                     MORSE_sequence_t *sequence, MORSE_request_t *request )
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n;
55 56 57
    int ldvk, ldvm;
    int ldqk, ldqm;
    int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
58
    int ib;
59 60

    /* Dimension of the first column */
61
    int maxm  = chameleon_max( Q2->m - L, 1 );
62
    int maxmt = (maxm % Q2->mb == 0) ? (maxm / Q2->mb) : (maxm / Q2->mb + 1);
63 64 65 66 67 68 69 70 71
    int maxmtk;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;
    /*
72
     * ztpmqrt = Q1->nb * ib
73
     */
74
    ws_worker = Q1->nb * ib;
75 76 77 78 79

    /* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
80
     * ztpmqrt = 2 * Q1->nb * ib
81
     */
82
    ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
83 84 85 86 87 88 89
#endif

    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

90
    for (k = V1->nt-1; k >= 0; k--) {
91
        RUNTIME_iteration_push(morse, k);
92

93 94 95 96 97
        tempkm = k == V1->mt-1 ? V1->m-k*V1->mb : V1->mb;
        tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb;
        tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
        ldvk = BLKLDD(V1, k);
        ldqk = BLKLDD(Q1, k);
98

99 100
        /* Equivalent to the tsmqr step on Q1,Q2 */
        maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
101
        for (m = maxmtk; m > -1; m--) {
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
            tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb;
            templm = m == maxmtk ? tempmm : 0;
            ldvm = BLKLDD(V2, m);
            ldqm = BLKLDD(Q2, m);

            for (n = k; n < Q2->nt; n++) {
                tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb;
                MORSE_TASK_ztpmqrt(
                    &options,
                    MorseLeft, MorseNoTrans,
                    tempmm, tempnn, tempkn, templm, ib, T2->nb,
                    V2(m, k), ldvm,
                    T2(m, k), T2->mb,
                    Q1(k, n), ldqk,
                    Q2(m, n), ldqm );
            }
        }
119

120 121 122 123 124 125
        for (m = Q1->mt - 1; m > k; m--) {
            tempmm = m == Q1->mt-1 ? Q1->m-m*Q1->mb : Q1->mb;
            ldvm = BLKLDD(V1, m);
            ldqm = BLKLDD(Q1, m);
            for (n = k; n < Q1->nt; n++) {
                tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
126 127
                MORSE_TASK_ztpmqrt(
                    &options,
128 129 130 131 132 133
                    MorseLeft, MorseNoTrans,
                    tempmm, tempnn, tempkn, 0, ib, T1->nb,
                    V1(m, k), ldvm,
                    T1(m, k), T1->mb,
                    Q1(k, n), ldqk,
                    Q1(m, n), ldqm );
134 135
            }
        }
136 137 138 139 140 141

#if defined(CHAMELEON_COPY_DIAG)
        MORSE_TASK_zlacpy(
            &options,
            MorseLower, tempkm, tempkk, V1->nb,
            V1(k, k), ldvk,
142
            D(k), ldvk );
143 144 145 146 147
#if defined(CHAMELEON_USE_CUDA)
        MORSE_TASK_zlaset(
            &options,
            MorseUpper, tempkm, tempkk,
            0., 1.,
148
            D(k), ldvk );
149 150 151 152 153 154 155 156
#endif
#endif
        for (n = k; n < Q1->nt; n++) {
            tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
            MORSE_TASK_zunmqr(
                &options,
                MorseLeft, MorseNoTrans,
                tempkm, tempnn, tempkk, ib, T1->nb,
157
                D(k), ldvk,
158 159 160
                T1(k, k), T1->mb,
                Q1(k, n), ldqk);
        }
161 162

        RUNTIME_iteration_pop(morse);
163
    }
164

165 166
    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
Mathieu Faverge's avatar
Mathieu Faverge committed
167
    (void)D;
168
}