pztpgqrt.c 5.38 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
/**
 *
 * @copyright (c) 2009-2016 The University of Tennessee and The University
 *                          of Tennessee Research Foundation.
 *                          All rights reserved.
 * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                          Univ. Bordeaux. All rights reserved.
 *
 **/

/**
 *
 * @file pztpgqrt.c
 *
 *  MORSE computational routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 0.9.0
 * @author Mathieu Faverge
 * @date 2016-12-15
 * @precisions normal z -> s d c
 *
 **/
#include "control/common.h"

27 28 29 30 31 32 33 34 35 36 37
#define V1(m,n) V1,  m,  n
#define T1(m,n) T1,  m,  n
#define V2(m,n) V2,  m,  n
#define T2(m,n) T2,  m,  n
#define Q1(m,n) Q1,  m,  n
#define Q2(m,n) Q2,  m,  n
#if defined(CHAMELEON_COPY_DIAG)
#define DIAG(k) DIAG, k, 0
#else
#define DIAG(k) V1, k, k
#endif
38 39 40 41

/***************************************************************************//**
 *  Parallel tile QR factorization - dynamic scheduling
 **/
42 43 44 45
void morse_pztpgqrt( int L,
                     MORSE_desc_t *V1, MORSE_desc_t *T1,
                     MORSE_desc_t *V2, MORSE_desc_t *T2,
                     MORSE_desc_t *Q1, MORSE_desc_t *Q2,
46 47 48 49 50 51
                     MORSE_sequence_t *sequence, MORSE_request_t *request )
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    size_t ws_worker = 0;
    size_t ws_host = 0;
52
    MORSE_desc_t *DIAG = NULL;
53 54

    int k, m, n;
55 56 57 58
    int ldvk, ldvm;
    int ldqk, ldqm;
    int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
    int ib, minMT;
59 60

    /* Dimension of the first column */
61 62
    int maxm  = Q2->m - L;
    int maxmt = (maxm % Q2->mb == 0) ? (maxm / Q2->mb) : (maxm / Q2->mb + 1);
63 64 65 66 67 68 69 70 71
    int maxmtk;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;

72 73 74 75 76 77
    if (V1->m > V1->n) {
        minMT = V1->nt;
    } else {
        minMT = V1->mt;
    }

78
    /*
79
     * ztpmqrt = Q1->nb * ib
80
     */
81
    ws_worker = Q1->nb * ib;
82 83 84 85 86

    /* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
87
     * ztpmqrt = 2 * Q1->nb * ib
88
     */
89
    ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
90 91 92 93 94 95 96
#endif

    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

97 98 99 100 101 102 103
#if defined(CHAMELEON_COPY_DIAG)
    /* necessary to avoid dependencies between tasks regarding the diag tile */
    DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
    morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
#endif

    for (k = V1->nt-1; k >= 0; k--) {
THIBAULT Samuel's avatar
THIBAULT Samuel committed
104
        RUNTIME_set_iteration(morse, k);
105

106 107 108 109 110
        tempkm = k == V1->mt-1 ? V1->m-k*V1->mb : V1->mb;
        tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb;
        tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
        ldvk = BLKLDD(V1, k);
        ldqk = BLKLDD(Q1, k);
111

112 113
        /* Equivalent to the tsmqr step on Q1,Q2 */
        maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
114
        for (m = maxmtk; m > -1; m--) {
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
            tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb;
            templm = m == maxmtk ? tempmm : 0;
            ldvm = BLKLDD(V2, m);
            ldqm = BLKLDD(Q2, m);

            for (n = k; n < Q2->nt; n++) {
                tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb;
                MORSE_TASK_ztpmqrt(
                    &options,
                    MorseLeft, MorseNoTrans,
                    tempmm, tempnn, tempkn, templm, ib, T2->nb,
                    V2(m, k), ldvm,
                    T2(m, k), T2->mb,
                    Q1(k, n), ldqk,
                    Q2(m, n), ldqm );
            }
        }
132

133 134 135 136 137 138
        for (m = Q1->mt - 1; m > k; m--) {
            tempmm = m == Q1->mt-1 ? Q1->m-m*Q1->mb : Q1->mb;
            ldvm = BLKLDD(V1, m);
            ldqm = BLKLDD(Q1, m);
            for (n = k; n < Q1->nt; n++) {
                tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
139 140
                MORSE_TASK_ztpmqrt(
                    &options,
141 142 143 144 145 146
                    MorseLeft, MorseNoTrans,
                    tempmm, tempnn, tempkn, 0, ib, T1->nb,
                    V1(m, k), ldvm,
                    T1(m, k), T1->mb,
                    Q1(k, n), ldqk,
                    Q1(m, n), ldqm );
147 148
            }
        }
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173

#if defined(CHAMELEON_COPY_DIAG)
        MORSE_TASK_zlacpy(
            &options,
            MorseLower, tempkm, tempkk, V1->nb,
            V1(k, k), ldvk,
            DIAG(k), ldvk );
#if defined(CHAMELEON_USE_CUDA)
        MORSE_TASK_zlaset(
            &options,
            MorseUpper, tempkm, tempkk,
            0., 1.,
            DIAG(k), ldvk );
#endif
#endif
        for (n = k; n < Q1->nt; n++) {
            tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb;
            MORSE_TASK_zunmqr(
                &options,
                MorseLeft, MorseNoTrans,
                tempkm, tempnn, tempkk, ib, T1->nb,
                DIAG(k), ldvk,
                T1(k, k), T1->mb,
                Q1(k, n), ldqk);
        }
174
    }
175

176 177 178
    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
    MORSE_TASK_dataflush_all();
179 180 181 182 183 184 185

#if defined(CHAMELEON_COPY_DIAG)
    MORSE_Sequence_Wait(sequence);
    morse_desc_mat_free(DIAG);
    free(DIAG);
#endif
    (void)DIAG;
186
}