pzgeqrfrh.c 6.97 KB
Newer Older
1
/**
2 3
 *
 * @file pzgeqrfrh.c
4
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
5 6
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
7 8
 * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
9
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
10
 ***
11
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
12
 * @brief Chameleon zgeqrfrh parallel algorithm
13
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
14
 * @version 1.0.0
15 16 17 18 19 20 21 22 23 24 25
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for MORSE 1.0.0
 * @author Jakub Kurzak
 * @author Hatem Ltaief
 * @author Dulceneia Becker
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
26
 */
27
#include "control/common.h"
28

29 30
#define A(m,n)  A,  (m),  (n)
#define T(m,n)  T,  (m),  (n)
31
#define T2(m,n) T,  (m), ((n)+A->nt)
32
#if defined(CHAMELEON_COPY_DIAG)
33
#define D(m,n) D, ((m)/BS), 0
34
#else
35
#define D(m,n) A,  (m),  (n)
36
#endif
37

38
/**
39
 *  Parallel tile QR factorization (reduction Householder) - dynamic scheduling
40
 */
41
void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS,
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
                     MORSE_sequence_t *sequence, MORSE_request_t *request)
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n;
    int K, M, RD;
    int ldaM, ldam, ldaMRD;
    int tempkmin, tempkn, tempMm, tempnn, tempmm, tempMRDm;
    int ib;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;

    /*
     * zgeqrt = A->nb * (ib+1)
     * zunmqr = A->nb * ib
     * ztsqrt = A->nb * (ib+1)
     * zttqrt = A->nb * (ib+1)
     * ztsmqr = A->nb * ib
     * zttmqr = A->nb * ib
     */
    ws_worker = A->nb * (ib+1);

    /* Allocation of temporary (scratch) working space */
Mathieu Faverge's avatar
Mathieu Faverge committed
73 74 75 76 77 78
#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
     * zunmqr = A->nb * ib
     * ztsmqr = 2 * A->nb * ib
     */
79
    ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
Mathieu Faverge's avatar
Mathieu Faverge committed
80 81
#endif

82 83 84 85 86
    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

87
    K = chameleon_min(A->mt, A->nt);
88
    for (k = 0; k < K; k++) {
89
        RUNTIME_iteration_push(morse, k);
90

91 92 93
        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
        for (M = k; M < A->mt; M += BS) {
            tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb;
94
            tempkmin = chameleon_min(tempMm, tempkn);
95
            ldaM = BLKLDD(A, M);
96

97 98 99 100 101 102
            MORSE_TASK_zgeqrt(
                &options,
                tempMm, tempkn, ib, T->nb,
                A(M, k), ldaM,
                T(M, k), T->mb);
            if ( k < (A->nt-1) ) {
103 104 105 106 107
#if defined(CHAMELEON_COPY_DIAG)
            MORSE_TASK_zlacpy(
                &options,
                MorseLower, tempMm, A->nb, A->nb,
                A(M, k), ldaM,
108
                D(M, k), ldaM );
Mathieu Faverge's avatar
Mathieu Faverge committed
109
#if defined(CHAMELEON_USE_CUDA)
110 111 112 113
                MORSE_TASK_zlaset(
                    &options,
                    MorseUpper, tempMm, A->nb,
                    0., 1.,
114
                    D(M, k), ldaM );
115
#endif
116
#endif
117
            }
118 119 120 121 122 123
            for (n = k+1; n < A->nt; n++) {
                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                MORSE_TASK_zunmqr(
                    &options,
                    MorseLeft, MorseConjTrans,
                    tempMm, tempnn, tempkmin, ib, T->nb,
124
                    D(M, k), ldaM,
125 126 127
                    T(M, k), T->mb,
                    A(M, n), ldaM);
            }
128 129
            MORSE_TASK_dataflush( &options, D(M, k) );
            MORSE_TASK_dataflush( &options, T(M, k) );
130

131
            for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) {
132 133
                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                ldam = BLKLDD(A, m);
134 135 136 137

                RUNTIME_data_migrate( sequence, A(M, k),
                                      A->get_rankof( A, m, k ) );

138
                /* TS kernel */
139
                MORSE_TASK_ztpqrt(
140
                    &options,
141
                    tempmm, tempkn, 0, ib, T->nb,
142 143 144 145 146 147
                    A(M, k), ldaM,
                    A(m, k), ldam,
                    T(m, k), T->mb);

                for (n = k+1; n < A->nt; n++) {
                    tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
148 149 150 151 152

                    RUNTIME_data_migrate( sequence, A(M, n),
                                          A->get_rankof( A, m, n ) );

                    MORSE_TASK_ztpmqrt(
153 154
                        &options,
                        MorseLeft, MorseConjTrans,
155
                        tempmm, tempnn, A->nb, 0, ib, T->nb,
156
                        A(m, k), ldam,
157 158 159
                        T(m, k), T->mb,
                        A(M, n), ldaM,
                        A(m, n), ldam);
160
                }
161 162
                MORSE_TASK_dataflush( &options, A(m, k) );
                MORSE_TASK_dataflush( &options, T(m, k) );
163 164 165 166 167 168 169
            }
        }
        for (RD = BS; RD < A->mt-k; RD *= 2) {
            for (M = k; M+RD < A->mt; M += 2*RD) {
                tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
                ldaM   = BLKLDD(A, M   );
                ldaMRD = BLKLDD(A, M+RD);
170 171 172 173 174 175

                RUNTIME_data_migrate( sequence, A(M, k),
                                      A->get_rankof( A, M+RD, k ) );
                RUNTIME_data_migrate( sequence, A(M+RD, k),
                                      A->get_rankof( A, M+RD, k ) );

176
                /* TT kernel */
177
                MORSE_TASK_ztpqrt(
178
                    &options,
Mathieu Faverge's avatar
Mathieu Faverge committed
179
                    tempMRDm, tempkn, chameleon_min( tempMRDm, tempkn ), ib, T->nb,
180 181 182 183 184 185
                    A (M   , k), ldaM,
                    A (M+RD, k), ldaMRD,
                    T2(M+RD, k), T->mb);

                for (n = k+1; n < A->nt; n++) {
                    tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
186 187 188 189 190 191 192

                    RUNTIME_data_migrate( sequence, A(M, n),
                                          A->get_rankof( A, M+RD, n ) );
                    RUNTIME_data_migrate( sequence, A(M+RD, n),
                                          A->get_rankof( A, M+RD, n ) );

                    MORSE_TASK_ztpmqrt(
193 194
                        &options,
                        MorseLeft, MorseConjTrans,
195
                        tempMRDm, tempnn, A->nb, tempMRDm, ib, T->nb,
196
                        A (M+RD, k), ldaMRD,
197 198 199
                        T2(M+RD, k), T->mb,
                        A (M,    n), ldaM,
                        A (M+RD, n), ldaMRD);
200
                }
201 202
                MORSE_TASK_dataflush( &options, A (M+RD, k) );
                MORSE_TASK_dataflush( &options, T2(M+RD, k) );
203 204
            }
        }
205 206 207 208 209 210 211

        /* Restore the original location of the tiles */
        for (n = k; n < A->nt; n++) {
            RUNTIME_data_migrate( sequence, A(k, n),
                                  A->get_rankof( A, k, n ) );
        }

212
        RUNTIME_iteration_pop(morse);
213
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
214

215 216
    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
Mathieu Faverge's avatar
Mathieu Faverge committed
217
    (void)D;
218
}