/**
 *
 * @file pzgeqrfrh.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 *
 *  MORSE auxiliary routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 1.0.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for MORSE 1.0.0
 * @author Jakub Kurzak
 * @author Hatem Ltaief
 * @author Dulceneia Becker
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
 **/
#include "control/common.h"
/*
 * Tile-addressing shorthands. Each one expands to the argument triple
 * "descriptor, tile row, tile column" used in the task-submission calls of
 * this file. They are textual macros: the identifiers A, T, D and BS must be
 * visible at the point of use.
 */
#define A(m,n)  A,  (m),  (n)
#define T(m,n)  T,  (m),  (n)
/* Same descriptor as T(), with the tile column shifted by A->nt. */
#define T2(m,n) T,  (m), ((n)+A->nt)
#if defined(CHAMELEON_COPY_DIAG)
/* Tile of D at row (m)/BS and column 0; the n argument is not used. */
#define D(m,n) D, ((m)/BS), 0
#else
/* Without CHAMELEON_COPY_DIAG, D() designates the same tile as A(). */
#define D(m,n) A,  (m),  (n)
#endif

Mathieu Faverge's avatar
Mathieu Faverge committed
41
/*******************************************************************************
42 43
 *  Parallel tile QR factorization (reduction Householder) - dynamic scheduling
 **/
44
void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS,
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
                     MORSE_sequence_t *sequence, MORSE_request_t *request)
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n;
    int K, M, RD;
    int ldaM, ldam, ldaMRD;
    int tempkmin, tempkn, tempMm, tempnn, tempmm, tempMRDm;
    int ib;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;

    /*
     * zgeqrt = A->nb * (ib+1)
     * zunmqr = A->nb * ib
     * ztsqrt = A->nb * (ib+1)
     * zttqrt = A->nb * (ib+1)
     * ztsmqr = A->nb * ib
     * zttmqr = A->nb * ib
     */
    ws_worker = A->nb * (ib+1);

    /* Allocation of temporary (scratch) working space */
Mathieu Faverge's avatar
Mathieu Faverge committed
76 77 78 79 80 81
#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
     * zunmqr = A->nb * ib
     * ztsmqr = 2 * A->nb * ib
     */
82
    ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
Mathieu Faverge's avatar
Mathieu Faverge committed
83 84
#endif

85 86 87 88 89
    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

90
    K = chameleon_min(A->mt, A->nt);
91
    for (k = 0; k < K; k++) {
92
        RUNTIME_iteration_push(morse, k);
93

94 95 96
        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
        for (M = k; M < A->mt; M += BS) {
            tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb;
97
            tempkmin = chameleon_min(tempMm, tempkn);
98
            ldaM = BLKLDD(A, M);
99

100 101 102 103 104 105
            MORSE_TASK_zgeqrt(
                &options,
                tempMm, tempkn, ib, T->nb,
                A(M, k), ldaM,
                T(M, k), T->mb);
            if ( k < (A->nt-1) ) {
106 107 108 109 110
#if defined(CHAMELEON_COPY_DIAG)
            MORSE_TASK_zlacpy(
                &options,
                MorseLower, tempMm, A->nb, A->nb,
                A(M, k), ldaM,
111
                D(M, k), ldaM );
Mathieu Faverge's avatar
Mathieu Faverge committed
112
#if defined(CHAMELEON_USE_CUDA)
113 114 115 116
                MORSE_TASK_zlaset(
                    &options,
                    MorseUpper, tempMm, A->nb,
                    0., 1.,
117
                    D(M, k), ldaM );
118
#endif
119
#endif
120
            }
121 122 123 124 125 126
            for (n = k+1; n < A->nt; n++) {
                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                MORSE_TASK_zunmqr(
                    &options,
                    MorseLeft, MorseConjTrans,
                    tempMm, tempnn, tempkmin, ib, T->nb,
127
                    D(M, k), ldaM,
128 129 130
                    T(M, k), T->mb,
                    A(M, n), ldaM);
            }
131

132
            for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) {
133 134
                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                ldam = BLKLDD(A, m);
135 136 137 138

                RUNTIME_data_migrate( sequence, A(M, k),
                                      A->get_rankof( A, m, k ) );

139
                /* TS kernel */
140
                MORSE_TASK_ztpqrt(
141
                    &options,
142
                    tempmm, tempkn, 0, ib, T->nb,
143 144 145 146 147 148
                    A(M, k), ldaM,
                    A(m, k), ldam,
                    T(m, k), T->mb);

                for (n = k+1; n < A->nt; n++) {
                    tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
149 150 151 152 153

                    RUNTIME_data_migrate( sequence, A(M, n),
                                          A->get_rankof( A, m, n ) );

                    MORSE_TASK_ztpmqrt(
154 155
                        &options,
                        MorseLeft, MorseConjTrans,
156
                        tempmm, tempnn, A->nb, 0, ib, T->nb,
157
                        A(m, k), ldam,
158 159 160
                        T(m, k), T->mb,
                        A(M, n), ldaM,
                        A(m, n), ldam);
161 162 163 164 165 166 167 168
                }
            }
        }
        for (RD = BS; RD < A->mt-k; RD *= 2) {
            for (M = k; M+RD < A->mt; M += 2*RD) {
                tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
                ldaM   = BLKLDD(A, M   );
                ldaMRD = BLKLDD(A, M+RD);
169 170 171 172 173 174

                RUNTIME_data_migrate( sequence, A(M, k),
                                      A->get_rankof( A, M+RD, k ) );
                RUNTIME_data_migrate( sequence, A(M+RD, k),
                                      A->get_rankof( A, M+RD, k ) );

175
                /* TT kernel */
176
                MORSE_TASK_ztpqrt(
177
                    &options,
178
                    tempMRDm, tempkn, chameleon_min( tempMRDm, tempkn ), ib, T->nb,
179 180 181 182 183 184
                    A (M   , k), ldaM,
                    A (M+RD, k), ldaMRD,
                    T2(M+RD, k), T->mb);

                for (n = k+1; n < A->nt; n++) {
                    tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
185 186 187 188 189 190 191

                    RUNTIME_data_migrate( sequence, A(M, n),
                                          A->get_rankof( A, M+RD, n ) );
                    RUNTIME_data_migrate( sequence, A(M+RD, n),
                                          A->get_rankof( A, M+RD, n ) );

                    MORSE_TASK_ztpmqrt(
192 193
                        &options,
                        MorseLeft, MorseConjTrans,
194
                        tempMRDm, tempnn, A->nb, tempMRDm, ib, T->nb,
195
                        A (M+RD, k), ldaMRD,
196 197 198
                        T2(M+RD, k), T->mb,
                        A (M,    n), ldaM,
                        A (M+RD, n), ldaMRD);
199 200 201
                }
            }
        }
202 203 204 205 206 207 208

        /* Restore the original location of the tiles */
        for (n = k; n < A->nt; n++) {
            RUNTIME_data_migrate( sequence, A(k, n),
                                  A->get_rankof( A, k, n ) );
        }

209
        RUNTIME_iteration_pop(morse);
210
    }
211

212 213
    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
Mathieu Faverge's avatar
Mathieu Faverge committed
214
    (void)D;
215
}