/**
 *
 * @file pzunmqr.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 *
 *  MORSE auxiliary routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 1.0.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for MORSE 1.0.0
 * @author Hatem Ltaief
 * @author Jakub Kurzak
 * @author Azzam Haidar
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
 **/
#include "control/common.h"
/*
 * Tile-argument shorthands used when submitting tasks.
 *
 * X(m,n) expands to the comma-separated argument triple `X, m, n`, i.e. the
 * descriptor followed by the two tile indices, so a single macro call fills
 * three consecutive arguments of a task-insertion function.
 */
#define A(m,n) A,  m,  n
#define B(m,n) B,  m,  n
#define T(m,n) T,  m,  n

/*
 * D(k) names the tile of D handed to the zunmqr tasks for step k:
 * tile (k, 0) when CHAMELEON_COPY_DIAG is defined, tile (k, k) otherwise.
 */
#if defined(CHAMELEON_COPY_DIAG)
#define D(k)   D,  k,  0
#else
#define D(k)   D,  k,  k
#endif

/**
42 43 44
 *  Parallel application of Q using tile V - QR factorization - dynamic scheduling
 **/
void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
45
                   MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_desc_t *D,
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
                   MORSE_sequence_t *sequence, MORSE_request_t *request)
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n;
    int ldak, ldbk, ldam, ldan, ldbm;
    int tempkm, tempnn, tempkmin, tempmm, tempkn;
    int ib, minMT, minM;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;

65 66 67 68 69 70 71 72
    if (A->m > A->n) {
        minM  = A->n;
        minMT = A->nt;
    } else {
        minM  = A->m;
        minMT = A->mt;
    }

73 74 75 76
    if (D == NULL) {
        D = A;
    }

77 78 79 80 81 82
    /*
     * zunmqr = A->nb * ib
     * ztsmqr = A->nb * ib
     */
    ws_worker = A->nb * ib;

Mathieu Faverge's avatar
Mathieu Faverge committed
83
#if defined(CHAMELEON_USE_CUDA)
84 85 86 87 88
    /* Worker space
     *
     * zunmqr = A->nb * ib
     * ztsmqr = 2 * A->nb * ib
     */
89
    ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
90 91 92 93 94 95 96 97 98
#endif

    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

    if (side == MorseLeft ) {
        if (trans == MorseConjTrans) {
99 100 101
            /*
             *  MorseLeft / MorseConjTrans
             */
102
            for (k = 0; k < minMT; k++) {
103
                RUNTIME_iteration_push(morse, k);
104

105 106 107 108
                tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
                ldak = BLKLDD(A, k);
                ldbk = BLKLDD(B, k);
109
#if defined(CHAMELEON_COPY_DIAG)
110 111 112 113
                MORSE_TASK_zlacpy(
                    &options,
                    MorseLower, tempkm, tempkmin, A->nb,
                    A(k, k), ldak,
114
                    D(k), ldak );
Mathieu Faverge's avatar
Mathieu Faverge committed
115
#if defined(CHAMELEON_USE_CUDA)
116 117 118 119
                MORSE_TASK_zlaset(
                    &options,
                    MorseUpper, tempkm, tempkmin,
                    0., 1.,
120
                    D(k), ldak );
121
#endif
122 123 124 125 126 127 128
#endif
                for (n = 0; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                    MORSE_TASK_zunmqr(
                        &options,
                        side, trans,
                        tempkm, tempnn, tempkmin, ib, T->nb,
129
                        D(k), ldak,
130 131 132 133 134 135 136 137 138
                        T(k, k), T->mb,
                        B(k, n), ldbk);
                }
                for (m = k+1; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldam = BLKLDD(A, m);
                    ldbm = BLKLDD(B, m);
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
139 140 141 142

                        RUNTIME_data_migrate( sequence, B(k, n),
                                              B->get_rankof( B, m, n ) );

143
                        /* TS kernel */
144
                        MORSE_TASK_ztpmqrt(
145 146
                            &options,
                            side, trans,
147
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
148
                            A(m, k), ldam,
149 150 151
                            T(m, k), T->mb,
                            B(k, n), ldbk,
                            B(m, n), ldbm);
152 153
                    }
                }
154

155 156 157 158 159 160
                /* Restore the original location of the tiles */
                for (n = 0; n < B->nt; n++) {
                    RUNTIME_data_migrate( sequence, B(k, n),
                                          B->get_rankof( B, k, n ) );
                }

161
                RUNTIME_iteration_pop(morse);
162 163 164 165 166 167 168
            }
        }
        /*
         *  MorseLeft / MorseNoTrans
         */
        else {
            for (k = minMT-1; k >= 0; k--) {
169
                RUNTIME_iteration_push(morse, k);
170

Mathieu Faverge's avatar
Mathieu Faverge committed
171
                tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
172 173 174 175 176 177 178 179 180
                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
                ldak = BLKLDD(A, k);
                ldbk = BLKLDD(B, k);
                for (m = B->mt-1; m > k; m--) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldam = BLKLDD(A, m);
                    ldbm = BLKLDD(B, m);
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
181 182 183 184

                        RUNTIME_data_migrate( sequence, B(k, n),
                                              B->get_rankof( B, m, n ) );

185
                        /* TS kernel */
186
                        MORSE_TASK_ztpmqrt(
187 188
                            &options,
                            side, trans,
189
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
190
                            A(m, k), ldam,
191 192 193
                            T(m, k), T->mb,
                            B(k, n), ldbk,
                            B(m, n), ldbm);
194 195
                    }
                }
196
#if defined(CHAMELEON_COPY_DIAG)
197 198 199 200
                MORSE_TASK_zlacpy(
                    &options,
                    MorseLower, tempkm, tempkmin, A->nb,
                    A(k, k), ldak,
201
                    D(k), ldak );
Mathieu Faverge's avatar
Mathieu Faverge committed
202
#if defined(CHAMELEON_USE_CUDA)
203 204 205 206
                MORSE_TASK_zlaset(
                    &options,
                    MorseUpper, tempkm, tempkmin,
                    0., 1.,
207
                    D(k), ldak );
208
#endif
209 210 211
#endif
                for (n = 0; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
212 213 214 215

                    RUNTIME_data_migrate( sequence, B(k, n),
                                          B->get_rankof( B, k, n ) );

216 217 218 219
                    MORSE_TASK_zunmqr(
                        &options,
                        side, trans,
                        tempkm, tempnn, tempkmin, ib, T->nb,
220
                        D(k), ldak,
221 222 223
                        T(k, k), T->mb,
                        B(k, n), ldbk);
                }
224
                RUNTIME_iteration_pop(morse);
225 226 227 228 229 230 231 232 233
            }
        }
    }
    /*
     *  MorseRight / MorseConjTrans
     */
    else {
        if (trans == MorseConjTrans) {
            for (k = minMT-1; k >= 0; k--) {
234
                RUNTIME_iteration_push(morse, k);
235

Mathieu Faverge's avatar
Mathieu Faverge committed
236 237
                tempkn   = k == B->nt - 1 ? B->n - k * B->nb : B->nb;
                tempkmin = k == minMT - 1 ? minM - k * A->nb : A->nb;
238 239 240 241 242 243 244 245
                ldak = BLKLDD(A, k);
                ldbk = BLKLDD(B, k);
                for (n = B->nt-1; n > k; n--) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                    ldan = BLKLDD(A, n);
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                        ldbm = BLKLDD(B, m);
246 247 248 249

                        RUNTIME_data_migrate( sequence, B(m, k),
                                              B->get_rankof( B, m, n ) );

250
                        /* TS kernel */
251
                        MORSE_TASK_ztpmqrt(
252 253
                            &options,
                            side, trans,
254
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
255
                            A(n, k), ldan,
256 257 258
                            T(n, k), T->mb,
                            B(m, k), ldbm,
                            B(m, n), ldbm);
259 260
                    }
                }
261
#if defined(CHAMELEON_COPY_DIAG)
262 263 264 265
                MORSE_TASK_zlacpy(
                    &options,
                    MorseLower, tempkn, tempkmin, A->nb,
                    A(k, k), ldak,
266
                    D(k), ldak );
Mathieu Faverge's avatar
Mathieu Faverge committed
267
#if defined(CHAMELEON_USE_CUDA)
268 269 270 271
                MORSE_TASK_zlaset(
                    &options,
                    MorseUpper, tempkn, tempkmin,
                    0., 1.,
272
                    D(k), ldak );
273
#endif
274 275 276 277
#endif
                for (m = 0; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldbm = BLKLDD(B, m);
278 279 280 281

                    RUNTIME_data_migrate( sequence, B(m, k),
                                          B->get_rankof( B, m, k ) );

282 283 284 285
                    MORSE_TASK_zunmqr(
                        &options,
                        side, trans,
                        tempmm, tempkn, tempkmin, ib, T->nb,
286
                        D(k), ldak,
287 288 289
                        T(k, k), T->mb,
                        B(m, k), ldbm);
                }
290 291

                RUNTIME_iteration_pop(morse);
292 293 294 295 296 297 298
            }
        }
        /*
         *  MorseRight / MorseNoTrans
         */
        else {
            for (k = 0; k < minMT; k++) {
299
                RUNTIME_iteration_push(morse, k);
300

301 302 303
                tempkn   = k == B->nt-1 ? B->n-k*B->nb : B->nb;
                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
                ldak = BLKLDD(A, k);
304
#if defined(CHAMELEON_COPY_DIAG)
305 306 307 308
                MORSE_TASK_zlacpy(
                    &options,
                    MorseLower, tempkn, tempkmin, A->nb,
                    A(k, k), ldak,
309
                    D(k), ldak );
Mathieu Faverge's avatar
Mathieu Faverge committed
310
#if defined(CHAMELEON_USE_CUDA)
311 312 313 314
                MORSE_TASK_zlaset(
                    &options,
                    MorseUpper, tempkn, tempkmin,
                    0., 1.,
315
                    D(k), ldak );
316
#endif
317 318 319 320 321 322 323 324
#endif
                for (m = 0; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldbm = BLKLDD(B, m);
                    MORSE_TASK_zunmqr(
                        &options,
                        side, trans,
                        tempmm, tempkn, tempkmin, ib, T->nb,
325
                        D(k), ldak,
326 327 328 329 330 331 332 333 334
                        T(k, k), T->mb,
                        B(m, k), ldbm);
                }
                for (n = k+1; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                    ldan = BLKLDD(A, n);
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                        ldbm = BLKLDD(B, m);
335 336 337 338

                        RUNTIME_data_migrate( sequence, B(m, k),
                                              B->get_rankof( B, m, n ) );

339
                        /* TS kernel */
340
                        MORSE_TASK_ztpmqrt(
341 342
                            &options,
                            side, trans,
343
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
344
                            A(n, k), ldan,
345 346 347
                            T(n, k), T->mb,
                            B(m, k), ldbm,
                            B(m, n), ldbm);
348 349
                    }
                }
350

351 352 353 354 355 356
                /* Restore the original location of the tiles */
                for (m = 0; m < B->mt; m++) {
                    RUNTIME_data_migrate( sequence, B(m, k),
                                          B->get_rankof( B, m, k ) );
                }

357
                RUNTIME_iteration_pop(morse);
358 359 360
            }
        }
    }
361

362 363 364
    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
}