/**
 *
 * @file pzunmlq.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon zunmlq parallel algorithm
 *
 * @version 1.0.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for CHAMELEON 1.0.0
 * @author Hatem Ltaief
 * @author Jakub Kurzak
 * @author Azzam Haidar
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2018-11-09
 * @precisions normal z -> s d c
 *
 */
#include "control/common.h"

/*
 * Tile accessor shorthands.
 *
 * Each macro expands to a comma-separated argument triple
 * (descriptor, tile row index, tile column index) so that a tile can be
 * passed to the task-insertion and runtime calls of this file as a single
 * visual argument. They rely on local variables named A, B, T and D being
 * in scope at the point of use.
 *
 * D(k) designates the diagonal tile (k, k) of D.
 */
#define A(m,n) A,  m,  n
#define B(m,n) B,  m,  n
#define T(m,n) T,  m,  n
#define D(k)   D,  k,  k

/**
35
 *  Parallel application of Q using tile V - LQ factorization - dynamic scheduling
36
 */
37 38 39
void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                        CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, CHAM_desc_t *D,
                        RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
40
{
Mathieu Faverge's avatar
Mathieu Faverge committed
41
    CHAM_context_t *chamctxt;
42
    RUNTIME_option_t options;
43 44 45 46
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n;
47
    int ldak, ldbk, ldbm, lddk;
48 49 50
    int tempmm, tempnn, tempkn, tempkm, tempkmin;
    int ib, minMT, minM;

Mathieu Faverge's avatar
Mathieu Faverge committed
51
    chamctxt = chameleon_context_self();
Mathieu Faverge's avatar
Mathieu Faverge committed
52
    if (sequence->status != CHAMELEON_SUCCESS) {
53
        return;
Mathieu Faverge's avatar
Mathieu Faverge committed
54
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
55
    RUNTIME_options_init(&options, chamctxt, sequence, request);
56

57
    ib = CHAMELEON_IB;
58 59 60 61 62 63 64 65 66

    if (A->m > A->n) {
        minM  = A->n;
        minMT = A->nt;
    } else {
        minM  = A->m;
        minMT = A->mt;
    }

67 68 69
    if ( D == NULL ) {
        D    = A;
        genD = 0;
70 71
    }

72
    /*
73 74
     * zunmlq  = A->mb * ib
     * ztpmlqt = A->mb * ib
75
     */
76
    ws_worker = A->mb * ib;
77

Mathieu Faverge's avatar
Mathieu Faverge committed
78
#if defined(CHAMELEON_USE_CUDA)
79 80
    /* Worker space
     *
81 82
     * zunmlq  =     A->mb * ib
     * ztpmlqt = 2 * A->mb * ib
83
     */
84
    ws_worker = chameleon_max( ws_worker, ib * A->mb * 2 );
85 86
#endif

87 88
    ws_worker *= sizeof(CHAMELEON_Complex64_t);
    ws_host   *= sizeof(CHAMELEON_Complex64_t);
89 90 91

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

92 93
    if (side == ChamLeft ) {
        if (trans == ChamNoTrans) {
94
            /*
95
             *  ChamLeft / ChamNoTrans
96 97
             */
            for (k = 0; k < minMT; k++) {
Mathieu Faverge's avatar
Mathieu Faverge committed
98
                RUNTIME_iteration_push(chamctxt, k);
99

100
                tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
101
                tempkn   = k == A->nt-1 ? A->n-k*A->nb : A->nb;
102 103 104
                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
                ldak = BLKLDD(A, k);
                ldbk = BLKLDD(B, k);
105 106
                lddk = BLKLDD(D, k);

107 108 109
                if ( genD ) {
                    INSERT_TASK_zlacpy(
                        &options,
110
                        ChamUpper, tempkmin, tempkn, A->nb,
111
                        A(k, k), ldak,
112
                        D(k),    lddk );
Mathieu Faverge's avatar
Mathieu Faverge committed
113
#if defined(CHAMELEON_USE_CUDA)
114 115
                    INSERT_TASK_zlaset(
                        &options,
116
                        ChamLower, tempkmin, tempkn,
117
                        0., 1.,
118
                        D(k), lddk );
119
#endif
120
                }
121 122
                for (n = 0; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
123
                    INSERT_TASK_zunmlq(
124 125 126
                        &options,
                        side, trans,
                        tempkm, tempnn, tempkmin, ib, T->nb,
127
                        D(k),    lddk,
128 129 130
                        T(k, k), T->mb,
                        B(k, n), ldbk);
                }
131

Mathieu Faverge's avatar
Mathieu Faverge committed
132 133
                RUNTIME_data_flush( sequence, D(k)    );
                RUNTIME_data_flush( sequence, T(k, k) );
134

135 136 137 138 139
                for (m = k+1; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldbm = BLKLDD(B, m);
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
140 141 142 143

                        RUNTIME_data_migrate( sequence, B(k, n),
                                              B->get_rankof( B, m, n ) );

144
                        /* TS kernel */
145
                        INSERT_TASK_ztpmlqt(
146 147
                            &options,
                            side, trans,
148
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
149
                            A(k, m), ldak,
150 151 152
                            T(k, m), T->mb,
                            B(k, n), ldbk,
                            B(m, n), ldbm);
153
                    }
154

Mathieu Faverge's avatar
Mathieu Faverge committed
155 156
                    RUNTIME_data_flush( sequence, A(k, m) );
                    RUNTIME_data_flush( sequence, T(k, m) );
157
                }
158

159 160 161 162 163 164
                /* Restore the original location of the tiles */
                for (n = 0; n < B->nt; n++) {
                    RUNTIME_data_migrate( sequence, B(k, n),
                                          B->get_rankof( B, k, n ) );
                }

Mathieu Faverge's avatar
Mathieu Faverge committed
165
                RUNTIME_iteration_pop(chamctxt);
166 167
            }
        }
168
        /*
169
         *  ChamLeft / ChamConjTrans
170
         */
171 172
        else {
            for (k = minMT-1; k >= 0; k--) {
Mathieu Faverge's avatar
Mathieu Faverge committed
173
                RUNTIME_iteration_push(chamctxt, k);
174

175
                tempkn   = k == A->nt-1 ? A->n-k*A->nb : A->nb;
176
                tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
177 178 179
                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
                ldak = BLKLDD(A, k);
                ldbk = BLKLDD(B, k);
180 181
                lddk = BLKLDD(D, k);

182 183 184 185
                for (m = B->mt-1; m > k; m--) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldbm = BLKLDD(B, m);
                    for (n = 0; n < B->nt; n++) {
186 187 188 189 190
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;

                        RUNTIME_data_migrate( sequence, B(k, n),
                                              B->get_rankof( B, m, n ) );

191
                        /* TS kernel */
192
                        INSERT_TASK_ztpmlqt(
193 194
                            &options,
                            side, trans,
195
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
196
                            A(k, m), ldak,
197 198 199
                            T(k, m), T->mb,
                            B(k, n), ldbk,
                            B(m, n), ldbm);
200
                    }
201

Mathieu Faverge's avatar
Mathieu Faverge committed
202 203
                    RUNTIME_data_flush( sequence, A(k, m) );
                    RUNTIME_data_flush( sequence, T(k, m) );
204
                }
205 206 207
                if ( genD ) {
                    INSERT_TASK_zlacpy(
                        &options,
208
                        ChamUpper, tempkmin, tempkn, A->nb,
209
                        A(k, k), ldak,
210
                        D(k),    lddk );
Mathieu Faverge's avatar
Mathieu Faverge committed
211
#if defined(CHAMELEON_USE_CUDA)
212 213
                    INSERT_TASK_zlaset(
                        &options,
214
                        ChamLower, tempkmin, tempkn,
215
                        0., 1.,
216
                        D(k), lddk );
217
#endif
218
                }
219 220
                for (n = 0; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
221 222 223 224

                    RUNTIME_data_migrate( sequence, B(k, n),
                                          B->get_rankof( B, k, n ) );

225
                    INSERT_TASK_zunmlq(
226 227 228
                        &options,
                        side, trans,
                        tempkm, tempnn, tempkmin, ib, T->nb,
229
                        D(k),    lddk,
230 231 232
                        T(k, k), T->mb,
                        B(k, n), ldbk);
                }
Mathieu Faverge's avatar
Mathieu Faverge committed
233 234
                RUNTIME_data_flush( sequence, D(k)    );
                RUNTIME_data_flush( sequence, T(k, k) );
Mathieu Faverge's avatar
Mathieu Faverge committed
235
                RUNTIME_iteration_pop(chamctxt);
236 237 238
            }
        }
    }
239
    /*
240
     *  ChamRight / ChamNoTrans
241
     */
242
    else {
243
        if (trans == ChamNoTrans) {
244
            for (k = minMT-1; k >= 0; k--) {
Mathieu Faverge's avatar
Mathieu Faverge committed
245
                RUNTIME_iteration_push(chamctxt, k);
246

247 248
                tempkn   = k == B->nt - 1 ? B->n - k * B->nb : B->nb;
                tempkmin = k == minMT - 1 ? minM - k * A->nb : A->nb;
249
                ldak = BLKLDD(A, k);
250 251
                lddk = BLKLDD(D, k);

252 253 254 255 256
                for (n = B->nt-1; n > k; n--) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                        ldbm = BLKLDD(B, m);
257 258 259 260

                        RUNTIME_data_migrate( sequence, B(m, k),
                                              B->get_rankof( B, m, n ) );

261
                        /* TS kernel */
262
                        INSERT_TASK_ztpmlqt(
263 264
                            &options,
                            side, trans,
265
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
266
                            A(k, n), ldak,
267 268 269
                            T(k, n), T->mb,
                            B(m, k), ldbm,
                            B(m, n), ldbm);
270
                    }
271

Mathieu Faverge's avatar
Mathieu Faverge committed
272 273
                    RUNTIME_data_flush( sequence, A(k, n) );
                    RUNTIME_data_flush( sequence, T(k, n) );
274
                }
275 276 277 278 279
                if ( genD ) {
                    INSERT_TASK_zlacpy(
                        &options,
                        ChamUpper, tempkmin, tempkn, A->nb,
                        A(k, k), ldak,
280
                        D(k),    lddk );
Mathieu Faverge's avatar
Mathieu Faverge committed
281
#if defined(CHAMELEON_USE_CUDA)
282 283 284 285
                    INSERT_TASK_zlaset(
                        &options,
                        ChamLower, tempkmin, tempkn,
                        0., 1.,
286
                        D(k), lddk );
287
#endif
288
                }
289 290 291
                for (m = 0; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldbm = BLKLDD(B, m);
292 293 294 295

                    RUNTIME_data_migrate( sequence, B(m, k),
                                          B->get_rankof( B, m, k ) );

296
                    INSERT_TASK_zunmlq(
297 298 299
                        &options,
                        side, trans,
                        tempmm, tempkn, tempkmin, ib, T->nb,
300
                        D(k),    lddk,
301 302 303
                        T(k, k), T->mb,
                        B(m, k), ldbm);
                }
304

Mathieu Faverge's avatar
Mathieu Faverge committed
305 306
                RUNTIME_data_flush( sequence, D(k)    );
                RUNTIME_data_flush( sequence, T(k, k) );
307

Mathieu Faverge's avatar
Mathieu Faverge committed
308
                RUNTIME_iteration_pop(chamctxt);
309 310
            }
        }
311
        /*
312
         *  ChamRight / ChamConjTrans
313
         */
314 315
        else {
            for (k = 0; k < minMT; k++) {
Mathieu Faverge's avatar
Mathieu Faverge committed
316
                RUNTIME_iteration_push(chamctxt, k);
317

318
                tempkn   = k == B->nt-1 ? B->n-k*B->nb : B->nb;
319 320
                tempkmin = k == minMT-1 ? minM-k*A->mb : A->mb;
                ldak = BLKLDD(A, k);
321 322
                lddk = BLKLDD(D, k);

323 324 325 326 327
                if ( genD ) {
                    INSERT_TASK_zlacpy(
                        &options,
                        ChamUpper, tempkmin, tempkn, A->nb,
                        A(k, k), ldak,
328
                        D(k),    lddk );
Mathieu Faverge's avatar
Mathieu Faverge committed
329
#if defined(CHAMELEON_USE_CUDA)
330 331 332 333
                    INSERT_TASK_zlaset(
                        &options,
                        ChamLower, tempkmin, tempkn,
                        0., 1.,
334
                        D(k), lddk );
335
#endif
336
                }
337 338 339
                for (m = 0; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldbm = BLKLDD(B, m);
340
                    INSERT_TASK_zunmlq(
341 342 343
                        &options,
                        side, trans,
                        tempmm, tempkn, tempkmin, ib, T->nb,
344
                        D(k),    lddk,
345 346 347
                        T(k, k), T->mb,
                        B(m, k), ldbm);
                }
348

Mathieu Faverge's avatar
Mathieu Faverge committed
349 350
                RUNTIME_data_flush( sequence, D(k)    );
                RUNTIME_data_flush( sequence, T(k, k) );
351

352 353 354 355 356
                for (n = k+1; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                        ldbm = BLKLDD(B, m);
357 358 359 360

                        RUNTIME_data_migrate( sequence, B(m, k),
                                              B->get_rankof( B, m, n ) );

361
                        /* TS kernel */
362
                        INSERT_TASK_ztpmlqt(
363 364
                            &options,
                            side, trans,
365
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
366
                            A(k, n), ldak,
367 368 369
                            T(k, n), T->mb,
                            B(m, k), ldbm,
                            B(m, n), ldbm);
370
                    }
371

Mathieu Faverge's avatar
Mathieu Faverge committed
372 373
                    RUNTIME_data_flush( sequence, A(k, n) );
                    RUNTIME_data_flush( sequence, T(k, n) );
374
                }
375

376 377 378 379 380 381
                /* Restore the original location of the tiles */
                for (m = 0; m < B->mt; m++) {
                    RUNTIME_data_migrate( sequence, B(m, k),
                                          B->get_rankof( B, m, k ) );
                }

Mathieu Faverge's avatar
Mathieu Faverge committed
382
                RUNTIME_iteration_pop(chamctxt);
383 384 385
            }
        }
    }
386

387
    RUNTIME_options_ws_free(&options);
Mathieu Faverge's avatar
Mathieu Faverge committed
388
    RUNTIME_options_finalize(&options, chamctxt);
389
}