/**
 *
 * @file pzunmlq.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon zunmlq parallel algorithm
 *
 * @version 1.0.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for CHAMELEON 1.0.0
 * @author Hatem Ltaief
 * @author Jakub Kurzak
 * @author Azzam Haidar
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
 */
#include "control/common.h"
28 29 30 31

/*
 * Shorthands used in the task-insertion calls below: X(m,n) expands to the
 * three comma-separated arguments "X, m, n", i.e. a descriptor pointer
 * followed by two indices (presumably the tile row and tile column -- confirm
 * against the INSERT_TASK_* prototypes).
 * The macro name must match the name of a descriptor variable in scope.
 */
#define A(m,n) A,  m,  n
#define B(m,n) B,  m,  n
#define T(m,n) T,  m,  n
32
#define D(k)   D,  k,  k
33

34
/**
35
 *  Parallel application of Q using tile V - LQ factorization - dynamic scheduling
36
 */
37 38 39
void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                        CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, CHAM_desc_t *D,
                        RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
40
{
Mathieu Faverge's avatar
Mathieu Faverge committed
41
    CHAM_context_t *chamctxt;
Mathieu Faverge's avatar
Mathieu Faverge committed
42
    RUNTIME_option_t options;
43 44 45 46
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n;
Mathieu Faverge's avatar
Mathieu Faverge committed
47
    int ldak, ldbk, ldbm, lddk;
48 49 50
    int tempmm, tempnn, tempkn, tempkm, tempkmin;
    int ib, minMT, minM;

Mathieu Faverge's avatar
Mathieu Faverge committed
51
    chamctxt = chameleon_context_self();
Mathieu Faverge's avatar
Mathieu Faverge committed
52
    if (sequence->status != CHAMELEON_SUCCESS)
53
        return;
Mathieu Faverge's avatar
Mathieu Faverge committed
54
    RUNTIME_options_init(&options, chamctxt, sequence, request);
55

Mathieu Faverge's avatar
Mathieu Faverge committed
56
    ib = CHAMELEON_IB;
57 58 59 60 61 62 63 64 65

    if (A->m > A->n) {
        minM  = A->n;
        minMT = A->nt;
    } else {
        minM  = A->m;
        minMT = A->mt;
    }

66 67 68
    if ( D == NULL ) {
        D    = A;
        genD = 0;
69 70
    }

71
    /*
72 73
     * zunmlq = A->mb * ib
     * ztsmlq = A->mb * ib
74
     */
75
    ws_worker = A->mb * ib;
76

Mathieu Faverge's avatar
Mathieu Faverge committed
77
#if defined(CHAMELEON_USE_CUDA)
78 79
    /* Worker space
     *
80 81
     * zunmlq = A->mb * ib
     * ztsmlq = 2 * A->mb * ib
82
     */
83
    ws_worker = chameleon_max( ws_worker, ib * A->mb * 2 );
84 85
#endif

Mathieu Faverge's avatar
Mathieu Faverge committed
86 87
    ws_worker *= sizeof(CHAMELEON_Complex64_t);
    ws_host   *= sizeof(CHAMELEON_Complex64_t);
88 89 90

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

Mathieu Faverge's avatar
Mathieu Faverge committed
91 92
    if (side == ChamLeft ) {
        if (trans == ChamNoTrans) {
93
            /*
Mathieu Faverge's avatar
Mathieu Faverge committed
94
             *  ChamLeft / ChamNoTrans
95 96
             */
            for (k = 0; k < minMT; k++) {
Mathieu Faverge's avatar
Mathieu Faverge committed
97
                RUNTIME_iteration_push(chamctxt, k);
98

99
                tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
Mathieu Faverge's avatar
Mathieu Faverge committed
100
                tempkn   = k == A->nt-1 ? A->n-k*A->nb : A->nb;
101 102 103
                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
                ldak = BLKLDD(A, k);
                ldbk = BLKLDD(B, k);
Mathieu Faverge's avatar
Mathieu Faverge committed
104 105
                lddk = BLKLDD(D, k);

106 107 108
                if ( genD ) {
                    INSERT_TASK_zlacpy(
                        &options,
Mathieu Faverge's avatar
Mathieu Faverge committed
109
                        ChamUpper, tempkmin, tempkn, A->nb,
110
                        A(k, k), ldak,
Mathieu Faverge's avatar
Mathieu Faverge committed
111
                        D(k),    lddk );
Mathieu Faverge's avatar
Mathieu Faverge committed
112
#if defined(CHAMELEON_USE_CUDA)
113 114
                    INSERT_TASK_zlaset(
                        &options,
Mathieu Faverge's avatar
Mathieu Faverge committed
115
                        ChamLower, tempkmin, tempkn,
116
                        0., 1.,
Mathieu Faverge's avatar
Mathieu Faverge committed
117
                        D(k), lddk );
118
#endif
119
                }
120 121
                for (n = 0; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
Mathieu Faverge's avatar
Mathieu Faverge committed
122
                    INSERT_TASK_zunmlq(
123 124 125
                        &options,
                        side, trans,
                        tempkm, tempnn, tempkmin, ib, T->nb,
Mathieu Faverge's avatar
Mathieu Faverge committed
126
                        D(k),    lddk,
127 128 129
                        T(k, k), T->mb,
                        B(k, n), ldbk);
                }
130

Mathieu Faverge's avatar
Mathieu Faverge committed
131 132
                RUNTIME_data_flush( sequence, D(k)    );
                RUNTIME_data_flush( sequence, T(k, k) );
133

134 135 136 137 138
                for (m = k+1; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldbm = BLKLDD(B, m);
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
139 140 141 142

                        RUNTIME_data_migrate( sequence, B(k, n),
                                              B->get_rankof( B, m, n ) );

143
                        /* TS kernel */
Mathieu Faverge's avatar
Mathieu Faverge committed
144
                        INSERT_TASK_ztpmlqt(
145 146
                            &options,
                            side, trans,
147
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
148
                            A(k, m), ldak,
149 150 151
                            T(k, m), T->mb,
                            B(k, n), ldbk,
                            B(m, n), ldbm);
152
                    }
153

Mathieu Faverge's avatar
Mathieu Faverge committed
154 155
                    RUNTIME_data_flush( sequence, A(k, m) );
                    RUNTIME_data_flush( sequence, T(k, m) );
156
                }
157

158 159 160 161 162 163
                /* Restore the original location of the tiles */
                for (n = 0; n < B->nt; n++) {
                    RUNTIME_data_migrate( sequence, B(k, n),
                                          B->get_rankof( B, k, n ) );
                }

Mathieu Faverge's avatar
Mathieu Faverge committed
164
                RUNTIME_iteration_pop(chamctxt);
165 166
            }
        }
167
        /*
Mathieu Faverge's avatar
Mathieu Faverge committed
168
         *  ChamLeft / ChamConjTrans
169
         */
170 171
        else {
            for (k = minMT-1; k >= 0; k--) {
Mathieu Faverge's avatar
Mathieu Faverge committed
172
                RUNTIME_iteration_push(chamctxt, k);
173

Mathieu Faverge's avatar
Mathieu Faverge committed
174
                tempkn   = k == A->nt-1 ? A->n-k*A->nb : A->nb;
175
                tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
176 177 178
                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
                ldak = BLKLDD(A, k);
                ldbk = BLKLDD(B, k);
Mathieu Faverge's avatar
Mathieu Faverge committed
179 180
                lddk = BLKLDD(D, k);

181 182 183 184
                for (m = B->mt-1; m > k; m--) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldbm = BLKLDD(B, m);
                    for (n = 0; n < B->nt; n++) {
185 186 187 188 189
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;

                        RUNTIME_data_migrate( sequence, B(k, n),
                                              B->get_rankof( B, m, n ) );

190
                        /* TS kernel */
Mathieu Faverge's avatar
Mathieu Faverge committed
191
                        INSERT_TASK_ztpmlqt(
192 193
                            &options,
                            side, trans,
194
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
195
                            A(k, m), ldak,
196 197 198
                            T(k, m), T->mb,
                            B(k, n), ldbk,
                            B(m, n), ldbm);
199
                    }
200

Mathieu Faverge's avatar
Mathieu Faverge committed
201 202
                    RUNTIME_data_flush( sequence, A(k, m) );
                    RUNTIME_data_flush( sequence, T(k, m) );
203
                }
204 205 206
                if ( genD ) {
                    INSERT_TASK_zlacpy(
                        &options,
Mathieu Faverge's avatar
Mathieu Faverge committed
207
                        ChamUpper, tempkmin, tempkn, A->nb,
208
                        A(k, k), ldak,
Mathieu Faverge's avatar
Mathieu Faverge committed
209
                        D(k),    lddk );
Mathieu Faverge's avatar
Mathieu Faverge committed
210
#if defined(CHAMELEON_USE_CUDA)
211 212
                    INSERT_TASK_zlaset(
                        &options,
Mathieu Faverge's avatar
Mathieu Faverge committed
213
                        ChamLower, tempkmin, tempkn,
214
                        0., 1.,
Mathieu Faverge's avatar
Mathieu Faverge committed
215
                        D(k), lddk );
216
#endif
217
                }
218 219
                for (n = 0; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
220 221 222 223

                    RUNTIME_data_migrate( sequence, B(k, n),
                                          B->get_rankof( B, k, n ) );

Mathieu Faverge's avatar
Mathieu Faverge committed
224
                    INSERT_TASK_zunmlq(
225 226 227
                        &options,
                        side, trans,
                        tempkm, tempnn, tempkmin, ib, T->nb,
Mathieu Faverge's avatar
Mathieu Faverge committed
228
                        D(k),    lddk,
229 230 231
                        T(k, k), T->mb,
                        B(k, n), ldbk);
                }
Mathieu Faverge's avatar
Mathieu Faverge committed
232 233
                RUNTIME_data_flush( sequence, D(k)    );
                RUNTIME_data_flush( sequence, T(k, k) );
Mathieu Faverge's avatar
Mathieu Faverge committed
234
                RUNTIME_iteration_pop(chamctxt);
235 236 237
            }
        }
    }
238
    /*
Mathieu Faverge's avatar
Mathieu Faverge committed
239
     *  ChamRight / ChamNoTrans
240
     */
241
    else {
Mathieu Faverge's avatar
Mathieu Faverge committed
242
        if (trans == ChamNoTrans) {
243
            for (k = minMT-1; k >= 0; k--) {
Mathieu Faverge's avatar
Mathieu Faverge committed
244
                RUNTIME_iteration_push(chamctxt, k);
245

246 247
                tempkn   = k == B->nt - 1 ? B->n - k * B->nb : B->nb;
                tempkmin = k == minMT - 1 ? minM - k * A->nb : A->nb;
248
                ldak = BLKLDD(A, k);
Mathieu Faverge's avatar
Mathieu Faverge committed
249 250
                lddk = BLKLDD(D, k);

251 252 253 254 255
                for (n = B->nt-1; n > k; n--) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                        ldbm = BLKLDD(B, m);
256 257 258 259

                        RUNTIME_data_migrate( sequence, B(m, k),
                                              B->get_rankof( B, m, n ) );

260
                        /* TS kernel */
Mathieu Faverge's avatar
Mathieu Faverge committed
261
                        INSERT_TASK_ztpmlqt(
262 263
                            &options,
                            side, trans,
264
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
265
                            A(k, n), ldak,
266 267 268
                            T(k, n), T->mb,
                            B(m, k), ldbm,
                            B(m, n), ldbm);
269
                    }
270

Mathieu Faverge's avatar
Mathieu Faverge committed
271 272
                    RUNTIME_data_flush( sequence, A(k, n) );
                    RUNTIME_data_flush( sequence, T(k, n) );
273
                }
274 275 276 277 278
                if ( genD ) {
                    INSERT_TASK_zlacpy(
                        &options,
                        ChamUpper, tempkmin, tempkn, A->nb,
                        A(k, k), ldak,
Mathieu Faverge's avatar
Mathieu Faverge committed
279
                        D(k),    lddk );
Mathieu Faverge's avatar
Mathieu Faverge committed
280
#if defined(CHAMELEON_USE_CUDA)
281 282 283 284
                    INSERT_TASK_zlaset(
                        &options,
                        ChamLower, tempkmin, tempkn,
                        0., 1.,
Mathieu Faverge's avatar
Mathieu Faverge committed
285
                        D(k), lddk );
286
#endif
287
                }
288 289 290
                for (m = 0; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldbm = BLKLDD(B, m);
291 292 293 294

                    RUNTIME_data_migrate( sequence, B(m, k),
                                          B->get_rankof( B, m, k ) );

Mathieu Faverge's avatar
Mathieu Faverge committed
295
                    INSERT_TASK_zunmlq(
296 297 298
                        &options,
                        side, trans,
                        tempmm, tempkn, tempkmin, ib, T->nb,
Mathieu Faverge's avatar
Mathieu Faverge committed
299
                        D(k),    lddk,
300 301 302
                        T(k, k), T->mb,
                        B(m, k), ldbm);
                }
303

Mathieu Faverge's avatar
Mathieu Faverge committed
304 305
                RUNTIME_data_flush( sequence, D(k)    );
                RUNTIME_data_flush( sequence, T(k, k) );
306

Mathieu Faverge's avatar
Mathieu Faverge committed
307
                RUNTIME_iteration_pop(chamctxt);
308 309
            }
        }
310
        /*
Mathieu Faverge's avatar
Mathieu Faverge committed
311
         *  ChamRight / ChamConjTrans
312
         */
313 314
        else {
            for (k = 0; k < minMT; k++) {
Mathieu Faverge's avatar
Mathieu Faverge committed
315
                RUNTIME_iteration_push(chamctxt, k);
316

317
                tempkn   = k == B->nt-1 ? B->n-k*B->nb : B->nb;
318 319
                tempkmin = k == minMT-1 ? minM-k*A->mb : A->mb;
                ldak = BLKLDD(A, k);
Mathieu Faverge's avatar
Mathieu Faverge committed
320 321
                lddk = BLKLDD(D, k);

322 323 324 325 326
                if ( genD ) {
                    INSERT_TASK_zlacpy(
                        &options,
                        ChamUpper, tempkmin, tempkn, A->nb,
                        A(k, k), ldak,
Mathieu Faverge's avatar
Mathieu Faverge committed
327
                        D(k),    lddk );
Mathieu Faverge's avatar
Mathieu Faverge committed
328
#if defined(CHAMELEON_USE_CUDA)
329 330 331 332
                    INSERT_TASK_zlaset(
                        &options,
                        ChamLower, tempkmin, tempkn,
                        0., 1.,
Mathieu Faverge's avatar
Mathieu Faverge committed
333
                        D(k), lddk );
334
#endif
335
                }
336 337 338
                for (m = 0; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                    ldbm = BLKLDD(B, m);
Mathieu Faverge's avatar
Mathieu Faverge committed
339
                    INSERT_TASK_zunmlq(
340 341 342
                        &options,
                        side, trans,
                        tempmm, tempkn, tempkmin, ib, T->nb,
Mathieu Faverge's avatar
Mathieu Faverge committed
343
                        D(k),    lddk,
344 345 346
                        T(k, k), T->mb,
                        B(m, k), ldbm);
                }
347

Mathieu Faverge's avatar
Mathieu Faverge committed
348 349
                RUNTIME_data_flush( sequence, D(k)    );
                RUNTIME_data_flush( sequence, T(k, k) );
350

351 352 353 354 355
                for (n = k+1; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                        ldbm = BLKLDD(B, m);
356 357 358 359

                        RUNTIME_data_migrate( sequence, B(m, k),
                                              B->get_rankof( B, m, n ) );

360
                        /* TS kernel */
Mathieu Faverge's avatar
Mathieu Faverge committed
361
                        INSERT_TASK_ztpmlqt(
362 363
                            &options,
                            side, trans,
364
                            tempmm, tempnn, tempkmin, 0, ib, T->nb,
365
                            A(k, n), ldak,
366 367 368
                            T(k, n), T->mb,
                            B(m, k), ldbm,
                            B(m, n), ldbm);
369
                    }
370

Mathieu Faverge's avatar
Mathieu Faverge committed
371 372
                    RUNTIME_data_flush( sequence, A(k, n) );
                    RUNTIME_data_flush( sequence, T(k, n) );
373
                }
374

375 376 377 378 379 380
                /* Restore the original location of the tiles */
                for (m = 0; m < B->mt; m++) {
                    RUNTIME_data_migrate( sequence, B(m, k),
                                          B->get_rankof( B, m, k ) );
                }

Mathieu Faverge's avatar
Mathieu Faverge committed
381
                RUNTIME_iteration_pop(chamctxt);
382 383 384
            }
        }
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
385

386
    RUNTIME_options_ws_free(&options);
Mathieu Faverge's avatar
Mathieu Faverge committed
387
    RUNTIME_options_finalize(&options, chamctxt);
388
}