/**
 *
 * @copyright 2009-2015 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 **/

/**
 *
 * @file pztrsm.c
 *
 *  MORSE auxiliary routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 2.5.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for MORSE 1.0.0
 * @author Jakub Kurzak
 * @author Hatem Ltaief
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
 **/
#include "control/common.h"
31 32 33

#define A(m,n) A,  m,  n
#define B(m,n) B,  m,  n
Mathieu Faverge's avatar
Mathieu Faverge committed
34
/*******************************************************************************
35 36 37 38 39 40 41 42 43 44
 *  Parallel tile triangular solve - dynamic scheduling
 **/
void morse_pztrsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum trans, MORSE_enum diag,
                         MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B,
                         MORSE_sequence_t *sequence, MORSE_request_t *request)
{
    MORSE_context_t *morse;
    MORSE_option_t options;

    int k, m, n;
PRUVOST Florent's avatar
PRUVOST Florent committed
45
    int ldak, ldam, ldan, ldbk, ldbm;
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
    int tempkm, tempkn, tempmm, tempnn;

    MORSE_Complex64_t zone       = (MORSE_Complex64_t) 1.0;
    MORSE_Complex64_t mzone      = (MORSE_Complex64_t)-1.0;
    MORSE_Complex64_t minvalpha  = (MORSE_Complex64_t)-1.0 / alpha;
    MORSE_Complex64_t lalpha;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);
    /*
     *  MorseLeft / MorseUpper / MorseNoTrans
     */
    if (side == MorseLeft) {
        if (uplo == MorseUpper) {
            if (trans == MorseNoTrans) {
                for (k = 0; k < B->mt; k++) {
                    tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
65 66
                    ldak = BLKLDD(A, B->mt-1-k);
                    ldbk = BLKLDD(B, B->mt-1-k);
67 68 69 70 71 72 73
                    lalpha = k == 0 ? alpha : zone;
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                        MORSE_TASK_ztrsm(
                            &options,
                            side, uplo, trans, diag,
                            tempkm, tempnn, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
74 75
                            lalpha, A(B->mt-1-k, B->mt-1-k), ldak,  /* lda * tempkm */
                                    B(B->mt-1-k,        n), ldbk); /* ldb * tempnn */
76
                    }
77
                    RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
78
                    for (m = k+1; m < B->mt; m++) {
PRUVOST Florent's avatar
PRUVOST Florent committed
79 80
                        ldam = BLKLDD(A, B->mt-1-m);
                        ldbm = BLKLDD(B, B->mt-1-m);
81 82 83 84 85 86
                        for (n = 0; n < B->nt; n++) {
                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                            MORSE_TASK_zgemm(
                                &options,
                                MorseNoTrans, MorseNoTrans,
                                B->mb, tempnn, tempkm, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
87 88 89
                                mzone,  A(B->mt-1-m, B->mt-1-k), ldam,
                                        B(B->mt-1-k, n       ), ldbk,
                                lalpha, B(B->mt-1-m, n       ), ldbm);
90
                        }
91
                        RUNTIME_data_flush( sequence, A(B->mt-1-m, B->mt-1-k) );
92 93
                    }
                    for (n = 0; n < B->nt; n++) {
94
                        RUNTIME_data_flush( sequence, B(B->mt-1-k, n) );
95 96 97 98 99 100 101 102 103
                    }
                }
            }
            /*
             *  MorseLeft / MorseUpper / Morse[Conj]Trans
             */
            else {
                for (k = 0; k < B->mt; k++) {
                    tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
104 105
                    ldak = BLKLDD(A, k);
                    ldbk = BLKLDD(B, k);
106 107 108 109 110 111 112
                    lalpha = k == 0 ? alpha : zone;
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                        MORSE_TASK_ztrsm(
                            &options,
                            side, uplo, trans, diag,
                            tempkm, tempnn, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
113 114
                            lalpha, A(k, k), ldak,
                                    B(k, n), ldbk);
115
                    }
116
                    RUNTIME_data_flush( sequence, A(k, k) );
117 118
                    for (m = k+1; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
119
                        ldbm = BLKLDD(B, m);
120 121 122 123 124 125
                        for (n = 0; n < B->nt; n++) {
                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                            MORSE_TASK_zgemm(
                                &options,
                                trans, MorseNoTrans,
                                tempmm, tempnn, B->mb, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
126 127 128
                                mzone,  A(k, m), ldak,
                                        B(k, n), ldbk,
                                lalpha, B(m, n), ldbm);
129
                        }
130
                        RUNTIME_data_flush( sequence, A(k, m) );
131 132
                    }
                    for (n = 0; n < B->nt; n++) {
133
                        RUNTIME_data_flush( sequence, B(k, n) );
134
                    }
PRUVOST Florent's avatar
PRUVOST Florent committed
135

136 137 138 139 140 141 142 143 144 145
                }
            }
        }
        /*
         *  MorseLeft / MorseLower / MorseNoTrans
         */
        else {
            if (trans == MorseNoTrans) {
                for (k = 0; k < B->mt; k++) {
                    tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
146 147
                    ldak = BLKLDD(A, k);
                    ldbk = BLKLDD(B, k);
148 149 150 151 152 153 154
                    lalpha = k == 0 ? alpha : zone;
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                        MORSE_TASK_ztrsm(
                            &options,
                            side, uplo, trans, diag,
                            tempkm, tempnn, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
155 156
                            lalpha, A(k, k), ldak,
                                    B(k, n), ldbk);
157
                    }
158
                    RUNTIME_data_flush( sequence, A(k, k) );
159 160
                    for (m = k+1; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
161 162
                        ldam = BLKLDD(A, m);
                        ldbm = BLKLDD(B, m);
163 164 165 166 167 168
                        for (n = 0; n < B->nt; n++) {
                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                            MORSE_TASK_zgemm(
                                &options,
                                MorseNoTrans, MorseNoTrans,
                                tempmm, tempnn, B->mb, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
169 170 171
                                mzone,  A(m, k), ldam,
                                        B(k, n), ldbk,
                                lalpha, B(m, n), ldbm);
172
                        }
173
                        RUNTIME_data_flush( sequence, A(m, k) );
174 175
                    }
                    for (n = 0; n < B->nt; n++) {
176
                        RUNTIME_data_flush( sequence, B(k, n) );
177 178 179 180 181 182 183 184 185
                    }
                }
            }
            /*
             *  MorseLeft / MorseLower / Morse[Conj]Trans
             */
            else {
                for (k = 0; k < B->mt; k++) {
                    tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
186 187
                    ldak = BLKLDD(A, B->mt-1-k);
                    ldbk = BLKLDD(B, B->mt-1-k);
188 189 190 191 192 193 194
                    lalpha = k == 0 ? alpha : zone;
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                        MORSE_TASK_ztrsm(
                            &options,
                            side, uplo, trans, diag,
                            tempkm, tempnn, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
195 196
                            lalpha, A(B->mt-1-k, B->mt-1-k), ldak,
                                    B(B->mt-1-k,        n), ldbk);
197
                    }
198
                    RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
199 200
                    for (m = k+1; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
201
                        ldbm = BLKLDD(B, B->mt-1-m);
202 203 204 205 206 207
                        for (n = 0; n < B->nt; n++) {
                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                            MORSE_TASK_zgemm(
                                &options,
                                trans, MorseNoTrans,
                                B->mb, tempnn, tempkm, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
208 209 210
                                mzone,  A(B->mt-1-k, B->mt-1-m), ldak,
                                        B(B->mt-1-k, n       ), ldbk,
                                lalpha, B(B->mt-1-m, n       ), ldbm);
211
                        }
212
                        RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-m) );
213 214
                    }
                    for (n = 0; n < B->nt; n++) {
215
                        RUNTIME_data_flush( sequence, B(B->mt-1-k, n) );
216 217 218 219 220 221 222 223 224 225 226 227 228
                    }
                }
            }
        }
    }
    /*
     *  MorseRight / MorseUpper / MorseNoTrans
     */
    else {
        if (uplo == MorseUpper) {
            if (trans == MorseNoTrans) {
                for (k = 0; k < B->nt; k++) {
                    tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
PRUVOST Florent's avatar
PRUVOST Florent committed
229
                    ldak = BLKLDD(A, k);
230 231 232
                    lalpha = k == 0 ? alpha : zone;
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
233
                        ldbm = BLKLDD(B, m);
234 235 236 237
                        MORSE_TASK_ztrsm(
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempkn, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
238 239
                            lalpha, A(k, k), ldak,  /* lda * tempkn */
                                    B(m, k), ldbm); /* ldb * tempkn */
240
                    }
241
                    RUNTIME_data_flush( sequence, A(k, k) );
242 243
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
244
                        ldbm = BLKLDD(B, m);
245 246 247 248 249 250
                        for (n = k+1; n < B->nt; n++) {
                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                            MORSE_TASK_zgemm(
                                &options,
                                MorseNoTrans, MorseNoTrans,
                                tempmm, tempnn, B->mb, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
251 252 253
                                mzone,  B(m, k), ldbm,  /* ldb * B->mb   */
                                        A(k, n), ldak,  /* lda * tempnn */
                                lalpha, B(m, n), ldbm); /* ldb * tempnn */
254
                        }
255
                        RUNTIME_data_flush( sequence, B(m, k) );
256 257
                    }
                    for (n = k+1; n < B->nt; n++) {
258
                        RUNTIME_data_flush( sequence, A(k, n) );
259 260 261 262 263 264 265 266 267
                    }
                }
            }
            /*
             *  MorseRight / MorseUpper / Morse[Conj]Trans
             */
            else {
                for (k = 0; k < B->nt; k++) {
                    tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb;
PRUVOST Florent's avatar
PRUVOST Florent committed
268
                    ldak = BLKLDD(A, B->nt-1-k);
269 270
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
271
                        ldbm = BLKLDD(B, m);
272 273 274 275
                        MORSE_TASK_ztrsm(
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempkn, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
276 277
                            alpha, A(B->nt-1-k, B->nt-1-k), ldak,  /* lda * tempkn */
                                   B(       m, B->nt-1-k), ldbm); /* ldb * tempkn */
278
                        RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-k) );
279 280

                        for (n = k+1; n < B->nt; n++) {
PRUVOST Florent's avatar
PRUVOST Florent committed
281
                            ldan = BLKLDD(A, B->nt-1-n);
282 283 284 285
                            MORSE_TASK_zgemm(
                                &options,
                                MorseNoTrans, trans,
                                tempmm, B->nb, tempkn, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
286 287 288
                                minvalpha, B(m,        B->nt-1-k), ldbm,  /* ldb  * tempkn */
                                           A(B->nt-1-n, B->nt-1-k), ldan, /* A->mb * tempkn (Never last row) */
                                zone,      B(m,        B->nt-1-n), ldbm); /* ldb  * B->nb   */
289
                        }
290
                        RUNTIME_data_flush( sequence, B(m,        B->nt-1-k) );
291 292
                    }
                    for (n = k+1; n < B->nt; n++) {
293
                        RUNTIME_data_flush( sequence, A(B->nt-1-n, B->nt-1-k) );
294 295 296 297 298 299 300 301 302 303 304
                    }
                }
            }
        }
        /*
         *  MorseRight / MorseLower / MorseNoTrans
         */
        else {
            if (trans == MorseNoTrans) {
                for (k = 0; k < B->nt; k++) {
                    tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb;
PRUVOST Florent's avatar
PRUVOST Florent committed
305
                    ldak = BLKLDD(A, B->nt-1-k);
306 307 308
                    lalpha = k == 0 ? alpha : zone;
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
309
                        ldbm = BLKLDD(B, m);
310 311 312 313
                        MORSE_TASK_ztrsm(
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempkn, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
314 315
                            lalpha, A(B->nt-1-k, B->nt-1-k), ldak,  /* lda * tempkn */
                                    B(       m, B->nt-1-k), ldbm); /* ldb * tempkn */
316
                        RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-k) );
317 318 319 320 321 322

                        for (n = k+1; n < B->nt; n++) {
                            MORSE_TASK_zgemm(
                                &options,
                                MorseNoTrans, MorseNoTrans,
                                tempmm, B->nb, tempkn, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
323 324 325
                                mzone,  B(m,        B->nt-1-k), ldbm,  /* ldb * tempkn */
                                        A(B->nt-1-k, B->nt-1-n), ldak,  /* lda * B->nb   */
                                lalpha, B(m,        B->nt-1-n), ldbm); /* ldb * B->nb   */
326
                        }
327
                        RUNTIME_data_flush( sequence, B(m,        B->nt-1-k) );
328 329
                    }
                    for (n = k+1; n < B->nt; n++) {
330
                        RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-n) );
331 332 333 334 335 336 337 338 339
                    }
                }
            }
            /*
             *  MorseRight / MorseLower / Morse[Conj]Trans
             */
            else {
                for (k = 0; k < B->nt; k++) {
                    tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
PRUVOST Florent's avatar
PRUVOST Florent committed
340
                    ldak = BLKLDD(A, k);
341 342
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
PRUVOST Florent's avatar
PRUVOST Florent committed
343
                        ldbm = BLKLDD(B, m);
344 345 346 347
                        MORSE_TASK_ztrsm(
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempkn, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
348 349
                            alpha, A(k, k), ldak,  /* lda * tempkn */
                                   B(m, k), ldbm); /* ldb * tempkn */
350
                        RUNTIME_data_flush( sequence, A(k, k) );
351 352 353 354 355 356 357 358

                        for (n = k+1; n < B->nt; n++) {
                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                            ldan = BLKLDD(A, n);
                            MORSE_TASK_zgemm(
                                &options,
                                MorseNoTrans, trans,
                                tempmm, tempnn, B->mb, A->mb,
PRUVOST Florent's avatar
PRUVOST Florent committed
359
                                minvalpha, B(m, k), ldbm,  /* ldb  * tempkn */
360
                                           A(n, k), ldan, /* ldan * tempkn */
PRUVOST Florent's avatar
PRUVOST Florent committed
361
                                zone,      B(m, n), ldbm); /* ldb  * tempnn */
362
                        }
363
                        RUNTIME_data_flush( sequence, B(m, k) );
364 365
                    }
                    for (n = k+1; n < B->nt; n++) {
366
                        RUNTIME_data_flush( sequence, A(n, k) );
367
                    }
PRUVOST Florent's avatar
PRUVOST Florent committed
368

369 370 371 372 373 374
                }
            }
        }
    }
    RUNTIME_options_finalize(&options, morse);
}