/**
 *
 * @file pztrmm.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon ztrmm parallel algorithm
 *
 * @version 1.0.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for CHAMELEON 1.0.0
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
 */
#include "control/common.h"

/*
 * Tile accessors: expand a descriptor name plus a (row, column) tile index
 * into the comma-separated triple "descriptor, row, column" that is spliced
 * directly into the argument lists of the INSERT_TASK_* calls in this file.
 * The descriptor name inside the expansion is not re-expanded, so A(m, n)
 * refers to the variable A that is in scope at the point of use.
 */
#define A(m, n) A, (m), (n)
#define B(m, n) B, (m), (n)

/**
32
 *  Parallel tile triangular matrix-matrix multiplication - dynamic scheduling
33
 */
Mathieu Faverge's avatar
Mathieu Faverge committed
34
void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
35 36 37
                         cham_trans_t trans, cham_diag_t diag,
                         CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
                         RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
38
{
Mathieu Faverge's avatar
Mathieu Faverge committed
39
    CHAM_context_t *chamctxt;
40
    RUNTIME_option_t options;
41 42

    int k, m, n;
43
    int ldak, ldam, ldan, ldbk, ldbm;
44 45
    int tempkm, tempkn, tempmm, tempnn;

46
    CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
47

Mathieu Faverge's avatar
Mathieu Faverge committed
48
    chamctxt = chameleon_context_self();
Mathieu Faverge's avatar
Mathieu Faverge committed
49
    if (sequence->status != CHAMELEON_SUCCESS) {
50
        return;
Mathieu Faverge's avatar
Mathieu Faverge committed
51
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
52
    RUNTIME_options_init(&options, chamctxt, sequence, request);
53
    /*
54
     *  ChamLeft / ChamUpper / ChamNoTrans
55
     */
56 57 58
    if (side == ChamLeft) {
        if (uplo == ChamUpper) {
            if (trans == ChamNoTrans) {
59 60
                for (m = 0; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
61 62
                    ldbm = BLKLDD(B, m);
                    ldam = BLKLDD(A, m);
63 64
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
65
                        INSERT_TASK_ztrmm(
66 67 68
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A->mb,
69 70
                            alpha, A(m, m), ldam,  /* lda * tempkm */
                                   B(m, n), ldbm); /* ldb * tempnn */
71 72 73 74

                        for (k = m+1; k < A->mt; k++) {
                            tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                            ldbk = BLKLDD(B, k);
75
                            INSERT_TASK_zgemm(
76
                                &options,
77
                                trans, ChamNoTrans,
78
                                tempmm, tempnn, tempkn, A->mb,
79
                                alpha, A(m, k), ldam,
80
                                       B(k, n), ldbk,
81
                                zone,  B(m, n), ldbm);
82 83 84 85 86
                        }
                    }
                }
            }
            /*
87
             *  ChamLeft / ChamUpper / Cham[Conj]Trans
88 89 90 91
             */
            else {
                for (m = B->mt-1; m > -1; m--) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
92 93
                    ldbm = BLKLDD(B, m);
                    ldam = BLKLDD(A, m);
94 95
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
96
                        INSERT_TASK_ztrmm(
97 98 99
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A->mb,
100 101
                            alpha, A(m, m), ldam,  /* lda * tempkm */
                                   B(m, n), ldbm); /* ldb * tempnn */
102 103

                        for (k = 0; k < m; k++) {
104 105
                            ldbk = BLKLDD(B, k);
                            ldak = BLKLDD(A, k);
106
                            INSERT_TASK_zgemm(
107
                                &options,
108
                                trans, ChamNoTrans,
109
                                tempmm, tempnn, B->mb, A->mb,
110 111 112
                                alpha, A(k, m), ldak,
                                       B(k, n), ldbk,
                                zone,  B(m, n), ldbm);
113 114 115 116 117 118
                        }
                    }
                }
            }
        }
        /*
119
         *  ChamLeft / ChamLower / ChamNoTrans
120 121
         */
        else {
122
            if (trans == ChamNoTrans) {
123 124
                for (m = B->mt-1; m > -1; m--) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
125 126
                    ldbm = BLKLDD(B, m);
                    ldam = BLKLDD(A, m);
127 128
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
129
                        INSERT_TASK_ztrmm(
130 131 132
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A->mb,
133 134
                            alpha, A(m, m), ldam,  /* lda * tempkm */
                                   B(m, n), ldbm); /* ldb * tempnn */
135 136

                        for (k = 0; k < m; k++) {
137
                            ldbk = BLKLDD(B, k);
138
                            INSERT_TASK_zgemm(
139
                                &options,
140
                                trans, ChamNoTrans,
141
                                tempmm, tempnn, B->mb, A->mb,
142 143 144
                                alpha, A(m, k), ldam,
                                       B(k, n), ldbk,
                                zone,  B(m, n), ldbm);
145 146 147 148 149
                        }
                    }
                }
            }
            /*
150
             *  ChamLeft / ChamLower / Cham[Conj]Trans
151 152 153 154
             */
            else {
                for (m = 0; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
155 156
                    ldbm = BLKLDD(B, m);
                    ldam = BLKLDD(A, m);
157 158
                    for (n = 0; n < B->nt; n++) {
                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
159
                        INSERT_TASK_ztrmm(
160 161 162
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A->mb,
163 164
                            alpha, A(m, m), ldam,  /* lda * tempkm */
                                   B(m, n), ldbm); /* ldb * tempnn */
165 166 167 168 169

                        for (k = m+1; k < A->mt; k++) {
                            tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                            ldak = BLKLDD(A, k);
                            ldbk = BLKLDD(B, k);
170
                            INSERT_TASK_zgemm(
171
                                &options,
172
                                trans, ChamNoTrans,
173 174 175
                                tempmm, tempnn, tempkm, A->mb,
                                alpha, A(k, m), ldak,
                                       B(k, n), ldbk,
176
                                zone,  B(m, n), ldbm);
177 178 179 180 181 182 183
                        }
                    }
                }
            }
        }
    }
    /*
184
     *  ChamRight / ChamUpper / ChamNoTrans
185 186
     */
    else {
187 188
        if (uplo == ChamUpper) {
            if (trans == ChamNoTrans) {
189 190
                for (n = B->nt-1; n > -1; n--) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
191
                    ldan = BLKLDD(A, n);
192 193
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
194
                        ldbm = BLKLDD(B, m);
195
                        INSERT_TASK_ztrmm(
196 197 198
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A->mb,
199 200
                            alpha, A(n, n), ldan,  /* lda * tempkm */
                                   B(m, n), ldbm); /* ldb * tempnn */
201 202

                        for (k = 0; k < n; k++) {
203
                            ldak = BLKLDD(A, k);
204
                            INSERT_TASK_zgemm(
205
                                &options,
206
                                ChamNoTrans, trans,
207
                                tempmm, tempnn, B->mb, A->mb,
208 209 210
                                alpha, B(m, k), ldbm,
                                       A(k, n), ldak,
                                zone,  B(m, n), ldbm);
211 212 213 214 215
                        }
                    }
                }
            }
            /*
216
             *  ChamRight / ChamUpper / Cham[Conj]Trans
217 218 219 220
             */
            else {
                for (n = 0; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
221
                    ldan = BLKLDD(A, n);
222 223
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
224
                        ldbm = BLKLDD(B, m);
225
                        INSERT_TASK_ztrmm(
226 227 228
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A->mb,
229 230
                            alpha, A(n, n), ldan,  /* lda * tempkm */
                                   B(m, n), ldbm); /* ldb * tempnn */
231 232 233

                        for (k = n+1; k < A->mt; k++) {
                            tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
234
                            INSERT_TASK_zgemm(
235
                                &options,
236
                                ChamNoTrans, trans,
237
                                tempmm, tempnn, tempkn, A->mb,
238 239 240
                                alpha, B(m, k), ldbm,
                                       A(n, k), ldan,
                                zone,  B(m, n), ldbm);
241 242 243 244 245 246
                        }
                    }
                }
            }
        }
        /*
247
         *  ChamRight / ChamLower / ChamNoTrans
248 249
         */
        else {
250
            if (trans == ChamNoTrans) {
251 252
                for (n = 0; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
253
                    ldan = BLKLDD(A, n);
254 255
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
256
                        ldbm = BLKLDD(B, m);
257
                        INSERT_TASK_ztrmm(
258 259 260
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A->mb,
261 262
                            alpha, A(n, n), ldan,  /* lda * tempkm */
                                   B(m, n), ldbm); /* ldb * tempnn */
263 264 265 266

                        for (k = n+1; k < A->mt; k++) {
                            tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                            ldak = BLKLDD(A, k);
267
                            INSERT_TASK_zgemm(
268
                                &options,
269
                                ChamNoTrans, trans,
270
                                tempmm, tempnn, tempkn, A->mb,
271
                                alpha, B(m, k), ldbm,
272
                                       A(k, n), ldak,
273
                                zone,  B(m, n), ldbm);
274 275 276 277 278
                        }
                    }
                }
            }
            /*
279
             *  ChamRight / ChamLower / Cham[Conj]Trans
280 281 282 283
             */
            else {
                for (n = B->nt-1; n > -1; n--) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
284
                    ldan = BLKLDD(A, n);
285 286
                    for (m = 0; m < B->mt; m++) {
                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
287
                        ldbm = BLKLDD(B, m);
288
                        INSERT_TASK_ztrmm(
289 290 291
                            &options,
                            side, uplo, trans, diag,
                            tempmm, tempnn, A->mb,
292 293
                            alpha, A(n, n), ldan,  /* lda * tempkm */
                                   B(m, n), ldbm); /* ldb * tempnn */
294 295

                        for (k = 0; k < n; k++) {
296
                            INSERT_TASK_zgemm(
297
                                &options,
298
                                ChamNoTrans, trans,
299
                                tempmm, tempnn, B->mb, A->mb,
300 301 302
                                alpha, B(m, k), ldbm,
                                       A(n, k), ldan,
                                zone,  B(m, n), ldbm);
303 304 305 306 307 308
                        }
                    }
                }
            }
        }
    }
309

Mathieu Faverge's avatar
Mathieu Faverge committed
310
    RUNTIME_options_finalize(&options, chamctxt);
311
}