/**
 *
 * @file pztradd.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon ztradd parallel algorithm
 *
 * @version 1.0.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for CHAMELEON 1.0.0
 * @author Emmanuel Agullo
 * @author Mathieu Faverge
 * @date 2011-11-03
 * @precisions normal z -> s d c
 *
 */
#include "control/common.h"

/*
 * Shorthand used in the INSERT_TASK_* calls below: A(m, n) / B(m, n) expand
 * to three comma-separated arguments -- the descriptor itself followed by
 * the two tile indices -- so a tile reference reads like a matrix access.
 */
#define A(m, n) A,  m,  n
#define B(m, n) B,  m,  n

28
/**
29
 *  Parallel tile matrix-matrix multiplication - dynamic scheduling
30
 */
Mathieu Faverge's avatar
Mathieu Faverge committed
31
void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
32 33 34
                   CHAMELEON_Complex64_t alpha, CHAM_desc_t *A,
                   CHAMELEON_Complex64_t beta,  CHAM_desc_t *B,
                   RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
35
{
Mathieu Faverge's avatar
Mathieu Faverge committed
36
    CHAM_context_t *chamctxt;
37
    RUNTIME_option_t options;
38 39 40 41 42

    int tempmm, tempnn, tempmn, tempnm;
    int m, n;
    int ldam, ldan, ldbm, ldbn;

Mathieu Faverge's avatar
Mathieu Faverge committed
43
    chamctxt = chameleon_context_self();
Mathieu Faverge's avatar
Mathieu Faverge committed
44
    if (sequence->status != CHAMELEON_SUCCESS) {
45
        return;
Mathieu Faverge's avatar
Mathieu Faverge committed
46
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
47
    RUNTIME_options_init(&options, chamctxt, sequence, request);
48 49

    switch(uplo){
50 51
    case ChamLower:
        if (trans == ChamNoTrans) {
52
            for (n = 0; n < chameleon_min(B->mt,B->nt); n++) {
53 54 55 56 57
                tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb;
                tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                ldan = BLKLDD(A, n);
                ldbn = BLKLDD(B, n);

58
                INSERT_TASK_ztradd(
59 60 61 62 63 64 65 66 67 68
                    &options,
                    uplo, trans, tempnm, tempnn, B->mb,
                    alpha, A(n, n), ldan,
                    beta,  B(n, n), ldbn);

                for (m = n+1; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
                    ldam = BLKLDD(A, m);
                    ldbm = BLKLDD(B, m);

69
                    INSERT_TASK_zgeadd(
70 71 72 73 74 75 76 77
                        &options,
                        trans, tempmm, tempnn, B->mb,
                        alpha, A(m, n), ldam,
                        beta,  B(m, n), ldbm);
                }
            }
        }
        else {
78
            for (n = 0; n < chameleon_min(B->mt,B->nt); n++) {
79 80 81 82 83
                tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb;
                tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                ldan = BLKLDD(A, n);
                ldbn = BLKLDD(B, n);

84
                INSERT_TASK_ztradd(
85 86 87 88 89 90 91 92 93
                    &options,
                    uplo, trans, tempnm, tempnn, B->mb,
                    alpha, A(n, n), ldan,
                    beta,  B(n, n), ldbn);

                for (m = n+1; m < B->mt; m++) {
                    tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
                    ldbm = BLKLDD(B, m);

94
                    INSERT_TASK_zgeadd(
95 96 97 98 99 100 101 102
                        &options,
                        trans, tempmm, tempnn, B->mb,
                        alpha, A(n, m), ldan,
                        beta,  B(m, n), ldbm);
                }
            }
        }
        break;
103 104
    case ChamUpper:
        if (trans == ChamNoTrans) {
105
            for (m = 0; m < chameleon_min(B->mt,B->nt); m++) {
106 107 108 109 110
                tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
                tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb;
                ldam = BLKLDD(A, m);
                ldbm = BLKLDD(B, m);

111
                INSERT_TASK_ztradd(
112 113 114 115 116 117 118 119
                    &options,
                    uplo, trans, tempmm, tempmn, B->mb,
                    alpha, A(m, m), ldam,
                    beta,  B(m, m), ldbm);

                for (n = m+1; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;

120
                    INSERT_TASK_zgeadd(
121 122 123 124 125 126 127 128
                        &options,
                        trans, tempmm, tempnn, B->mb,
                        alpha, A(m, n), ldam,
                        beta,  B(m, n), ldbm);
                }
            }
        }
        else {
129
            for (m = 0; m < chameleon_min(B->mt,B->nt); m++) {
130 131 132 133 134
                tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
                tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb;
                ldam = BLKLDD(A, m);
                ldbm = BLKLDD(B, m);

135
                INSERT_TASK_ztradd(
136 137 138 139 140 141 142 143 144
                    &options,
                    uplo, trans, tempmm, tempmn, B->mb,
                    alpha, A(m, m), ldam,
                    beta,  B(m, m), ldbm);

                for (n = m+1; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                    ldan = BLKLDD(A, n);

145
                    INSERT_TASK_zgeadd(
146 147 148 149 150 151 152 153
                        &options,
                        trans, tempmm, tempnn, B->mb,
                        alpha, A(n, m), ldan,
                        beta,  B(m, n), ldbm);
                }
            }
        }
        break;
154
    case ChamUpperLower:
155
    default:
156
        if (trans == ChamNoTrans) {
157 158 159 160 161 162 163 164
            for (m = 0; m < B->mt; m++) {
                tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
                ldam = BLKLDD(A, m);
                ldbm = BLKLDD(B, m);

                for (n = 0; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;

165
                    INSERT_TASK_zgeadd(
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
                        &options,
                        trans, tempmm, tempnn, B->mb,
                        alpha, A(m, n), ldam,
                        beta,  B(m, n), ldbm);
                }
            }
        }
        else {
            for (m = 0; m < B->mt; m++) {
                tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
                ldbm = BLKLDD(B, m);

                for (n = 0; n < B->nt; n++) {
                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                    ldan = BLKLDD(A, n);

182
                    INSERT_TASK_zgeadd(
183 184 185 186 187 188 189 190
                        &options,
                        trans, tempmm, tempnn, B->mb,
                        alpha, A(n, m), ldan,
                        beta,  B(m, n), ldbm);
                }
            }
        }
    }
191

Mathieu Faverge's avatar
Mathieu Faverge committed
192
    RUNTIME_options_finalize(&options, chamctxt);
193
}