/**
 *
 * @file pzgemm.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon zgemm parallel algorithm
 *
 * @version 0.9.2
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for CHAMELEON 0.9.2
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2014-11-16
 * @precisions normal z -> s d c
 *
 */
#include "control/common.h"

/*
 * Tile-argument shorthands. Each macro expands to THREE comma-separated
 * arguments: the descriptor variable of the same name that must be in scope
 * (A, B or C), followed by the two tile indices given to the macro. They are
 * meant to be used inside an argument list, e.g. f( ..., A(m, k), ... ).
 */
#define A(m, n) A,  m,  n
#define B(m, n) B,  m,  n
#define C(m, n) C,  m,  n
29
/**
30
 *  Parallel tile matrix-matrix multiplication - dynamic scheduling
31
 */
Mathieu Faverge's avatar
Mathieu Faverge committed
32
void chameleon_pzgemm(cham_trans_t transA, cham_trans_t transB,
33 34 35
                         CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
                         CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
                         RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
36
{
Mathieu Faverge's avatar
Mathieu Faverge committed
37
    CHAM_context_t *chamctxt;
38
    RUNTIME_option_t options;
39 40 41 42 43

    int m, n, k;
    int ldam, ldak, ldbn, ldbk, ldcm;
    int tempmm, tempnn, tempkn, tempkm;

44 45
    CHAMELEON_Complex64_t zbeta;
    CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
46

Mathieu Faverge's avatar
Mathieu Faverge committed
47
    chamctxt = chameleon_context_self();
Mathieu Faverge's avatar
Mathieu Faverge committed
48
    if (sequence->status != CHAMELEON_SUCCESS) {
49
        return;
Mathieu Faverge's avatar
Mathieu Faverge committed
50
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
51
    RUNTIME_options_init(&options, chamctxt, sequence, request);
52 53 54 55 56 57 58

    for (m = 0; m < C->mt; m++) {
        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
        ldcm = BLKLDD(C, m);
        for (n = 0; n < C->nt; n++) {
            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
            /*
59
             *  A: ChamNoTrans / B: ChamNoTrans
60
             */
61
            if (transA == ChamNoTrans) {
62
                ldam = BLKLDD(A, m);
63
                if (transB == ChamNoTrans) {
64 65 66 67
                    for (k = 0; k < A->nt; k++) {
                        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                        ldbk = BLKLDD(B, k);
                        zbeta = k == 0 ? beta : zone;
68
                        INSERT_TASK_zgemm(
69 70 71 72 73 74 75 76 77
                            &options,
                            transA, transB,
                            tempmm, tempnn, tempkn, A->mb,
                            alpha, A(m, k), ldam,  /* lda * Z */
                                   B(k, n), ldbk,  /* ldb * Y */
                            zbeta, C(m, n), ldcm); /* ldc * Y */
                    }
                }
                /*
78
                 *  A: ChamNoTrans / B: Cham[Conj]Trans
79 80 81 82 83 84
                 */
                else {
                    ldbn = BLKLDD(B, n);
                    for (k = 0; k < A->nt; k++) {
                        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                        zbeta = k == 0 ? beta : zone;
85
                        INSERT_TASK_zgemm(
86 87 88 89 90 91 92 93 94 95
                            &options,
                            transA, transB,
                            tempmm, tempnn, tempkn, A->mb,
                            alpha, A(m, k), ldam,  /* lda * Z */
                                   B(n, k), ldbn,  /* ldb * Z */
                            zbeta, C(m, n), ldcm); /* ldc * Y */
                    }
                }
            }
            /*
96
             *  A: Cham[Conj]Trans / B: ChamNoTrans
97 98
             */
            else {
99
                if (transB == ChamNoTrans) {
100 101 102 103 104
                    for (k = 0; k < A->mt; k++) {
                        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                        ldak = BLKLDD(A, k);
                        ldbk = BLKLDD(B, k);
                        zbeta = k == 0 ? beta : zone;
105
                        INSERT_TASK_zgemm(
106 107 108 109 110 111 112 113 114
                            &options,
                            transA, transB,
                            tempmm, tempnn, tempkm, A->mb,
                            alpha, A(k, m), ldak,  /* lda * X */
                                   B(k, n), ldbk,  /* ldb * Y */
                            zbeta, C(m, n), ldcm); /* ldc * Y */
                    }
                }
                /*
115
                 *  A: Cham[Conj]Trans / B: Cham[Conj]Trans
116 117 118 119 120 121 122
                 */
                else {
                    ldbn = BLKLDD(B, n);
                    for (k = 0; k < A->mt; k++) {
                        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                        ldak = BLKLDD(A, k);
                        zbeta = k == 0 ? beta : zone;
123
                        INSERT_TASK_zgemm(
124 125 126 127 128 129 130 131 132
                            &options,
                            transA, transB,
                            tempmm, tempnn, tempkm, A->mb,
                            alpha, A(k, m), ldak,  /* lda * X */
                                   B(n, k), ldbn,  /* ldb * Z */
                            zbeta, C(m, n), ldcm); /* ldc * Y */
                    }
                }
            }
133
            RUNTIME_data_flush( sequence, C(m, n) );
134
        }
135
        if (transA == ChamNoTrans) {
136
            for (k = 0; k < A->nt; k++) {
137
                RUNTIME_data_flush( sequence, A(m, k) );
138 139 140
            }
        } else {
            for (k = 0; k < A->mt; k++) {
141
                RUNTIME_data_flush( sequence, A(k, m) );
142 143
            }
        }
144
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
145
    RUNTIME_options_finalize(&options, chamctxt);
146
}