/**
 *
 * @file pzgemm.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon zgemm parallel algorithm
 *
 * @version 1.0.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for CHAMELEON 1.0.0
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
 */
#include "control/common.h"
25 26 27 28

/*
 * Tile accessors. Each macro expands to a comma-separated triple made of the
 * descriptor variable of the same name followed by the two indices it was
 * given, so that e.g. A(m, k) supplies three consecutive arguments to the
 * call in which it appears. The expansion is deliberately left
 * unparenthesized: wrapping it would turn the argument list into a single
 * comma expression.
 */
#define A(m, n) A,  m,  n
#define B(m, n) B,  m,  n
#define C(m, n) C,  m,  n
29
/**
30
 *  Parallel tile matrix-matrix multiplication - dynamic scheduling
31
 */
Mathieu Faverge's avatar
Mathieu Faverge committed
32
void chameleon_pzgemm(cham_trans_t transA, cham_trans_t transB,
Mathieu Faverge's avatar
Mathieu Faverge committed
33 34 35
                         CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
                         CHAMELEON_Complex64_t beta,  CHAM_desc_t *C,
                         RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
36
{
Mathieu Faverge's avatar
Mathieu Faverge committed
37
    CHAM_context_t *chamctxt;
Mathieu Faverge's avatar
Mathieu Faverge committed
38
    RUNTIME_option_t options;
39 40 41 42 43

    int m, n, k;
    int ldam, ldak, ldbn, ldbk, ldcm;
    int tempmm, tempnn, tempkn, tempkm;

Mathieu Faverge's avatar
Mathieu Faverge committed
44 45
    CHAMELEON_Complex64_t zbeta;
    CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
46

Mathieu Faverge's avatar
Mathieu Faverge committed
47
    chamctxt = chameleon_context_self();
Mathieu Faverge's avatar
Mathieu Faverge committed
48
    if (sequence->status != CHAMELEON_SUCCESS)
49
        return;
Mathieu Faverge's avatar
Mathieu Faverge committed
50
    RUNTIME_options_init(&options, chamctxt, sequence, request);
51 52 53 54 55 56 57

    for (m = 0; m < C->mt; m++) {
        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
        ldcm = BLKLDD(C, m);
        for (n = 0; n < C->nt; n++) {
            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
            /*
Mathieu Faverge's avatar
Mathieu Faverge committed
58
             *  A: ChamNoTrans / B: ChamNoTrans
59
             */
Mathieu Faverge's avatar
Mathieu Faverge committed
60
            if (transA == ChamNoTrans) {
61
                ldam = BLKLDD(A, m);
Mathieu Faverge's avatar
Mathieu Faverge committed
62
                if (transB == ChamNoTrans) {
63 64 65 66
                    for (k = 0; k < A->nt; k++) {
                        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                        ldbk = BLKLDD(B, k);
                        zbeta = k == 0 ? beta : zone;
Mathieu Faverge's avatar
Mathieu Faverge committed
67
                        INSERT_TASK_zgemm(
68 69 70 71 72 73 74 75 76
                            &options,
                            transA, transB,
                            tempmm, tempnn, tempkn, A->mb,
                            alpha, A(m, k), ldam,  /* lda * Z */
                                   B(k, n), ldbk,  /* ldb * Y */
                            zbeta, C(m, n), ldcm); /* ldc * Y */
                    }
                }
                /*
Mathieu Faverge's avatar
Mathieu Faverge committed
77
                 *  A: ChamNoTrans / B: Cham[Conj]Trans
78 79 80 81 82 83
                 */
                else {
                    ldbn = BLKLDD(B, n);
                    for (k = 0; k < A->nt; k++) {
                        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                        zbeta = k == 0 ? beta : zone;
Mathieu Faverge's avatar
Mathieu Faverge committed
84
                        INSERT_TASK_zgemm(
85 86 87 88 89 90 91 92 93 94
                            &options,
                            transA, transB,
                            tempmm, tempnn, tempkn, A->mb,
                            alpha, A(m, k), ldam,  /* lda * Z */
                                   B(n, k), ldbn,  /* ldb * Z */
                            zbeta, C(m, n), ldcm); /* ldc * Y */
                    }
                }
            }
            /*
Mathieu Faverge's avatar
Mathieu Faverge committed
95
             *  A: Cham[Conj]Trans / B: ChamNoTrans
96 97
             */
            else {
Mathieu Faverge's avatar
Mathieu Faverge committed
98
                if (transB == ChamNoTrans) {
99 100 101 102 103
                    for (k = 0; k < A->mt; k++) {
                        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                        ldak = BLKLDD(A, k);
                        ldbk = BLKLDD(B, k);
                        zbeta = k == 0 ? beta : zone;
Mathieu Faverge's avatar
Mathieu Faverge committed
104
                        INSERT_TASK_zgemm(
105 106 107 108 109 110 111 112 113
                            &options,
                            transA, transB,
                            tempmm, tempnn, tempkm, A->mb,
                            alpha, A(k, m), ldak,  /* lda * X */
                                   B(k, n), ldbk,  /* ldb * Y */
                            zbeta, C(m, n), ldcm); /* ldc * Y */
                    }
                }
                /*
Mathieu Faverge's avatar
Mathieu Faverge committed
114
                 *  A: Cham[Conj]Trans / B: Cham[Conj]Trans
115 116 117 118 119 120 121
                 */
                else {
                    ldbn = BLKLDD(B, n);
                    for (k = 0; k < A->mt; k++) {
                        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                        ldak = BLKLDD(A, k);
                        zbeta = k == 0 ? beta : zone;
Mathieu Faverge's avatar
Mathieu Faverge committed
122
                        INSERT_TASK_zgemm(
123 124 125 126 127 128 129 130 131
                            &options,
                            transA, transB,
                            tempmm, tempnn, tempkm, A->mb,
                            alpha, A(k, m), ldak,  /* lda * X */
                                   B(n, k), ldbn,  /* ldb * Z */
                            zbeta, C(m, n), ldcm); /* ldc * Y */
                    }
                }
            }
132
            RUNTIME_data_flush( sequence, C(m, n) );
133
        }
Mathieu Faverge's avatar
Mathieu Faverge committed
134
        if (transA == ChamNoTrans) {
135
            for (k = 0; k < A->nt; k++) {
136
                RUNTIME_data_flush( sequence, A(m, k) );
137 138 139
            }
        } else {
            for (k = 0; k < A->mt; k++) {
140
                RUNTIME_data_flush( sequence, A(k, m) );
141 142
            }
        }
143
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
144
    RUNTIME_options_finalize(&options, chamctxt);
145
}