/**
 *
 * @file pzherk.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon zherk parallel algorithm
 *
 * @version 1.0.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for CHAMELEON 1.0.0
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2010-11-15
 * @precisions normal z -> c
 *
 */
#include "control/common.h"
/*
 * Tile-addressing shorthands used in the task-insertion calls below.
 * A(m,n) / C(m,n) expand to the argument triple "descriptor, tile row index,
 * tile column index"; they are only meaningful inside an argument list where
 * an identifier named A (resp. C) is in scope.
 */
#define A(m,n) A,  (m),  (n)
#define C(m,n) C,  (m),  (n)
/**
29
 *  Parallel tile Hermitian rank-k update - dynamic scheduling
30
 */
Mathieu Faverge's avatar
Mathieu Faverge committed
31
void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
32 33 34
                         double alpha, CHAM_desc_t *A,
                         double beta,  CHAM_desc_t *C,
                         RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
35
{
Mathieu Faverge's avatar
Mathieu Faverge committed
36
    CHAM_context_t *chamctxt;
37
    RUNTIME_option_t options;
38 39 40 41 42

    int m, n, k;
    int ldak, ldam, ldan, ldcm, ldcn;
    int tempnn, tempmm, tempkn, tempkm;

43 44 45
    CHAMELEON_Complex64_t zone   = (CHAMELEON_Complex64_t)1.0;
    CHAMELEON_Complex64_t zalpha = (CHAMELEON_Complex64_t)alpha;
    CHAMELEON_Complex64_t zbeta;
46 47
    double dbeta;

Mathieu Faverge's avatar
Mathieu Faverge committed
48
    chamctxt = chameleon_context_self();
Mathieu Faverge's avatar
Mathieu Faverge committed
49
    if (sequence->status != CHAMELEON_SUCCESS) {
50
        return;
Mathieu Faverge's avatar
Mathieu Faverge committed
51
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
52
    RUNTIME_options_init(&options, chamctxt, sequence, request);
53 54 55 56 57 58

    for (n = 0; n < C->nt; n++) {
        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
        ldan = BLKLDD(A, n);
        ldcn = BLKLDD(C, n);
        /*
59
         *  ChamNoTrans
60
         */
61
        if (trans == ChamNoTrans) {
62 63 64
            for (k = 0; k < A->nt; k++) {
                tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                dbeta = k == 0 ? beta : 1.0;
65
                INSERT_TASK_zherk(
66 67 68 69 70 71 72
                    &options,
                    uplo, trans,
                    tempnn, tempkn, A->mb,
                    alpha, A(n, k), ldan, /* ldan * K */
                    dbeta, C(n, n), ldcn); /* ldc  * N */
            }
            /*
73
             *  ChamNoTrans / ChamLower
74
             */
75
            if (uplo == ChamLower) {
76 77 78 79 80 81
                for (m = n+1; m < C->mt; m++) {
                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                    ldam = BLKLDD(A, m);
                    ldcm = BLKLDD(C, m);
                    for (k = 0; k < A->nt; k++) {
                        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
82 83
                        zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
                        INSERT_TASK_zgemm(
84
                            &options,
85
                            trans, ChamConjTrans,
86 87 88 89 90 91 92 93
                            tempmm, tempnn, tempkn, A->mb,
                            zalpha, A(m, k), ldam,  /* ldam * K */
                                    A(n, k), ldan,  /* ldan * K */
                            zbeta,  C(m, n), ldcm); /* ldc  * N */
                    }
                }
            }
            /*
94
             *  ChamNoTrans / ChamUpper
95 96 97 98 99 100 101
             */
            else {
                for (m = n+1; m < C->mt; m++) {
                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                    ldam = BLKLDD(A, m);
                    for (k = 0; k < A->nt; k++) {
                        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
102 103
                        zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
                        INSERT_TASK_zgemm(
104
                            &options,
105
                            trans, ChamConjTrans,
106 107 108 109 110 111 112 113 114
                            tempnn, tempmm, tempkn, A->mb,
                            zalpha, A(n, k), ldan,  /* ldan * K */
                                    A(m, k), ldam,  /* ldam * M */
                            zbeta,  C(n, m), ldcn); /* ldc  * M */
                    }
                }
            }
        }
        /*
115
         *  Cham[Conj]Trans
116 117 118 119 120 121
         */
        else {
            for (k = 0; k < A->mt; k++) {
                tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                ldak = BLKLDD(A, k);
                dbeta = k == 0 ? beta : 1.0;
122
                INSERT_TASK_zherk(
123 124 125 126 127 128 129
                    &options,
                    uplo, trans,
                    tempnn, tempkm, A->mb,
                    alpha, A(k, n), ldak,  /* lda * N */
                    dbeta, C(n, n), ldcn); /* ldc * N */
            }
            /*
130
             *  Cham[Conj]Trans / ChamLower
131
             */
132
            if (uplo == ChamLower) {
133 134 135 136 137 138
                for (m = n+1; m < C->mt; m++) {
                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                    ldcm = BLKLDD(C, m);
                    for (k = 0; k < A->mt; k++) {
                        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                        ldak = BLKLDD(A, k);
139 140
                        zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
                        INSERT_TASK_zgemm(
141
                            &options,
142
                            trans, ChamNoTrans,
143 144 145 146 147 148 149 150
                            tempmm, tempnn, tempkm, A->mb,
                            zalpha, A(k, m), ldak,  /* lda * M */
                                    A(k, n), ldak,  /* lda * N */
                            zbeta,  C(m, n), ldcm); /* ldc * N */
                    }
                }
            }
            /*
151
             *  Cham[Conj]Trans / ChamUpper
152 153 154 155 156 157 158
             */
            else {
                for (m = n+1; m < C->mt; m++) {
                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                    for (k = 0; k < A->mt; k++) {
                        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                        ldak = BLKLDD(A, k);
159 160
                        zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
                        INSERT_TASK_zgemm(
161
                            &options,
162
                            trans, ChamNoTrans,
163 164 165 166 167 168 169 170 171
                            tempnn, tempmm, tempkm, A->mb,
                            zalpha, A(k, n), ldak,  /* lda * K */
                                    A(k, m), ldak,  /* lda * M */
                            zbeta,  C(n, m), ldcn); /* ldc * M */
                    }
                }
            }
        }
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
172
    RUNTIME_options_finalize(&options, chamctxt);
173
}