/**
 *
 * @file pzunglq_param.c
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 *
 *  MORSE auxiliary routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 1.0.0
 * @author Mathieu Faverge
 * @author Raphael Boucherie
 * @date 2017-05-17
 * @precisions normal z -> s d c
 *
 **/
#include "control/common.h"
#include <stdlib.h>

/*
 * Tile-reference shorthands.
 *
 * Each macro expands to the argument triple "descriptor, row, column", so a
 * tile can be written as a single pseudo-argument (e.g. Q(m, n)) in the task
 * insertion calls of this file. The macro names deliberately match the
 * identifiers of the descriptor variables they expand to; T refers to the
 * local pointer that morse_pzunglq_param() switches between TS and TT.
 */
#define A(m,n) A, (m), (n)
#define Q(m,n) Q, (m), (n)
#define T(m,n) T, (m), (n)
#define D(m,n) D, (m), (n)

/**
 *  Parallel construction of Q using tile V - dynamic scheduling
 *
 *  Inserts, for k going from min(A->mt, A->nt)-1 down to 0, the tasks that
 *  apply the reflectors stored in tile row k of A to the tiles of Q:
 *    - first one ztpmlqt task per (m, n) pair, for every tile column n > k
 *      taken in the reverse of the order produced by libhqr_walk_stepk(),
 *      using TS or TT depending on qrtree->gettype();
 *    - then one zunmlq task per (m, p) pair, for every tile column p returned
 *      by qrtree->getm() for step k, using TS.
 *
 * @param[in] qrtree
 *          Reduction tree queried through currpiv, gettype, getnbgeqrf and
 *          getm, and walked with libhqr_walk_stepk(). qrtree->mt sizes the
 *          tile-ordering array.
 *
 * @param[in] A
 *          Descriptor holding the reflectors; read by the inserted tasks.
 *
 * @param[in,out] Q
 *          Descriptor of the matrix the reflectors are applied to.
 *
 * @param[in] TS
 *          Descriptor of the T factors used by the TS kernels and by zunmlq.
 *
 * @param[in] TT
 *          Descriptor of the T factors used by the TT kernels.
 *
 * @param[in] D
 *          Descriptor used for the diagonal tiles passed to zunmlq. If NULL,
 *          A is used instead. When CHAMELEON_COPY_DIAG is defined, the upper
 *          part of A(k, p) is copied into D(k, p) before use.
 *
 * @param[in] sequence
 *          Sequence the tasks are attached to. Nothing is submitted if its
 *          status is not MORSE_SUCCESS on entry.
 *
 * @param[in] request
 *          Request forwarded to RUNTIME_options_init().
 */
void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q,
                         MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
                         MORSE_sequence_t *sequence, MORSE_request_t *request)
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    MORSE_desc_t *T;
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n, i, p;
    int K, L;
    int ldak, lddk, ldqm;
    int tempkm, tempkmin, temppn, tempnn, tempmm;
    int ib;
    int *tiles;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;

    /* Without a dedicated descriptor, the diagonal tiles are read from A */
    if (D == NULL) {
        D = A;
    }

    /*
     * Worker space (in elements) for the kernels inserted below:
     *
     * zunmlq  = A->nb * ib
     * ztpmlqt = A->nb * ib
     */
    ws_worker = A->nb * ib;

#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
     * zunmlq  = A->nb * ib
     * ztpmlqt = 2 * A->nb * ib
     */
    ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif

    /*
     * Initialisation of tiles: scratch array receiving, at each step k, the
     * tile order computed by libhqr_walk_stepk().
     *
     * NOTE(review): the allocation result is not checked; a failed calloc()
     * leads to a NULL write in libhqr_walk_stepk(). Reporting the failure
     * needs the project's sequence/request error API, which is not visible
     * from this file section -- TODO add proper handling.
     */
    tiles = calloc( qrtree->mt, sizeof(*tiles) );

    /* Convert the workspace sizes from elements to bytes */
    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

    K = chameleon_min(A->mt, A->nt);

    for (k = K-1; k >= 0; k--) {
        RUNTIME_iteration_push(morse, k);

        /* Number of rows in tile row k of A (the last tile row may be partial) */
        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
        ldak = BLKLDD(A, k);
        /* D may be a descriptor distinct from A: use its own leading dimension */
        lddk = BLKLDD(D, k);

        /* Setting the order of the tiles */
        libhqr_walk_stepk(qrtree, k, tiles + (k+1));

        /* Visit the tiles in the reverse of the order stored in tiles[k+1..] */
        for (i = A->nt-1; i > k; i--) {
            n = tiles[i];
            p = qrtree->currpiv(qrtree, k, n);

            tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;

            if(qrtree->gettype(qrtree, k, n) == 0){
                /* TS kernel */
                L = 0;
                T = TS;
            }
            else {
                /* TT kernel */
                L = tempnn;
                T = TT;
            }
            for (m = k; m < Q->mt; m++) {
                tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
                ldqm = BLKLDD(Q, m);

                /* Both tiles are migrated to the rank of Q(m, n) before the update */
                RUNTIME_data_migrate( sequence, Q(m, p),
                                      Q->get_rankof( Q, m, n ) );
                RUNTIME_data_migrate( sequence, Q(m, n),
                                      Q->get_rankof( Q, m, n ) );

                MORSE_TASK_ztpmlqt(
                    &options,
                    MorseRight, MorseNoTrans,
                    tempmm, tempnn, tempkm, L, ib, T->nb,
                    A(k, n), ldak,
                    T(k, n), T->mb,
                    Q(m, p), ldqm,
                    Q(m, n), ldqm);
            }
        }

        /* The zunmlq updates always use the TS descriptor */
        T = TS;
        for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
            p = qrtree->getm(qrtree, k, i);

            temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb;
            tempkmin = chameleon_min(tempkm, temppn);

#if defined(CHAMELEON_COPY_DIAG)
            /* Copy the upper part of A(k, p) into D(k, p) */
            MORSE_TASK_zlacpy(
                &options,
                MorseUpper, tempkmin, temppn, A->nb,
                A(k, p), ldak,
                D(k, p), lddk );
#if defined(CHAMELEON_USE_CUDA)
            /* Reset the lower part of D(k, p) (values 0. and 1. passed to zlaset) */
            MORSE_TASK_zlaset(
                &options,
                MorseLower, tempkmin, temppn,
                0., 1.,
                D(k, p), lddk );
#endif
#endif
            for (m = k; m < Q->mt; m++) {
                tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
                ldqm = BLKLDD(Q, m);

                /* Bring Q(m, p) back to its owner rank before the update */
                RUNTIME_data_migrate( sequence, Q(m, p),
                                      Q->get_rankof( Q, m, p ) );

                MORSE_TASK_zunmlq(
                    &options,
                    MorseRight, MorseNoTrans,
                    tempmm, temppn, tempkmin, ib, T->nb,
                    D(k, p), lddk,
                    T(k, p), T->mb,
                    Q(m, p), ldqm);
            }
        }
        RUNTIME_iteration_pop(morse);
    }

    free(tiles);
    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
}