/**
 *
 * @copyright (c) 2009-2014 The University of Tennessee and The University of
 *                          Tennessee Research Foundation.  All rights reserved.
 * @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                          Univ. Bordeaux. All rights reserved.
 *
 **/

/**
 *
 * @file pzgelqfrh.c
 *
 *  MORSE auxiliary routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 2.5.0
 * @comment This file has been automatically generated
 *          from Plasma 2.5.0 for MORSE 1.0.0
 * @author Jakub Kurzak
 * @author Hatem Ltaief
 * @author Dulceneia Becker
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
 **/
#include "control/common.h"

/*
 * Tile accessors. Each macro expands to a "descriptor, tile row, tile column"
 * argument triple; they rely on variables named A, T, D and BS being in scope
 * at the point of use.
 */
#define A(m,n)  A,  (m),  (n)
#define T(m,n)  T,  (m),  (n)
/* Same descriptor as T(), with the tile column shifted by A->nt. */
#define T2(m,n) T,  (m),  ((n)+A->nt)
#if defined(CHAMELEON_COPY_DIAG)
/* Dedicated descriptor D: tile row (n)/BS, tile column 0 (m is ignored). */
#define D(m,n) D, ((n)/BS), 0
#else
/* No separate copy: D() designates the same tile as A(). */
#define D(m,n) A,  (m),  (n)
#endif

Mathieu Faverge's avatar
Mathieu Faverge committed
42
/*
43
 *  Parallel tile LQ factorization (reduction Householder) - dynamic scheduling
Mathieu Faverge's avatar
Mathieu Faverge committed
44
 */
45
void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS,
46 47 48 49 50 51 52 53
                     MORSE_sequence_t *sequence, MORSE_request_t *request)
{
    MORSE_context_t *morse;
    MORSE_option_t options;
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n;
Mathieu Faverge's avatar
Mathieu Faverge committed
54
    int K, N, RD;
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
    int ldak, ldam;
    int tempkmin, tempkm, tempNn, tempnn, tempmm, tempNRDn;
    int ib;

    morse = morse_context_self();
    if (sequence->status != MORSE_SUCCESS)
        return;
    RUNTIME_options_init(&options, morse, sequence, request);

    ib = MORSE_IB;

    /*
     * zgelqt = A->nb * (ib+1)
     * zunmlq = A->nb * ib
     * ztslqt = A->nb * (ib+1)
     * zttlqt = A->nb * (ib+1)
     * ztsmlq = A->nb * ib
     * zttmlq = A->nb * ib
     */
    ws_worker = A->nb * (ib+1);

    /* Allocation of temporary (scratch) working space */
Mathieu Faverge's avatar
Mathieu Faverge committed
77 78 79 80 81 82
#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
     * zunmqr = A->nb * ib
     * ztsmqr = 2 * A->nb * ib
     */
83
    ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
Mathieu Faverge's avatar
Mathieu Faverge committed
84 85
#endif

86 87 88 89 90
    ws_worker *= sizeof(MORSE_Complex64_t);
    ws_host   *= sizeof(MORSE_Complex64_t);

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

Mathieu Faverge's avatar
Mathieu Faverge committed
91 92 93 94
    K = chameleon_min(A->mt, A->nt);

    /* The number of the factorization */
    for (k = 0; k < K; k++) {
95
        RUNTIME_iteration_push(morse, k);
96

97 98 99 100
        tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
        ldak = BLKLDD(A, k);
        for (N = k; N < A->nt; N += BS) {
            tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb;
101
            tempkmin = chameleon_min(tempkm, tempNn);
102 103 104 105 106
            MORSE_TASK_zgelqt(
                &options,
                tempkm, tempNn, ib, T->nb,
                A(k, N), ldak,
                T(k, N), T->mb);
Mathieu Faverge's avatar
Mathieu Faverge committed
107
            if ( k < (A->mt-1) ) {
108
#if defined(CHAMELEON_COPY_DIAG)
Mathieu Faverge's avatar
Mathieu Faverge committed
109 110 111 112
                MORSE_TASK_zlacpy(
                    &options,
                    MorseUpper, tempkm, tempNn, A->nb,
                    A(k, N), ldak,
113
                    D(k, N), ldak );
Mathieu Faverge's avatar
Mathieu Faverge committed
114
#if defined(CHAMELEON_USE_CUDA)
Mathieu Faverge's avatar
Mathieu Faverge committed
115 116 117 118
                MORSE_TASK_zlaset(
                    &options,
                    MorseLower, tempkm, tempNn,
                    0., 1.,
119
                    D(k, N), ldak );
120
#endif
121
#endif
Mathieu Faverge's avatar
Mathieu Faverge committed
122
            }
123 124 125 126 127 128 129
            for (m = k+1; m < A->mt; m++) {
                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                ldam = BLKLDD(A, m);
                MORSE_TASK_zunmlq(
                    &options,
                    MorseRight, MorseConjTrans,
                    tempmm, tempNn, tempkmin, ib, T->nb,
130
                    D(k, N), ldak,
131 132 133
                    T(k, N), T->mb,
                    A(m, N), ldam);
            }
134
            for (n = N+1; n < chameleon_min(N+BS, A->nt); n++) {
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                MORSE_TASK_ztslqt(
                    &options,
                    tempkm, tempnn, ib, T->nb,
                    A(k, N), ldak,
                    A(k, n), ldak,
                    T(k, n), T->mb);

                for (m = k+1; m < A->mt; m++) {
                    tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                    ldam = BLKLDD(A, m);
                    MORSE_TASK_ztsmlq(
                        &options,
                        MorseRight, MorseConjTrans,
                        tempmm, A->nb, tempmm, tempnn, tempkm, ib, T->nb,
                        A(m, N), ldam,
                        A(m, n), ldam,
                        A(k, n), ldak,
                        T(k, n), T->mb);
                }
            }
        }
        for (RD = BS; RD < A->nt-k; RD *= 2) {
            for (N = k; N+RD < A->nt; N += 2*RD) {
                tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb;
                MORSE_TASK_zttlqt(
                    &options,
                    tempkm, tempNRDn, ib, T->nb,
                    A (k, N   ), ldak,
                    A (k, N+RD), ldak,
                    T2(k, N+RD), T->mb);

                for (m = k+1; m < A->mt; m++) {
                    tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                    ldam   = BLKLDD(A, m );
                    MORSE_TASK_zttmlq(
                        &options,
                        MorseRight, MorseConjTrans,
                        tempmm, A->nb, tempmm, tempNRDn, tempkm, ib, T->nb,
                        A (m, N   ), ldam,
                        A (m, N+RD), ldam,
                        A (k, N+RD), ldak,
                        T2(k, N+RD), T->mb);
                }
            }
        }
181
        RUNTIME_iteration_pop(morse);
182
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
183

184 185
    RUNTIME_options_ws_free(&options);
    RUNTIME_options_finalize(&options, morse);
Mathieu Faverge's avatar
Mathieu Faverge committed
186
    (void)D;
187
}