Une MAJ de sécurité est nécessaire sur notre version actuelle. Elle sera effectuée lundi 02/08 entre 12h30 et 13h. L'interruption de service devrait durer quelques minutes (probablement moins de 5 minutes).

pztpgqrt.c 3.11 KB
Newer Older
1
/**
 *
 * @file pztpgqrt.c
 *
 * @copyright 2009-2016 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 * @copyright 2016-2018 KAUST. All rights reserved.
 *
 ***
 *
 * @brief Chameleon computational routines
 *
 * @version 1.0.0
 * @author Mathieu Faverge
 * @date 2016-12-15
 * @precisions normal z -> s d c
 *
 */
21 22
#include "control/common.h"

23 24 25 26
/*
 * Tile accessors: each one expands to the comma-separated triple
 * (descriptor, first tile index, second tile index), so a single macro call
 * supplies three consecutive arguments of a task-insertion call.
 * The expansions are intentionally not parenthesized: wrapping them would
 * turn the argument list into a single comma expression.
 */
#define V2(row, col) V2, row, col
#define T2(row, col) T2, row, col
#define Q1(row, col) Q1, row, col
#define Q2(row, col) Q2, row, col
27
/* Diagonal accessor: expands to the triple (D, idx, idx), i.e. the same
 * index is used twice. Not referenced in the visible part of this file. */
#define D(idx) D, idx, idx
28

29
/**
30
 *  Parallel tile QR factorization - dynamic scheduling
31
 */
32
void chameleon_pztpgqrt( int KT, int L,
33 34 35
                         CHAM_desc_t *V2, CHAM_desc_t *T2,
                         CHAM_desc_t *Q1, CHAM_desc_t *Q2,
                         RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
36
{
Mathieu Faverge's avatar
Mathieu Faverge committed
37
    CHAM_context_t *chamctxt;
Mathieu Faverge's avatar
Mathieu Faverge committed
38
    RUNTIME_option_t options;
39 40 41 42
    size_t ws_worker = 0;
    size_t ws_host = 0;

    int k, m, n;
Mathieu Faverge's avatar
Mathieu Faverge committed
43
    int ldvm, ldqk, ldqm;
44
    int tempkn, tempnn, tempmm, templm;
45
    int ib;
46 47

    /* Dimension of the first column */
48
    int maxm  = chameleon_max( Q2->m - L, 1 );
49
    int maxmt = (maxm % Q2->mb == 0) ? (maxm / Q2->mb) : (maxm / Q2->mb + 1);
50 51
    int maxmtk;

Mathieu Faverge's avatar
Mathieu Faverge committed
52
    chamctxt = chameleon_context_self();
Mathieu Faverge's avatar
Mathieu Faverge committed
53
    if (sequence->status != CHAMELEON_SUCCESS) {
54
        return;
Mathieu Faverge's avatar
Mathieu Faverge committed
55
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
56
    RUNTIME_options_init(&options, chamctxt, sequence, request);
57

Mathieu Faverge's avatar
Mathieu Faverge committed
58
    ib = CHAMELEON_IB;
59

60
    /*
61
     * ztpmqrt = Q1->nb * ib
62
     */
63
    ws_worker = Q1->nb * ib;
64 65 66 67 68

    /* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
    /* Worker space
     *
69
     * ztpmqrt = 2 * Q1->nb * ib
70
     */
71
    ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
72 73
#endif

Mathieu Faverge's avatar
Mathieu Faverge committed
74 75
    ws_worker *= sizeof(CHAMELEON_Complex64_t);
    ws_host   *= sizeof(CHAMELEON_Complex64_t);
76 77 78

    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );

79
    for (k = KT-1; k >= 0; k--) {
Mathieu Faverge's avatar
Mathieu Faverge committed
80
        RUNTIME_iteration_push(chamctxt, k);
81

82 83
        tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
        ldqk = BLKLDD(Q1, k);
84

85 86
        /* Equivalent to the tsmqr step on Q1,Q2 */
        maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
87
        for (m = maxmtk; m > -1; m--) {
88
            tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb;
89
            templm = ((L > 0) && (m == maxmtk)) ? tempmm : 0;
90 91 92 93 94
            ldvm = BLKLDD(V2, m);
            ldqm = BLKLDD(Q2, m);

            for (n = k; n < Q2->nt; n++) {
                tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb;
95
                /* TT kernel */
Mathieu Faverge's avatar
Mathieu Faverge committed
96
                INSERT_TASK_ztpmqrt(
97
                    &options,
Mathieu Faverge's avatar
Mathieu Faverge committed
98
                    ChamLeft, ChamNoTrans,
99 100 101 102 103 104 105
                    tempmm, tempnn, tempkn, templm, ib, T2->nb,
                    V2(m, k), ldvm,
                    T2(m, k), T2->mb,
                    Q1(k, n), ldqk,
                    Q2(m, n), ldqm );
            }
        }
106

Mathieu Faverge's avatar
Mathieu Faverge committed
107
        RUNTIME_iteration_pop(chamctxt);
108
    }
109

110
    RUNTIME_options_ws_free(&options);
Mathieu Faverge's avatar
Mathieu Faverge committed
111
    RUNTIME_options_finalize(&options, chamctxt);
112
}