zgels.c 16 KB
Newer Older
1
/**
2 3
 *
 * @file zgels.c
4
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
5 6
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
Mathieu Faverge's avatar
Mathieu Faverge committed
7
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8
 *                      Univ. Bordeaux. All rights reserved.
9
 *
10
 ***
11
 *
12
 * @brief Chameleon zgels wrappers
13
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
14
 * @version 1.0.0
15
 * @comment This file has been automatically generated
16
 *          from Plasma 2.5.0 for CHAMELEON 1.0.0
17 18 19 20 21 22 23
 * @author Jakub Kurzak
 * @author Mathieu Faverge
 * @author Emmanuel Agullo
 * @author Cedric Castagnede
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
24
 */
25
#include "control/common.h"
Mathieu Faverge's avatar
Mathieu Faverge committed
26
#include <stdlib.h>
27

28
/**
29
 *
30
 * @ingroup CHAMELEON_Complex64_t
31
 *
32
 *  CHAMELEON_zgels - solves overdetermined or underdetermined linear systems involving an M-by-N
33 34 35
 *  matrix A using the QR or the LQ factorization of A.  It is assumed that A has full rank.
 *  The following options are provided:
 *
36
 *  # trans = ChamNoTrans and M >= N: find the least squares solution of an overdetermined
37 38
 *    system, i.e., solve the least squares problem: minimize || B - A*X ||.
 *
39
 *  # trans = ChamNoTrans and M < N:  find the minimum norm solution of an underdetermined
40 41 42 43 44 45 46 47 48 49
 *    system A * X = B.
 *
 *  Several right hand side vectors B and solution vectors X can be handled in a single call;
 *  they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS
 *  solution matrix X.
 *
 *******************************************************************************
 *
 * @param[in] trans
 *          Intended usage:
50 51 52
 *          = ChamNoTrans:   the linear system involves A;
 *          = ChamConjTrans: the linear system involves A**H.
 *          Currently only ChamNoTrans is supported.
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
 *
 * @param[in] M
 *          The number of rows of the matrix A. M >= 0.
 *
 * @param[in] N
 *          The number of columns of the matrix A. N >= 0.
 *
 * @param[in] NRHS
 *          The number of right hand sides, i.e., the number of columns of the matrices B and X.
 *          NRHS >= 0.
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix A.
 *          On exit,
 *          if M >= N, A is overwritten by details of its QR factorization as returned by
68
 *                     CHAMELEON_zgeqrf;
69
 *          if M < N, A is overwritten by details of its LQ factorization as returned by
70
 *                      CHAMELEON_zgelqf.
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,M).
 *
 * @param[out] descT
 *          On exit, auxiliary factorization data.
 *
 * @param[in,out] B
 *          On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise;
 *          On exit, if return value = 0, B is overwritten by the solution vectors, stored
 *          columnwise:
 *          if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual
 *          sum of squares for the solution in each column is given by the sum of squares of the
 *          modulus of elements N+1 to M in that column;
 *          if M < N, rows 1 to N of B contain the minimum norm solution vectors;
 *
 * @param[in] LDB
 *          The leading dimension of the array B. LDB >= MAX(1,M,N).
 *
 *******************************************************************************
 *
92 93
 * @retval CHAMELEON_SUCCESS successful exit
 * @retval <0 if -i, the i-th argument had an illegal value
94 95 96
 *
 *******************************************************************************
 *
97 98 99 100 101
 * @sa CHAMELEON_zgels_Tile
 * @sa CHAMELEON_zgels_Tile_Async
 * @sa CHAMELEON_cgels
 * @sa CHAMELEON_dgels
 * @sa CHAMELEON_sgels
102
 *
103
 */
104 105 106 107
int CHAMELEON_zgels( cham_trans_t trans, int M, int N, int NRHS,
                 CHAMELEON_Complex64_t *A, int LDA,
                 CHAM_desc_t *descT,
                 CHAMELEON_Complex64_t *B, int LDB )
108 109 110 111
{
    int i, j;
    int NB;
    int status;
Mathieu Faverge's avatar
Mathieu Faverge committed
112
    CHAM_context_t *chamctxt;
113 114 115 116
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
    CHAM_desc_t descAl, descAt;
    CHAM_desc_t descBl, descBt;
117

Mathieu Faverge's avatar
Mathieu Faverge committed
118 119 120
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgels", "CHAMELEON not initialized");
121
        return CHAMELEON_ERR_NOT_INITIALIZED;
122 123 124
    }

    /* Check input arguments */
125
    if (trans != ChamNoTrans) {
Mathieu Faverge's avatar
Mathieu Faverge committed
126
        chameleon_error("CHAMELEON_zgels", "only ChamNoTrans supported");
127
        return CHAMELEON_ERR_NOT_SUPPORTED;
128 129
    }
    if (M < 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
130
        chameleon_error("CHAMELEON_zgels", "illegal value of M");
131 132 133
        return -2;
    }
    if (N < 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
134
        chameleon_error("CHAMELEON_zgels", "illegal value of N");
135 136 137
        return -3;
    }
    if (NRHS < 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
138
        chameleon_error("CHAMELEON_zgels", "illegal value of NRHS");
139 140
        return -4;
    }
141
    if (LDA < chameleon_max(1, M)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
142
        chameleon_error("CHAMELEON_zgels", "illegal value of LDA");
143 144
        return -6;
    }
145
    if (LDB < chameleon_max(1, chameleon_max(M, N))) {
Mathieu Faverge's avatar
Mathieu Faverge committed
146
        chameleon_error("CHAMELEON_zgels", "illegal value of LDB");
147 148 149
        return -9;
    }
    /* Quick return */
150 151
    if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) {
        for (i = 0; i < chameleon_max(M, N); i++)
152 153
            for (j = 0; j < NRHS; j++)
                B[j*LDB+i] = 0.0;
154
        return CHAMELEON_SUCCESS;
155 156 157
    }

    /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
Mathieu Faverge's avatar
Mathieu Faverge committed
158
    status = chameleon_tune(CHAMELEON_FUNC_ZGELS, M, N, NRHS);
159
    if (status != CHAMELEON_SUCCESS) {
Mathieu Faverge's avatar
Mathieu Faverge committed
160
        chameleon_error("CHAMELEON_zgels", "chameleon_tune() failed");
161 162 163 164
        return status;
    }

    /* Set NT */
165
    NB = CHAMELEON_NB;
166

Mathieu Faverge's avatar
Mathieu Faverge committed
167
    chameleon_sequence_create( chamctxt, &sequence );
168

169
    /* Submit the matrix conversion */
170
    if ( M >= N ) {
Mathieu Faverge's avatar
Mathieu Faverge committed
171
        chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
172
                         A, NB, NB, LDA, N, M, N, sequence, &request );
Mathieu Faverge's avatar
Mathieu Faverge committed
173
        chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
174
                         B, NB, NB, LDB, NRHS, M, NRHS, sequence, &request );
175
    } else {
176
        /* Submit the matrix conversion */
Mathieu Faverge's avatar
Mathieu Faverge committed
177
        chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
178
                         A, NB, NB, LDA, N, M, N, sequence, &request );
Mathieu Faverge's avatar
Mathieu Faverge committed
179
        chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
180
                         B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request );
181 182 183
    }

    /* Call the tile interface */
184
    CHAMELEON_zgels_Tile_Async( ChamNoTrans, &descAt, descT, &descBt, sequence, &request );
185

Mathieu Faverge's avatar
Mathieu Faverge committed
186
    /* Submit the matrix conversion back */
Mathieu Faverge's avatar
Mathieu Faverge committed
187
    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
188
                     ChamDescInout, ChamUpperLower, sequence, &request );
Mathieu Faverge's avatar
Mathieu Faverge committed
189
    chameleon_ztile2lap( chamctxt, &descBl, &descBt,
190 191
                     ChamDescInout, ChamUpperLower, sequence, &request );
    CHAMELEON_Desc_Flush( descT, sequence );
Mathieu Faverge's avatar
Mathieu Faverge committed
192

Mathieu Faverge's avatar
Mathieu Faverge committed
193
    chameleon_sequence_wait( chamctxt, sequence );
Mathieu Faverge's avatar
Mathieu Faverge committed
194

Mathieu Faverge's avatar
Mathieu Faverge committed
195
    /* Cleanup the temporary data */
Mathieu Faverge's avatar
Mathieu Faverge committed
196 197
    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
    chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );
198

199
    status = sequence->status;
Mathieu Faverge's avatar
Mathieu Faverge committed
200
    chameleon_sequence_destroy( chamctxt, sequence );
201 202 203
    return status;
}

204 205
/**
 ********************************************************************************
206
 *
207
 * @ingroup CHAMELEON_Complex64_t_Tile
208
 *
209
 *  CHAMELEON_zgels_Tile - Solves overdetermined or underdetermined linear system of equations
210
 *  using the tile QR or the tile LQ factorization.
211
 *  Tile equivalent of CHAMELEON_zgels().
212 213 214 215 216 217 218 219
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] trans
 *          Intended usage:
220 221 222
 *          = ChamNoTrans:   the linear system involves A;
 *          = ChamConjTrans: the linear system involves A**H.
 *          Currently only ChamNoTrans is supported.
223 224 225 226 227
 *
 * @param[in,out] A
 *          On entry, the M-by-N matrix A.
 *          On exit,
 *          if M >= N, A is overwritten by details of its QR factorization as returned by
228
 *                     CHAMELEON_zgeqrf;
229
 *          if M < N, A is overwritten by details of its LQ factorization as returned by
230
 *                      CHAMELEON_zgelqf.
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
 *
 * @param[out] T
 *          On exit, auxiliary factorization data.
 *
 * @param[in,out] B
 *          On entry, the M-by-NRHS matrix B of right hand side vectors, stored columnwise;
 *          On exit, if return value = 0, B is overwritten by the solution vectors, stored
 *          columnwise:
 *          if M >= N, rows 1 to N of B contain the least squares solution vectors; the residual
 *          sum of squares for the solution in each column is given by the sum of squares of the
 *          modulus of elements N+1 to M in that column;
 *          if M < N, rows 1 to N of B contain the minimum norm solution vectors;
 *
 *******************************************************************************
 *
246
 * @return CHAMELEON_SUCCESS successful exit
247 248 249
 *
 *******************************************************************************
 *
250 251 252 253 254
 * @sa CHAMELEON_zgels
 * @sa CHAMELEON_zgels_Tile_Async
 * @sa CHAMELEON_cgels_Tile
 * @sa CHAMELEON_dgels_Tile
 * @sa CHAMELEON_sgels_Tile
255
 *
256
 */
257 258
int CHAMELEON_zgels_Tile( cham_trans_t trans, CHAM_desc_t *A,
                      CHAM_desc_t *T, CHAM_desc_t *B )
259
{
Mathieu Faverge's avatar
Mathieu Faverge committed
260
    CHAM_context_t *chamctxt;
261 262
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
263 264
    int status;

Mathieu Faverge's avatar
Mathieu Faverge committed
265 266 267
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgels_Tile", "CHAMELEON not initialized");
268
        return CHAMELEON_ERR_NOT_INITIALIZED;
269
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
270
    chameleon_sequence_create( chamctxt, &sequence );
271

272
    CHAMELEON_zgels_Tile_Async( trans, A, T, B, sequence, &request );
273

274 275 276
    CHAMELEON_Desc_Flush( A, sequence );
    CHAMELEON_Desc_Flush( T, sequence );
    CHAMELEON_Desc_Flush( B, sequence );
Mathieu Faverge's avatar
Mathieu Faverge committed
277

Mathieu Faverge's avatar
Mathieu Faverge committed
278
    chameleon_sequence_wait( chamctxt, sequence );
279
    status = sequence->status;
Mathieu Faverge's avatar
Mathieu Faverge committed
280
    chameleon_sequence_destroy( chamctxt, sequence );
281 282 283
    return status;
}

284 285
/**
 ********************************************************************************
286
 *
287
 * @ingroup CHAMELEON_Complex64_t_Tile_Async
288
 *
289
 *  CHAMELEON_zgels_Tile_Async - Solves overdetermined or underdetermined linear
290
 *  system of equations using the tile QR or the tile LQ factorization.
291
 *  Non-blocking equivalent of CHAMELEON_zgels_Tile().
292 293 294 295 296 297 298 299 300 301 302 303 304 305
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
306 307 308 309 310
 * @sa CHAMELEON_zgels
 * @sa CHAMELEON_zgels_Tile
 * @sa CHAMELEON_cgels_Tile_Async
 * @sa CHAMELEON_dgels_Tile_Async
 * @sa CHAMELEON_sgels_Tile_Async
311
 *
312
 */
313 314 315
int CHAMELEON_zgels_Tile_Async( cham_trans_t trans, CHAM_desc_t *A,
                            CHAM_desc_t *T, CHAM_desc_t *B,
                            RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
316
{
317 318
    CHAM_desc_t *subA;
    CHAM_desc_t *subB;
Mathieu Faverge's avatar
Mathieu Faverge committed
319
    CHAM_context_t *chamctxt;
320
    CHAM_desc_t D, *Dptr = NULL;
321

Mathieu Faverge's avatar
Mathieu Faverge committed
322 323 324
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zgels_Tile", "CHAMELEON not initialized");
325
        return CHAMELEON_ERR_NOT_INITIALIZED;
326 327
    }
    if (sequence == NULL) {
Mathieu Faverge's avatar
Mathieu Faverge committed
328
        chameleon_fatal_error("CHAMELEON_zgels_Tile", "NULL sequence");
329
        return CHAMELEON_ERR_UNALLOCATED;
330 331
    }
    if (request == NULL) {
Mathieu Faverge's avatar
Mathieu Faverge committed
332
        chameleon_fatal_error("CHAMELEON_zgels_Tile", "NULL request");
333
        return CHAMELEON_ERR_UNALLOCATED;
334 335
    }
    /* Check sequence status */
336 337
    if (sequence->status == CHAMELEON_SUCCESS) {
        request->status = CHAMELEON_SUCCESS;
Mathieu Faverge's avatar
Mathieu Faverge committed
338 339
    }
    else {
Mathieu Faverge's avatar
Mathieu Faverge committed
340
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
Mathieu Faverge's avatar
Mathieu Faverge committed
341
    }
342 343

    /* Check descriptors for correctness */
Mathieu Faverge's avatar
Mathieu Faverge committed
344 345 346
    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgels_Tile", "invalid first descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
347
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
348 349 350
    if (chameleon_desc_check(T) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgels_Tile", "invalid second descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
351
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
352 353 354
    if (chameleon_desc_check(B) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zgels_Tile", "invalid third descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
355 356 357
    }
    /* Check input arguments */
    if (A->nb != A->mb || B->nb != B->mb) {
Mathieu Faverge's avatar
Mathieu Faverge committed
358 359
        chameleon_error("CHAMELEON_zgels_Tile", "only square tiles supported");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
360
    }
361
    if (trans != ChamNoTrans) {
Mathieu Faverge's avatar
Mathieu Faverge committed
362 363
        chameleon_error("CHAMELEON_zgels_Tile", "only ChamNoTrans supported");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_NOT_SUPPORTED);
364 365
    }
    /* Quick return  - currently NOT equivalent to LAPACK's:
366 367 368 369
     if (chameleon_min(M, chameleon_min(N, NRHS)) == 0) {
     for (i = 0; i < chameleon_max(M, N); i++)
     for (j = 0; j < NRHS; j++)
     B[j*LDB+i] = 0.0;
370
     return CHAMELEON_SUCCESS;
371
     }
372 373
     */
    if (A->m >= A->n) {
374 375
#if defined(CHAMELEON_COPY_DIAG)
        {
376
            int n = chameleon_min(A->m, A->n);
Mathieu Faverge's avatar
Mathieu Faverge committed
377
            chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
378 379 380
            Dptr = &D;
        }
#endif
Mathieu Faverge's avatar
Mathieu Faverge committed
381
        if (chamctxt->householder == ChamFlatHouseholder) {
382
            chameleon_pzgeqrf( 1, A, T, Dptr, sequence, request );
383

384
            chameleon_pzunmqr( 0, ChamLeft, ChamConjTrans, A, B, T, Dptr, sequence, request );
385 386
        }
        else {
387
            chameleon_pzgeqrfrh( 1, CHAMELEON_RHBLK, A, T, Dptr, sequence, request );
388

389
            chameleon_pzunmqrrh( 0, CHAMELEON_RHBLK, ChamLeft, ChamConjTrans, A, B, T, Dptr, sequence, request );
390
        }
Mathieu Faverge's avatar
Mathieu Faverge committed
391 392 393
        subB = chameleon_desc_submatrix(B, 0, 0, A->n, B->n);
        subA = chameleon_desc_submatrix(A, 0, 0, A->n, A->n);
        chameleon_pztrsm( ChamLeft, ChamUpper, ChamNoTrans, ChamNonUnit, 1.0, subA, subB, sequence, request );
394 395
    }
    else {
Mathieu Faverge's avatar
Mathieu Faverge committed
396 397
        /* subB = chameleon_desc_submatrix(B, A->m, 0, A->n-A->m, B->n);
         chameleon_pzlaset( ChamUpperLower, 0., 0., subB, sequence, request );
398
         free(subB); */
399 400
#if defined(CHAMELEON_COPY_DIAG)
        {
401
            int m = chameleon_min(A->m, A->n);
Mathieu Faverge's avatar
Mathieu Faverge committed
402
            chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
403 404 405
            Dptr = &D;
        }
#endif
Mathieu Faverge's avatar
Mathieu Faverge committed
406
        if (chamctxt->householder == ChamFlatHouseholder) {
407
            chameleon_pzgelqf( 1, A, T, Dptr, sequence, request );
408 409
        }
        else {
410
            chameleon_pzgelqfrh( 1, CHAMELEON_RHBLK, A, T, Dptr, sequence, request );
411
        }
Mathieu Faverge's avatar
Mathieu Faverge committed
412 413 414
        subB = chameleon_desc_submatrix(B, 0, 0, A->m, B->n);
        subA = chameleon_desc_submatrix(A, 0, 0, A->m, A->m);
        chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamNonUnit, 1.0, subA, subB, sequence, request );
415

Mathieu Faverge's avatar
Mathieu Faverge committed
416
        if (chamctxt->householder == ChamFlatHouseholder) {
417
            chameleon_pzunmlq( 0, ChamLeft, ChamConjTrans, A, B, T, Dptr, sequence, request );
418 419
        }
        else {
420
            chameleon_pzunmlqrh( 0, CHAMELEON_RHBLK, ChamLeft, ChamConjTrans, A, B, T, Dptr, sequence, request );
421 422
        }
    }
423 424 425 426 427

    free(subA);
    free(subB);

    if (Dptr != NULL) {
428 429 430 431
        CHAMELEON_Desc_Flush( A, sequence );
        CHAMELEON_Desc_Flush( T, sequence );
        CHAMELEON_Desc_Flush( B, sequence );
        CHAMELEON_Desc_Flush( Dptr, sequence );
Mathieu Faverge's avatar
Mathieu Faverge committed
432
        chameleon_sequence_wait( chamctxt, sequence );
433
        chameleon_desc_destroy( Dptr );
434 435
    }
    (void)D;
436
    return CHAMELEON_SUCCESS;
437
}