zheevd.c 19.6 KB
Newer Older
1
/**
2 3
 *
 * @file zheevd.c
4
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
5 6
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
Mathieu Faverge's avatar
Mathieu Faverge committed
7
 * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
8
 *                      Univ. Bordeaux. All rights reserved.
9
 *
10
 ***
11
 *
12
 * @brief Chameleon zheevd wrappers
13
 *
Mathieu Faverge's avatar
Mathieu Faverge committed
14
 * @version 1.0.0
15 16 17 18 19
 * @author Azzam Haidar
 * @author Hatem Ltaief
 * @date 2010-11-15
 * @precisions normal z -> s d c
 *
20
 */
21
#include "control/common.h"
22
#include <stdlib.h>
23
#include <string.h>
24
#if !defined(CHAMELEON_SIMULATION)
25
#include <coreblas/lapacke.h>
26
#endif
27

28 29
/**
 ********************************************************************************
30
 *
31
 * @ingroup CHAMELEON_Complex64_t
32
 *
33
 *  CHAMELEON_zheevd - Computes all eigenvalues and, optionally,
34 35 36 37 38 39 40 41 42 43
 *  eigenvectors of a complex Hermitian matrix A. The matrix A is
 *  preliminary reduced to tridiagonal form using a two-stage
 *  approach:
 *  First stage: reduction to band tridiagonal form;
 *  Second stage: reduction from band to tridiagonal form.
 *
 *******************************************************************************
 *
 * @param[in] jobz
 *          Intended usage:
44 45
 *          = ChamNoVec: computes eigenvalues only;
 *          = ChamVec: computes eigenvalues and eigenvectors.
46 47 48 49
 *
 * @param[in] uplo
 *          Specifies whether the matrix A is upper triangular or
 *          lower triangular:
50 51
 *          = ChamUpper: Upper triangle of A is stored;
 *          = ChamLower: Lower triangle of A is stored.
52 53 54 55 56 57
 *
 * @param[in] N
 *          The order of the matrix A. N >= 0.
 *
 * @param[in,out] A
 *          On entry, the symmetric (or Hermitian) matrix A.
58
 *          If uplo = ChamUpper, the leading N-by-N upper triangular
59 60 61
 *          part of A contains the upper triangular part of the matrix
 *          A, and the strictly lower triangular part of A is not
 *          referenced.
62
 *          If uplo = ChamLower, the leading N-by-N lower triangular
63 64 65
 *          part of A contains the lower triangular part of the matrix
 *          A, and the strictly upper triangular part of A is not
 *          referenced.
66 67
 *          On exit, the lower triangle (if uplo = ChamLower) or the
 *          upper triangle (if uplo = ChamUpper) of A, including the
68 69 70 71 72 73 74 75 76
 *          diagonal, is destroyed.
 *
 * @param[in] LDA
 *          The leading dimension of the array A. LDA >= max(1,N).
 *
 * @param[out] W
 *          On exit, if info = 0, the eigenvalues.
 *
 * @param[in, out] descT
77
 *          On entry, descriptor as return by CHAMELEON_Alloc_Workspace_zheevd
78 79 80 81
 *          On exit, contains auxiliary factorization data.
 *
 *******************************************************************************
 *
82 83 84
 * @retval CHAMELEON_SUCCESS successful exit
 * @retval <0 if -i, the i-th argument had an illegal value
 * @retval >0 if INFO = i, the algorithm failed to converge; i
85 86 87 88 89
 *               off-diagonal elements of an intermediate tridiagonal
 *               form did not converge to zero.
 *
 *******************************************************************************
 *
90 91 92 93 94
 * @sa CHAMELEON_zheevd_Tile
 * @sa CHAMELEON_zheevd_Tile_Async
 * @sa CHAMELEON_cheevd
 * @sa CHAMELEON_dsyev
 * @sa CHAMELEON_ssyev
95
 *
96
 */
97
int CHAMELEON_zheevd( cham_job_t jobz, cham_uplo_t uplo, int N,
98 99 100
                      CHAMELEON_Complex64_t *A, int LDA,
                      double *W,
                      CHAM_desc_t *descT )
101 102 103
{
    int NB;
    int status;
Mathieu Faverge's avatar
Mathieu Faverge committed
104
    CHAM_context_t  *chamctxt;
105 106 107
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t   request = RUNTIME_REQUEST_INITIALIZER;
    CHAM_desc_t descAl, descAt;
108

Mathieu Faverge's avatar
Mathieu Faverge committed
109 110 111
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zheevd", "CHAMELEON not initialized");
112
        return CHAMELEON_ERR_NOT_INITIALIZED;
113 114 115
    }

    /* Check input arguments */
116
    if (jobz != ChamNoVec && jobz != ChamVec) {
Mathieu Faverge's avatar
Mathieu Faverge committed
117
        chameleon_error("CHAMELEON_zheevd", "illegal value of jobz");
118 119
        return -1;
    }
120
    if ((uplo != ChamLower) && (uplo != ChamUpper)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
121
        chameleon_error("CHAMELEON_zheevd", "illegal value of uplo");
122 123 124
        return -2;
    }
    if (N < 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
125
        chameleon_error("CHAMELEON_zheevd", "illegal value of N");
126 127
        return -3;
    }
128
    if (LDA < chameleon_max(1, N)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
129
        chameleon_error("CHAMELEON_zheevd", "illegal value of LDA");
130 131 132 133 134
        return -5;
    }

    /* Quick return */
    if (N == 0)
135
        return CHAMELEON_SUCCESS;
136 137

    /* Tune NB & IB depending on N; Set NBNB */
Mathieu Faverge's avatar
Mathieu Faverge committed
138
    status = chameleon_tune(CHAMELEON_FUNC_ZHEEVD, N, N, 0);
139
    if (status != CHAMELEON_SUCCESS) {
Mathieu Faverge's avatar
Mathieu Faverge committed
140
        chameleon_error("CHAMELEON_zheevd", "chameleon_tune() failed");
141 142 143 144
        return status;
    }

    /* Set NT */
145
    NB = CHAMELEON_NB;
146

Mathieu Faverge's avatar
Mathieu Faverge committed
147
    chameleon_sequence_create( chamctxt, &sequence );
148

149
    /* Submit the matrix conversion */
Mathieu Faverge's avatar
Mathieu Faverge committed
150
    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, uplo,
151
                         A, NB, NB, LDA, N, N, N, sequence, &request );
152 153

    /* Call the tile interface */
154
    CHAMELEON_zheevd_Tile_Async( jobz, uplo, &descAt, W, descT, sequence, &request );
155

Mathieu Faverge's avatar
Mathieu Faverge committed
156
    /* Submit the matrix conversion back */
Mathieu Faverge's avatar
Mathieu Faverge committed
157
    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
158
                         ChamDescInout, uplo, sequence, &request );
Mathieu Faverge's avatar
Mathieu Faverge committed
159

Mathieu Faverge's avatar
Mathieu Faverge committed
160
    chameleon_sequence_wait( chamctxt, sequence );
Mathieu Faverge's avatar
Mathieu Faverge committed
161

Mathieu Faverge's avatar
Mathieu Faverge committed
162
    /* Cleanup the temporary data */
Mathieu Faverge's avatar
Mathieu Faverge committed
163
    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
164 165

    status = sequence->status;
Mathieu Faverge's avatar
Mathieu Faverge committed
166
    chameleon_sequence_destroy( chamctxt, sequence );
167 168
    return status;
}
169 170
/**
 ********************************************************************************
171
 *
172
 * @ingroup CHAMELEON_Complex64_t_Tile
173
 *
174
 *  CHAMELEON_zheevd_Tile - Computes all eigenvalues and, optionally, eigenvectors of a
175 176 177 178 179 180 181 182 183 184 185 186
 *  complex Hermitian matrix A using a two-stage approach:
 *  First stage: reduction to band tridiagonal form;
 *  Second stage: reduction from band to tridiagonal form.
 *
 *  Operates on matrices stored by tiles.
 *  All matrices are passed through descriptors.
 *  All dimensions are taken from the descriptors.
 *
 *******************************************************************************
 *
 * @param[in] jobz
 *          Intended usage:
187 188
 *          = ChamNoVec: computes eigenvalues only;
 *          = ChamVec: computes eigenvalues and eigenvectors.
189 190 191 192
 *
 * @param[in] uplo
 *          Specifies whether the matrix A is upper triangular or
 *          lower triangular:
193 194
 *          = ChamUpper: Upper triangle of A is stored;
 *          = ChamLower: Lower triangle of A is stored.
195 196 197
 *
 * @param[in,out] A
 *          On entry, the symmetric (or Hermitian) matrix A.
198
 *          If uplo = ChamUpper, the leading N-by-N upper triangular
199 200 201 202 203 204
 *          part of A contains the upper triangular part of the matrix
 *          A, and the strictly lower triangular part of A is not
 *          referenced.
 *          If UPLO = 'L', the leading N-by-N lower triangular part of
 *          A contains the lower triangular part of the matrix A, and
 *          the strictly upper triangular part of A is not referenced.
205
 *          On exit, if jobz = ChamVec, then if return value = 0, A
206
 *          contains the orthonormal eigenvectors of the matrix A.
207 208 209
 *          If jobz = ChamNoVec, then on exit the lower triangle (if
 *          uplo = ChamLower) or the upper triangle (if uplo =
 *          ChamUpper) of A, including the diagonal, is destroyed.*
210 211 212 213 214 215
 *
 * @param[out] W
 *          On exit, if info = 0, the eigenvalues.
 *
 * @param[in,out] T
 *          On entry, descriptor as return by
216
 *          CHAMELEON_Alloc_Workspace_zheevd
217 218 219 220
 *          On exit, contains auxiliary factorization data.
 *
 *******************************************************************************
 *
221 222 223
 * @retval CHAMELEON_SUCCESS successful exit
 * @retval <0 if -i, the i-th argument had an illegal value
 * @retval >0 if INFO = i, the algorithm failed to converge; i
224 225 226 227 228
 *               off-diagonal elements of an intermediate tridiagonal
 *               form did not converge to zero.
 *
 *******************************************************************************
 *
229 230 231 232 233
 * @sa CHAMELEON_zheevd_Tile
 * @sa CHAMELEON_zheevd_Tile_Async
 * @sa CHAMELEON_cheevd
 * @sa CHAMELEON_dsyev
 * @sa CHAMELEON_ssyev
234
 *
235
 */
236
int CHAMELEON_zheevd_Tile( cham_job_t jobz, cham_uplo_t uplo,
237
                           CHAM_desc_t *A, double *W, CHAM_desc_t *T )
238
{
Mathieu Faverge's avatar
Mathieu Faverge committed
239
    CHAM_context_t *chamctxt;
240 241
    RUNTIME_sequence_t *sequence = NULL;
    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
242 243
    int status;

Mathieu Faverge's avatar
Mathieu Faverge committed
244 245 246
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zheevd_Tile", "CHAMELEON not initialized");
247
        return CHAMELEON_ERR_NOT_INITIALIZED;
248
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
249
    chameleon_sequence_create( chamctxt, &sequence );
250

251
    CHAMELEON_zheevd_Tile_Async( jobz, uplo, A, W, T, sequence, &request );
252

253 254
    CHAMELEON_Desc_Flush( A, sequence );
    CHAMELEON_Desc_Flush( T, sequence );
Mathieu Faverge's avatar
Mathieu Faverge committed
255

Mathieu Faverge's avatar
Mathieu Faverge committed
256
    chameleon_sequence_wait( chamctxt, sequence );
257
    status = sequence->status;
Mathieu Faverge's avatar
Mathieu Faverge committed
258
    chameleon_sequence_destroy( chamctxt, sequence );
259 260 261
    return status;
}

262 263
/**
 ********************************************************************************
264
 *
265
 * @ingroup CHAMELEON_Complex64_t_Tile_Async
266
 *
267
 *  CHAMELEON_zheevd_Tile_Async - Computes all eigenvalues and,
268 269 270 271 272 273 274 275 276 277 278 279
 *  optionally, eigenvectors of a complex Hermitian matrix A using a
 *  two-stage approach:
 *  First stage: reduction to band tridiagonal form;
 *  Second stage: reduction from band to tridiagonal form.
 *
 *  May return before the computation is finished.
 *  Allows for pipelining of operations at runtime.
 *
 *******************************************************************************
 *
 * @param[in] jobz
 *          Intended usage:
280 281
 *          = ChamNoVec: computes eigenvalues only;
 *          = ChamVec: computes eigenvalues and eigenvectors.
282 283 284 285
 *
 * @param[in] uplo
 *          Specifies whether the matrix A is upper triangular or
 *          lower triangular:
286 287
 *          = ChamUpper: Upper triangle of A is stored;
 *          = ChamLower: Lower triangle of A is stored.
288 289 290
 *
 * @param[in,out] A
 *          On entry, the symmetric (or Hermitian) matrix A.
291
 *          If uplo = ChamUpper, the leading N-by-N upper triangular
292 293 294 295 296 297
 *          part of A contains the upper triangular part of the matrix
 *          A, and the strictly lower triangular part of A is not
 *          referenced.
 *          If UPLO = 'L', the leading N-by-N lower triangular part of
 *          A contains the lower triangular part of the matrix A, and
 *          the strictly upper triangular part of A is not referenced.
298
 *          On exit, if jobz = ChamVec, then if return value = 0, A
299
 *          contains the orthonormal eigenvectors of the matrix A.
300 301 302
 *          If jobz = ChamNoVec, then on exit the lower triangle (if
 *          uplo = ChamLower) or the upper triangle (if uplo =
 *          ChamUpper) of A, including the diagonal, is destroyed.*
303 304 305 306 307 308
 *
 * @param[out] W
 *          On exit, if info = 0, the eigenvalues.
 *
 * @param[in,out] T
 *          On entry, descriptor as return by
309
 *          CHAMELEON_Alloc_Workspace_zheevd
310 311 312 313 314 315 316 317 318 319 320
 *          On exit, contains auxiliary factorization data.
 *
 * @param[in] sequence
 *          Identifies the sequence of function calls that this call belongs to
 *          (for completion checks and exception handling purposes).
 *
 * @param[out] request
 *          Identifies this function call (for exception handling purposes).
 *
 *******************************************************************************
 *
321 322 323 324 325
 * @sa CHAMELEON_zheevd
 * @sa CHAMELEON_zheevd_Tile
 * @sa CHAMELEON_cheevd_Tile_Async
 * @sa CHAMELEON_dsyev_Tile_Async
 * @sa CHAMELEON_ssyev_Tile_Async
326
 *
327
 */
328
int CHAMELEON_zheevd_Tile_Async( cham_job_t jobz, cham_uplo_t uplo,
329 330
                                 CHAM_desc_t *A, double *W, CHAM_desc_t *T,
                                 RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
331
{
Mathieu Faverge's avatar
Mathieu Faverge committed
332
    CHAM_context_t *chamctxt;
333 334 335 336
    CHAM_desc_t descA;
    CHAM_desc_t descT;
    CHAM_desc_t D, *Dptr = NULL;
    CHAMELEON_Complex64_t *Q2 = NULL;
337
    int N, NB, status;
338
    double *E;
339 340 341 342
    CHAMELEON_Complex64_t *V;
    CHAM_desc_t descQ2l, descQ2t;
    CHAM_desc_t descVl, descVt;
    CHAM_desc_t *subA, *subQ, *subT;
343

Mathieu Faverge's avatar
Mathieu Faverge committed
344 345 346
    chamctxt = chameleon_context_self();
    if (chamctxt == NULL) {
        chameleon_fatal_error("CHAMELEON_zheevd_Tile_Async", "CHAMELEON not initialized");
347
        return CHAMELEON_ERR_NOT_INITIALIZED;
348 349
    }
    if (sequence == NULL) {
Mathieu Faverge's avatar
Mathieu Faverge committed
350
        chameleon_fatal_error("CHAMELEON_zheevd_Tile_Async", "NULL sequence");
351
        return CHAMELEON_ERR_UNALLOCATED;
352 353
    }
    if (request == NULL) {
Mathieu Faverge's avatar
Mathieu Faverge committed
354
        chameleon_fatal_error("CHAMELEON_zheevd_Tile_Async", "NULL request");
355
        return CHAMELEON_ERR_UNALLOCATED;
356 357
    }
    /* Check sequence status */
358 359
    if (sequence->status == CHAMELEON_SUCCESS) {
        request->status = CHAMELEON_SUCCESS;
Mathieu Faverge's avatar
Mathieu Faverge committed
360 361
    }
    else {
Mathieu Faverge's avatar
Mathieu Faverge committed
362
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
Mathieu Faverge's avatar
Mathieu Faverge committed
363
    }
364 365

    /* Check descriptors for correctness */
Mathieu Faverge's avatar
Mathieu Faverge committed
366 367 368
    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zheevd_Tile_Async", "invalid descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
369 370 371
    } else {
        descA = *A;
    }
Mathieu Faverge's avatar
Mathieu Faverge committed
372 373 374
    if (chameleon_desc_check(T) != CHAMELEON_SUCCESS) {
        chameleon_error("CHAMELEON_zheevd_Tile_Async", "invalid descriptor");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
375 376 377 378
    } else {
        descT = *T;
    }
    /* Check input arguments */
379
    if (jobz != ChamNoVec && jobz != ChamVec) {
Mathieu Faverge's avatar
Mathieu Faverge committed
380 381
        chameleon_error("CHAMELEON_zheevd_Tile_Async", "illegal value of jobz");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
382
    }
383
    if ((uplo != ChamLower) && (uplo != ChamUpper)) {
Mathieu Faverge's avatar
Mathieu Faverge committed
384 385
        chameleon_error("CHAMELEON_zheevd_Tile_Async", "illegal value of uplo");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
386 387
    }
    if (descA.m != descA.n) {
Mathieu Faverge's avatar
Mathieu Faverge committed
388 389
        chameleon_error("CHAMELEON_zheevd_Tile_Async", "matrix need to be square");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
390 391
    }
    if (descA.nb != descA.mb) {
Mathieu Faverge's avatar
Mathieu Faverge committed
392 393
        chameleon_error("CHAMELEON_zheevd_Tile_Async", "only square tiles supported");
        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
394 395
    }

396 397
    N  = descA.m;
    NB = descA.mb;
398 399 400 401

    /* Allocate data structures for reduction to tridiagonal form */
    E = malloc( (N - 1) * sizeof(double) );
    if (E == NULL) {
Mathieu Faverge's avatar
Mathieu Faverge committed
402
        chameleon_error("CHAMELEON_zheevd_Tile_Async", "malloc(E) failed");
403
        free(E);
404
        return CHAMELEON_ERR_OUT_OF_RESOURCES;
405 406
    }

407
    if (jobz == ChamVec){
408
        /* Have to synchrone right now */
409
        Q2 = malloc( N * N * sizeof(CHAMELEON_Complex64_t));
410
        /* For bug in lapacke */
411
        memset( Q2, 0, N * N * sizeof(CHAMELEON_Complex64_t));
412 413
    }

414
    status = CHAMELEON_zhetrd_Tile_Async( jobz, uplo,
415 416 417
                                          A, W, E, T,
                                          Q2, N,
                                          sequence, request );
418
    if (status != 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
419
        chameleon_error("CHAMELEON_zheevd_Tile", "CHAMELEON_zhetrd failed");
420 421
    }

422
    if (jobz == ChamNoVec){
423
#if !defined(CHAMELEON_SIMULATION)
424 425
        /* Tridiagonal eigensolver */
        status = LAPACKE_zstedc( LAPACK_COL_MAJOR,
Mathieu Faverge's avatar
Mathieu Faverge committed
426
                                 chameleon_lapack_const(jobz),
427 428 429
                                 N, W, E,
                                 NULL, N );
        if (status != 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
430
            chameleon_error("CHAMELEON_zheevd_Tile_Async", "LAPACKE_zstedc failed");
431
        }
432
#endif /* !defined(CHAMELEON_SIMULATION) */
433
        free(E);
434
        return CHAMELEON_SUCCESS;
435 436
    }

437
    V = malloc( N * N * sizeof(CHAMELEON_Complex64_t) );
438
    if (V == NULL) {
Mathieu Faverge's avatar
Mathieu Faverge committed
439
        chameleon_error("CHAMELEON_zheevd_Tile_Async", "malloc(V) failed");
440
        free(V);
441
        return CHAMELEON_ERR_OUT_OF_RESOURCES;
442 443
    }
    /* For bug in lapacke */
444
    memset(V, 0, N * N * sizeof(CHAMELEON_Complex64_t));
445

446 447 448 449 450
    /*
     * Tridiagonal eigensolver
     * V contains the eigenvectors of the tridiagonal system on exit
     */
#if !defined(CHAMELEON_SIMULATION)
451
    status = LAPACKE_zstedc( LAPACK_COL_MAJOR,
Mathieu Faverge's avatar
Mathieu Faverge committed
452
                             chameleon_lapack_const(ChamIvec),
453 454 455
                             N, W, E,
                             V, N );
    if (status != 0) {
Mathieu Faverge's avatar
Mathieu Faverge committed
456
        chameleon_error("CHAMELEON_zheevd_Tile_Async", "LAPACKE_zstedc failed");
457
    }
458
#endif /* !defined(CHAMELEON_SIMULATION) */
459 460

    /* Convert matrices in tile format */
461 462
    /* A/T from CHAMELEON_zhetrd   refer  to Q1 (tile   layout) */
    /* Q   from CHAMELEON_zhetrd   refers to Q2 (lapack layout) */
463 464
    /* V   from LAPACKE_zstedc refers to V  (lapack layout) */
    /* The final eigenvectors are (Q1 Q2 V) or (Q1^h Q2 V)  */
Mathieu Faverge's avatar
Mathieu Faverge committed
465
    chameleon_zlap2tile( chamctxt, &descQ2l, &descQ2t, ChamDescInput, ChamUpperLower,
466
                         Q2, NB, NB, N, N, N, N, sequence, request );
467

Mathieu Faverge's avatar
Mathieu Faverge committed
468
    chameleon_zlap2tile( chamctxt, &descVl, &descVt, ChamDescInput, ChamUpperLower,
469
                         V, NB, NB, N, N, N, N, sequence, request );
470

471
    if (uplo == ChamLower)
472
    {
473
#if defined(CHAMELEON_COPY_DIAG)
474 475
        {
            int n = chameleon_min(A->mt, A->nt) * A->nb;
Mathieu Faverge's avatar
Mathieu Faverge committed
476
            chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
477 478
            Dptr = &D;
        }
479
#endif
Mathieu Faverge's avatar
Mathieu Faverge committed
480 481 482
        subA = chameleon_desc_submatrix(&descA,   descA.mb,   0, descA.m  -descA.mb,   descA.n-descA.nb);
        subQ = chameleon_desc_submatrix(&descQ2t, descQ2t.mb, 0, descQ2t.m-descQ2t.mb, descQ2t.n );
        subT = chameleon_desc_submatrix(&descT,   descT.mb,   0, descT.m  -descT.mb,   descT.n-descT.nb);
483 484

        /* Compute Q2 = Q1 * Q2 */
485 486 487
        chameleon_pzunmqr( 1, ChamLeft, ChamNoTrans,
                           subA, subQ, subT, Dptr,
                           sequence, request );
488 489

        /* Compute the final eigenvectors A = (Q1 * Q2) * V */
Mathieu Faverge's avatar
Mathieu Faverge committed
490
        chameleon_pzgemm( ChamNoTrans, ChamNoTrans,
491 492 493
                          1.0, &descQ2t, &descVt,
                          0.0, &descA,
                          sequence, request );
494 495 496

    }
    else {
497
#if defined(CHAMELEON_COPY_DIAG)
498 499
        {
            int m = chameleon_min(A->mt, A->nt) * A->mb;
Mathieu Faverge's avatar
Mathieu Faverge committed
500
            chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
501 502
            Dptr = &D;
        }
503
#endif
Mathieu Faverge's avatar
Mathieu Faverge committed
504 505 506
        subA = chameleon_desc_submatrix(&descA,   0,   descA.nb, descA.m  -descA.mb,   descA.n -descA.nb );
        subQ = chameleon_desc_submatrix(&descQ2t, descQ2t.mb, 0, descQ2t.m-descQ2t.mb, descQ2t.n );
        subT = chameleon_desc_submatrix(&descT,   0,   descT.nb, descT.m  -descT.mb,   descT.n -descT.nb );
507 508

        /* Compute Q2 = Q1^h * Q2 */
509 510 511
        chameleon_pzunmlq( 1, ChamLeft, ChamConjTrans,
                           subA, subQ, subT, Dptr,
                           sequence, request );
512 513

        /* Compute the final eigenvectors A =  (Q1^h * Q2) * V */
Mathieu Faverge's avatar
Mathieu Faverge committed
514
        chameleon_pzgemm( ChamNoTrans, ChamNoTrans,
515 516 517
                          1.0, &descQ2t, &descVt,
                          0.0, &descA,
                          sequence, request );
518 519
    }

Mathieu Faverge's avatar
Mathieu Faverge committed
520
    chameleon_ztile2lap( chamctxt, &descQ2l, &descQ2t,
521
                         ChamDescInput, ChamUpperLower, sequence, request );
Mathieu Faverge's avatar
Mathieu Faverge committed
522
    chameleon_ztile2lap( chamctxt, &descVl, &descVt,
523
                         ChamDescInput, ChamUpperLower, sequence, request );
524

Mathieu Faverge's avatar
Mathieu Faverge committed
525
    chameleon_sequence_wait( chamctxt, sequence );
526

527
    /* Cleanup the temporary data */
Mathieu Faverge's avatar
Mathieu Faverge committed
528 529
    chameleon_ztile2lap_cleanup( chamctxt, &descQ2l, &descQ2t );
    chameleon_ztile2lap_cleanup( chamctxt, &descVl, &descVt );
530

531 532 533 534
    free(subA); free(subQ); free(subT);
    free(Q2);
    free(V);
    free(E);
535 536
    if ( Dptr != NULL ) {
        chameleon_desc_destroy( Dptr );
537 538
    }
    (void)D;
539
    return CHAMELEON_SUCCESS;
540
}