-
Mathieu Faverge authoredMathieu Faverge authored
zheevd.c 20.00 KiB
/**
*
* @file zheevd.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zheevd wrappers
*
* @version 1.2.0
* @author Azzam Haidar
* @author Hatem Ltaief
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2022-02-22
* @precisions normal z -> s d c
*
*/
#include "control/common.h"
#include <stdlib.h>
#include <string.h>
#if !defined(CHAMELEON_SIMULATION)
#include <coreblas/lapacke.h>
#endif
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t
*
* CHAMELEON_zheevd - Computes all eigenvalues and, optionally,
* eigenvectors of a complex Hermitian matrix A. The matrix A is
* preliminary reduced to tridiagonal form using a two-stage
* approach:
* First stage: reduction to band tridiagonal form;
* Second stage: reduction from band to tridiagonal form.
*
*******************************************************************************
*
* @param[in] jobz
* Intended usage:
* = ChamNoVec: computes eigenvalues only;
* = ChamVec: computes eigenvalues and eigenvectors.
*
* @param[in] uplo
* Specifies whether the matrix A is upper triangular or
* lower triangular:
* = ChamUpper: Upper triangle of A is stored;
* = ChamLower: Lower triangle of A is stored.
*
* @param[in] N
* The order of the matrix A. N >= 0.
*
* @param[in,out] A
* On entry, the symmetric (or Hermitian) matrix A.
* If uplo = ChamUpper, the leading N-by-N upper triangular
* part of A contains the upper triangular part of the matrix
* A, and the strictly lower triangular part of A is not
* referenced.
* If uplo = ChamLower, the leading N-by-N lower triangular
* part of A contains the lower triangular part of the matrix
* A, and the strictly upper triangular part of A is not
* referenced.
* On exit, the lower triangle (if uplo = ChamLower) or the
* upper triangle (if uplo = ChamUpper) of A, including the
* diagonal, is destroyed.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,N).
*
* @param[out] W
* On exit, if info = 0, the eigenvalues.
*
* @param[in, out] descT
* On entry, descriptor as return by CHAMELEON_Alloc_Workspace_zheevd
* On exit, contains auxiliary factorization data.
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval <0 if -i, the i-th argument had an illegal value
* @retval >0 if INFO = i, the algorithm failed to converge; i
* off-diagonal elements of an intermediate tridiagonal
* form did not converge to zero.
*
*******************************************************************************
*
* @sa CHAMELEON_zheevd_Tile
* @sa CHAMELEON_zheevd_Tile_Async
* @sa CHAMELEON_cheevd
* @sa CHAMELEON_dsyev
* @sa CHAMELEON_ssyev
*
*/
int CHAMELEON_zheevd( cham_job_t jobz, cham_uplo_t uplo, int N,
CHAMELEON_Complex64_t *A, int LDA,
double *W,
CHAM_desc_t *descT )
{
int NB;
int status;
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
CHAM_desc_t descAl, descAt;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zheevd", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
/* Check input arguments */
if (jobz != ChamNoVec && jobz != ChamVec) {
chameleon_error("CHAMELEON_zheevd", "illegal value of jobz");
return -1;
}
if ((uplo != ChamLower) && (uplo != ChamUpper)) {
chameleon_error("CHAMELEON_zheevd", "illegal value of uplo");
return -2;
}
if (N < 0) {
chameleon_error("CHAMELEON_zheevd", "illegal value of N");
return -3;
}
if (LDA < chameleon_max(1, N)) {
chameleon_error("CHAMELEON_zheevd", "illegal value of LDA");
return -5;
}
/* Quick return */
if (N == 0)
return CHAMELEON_SUCCESS;
/* Tune NB & IB depending on N; Set NBNB */
status = chameleon_tune(CHAMELEON_FUNC_ZHEEVD, N, N, 0);
if (status != CHAMELEON_SUCCESS) {
chameleon_error("CHAMELEON_zheevd", "chameleon_tune() failed");
return status;
}
/* Set NT */
NB = CHAMELEON_NB;
chameleon_sequence_create( chamctxt, &sequence );
/* Submit the matrix conversion */
chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, uplo,
A, NB, NB, LDA, N, N, N, sequence, &request );
/* Call the tile interface */
CHAMELEON_zheevd_Tile_Async( jobz, uplo, &descAt, W, descT, sequence, &request );
/* Submit the matrix conversion back */
chameleon_ztile2lap( chamctxt, &descAl, &descAt,
ChamDescInout, uplo, sequence, &request );
chameleon_sequence_wait( chamctxt, sequence );
/* Cleanup the temporary data */
chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile
*
* CHAMELEON_zheevd_Tile - Computes all eigenvalues and, optionally, eigenvectors of a
* complex Hermitian matrix A using a two-stage approach:
* First stage: reduction to band tridiagonal form;
* Second stage: reduction from band to tridiagonal form.
*
* Operates on matrices stored by tiles.
* All matrices are passed through descriptors.
* All dimensions are taken from the descriptors.
*
*******************************************************************************
*
* @param[in] jobz
* Intended usage:
* = ChamNoVec: computes eigenvalues only;
* = ChamVec: computes eigenvalues and eigenvectors.
*
* @param[in] uplo
* Specifies whether the matrix A is upper triangular or
* lower triangular:
* = ChamUpper: Upper triangle of A is stored;
* = ChamLower: Lower triangle of A is stored.
*
* @param[in,out] A
* On entry, the symmetric (or Hermitian) matrix A.
* If uplo = ChamUpper, the leading N-by-N upper triangular
* part of A contains the upper triangular part of the matrix
* A, and the strictly lower triangular part of A is not
* referenced.
* If UPLO = 'L', the leading N-by-N lower triangular part of
* A contains the lower triangular part of the matrix A, and
* the strictly upper triangular part of A is not referenced.
* On exit, if jobz = ChamVec, then if return value = 0, A
* contains the orthonormal eigenvectors of the matrix A.
* If jobz = ChamNoVec, then on exit the lower triangle (if
* uplo = ChamLower) or the upper triangle (if uplo =
* ChamUpper) of A, including the diagonal, is destroyed.*
*
* @param[out] W
* On exit, if info = 0, the eigenvalues.
*
* @param[in,out] T
* On entry, descriptor as return by
* CHAMELEON_Alloc_Workspace_zheevd
* On exit, contains auxiliary factorization data.
*
*******************************************************************************
*
* @retval CHAMELEON_SUCCESS successful exit
* @retval <0 if -i, the i-th argument had an illegal value
* @retval >0 if INFO = i, the algorithm failed to converge; i
* off-diagonal elements of an intermediate tridiagonal
* form did not converge to zero.
*
*******************************************************************************
*
* @sa CHAMELEON_zheevd_Tile
* @sa CHAMELEON_zheevd_Tile_Async
* @sa CHAMELEON_cheevd
* @sa CHAMELEON_dsyev
* @sa CHAMELEON_ssyev
*
*/
int CHAMELEON_zheevd_Tile( cham_job_t jobz, cham_uplo_t uplo,
CHAM_desc_t *A, double *W, CHAM_desc_t *T )
{
CHAM_context_t *chamctxt;
RUNTIME_sequence_t *sequence = NULL;
RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
int status;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zheevd_Tile", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
chameleon_sequence_create( chamctxt, &sequence );
CHAMELEON_zheevd_Tile_Async( jobz, uplo, A, W, T, sequence, &request );
CHAMELEON_Desc_Flush( A, sequence );
CHAMELEON_Desc_Flush( T, sequence );
chameleon_sequence_wait( chamctxt, sequence );
status = sequence->status;
chameleon_sequence_destroy( chamctxt, sequence );
return status;
}
/**
********************************************************************************
*
* @ingroup CHAMELEON_Complex64_t_Tile_Async
*
* CHAMELEON_zheevd_Tile_Async - Computes all eigenvalues and,
* optionally, eigenvectors of a complex Hermitian matrix A using a
* two-stage approach:
* First stage: reduction to band tridiagonal form;
* Second stage: reduction from band to tridiagonal form.
*
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in] jobz
* Intended usage:
* = ChamNoVec: computes eigenvalues only;
* = ChamVec: computes eigenvalues and eigenvectors.
*
* @param[in] uplo
* Specifies whether the matrix A is upper triangular or
* lower triangular:
* = ChamUpper: Upper triangle of A is stored;
* = ChamLower: Lower triangle of A is stored.
*
* @param[in,out] A
* On entry, the symmetric (or Hermitian) matrix A.
* If uplo = ChamUpper, the leading N-by-N upper triangular
* part of A contains the upper triangular part of the matrix
* A, and the strictly lower triangular part of A is not
* referenced.
* If UPLO = 'L', the leading N-by-N lower triangular part of
* A contains the lower triangular part of the matrix A, and
* the strictly upper triangular part of A is not referenced.
* On exit, if jobz = ChamVec, then if return value = 0, A
* contains the orthonormal eigenvectors of the matrix A.
* If jobz = ChamNoVec, then on exit the lower triangle (if
* uplo = ChamLower) or the upper triangle (if uplo =
* ChamUpper) of A, including the diagonal, is destroyed.*
*
* @param[out] W
* On exit, if info = 0, the eigenvalues.
*
* @param[in,out] T
* On entry, descriptor as return by
* CHAMELEON_Alloc_Workspace_zheevd
* On exit, contains auxiliary factorization data.
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa CHAMELEON_zheevd
* @sa CHAMELEON_zheevd_Tile
* @sa CHAMELEON_cheevd_Tile_Async
* @sa CHAMELEON_dsyev_Tile_Async
* @sa CHAMELEON_ssyev_Tile_Async
*
*/
int CHAMELEON_zheevd_Tile_Async( cham_job_t jobz, cham_uplo_t uplo,
CHAM_desc_t *A, double *W, CHAM_desc_t *T,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
CHAM_desc_t descA;
CHAM_desc_t descT;
CHAM_desc_t D, *Dptr = NULL;
CHAMELEON_Complex64_t *Q2 = NULL;
int N, NB, status;
double *E;
CHAMELEON_Complex64_t *V;
CHAM_desc_t descQ2l, descQ2t;
CHAM_desc_t descVl, descVt;
CHAM_desc_t *subA, *subQ, *subT;
void *gemm_ws;
chamctxt = chameleon_context_self();
if (chamctxt == NULL) {
chameleon_fatal_error("CHAMELEON_zheevd_Tile_Async", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
if (sequence == NULL) {
chameleon_fatal_error("CHAMELEON_zheevd_Tile_Async", "NULL sequence");
return CHAMELEON_ERR_UNALLOCATED;
}
if (request == NULL) {
chameleon_fatal_error("CHAMELEON_zheevd_Tile_Async", "NULL request");
return CHAMELEON_ERR_UNALLOCATED;
}
/* Check sequence status */
if (sequence->status == CHAMELEON_SUCCESS) {
request->status = CHAMELEON_SUCCESS;
}
else {
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
}
/* Check descriptors for correctness */
if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
chameleon_error("CHAMELEON_zheevd_Tile_Async", "invalid descriptor");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
} else {
descA = *A;
}
if (chameleon_desc_check(T) != CHAMELEON_SUCCESS) {
chameleon_error("CHAMELEON_zheevd_Tile_Async", "invalid descriptor");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
} else {
descT = *T;
}
/* Check input arguments */
if (jobz != ChamNoVec && jobz != ChamVec) {
chameleon_error("CHAMELEON_zheevd_Tile_Async", "illegal value of jobz");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
if ((uplo != ChamLower) && (uplo != ChamUpper)) {
chameleon_error("CHAMELEON_zheevd_Tile_Async", "illegal value of uplo");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
if (descA.m != descA.n) {
chameleon_error("CHAMELEON_zheevd_Tile_Async", "matrix need to be square");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
if (descA.nb != descA.mb) {
chameleon_error("CHAMELEON_zheevd_Tile_Async", "only square tiles supported");
return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
}
N = descA.m;
NB = descA.mb;
/* Allocate data structures for reduction to tridiagonal form */
E = malloc( (N - 1) * sizeof(double) );
if (E == NULL) {
chameleon_error("CHAMELEON_zheevd_Tile_Async", "malloc(E) failed");
free(E);
return CHAMELEON_ERR_OUT_OF_RESOURCES;
}
if (jobz == ChamVec){
/* Have to synchrone right now */
Q2 = malloc( N * N * sizeof(CHAMELEON_Complex64_t));
/* For bug in lapacke */
memset( Q2, 0, N * N * sizeof(CHAMELEON_Complex64_t));
}
status = CHAMELEON_zhetrd_Tile_Async( jobz, uplo,
A, W, E, T,
Q2, N,
sequence, request );
if (status != 0) {
chameleon_error("CHAMELEON_zheevd_Tile", "CHAMELEON_zhetrd failed");
}
if (jobz == ChamNoVec){
#if !defined(CHAMELEON_SIMULATION)
/* Tridiagonal eigensolver */
status = LAPACKE_zstedc( LAPACK_COL_MAJOR,
chameleon_lapack_const(jobz),
N, W, E,
NULL, N );
if (status != 0) {
chameleon_error("CHAMELEON_zheevd_Tile_Async", "LAPACKE_zstedc failed");
}
#endif /* !defined(CHAMELEON_SIMULATION) */
free(E);
return CHAMELEON_SUCCESS;
}
V = malloc( N * N * sizeof(CHAMELEON_Complex64_t) );
if (V == NULL) {
chameleon_error("CHAMELEON_zheevd_Tile_Async", "malloc(V) failed");
free(E);
free(Q2);
free(V);
return CHAMELEON_ERR_OUT_OF_RESOURCES;
}
/* For bug in lapacke */
memset(V, 0, N * N * sizeof(CHAMELEON_Complex64_t));
/*
* Tridiagonal eigensolver
* V contains the eigenvectors of the tridiagonal system on exit
*/
#if !defined(CHAMELEON_SIMULATION)
status = LAPACKE_zstedc( LAPACK_COL_MAJOR,
chameleon_lapack_const(ChamIvec),
N, W, E,
V, N );
if (status != 0) {
chameleon_error("CHAMELEON_zheevd_Tile_Async", "LAPACKE_zstedc failed");
}
#endif /* !defined(CHAMELEON_SIMULATION) */
/* Convert matrices in tile format */
/* A/T from CHAMELEON_zhetrd refer to Q1 (tile layout) */
/* Q from CHAMELEON_zhetrd refers to Q2 (lapack layout) */
/* V from LAPACKE_zstedc refers to V (lapack layout) */
/* The final eigenvectors are (Q1 Q2 V) or (Q1^h Q2 V) */
chameleon_zlap2tile( chamctxt, &descQ2l, &descQ2t, ChamDescInput, ChamUpperLower,
Q2, NB, NB, N, N, N, N, sequence, request );
chameleon_zlap2tile( chamctxt, &descVl, &descVt, ChamDescInput, ChamUpperLower,
V, NB, NB, N, N, N, N, sequence, request );
/* Workspaces used for gemm */
gemm_ws = CHAMELEON_zgemm_WS_Alloc( ChamNoTrans, ChamNoTrans,
&descQ2t, &descVt,
&descA );
if (uplo == ChamLower)
{
#if defined(CHAMELEON_COPY_DIAG)
{
int n = chameleon_min(A->mt, A->nt) * A->nb;
chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, CHAMELEON_zgemm_WS_Free( gemm_ws ) );
Dptr = &D;
}
#endif
subA = chameleon_desc_submatrix(&descA, descA.mb, 0, descA.m -descA.mb, descA.n-descA.nb);
subQ = chameleon_desc_submatrix(&descQ2t, descQ2t.mb, 0, descQ2t.m-descQ2t.mb, descQ2t.n );
subT = chameleon_desc_submatrix(&descT, descT.mb, 0, descT.m -descT.mb, descT.n-descT.nb);
/* Compute Q2 = Q1 * Q2 */
chameleon_pzunmqr( 1, ChamLeft, ChamNoTrans,
subA, subQ, subT, Dptr,
sequence, request );
/* Compute the final eigenvectors A = (Q1 * Q2) * V */
chameleon_pzgemm( gemm_ws, ChamNoTrans, ChamNoTrans,
1.0, &descQ2t, &descVt,
0.0, &descA,
sequence, request );
}
else {
#if defined(CHAMELEON_COPY_DIAG)
{
int m = chameleon_min(A->mt, A->nt) * A->mb;
chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
Dptr = &D;
}
#endif
subA = chameleon_desc_submatrix(&descA, 0, descA.nb, descA.m -descA.mb, descA.n -descA.nb );
subQ = chameleon_desc_submatrix(&descQ2t, descQ2t.mb, 0, descQ2t.m-descQ2t.mb, descQ2t.n );
subT = chameleon_desc_submatrix(&descT, 0, descT.nb, descT.m -descT.mb, descT.n -descT.nb );
/* Compute Q2 = Q1^h * Q2 */
chameleon_pzunmlq( 1, ChamLeft, ChamConjTrans,
subA, subQ, subT, Dptr,
sequence, request );
/* Compute the final eigenvectors A = (Q1^h * Q2) * V */
chameleon_pzgemm( gemm_ws, ChamNoTrans, ChamNoTrans,
1.0, &descQ2t, &descVt,
0.0, &descA,
sequence, request );
}
chameleon_ztile2lap( chamctxt, &descQ2l, &descQ2t,
ChamDescInput, ChamUpperLower, sequence, request );
chameleon_ztile2lap( chamctxt, &descVl, &descVt,
ChamDescInput, ChamUpperLower, sequence, request );
chameleon_sequence_wait( chamctxt, sequence );
/* Cleanup the temporary data */
CHAMELEON_zgemm_WS_Free( gemm_ws );
chameleon_ztile2lap_cleanup( chamctxt, &descQ2l, &descQ2t );
chameleon_ztile2lap_cleanup( chamctxt, &descVl, &descVt );
free(subA); free(subQ); free(subT);
free(Q2);
free(V);
free(E);
if ( Dptr != NULL ) {
chameleon_desc_destroy( Dptr );
}
(void)D;
return CHAMELEON_SUCCESS;
}