Review notes (free text before the first "diff --git" header; patch tools skip it).

* compute/pzgebrd_ge2gb.c: morse_pzgebrd_ge2gb() now receives A, T and D by
  pointer, builds the sub-descriptors once per step (A1, A2, T1, D1) and only
  builds D1 when D is not NULL (D1 stays NULL otherwise).
* compute/pzunmlq_param.c, compute/pzunmqr_param.c: the ldbp assignments are
  dropped; the L argument of MORSE_TASK_ztpmqrt is clamped with
  chameleon_min( L, tempmm ).
* compute/zgesvd.c: the call site passes &descA and &descT. The surrounding
  context reads descA.mb, so descA is a struct object, while the new prototype
  expects MORSE_desc_t pointers; passing the objects themselves does not
  compile. The "index" line of that file still carries the hashes recorded
  with the original patch.
* Line breaks and indentation inside the hunks were re-created from a
  whitespace-collapsed copy. Hunk line counts match the @@ headers; if context
  whitespace does not match the target tree, apply with
  "git apply --ignore-whitespace" or "patch -l".

diff --git a/compute/pzgebrd_ge2gb.c b/compute/pzgebrd_ge2gb.c
index 2e9b9c5e926646c0aaeefc6fc2fbd8c1582bcca8..c3210313faf36919827e489fc1cdee3a3750d35d 100644
--- a/compute/pzgebrd_ge2gb.c
+++ b/compute/pzgebrd_ge2gb.c
@@ -20,85 +20,85 @@
  */
 #include "control/common.h"
 
-void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, MORSE_desc_t D,
+void morse_pzgebrd_ge2gb(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D,
                          MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     int k;
     int tempkm, tempkn;
-    if (A.m >= A.n){
-        for (k = 0; k < A.nt; k++) {
-            tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
-            tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
+    MORSE_desc_t *A1, *A2, *T1, *D1 = NULL;
 
-            morse_pzgeqrf(
-                morse_desc_submatrix(&A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn),
-                morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
-                morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
-                sequence, request);
+    if (A->m >= A->n){
+        for (k = 0; k < A->nt; k++) {
+            tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+            tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
 
-            morse_pzunmqr(
-                MorseLeft,
-                MorseConjTrans,
-                morse_desc_submatrix(&A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn),
-                morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, A.m-k*A.mb, A.n-(k+1)*A.nb),
-                morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
-                morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
-                sequence, request);
+            A1 = morse_desc_submatrix(A, k*A->mb, k*A->nb, A->m-k*A->mb, tempkn);
+            A2 = morse_desc_submatrix(A, k*A->mb, (k+1)*A->nb, A->m-k*A->mb, A->n-(k+1)*A->nb);
+            T1 = morse_desc_submatrix(T, k*T->mb, k*T->nb, T->m-k*T->mb, tempkn);
+            if ( D != NULL ) {
+                D1 = morse_desc_submatrix(D, k*D->mb, k*D->nb, D->m-k*D->mb, tempkn);
+            }
+
+            morse_pzgeqrf( A1, T1, D1,
+                           sequence, request);
+
+            morse_pzunmqr( MorseLeft, MorseConjTrans,
+                           A1, A2, T1, D1,
+                           sequence, request);
 
-            if (k+1 < A.nt){
-                tempkn = k+1 == A.nt-1 ? A.n-(k+1)*A.nb : A.nb;
+            if (k+1 < A->nt){
+                tempkn = k+1 == A->nt-1 ? A->n-(k+1)*A->nb : A->nb;
 
-                morse_pzgelqf(
-                    morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb),
-                    morse_desc_submatrix(&T, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb),
-                    morse_desc_submatrix(&D, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb),
-                    sequence, request);
+                A1 = morse_desc_submatrix(A, k*A->mb, (k+1)*A->nb, tempkm, A->n-(k+1)*A->nb);
+                A2 = morse_desc_submatrix(A, (k+1)*A->mb, (k+1)*A->nb, A->m-(k+1)*A->mb, A->n-(k+1)*A->nb);
+                T1 = morse_desc_submatrix(T, k*T->mb, (k+1)*T->nb, tempkm, T->n-(k+1)*T->nb);
+                if ( D != NULL ) {
+                    D1 = morse_desc_submatrix(D, k*D->mb, (k+1)*D->nb, tempkm, D->n-(k+1)*D->nb);
+                }
 
-                morse_pzunmlq(
-                    MorseRight, MorseConjTrans,
-                    morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb),
-                    morse_desc_submatrix(&A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb),
-                    morse_desc_submatrix(&T, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb),
-                    morse_desc_submatrix(&D, k*T.mb, (k+1)*T.nb, T.mb, T.n-(k+1)*T.nb),
-                    sequence, request);
+                morse_pzgelqf( A1, T1, D1,
+                               sequence, request);
+
+                morse_pzunmlq( MorseRight, MorseConjTrans,
+                               A1, A2, T1, D1,
+                               sequence, request);
             }
         }
     }
     else{
-        for (k = 0; k < A.mt; k++) {
-            tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
-            tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
+        for (k = 0; k < A->mt; k++) {
+            tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+            tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+
+            A1 = morse_desc_submatrix(A, k*A->mb, k*A->nb, tempkm, A->n-k*A->nb);
+            A2 = morse_desc_submatrix(A, (k+1)*A->mb, k*A->nb, A->m-(k+1)*A->mb, A->n-k*A->nb);
+            T1 = morse_desc_submatrix(T, k*T->mb, k*T->nb, tempkm, T->n-k*T->nb);
+            if ( D != NULL ) {
+                D1 = morse_desc_submatrix(D, k*D->mb, k*D->nb, tempkm, D->n-k*D->nb);
+            }
+            morse_pzgelqf( A1, T1, D1,
+                           sequence, request);
 
-            morse_pzgelqf(
-                morse_desc_submatrix(&A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb),
-                morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb),
-                morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb),
-                sequence, request);
+            morse_pzunmlq( MorseRight, MorseConjTrans,
+                           A1, A2, T1, D1,
+                           sequence, request);
 
-            morse_pzunmlq(
-                MorseRight, MorseConjTrans,
-                morse_desc_submatrix(&A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb),
-                morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, A.n-k*A.nb),
-                morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb),
-                morse_desc_submatrix(&D, k*T.mb, k*T.nb, T.mb, T.n-k*T.nb),
-                sequence, request);
+            if (k+1 < A->mt){
+                tempkm = k+1 == A->mt-1 ? A->m-(k+1)*A->mb : A->mb;
 
-            if (k+1 < A.mt){
-                tempkm = k+1 == A.mt-1 ? A.m-(k+1)*A.mb : A.mb;
+                A1 = morse_desc_submatrix(A, (k+1)*A->mb, k*A->nb, A->m-(k+1)*A->mb, tempkn);
+                A2 = morse_desc_submatrix(A, (k+1)*A->mb, (k+1)*A->nb, A->m-(k+1)*A->mb, A->n-(k+1)*A->nb);
+                T1 = morse_desc_submatrix(T, (k+1)*T->mb, k*T->nb, T->m-(k+1)*T->mb, tempkn);
+                if ( D != NULL ) {
+                    D1 = morse_desc_submatrix(D, (k+1)*D->mb, k*D->nb, D->m-(k+1)*D->mb, tempkn);
+                }
 
-                morse_pzgeqrf(
-                    morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn),
-                    morse_desc_submatrix(&T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
-                    morse_desc_submatrix(&D, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
-                    sequence, request);
+                morse_pzgeqrf( A1, T1, D1,
+                               sequence, request);
 
-                morse_pzunmqr(
-                    MorseLeft, MorseConjTrans,
-                    morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn),
-                    morse_desc_submatrix(&A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb),
-                    morse_desc_submatrix(&T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
-                    morse_desc_submatrix(&D, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
-                    sequence, request);
+                morse_pzunmqr( MorseLeft, MorseConjTrans,
+                               A1, A2, T1, D1,
+                               sequence, request);
             }
         }
     }
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
index 6bfbdb1e7174b033749065d3f7e90dec52ad45ee..ead29ceb00afd91f91b3875ae66c2131bb871bf3 100644
--- a/compute/pzunmlq_param.c
+++ b/compute/pzunmlq_param.c
@@ -299,7 +299,6 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                 p = qrtree->currpiv(qrtree, k, n);
 
                 tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                ldbp = BLKLDD(B, p);
 
                 if(qrtree->gettype(qrtree, k, n) == 0){
                     /* TS kernel */
@@ -393,7 +392,6 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
 
                 temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb;
                 tempkmin = chameleon_min(tempkm, temppn);
-                ldbp = BLKLDD(B, p);
 
 #if defined(CHAMELEON_COPY_DIAG)
                 MORSE_TASK_zlacpy(
@@ -433,7 +431,6 @@ void morse_pzunmlq_param(const libhqr_tree_t *qrtree,
                 p = qrtree->currpiv(qrtree, k, n);
 
                 tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                ldbp = BLKLDD(B, p);
 
                 if(qrtree->gettype(qrtree, k, n) == 0){
                     /* TS kernel */
diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c
index 55c2b74104127457e7977e11da0a14fcceeaa7a1..04b3f6d21343fff2f186dc2e3ea143a7e7c99f14 100644
--- a/compute/pzunmqr_param.c
+++ b/compute/pzunmqr_param.c
@@ -300,9 +300,8 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
 
                 tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                 ldan = BLKLDD(A, n);
-                ldbp = BLKLDD(B, p);
 
-                if(qrtree->gettype(qrtree, k, n) == 0){
+                if( qrtree->gettype(qrtree, k, n) == 0 ) {
                     /* TS kernel */
                     L = 0;
                     T = TS;
@@ -312,6 +311,7 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                     L = tempmm;
                     T = TT;
                 }
+
                 for (m = 0; m < B->mt; m++) {
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                     ldbm = BLKLDD(B, m);
@@ -324,7 +324,7 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                         MORSE_TASK_ztpmqrt(
                             &options,
                             side, trans,
-                            tempmm, tempnn, tempkn, L, ib, T->nb,
+                            tempmm, tempnn, tempkn, chameleon_min( L, tempmm ), ib, T->nb,
                             A(n, k), ldan,
                             T(n, k), T->mb,
                             B(m, p), ldbm,
@@ -432,16 +432,15 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
 
                 tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                 ldan = BLKLDD(A, n);
-                ldbp = BLKLDD(B, p);
 
-                if(qrtree->gettype(qrtree, k, n) == 0){
+                if( qrtree->gettype(qrtree, k, n) == 0 ) {
                     /* TS kernel */
                     L = 0;
                     T = TS;
                 }
                 else {
                     /* TT kernel */
-                    L = tempmm;
+                    L = A->mb;
                     T = TT;
                 }
 
@@ -457,7 +456,7 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                         MORSE_TASK_ztpmqrt(
                             &options,
                             side, trans,
-                            tempmm, tempnn, tempkn, L, ib, T->nb,
+                            tempmm, tempnn, tempkn, chameleon_min( L, tempmm ), ib, T->nb,
                             A(n, k), ldan,
                             T(n, k), T->mb,
                             B(m, p), ldbm,
diff --git a/compute/zgesvd.c b/compute/zgesvd.c
index d4a249b8a080aa15d5a832ecfb74fecdb867e60f..134ece6b23d501dfc4ee4e5af6d8b6660be51516 100644
--- a/compute/zgesvd.c
+++ b/compute/zgesvd.c
@@ -467,6 +467,7 @@ int MORSE_zgesvd_Tile_Async( MORSE_enum jobu, MORSE_enum jobvt,
     NB = descA.mb;
     LDAB = NB + 1;
     uplo = M >= N ? MorseUpper : MorseLower;
+
 #if defined(CHAMELEON_COPY_DIAG)
     {
         morse_zdesc_alloc(D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, );
@@ -474,7 +475,7 @@ int MORSE_zgesvd_Tile_Async( MORSE_enum jobu, MORSE_enum jobvt,
     }
 #endif
     /* Reduction to band */
-    morse_pzgebrd_ge2gb( descA, descT, D,
+    morse_pzgebrd_ge2gb( &descA, &descT, Dptr,
                          sequence, request );
 
     /* Allocate band structure */
@@ -498,6 +499,7 @@ int MORSE_zgesvd_Tile_Async( MORSE_enum jobu, MORSE_enum jobvt,
     }
     memset(E, 0, MINMN * sizeof(double) );
 
+#if !defined(CHAMELEON_SIMULATION)
     /* NCC = 0, C = NULL, we do not update any matrix with new singular vectors */
     /* On exit, AB = U (S +~ E) VT */
     if (uplo == MorseUpper){
@@ -535,7 +537,6 @@ int MORSE_zgesvd_Tile_Async( MORSE_enum jobu, MORSE_enum jobvt,
 
     morse_sequence_wait( morse, sequence );
 
-#if !defined(CHAMELEON_SIMULATION)
     info = LAPACKE_zgbbrd( LAPACK_COL_MAJOR,
                            gbbrd_vect,
                            M, N,
@@ -548,7 +549,10 @@ int MORSE_zgesvd_Tile_Async( MORSE_enum jobu, MORSE_enum jobvt,
     if (info != 0) {
         fprintf(stderr, "MORSE_zgesvd_Tile_Async: LAPACKE_zgbbrd = %d\n", info );
     }
+#else
+    morse_sequence_wait( morse, sequence );
 #endif /* !defined(CHAMELEON_SIMULATION) */
+
     morse_desc_mat_free( &descAB );
 
     /* Transform U and Vt into tile format */
diff --git a/compute/zheevd.c b/compute/zheevd.c
index c4af46e6ced9cf1cc660f12f8b08e52b2d6c3f5e..68768489e6f10dac7b3e5ceb9eeb2d146b19187e 100644
--- a/compute/zheevd.c
+++ b/compute/zheevd.c
@@ -525,7 +525,6 @@ int MORSE_zheevd_Tile_Async( MORSE_enum jobz, MORSE_enum uplo,
     /* Cleanup the temporary data */
     morse_desc_mat_free( &descV );
     free(V);
-    free(E);
 
     if (Dptr != NULL) {
         morse_desc_mat_free( Dptr );
diff --git a/control/compute_z.h b/control/compute_z.h
index 2f8e8492a0e19534341f1d1991f9bd7a009e12e3..d855a32e884090447823b2e7eaec2e498a390a52 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -65,7 +65,7 @@ void morse_pzbarrier_row2tl(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_r
 void morse_pzbarrier_tl2pnl(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzbarrier_tl2row(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgebrd_gb2bd(MORSE_enum uplo, MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, MORSE_desc_t D, MORSE_sequence_t *sequence, MORSE_request_t *request);
+void morse_pzgebrd_ge2gb(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *D, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgemm(MORSE_enum transA, MORSE_enum transB, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);