diff --git a/compute/pztpgqrt.c b/compute/pztpgqrt.c index e6a9dd7aae8e09548b32c836563ef1cc26bea0ce..5b0672d031d537dcfaf402f655720a86e0a194e4 100644 --- a/compute/pztpgqrt.c +++ b/compute/pztpgqrt.c @@ -24,30 +24,42 @@ **/ #include "control/common.h" -#define V(m,n) V, m, n -#define T(m,n) T, m, n -#define A(m,n) A, m, n -#define B(m,n) B, m, n +#define V1(m,n) V1, m, n +#define T1(m,n) T1, m, n +#define V2(m,n) V2, m, n +#define T2(m,n) T2, m, n +#define Q1(m,n) Q1, m, n +#define Q2(m,n) Q2, m, n +#if defined(CHAMELEON_COPY_DIAG) +#define DIAG(k) DIAG, k, 0 +#else +#define DIAG(k) V1, k, k +#endif /***************************************************************************//** * Parallel tile QR factorization - dynamic scheduling **/ -void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, +void morse_pztpgqrt( int L, + MORSE_desc_t *V1, MORSE_desc_t *T1, + MORSE_desc_t *V2, MORSE_desc_t *T2, + MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request ) { MORSE_context_t *morse; MORSE_option_t options; size_t ws_worker = 0; size_t ws_host = 0; + MORSE_desc_t *DIAG = NULL; int k, m, n; - int ldak, ldvm, ldbm; - int tempkn, tempnn, tempmm, templm; - int ib; + int ldvk, ldvm; + int ldqk, ldqm; + int tempkm, tempkn, tempkk, tempnn, tempmm, templm; + int ib, minMT; /* Dimension of the first column */ - int maxm = B->m - L; - int maxmt = (maxm % B->mb == 0) ? (maxm / B->mb) : (maxm / B->mb + 1); + int maxm = Q2->m - L; + int maxmt = (maxm % Q2->mb == 0) ? (maxm / Q2->mb) : (maxm / Q2->mb + 1); int maxmtk; morse = morse_context_self(); @@ -57,18 +69,24 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M ib = MORSE_IB; + if (V1->m > V1->n) { + minMT = V1->nt; + } else { + minMT = V1->mt; + } + /* - * ztpmqrt = A->nb * ib + * ztpmqrt = Q1->nb * ib */ - ws_worker = A->nb * ib; + ws_worker = Q1->nb * ib; /* Allocation of temporary (scratch) working space */ #if defined(CHAMELEON_USE_CUDA) /* Worker space * - * ztpmqrt = 2 * A->nb * ib + * ztpmqrt = 2 * Q1->nb * ib */ - ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); + ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 ); #endif ws_worker *= sizeof(MORSE_Complex64_t); @@ -76,31 +94,91 @@ void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, M RUNTIME_options_ws_alloc( &options, ws_worker, ws_host ); - for (k = V->nt-1; k >= 0; k--) { - tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; - ldak = BLKLDD(A, k); +#if defined(CHAMELEON_COPY_DIAG) + /* necessary to avoid dependencies between tasks regarding the diag tile */ + DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t)); + morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q); +#endif + + for (k = V1->nt-1; k >= 0; k--) { + tempkm = k == V1->mt-1 ? V1->m-k*V1->mb : V1->mb; + tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb; + tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb; + ldvk = BLKLDD(V1, k); + ldqk = BLKLDD(Q1, k); - maxmtk = chameleon_min( B->mt, maxmt+k ) - 1; + /* Equivalent to the tsmqr step on Q1,Q2 */ + maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1; for (m = maxmtk; m > -1; m--) { - tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb; - templm = m == maxmtk ? tempmm : 0; - ldvm = BLKLDD(V, m); - ldbm = BLKLDD(B, m); + tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb; + templm = m == maxmtk ? tempmm : 0; + ldvm = BLKLDD(V2, m); + ldqm = BLKLDD(Q2, m); + + for (n = k; n < Q2->nt; n++) { + tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb; + MORSE_TASK_ztpmqrt( + &options, + MorseLeft, MorseNoTrans, + tempmm, tempnn, tempkn, templm, ib, T2->nb, + V2(m, k), ldvm, + T2(m, k), T2->mb, + Q1(k, n), ldqk, + Q2(m, n), ldqm ); + } + } - for (n = k; n < B->nt; n++) { - tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb; + for (m = Q1->mt - 1; m > k; m--) { + tempmm = m == Q1->mt-1 ? Q1->m-m*Q1->mb : Q1->mb; + ldvm = BLKLDD(V1, m); + ldqm = BLKLDD(Q1, m); + for (n = k; n < Q1->nt; n++) { + tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb; MORSE_TASK_ztpmqrt( &options, - MorseLeft, MorseConjTrans, - tempmm, tempnn, tempkn, templm, ib, T->nb, - V(m, k), ldvm, - T(m, k), T->mb, - A(k, n), ldak, - B(m, n), ldbm ); + MorseLeft, MorseNoTrans, + tempmm, tempnn, tempkn, 0, ib, T1->nb, + V1(m, k), ldvm, + T1(m, k), T1->mb, + Q1(k, n), ldqk, + Q1(m, n), ldqm ); } } + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_TASK_zlacpy( + &options, + MorseLower, tempkm, tempkk, V1->nb, + V1(k, k), ldvk, + DIAG(k), ldvk ); +#if defined(CHAMELEON_USE_CUDA) + MORSE_TASK_zlaset( + &options, + MorseUpper, tempkm, tempkk, + 0., 1., + DIAG(k), ldvk ); +#endif +#endif + for (n = k; n < Q1->nt; n++) { + tempnn = n == Q1->nt-1 ? Q1->n-n*Q1->nb : Q1->nb; + MORSE_TASK_zunmqr( + &options, + MorseLeft, MorseNoTrans, + tempkm, tempnn, tempkk, ib, T1->nb, + DIAG(k), ldvk, + T1(k, k), T1->mb, + Q1(k, n), ldqk); + } } + RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, morse); MORSE_TASK_dataflush_all(); + +#if defined(CHAMELEON_COPY_DIAG) + MORSE_Sequence_Wait(sequence); + morse_desc_mat_free(DIAG); + free(DIAG); +#endif + (void)DIAG; } diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c index 6a81d5cd53ea8a830da38ac54e66fa43c74dbdb1..1cdab39d879bfe7d19355e55a7e60e33a8af37fe 100644 --- a/compute/ztpgqrt.c +++ b/compute/ztpgqrt.c @@ -37,68 +37,85 @@ ******************************************************************************* * * @param[in] M - * The number of rows of the matrices B, and V. M >= 0. + * The number of rows of the matrices Q2, and V2. M >= 0. * * @param[in] N - * The number of columns of the matrices B, and A. N >= 0. + * The number of columns of the matrices Q1, and Q1. N >= 0. * * @param[in] K * The number of elementary reflectors whose product defines * the matrix Q in the matrix V. + * The number of rows of the matrices V1, and Q1. + * The number of columns of the matrices V1, and V2. * * @param[in] L - * The number of rows of the upper trapezoidal part of V. + * The number of rows of the upper trapezoidal part of V2. * MIN(M,N) >= L >= 0. See Further Details. * - * @param[in] V + * @param[in] V1 * The i-th row must contain the vector which defines the * elementary reflector H(i), for i = 1,2,...,k, as returned by - * MORSE_ztpqrt() in the first k rows of its array argument V. - * V is matrx of size M-by-K. The first M-L rows + * MORSE_ztpqrt(). + * V1 is a matrix of size K-by-K. + * + * @param[in] LDV1 + * The leading dimension of the array V1. LDV1 >= max(1,K). + * + * @param[int] descT1 + * The auxiliary factorization data generated by the call to + * MORSE_zgeqrf() on V1. + * + * @param[in] V2 + * The i-th row must contain the vector which defines the + * elementary reflector H(i), for i = 1,2,...,k, as returned by + * MORSE_ztpqrt() in the first k rows of its array argument V2. + * V2 is a matrix of size M-by-K. The first M-L rows * are rectangular, and the last L rows are upper trapezoidal. * - * @param[in] LDV - * The leading dimension of the array V. LDV >= max(1,K). + * @param[in] LDV2 + * The leading dimension of the array V2. LDV2 >= max(1,M). * - * @param[int] descT - * On exit, auxiliary factorization data, required by MORSE_zgeqrs to - * solve the system of equations, or by any function to apply the Q. + * @param[int] descT2 + * The auxiliary factorization data, generated by the call to + * MORSE_ztpqrt() on V1 and V2, and associated to V2. * - * @param[in,out] A - * A is COMPLEX*16 array, dimension (LDA,N) - * On entry, the K-by-N matrix A. - * On exit, A is overwritten by the corresponding block of - * Q*A. See Further Details. + * @param[in,out] Q1 + * Q1 is COMPLEX*16 array, dimension (LDQ1,N) + * On entry, the K-by-N matrix Q1. + * On exit, Q1 is overwritten by the corresponding block of + * Q*Q1. See Further Details. * - * @param[in] LDA - * The leading dimension of the array A. LDA >= max(1,K). + * @param[in] LDQ1 + * The leading dimension of the array Q1. LDQ1 >= max(1,K). * - * @param[in,out] B - * On entry, the pentagonal M-by-N matrix B. - * On exit, B contains Q. + * @param[in,out] Q2 + * On entry, the pentagonal M-by-N matrix Q2. + * On exit, Q2 contains Q. * - * @param[in] LDB - * The leading dimension of the array B. LDB >= max(1,M). + * @param[in] LDQ2 + * The leading dimension of the array Q2. LDQ2 >= max(1,M). * * @par Further Details: * ===================== * * The input matrix Q is a (K+M)-by-N matrix * - * Q = [ A ] - * [ B ] + * Q = [ Q1 ] + * [ Q2 ] * - * where A is an identity matrix, and B is a M-by-N matrix of 0. - * V a matrix of householder reflectors with a pentagonal shape consisting of a - * (M-L)-by-K rectangular matrix V1 on top of a L-by-N - * Upper trapezoidal matrix V2: + * where Q1 is an identity matrix, and Q2 is a M-by-N matrix of 0. + * V is a matrix of householder reflectors with a pentagonal shape consisting + * of a K-by-K rectangular matrix V1 on top of a matrix V2 composed of + * (M-L)-by-K rectangular part on top of a L-by-N upper trapezoidal matrix: * - * V = [ V1 ] <- (M-L)-by-N rectangular - * [ V2 ] <- L-by-N upper trapezoidal. + * V = [ V1 ] <- K-by-K rectangular + * [ V2a ] <- (M-L)-by-K rectangular + * [ V2b ] <- L-by-K upper trapezoidal. * - * The upper trapezoidal matrix V2 consists of the first L rows of a - * K-by-K upper triangular matrix, where 0 <= L <= MIN(M,K). If L=0, - * V is rectangular M-by-K; if M=L=K, V is upper triangular. + * The upper trapezoidal part of the matrix V2 consists of the first L rows of + * a K-by-K upper triangular matrix, where 0 <= L <= MIN(M,K). If L=0, V2 is + * rectangular M-by-K; if M=L=K, V2 is upper triangular. Those are the two + * cases only handled for now. * ******************************************************************************* * @@ -117,17 +134,17 @@ * ******************************************************************************/ int MORSE_ztpgqrt( int M, int N, int K, int L, - MORSE_Complex64_t *V, int LDV, - MORSE_desc_t *descT, - MORSE_Complex64_t *A, int LDA, - MORSE_Complex64_t *B, int LDB ) + MORSE_Complex64_t *V1, int LDV1, MORSE_desc_t *descT1, + MORSE_Complex64_t *V2, int LDV2, MORSE_desc_t *descT2, + MORSE_Complex64_t *Q1, int LDQ1, + MORSE_Complex64_t *Q2, int LDQ2 ) { int NB; int status; MORSE_context_t *morse; MORSE_sequence_t *sequence = NULL; MORSE_request_t request = MORSE_REQUEST_INITIALIZER; - MORSE_desc_t descA, descB, descV; + MORSE_desc_t descQ1, descQ2, descV1, descV2; int minMK = chameleon_min( M, K ); morse = morse_context_self(); @@ -153,18 +170,26 @@ int MORSE_ztpgqrt( int M, int N, int K, int L, morse_error("MORSE_ztpgqrt", "illegal value of N"); return -4; } - if (LDV < chameleon_max(1, M)) { - morse_error("MORSE_ztpgqrt", "illegal value of LDV"); + if (K != N) { + morse_error("MORSE_ztpgqrt", "illegal value of K and N. K must be equal to N"); + return -3; + } + if (LDV1 < chameleon_max(1, K)) { + morse_error("MORSE_ztpgqrt", "illegal value of LDV1"); return -6; } - if (LDA < chameleon_max(1, K)) { - morse_error("MORSE_ztpgqrt", "illegal value of LDA"); + if (LDV2 < chameleon_max(1, M)) { + morse_error("MORSE_ztpgqrt", "illegal value of LDV2"); return -9; } - if (LDB < chameleon_max(1, M)) { - morse_error("MORSE_ztpgqrt", "illegal value of LDB"); + if (LDQ1 < chameleon_max(1, K)) { + morse_error("MORSE_ztpgqrt", "illegal value of LDQ1"); return -11; } + if (LDQ2 < chameleon_max(1, M)) { + morse_error("MORSE_ztpgqrt", "illegal value of LDQ2"); + return -13; + } /* Quick return */ if (minMK == 0) @@ -183,34 +208,41 @@ int MORSE_ztpgqrt( int M, int N, int K, int L, morse_sequence_create(morse, &sequence); /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ - morse_zooplap2tile( descV, V, NB, NB, LDB, K, 0, 0, M, K, sequence, &request, - morse_desc_mat_free(&(descV)) ); - morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, sequence, &request, - (morse_desc_mat_free(&(descV)), - morse_desc_mat_free(&(descA))) ); - morse_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, sequence, &request, - (morse_desc_mat_free(&(descV)), - morse_desc_mat_free(&(descA)), - morse_desc_mat_free(&(descB))) ); + morse_zooplap2tile( descV1, V1, NB, NB, LDV1, K, 0, 0, M, K, sequence, &request, + morse_desc_mat_free(&(descV1)) ); + morse_zooplap2tile( descV2, V2, NB, NB, LDV2, K, 0, 0, M, K, sequence, &request, + (morse_desc_mat_free(&(descV1)), + morse_desc_mat_free(&(descV2))) ); + morse_zooplap2tile( descQ1, Q1, NB, NB, LDQ1, N, 0, 0, K, N, sequence, &request, + (morse_desc_mat_free(&(descV1)), + morse_desc_mat_free(&(descV2)), + morse_desc_mat_free(&(descQ1))) ); + morse_zooplap2tile( descQ2, Q2, NB, NB, LDQ2, N, 0, 0, M, N, sequence, &request, + (morse_desc_mat_free(&(descV1)), + morse_desc_mat_free(&(descV2)), + morse_desc_mat_free(&(descQ1)), + morse_desc_mat_free(&(descQ2))) ); /* } else {*/ -/* morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N,*/ +/* morse_ziplap2tile( descQ1, Q1, NB, NB, LDQ1, N, 0, 0, M, N,*/ /* sequence, &request);*/ /* }*/ /* Call the tile interface */ - MORSE_ztpgqrt_Tile_Async(L, &descV, descT, &descA, &descB, sequence, &request); + MORSE_ztpgqrt_Tile_Async(L, &descV1, descT1, &descV2, descT2, &descQ1, &descQ2, sequence, &request); /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) {*/ - morse_zooptile2lap(descA, A, NB, NB, LDA, N, sequence, &request); - morse_zooptile2lap(descB, B, NB, NB, LDB, N, sequence, &request); + morse_zooptile2lap(descQ1, Q1, NB, NB, LDQ1, N, sequence, &request); + morse_zooptile2lap(descQ2, Q2, NB, NB, LDQ2, N, sequence, &request); morse_sequence_wait(morse, sequence); - morse_desc_mat_free(&descV); - morse_desc_mat_free(&descA); - morse_desc_mat_free(&descB); + morse_desc_mat_free(&descV1); + morse_desc_mat_free(&descV2); + morse_desc_mat_free(&descQ1); + morse_desc_mat_free(&descQ2); /* } else {*/ -/* morse_ziptile2lap( descV, V, NB, NB, LDV, K, sequence, &request);*/ -/* morse_ziptile2lap( descA, A, NB, NB, LDA, N, sequence, &request);*/ -/* morse_ziptile2lap( descB, B, NB, NB, LDB, N, sequence, &request);*/ +/* morse_ziptile2lap( descV1, V1, NB, NB, LDV1, K, sequence, &request);*/ +/* morse_ziptile2lap( descV2, V2, NB, NB, LDV2, K, sequence, &request);*/ +/* morse_ziptile2lap( descQ1, Q1, NB, NB, LDQ1, N, sequence, &request);*/ +/* morse_ziptile2lap( descQ2, Q2, NB, NB, LDQ2, N, sequence, &request);*/ /* morse_sequence_wait(morse, sequence);*/ /* }*/ @@ -231,16 +263,6 @@ int MORSE_ztpgqrt( int M, int N, int K, int L, * ******************************************************************************* * - * @param[in,out] A - * On entry, the M-by-N matrix A. - * On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N - * upper trapezoidal matrix R (R is upper triangular if M >= N); the elements below the - * diagonal represent the unitary matrix Q as a product of elementary reflectors stored - * by tiles. - * - * @param[out] T - * On exit, auxiliary factorization data, required by MORSE_zgeqrs to solve the system - * of equations. * ******************************************************************************* * @@ -257,7 +279,10 @@ int MORSE_ztpgqrt( int M, int N, int K, int L, * @sa MORSE_zgeqrs_Tile * ******************************************************************************/ -int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B ) +int MORSE_ztpgqrt_Tile( int L, + MORSE_desc_t *V1, MORSE_desc_t *T1, + MORSE_desc_t *V2, MORSE_desc_t *T2, + MORSE_desc_t *Q1, MORSE_desc_t *Q2 ) { MORSE_context_t *morse; MORSE_sequence_t *sequence = NULL; @@ -270,10 +295,10 @@ int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A return MORSE_ERR_NOT_INITIALIZED; } morse_sequence_create(morse, &sequence); - MORSE_ztpgqrt_Tile_Async(L, V, T, A, B, sequence, &request); + MORSE_ztpgqrt_Tile_Async(L, V1, T1, V2, T2, Q1, Q2, sequence, &request); morse_sequence_wait(morse, sequence); - RUNTIME_desc_getoncpu(A); - RUNTIME_desc_getoncpu(B); + RUNTIME_desc_getoncpu(Q1); + RUNTIME_desc_getoncpu(Q2); status = sequence->status; morse_sequence_destroy(morse, sequence); @@ -309,7 +334,10 @@ int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A * @sa MORSE_zgeqrs_Tile_Async * ******************************************************************************/ -int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, +int MORSE_ztpgqrt_Tile_Async( int L, + MORSE_desc_t *V1, MORSE_desc_t *T1, + MORSE_desc_t *V2, MORSE_desc_t *T2, + MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request ) { MORSE_context_t *morse; @@ -334,37 +362,47 @@ int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_des return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED); /* Check descriptors for correctness */ - if (morse_desc_check(V) != MORSE_SUCCESS) { - morse_error("MORSE_ztpgqrt_Tile", "invalid third descriptor"); + if (morse_desc_check(V1) != MORSE_SUCCESS) { + morse_error("MORSE_ztpgqrt_Tile", "invalid V1 descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(T1) != MORSE_SUCCESS) { + morse_error("MORSE_ztpgqrt_Tile", "invalid T1 descriptor"); + return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); + } + if (morse_desc_check(V2) != MORSE_SUCCESS) { + morse_error("MORSE_ztpgqrt_Tile", "invalid V2 descriptor"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } - if (morse_desc_check(T) != MORSE_SUCCESS) { - morse_error("MORSE_ztpgqrt_Tile", "invalid third descriptor"); + if (morse_desc_check(T2) != MORSE_SUCCESS) { + morse_error("MORSE_ztpgqrt_Tile", "invalid T2 descriptor"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } - if (morse_desc_check(A) != MORSE_SUCCESS) { - morse_error("MORSE_ztpgqrt_Tile", "invalid first descriptor"); + if (morse_desc_check(Q1) != MORSE_SUCCESS) { + morse_error("MORSE_ztpgqrt_Tile", "invalid Q1 descriptor"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } - if (morse_desc_check(B) != MORSE_SUCCESS) { - morse_error("MORSE_ztpgqrt_Tile", "invalid second descriptor"); + if (morse_desc_check(Q2) != MORSE_SUCCESS) { + morse_error("MORSE_ztpgqrt_Tile", "invalid Q2 descriptor"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } /* Check input arguments */ - if (A->nb != A->mb) { + if (Q1->nb != Q1->mb) { morse_error("MORSE_ztpgqrt_Tile", "only square tiles supported"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } - if (((B->m - L) % B->mb) != 0) { + if ( (L != 0) && (((Q2->m - L) % Q2->mb) != 0) ) { morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } /* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */ - morse_pztpgqrt(L, V, T, A, B, sequence, request); + morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request ); + morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request ); + morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, sequence, request ); /* } */ /* else { */ - /* morse_pztpgqrtrh(A, T, MORSE_RHBLK, sequence, request); */ + /* morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */ /* } */ return MORSE_SUCCESS; diff --git a/compute/ztpqrt.c b/compute/ztpqrt.c index f7c95881575bad2bc97965eb056c6854f59f5c13..adb72fe9aef7004c9eae4548e30859802c55b7fd 100644 --- a/compute/ztpqrt.c +++ b/compute/ztpqrt.c @@ -348,7 +348,7 @@ int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc morse_error("MORSE_ztpqrt_Tile", "only square tiles supported"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } - if (((B->m - L) % B->mb) != 0) { + if ( (L != 0) && (((B->m - L) % B->mb) != 0) ) { morse_error("MORSE_ztpqrt_Tile", "Triangular part must be aligned with tiles"); return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE); } diff --git a/control/compute_z.h b/control/compute_z.h index 122120e48999160fafb1e01aef495670fe363dba..bb930ba0ff84322f11b7c5969ea268ae7a91b831 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request); -void morse_pztpgqrt( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request ); +void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/include/morse_z.h b/include/morse_z.h index 2718a6bea1c82411a0c4c401821f0b5be940f90d..65f911f00a7f257fea0d897da102c1c8294ef9fa 100644 --- a/include/morse_z.h +++ b/include/morse_z.h @@ -102,7 +102,7 @@ int MORSE_zsyrk(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64 int MORSE_zsyr2k(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_Complex64_t beta, MORSE_Complex64_t *C, int LDC); int MORSE_zsysv(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); int MORSE_zsytrs(MORSE_enum uplo, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); -int MORSE_ztpgqrt( int M, int N, int K, int L, MORSE_Complex64_t *V, int LDV, MORSE_desc_t *descT, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB ); +int MORSE_ztpgqrt( int M, int N, int K, int L, MORSE_Complex64_t *V1, int LDV1, MORSE_desc_t *descT1, MORSE_Complex64_t *V2, int LDV2, MORSE_desc_t *descT2, MORSE_Complex64_t *Q1, int LDQ1, MORSE_Complex64_t *Q2, int LDQ2 ); int MORSE_ztpqrt( int M, int N, int L, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, MORSE_desc_t *descT ); int MORSE_ztradd(MORSE_enum uplo, MORSE_enum trans, int M, int N, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t beta, MORSE_Complex64_t *B, int LDB); int MORSE_ztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, int N, int NRHS, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB); @@ -181,7 +181,7 @@ int MORSE_zsyrk_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, int MORSE_zsyr2k_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C); int MORSE_zsysv_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B); int MORSE_zsytrs_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B); -int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B ); +int MORSE_ztpgqrt_Tile( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2 ); int MORSE_ztpqrt_Tile( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T ); int MORSE_ztradd_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B); int MORSE_ztrmm_Tile(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B); @@ -257,7 +257,7 @@ int MORSE_zsytrs_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, M int MORSE_zsymm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zsyrk_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_zsyr2k_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request); -int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V, MORSE_desc_t *T, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request ); +int MORSE_ztpgqrt_Tile_Async( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request ); int MORSE_ztpqrt_Tile_Async( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request ); int MORSE_ztradd_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); int MORSE_ztrmm_Tile_Async(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request); diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c index 1a6a8408584bfd0e861b08db5787c7552610ea14..e1743543e701b43c6a81372c97aae60db1b6154e 100644 --- a/runtime/starpu/codelets/codelet_ztpmqrt.c +++ b/runtime/starpu/codelets/codelet_ztpmqrt.c @@ -49,6 +49,7 @@ void MORSE_TASK_ztpmqrt( const MORSE_option_t *options, STARPU_VALUE, &N, sizeof(int), STARPU_VALUE, &K, sizeof(int), STARPU_VALUE, &L, sizeof(int), + STARPU_VALUE, &ib, sizeof(int), STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn), STARPU_VALUE, &ldv, sizeof(int), STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c index 8121d765e808fd7fefe9add88dec4956572ddaf6..bda639581a0dd6a1474be055ba4b038efa04137a 100644 --- a/runtime/starpu/codelets/codelet_ztpqrt.c +++ b/runtime/starpu/codelets/codelet_ztpqrt.c @@ -43,6 +43,7 @@ void MORSE_TASK_ztpqrt( const MORSE_option_t *options, STARPU_VALUE, &M, sizeof(int), STARPU_VALUE, &N, sizeof(int), STARPU_VALUE, &L, sizeof(int), + STARPU_VALUE, &ib, sizeof(int), STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An), STARPU_VALUE, &lda, sizeof(int), STARPU_RW, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index fe9c243f374a1210b8394491c77bd2e6465e3756..a095b33b38da178dcab0ba38304ce5f919ecffcf 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -89,6 +89,7 @@ set(ZSRC #testing_zhegst.c #testing_zhegv.c #testing_zhegvd.c + testing_zgeqrf_qdwh.c ) # Add include and link directories diff --git a/testing/testing_zauxiliary.c b/testing/testing_zauxiliary.c index aa73b238279ecac1614e926aa1ab4c4f5f3781d0..485efb5a496009ed308bf28f356ff5f2c1ab2b53 100644 --- a/testing/testing_zauxiliary.c +++ b/testing/testing_zauxiliary.c @@ -304,6 +304,9 @@ int main (int argc, char **argv) /* else if ( strcmp(func, "GETMI") == 0 ) { */ /* info += testing_zgetmi( argc, argv ); */ /* } */ + else if ( strcmp(func, "GEQRF_QDWH") == 0 ) { + info += testing_zgeqrf_qdwh( argc, argv ); + } else { fprintf(stderr, "Function unknown\n"); } diff --git a/testing/testing_zauxiliary.h b/testing/testing_zauxiliary.h index f566abbfe9a3760997cf4dc9455533503d9c3974..1fc57362bef19393d323fe6c61e3eaef7468ce2e 100644 --- a/testing/testing_zauxiliary.h +++ b/testing/testing_zauxiliary.h @@ -108,6 +108,8 @@ int testing_zhegst(int argc, char **argv); int testing_zgecfi(int argc, char **argv); int testing_zgetmi(int argc, char **argv); +int testing_zgeqrf_qdwh(int argc, char **argv); + #ifdef DOUBLE int testing_zcposv(int argc, char **argv); int testing_zcgesv(int argc, char **argv); diff --git a/testing/testing_zgeqrf_qdwh.c b/testing/testing_zgeqrf_qdwh.c new file mode 100644 index 0000000000000000000000000000000000000000..956ffa3f980169947dcbd0e5364bfbdf4605bae7 --- /dev/null +++ b/testing/testing_zgeqrf_qdwh.c @@ -0,0 +1,268 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file testing_zgels.c + * + * MORSE testing routines + * MORSE is a software package provided by Univ. of Tennessee, + * Univ. of California Berkeley and Univ. of Colorado Denver + * + * @version 2.5.0 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for MORSE 1.0.0 + * @author Bilel Hadri + * @author Hatem Ltaief + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2010-11-15 + * @precisions normal z -> c d s + * + **/ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> + +#include <morse.h> +#include <coreblas/include/cblas.h> +#include <coreblas/include/lapacke.h> +#include <coreblas/include/coreblas.h> +#include <coreblas/include/coreblas_z.h> +#include "testing_zauxiliary.h" + +#undef REAL +#define COMPLEX + +static int check_orthogonality(int, int, const MORSE_Complex64_t*, int, double); +static int check_factorization(int, int, const MORSE_Complex64_t*, int, const MORSE_Complex64_t*, int, MORSE_Complex64_t*, int, double); + +int testing_zgeqrf_qdwh(int argc, char **argv) +{ + int hres = 0; + + if ( argc != 4 ) { + USAGE("GEQRF_QDWH", "optid M NB LDA", + " - optid: Take into account the fact that A2 is Id or not\n" + " - M : number of rows of the matrix A1 and A2\n" + " - NB : tile size\n" + " - IB : inner tile size\n"); + return -1; + } + + int optid = atoi(argv[0]) ? 1: 0; + int M = atoi(argv[1]); + int NB = atoi(argv[2]); + int IB = atoi(argv[3]); + int MxM = M * M; + int LDA = 2*M; + double eps; + int info_ortho, info_solution, info_factorization; + int i, j; + + /** + * Compute A = QR with + * + * A = [ A1 ] and Q = [ Q1 ] + * [ A2 ] = [ Q2 ] + * + * and where A1 is the same size as A2 + * + */ + MORSE_Complex64_t *A1 = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *A2 = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *Q1 = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *Q2 = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *A = (MORSE_Complex64_t *)malloc(2*M*M*sizeof(MORSE_Complex64_t)); + MORSE_Complex64_t *Q; + MORSE_desc_t *T1, *T2; + + /* Check if unable to allocate memory */ + if ( (!A) || (!A1) || (!A2) || (!Q1) || (!Q2) ){ + printf("Out of Memory \n "); + return -2; + } + + MORSE_Disable(MORSE_AUTOTUNING); + MORSE_Set(MORSE_TILE_SIZE, NB); + MORSE_Set(MORSE_INNER_BLOCK_SIZE, IB); + + MORSE_Alloc_Workspace_zgels(M, M, &T1, 1, 1); + MORSE_Alloc_Workspace_zgels(M, M, &T2, 1, 1); + + eps = LAPACKE_dlamch('e'); + + /* Initialize A1, A2, and A */ + LAPACKE_zlarnv_work(IONE, ISEED, MxM, A1); + LAPACKE_zlaset_work( LAPACK_COL_MAJOR, 'A', M, M, 0., 1., A2, M ); + + LAPACKE_zlacpy_work( LAPACK_COL_MAJOR, 'A', M, M, A1, M, A, LDA ); + LAPACKE_zlacpy_work( LAPACK_COL_MAJOR, 'A', M, M, A2, M, A + M, LDA ); + + /* Factorize A */ + MORSE_zgeqrf( M, M, A1, M, T1 ); + MORSE_ztpqrt( M, M, optid ? M : 0, + A1, M, + A2, M, T2 ); + + /* Generate the Q */ + MORSE_ztpgqrt( M, M, M, (optid) ? M : 0, + A1, M, T1, A2, M, T2, Q1, M, Q2, M ); + + /* Copy Q in a single matrix */ + Q = (MORSE_Complex64_t *)malloc(2*M*M*sizeof(MORSE_Complex64_t)); + LAPACKE_zlacpy_work( LAPACK_COL_MAJOR, 'A', M, M, Q1, M, Q, LDA ); + free(Q1); + LAPACKE_zlacpy_work( LAPACK_COL_MAJOR, 'A', M, M, Q2, M, Q + M, LDA ); + free(Q2); + + printf("\n"); + printf("------ TESTS FOR CHAMELEON ZGELS ROUTINE ------- \n"); + printf(" Size of the Matrix %d by %d\n", M, M); + printf("\n"); + printf(" The matrix A is randomly generated for each test.\n"); + printf("============\n"); + printf(" The relative machine precision (eps) is to be %e \n",eps); + printf(" Computational tests pass if scaled residuals are less than 60.\n"); + + /* Check the orthogonality, factorization and the solution */ + info_ortho = check_orthogonality( 2*M, M, Q, LDA, eps ); + info_factorization = check_factorization( 2*M, M, A, LDA, A1, M, Q, LDA, eps ); + + if ((info_factorization == 0) & (info_ortho == 0)) { + printf("***************************************************\n"); + printf(" ---- TESTING ZGELS ...................... PASSED !\n"); + printf("***************************************************\n"); + } + else { + printf("************************************************\n"); + printf(" - TESTING ZGELS ... FAILED !\n"); hres++; + printf("************************************************\n"); + } + + free(A); free(A1); free(A2); free(Q); + MORSE_Dealloc_Workspace( &T1 ); + MORSE_Dealloc_Workspace( &T2 ); + + return hres; +} + +/*------------------------------------------------------------------- + * Check the orthogonality of Q + */ + +static int +check_orthogonality( int M, int N, + const MORSE_Complex64_t *Q, int LDQ, + double eps ) +{ + MORSE_Complex64_t *Id; + double alpha, beta; + double normQ; + int info_ortho; + int i; + int minMN = min(M, N); + + double *work = (double *)malloc(minMN*sizeof(double)); + + alpha = 1.0; + beta = -1.0; + + /* Build the idendity matrix */ + Id = (MORSE_Complex64_t *) malloc(minMN*minMN*sizeof(MORSE_Complex64_t)); + LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', minMN, minMN, 0., 1., Id, minMN ); + + /* Perform Id - Q'Q */ + if (M >= N) + cblas_zherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, beta, Q, LDQ, alpha, Id, N); + else + cblas_zherk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, beta, Q, LDQ, alpha, Id, M); + + normQ = LAPACKE_zlansy_work( LAPACK_COL_MAJOR, 'I', 'U', minMN, Id, minMN, work ); + + printf("============\n"); + printf("Checking the orthogonality of Q \n"); + printf("||Id-Q'*Q||_oo / (N*eps) = %e \n", normQ/(minMN*eps)); + + if ( isnan(normQ / (minMN * eps)) || isinf(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 60.0) ) { + printf("-- Orthogonality is suspicious ! \n"); + info_ortho=1; + } + else { + printf("-- Orthogonality is CORRECT ! \n"); + info_ortho=0; + } + + free(work); free(Id); + + return info_ortho; +} + +/*------------------------------------------------------------ + * Check the factorization QR + */ + +static int +check_factorization(int M, int N, + const MORSE_Complex64_t *A, int LDA, + const MORSE_Complex64_t *R, int LDR, + MORSE_Complex64_t *Q, int LDQ, + double eps ) +{ + double Anorm, Rnorm; + MORSE_Complex64_t alpha, beta; + int info_factorization; + int i,j; + double *work = (double *)malloc(max(M,N)*sizeof(double)); + + alpha = 1.0; + beta = 0.0; + + if (M >= N) { + /* Perform Q = Q * R */ + cblas_ztrmm( CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, M, N, CBLAS_SADDR(alpha), R, LDR, Q, LDQ); + } + else { + /* Perform Q = L * Q */ + cblas_ztrmm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, M, N, CBLAS_SADDR(alpha), R, LDR, Q, LDQ); + } + + /* Compute the Residual */ + CORE_zgeadd( MorseNoTrans, M, N, -1., A, LDA, 1., Q, LDQ ); + + Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, Q, LDQ, work ); + Anorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'I', M, N, A, LDA, work ); + + if (M >= N) { + printf("============\n"); + printf("Checking the QR Factorization \n"); + printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); + } + else { + printf("============\n"); + printf("Checking the LQ Factorization \n"); + printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); + } + + if (isnan(Rnorm / (Anorm * N *eps)) || isinf(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 60.0) ) { + printf("-- Factorization is suspicious ! \n"); + info_factorization = 1; + } + else { + printf("-- Factorization is CORRECT ! \n"); + info_factorization = 0; + } + + free(work); + + return info_factorization; +}