diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 3f1dc31dd9b88c61b4f923edf02afcc02538ec7e..efada065d0c7bde35a821c17744bc259aecdb754 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -116,9 +116,7 @@ set(ZSRC
     pzgetrf_nopiv.c
     pzlacpy.c
     pzlange.c
-    pzlanhe.c
     pzlansy.c
-    pzlantr.c
     pzlaset2.c
     pzlaset.c
     pzlauum.c
diff --git a/compute/pzlange.c b/compute/pzlange.c
index 5a6b15bbad1c39f75ed295f2b17ba159df04959d..94200a379d9facdb2b13f1715512d4e640dc420b 100644
--- a/compute/pzlange.c
+++ b/compute/pzlange.c
@@ -26,323 +26,411 @@
 //WS_ADD :  A->mb + A->nb
 #include "control/common.h"
 
-#define A(m, n) A,  m,  n
-#define VECNORMS_STEP1(m, n) VECNORMS_STEP1,  m,  n
-#define VECNORMS_STEP2(m, n) VECNORMS_STEP2,  m,  n
-#define RESULT(m, n) RESULT,  m,  n
-
-/**
- *
- */
-void chameleon_pzlange( cham_normtype_t norm, CHAM_desc_t *A, double *result,
-                        RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+#define A(m, n)    A,    (m), (n)
+#define Wcol(m, n) Wcol, (m), (n)
+#define Welt(m, n) Welt, (m), (n)
+
+static inline void
+chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
+                       CHAM_desc_t *Wcol, CHAM_desc_t *Welt,
+                       RUNTIME_option_t *options)
 {
-    CHAM_desc_t *VECNORMS_STEP1 = NULL;
-    CHAM_desc_t *VECNORMS_STEP2 = NULL;
-    CHAM_desc_t *RESULT = NULL;
-    CHAM_context_t *chamctxt;
-    RUNTIME_option_t options;
-
-    int workm, workn;
-    int tempkm, tempkn;
-    int ldam;
     int m, n;
+    int minMNT = chameleon_min( A->mt, A->nt );
+    int minMN  = chameleon_min( A->m,  A->n  );
+    int MT = (uplo == ChamUpper) ? minMNT : A->mt;
+    int NT = (uplo == ChamLower) ? minMNT : A->nt;
+    int M  = (uplo == ChamUpper) ? minMN  : A->m;
+    int N  = (uplo == ChamLower) ? minMN  : A->n;
+    int P  = Welt->p;
+    int Q  = Welt->q;
+
+    /**
+     * Step 1:
+     *  For i in [0,P-1], Wcol(i, n) = sum_k colsum( A(i+k*P, n) )
+     */
+    for(n = 0; n < NT; n++) {
+        int mmin = ( uplo == ChamLower ) ? n                      : 0;
+        int mmax = ( uplo == ChamUpper ) ? chameleon_min(n+1, MT) : MT;
+
+        int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
+
+        for(m = mmin; m < mmax; m++) {
+            int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
+            int ldam = BLKLDD( A, m );
+
+            if ( (n == m) && (uplo != ChamUpperLower) ) {
+                INSERT_TASK_ztrasm(
+                    options,
+                    ChamColumnwise, uplo, diag, tempmm, tempnn,
+                    A(m, n), ldam, Wcol(m, n) );
+            }
+            else {
+                INSERT_TASK_dzasum(
+                    options,
+                    ChamColumnwise, ChamUpperLower, tempmm, tempnn,
+                    A(m, n), ldam, Wcol(m, n) );
+            }
 
-    chamctxt = chameleon_context_self();
-    if (sequence->status != CHAMELEON_SUCCESS)
-        return;
-    RUNTIME_options_init(&options, chamctxt, sequence, request);
+            if ( m >= P ) {
+                INSERT_TASK_dgeadd(
+                    options,
+                    ChamNoTrans, tempnn, 1, A->nb,
+                    1.0, Wcol(m,   n), tempnn,
+                    1.0, Wcol(m%P, n), tempnn );
+            }
+        }
 
-    *result = 0.0;
-    switch ( norm ) {
-    /*
-     *  ChamOneNorm
+        /**
+         * Step 2:
+         *  Wcol(0, n) = sum( Wcol(0..P-1, n) ), then Welt(0, n) = max( Wcol(0, n) )
          */
-    case ChamOneNorm:
-        /* Init workspace handle for the call to zlange but unused */
-        RUNTIME_options_ws_alloc( &options, 1, 0 );
-
-        workm = chameleon_max( A->mt, A->p );
-        workn = A->n;
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, A->nb, A->nb,
-                              workm, workn, 0, 0, workm, workn, A->p, A->q);
+        for(m = 1; m < P; m++) {
+            INSERT_TASK_dgeadd(
+                options,
+                ChamNoTrans, tempnn, 1, A->nb,
+                1.0, Wcol(m, n), tempnn,
+                1.0, Wcol(0, n), tempnn );
+        }
 
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, 1, A->nb, A->nb,
-                              1, workn, 0, 0, 1, workn, A->p, A->q);
+        INSERT_TASK_dlange(
+            options,
+            ChamMaxNorm, tempnn, 1, A->nb,
+            Wcol(0, n), tempnn, Welt(0, n));
+    }
 
-        CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
-                              1, 1, 0, 0, 1, 1, 1, 1);
+    /**
+     * Step 3:
+     *  For n in [0,Q-1], Welt(0, n) = max_k( Welt(0, n+k*Q) )
+     */
+    for(n = Q; n < NT; n++) {
+        INSERT_TASK_dlange_max(
+            options,
+            Welt(0, n), Welt(0, n%Q) );
+    }
 
-        for(n = A->myrank % A->q; n < A->nt; n+=A->q) {
-            tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+    /**
+     * Step 4:
+     *  Welt(0, 0) = max( Welt(0, 0..Q-1) )
+     */
+    for(n = 1; n < Q; n++) {
+        INSERT_TASK_dlange_max(
+            options,
+            Welt(0, n), Welt(0, 0) );
+    }
+}
 
-            /* Zeroes my intermediate vectors */
-            for(m = (A->myrank / A->q); m < workm; m+=A->p) {
-                INSERT_TASK_dlaset(
-                    &options,
-                    ChamUpperLower, 1, tempkn,
-                    0., 0.,
-                    VECNORMS_STEP1(m, n), 1);
+static inline void
+chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
+                       CHAM_desc_t *Wcol, CHAM_desc_t *Welt,
+                       RUNTIME_option_t *options)
+{
+    int m, n;
+    int minMNT = chameleon_min( A->mt, A->nt );
+    int minMN  = chameleon_min( A->m,  A->n  );
+    int MT = (uplo == ChamUpper) ? minMNT : A->mt;
+    int NT = (uplo == ChamLower) ? minMNT : A->nt;
+    int M  = (uplo == ChamUpper) ? minMN  : A->m;
+    int N  = (uplo == ChamLower) ? minMN  : A->n;
+    int P  = Welt->p;
+    int Q  = Welt->q;
+
+    /**
+     * Step 1:
+     *  For j in [0,Q-1], Wcol(m, j) = sum_k rowsum( A(m, j+k*Q) )
+     */
+    for(m = 0; m < MT; m++) {
+        int nmin = ( uplo == ChamUpper ) ? m                      : 0;
+        int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
+
+        int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
+        int ldam = BLKLDD( A, m );
+
+        for(n = nmin; n < nmax; n++) {
+            int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
+
+            if ( (n == m) && (uplo != ChamUpperLower) ) {
+                INSERT_TASK_ztrasm(
+                    options,
+                    ChamRowwise, uplo, diag, tempmm, tempnn,
+                    A(m, n), ldam, Wcol(m, n) );
             }
-
-            /* compute sums of absolute values on columns of each tile */
-            for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
-                tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
+            else {
                 INSERT_TASK_dzasum(
-                    &options,
-                    ChamColumnwise, ChamUpperLower, tempkm, tempkn,
-                    A(m, n), ldam, VECNORMS_STEP1(m, n));
+                    options,
+                    ChamRowwise, ChamUpperLower, tempmm, tempnn,
+                    A(m, n), ldam, Wcol(m, n) );
             }
 
-            /* Zeroes the second intermediate vector */
-            INSERT_TASK_dlaset(
-                &options,
-                ChamUpperLower, 1, tempkn,
-                0., 0.,
-                VECNORMS_STEP2(0, n), 1);
-
-            /* Compute vector sums between tiles in columns */
-            for(m = 0; m < A->mt; m++) {
+            if ( n >= Q ) {
                 INSERT_TASK_dgeadd(
-                    &options,
-                    ChamNoTrans, 1, tempkn, A->mb,
-                    1.0, VECNORMS_STEP1(m, n), 1,
-                    1.0, VECNORMS_STEP2(0, n), 1);
+                    options,
+                    ChamNoTrans, tempmm, 1, A->mb,
+                    1.0, Wcol(m, n  ), tempmm,
+                    1.0, Wcol(m, n%Q), tempmm );
             }
+        }
 
-            /*
-             * Compute max norm of each segment of the final vector in the
-             * previous workspace
-             */
-            INSERT_TASK_dlange(
-                &options,
-                ChamMaxNorm, 1, tempkn, A->nb,
-                VECNORMS_STEP2(0, n), 1,
-                VECNORMS_STEP1(0, n));
+        /**
+         * Step 2:
+         *  Wcol(m, 0) = sum( Wcol(m, 0..Q-1) ), then Welt(m, 0) = max( Wcol(m, 0) )
+         */
+        for(n = 1; n < Q; n++) {
+            INSERT_TASK_dgeadd(
+                options,
+                ChamNoTrans, tempmm, 1, A->mb,
+                1.0, Wcol(m, n), tempmm,
+                1.0, Wcol(m, 0), tempmm );
         }
 
-        /* Initialize RESULT array */
-        INSERT_TASK_dlaset(
-            &options,
-            ChamUpperLower, 1, 1,
-            0., 0.,
-            RESULT(0,0), 1);
+        INSERT_TASK_dlange(
+            options,
+            ChamMaxNorm, tempmm, 1, A->nb,
+            Wcol(m, 0), tempmm, Welt(m, 0));
+    }
+
+    /**
+     * Step 3:
+     *  For m in [0,P-1], Welt(m, 0) = max_k( Welt(m+k*P, 0) )
+     */
+    for(m = P; m < MT; m++) {
+        INSERT_TASK_dlange_max(
+            options,
+            Welt(m, 0), Welt(m%P, 0) );
+    }
+
+    /**
+     * Step 4:
+     *  Welt(0, 0) = max( Welt(0..P-1, 0) )
+     */
+    for(m = 1; m < P; m++) {
+        INSERT_TASK_dlange_max(
+            options,
+            Welt(m, 0), Welt(0, 0) );
+    }
+}
+
+static inline void
+chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_desc_t *Welt,
+                       RUNTIME_option_t *options)
+{
+    int m, n;
+    int minMNT = chameleon_min( A->mt, A->nt );
+    int minMN  = chameleon_min( A->m,  A->n  );
+    int MT = (uplo == ChamUpper) ? minMNT : A->mt;
+    int NT = (uplo == ChamLower) ? minMNT : A->nt;
+    int M  = (uplo == ChamUpper) ? minMN  : A->m;
+    int N  = (uplo == ChamLower) ? minMN  : A->n;
+    int P  = Welt->p;
+    int Q  = Welt->q;
+
+    /**
+     * Step 1:
+     *  For j in [0,Q-1], Welt(m, j) = max_k( max-norm of A(m, j+k*Q) )
+     */
+    for(m = 0; m < MT; m++) {
+        int nmin = ( uplo == ChamUpper ) ? m                      : 0;
+        int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
+
+        int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
+        int ldam = BLKLDD( A, m );
+
+        for(n = nmin; n < nmax; n++) {
+            int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
+
+            if ( (n == m) && (uplo != ChamUpperLower) ) {
+                INSERT_TASK_zlantr(
+                    options,
+                    ChamMaxNorm, uplo, diag, tempmm, tempnn, A->nb,
+                    A(m, n), ldam, Welt(m, n));
+            }
+            else {
+                INSERT_TASK_zlange(
+                    options,
+                    ChamMaxNorm, tempmm, tempnn, A->nb,
+                    A(m, n), ldam, Welt(m, n));
+            }
 
-        /* Compute max norm between tiles in the row */
-        if (A->myrank < A->q) {
-            for(n = 0; n < A->nt; n++) {
+            if ( n >= Q ) {
                 INSERT_TASK_dlange_max(
-                    &options,
-                    VECNORMS_STEP1(0, n),
-                    RESULT(0,0));
+                    options,
+                    Welt(m, n), Welt(m, n%Q) );
             }
         }
 
-        /* Scatter norm over processus */
-        for(m = 0; m < A->p; m++) {
-            for(n = 0; n < A->q; n++) {
-                INSERT_TASK_dlacpy(
-                    &options,
-                    ChamUpperLower, 1, 1, 1,
-                    RESULT(0,0), 1,
-                    VECNORMS_STEP1(m, n), 1 );
+        /**
+         * Step 2:
+         *  Welt(m, 0) = max( Welt(m, 0..Q-1) )
+         */
+        for(n = 1; n < Q; n++) {
+            INSERT_TASK_dlange_max(
+                options,
+                Welt(m, n), Welt(m, 0) );
+        }
+    }
+
+    /**
+     * Step 3:
+     *  For m in [0,P-1], Welt(m, 0) = max_k( Welt(m+k*P, 0) )
+     */
+    for(m = P; m < MT; m++) {
+        INSERT_TASK_dlange_max(
+            options,
+            Welt(m, 0), Welt(m%P, 0) );
+    }
+
+    /**
+     * Step 4:
+     *  Welt(0, 0) = max( Welt(0..P-1, 0) )
+     */
+    for(m = 1; m < P; m++) {
+        INSERT_TASK_dlange_max(
+            options,
+            Welt(m, 0), Welt(0, 0) );
+    }
+}
+
+static inline void
+chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_desc_t *Welt,
+                       RUNTIME_option_t *options)
+{
+    int m, n;
+    int minMNT = chameleon_min( A->mt, A->nt );
+    int minMN  = chameleon_min( A->m,  A->n  );
+    int MT = (uplo == ChamUpper) ? minMNT : A->mt;
+    int NT = (uplo == ChamLower) ? minMNT : A->nt;
+    int M  = (uplo == ChamUpper) ? minMN  : A->m;
+    int N  = (uplo == ChamLower) ? minMN  : A->n;
+    int P  = Welt->p;
+    int Q  = Welt->q;
+
+    /**
+     * Step 1:
+     *  For j in [0,Q-1], Welt(m, j) = (scl, ssq) accumulated over A(m, j+k*Q)
+     */
+    for(m = 0; m < MT; m++) {
+        int nmin = ( uplo == ChamUpper ) ? m                      : 0;
+        int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
+
+        int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
+        int ldam = BLKLDD( A, m );
+
+        for(n = nmin; n < nmax; n++) {
+            int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
+
+            if ( (n == m) && (uplo != ChamUpperLower) ) {
+                INSERT_TASK_ztrssq(
+                    options,
+                    uplo, diag, tempmm, tempnn,
+                    A(m, n), ldam, Welt(m, n) );
+            }
+            else {
+                INSERT_TASK_zgessq(
+                    options,
+                    tempmm, tempnn,
+                    A(m, n), ldam, Welt(m, n) );
+            }
+
+            if ( n >= Q ) {
+                INSERT_TASK_dplssq(
+                    options, Welt(m, n), Welt(m, n%Q) );
             }
         }
-        CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence );
-        CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
-        CHAMELEON_Desc_Flush( RESULT, sequence );
-        RUNTIME_sequence_wait(chamctxt, sequence);
-        CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) );
-        break;
 
-    /*
-     *  ChamInfNorm
+        /**
+         * Step 2:
+         *  Welt(m, 0) = accumulation of Welt(m, 0..Q-1)
          */
-    case ChamInfNorm:
-        /* Init workspace handle for the call to zlange */
-        RUNTIME_options_ws_alloc( &options, A->mb, 0 );
+        for(n = 1; n < Q; n++) {
+            INSERT_TASK_dplssq(
+                options, Welt(m, n), Welt(m, 0) );
+        }
+    }
 
-        workm = A->m;
-        workn = chameleon_max( A->nt, A->q );
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, A->mb, 1, A->mb,
-                              workm, workn, 0, 0, workm, workn, A->p, A->q);
+    /**
+     * Step 3:
+     *  For m in [0,P-1], Welt(m, 0) = accumulation over k of Welt(m+k*P, 0)
+     */
+    for(m = P; m < MT; m++) {
+        INSERT_TASK_dplssq(
+            options, Welt(m, 0), Welt(m%P, 0) );
+    }
 
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, A->mb, 1, A->mb,
-                              workm, 1, 0, 0, workm, 1, A->p, A->q);
+    /**
+     * Step 4:
+     *  Welt(0, 0) = accumulation of Welt(0..P-1, 0), then finalized by dplssq2
+     */
+    for(m = 1; m < P; m++) {
+        INSERT_TASK_dplssq(
+            options, Welt(m, 0), Welt(0, 0) );
+    }
 
-        CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
-                              A->p, A->q, 0, 0, A->p, A->q, A->p, A->q);
+    INSERT_TASK_dplssq2(
+        options, Welt(0, 0) );
+}
 
-        for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
-            tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
+/**
+ * Compute the requested norm of A (whole tile matrix or its uplo/diag triangle) and store it in *result.
+ */
+void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
+                                CHAM_desc_t *A, double *result,
+                                RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_option_t options;
+    CHAM_desc_t *Wcol = NULL;
+    CHAM_desc_t *Welt = NULL;
+    double alpha = 0.0;
+    double beta  = 0.0;
 
-            /* Zeroes my intermediate vectors */
-            for(n = A->myrank % A->q; n < workn; n+=A->q) {
-                INSERT_TASK_dlaset(
-                    &options,
-                    ChamUpperLower, tempkm, 1,
-                    0., 0.,
-                    VECNORMS_STEP1(m, n), 1);
-            }
+    int workn, workmt, worknt;
+    int m, n;
 
-            /* compute sums of absolute values on rows of each tile */
-            for(n = A->myrank % A->q; n < A->nt; n+=A->q) {
-                tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-                INSERT_TASK_dzasum(
-                    &options,
-                    ChamRowwise, ChamUpperLower, tempkm, tempkn,
-                    A(m, n), ldam, VECNORMS_STEP1(m, n));
-            }
+    chamctxt = chameleon_context_self();
+    if (sequence->status != CHAMELEON_SUCCESS)
+        return;
+    RUNTIME_options_init(&options, chamctxt, sequence, request);
 
-            /* Zeroes the second intermediate vector */
-            INSERT_TASK_dlaset(
-                &options,
-                ChamUpperLower, tempkm, 1,
-                0., 0.,
-                VECNORMS_STEP2(m, 0), 1);
+    *result = 0.0;
 
-            /* compute vector sums between tiles in rows locally on each rank */
-            for(n = A->myrank % A->q + A->q; n < A->nt; n+=A->q) {
-                INSERT_TASK_dgeadd(
-                    &options,
-                    ChamNoTrans, tempkm, 1, A->mb,
-                    1.0, VECNORMS_STEP1(m, n), tempkm,
-                    1.0, VECNORMS_STEP1(m, A->myrank % A->q), tempkm);
-            }
+    workmt = chameleon_max( A->mt, A->p );
+    worknt = chameleon_max( A->nt, A->q );
+    workn  = chameleon_max( A->n,  A->q );
 
-            /* compute vector sums between tiles in rows between ranks */
-            for(n = 0; n < A->q; n++) {
-                INSERT_TASK_dgeadd(
-                    &options,
-                    ChamNoTrans, tempkm, 1, A->mb,
-                    1.0, VECNORMS_STEP1(m, n), tempkm,
-                    1.0, VECNORMS_STEP2(m, 0), tempkm);
-            }
+    switch ( norm ) {
+    case ChamOneNorm:
+        RUNTIME_options_ws_alloc( &options, 1, 0 );
 
-            /*
-             * Compute max norm of each segment of the final vector in the
-             * previous workspace
-             */
-            INSERT_TASK_dlange(
-                &options,
-                ChamMaxNorm, tempkm, 1, A->nb,
-                VECNORMS_STEP2(m, 0), tempkm,
-                VECNORMS_STEP1(m, 0));
-        }
+        CHAMELEON_Desc_Create( &Wcol, NULL, ChamRealDouble, 1, A->nb, A->nb,
+                               workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, A->p, A->q );
 
-        /* Initialize RESULT array */
-        INSERT_TASK_dlaset(
-            &options,
-            ChamUpperLower, 1, 1,
-            0., 0.,
-            RESULT(A->myrank / A->q, A->myrank % A->q), 1);
+        CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1,
+                               A->p, worknt, 0, 0, A->p, worknt, A->p, A->q );
 
-        /* compute max norm between tiles in the column locally on each rank */
-        if (A->myrank % A->q == 0) {
-            for(m = (A->myrank / A->q); m < A->mt; m+=A->p) {
-                INSERT_TASK_dlange_max(
-                    &options,
-                    VECNORMS_STEP1(m, 0),
-                    RESULT(A->myrank / A->q, A->myrank % A->q));
-            }
-        }
+        break;
 
-        /* compute max norm between tiles in the column between ranks */
-        if (A->myrank % A->q == 0) {
-            for(m = 0; m < A->p; m++) {
-                INSERT_TASK_dlange_max(
-                    &options,
-                    RESULT(m,0),
-                    RESULT(0,0));
-            }
-        }
+    /*
+     *  ChamInfNorm
+     */
+    case ChamInfNorm:
+        RUNTIME_options_ws_alloc( &options, A->mb, 0 );
 
-        /* Scatter norm over processus */
-        for(m = 0; m < A->p; m++) {
-            for(n = 0; n < A->q; n++) {
-                INSERT_TASK_dlacpy(
-                    &options,
-                    ChamUpperLower, 1, 1, 1,
-                    RESULT(0,0), 1,
-                    VECNORMS_STEP1(m, n), 1 );
-            }
-        }
-        CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence );
-        CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
-        CHAMELEON_Desc_Flush( RESULT, sequence );
-        RUNTIME_sequence_wait(chamctxt, sequence);
-        CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) );
+        CHAMELEON_Desc_Create( &Wcol, NULL, ChamRealDouble, A->mb, 1, A->mb,
+                               workmt * A->mb, worknt, 0, 0, workmt * A->mb, worknt, A->p, A->q );
+
+        CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1,
+                               workmt, A->q, 0, 0, workmt, A->q, A->p, A->q );
         break;
 
     /*
      *  ChamFrobeniusNorm
      */
     case ChamFrobeniusNorm:
+        RUNTIME_options_ws_alloc( &options, 1, 0 );
 
-        workm = chameleon_max( A->mt, A->p );
-        workn = chameleon_max( A->nt, A->q );
-
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 2, 2,
-                              workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q);
-        CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 2, 2,
-                              1, 2, 0, 0, 1, 2, 1, 1);
-
-        /* Compute local norm to each tile */
-        for(m = 0; m < A->mt; m++) {
-            tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
-            for(n = 0; n < A->nt; n++) {
-                INSERT_TASK_dlaset(
-                    &options,
-                    ChamUpperLower, 1, 2,
-                    1., 0.,
-                    VECNORMS_STEP1(m,n), 1);
-                tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-                INSERT_TASK_zgessq(
-                    &options,
-                    tempkm, tempkn,
-                    A(m, n), ldam,
-                    VECNORMS_STEP1(m, n));
-            }
-        }
-
-        /* Initialize arrays */
-        INSERT_TASK_dlaset(
-            &options,
-            ChamUpperLower, 1, 2,
-            1., 0.,
-            RESULT(0,0), 1);
-
-        /* Compute accumulation of scl and ssq */
-        for(m = 0; m < A->mt; m++) {
-            for(n = 0; n < A->nt; n++) {
-                INSERT_TASK_dplssq(
-                    &options,
-                    VECNORMS_STEP1(m, n),
-                    RESULT(0,0));
-            }
-        }
-        /* Compute scl * sqrt(ssq) */
-        INSERT_TASK_dplssq2(
-            &options,
-            RESULT(0,0));
-
-        /* Copy max norm in tiles to dispatch on every nodes */
-        for(m = 0; m < A->p; m++) {
-            for(n = 0; n < A->q; n++) {
-                INSERT_TASK_dlacpy(
-                    &options,
-                    ChamUpperLower, 1, 1, 1,
-                    RESULT(0,0), 1,
-                    VECNORMS_STEP1(m, n), 1 );
-            }
-        }
-
-        CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
-        CHAMELEON_Desc_Flush( RESULT, sequence );
-        RUNTIME_sequence_wait(chamctxt, sequence);
+        alpha = 1.;
+        CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 2, 1, 2,
+                               workmt*2, workn, 0, 0, workmt*2, workn, A->p, A->q );
         break;
 
     /*
@@ -350,68 +438,81 @@ void chameleon_pzlange( cham_normtype_t norm, CHAM_desc_t *A, double *result,
      */
     case ChamMaxNorm:
     default:
-        /* Init workspace handle for the call to zlange but unused */
         RUNTIME_options_ws_alloc( &options, 1, 0 );
 
-        workm = chameleon_max( A->mt, A->p );
-        workn = chameleon_max( A->nt, A->q );
-
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 1, 1,
-                              workm, workn, 0, 0, workm, workn, A->p, A->q);
-        CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
-                              1, 1, 0, 0, 1, 1, 1, 1);
+        CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1,
+                               workmt, workn, 0, 0, workmt, workn, A->p, A->q );
+    }
 
-        /* Compute local maximum to each tile */
-        for(m = 0; m < A->mt; m++) {
-            tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
-            for(n = 0; n < A->nt; n++) {
-                tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-                INSERT_TASK_zlange(
+    /* Initialize workspaces */
+    if ( (norm == ChamInfNorm) ||
+         (norm == ChamOneNorm) )
+    {
+        /* Initialize Wcol tile */
+        for(m = 0; m < Wcol->mt; m++) {
+            for(n = 0; n < Wcol->nt; n++) {
+                INSERT_TASK_dlaset(
                     &options,
-                    ChamMaxNorm, tempkm, tempkn, A->nb,
-                    A(m, n), ldam,
-                    VECNORMS_STEP1(m, n));
+                    ChamUpperLower, Wcol->mb, Wcol->nb,
+                    alpha, beta,
+                    Wcol(m,n), Wcol->mb );
             }
         }
+    }
+    for(m = 0; m < Welt->mt; m++) {
+        for(n = 0; n < Welt->nt; n++) {
+            INSERT_TASK_dlaset(
+                &options,
+                ChamUpperLower, Welt->mb, Welt->nb,
+                alpha, beta,
+                Welt(m,n), Welt->mb );
+        }
+    }
 
-        /* Initialize RESULT array */
-        INSERT_TASK_dlaset(
-            &options,
-            ChamUpperLower, 1, 1,
-            0., 0.,
-            RESULT(0,0), 1);
+    switch ( norm ) {
+    case ChamOneNorm:
+        chameleon_pzlange_one( uplo, diag, A, Wcol, Welt, &options );
+        CHAMELEON_Desc_Flush( Wcol, sequence );
+        break;
 
-        /* Compute max norm between tiles */
-        for(m = 0; m < A->mt; m++) {
-            for(n = 0; n < A->nt; n++) {
-                INSERT_TASK_dlange_max(
-                    &options,
-                    VECNORMS_STEP1(m, n),
-                    RESULT(0,0));
-            }
-        }
+    case ChamInfNorm:
+        chameleon_pzlange_inf( uplo, diag, A, Wcol, Welt, &options );
+        CHAMELEON_Desc_Flush( Wcol, sequence );
+        break;
 
-        /* Copy max norm in tiles to dispatch on every nodes */
-        for(m = 0; m < A->p; m++) {
-            for(n = 0; n < A->q; n++) {
+    case ChamFrobeniusNorm:
+        chameleon_pzlange_frb( uplo, diag, A, Welt, &options );
+        break;
+
+    case ChamMaxNorm:
+    default:
+        chameleon_pzlange_max( uplo, diag, A, Welt, &options );
+    }
+
+    /**
+     * Broadcast the result: copy Welt(0,0) into every other Welt(m, n) of the P x Q grid
+     */
+    for(m = 0; m < A->p; m++) {
+        for(n = 0; n < A->q; n++) {
+            if ( (m != 0) || (n != 0) ) {
                 INSERT_TASK_dlacpy(
                     &options,
                     ChamUpperLower, 1, 1, 1,
-                    RESULT(0,0), 1,
-                    VECNORMS_STEP1(m, n), 1 );
+                    Welt(0,0), 1, Welt(m, n), 1);
             }
         }
-
-        CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
-        CHAMELEON_Desc_Flush( RESULT, sequence );
-        RUNTIME_sequence_wait(chamctxt, sequence);
     }
 
-    *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
+    CHAMELEON_Desc_Flush( Welt, sequence );
+    RUNTIME_sequence_wait(chamctxt, sequence);
+
+    *result = *(double *)Welt->get_blkaddr(Welt, A->myrank / A->q, A->myrank % A->q );
+
+    if ( Wcol != NULL ) {
+        CHAMELEON_Desc_Destroy( &Wcol );
+    }
+    CHAMELEON_Desc_Destroy( &Welt );
 
-    CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
-    CHAMELEON_Desc_Destroy( &(RESULT) );
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, chamctxt);
 }
diff --git a/compute/pzlanhe.c b/compute/pzlanhe.c
deleted file mode 100644
index e7c181296209393fd2b940d92a44238cc72c663f..0000000000000000000000000000000000000000
--- a/compute/pzlanhe.c
+++ /dev/null
@@ -1,460 +0,0 @@
-/**
- *
- * @file pzlanhe.c
- *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- * - *** - * - * @brief Chameleon zlanhe parallel algorithm - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.6.0 for CHAMELEON 1.0.0 - * @author Emmanuel Agullo - * @author Mathieu Faverge - * @date 2010-11-15 - * @precisions normal z -> c - * - */ -//ALLOC_WS : A->mb -//#include <stdlib.h> -//#include <math.h> -//WS_ADD : A->mb -#include "control/common.h" - -#define A(m, n) A, m, n -#define VECNORMS_STEP1(m, n) VECNORMS_STEP1, m, n -#define VECNORMS_STEP2(m, n) VECNORMS_STEP2, m, n -#define RESULT(m, n) RESULT, m, n -/** - * - */ -/** - * - */ -void chameleon_pzlanhe(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result, - RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) -{ - CHAM_desc_t *VECNORMS_STEP1 = NULL; - CHAM_desc_t *VECNORMS_STEP2 = NULL; - CHAM_desc_t *RESULT = NULL; - CHAM_context_t *chamctxt; - RUNTIME_option_t options; - - int workm, workn; - int tempkm, tempkn; - int ldam; - int m, n; - /* int part_p, part_q; */ - - /* part_p = A->myrank / A->q; */ - /* part_q = A->myrank % A->q; */ - - chamctxt = chameleon_context_self(); - if (sequence->status != CHAMELEON_SUCCESS) - return; - RUNTIME_options_init(&options, chamctxt, sequence, request); - - *result = 0.0; - switch ( norm ) { - /* - * ChamOneNorm / ChamInfNorm - */ - case ChamOneNorm: - case ChamInfNorm: - /* Init workspace handle for the call to zlanhe */ - RUNTIME_options_ws_alloc( &options, A->mb, 0 ); - - workm = A->m; - workn = chameleon_max( A->nt, A->q ); - CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, A->mb, 1, A->mb, - workm, workn, 0, 0, workm, workn, A->p, A->q); - - CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, A->mb, 1, A->mb, - workm, 1, 0, 0, workm, 1, A->p, A->q); - - CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1, - 1, 1, 0, 0, 1, 1, 1, 1); - - /* Zeroes my intermediate vectors */ - for(m = 0; m < A->mt; m++) { - tempkm = m == A->mt-1 ? 
A->m-m*A->mb : A->mb; - for(n = 0; n < workn; n++) { - INSERT_TASK_dlaset( - &options, - ChamUpperLower, tempkm, 1, - 0., 0., - VECNORMS_STEP1(m, n), 1); - } - } - - for(m = (A->myrank / A->q); m < A->mt; m+=A->p) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - - /* compute sums of absolute values on diagonal tile m */ - INSERT_TASK_dzasum( - &options, - ChamRowwise, uplo, tempkm, tempkm, - A(m, m), ldam, VECNORMS_STEP1(m, m)); - - /* - * ChamLower - */ - if (uplo == ChamLower) { - //for(n = A->myrank % A->q; n < m; n+=A->q) { - for(n = 0; n < m; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - /* compute sums of absolute values on rows of tile m */ - INSERT_TASK_dzasum( - &options, - ChamRowwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, VECNORMS_STEP1(m, n)); - /* same operation on the symmetric part */ - INSERT_TASK_dzasum( - &options, - ChamColumnwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, VECNORMS_STEP1(n, m)); - } - } - /* - * ChamUpper - */ - else { -// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); -// n < A->mt; n+=A->q) { - for(n = m+1; n < A->mt; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - /* compute sums of absolute values on rows of tile m */ - INSERT_TASK_dzasum( - &options, - ChamRowwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, VECNORMS_STEP1(m, n)); - /* same operation on the symmetric part */ - INSERT_TASK_dzasum( - &options, - ChamColumnwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, VECNORMS_STEP1(n, m)); - } - } - } - - /* compute vector sum between tiles in rows */ - for(m = (A->myrank / A->q); m < A->mt; m+=A->p) { - tempkm = m == A->mt-1 ? 
A->m-m*A->mb : A->mb; - INSERT_TASK_dlaset( - &options, - ChamUpperLower, tempkm, 1, - 0., 0., - VECNORMS_STEP2(m, 0), 1); - for(n = 0; n < A->nt; n++) { - INSERT_TASK_dgeadd( - &options, - ChamNoTrans, tempkm, 1, A->mb, - 1.0, VECNORMS_STEP1(m, n), tempkm, - 1.0, VECNORMS_STEP2(m, 0), tempkm); - } - /* - * Compute max norm of each segment of the final vector in the - * previous workspace - */ - INSERT_TASK_dlange( - &options, - ChamMaxNorm, tempkm, 1, A->nb, - VECNORMS_STEP2(m, 0), tempkm, - VECNORMS_STEP1(m, 0)); - } - - /* Initialize RESULT array */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 1, - 0., 0., - RESULT(0,0), 1); - - /* compute max norm between tiles in the column */ - if (A->myrank % A->q == 0) { - for(m = 0; m < A->mt; m++) { - INSERT_TASK_dlange_max( - &options, - VECNORMS_STEP1(m, 0), - RESULT(0,0)); - } - } - - /* Scatter norm over processus */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { - INSERT_TASK_dlacpy( - &options, - ChamUpperLower, 1, 1, 1, - RESULT(0,0), 1, - VECNORMS_STEP1(m, n), 1 ); - } - } - CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence ); - CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence ); - CHAMELEON_Desc_Flush( RESULT, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); - *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q ); - CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) ); - CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) ); - CHAMELEON_Desc_Destroy( &(RESULT) ); - break; - /* - * ChamFrobeniusNorm - */ - case ChamFrobeniusNorm: - workm = chameleon_max( A->mt, A->p ); - workn = chameleon_max( A->nt, A->q ); - - CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 2, 2, - workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q); - CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 2, 2, - 1, 2, 0, 0, 1, 2, 1, 1); - - /* Compute local norm to each tile */ - for(m = (A->myrank / A->q); m < A->mt; m+=A->p) { - tempkm = m == A->mt-1 ? 
A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - - /* Zeroes my intermediate vectors */ - for(n = A->myrank % A->q; n < workn; n+=A->q) { - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 2, - 1., 0., - VECNORMS_STEP1(m,n), 1); - } - - /* compute norm on diagonal tile m */ - INSERT_TASK_zhessq( - &options, - uplo, tempkm, - A(m, m), ldam, - VECNORMS_STEP1(m, m)); - - /* - * ChamLower - */ - if (uplo == ChamLower) { - //for(n = A->myrank % A->q; n < m; n+=A->q) { - for(n = 0; n < m; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - /* compute norm on the lower part */ - INSERT_TASK_zgessq( - &options, - tempkm, tempkn, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - /* same operation on the symmetric part */ - INSERT_TASK_zgessq( - &options, - tempkm, tempkn, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - } - } - /* - * ChamUpper - */ - else { -// for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); -// n < A->mt; n+=A->q) { - for(n = m+1; n < A->mt; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - /* compute norm on the lower part */ - INSERT_TASK_zgessq( - &options, - tempkm, tempkn, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - /* same operation on the symmetric part */ - INSERT_TASK_zgessq( - &options, - tempkm, tempkn, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - } - } - } - - /* Initialize arrays */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 2, - 1., 0., - RESULT(0,0), 1); - - /* Compute accumulation of scl and ssq */ - for(m = (A->myrank / A->q); m < A->mt; m+=A->p) { - /* - * ChamLower - */ - if (uplo == ChamLower) { - //for(n = A->myrank % A->q; n < m; n+=A->q) { - for(n = 0; n <= m; n++) { - INSERT_TASK_dplssq( - &options, - VECNORMS_STEP1(m, n), - RESULT(0,0)); - } - } - /* - * ChamUpper - */ - else { -// for(n = ( part_q > part_p ? 
(m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); -// n < A->mt; n+=A->q) { - for(n = m; n < A->mt; n++) { - INSERT_TASK_dplssq( - &options, - VECNORMS_STEP1(m, n), - RESULT(0,0)); - } - } - } - - /* Compute scl * sqrt(ssq) */ - INSERT_TASK_dplssq2( - &options, - RESULT(0,0)); - - /* Copy max norm in tiles to dispatch on every nodes */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { - INSERT_TASK_dlacpy( - &options, - ChamUpperLower, 1, 1, 1, - RESULT(0,0), 1, - VECNORMS_STEP1(m, n), 1 ); - } - } - - CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence ); - CHAMELEON_Desc_Flush( RESULT, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); - *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q ); - CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) ); - CHAMELEON_Desc_Destroy( &(RESULT) ); - break; - - /* - * ChamMaxNorm - */ - case ChamMaxNorm: - default: - /* Init workspace handle for the call to zlange but unused */ - RUNTIME_options_ws_alloc( &options, 1, 0 ); - - workm = chameleon_max( A->mt, A->p ); - workn = chameleon_max( A->nt, A->q ); - - CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 1, 1, - workm, workn, 0, 0, workm, workn, A->p, A->q); - CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1, - 1, 1, 0, 0, 1, 1, 1, 1); - - /* Compute local maximum to each tile */ - for(m = 0; m < A->mt; m++) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - - INSERT_TASK_zlanhe( - &options, - ChamMaxNorm, uplo, tempkm, A->nb, - A(m, m), ldam, - VECNORMS_STEP1(m, m)); - - /* - * ChamLower - */ - if (uplo == ChamLower) { - //for(n = A->myrank % A->q; n < m; n+=A->q) { - for(n = 0; n < m; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - INSERT_TASK_zlange( - &options, - ChamMaxNorm, tempkm, tempkn, A->nb, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - } - } - /* - * ChamUpper - */ - else { - //for(n = ( part_q > part_p ? 
(m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); - // n < A->mt; n+=A->q) { - for(n = m+1; n < A->mt; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - INSERT_TASK_zlange( - &options, - ChamMaxNorm, tempkm, tempkn, A->nb, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - } - } - } - - /* Initialize RESULT array */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 1, - 0., 0., - RESULT(0,0), 1); - - /* Compute max norm between tiles */ - for(m = 0; m < A->mt; m++) { - /* - * ChamLower - */ - if (uplo == ChamLower) { - //for(n = A->myrank % A->q; n < m; n+=A->q) { - for(n = 0; n <= m; n++) { - INSERT_TASK_dlange_max( - &options, - VECNORMS_STEP1(m, n), - RESULT(0,0)); - } - } - /* - * ChamUpper - */ - else { - //for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); - // n < A->mt; n+=A->q) { - for(n = m; n < A->mt; n++) { - INSERT_TASK_dlange_max( - &options, - VECNORMS_STEP1(m, n), - RESULT(0,0)); - } - } - } - - /* Copy max norm in tiles to dispatch on every nodes */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { - INSERT_TASK_dlacpy( - &options, - ChamUpperLower, 1, 1, 1, - RESULT(0,0), 1, - VECNORMS_STEP1(m, n), 1 ); - } - } - - CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence ); - CHAMELEON_Desc_Flush( RESULT, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); - *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q ); - CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) ); - CHAMELEON_Desc_Destroy( &(RESULT) ); - } - RUNTIME_options_ws_free(&options); - RUNTIME_options_finalize(&options, chamctxt); -} diff --git a/compute/pzlansy.c b/compute/pzlansy.c index 96411e26c4946c157bec2218e334b5f0bb373ad9..1df96420f3ecf199123a4b54643145b5a314a408 100644 --- a/compute/pzlansy.c +++ b/compute/pzlansy.c @@ -21,445 +21,415 @@ * */ //ALLOC_WS : A->mb +//WS_ADD : A->mb + #include <stdlib.h> #include <math.h> -//WS_ADD : A->mb #include "control/common.h" 
-#define A(m, n) A, m, n -#define VECNORMS_STEP1(m, n) VECNORMS_STEP1, m, n -#define VECNORMS_STEP2(m, n) VECNORMS_STEP2, m, n -#define RESULT(m, n) RESULT, m, n -/** - * - */ -/** - * - */ -void chameleon_pzlansy(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result, - RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) -{ - CHAM_desc_t *VECNORMS_STEP1 = NULL; - CHAM_desc_t *VECNORMS_STEP2 = NULL; - CHAM_desc_t *RESULT = NULL; - CHAM_context_t *chamctxt; - RUNTIME_option_t options; +#define A(m, n) A, (m), (n) +#define Wcol(m, n) Wcol, (m), (n) +#define Welt(m, n) Welt, (m), (n) - int workm, workn; - int tempkm, tempkn; - int ldam; +static inline void +chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A, + CHAM_desc_t *Wcol, CHAM_desc_t *Welt, + RUNTIME_option_t *options) +{ int m, n; - /* int part_p, part_q; */ - - /* part_p = A->myrank / A->q; */ - /* part_q = A->myrank % A->q; */ + int MT = A->mt; + int NT = A->nt; + int M = A->m; + int N = A->n; + int P = Welt->p; + int Q = Welt->q; + + /** + * Step 1: + * For j in [1,Q], Wcol(m, j) = reduce( A(m, j+k*Q) ) + */ + for(m = 0; m < MT; m++) { + int nmin = ( uplo == ChamUpper ) ? m : 0; + int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; - chamctxt = chameleon_context_self(); - if (sequence->status != CHAMELEON_SUCCESS) - return; - RUNTIME_options_init(&options, chamctxt, sequence, request); + int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int ldam = BLKLDD( A, m ); - *result = 0.0; - switch ( norm ) { - /* - * ChamOneNorm / ChamInfNorm - */ - case ChamOneNorm: - case ChamInfNorm: - /* Init workspace handle for the call to zlange */ - RUNTIME_options_ws_alloc( &options, A->mb, 0 ); + for(n = nmin; n < nmax; n++) { + int tempnn = ( n == (NT-1) ) ? 
N - n * A->nb : A->nb; - workm = A->m; - workn = chameleon_max( A->nt, A->q ); - CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, A->mb, 1, A->mb, - workm, workn, 0, 0, workm, workn, A->p, A->q); + if ( n == m ) { + INSERT_TASK_dzasum( + options, + ChamRowwise, uplo, tempmm, tempnn, + A(m, n), ldam, Wcol(m, n) ); + } + else { + INSERT_TASK_dzasum( + options, + ChamRowwise, ChamUpperLower, tempmm, tempnn, + A(m, n), ldam, Wcol(m, n) ); + + INSERT_TASK_dzasum( + options, + ChamColumnwise, ChamUpperLower, tempmm, tempnn, + A(m, n), ldam, Wcol(n, m) ); + } + } + } - CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, A->mb, 1, A->mb, - workm, 1, 0, 0, workm, 1, A->p, A->q); + for(m = 0; m < MT; m++) { + int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; - CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1, - 1, 1, 0, 0, 1, 1, 1, 1); + for(n = Q; n < NT; n++) { + INSERT_TASK_dgeadd( + options, + ChamNoTrans, tempmm, 1, A->nb, + 1.0, Wcol(m, n ), tempmm, + 1.0, Wcol(m, n%Q), tempmm ); + } - /* Zeroes my intermediate vectors */ - for(m = 0; m < A->mt; m++) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - for(n = 0; n < workn; n++) { - INSERT_TASK_dlaset( - &options, - ChamUpperLower, tempkm, 1, - 0., 0., - VECNORMS_STEP1(m, n), 1); - } + /** + * Step 2: + * For each j, W(m, j) = reduce( Wcol(m, 0..Q-1) ) + */ + for(n = 1; n < Q; n++) { + INSERT_TASK_dgeadd( + options, + ChamNoTrans, tempmm, 1, A->mb, + 1.0, Wcol(m, n), tempmm, + 1.0, Wcol(m, 0), tempmm ); } - for(m = 0; m < A->mt; m++) { - tempkm = m == A->mt-1 ? 
A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); + INSERT_TASK_dlange( + options, + ChamMaxNorm, tempmm, 1, A->nb, + Wcol(m, 0), 1, Welt(m, 0)); + } - /* compute sums of absolute values on diagonal tile m */ - INSERT_TASK_dzasum( - &options, - ChamRowwise, uplo, tempkm, tempkm, - A(m, m), ldam, VECNORMS_STEP1(m, m)); - - /* - * ChamLower - */ - if (uplo == ChamLower) { - //for(n = A->myrank % A->q; n < m; n+=A->q) { - for(n = 0; n < m; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - /* compute sums of absolute values on rows of tile m */ - INSERT_TASK_dzasum( - &options, - ChamRowwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, VECNORMS_STEP1(m, n)); - /* same operation on the symmetric part */ - INSERT_TASK_dzasum( - &options, - ChamColumnwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, VECNORMS_STEP1(n, m)); + /** + * Step 3: + * For m in 0..P-1, Welt(m, n) = max( Wcol(m..mt[P], n ) ) + */ + for(m = P; m < MT; m++) { + INSERT_TASK_dlange_max( + options, + Welt(m, 0), Welt(m%P, 0) ); + } + + /** + * Step 4: + * For each i, Welt(i, n) = max( Welt(0..P-1, n) ) + */ + for(m = 1; m < P; m++) { + INSERT_TASK_dlange_max( + options, + Welt(m, 0), Welt(0, 0) ); + } +} + +static inline void +chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A, + CHAM_desc_t *Welt, RUNTIME_option_t *options) +{ + int m, n; + int MT = A->mt; + int NT = A->nt; + int M = A->m; + int N = A->n; + int P = Welt->p; + int Q = Welt->q; + + /** + * Step 1: + * For j in [1,Q], Welt(m, j) = reduce( A(m, j+k*Q) ) + */ + for(m = 0; m < MT; m++) { + int nmin = (uplo == ChamUpper ) ? m : 0; + int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; + + int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int ldam = BLKLDD( A, m ); + + for(n = nmin; n < nmax; n++) { + int tempnn = ( n == (NT-1) ) ? 
N - n * A->nb : A->nb; + + if ( n == m ) { + if ( trans == ChamConjTrans) { + INSERT_TASK_zlanhe( + options, + ChamMaxNorm, uplo, tempmm, A->nb, + A(m, n), ldam, Welt(m, n)); + } + else { + INSERT_TASK_zlansy( + options, + ChamMaxNorm, uplo, tempmm, A->nb, + A(m, n), ldam, Welt(m, n)); } } - /* - * ChamUpper - */ else { - // for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); - // n < A->mt; n+=A->q) { - for(n = m+1; n < A->mt; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - /* compute sums of absolute values on rows of tile m */ - INSERT_TASK_dzasum( - &options, - ChamRowwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, VECNORMS_STEP1(m, n)); - /* same operation on the symmetric part */ - INSERT_TASK_dzasum( - &options, - ChamColumnwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, VECNORMS_STEP1(n, m)); - } + INSERT_TASK_zlange( + options, + ChamMaxNorm, tempmm, tempnn, A->nb, + A(m, n), ldam, Welt(m, n)); } - } - /* compute vector sum between tiles in rows */ - for(m = 0; m < A->mt; m++) { - tempkm = m == A->mt-1 ? 
A->m-m*A->mb : A->mb; - INSERT_TASK_dlaset( - &options, - ChamUpperLower, tempkm, 1, - 0., 0., - VECNORMS_STEP2(m, 0), 1); - for(n = 0; n < A->nt; n++) { - INSERT_TASK_dgeadd( - &options, - ChamNoTrans, tempkm, 1, A->mb, - 1.0, VECNORMS_STEP1(m, n), tempkm, - 1.0, VECNORMS_STEP2(m, 0), tempkm); - } - /* - * Compute max norm of each segment of the final vector in the - * previous workspace - */ - INSERT_TASK_dlange( - &options, - ChamMaxNorm, tempkm, 1, A->nb, - VECNORMS_STEP2(m, 0), tempkm, - VECNORMS_STEP1(m, 0)); - } - - /* Initialize RESULT array */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 1, - 0., 0., - RESULT(0,0), 1); - - /* compute max norm between tiles in the column */ - if (A->myrank % A->q == 0) { - for(m = 0; m < A->mt; m++) { + if ( n >= Q ) { INSERT_TASK_dlange_max( - &options, - VECNORMS_STEP1(m, 0), - RESULT(0,0)); + options, + Welt(m, n), Welt(m, n%Q) ); } } - /* Scatter norm over processus */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { - INSERT_TASK_dlacpy( - &options, - ChamUpperLower, 1, 1, 1, - RESULT(0,0), 1, - VECNORMS_STEP1(m, n), 1 ); - } + /** + * Step 2: + * For each j, W(m, j) = reduce( Welt(m, 0..Q-1) ) + */ + for(n = 1; n < Q; n++) { + INSERT_TASK_dlange_max( + options, + Welt(m, n), Welt(m, 0) ); } - CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence ); - CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence ); - CHAMELEON_Desc_Flush( RESULT, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); - CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) ); - break; - /* - * ChamFrobeniusNorm + } + + /** + * Step 3: + * For m in 0..P-1, Welt(m, n) = max( Welt(m..mt[P], n ) ) */ - case ChamFrobeniusNorm: - workm = chameleon_max( A->mt, A->p ); - workn = chameleon_max( A->nt, A->q ); + for(m = P; m < MT; m++) { + INSERT_TASK_dlange_max( + options, + Welt(m, 0), Welt(m%P, 0) ); + } - CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 2, 2, - workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q); - 
CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 2, 2, - 1, 2, 0, 0, 1, 2, 1, 1); + /** + * Step 4: + * For each i, Welt(i, n) = max( Welt(0..P-1, n) ) + */ + for(m = 1; m < P; m++) { + INSERT_TASK_dlange_max( + options, + Welt(m, 0), Welt(0, 0) ); + } +} - /* Compute local norm to each tile */ - for(m = (A->myrank / A->q); m < A->mt; m+=A->p) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); +static inline void +chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo, + CHAM_desc_t *A, CHAM_desc_t *Welt, + RUNTIME_option_t *options) +{ + int m, n; + int MT = A->mt; + int NT = A->nt; + int M = A->m; + int N = A->n; + int P = Welt->p; + int Q = Welt->q; + + /** + * Step 1: + * For j in [1,Q], Welt(m, j) = reduce( A(m, j+k*Q) ) + */ + for(m = 0; m < MT; m++) { + int nmin = (uplo == ChamUpper ) ? m : 0; + int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT; - /* Zeroes my intermediate vector */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 2, - 1., 0., - VECNORMS_STEP1(m, m), 1); - /* compute norm on diagonal tile m */ - INSERT_TASK_zsyssq( - &options, - uplo, tempkm, - A(m, m), ldam, - VECNORMS_STEP1(m, m)); - - /* - * ChamLower - */ - if (uplo == ChamLower) { - //for(n = A->myrank % A->q; n < m; n+=A->q) { - for(n = 0; n < m; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - /* Zeroes my intermediate vector */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 2, - 1., 0., - VECNORMS_STEP1(m, n), 1); - /* compute norm on the lower part */ - INSERT_TASK_zgessq( - &options, - tempkm, tempkn, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - /* same operation on the symmetric part */ - INSERT_TASK_zgessq( - &options, - tempkm, tempkn, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - } - } - /* - * ChamUpper - */ - else { - // for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); - // n < A->mt; n+=A->q) { - for(n = m+1; n < A->mt; n++) { - tempkn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; - /* Zeroes my intermediate vector */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 2, - 1., 0., - VECNORMS_STEP1(m, n), 1); - /* compute norm on the lower part */ - INSERT_TASK_zgessq( - &options, - tempkm, tempkn, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - /* same operation on the symmetric part */ - INSERT_TASK_zgessq( - &options, - tempkm, tempkn, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - } - } - } + int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb; + int ldam = BLKLDD( A, m ); - /* Zeroes my intermediate vector */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 2, - 1., 0., - RESULT(0,0), 1); - - /* Compute accumulation of scl and ssq */ - for(m = (A->myrank / A->q); m < A->mt; m+=A->p) { - /* - * ChamLower - */ - if (uplo == ChamLower) { - //for(n = A->myrank % A->q; n < m; n+=A->q) { - for(n = 0; n <= m; n++) { - INSERT_TASK_dplssq( - &options, - VECNORMS_STEP1(m, n), - RESULT(0,0)); + for(n = nmin; n < nmax; n++) { + int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb; + + if ( n == m ) { + if ( trans == ChamConjTrans) { + INSERT_TASK_zhessq( + options, uplo, tempmm, + A(m, n), ldam, Welt(m, n) ); + } + else { + INSERT_TASK_zsyssq( + options, uplo, tempmm, + A(m, n), ldam, Welt(m, n) ); } } - /* - * ChamUpper - */ else { - // for(n = ( part_q > part_p ? 
(m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); - // n < A->mt; n+=A->q) { - for(n = m; n < A->mt; n++) { - INSERT_TASK_dplssq( - &options, - VECNORMS_STEP1(m, n), - RESULT(0,0)); - } + INSERT_TASK_zgessq( + options, tempmm, tempnn, + A(m, n), ldam, Welt(m, n) ); + INSERT_TASK_zgessq( + options, tempmm, tempnn, + A(m, n), ldam, Welt(n, m) ); } } + } - /* Compute scl * sqrt(ssq) */ - INSERT_TASK_dplssq2( - &options, - RESULT(0,0)); + for(m = 0; m < MT; m++) { + for(n = Q; n < NT; n++) { + INSERT_TASK_dplssq( + options, Welt(m, n), Welt(m, n%Q) ); + } - /* Copy max norm in tiles to dispatch on every nodes */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { - INSERT_TASK_dlacpy( - &options, - ChamUpperLower, 1, 1, 1, - RESULT(0,0), 1, - VECNORMS_STEP1(m, n), 1 ); - } + /** + * Step 2: + * For each j, W(m, j) = reduce( Welt(m, 0..Q-1) ) + */ + for(n = 1; n < Q; n++) { + INSERT_TASK_dplssq( + options, Welt(m, n), Welt(m, 0) ); } + } - CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence ); - CHAMELEON_Desc_Flush( RESULT, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); - break; + /** + * Step 3: + * For m in 0..P-1, Welt(m, n) = max( Welt(m..mt[P], n ) ) + */ + for(m = P; m < MT; m++) { + INSERT_TASK_dplssq( + options, Welt(m, 0), Welt(m%P, 0) ); + } - /* - * ChamMaxNorm + /** + * Step 4: + * For each i, Welt(i, n) = max( Welt(0..P-1, n) ) */ - case ChamMaxNorm: - default: - /* Init workspace handle for the call to zlange but unused */ + for(m = 1; m < P; m++) { + INSERT_TASK_dplssq( + options, Welt(m, 0), Welt(0, 0) ); + } + + INSERT_TASK_dplssq2( + options, Welt(0, 0) ); +} + +/** + * + */ +void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_trans_t trans, + CHAM_desc_t *A, double *result, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + RUNTIME_option_t options; + CHAM_desc_t *Wcol = NULL; + CHAM_desc_t *Welt = NULL; + double alpha = 0.0; + double beta = 0.0; + + int 
workn, workmt, worknt; + int m, n; + + chamctxt = chameleon_context_self(); + if (sequence->status != CHAMELEON_SUCCESS) + return; + RUNTIME_options_init(&options, chamctxt, sequence, request); + + *result = 0.0; + + workmt = chameleon_max( A->mt, A->p ); + worknt = chameleon_max( A->nt, A->q ); + workn = chameleon_max( A->n, A->q ); + + switch ( norm ) { + case ChamOneNorm: + case ChamInfNorm: RUNTIME_options_ws_alloc( &options, 1, 0 ); - workm = chameleon_max( A->mt, A->p ); - workn = chameleon_max( A->nt, A->q ); + CHAMELEON_Desc_Create( &Wcol, NULL, ChamRealDouble, 1, A->nb, A->nb, + workmt, worknt * A->nb, 0, 0, workmt, worknt * A->nb, A->p, A->q ); - CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 1, 1, - workm, workn, 0, 0, workm, workn, A->p, A->q); - CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1, - 1, 1, 0, 0, 1, 1, 1, 1); + CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1, + workmt, A->q, 0, 0, workmt, A->q, A->p, A->q ); /* one scalar tile per tile row of A: chameleon_pzlansy_inf indexes Welt(m, 0) for every m < A->mt, and the broadcast needs A->q columns */ - /* Compute local maximum to each tile */ - for(m = 0; m < A->mt; m++) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); + break; - INSERT_TASK_zlansy( - &options, - ChamMaxNorm, uplo, tempkm, A->nb, - A(m, m), ldam, - VECNORMS_STEP1(m, m)); - - /* - * ChamLower - */ - if (uplo == ChamLower) { - //for(n = A->myrank % A->q; n < m; n+=A->q) { - for(n = 0; n < m; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - INSERT_TASK_zlange( - &options, - ChamMaxNorm, tempkm, tempkn, A->nb, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - } - } - /* - * ChamUpper - */ - else { - //for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); - // n < A->mt; n+=A->q) { - for(n = m+1; n < A->mt; n++) { - tempkn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; - INSERT_TASK_zlange( - &options, - ChamMaxNorm, tempkm, tempkn, A->nb, - A(m, n), ldam, - VECNORMS_STEP1(m, n)); - } - } - } + /* + * ChamFrobeniusNorm + */ + case ChamFrobeniusNorm: + RUNTIME_options_ws_alloc( &options, 1, 0 ); - /* Zeroes RESULT array */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 1, - 0., 0., - RESULT(0,0), 1); - - /* Compute max norm between tiles */ - for(m = 0; m < A->mt; m++) { - /* - * ChamLower - */ - if (uplo == ChamLower) { - //for(n = A->myrank % A->q; n < m; n+=A->q) { - for(n = 0; n <= m; n++) { - INSERT_TASK_dlange_max( - &options, - VECNORMS_STEP1(m, n), - RESULT(0,0)); - } - } - /* - * ChamUpper - */ - else { - //for(n = ( part_q > part_p ? (m/part_p)*part_p + part_q : (m/part_p)*part_p + part_q + A->q ); - // n < A->mt; n+=A->q) { - for(n = m; n < A->mt; n++) { - INSERT_TASK_dlange_max( - &options, - VECNORMS_STEP1(m, n), - RESULT(0,0)); - } + alpha = 1.; + CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 2, 1, 2, + workmt*2, workn, 0, 0, workmt*2, workn, A->p, A->q ); + break; + + /* + * ChamMaxNorm + */ + case ChamMaxNorm: + default: + RUNTIME_options_ws_alloc( &options, 1, 0 ); + + CHAMELEON_Desc_Create( &Welt, NULL, ChamRealDouble, 1, 1, 1, + workmt, workn, 0, 0, workmt, workn, A->p, A->q ); + } + + /* Initialize workspaces */ + if ( (norm == ChamInfNorm) || + (norm == ChamOneNorm) ) + { + /* Initialize Wcol tile */ + for(m = 0; m < Wcol->mt; m++) { + for(n = 0; n < Wcol->nt; n++) { + INSERT_TASK_dlaset( + &options, + ChamUpperLower, Wcol->mb, Wcol->nb, + alpha, beta, + Wcol(m,n), Wcol->mb ); } } + } + for(m = 0; m < Welt->mt; m++) { + for(n = 0; n < Welt->nt; n++) { + INSERT_TASK_dlaset( + &options, + ChamUpperLower, Welt->mb, Welt->nb, + alpha, beta, + Welt(m,n), Welt->mb ); + } + } + + switch ( norm ) { + case ChamOneNorm: + case ChamInfNorm: + chameleon_pzlansy_inf( uplo, A, Wcol, Welt, &options ); + CHAMELEON_Desc_Flush( Wcol, sequence ); + break; + + case ChamFrobeniusNorm: + 
chameleon_pzlansy_frb( trans, uplo, A, Welt, &options ); + break; + + case ChamMaxNorm: + default: + chameleon_pzlansy_max( trans, uplo, A, Welt, &options ); + } - /* Copy max norm in tiles to dispatch on every nodes */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { + /** + * Broadcast the result: copy Welt(0,0) to every other tile of the P x Q grid so that each process can read it locally + */ + for(m = 0; m < A->p; m++) { + for(n = 0; n < A->q; n++) { + if ( (m != 0) || (n != 0) ) { INSERT_TASK_dlacpy( &options, ChamUpperLower, 1, 1, 1, - RESULT(0,0), 1, - VECNORMS_STEP1(m, n), 1 ); + Welt(0,0), 1, Welt(m, n), 1); } } - - CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence ); - CHAMELEON_Desc_Flush( RESULT, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); } - *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q ); + CHAMELEON_Desc_Flush( Welt, sequence ); + RUNTIME_sequence_wait(chamctxt, sequence); + + *result = *(double *)Welt->get_blkaddr(Welt, A->myrank / A->q, A->myrank % A->q ); + + if ( Wcol != NULL ) { + CHAMELEON_Desc_Destroy( &Wcol ); + } + CHAMELEON_Desc_Destroy( &Welt ); - CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) ); - CHAMELEON_Desc_Destroy( &(RESULT) ); RUNTIME_options_ws_free(&options); RUNTIME_options_finalize(&options, chamctxt); } diff --git a/compute/pzlantr.c b/compute/pzlantr.c deleted file mode 100644 index acf27965580c15df4fd4535249a8112e47e07fdb..0000000000000000000000000000000000000000 --- a/compute/pzlantr.c +++ /dev/null @@ -1,674 +0,0 @@ -/** - * - * @file pzlantr.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. 
- * - *** - * - * @brief Chameleon zlantr parallel algorithm - * - * @version 1.0.0 - * @comment This file has been automatically generated - * from Plasma 2.6.0 for CHAMELEON 1.0.0 - * @author Mathieu Faverge - * @date 2010-11-15 - * @precisions normal z -> c d s - * - */ -#include <stdlib.h> -#include <math.h> -#include "control/common.h" - -#define A(m, n) A, m, n -#define W1(m, n) W1, m, n -#define W2(m, n) W2, m, n -#define RESULT(m, n) RESULT, m, n -/** - * - */ -void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, - CHAM_desc_t *A, double *result, - RUNTIME_sequence_t *sequence, RUNTIME_request_t *request) -{ - CHAM_desc_t *W1 = NULL; - CHAM_desc_t *W2 = NULL; - CHAM_desc_t *RESULT = NULL; - CHAM_context_t *chamctxt; - RUNTIME_option_t options; - - int workm, workn; - int tempkm, tempkn; - int ldam, ldan; - int m, n, minMNT; - /* int part_p, part_q; */ - - minMNT = chameleon_min( A->mt, A->nt ); - - /* part_p = A->myrank / A->q; */ - /* part_q = A->myrank % A->q; */ - - chamctxt = chameleon_context_self(); - if (sequence->status != CHAMELEON_SUCCESS) - return; - RUNTIME_options_init(&options, chamctxt, sequence, request); - - *result = 0.0; - switch ( norm ) { - /* - * ChamOneNorm - */ - case ChamOneNorm: - /* Init workspace handle for the call to zlange but unused */ - RUNTIME_options_ws_alloc( &options, 1, 0 ); - - workm = chameleon_max( A->mt, A->p ); - workn = ( uplo == ChamLower ) ? chameleon_min( A->m, A->n ) : A->n; - - CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, 1, A->nb, A->nb, - workm, workn, 0, 0, workm, workn, A->p, A->q); - - CHAMELEON_Desc_Create(&(W2), NULL, ChamRealDouble, 1, A->nb, A->nb, - 1, workn, 0, 0, 1, workn, A->p, A->q); - - CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1, - 1, 1, 0, 0, 1, 1, 1, 1); - - /* - * ChamUpper - */ - if (uplo == ChamUpper) { - /* Zeroes intermediate vector */ - for(n = 0; n < W2->nt; n++) { - tempkn = n == W2->nt-1 ? 
W2->n-n*W2->nb : W2->nb; - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, tempkn, - 0., 0., - W2(0, n), 1); - } - for(m = 0; m < minMNT; m++) { - /* Zeroes intermediate vectors */ - for(n = m; n < W1->nt; n++) { - tempkn = n == W1->nt-1 ? W1->n-n*W1->nb : W1->nb; - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, tempkn, - 0., 0., - W1(m, n), 1); - } - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb; - ldam = BLKLDD(A, m); - /* compute sums of absolute values on columns of diag tile */ - INSERT_TASK_ztrasm( - &options, - ChamColumnwise, uplo, diag, tempkm, tempkn, - A(m, m), ldam, - W1(m, m)); - - /* compute sums of absolute values on columns of each tile */ - for(n = m+1; n < A->nt; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - INSERT_TASK_dzasum( - &options, - ChamColumnwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, W1(m, n)); - } - - /* Compute vector sums between tiles in columns */ - for(n = m; n < W1->nt; n++) { - tempkn = n == W1->nt-1 ? W1->n-n*W1->nb : W1->nb; - INSERT_TASK_dgeadd( - &options, - ChamNoTrans, 1, tempkn, W1->mb, - 1.0, W1(m, n), 1, - 1.0, W2(0, n), 1); - } - } - } - /* - * ChamLower - */ - else { - for(n = 0; n < minMNT; n++) { - tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb; - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - ldan = BLKLDD(A, n); - /* Zeroes intermediate vectors */ - for(m = n; m < A->mt; m++) { - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, tempkn, - 0., 0., - W1(m, n), 1); - } - /* Zeroes the second intermediate vector */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, tempkn, - 0., 0., - W2(0, n), 1); - - /* compute sums of absolute values on columns of diag tile */ - INSERT_TASK_ztrasm( - &options, - ChamColumnwise, uplo, diag, tempkm, tempkn, - A(n, n), ldan, - W1(n, n)); - - /* compute sums of absolute values on columns of each tile */ - for(m = n+1; m < A->mt; m++) { - tempkm = m == A->mt-1 ? 
A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - INSERT_TASK_dzasum( - &options, - ChamColumnwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, W1(m, n)); - } - - /* Compute vector sums between tiles in columns */ - for(m = n; m < A->mt; m++) { - INSERT_TASK_dgeadd( - &options, - ChamNoTrans, 1, tempkn, A->mb, - 1.0, W1(m, n), 1, - 1.0, W2(0, n), 1); - } - } - } - - /* - * Compute max norm of each segment of the final vector in the - * previous workspace - */ - for(n = 0; n < A->nt; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - INSERT_TASK_dlange( - &options, - ChamMaxNorm, 1, tempkn, A->nb, - W2(0, n), 1, - W1(0, n)); - } - - /* Initialize RESULT array */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 1, - 0., 0., - RESULT(0,0), 1); - - /* Compute max norm between tiles in the row */ - if (A->myrank < A->q) { - for(n = 0; n < A->nt; n++) { - INSERT_TASK_dlange_max( - &options, - W1(0, n), - RESULT(0,0)); - } - } - - /* Scatter norm over processus */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { - INSERT_TASK_dlacpy( - &options, - ChamUpperLower, 1, 1, 1, - RESULT(0,0), 1, - W1(m, n), 1 ); - } - } - CHAMELEON_Desc_Flush( W2, sequence ); - CHAMELEON_Desc_Flush( W1, sequence ); - CHAMELEON_Desc_Flush( RESULT, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); - *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q ); - CHAMELEON_Desc_Destroy( &(W1) ); - CHAMELEON_Desc_Destroy( &(W2) ); - CHAMELEON_Desc_Destroy( &(RESULT) ); - break; - /* - * ChamInfNorm - */ - case ChamInfNorm: - /* Init workspace handle for the call to zlange */ - RUNTIME_options_ws_alloc( &options, A->mb, 0 ); - - workm = ( uplo == ChamUpper ) ? 
chameleon_min( A->m, A->n ) : A->m; - workn = chameleon_max( A->nt, A->q ); - CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, A->mb, 1, A->mb, - workm, workn, 0, 0, workm, workn, A->p, A->q); - - CHAMELEON_Desc_Create(&(W2), NULL, ChamRealDouble, A->mb, 1, A->mb, - workm, 1, 0, 0, workm, 1, A->p, A->q); - - CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1, - 1, 1, 0, 0, 1, 1, 1, 1); - - /* - * ChamUpper - */ - if (uplo == ChamUpper) { - for(m = 0; m < minMNT; m++) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb; - ldam = BLKLDD(A, m); - /* Zeroes intermediate vectors */ - for(n = m; n < A->nt; n++) { - INSERT_TASK_dlaset( - &options, - ChamUpperLower, tempkm, 1, - 0., 0., - W1(m, n), 1); - } - /* Zeroes intermediate vector */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, tempkm, 1, - 0., 0., - W2(m, 0), 1); - - /* compute sums of absolute values on rows of diag tile */ - INSERT_TASK_ztrasm( - &options, - ChamRowwise, uplo, diag, tempkm, tempkn, - A(m, m), ldam, - W1(m, m)); - - /* compute sums of absolute values on rows of each tile */ - for(n = m+1; n < A->nt; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - INSERT_TASK_dzasum( - &options, - ChamRowwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, W1(m, n)); - } - - /* Compute vector sums between tiles in rows */ - for(n = m; n < A->nt; n++) { - INSERT_TASK_dgeadd( - &options, - ChamNoTrans, tempkm, 1, A->mb, - 1.0, W1(m, n), tempkm, - 1.0, W2(m, 0), tempkm); - } - - } - } - /* - * ChamLower - */ - else { - /* Zeroes intermediate vector */ - for(m = 0; m < A->mt; m++) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - INSERT_TASK_dlaset( - &options, - ChamUpperLower, tempkm, 1, - 0., 0., - W2(m, 0), 1); - } - for(n = 0; n < minMNT; n++) { - /* Zeroes intermediate vectors */ - for(m = n; m < A->mt; m++) { - tempkm = m == A->mt-1 ? 
A->m-m*A->mb : A->mb; - INSERT_TASK_dlaset( - &options, - ChamUpperLower, tempkm, 1, - 0., 0., - W1(m, n), tempkm); - } - tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb; - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - ldan = BLKLDD(A, n); - /* compute sums of absolute values on rows of diag tile */ - INSERT_TASK_ztrasm( - &options, - ChamRowwise, uplo, diag, tempkm, tempkn, - A(n, n), ldan, - W1(n, n)); - - /* compute sums of absolute values on rows of each tile */ - for(m = n+1; m < A->mt; m++) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - INSERT_TASK_dzasum( - &options, - ChamRowwise, ChamUpperLower, tempkm, tempkn, - A(m, n), ldam, W1(m, n)); - } - - /* Compute vector sums between tiles in rows */ - for(m = n; m < A->mt; m++) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - INSERT_TASK_dgeadd( - &options, - ChamNoTrans, tempkm, 1, A->mb, - 1.0, W1(m, n), tempkm, - 1.0, W2(m, 0), tempkm); - } - } - } - - /* - * Compute max norm of each segment of the final vector in the - * previous workspace - */ - for(m = 0; m < W1->mt; m++) { - tempkm = m == A->mt-1 ? 
A->m-m*A->mb : A->mb; - INSERT_TASK_dlange( - &options, - ChamMaxNorm, tempkm, 1, A->nb, - W2(m, 0), 1, - W1(m, 0)); - } - - /* Initialize RESULT array */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 1, - 0., 0., - RESULT(0,0), 1); - - /* compute max norm between tiles in the column */ - if (A->myrank % A->q == 0) { - for(m = 0; m < W1->mt; m++) { - INSERT_TASK_dlange_max( - &options, - W1(m, 0), - RESULT(0,0)); - } - } - - /* Scatter norm over processus */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { - INSERT_TASK_dlacpy( - &options, - ChamUpperLower, 1, 1, 1, - RESULT(0,0), 1, - W1(m, n), 1 ); - } - } - CHAMELEON_Desc_Flush( W2, sequence ); - CHAMELEON_Desc_Flush( W1, sequence ); - CHAMELEON_Desc_Flush( RESULT, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); - *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q ); - CHAMELEON_Desc_Destroy( &(W1) ); - CHAMELEON_Desc_Destroy( &(W2) ); - CHAMELEON_Desc_Destroy( &(RESULT) ); - break; - /* - * ChamFrobeniusNorm - */ - case ChamFrobeniusNorm: - workm = chameleon_max( A->mt, A->p ); - workn = chameleon_max( A->nt, A->q ); - - CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, 1, 2, 2, - workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q); - CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 2, 2, - 1, 2, 0, 0, 1, 2, 1, 1); - - /* - * ChamLower - */ - if (uplo == ChamLower) { - /* Compute local maximum to each tile */ - for(n = 0; n < minMNT; n++) { - tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb; - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - ldan = BLKLDD(A, n); - /* Zeroes my intermediate vectors */ - for(m = n; m < A->mt; m++) { - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 2, - 1., 0., - W1(m,n), 1); - } - /* Compute local norm of the diagonal tile */ - INSERT_TASK_ztrssq( - &options, - uplo, diag, tempkm, tempkn, - A(n, n), ldan, - W1(n, n)); - /* Compute local norm to each tile */ - for(m = n+1; m < A->mt; m++) { - tempkm = m == A->mt-1 ? 
A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - INSERT_TASK_zgessq( - &options, - tempkm, tempkn, - A(m, n), ldam, - W1(m, n)); - } - } - } - /* - * ChamUpper - */ - else { - /* Compute local maximum to each tile */ - for(m = 0; m < minMNT; m++) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb; - ldam = BLKLDD(A, m); - /* Zeroes my intermediate vectors */ - for(n = m; n < A->nt; n++) { - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 2, - 1., 0., - W1(m,n), 1); - } - /* Compute local norm of the diagonal tile */ - INSERT_TASK_ztrssq( - &options, - uplo, diag, tempkm, tempkn, - A(m, m), ldam, - W1(m, m)); - /* Compute local norm to each tile */ - for(n = m+1; n < A->nt; n++) { - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - INSERT_TASK_zgessq( - &options, - tempkm, tempkn, - A(m, n), ldam, - W1(m, n)); - } - } - } - - /* Initialize arrays */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 2, - 1., 0., - RESULT(0,0), 1); - - /* - * ChamLower - */ - if (uplo == ChamLower) { - /* Compute accumulation of scl and ssq */ - for(n = 0; n < minMNT; n++) { - for(m = n; m < A->mt; m++) { - INSERT_TASK_dplssq( - &options, - W1(m, n), - RESULT(0,0)); - } - } - } - /* - * ChamUpper - */ - else { - /* Compute accumulation of scl and ssq */ - for(m = 0; m < minMNT; m++) { - for(n = m; n < A->nt; n++) { - INSERT_TASK_dplssq( - &options, - W1(m, n), - RESULT(0,0)); - } - } - } - - /* Compute scl * sqrt(ssq) */ - INSERT_TASK_dplssq2( - &options, - RESULT(0,0)); - - /* Copy max norm in tiles to dispatch on every nodes */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { - INSERT_TASK_dlacpy( - &options, - ChamUpperLower, 1, 1, 1, - RESULT(0,0), 1, - W1(m, n), 1 ); - } - } - - CHAMELEON_Desc_Flush( W1, sequence ); - CHAMELEON_Desc_Flush( RESULT, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); - *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q ); - CHAMELEON_Desc_Destroy( &(W1) ); 
- CHAMELEON_Desc_Destroy( &(RESULT) ); - break; - - /* - * ChamMaxNorm - */ - case ChamMaxNorm: - default: - /* Init workspace handle for the call to zlange but unused */ - RUNTIME_options_ws_alloc( &options, 1, 0 ); - - workm = chameleon_max( A->mt, A->p ); - workn = chameleon_max( A->nt, A->q ); - - CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, 1, 1, 1, - workm, workn, 0, 0, workm, workn, A->p, A->q); - CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1, - 1, 1, 0, 0, 1, 1, 1, 1); - /* - * ChamLower - */ - if (uplo == ChamLower) { - /* Compute local maximum to each tile */ - for(n = 0; n < minMNT; n++) { - tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb; - tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb; - ldan = BLKLDD(A, n); - - INSERT_TASK_zlantr( - &options, - ChamMaxNorm, uplo, diag, - tempkm, tempkn, A->nb, - A(n, n), ldan, - W1(n, n)); - - for(m = n+1; m < A->mt; m++) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - ldam = BLKLDD(A, m); - INSERT_TASK_zlange( - &options, - ChamMaxNorm, tempkm, tempkn, A->nb, - A(m, n), ldam, - W1(m, n)); - } - } - } - /* - * ChamUpper - */ - else { - /* Compute local maximum to each tile */ - for(m = 0; m < minMNT; m++) { - tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb; - tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb; - ldam = BLKLDD(A, m); - - INSERT_TASK_zlantr( - &options, - ChamMaxNorm, uplo, diag, - tempkm, tempkn, A->nb, - A(m, m), ldam, - W1(m, m)); - - for(n = m+1; n < A->nt; n++) { - tempkn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; - INSERT_TASK_zlange( - &options, - ChamMaxNorm, tempkm, tempkn, A->nb, - A(m, n), ldam, - W1(m, n)); - } - } - } - - /* Initialize RESULT array */ - INSERT_TASK_dlaset( - &options, - ChamUpperLower, 1, 1, - 0., 0., - RESULT(0,0), 1); - - /* - * ChamLower - */ - if (uplo == ChamLower) { - /* Compute max norm between tiles */ - for(n = 0; n < minMNT; n++) { - for(m = n; m < A->mt; m++) { - INSERT_TASK_dlange_max( - &options, - W1(m, n), - RESULT(0,0)); - } - } - } - /* - * ChamUpper - */ - else { - /* Compute max norm between tiles */ - for(m = 0; m < minMNT; m++) { - for(n = m; n < A->nt; n++) { - INSERT_TASK_dlange_max( - &options, - W1(m, n), - RESULT(0,0)); - } - } - } - - /* Copy max norm in tiles to dispatch on every nodes */ - for(m = 0; m < A->p; m++) { - for(n = 0; n < A->q; n++) { - INSERT_TASK_dlacpy( - &options, - ChamUpperLower, 1, 1, 1, - RESULT(0,0), 1, - W1(m, n), 1 ); - } - } - - CHAMELEON_Desc_Flush( W1, sequence ); - CHAMELEON_Desc_Flush( RESULT, sequence ); - RUNTIME_sequence_wait(chamctxt, sequence); - *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q ); - CHAMELEON_Desc_Destroy( &(W1) ); - CHAMELEON_Desc_Destroy( &(RESULT) ); - } - RUNTIME_options_ws_free(&options); - RUNTIME_options_finalize(&options, chamctxt); -} diff --git a/compute/zlange.c b/compute/zlange.c index 82968715d9ef59ab1e7e75b38443153ca1788b70..e807b403f7d8f544657d3524c16337107ef7062c 100644 --- a/compute/zlange.c +++ b/compute/zlange.c @@ -287,7 +287,7 @@ int CHAMELEON_zlange_Tile_Async( cham_normtype_t norm, CHAM_desc_t *A, double *v return CHAMELEON_SUCCESS; } - chameleon_pzlange( norm, A, value, sequence, request ); + chameleon_pzlange_generic( norm, ChamUpperLower, ChamNonUnit, A, value, sequence, request ); return CHAMELEON_SUCCESS; } diff --git a/compute/zlanhe.c b/compute/zlanhe.c index 5297f8eabb7cec89bf327809d580c7298f3a0a86..4eff40c2907f80ab7278519a532c5394c317f68e 100644 --- a/compute/zlanhe.c +++ b/compute/zlanhe.c @@ 
-295,7 +295,7 @@ int CHAMELEON_zlanhe_Tile_Async( cham_normtype_t norm, cham_uplo_t uplo, CHAM_de return CHAMELEON_SUCCESS; } - chameleon_pzlanhe( norm, uplo, A, value, sequence, request ); + chameleon_pzlansy_generic( norm, uplo, ChamConjTrans, A, value, sequence, request ); return CHAMELEON_SUCCESS; } diff --git a/compute/zlansy.c b/compute/zlansy.c index 995f21c80d0e1d4a13316f7a768a757cd80c72ec..53b4e088d417f345a089c35b3904cbc86e50dd65 100644 --- a/compute/zlansy.c +++ b/compute/zlansy.c @@ -295,7 +295,7 @@ int CHAMELEON_zlansy_Tile_Async( cham_normtype_t norm, cham_uplo_t uplo, CHAM_de return CHAMELEON_SUCCESS; } - chameleon_pzlansy( norm, uplo, A, value, sequence, request ); + chameleon_pzlansy_generic( norm, uplo, ChamTrans, A, value, sequence, request ); return CHAMELEON_SUCCESS; } diff --git a/compute/zlantr.c b/compute/zlantr.c index fb78aa2e8d1954ff79db4c326b6a246b2788d9b6..82a372e48dab76e541522f7b5607e487bb79f718 100644 --- a/compute/zlantr.c +++ b/compute/zlantr.c @@ -323,7 +323,7 @@ int CHAMELEON_zlantr_Tile_Async( cham_normtype_t norm, cham_uplo_t uplo, cham_di return CHAMELEON_SUCCESS; } - chameleon_pzlantr( norm, uplo, diag, A, value, sequence, request ); + chameleon_pzlange_generic( norm, uplo, diag, A, value, sequence, request ); return CHAMELEON_SUCCESS; } diff --git a/control/compute_z.h b/control/compute_z.h index 0f5c0c7f77bead5bf041f9157111d8b25edc56b5..e99d83e57f2deb08c4a9099b8a959cb4571ea7c9 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -82,10 +82,11 @@ void chameleon_pzher2k(cham_uplo_t uplo, cham_trans_t trans, CHAMELEON_Complex64 void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *E, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzlag2c(CHAM_desc_t *A, CHAM_desc_t *SB, RUNTIME_sequence_t *sequence, RUNTIME_request_t 
*request); -void chameleon_pzlange(cham_normtype_t norm, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -void chameleon_pzlanhe(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -void chameleon_pzlansy(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, + double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_trans_t trans, + CHAM_desc_t *A, double *result, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzlaset( cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzlaset2(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);