From d9f3abee5eec7b3257d6bd6ba80e71247401562f Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Tue, 7 Jan 2020 13:45:05 +0100 Subject: [PATCH] Fix load balancing of the D matrix in QR/LQ algorithms --- compute/zgelqf.c | 4 ++-- compute/zgelqf_param.c | 4 ++-- compute/zgelqs.c | 4 ++-- compute/zgelqs_param.c | 4 ++-- compute/zgels.c | 8 ++++---- compute/zgels_param.c | 8 ++++---- compute/zgeqrf.c | 4 ++-- compute/zgeqrf_param.c | 4 ++-- compute/zgeqrs.c | 4 ++-- compute/zgeqrs_param.c | 4 ++-- compute/zgesv_incpiv.c | 4 ++-- compute/zgetrf_incpiv.c | 4 ++-- compute/ztpgqrt.c | 2 +- compute/zunglq.c | 4 ++-- compute/zunglq_param.c | 4 ++-- compute/zungqr.c | 4 ++-- compute/zungqr_param.c | 4 ++-- compute/zunmlq.c | 4 ++-- compute/zunmlq_param.c | 4 ++-- compute/zunmqr.c | 4 ++-- compute/zunmqr_param.c | 4 ++-- control/compute_z.h | 23 +++++++++++++++++++++++ 22 files changed, 68 insertions(+), 45 deletions(-) diff --git a/compute/zgelqf.c b/compute/zgelqf.c index 0b2ee5b69..4d6633b0a 100644 --- a/compute/zgelqf.c +++ b/compute/zgelqf.c @@ -278,8 +278,8 @@ int CHAMELEON_zgelqf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, */ #if defined(CHAMELEON_COPY_DIAG) { - int m = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + int m = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); Dptr = &D; } #endif diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c index 6d8c125a5..742617826 100644 --- a/compute/zgelqf_param.c +++ b/compute/zgelqf_param.c @@ -282,8 +282,8 @@ int CHAMELEON_zgelqf_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t */ #if defined(CHAMELEON_COPY_DIAG) { - int m = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + int m = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); Dptr = &D; } #endif diff --git a/compute/zgelqs.c b/compute/zgelqs.c index 
703650460..483d3ef27 100644 --- a/compute/zgelqs.c +++ b/compute/zgelqs.c @@ -318,8 +318,8 @@ int CHAMELEON_zgelqs_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, #if defined(CHAMELEON_COPY_DIAG) { - int m = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + int m = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); Dptr = &D; } #endif diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c index eedb2dacd..26839a939 100644 --- a/compute/zgelqs_param.c +++ b/compute/zgelqs_param.c @@ -330,8 +330,8 @@ int CHAMELEON_zgelqs_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t #if defined(CHAMELEON_COPY_DIAG) { - int m = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + int m = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); Dptr = &D; } #endif diff --git a/compute/zgels.c b/compute/zgels.c index b132610a8..c18c4c046 100644 --- a/compute/zgels.c +++ b/compute/zgels.c @@ -364,8 +364,8 @@ int CHAMELEON_zgels_Tile_Async( cham_trans_t trans, CHAM_desc_t *A, if (A->m >= A->n) { #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif @@ -432,8 +432,8 @@ int CHAMELEON_zgels_Tile_Async( cham_trans_t trans, CHAM_desc_t *A, else { #if defined(CHAMELEON_COPY_DIAG) { - int m = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + int m = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); Dptr = &D; } #endif diff --git a/compute/zgels_param.c b/compute/zgels_param.c index 01999735f..26d777cc6 100644 --- a/compute/zgels_param.c +++ b/compute/zgels_param.c @@ -377,8 +377,8 @@ int CHAMELEON_zgels_param_Tile_Async( const libhqr_tree_t 
*qrtree, cham_trans_t if (A->m >= A->n) { #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif @@ -431,8 +431,8 @@ int CHAMELEON_zgels_param_Tile_Async( const libhqr_tree_t *qrtree, cham_trans_t else { #if defined(CHAMELEON_COPY_DIAG) { - int m = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + int m = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); Dptr = &D; } #endif diff --git a/compute/zgeqrf.c b/compute/zgeqrf.c index 98675a467..f9557eb99 100644 --- a/compute/zgeqrf.c +++ b/compute/zgeqrf.c @@ -277,8 +277,8 @@ int CHAMELEON_zgeqrf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, */ #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c index c406b9207..75e25f895 100644 --- a/compute/zgeqrf_param.c +++ b/compute/zgeqrf_param.c @@ -302,8 +302,8 @@ int CHAMELEON_zgeqrf_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif diff --git a/compute/zgeqrs.c b/compute/zgeqrs.c index a6b41d0b1..0857f9e08 100644 --- a/compute/zgeqrs.c +++ b/compute/zgeqrs.c @@ -307,8 +307,8 @@ int CHAMELEON_zgeqrs_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, */ #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, 
A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c index 84aa1af68..eb914016d 100644 --- a/compute/zgeqrs_param.c +++ b/compute/zgeqrs_param.c @@ -311,8 +311,8 @@ int CHAMELEON_zgeqrs_param_Tile_Async( const libhqr_tree_t *qrtree, */ #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif diff --git a/compute/zgesv_incpiv.c b/compute/zgesv_incpiv.c index 89cc15107..3842b95c8 100644 --- a/compute/zgesv_incpiv.c +++ b/compute/zgesv_incpiv.c @@ -311,8 +311,8 @@ int CHAMELEON_zgesv_incpiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->mt, A->nt) * A->nb; - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif diff --git a/compute/zgetrf_incpiv.c b/compute/zgetrf_incpiv.c index f1f2e795a..6cc7614fd 100644 --- a/compute/zgetrf_incpiv.c +++ b/compute/zgetrf_incpiv.c @@ -283,8 +283,8 @@ int CHAMELEON_zgetrf_incpiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *L, int *IPI #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->mt, A->nt) * A->nb; - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c index 98db159af..d826d8111 100644 --- a/compute/ztpgqrt.c +++ b/compute/ztpgqrt.c @@ -404,7 +404,7 @@ int CHAMELEON_ztpgqrt_Tile_Async( int L, #if defined(CHAMELEON_COPY_DIAG) { - chameleon_zdesc_alloc(D, V1->mb, V1->nb, V1->m, KT*V1->nb, 0, 0, V1->m, KT*V1->nb, ); + 
chameleon_zdesc_copy_and_restrict( V1, &D, V1->m, KT*V1->nb ); Dptr = &D; } #endif diff --git a/compute/zunglq.c b/compute/zunglq.c index 3d3ce3062..085fcd29b 100644 --- a/compute/zunglq.c +++ b/compute/zunglq.c @@ -300,8 +300,8 @@ int CHAMELEON_zunglq_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q, #if defined(CHAMELEON_COPY_DIAG) { - int m = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + int m = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); Dptr = &D; } #endif diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c index aec224f96..4a7a908cb 100644 --- a/compute/zunglq_param.c +++ b/compute/zunglq_param.c @@ -303,8 +303,8 @@ int CHAMELEON_zunglq_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t */ #if defined(CHAMELEON_COPY_DIAG) { - int m = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + int m = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); Dptr = &D; } #endif diff --git a/compute/zungqr.c b/compute/zungqr.c index 69f8be30e..0f4558e7a 100644 --- a/compute/zungqr.c +++ b/compute/zungqr.c @@ -297,8 +297,8 @@ int CHAMELEON_zungqr_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q, */ #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c index 36bacc089..8c0eb0a68 100644 --- a/compute/zungqr_param.c +++ b/compute/zungqr_param.c @@ -310,8 +310,8 @@ int CHAMELEON_zungqr_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + 
chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif diff --git a/compute/zunmlq.c b/compute/zunmlq.c index 07c635a67..de73e20a3 100644 --- a/compute/zunmlq.c +++ b/compute/zunmlq.c @@ -359,8 +359,8 @@ int CHAMELEON_zunmlq_Tile_Async( cham_side_t side, cham_trans_t trans, */ #if defined(CHAMELEON_COPY_DIAG) { - int m = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + int m = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); Dptr = &D; } #endif diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c index df0fba8ba..0c85cc62d 100644 --- a/compute/zunmlq_param.c +++ b/compute/zunmlq_param.c @@ -365,8 +365,8 @@ int CHAMELEON_zunmlq_param_Tile_Async( const libhqr_tree_t *qrtree, cham_side_t #if defined(CHAMELEON_COPY_DIAG) { - int m = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, ); + int m = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, m, A->n ); Dptr = &D; } #endif diff --git a/compute/zunmqr.c b/compute/zunmqr.c index 09387b79b..979823987 100644 --- a/compute/zunmqr.c +++ b/compute/zunmqr.c @@ -362,8 +362,8 @@ int CHAMELEON_zunmqr_Tile_Async( cham_side_t side, cham_trans_t trans, #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, A->m, n ); Dptr = &D; } #endif diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c index e7b31e742..810c2ea2f 100644 --- a/compute/zunmqr_param.c +++ b/compute/zunmqr_param.c @@ -372,8 +372,8 @@ int CHAMELEON_zunmqr_param_Tile_Async( const libhqr_tree_t *qrtree, #if defined(CHAMELEON_COPY_DIAG) { - int n = chameleon_min(A->m, A->n); - chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, ); + int n = chameleon_min( A->m, A->n ); + chameleon_zdesc_copy_and_restrict( A, &D, 
A->m, n ); Dptr = &D; } #endif diff --git a/control/compute_z.h b/control/compute_z.h index 196fde204..ad911e8f6 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -154,6 +154,29 @@ chameleon_zdesc_alloc_diag( CHAM_desc_t *descA, int nb, int m, int n, int p, int } \ } +/** + * @brief Initialize descOut as an m-by-n descriptor that reuses descIn's mb, nb, p, q and its get_blkaddr/get_blkldd/get_rankof callbacks. + * @param[in] descIn The input descriptor from which the structure is copied; only its mb, nb, p, q, get_blkaddr, get_blkldd and get_rankof fields are read. + * @param[out] descOut The output descriptor, initialized through chameleon_desc_init() with CHAMELEON_MAT_ALLOC_TILE (allocation on the fly) and ChamComplexDouble. + * @param[in] m The number of rows of the output descriptor. + * @param[in] n The number of columns of the output descriptor. + * @return The value returned by chameleon_desc_init(): CHAMELEON_SUCCESS on success, the associated error on failure. + */ +static inline int +chameleon_zdesc_copy_and_restrict( CHAM_desc_t *descIn, + CHAM_desc_t *descOut, + int m, int n ) +{ + int rc; + rc = chameleon_desc_init( descOut, CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, descIn->mb, descIn->nb, descIn->mb * descIn->nb, + m, n, 0, 0, m, n, descIn->p, descIn->q, + descIn->get_blkaddr, + descIn->get_blkldd, + descIn->get_rankof ); + return rc; +} + /** * @brief Internal function to convert the lapack format to tile format in * LAPACK interface calls -- GitLab