From 0aa032c58e7f52d470092021a1c82b610936a591 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Wed, 28 Sep 2022 22:20:37 +0200
Subject: [PATCH] compute/mixed-precision: Fix algorithm for task insertion

---
 compute/pzlag2c.c | 64 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 15 deletions(-)

diff --git a/compute/pzlag2c.c b/compute/pzlag2c.c
index bc28fcf11..f6d01a246 100644
--- a/compute/pzlag2c.c
+++ b/compute/pzlag2c.c
@@ -24,23 +24,54 @@
  */
 #include "control/common.h"
 
-#define A(m,n) A, m, n
-#define B(m,n) B, m, n
-#define SA(m,n) SA, m, n
-#define SB(m,n) SB, m, n
+#define A( _m_, _n_ ) A, (_m_), (_n_)
+#define B( _m_, _n_ ) B, (_m_), (_n_)
+
 /**
  *
  */
+void chameleon_pclag2z( CHAM_desc_t *A, CHAM_desc_t *B,
+                        RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t  *chamctxt;
+    RUNTIME_option_t options;
+
+    int tempmm, tempnn;
+    int m, n;
+
+    chamctxt = chameleon_context_self();
+    if (sequence->status != CHAMELEON_SUCCESS) {
+        return;
+    }
+    RUNTIME_options_init(&options, chamctxt, sequence, request);
+
+    for(m = 0; m < A->mt; m++) {
+        tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
+
+        for(n = 0; n < A->nt; n++) {
+            tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+
+            INSERT_TASK_clag2z(
+                &options,
+                tempmm, tempnn, A->mb,
+                A(m, n),
+                B(m, n));
+        }
+    }
+
+    RUNTIME_options_finalize(&options, chamctxt);
+}
+
 /**
  *
  */
-void chameleon_pclag2z(CHAM_desc_t *SA, CHAM_desc_t *B,
-                       RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
+void chameleon_pzlag2c( CHAM_desc_t *A, CHAM_desc_t *B,
+                        RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
-    CHAM_context_t *chamctxt;
+    CHAM_context_t  *chamctxt;
     RUNTIME_option_t options;
 
-    int X, Y;
+    int tempmm, tempnn;
     int m, n;
 
     chamctxt = chameleon_context_self();
@@ -49,16 +80,19 @@ void chameleon_pclag2z(CHAM_desc_t *SA, CHAM_desc_t *B,
     }
     RUNTIME_options_init(&options, chamctxt, sequence, request);
 
-    for(m = 0; m < SA->mt; m++) {
-        X = m == SA->mt-1 ? SA->m-m*SA->mb : SA->mb;
-        for(n = 0; n < SA->nt; n++) {
-            Y = n == SA->nt-1 ? SA->n-n*SA->nb : SA->nb;
-            INSERT_TASK_clag2z(
+    for(m = 0; m < A->mt; m++) {
+        tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
+
+        for(n = 0; n < A->nt; n++) {
+            tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+
+            INSERT_TASK_zlag2c(
                 &options,
-                X, Y, SA->mb,
-                SA(m, n),
+                tempmm, tempnn, A->mb,
+                A(m, n),
                 B(m, n));
         }
     }
+
     RUNTIME_options_finalize(&options, chamctxt);
 }
-- 
GitLab