Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 4480d9a7 authored by Mathieu Faverge
Browse files

plrnk: Add a generic algorithm for non 2dbc data distributions

parent 8c6e13fd
No related branches found
No related tags found
1 merge request: !503 Make sure testings are succeeding when SBC is enabled.
...@@ -19,93 +19,211 @@ ...@@ -19,93 +19,211 @@
*/ */
#include "control/common.h" #include "control/common.h"
#define C(m, n) C, m, n #define WA(m, n) WA, m, n
#define WA(m, n) &WA, m, n #define WB(m, n) WB, m, n
#define WB(m, n) &WB, m, n #define C(m, n) C, m, n
/** /**
* chameleon_pzplrnk - Generate a random rank-k matrix by tiles. * chameleon_pzplrnk - Generate a random rank-k matrix by tiles.
*/ */
void chameleon_pzplrnk( int K, CHAM_desc_t *C, static inline void
unsigned long long int seedA, chameleon_pzplrnk_generic( CHAM_context_t *chamctxt,
unsigned long long int seedB, int K,
RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) CHAM_desc_t *WA,
CHAM_desc_t *WB,
CHAM_desc_t *C,
unsigned long long int seedA,
unsigned long long int seedB,
RUNTIME_option_t *options )
{ {
CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = options->sequence;
RUNTIME_option_t options;
int m, n, k, KT;
int tempmm, tempnn, tempkk;
int myp, myq;
CHAMELEON_Complex64_t zbeta; CHAMELEON_Complex64_t zbeta;
CHAM_desc_t WA, WB; int m, n, k, KT;
int tempmm, tempnn, tempkk;
int myrank = RUNTIME_comm_rank( chamctxt );
int initA;
int *initB = malloc( C->nt * sizeof(int) );
chamctxt = chameleon_context_self(); KT = (K + C->mb - 1) / C->mb;
if (sequence->status != CHAMELEON_SUCCESS) {
return; for (k = 0; k < KT; k++) {
tempkk = k == KT-1 ? K - k * WA->nb : WA->nb;
zbeta = k == 0 ? 0. : 1.;
memset( initB, 0, C->nt * sizeof(int) );
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
initA = 0;
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
if ( C->get_rankof( C(m, n) ) == myrank ) {
if ( !initA ) {
INSERT_TASK_zplrnt(
options,
tempmm, tempkk, WA(m, myrank),
WA->m, m * WA->mb, k * WA->nb, seedA );
initA = 1;
}
if ( !initB[n] ) {
INSERT_TASK_zplrnt(
options,
tempkk, tempnn, WB(myrank, n),
WB->m, k * WB->mb, n * WB->nb, seedB );
initB[n] = 1;
}
INSERT_TASK_zgemm(
options,
ChamNoTrans, ChamNoTrans,
tempmm, tempnn, tempkk, C->mb,
1., WA(m, myrank),
WB(myrank, n),
zbeta, C(m, n));
}
}
if ( initA ) {
RUNTIME_data_flush( sequence, WA(m, myrank) );
}
}
for (n = 0; n < C->nt; n++) {
if ( initB[n] ) {
RUNTIME_data_flush( sequence, WB(myrank, n) );
}
}
} }
RUNTIME_options_init( &options, chamctxt, sequence, request ); }
chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE, /**
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), * chameleon_pzplrnk - Generate a random rank-k matrix by tiles on a 2dbc grid.
C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1), 0, 0, */
C->mt * C->mb, C->nb * chameleon_desc_datadist_get_iparam(C, 1), static inline void
chameleon_desc_datadist_get_iparam(C, 0), chameleon_pzplrnk_2dbc( CHAM_context_t *chamctxt,
chameleon_desc_datadist_get_iparam(C, 1), int K,
NULL, NULL, NULL, NULL ); CHAM_desc_t *WA,
chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE, CHAM_desc_t *WB,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb), CHAM_desc_t *C,
C->mb * chameleon_desc_datadist_get_iparam(C, 0), C->nt * C->nb, 0, 0, unsigned long long int seedA,
C->mb * chameleon_desc_datadist_get_iparam(C, 0), C->nt * C->nb, unsigned long long int seedB,
chameleon_desc_datadist_get_iparam(C, 0), RUNTIME_option_t *options )
chameleon_desc_datadist_get_iparam(C, 1), {
NULL, NULL, NULL, NULL ); RUNTIME_sequence_t *sequence = options->sequence;
CHAMELEON_Complex64_t zbeta;
int m, n, k, KT;
int tempmm, tempnn, tempkk;
int p, q, myp, myq;
KT = (K + C->mb - 1) / C->mb; KT = (K + C->mb - 1) / C->mb;
myp = C->myrank / chameleon_desc_datadist_get_iparam(C, 1); p = chameleon_desc_datadist_get_iparam( C, 0 );
myq = C->myrank % chameleon_desc_datadist_get_iparam(C, 1); q = chameleon_desc_datadist_get_iparam( C, 1 );
myp = C->myrank / q;
myq = C->myrank % q;
for (k = 0; k < KT; k++) { for (k = 0; k < KT; k++) {
tempkk = k == KT-1 ? K - k * WA.nb : WA.nb; tempkk = k == KT-1 ? K - k * WA->nb : WA->nb;
zbeta = k == 0 ? 0. : 1.; zbeta = k == 0 ? 0. : 1.;
for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { for (n = myq; n < C->nt; n += q) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
INSERT_TASK_zplrnt( INSERT_TASK_zplrnt(
&options, options,
tempkk, tempnn, WB(myp, n), tempkk, tempnn, WB(myp, n),
WB.m, k * WB.mb, n * WB.nb, seedB ); WB->m, k * WB->mb, n * WB->nb, seedB );
} }
for (m = myp; m < C->mt; m+=chameleon_desc_datadist_get_iparam(C, 0)) { for (m = myp; m < C->mt; m += p) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb; tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
INSERT_TASK_zplrnt( INSERT_TASK_zplrnt(
&options, options,
tempmm, tempkk, WA(m, myq), tempmm, tempkk, WA(m, myq),
WA.m, m * WA.mb, k * WA.nb, seedA ); WA->m, m * WA->mb, k * WA->nb, seedA );
for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { for (n = myq; n < C->nt; n+=q) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
INSERT_TASK_zgemm( INSERT_TASK_zgemm(
&options, options,
ChamNoTrans, ChamNoTrans, ChamNoTrans, ChamNoTrans,
tempmm, tempnn, tempkk, C->mb, tempmm, tempnn, tempkk, C->mb,
1., WA(m, myq), 1., WA(m, myq),
WB(myp, n), WB(myp, n),
zbeta, C(m, n)); zbeta, C(m, n));
} }
RUNTIME_data_flush( sequence, WA(m, 0) ); RUNTIME_data_flush( sequence, WA(m, myq) );
} }
for (n = myq; n < C->nt; n+=chameleon_desc_datadist_get_iparam(C, 1)) { for (n = myq; n < C->nt; n+=q) {
RUNTIME_data_flush( sequence, WB(0, n) ); RUNTIME_data_flush( sequence, WB(myp, n) );
} }
} }
}
/**
* Rank-k matrix generator.
*/
void
chameleon_pzplrnk( int K,
CHAM_desc_t *C,
unsigned long long int seedA,
unsigned long long int seedB,
RUNTIME_sequence_t *sequence,
RUNTIME_request_t *request )
{
CHAM_context_t *chamctxt;
RUNTIME_option_t options;
CHAM_desc_t WA, WB;
int p, q;
chamctxt = chameleon_context_self();
if (sequence->status != CHAMELEON_SUCCESS) {
return;
}
RUNTIME_options_init( &options, chamctxt, sequence, request );
p = chameleon_desc_datadist_get_iparam( C, 0 );
q = chameleon_desc_datadist_get_iparam( C, 1 );
if ( ( chamctxt->generic_enabled != CHAMELEON_TRUE ) &&
( C->get_rankof_init == chameleon_getrankof_2d ) &&
( (chameleon_desc_datadist_get_iparam(C, 0) != 1) ||
(chameleon_desc_datadist_get_iparam(C, 1) != 1) ) )
{
chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->mt * C->mb, C->nb * q, 0, 0,
C->mt * C->mb, C->nb * q, p, q,
NULL, NULL, NULL, NULL );
chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->mb * p, C->nt * C->nb, 0, 0,
C->mb * p, C->nt * C->nb, p, q,
NULL, NULL, NULL, NULL );
chameleon_pzplrnk_2dbc( chamctxt, K, &WA, &WB, C, seedA, seedB, &options );
}
else {
int np = p * q;
chameleon_desc_init( &WA, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->mt * C->mb, C->nb * np, 0, 0,
C->mt * C->mb, C->nb * np, 1, np,
NULL, NULL, NULL, NULL );
chameleon_desc_init( &WB, CHAMELEON_MAT_ALLOC_TILE,
ChamComplexDouble, C->mb, C->nb, (C->mb * C->nb),
C->mb * np, C->nt * C->nb, 0, 0,
C->mb * np, C->nt * C->nb, np, 1,
NULL, NULL, NULL, NULL );
chameleon_pzplrnk_generic( chamctxt, K, &WA, &WB, C, seedA, seedB, &options );
}
RUNTIME_desc_flush( &WA, sequence ); RUNTIME_desc_flush( &WA, sequence );
RUNTIME_desc_flush( &WB, sequence ); RUNTIME_desc_flush( &WB, sequence );
RUNTIME_desc_flush( C, sequence ); RUNTIME_desc_flush( C, sequence );
chameleon_sequence_wait( chamctxt, sequence ); chameleon_sequence_wait( chamctxt, sequence );
chameleon_desc_destroy( &WA ); chameleon_desc_destroy( &WA );
chameleon_desc_destroy( &WB ); chameleon_desc_destroy( &WB );
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment