Mentions légales du service

Skip to content
Snippets Groups Projects
Commit d9b1c53b authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

zgemm: Add stationnary A gemm variant

zgemm-Astat: make sure the correct nodes are registering C for reduction
parent cb46581e
No related branches found
No related tags found
1 merge request!334SUMMA GEMM with A stationnary
......@@ -30,6 +30,142 @@
#define WA(m, n) WA, m, n
#define WB(m, n) WB, m, n
/**
* Parallel tile matrix-matrix multiplication.
* Generic algorithm for any data distribution.
*/
static inline void
chameleon_pzgemm_Astat( CHAM_context_t *chamctxt, cham_trans_t transA, cham_trans_t transB,
CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B,
CHAMELEON_Complex64_t beta, CHAM_desc_t *C,
RUNTIME_option_t *options )
{
RUNTIME_sequence_t *sequence = options->sequence;
CHAMELEON_Complex64_t zbeta;
CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
int m, n, k;
int tempmm, tempnn, tempkn, tempkm;
int myrank = RUNTIME_comm_rank( chamctxt );
/* Set C tiles to redux mode. */
for (n = 0; n < C->nt; n++) {
for (m = 0; m < C->mt; m++) {
/* The node owns the C tile. */
if ( C->get_rankof( C(m, n) ) == myrank ) {
reduceC[ n * C->nt + m ] = 1;
RUNTIME_zgersum_set_methods( C(m, n) );
continue;
}
/*
* The node owns the A tile that will define the locality of the
* computations.
*/
if ( transA == ChamNoTrans ) {
for (k = 0; k < A->nt; k++) {
if ( A->get_rankof( A(m, k) ) == myrank ) {
RUNTIME_zgersum_set_methods( C(m, n) );
break;
}
}
}
else {
for (k = 0; k < A->mt; k++) {
if ( A->get_rankof( A(k, m) ) == myrank ) {
RUNTIME_zgersum_set_methods( C(m, n) );
break;
}
}
}
}
}
for (n = 0; n < C->nt; n++) {
tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
for (m = 0; m < C->mt; m++) {
tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
/* Scale C */
INSERT_TASK_zlascal( options, ChamUpperLower, tempmm, tempnn, C->mb,
beta, C, m, n );
/*
* A: ChamNoTrans / B: ChamNoTrans
*/
if (transA == ChamNoTrans) {
if (transB == ChamNoTrans) {
for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
INSERT_TASK_zgemm_Astat(
options,
transA, transB,
tempmm, tempnn, tempkn, A->mb,
alpha, A(m, k), /* lda * Z */
B(k, n), /* ldb * Y */
zone, C(m, n)); /* ldc * Y */
}
}
/*
* A: ChamNoTrans / B: Cham[Conj]Trans
*/
else {
for (k = 0; k < A->nt; k++) {
tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
INSERT_TASK_zgemm_Astat(
options,
transA, transB,
tempmm, tempnn, tempkn, A->mb,
alpha, A(m, k), /* lda * Z */
B(n, k), /* ldb * Z */
zone, C(m, n)); /* ldc * Y */
}
}
}
/*
* A: Cham[Conj]Trans / B: ChamNoTrans
*/
else {
if (transB == ChamNoTrans) {
for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
INSERT_TASK_zgemm_Astat(
options,
transA, transB,
tempmm, tempnn, tempkm, A->mb,
alpha, A(k, m), /* lda * X */
B(k, n), /* ldb * Y */
zone, C(m, n)); /* ldc * Y */
}
}
/*
* A: Cham[Conj]Trans / B: Cham[Conj]Trans
*/
else {
for (k = 0; k < A->mt; k++) {
tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
INSERT_TASK_zgemm_Astat(
options,
transA, transB,
tempmm, tempnn, tempkm, A->mb,
alpha, A(k, m), /* lda * X */
B(n, k), /* ldb * Z */
zone, C(m, n)); /* ldc * Y */
}
}
}
RUNTIME_zgersum_submit_tree( options, C(m, n) );
RUNTIME_data_flush( sequence, C(m, n) );
}
}
(void)chamctxt;
}
/**
* Parallel tile matrix-matrix multiplication
* SUMMA algorithm for 2D block-cyclic data distribution.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment