Mentions légales du service

Skip to content
Snippets Groups Projects

5 - Dist/SPMM

Merged Tony Delarue requested to merge tdelarue/spm:dist/spmm into master
1 file
+ 107
13
Compare changes
  • Side-by-side
  • Inline
+ 107
13
@@ -40,12 +40,13 @@ __fct_conj( spm_complex64_t val ) {
@@ -40,12 +40,13 @@ __fct_conj( spm_complex64_t val ) {
struct __spm_zmatvec_s {
struct __spm_zmatvec_s {
int follow_x;
int follow_x;
spm_int_t baseval, n, nnz;
spm_int_t baseval, n, gN, nnz;
spm_complex64_t alpha;
spm_complex64_t alpha;
const spm_int_t *rowptr;
const spm_int_t *rowptr;
const spm_int_t *colptr;
const spm_int_t *colptr;
const spm_complex64_t *values;
const spm_complex64_t *values;
 
const spm_int_t *loc2glob;
const spm_complex64_t *x;
const spm_complex64_t *x;
spm_int_t incx;
spm_int_t incx;
@@ -93,7+94,7 @@
@@ -93,7+94,7 @@
return SPM_SUCCESS;
return SPM_SUCCESS;
}
}
static inline int
static inline int
__spm_zmatvec_sy_csc( const __spm_zmatvec_t *args )
__spm_zmatvec_sy_csc( const __spm_zmatvec_t *args )
{
{
spm_int_t baseval = args->baseval;
spm_int_t baseval = args->baseval;
@@ -102,7+103,7 @@
@@ -102,7+103,7 @@
const spm_int_t *rowptr = args->rowptr;
const spm_int_t *rowptr = args->rowptr;
const spm_int_t *colptr = args->colptr;
const spm_int_t *colptr = args->colptr;
const spm_complex64_t *values = args->values;
const spm_complex64_t *values = args->values;
 
const spm_int_t *loc2glob = args->loc2glob;
const spm_complex64_t *x = args->x;
const spm_complex64_t *x = args->x;
spm_int_t incx = args->incx;
spm_int_t incx = args->incx;
spm_complex64_t *y = args->y;
spm_complex64_t *y = args->y;
spm_int_t incy = args->incy;
spm_int_t incy = args->incy;
const __conj_fct_t conjA_fct = args->conjA_fct;
const __conj_fct_t conjA_fct = args->conjA_fct;
const __conj_fct_t conjAt_fct = args->conjAt_fct;
const __conj_fct_t conjAt_fct = args->conjAt_fct;
spm_int_t col, row, i;
spm_int_t col, gcol, row, i;
 
for( col=0; col<n; col++, colptr++ )
for( col=0; col<n; col++, colptr++ )
{
{
 
gcol = (loc2glob == NULL) ? col : loc2glob[col];
for( i=colptr[0]; i<colptr[1]; i++, rowptr++, values++ )
for( i=colptr[0]; i<colptr[1]; i++, rowptr++, values++ )
{
{
row = *rowptr - baseval;
row = *rowptr - baseval;
if ( row != gcol ) {
if ( row != col ) {
y[ row * incy ] += alpha * conjA_fct( *values ) * x[ gcol * incx ];
y[ row * incy ] += alpha * conjA_fct( *values ) * x[ col * incx ];
y[ gcol * incy ] += alpha * conjAt_fct( *values ) * x[ row * incx ];
y[ col * incy ] += alpha * conjAt_fct( *values ) * x[ row * incx ];
}
}
else {
else {
y[ col * incy ] += alpha * conjA_fct( *values ) * x[ row * incx ];
y[ gcol * incy ] += alpha * conjA_fct( *values ) * x[ gcol * incx ];
}
}
}
}
}
}
@@ -167,7+170,7 @@
@@ -167,7+170,7 @@
return SPM_SUCCESS;
return SPM_SUCCESS;
}
}
static inline int
static inline int
__spm_zmatvec_sy_ijv( const __spm_zmatvec_t *args )
__spm_zmatvec_sy_ijv( const __spm_zmatvec_t *args )
{
{
spm_int_t baseval = args->baseval;
spm_int_t baseval = args->baseval;
@@ -200,4+203,4 @@
@@ -200,4+203,4 @@
return SPM_SUCCESS;
return SPM_SUCCESS;
}
}
static inline int
static inline int
__spm_zmatvec_ge_ijv( const __spm_zmatvec_t *args )
__spm_zmatvec_ge_ijv( const __spm_zmatvec_t *args )
{
{
spm_int_t baseval = args->baseval;
spm_int_t baseval = args->baseval;
 
spm_int_t n = args->n;
 
spm_int_t gN = args->gN;
spm_int_t nnz = args->nnz;
spm_int_t nnz = args->nnz;
spm_complex64_t alpha = args->alpha;
spm_complex64_t alpha = args->alpha;
const spm_int_t *rowptr = args->rowptr;
const spm_int_t *rowptr = args->rowptr;
const spm_int_t *colptr = args->colptr;
const spm_int_t *colptr = args->colptr;
const spm_complex64_t *values = args->values;
const spm_complex64_t *values = args->values;
 
const spm_int_t *loc2glob = args->loc2glob;
const spm_complex64_t *x = args->x;
const spm_complex64_t *x = args->x;
spm_int_t incx = args->incx;
spm_int_t incx = args->incx;
spm_complex64_t *y = args->y;
spm_complex64_t *y = args->y;
spm_int_t incy = args->incy;
spm_int_t incy = args->incy;
const __conj_fct_t conjA_fct = args->conjA_fct;
const __conj_fct_t conjA_fct = args->conjA_fct;
spm_int_t col, row, i;
spm_int_t col, row, i;
 
spm_int_t *glob2loc = NULL;
 
 
if( loc2glob != NULL ) {
 
glob2loc = malloc( gN * sizeof(spm_int_t) );
 
memset(glob2loc, 0xff, gN * sizeof(spm_int_t) );
 
for (i=0; i<n; i++, loc2glob++)
 
{
 
glob2loc[ *loc2glob ] = i;
 
}
 
}
for( i=0; i<nnz; i++, colptr++, rowptr++, values++ )
for( i=0; i<nnz; i++, colptr++, rowptr++, values++ )
{
{
row = *rowptr - baseval;
row = *rowptr - baseval;
col = *colptr - baseval;
col = (glob2loc == NULL) ? *colptr - baseval : glob2loc[ *colptr - baseval ];
y[ row * incy ] += alpha * conjA_fct( *values ) * x[ col * incx ];
y[ row * incy ] += alpha * conjA_fct( *values ) * x[ col * incx ];
}
}
 
 
if(glob2loc != NULL) {
 
free(glob2loc);
 
}
 
return SPM_SUCCESS;
return SPM_SUCCESS;
}
}
@@ -275,11 +296,13 @@ __spm_zmatvec_args_init( __spm_zmatvec_t *args,
@@ -275,11 +296,13 @@ __spm_zmatvec_args_init( __spm_zmatvec_t *args,
args->follow_x = 0;
args->follow_x = 0;
args->baseval = spmFindBase( A );
args->baseval = spmFindBase( A );
args->n = A->n;
args->n = A->n;
 
args->gN = A->gN;
args->nnz = A->nnz;
args->nnz = A->nnz;
args->alpha = alpha;
args->alpha = alpha;
args->rowptr = A->rowptr;
args->rowptr = A->rowptr;
args->colptr = A->colptr;
args->colptr = A->colptr;
args->values = A->values;
args->values = A->values;
 
args->loc2glob = A->loc2glob;
args->x = B;
args->x = B;
args->incx = incx;
args->incx = incx;
args->y = C;
args->y = C;
@@ -325,6 +348,8 @@ __spm_zmatvec_args_init( __spm_zmatvec_t *args,
@@ -325,6 +348,8 @@ __spm_zmatvec_args_init( __spm_zmatvec_t *args,
break;
break;
case SpmCSR:
case SpmCSR:
{
{
 
/* Mat-Mat product is not handled in distributed for now */
 
assert( args->loc2glob == NULL );
/* Switch pointers and side to get the correct behaviour */
/* Switch pointers and side to get the correct behaviour */
if ( ((side == SpmLeft) && (transA != SpmNoTrans)) ||
if ( ((side == SpmLeft) && (transA != SpmNoTrans)) ||
((side == SpmRight) && (transA == SpmNoTrans)) )
((side == SpmRight) && (transA == SpmNoTrans)) )
@@ -360,6 +385,39 @@ __spm_zmatvec_args_init( __spm_zmatvec_t *args,
@@ -360,6 +385,39 @@ __spm_zmatvec_args_init( __spm_zmatvec_t *args,
return SPM_SUCCESS;
return SPM_SUCCESS;
}
}
 
/**
 * Scatter a local vector into a global-sized vector.
 *
 * When the matrix carries no loc2glob array the function does nothing and
 * xglob is left untouched. Otherwise the first spm->gN entries of xglob are
 * zeroed, then each of the spm->n local entries xloc[k] is stored at position
 * loc2glob[k] of xglob.
 *
 * NOTE(review): the loc2glob values are used directly as indices into xglob,
 * without subtracting any base value - confirm they are 0-based here.
 *
 * @param[in]  spm    Matrix providing loc2glob, n and gN.
 * @param[in]  xloc   Local vector; spm->n entries are read.
 * @param[out] xglob  Destination vector; spm->gN entries are overwritten.
 */
static inline void
z_spmCopyC( const spmatrix_t      *spm,
            const spm_complex64_t *xloc,
            spm_complex64_t       *xglob )
{
    const spm_int_t *l2g = spm->loc2glob;
    spm_int_t        k;

    /* Nothing to scatter without a local-to-global mapping */
    if ( l2g == NULL ) {
        return;
    }

    /* Entries that have no local counterpart stay at zero */
    memset( xglob, 0, spm->gN * sizeof(spm_complex64_t) );

    for ( k = 0; k < spm->n; k++ ) {
        xglob[ l2g[k] ] = xloc[k];
    }
}
 
 
/**
 * Build the global-sized copy of a local vector for the non-general case.
 *
 * Returns immediately, leaving xglob untouched, when the matrix has no
 * loc2glob array or when its type is SpmGeneral. Otherwise the local vector
 * is scattered into xglob through z_spmCopyC() and, when SPM_WITH_MPI is
 * defined, xglob is summed in place over spm->comm with MPI_Allreduce.
 *
 * NOTE(review): the in-place sum presumably rebuilds the full vector on every
 * process, which assumes each global index is set by exactly one process -
 * confirm against the distribution scheme.
 *
 * @param[in]  spm    Matrix providing mtxtype, loc2glob, gN and comm.
 * @param[in]  xloc   Local vector forwarded to z_spmCopyC().
 * @param[out] xglob  Global-sized vector of spm->gN entries.
 */
static inline void
z_spmCopyB( const spmatrix_t      *spm,
            const spm_complex64_t *xloc,
            spm_complex64_t       *xglob )
{
    /* No local-to-global mapping: nothing to copy */
    if ( spm->loc2glob == NULL ) {
        return;
    }

    /* General matrices are skipped by this helper */
    if ( spm->mtxtype == SpmGeneral ) {
        return;
    }

    z_spmCopyC( spm, xloc, xglob );

#if defined(SPM_WITH_MPI)
    MPI_Allreduce( MPI_IN_PLACE, xglob, spm->gN, SPM_MPI_COMPLEX64, MPI_SUM, spm->comm );
#endif
}
 
 
/**
/**
*******************************************************************************
*******************************************************************************
*
*
@@ -459,6 +517,7 @@ spm_zspmm( spm_side_t side,
@@ -459,6 +517,7 @@ spm_zspmm( spm_side_t side,
int rc = SPM_SUCCESS;
int rc = SPM_SUCCESS;
spm_int_t M, N, ldx, ldy, r;
spm_int_t M, N, ldx, ldy, r;
__spm_zmatvec_t args;
__spm_zmatvec_t args;
 
spm_complex64_t *Ctmp, *Btmp;
if ( transB != SpmNoTrans ) {
if ( transB != SpmNoTrans ) {
fprintf(stderr, "transB != SpmNoTrans not supported yet in spmv computations\n");
fprintf(stderr, "transB != SpmNoTrans not supported yet in spmv computations\n");
@@ -492,13 +551,30 @@ spm_zspmm( spm_side_t side,
@@ -492,13 +551,30 @@ spm_zspmm( spm_side_t side,
return SPM_SUCCESS;
return SPM_SUCCESS;
}
}
 
ldc = (A->loc2glob != NULL) ? A->gN : ldc;
 
Ctmp = (A->loc2glob != NULL) ? malloc(A->gN * sizeof(spm_complex64_t)) : NULL;
 
ldb = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? A->gN : ldb;
 
Btmp = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? malloc(A->gN * sizeof(spm_complex64_t)) : NULL;
 
__spm_zmatvec_args_init( &args, side, transA,
__spm_zmatvec_args_init( &args, side, transA,
alpha, A, B, ldb, C, ldc );
alpha, A, Btmp, ldb, Ctmp, ldc );
for( r=0; (r < N) && (rc == SPM_SUCCESS); r++ ) {
for( r=0; (r < N) && (rc == SPM_SUCCESS); r++ ) {
args.x = B + r * ldx;
z_spmCopyB( A, B + r * ldx, Btmp );
args.y = C + r * ldy;
z_spmCopyC( A, C + r * ldy, Ctmp );
 
 
args.x = (Btmp == NULL) ? B + r * ldx : Btmp;
 
args.y = (Ctmp == NULL) ? C + r * ldy : Ctmp;
rc = args.loop_fct( &args );
rc = args.loop_fct( &args );
 
 
z_spmReduce( A, Ctmp, C + r * ldy );
 
}
 
 
if(A->loc2glob != NULL) {
 
free(Ctmp);
 
if(A->mtxtype != SpmGeneral){
 
free(Btmp);
 
}
}
}
return rc;
return rc;
@@ -559,6 +635,7 @@ spm_zspmv( spm_trans_t trans,
@@ -559,6 +635,7 @@ spm_zspmv( spm_trans_t trans,
{
{
int rc = SPM_SUCCESS;
int rc = SPM_SUCCESS;
__spm_zmatvec_t args;
__spm_zmatvec_t args;
 
spm_complex64_t *ytmp, *xtmp;
if ( beta == 0. ) {
if ( beta == 0. ) {
memset( y, 0, A->n * sizeof(spm_complex64_t) );
memset( y, 0, A->n * sizeof(spm_complex64_t) );
@@ -571,10 +648,27 @@ spm_zspmv( spm_trans_t trans,
@@ -571,10 +648,27 @@ spm_zspmv( spm_trans_t trans,
return SPM_SUCCESS;
return SPM_SUCCESS;
}
}
__spm_zmatvec_args_init( &args, SpmLeft, trans,
ytmp = (A->loc2glob != NULL) ? malloc(A->gN * sizeof(spm_complex64_t)) : y;
alpha, A, x, incx, y, incy );
incy = (A->loc2glob != NULL) ? A->gN : incy;
 
xtmp = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? malloc(A->gN * sizeof(spm_complex64_t)) : (spm_complex64_t *)x;
 
incx = ((A->loc2glob != NULL) && (A->mtxtype != SpmGeneral)) ? A->gN : incx;
 
z_spmCopyB( A, x, xtmp );
 
z_spmCopyC( A, y, ytmp );
 
 
__spm_zmatvec_args_init( &args, SpmLeft, trans,
 
alpha, A, xtmp, incx, ytmp, incy );
rc = args.loop_fct( &args );
rc = args.loop_fct( &args );
 
z_spmReduce( A, ytmp, y );
 
 
if(A->loc2glob != NULL) {
 
free(ytmp);
 
if(A->mtxtype != SpmGeneral){
 
free(xtmp);
 
}
 
}
 
 
return rc;
return rc;
}
}
Loading