diff --git a/include/main.h b/include/main.h index dfc04f7991a7c2f9241c10dca576c3648f53d43b..9669b8023c2958f103d10557d2955571a2641bcb 100644 --- a/include/main.h +++ b/include/main.h @@ -1,6 +1,4 @@ -#ifndef __MAIN_H_INCLUDE -#define __MAIN_H_INCLUDE 1 - +#pragma once #include "config.h" /* ================================================================================= */ @@ -40,62 +38,34 @@ enum numModel { Default is _bem. Can be set with --bem, --fem, --fembem. */ -_EXTERN_TEST_FEMBEM_ enum numModel testedModel -#ifdef ___TEST_FEMBEM___ -=_bem -#endif - ; +extern enum numModel testedModel; /*! \brief Number of right hand sides for the test */ -_EXTERN_TEST_FEMBEM_ int nbRHS -#ifdef ___TEST_FEMBEM___ -= 1 -#endif - ; - +extern int nbRHS; + /*! \brief Sparsity of the right hand sides (1 non-zero value every 'n' values). Default is nbpts/80.log10(nbpts). The objective of using 'sparseRHS' is to speed-up the computation in produitClassique() in classic.c. */ -_EXTERN_TEST_FEMBEM_ int sparseRHS -#ifdef ___TEST_FEMBEM___ -= -1 -#endif - ; - +extern int sparseRHS; + /*! \brief Use new way to compute RHS. Default is True. */ -_EXTERN_TEST_FEMBEM_ int newRHS -#ifdef ___TEST_FEMBEM___ -= 1 -#endif - ; +extern int newRHS; /*! \brief Number of points in the points cloud (BEM+FEM) */ -_EXTERN_TEST_FEMBEM_ int nbPts -#ifdef ___TEST_FEMBEM___ -= 16000 -#endif - ; +extern int nbPts; /*! \brief Tile size for the CHAMELEON solver (change it with '--chameleon-tile') */ -_EXTERN_TEST_FEMBEM_ int tileSize -#ifdef ___TEST_FEMBEM___ -= 1024 -#endif - ; +extern int tileSize; /*! \brief Number of points in the points cloud (BEM main cylinder only) */ -_EXTERN_TEST_FEMBEM_ int nbPtsMain ; +extern int nbPtsMain ; /*! \brief Number of points in the points cloud (BEM part only) */ -_EXTERN_TEST_FEMBEM_ int nbPtsBEM ; +extern int nbPtsBEM ; /*! \brief RHS for my tests */ -_EXTERN_TEST_FEMBEM_ void *rhs -#ifdef ___TEST_FEMBEM___ -= NULL -#endif - ; +extern void *rhs; /*! \brief Using partial interactions between BEM and FEM unknowns @@ -104,123 +74,58 @@ _EXTERN_TEST_FEMBEM_ void *rhs It is used to mimic the case where a part of the BEM mesh is NOT the outer surface of the FEM mesh. It changes the content of the matrix and hence the results, timings, accuracy, etc. */ -_EXTERN_TEST_FEMBEM_ int partial_fembem -#ifdef ___TEST_FEMBEM___ -= 0 -#endif - ; +extern int partial_fembem; /*! \brief Verbosity flag */ -_EXTERN_TEST_FEMBEM_ int verboseTest -#ifdef ___TEST_FEMBEM___ -= 0 -#endif - ; +extern int verboseTest; /*! \brief NTiles recursive clustering enabled */ -_EXTERN_TEST_FEMBEM_ int use_ntiles_rec -#ifdef ___TEST_FEMBEM___ -= 0 -#endif - ; +extern int use_ntiles_rec; /*! \brief flag to activate result check */ -_EXTERN_TEST_FEMBEM_ int check_result -#ifdef ___TEST_FEMBEM___ -= 0 -#endif - ; +extern int check_result; /*! \brief flag to activate traces generation */ -_EXTERN_TEST_FEMBEM_ int generate_traces -#ifdef ___TEST_FEMBEM___ -= 0 -#endif - ; +extern int generate_traces; /*! \brief flag to active an overmeshed detail */ -_EXTERN_TEST_FEMBEM_ int useDetail -#ifdef __TEST_FEMBEM__ -= 0 -#endif - ; +extern int useDetail; /*! \brief Flag to compute and display matrix norms during Hmat tests */ -_EXTERN_TEST_FEMBEM_ int computeNorm -#ifdef __TEST_FEMBEM__ -= 0 -#endif - ; +extern int computeNorm; /*! \brief Symmetry flag for the content of the matrices (0=nosym, 1=sym, 2=sym pos def, 1 by default) */ -_EXTERN_TEST_FEMBEM_ int symMatContent -#ifdef ___TEST_FEMBEM___ -= 1 -#endif - ; +extern int symMatContent; /*! \brief Symmetry flag for the solver (true by default) */ -_EXTERN_TEST_FEMBEM_ int symMatSolver -#ifdef ___TEST_FEMBEM___ -= 1 -#endif - ; +extern int symMatSolver; /*! \brief Simple precision accuracy flag */ -_EXTERN_TEST_FEMBEM_ int simplePrec -#ifdef ___TEST_FEMBEM___ -= 0 -#endif - ; +extern int simplePrec; /*! \brief Complex scalar type flag */ -_EXTERN_TEST_FEMBEM_ int complexALGO -#ifdef ___TEST_FEMBEM___ -= 1 -#endif - ; +extern int complexALGO; /*! \brief Scalar type used by the tests */ -_EXTERN_TEST_FEMBEM_ ScalarType stype -#ifdef ___TEST_FEMBEM___ -= DOUBLE_COMPLEX -#endif - ; +extern ScalarType stype; /*! \brief Wavelength (for oscillatory kernels). */ -_EXTERN_TEST_FEMBEM_ double lambda -#ifdef ___TEST_FEMBEM___ -=-1. -#endif - ; +extern double lambda; /*! \brief Number of points per wavelength (for oscillatory kernels) on the main cylinder. */ -_EXTERN_TEST_FEMBEM_ double ptsPerLambda -#ifdef ___TEST_FEMBEM___ -=10. -#endif - ; +extern double ptsPerLambda; /*! \brief Number of points per wavelength (for oscillatory kernels) on the detail. */ -_EXTERN_TEST_FEMBEM_ double ptsPerLambdaDetail -#ifdef ___TEST_FEMBEM___ -=-1. -#endif - ; +extern double ptsPerLambdaDetail; /*! \brief Radius of the leaves in the trees. */ -_EXTERN_TEST_FEMBEM_ double radiusLeaf -#ifdef ___TEST_FEMBEM___ -=0. -#endif - ; +extern double radiusLeaf; /*! \brief Write a VTK file of the mesh. Enabled with option --write-mesh. */ -_EXTERN_TEST_FEMBEM_ int writeMesh -#ifdef ___TEST_FEMBEM___ -=0 -#endif - ; +extern int writeMesh; + +/*! \brief Write a UNV file of the mesh. Enabled with option --write-mesh-unv. */ +extern int writeMeshUNV; /*! \brief List of algorithms that we can test. */ enum algo { @@ -238,114 +143,68 @@ enum algo { } ; /*! \brief Algorithms that we want to test. */ -_EXTERN_TEST_FEMBEM_ enum algo testedAlgo -#ifdef ___TEST_FEMBEM___ -=_undefined -#endif - ; +extern enum algo testedAlgo; /*! \brief Flag to use simple assembly routine for HMAT (default is 0). It can be changed with --use_simple_assembly (read in readFlagsTestHMAT() in hmat.c) */ -_EXTERN_TEST_FEMBEM_ int use_simple_assembly -#ifdef ___TEST_FEMBEM___ -=0 -#endif - ; +extern int use_simple_assembly; /*! \brief Value of the Hmat clustering divider (number of children per box, default is 2) It can be changed with "--hmat_divider" (read in readFlagsTestHMAT() in hmat.c) */ -_EXTERN_TEST_FEMBEM_ int divider -#ifdef ___TEST_FEMBEM___ -=2 -#endif - ; +extern int divider; /*! \brief Flag to compute the residual norm after the Hmat solve (default is 0). It can be changed with --hmat_residual (read in readFlagsTestHMAT() in hmat.c) */ -_EXTERN_TEST_FEMBEM_ int hmat_residual -#ifdef ___TEST_FEMBEM___ -=0 -#endif - ; +extern int hmat_residual; /*! \brief Flag to use Hmat iterative refinement (default is 0). It can be changed with --hmat_refine (read in readFlagsTestHMAT() in hmat.c) */ -_EXTERN_TEST_FEMBEM_ int hmat_refine -#ifdef ___TEST_FEMBEM___ -=0 -#endif - ; +extern int hmat_refine; /*! \brief Flag to use Hmat shuffle clustering (default is 0). It can be changed with --use_hmat_shuffle (read in readFlagsTestHMAT() in hmat.c) */ -_EXTERN_TEST_FEMBEM_ int use_hmat_shuffle -#ifdef ___TEST_FEMBEM___ -=0 -#endif - ; +extern int use_hmat_shuffle; /*! \brief Value of the Hmat shuffle clustering minimum divider (minimum number of children per box, default is 2) It can be changed with "--hmat_divider_min" (read in readFlagsTestHMAT() in hmat.c) */ -_EXTERN_TEST_FEMBEM_ int divider_min -#ifdef ___TEST_FEMBEM___ -=2 -#endif - ; +extern int divider_min; /*! \brief Value of the Hmat shuffle clustering maximum divider (maximum number of children per box, default is 4) It can be changed with "--hmat_divider_max" (read in readFlagsTestHMAT() in hmat.c) */ -_EXTERN_TEST_FEMBEM_ int divider_max -#ifdef ___TEST_FEMBEM___ -=4 -#endif - ; +extern int divider_max; -/*! \brief number of elements in z_matComp_FEM[] - */ -_EXTERN_TEST_FEMBEM_ size_t nnz_FEM -#ifdef ___TEST_FEMBEM___ -=0 -#endif -; +/*! \brief Flag to use HODLR storage and algorithms (default is 0). -/*! \brief allocated size of z_matComp_FEM[] + It can be changed with "--hodlr" (read in readFlagsTestHMAT() in hmat.c) +*/ +extern int use_hodlr; + +/*! \brief number of elements in z_matComp_FEM[] */ -_EXTERN_TEST_FEMBEM_ size_t nbEl_FEM -#ifdef ___TEST_FEMBEM___ -=0 -#endif -; +extern size_t nnz_FEM; /*! \brief Array of non-zero values of the FEM matrix */ -_EXTERN_TEST_FEMBEM_ ZnonZero*z_matComp_FEM -#ifdef ___TEST_FEMBEM___ -=NULL -#endif -; +extern ZnonZero*z_matComp_FEM; /*! \brief beginning of each line in z_matComp_FEM[] */ -_EXTERN_TEST_FEMBEM_ size_t *ptr_ordered_FEM -#ifdef ___TEST_FEMBEM___ -=NULL -#endif -; -/* ============================================================================================================ */ - /*! \brief User context for building MPF matrices and vectors in testMPF */ +extern size_t *ptr_ordered_FEM; + +/*! \brief User context for building MPF matrices and vectors in testMPF */ typedef struct { /*! \brief Pointer on the data used to assembly Mat/vec with computeDenseBlockRHS() */ void *dataVec; @@ -357,7 +216,7 @@ typedef struct { */ int colDim; } contextTestFEMBEM; -/* ============================================================================================================ */ + #ifndef HAVE_HMAT // If we compile without hmat, we need these phony declarations to use prepare_block()/advanced_compute_hmat() in computeDenseBlockFEMBEM() typedef struct hmat_block_info_struct { @@ -367,26 +226,25 @@ typedef struct hmat_block_info_struct { char (*is_guaranteed_null_col)(const struct hmat_block_info_struct * block_info, int block_col_offset, int stratum); } hmat_block_info_t; struct hmat_block_compute_context_t { - void* user_data; - int row_start, row_count, col_start, col_count; - int stratum; - void* block; + void* user_data; + int row_start, row_count, col_start, col_count; + int stratum; + void* block; }; typedef enum { - hmat_factorization_none = -1, - hmat_factorization_lu, - hmat_factorization_ldlt, - hmat_factorization_llt + hmat_factorization_none = -1, + hmat_factorization_lu, + hmat_factorization_ldlt, + hmat_factorization_llt } hmat_factorization_t; #endif -/* ============================================================================================================ */ -int computeKernelBEM(double *coord1, double *coord2, int self, Z_type *kernel) ; + +int computeKernelBEM(double *coord1, double *coord2, int self, double _Complex *kernel) ; int produitClassiqueBEM(int ffar, void *sol, MPI_Comm myComm) ; int produitClassique(void **sol, MPI_Comm myComm) ; int computeRelativeError(void *sol, void *ref, double *eps) ; int computeVecNorm(void *ref, double *norm); int sommePara(double *buf) ; -int computeCoordCylinderMain(int i, double *coord) ; int computeCoordCylinder(int i, double *coord) ; int computeCoordCylinderDetail(int i, double *coord) ; int initCylinder(int *argc, char ***argv) ; @@ -396,6 +254,7 @@ int getMeshStepDetail(double *m) ; int initPipe(int *argc, char ***argv) ; int computeCoordPipe(int i, double *coord) ; int createFemMatrix() ; +int freeFemMatrix() ; int produitClassiqueFEM(void *sol, MPI_Comm myComm) ; double* createPipe(void) ; int computeKernelFEM(int i, int j, Z_type *kernel) ; @@ -405,7 +264,7 @@ int computeRhs(void) ; void computeDenseBlockData (int *LigInf, int *LigSup, int *ColInf, int *ColSup, int *___Inf, int *___Sup, int *iloc, int *jloc, int *tailleS, int *tailleL, void *bloc, void *context); double getTime() ; int displayArray(char *s, void *f, int m, int n) ; -int testHMAT(void) ; +int testHMAT(double * relative_error) ; int prepareTEST(void); double* createCylinder(void) ; void interactionKernel(void* data, int i, int j, void* result) ; @@ -416,10 +275,10 @@ void prepare_hmat(int, int, int, int, int*, int*, int*, int*, void*, hmat_block_ void advanced_compute_hmat(struct hmat_block_compute_context_t*); int init_hmat_interface() ; int readFlagsTestHMAT(int *argc, char ***argv) ; -int testCHAMELEON(void); -int testHLIBPRO(void); +int testCHAMELEON(double * relative_error); +int testHLIBPRO(double * relative_error); int initHLIBPRO(int* argc, char*** argv) ; -int testHCHAMELEON(void); +int testHCHAMELEON(double * relative_error); #ifdef HAVE_CHAMELEON extern cham_flttype_t chameleonType; extern int (*CHAMELEON_sytrf_Tile)( cham_uplo_t, CHAM_desc_t * ); @@ -434,7 +293,7 @@ int testFEMBEM_initChameleon( cham_flttype_t stype ); int CHAMELEON_gemm_Tile( CHAM_desc_t *descA, CHAM_desc_t *descX, CHAM_desc_t *descY ); int CHAMELEON_generate_matrix( cham_flttype_t flttype, int NB, int PQ[2], - CHAM_desc_t **descA ); +CHAM_desc_t **descA ); int CHAMELEON_destroy_matrix( CHAM_desc_t *descA ); #endif @@ -452,14 +311,14 @@ struct HCHAM_desc_s { }; int HCHAMELEON_generate_matrix( cham_flttype_t flttype, int NB, int PQ[2], - HCHAM_desc_t *hdescA, - hmat_interface_t *hi ); +HCHAM_desc_t *hdescA, +hmat_interface_t *hi ); int HCHAMELEON_destroy_matrix( HCHAM_desc_t *hdescA ); hmat_info_t HCHAMELEON_getinfo( HCHAM_desc_t *hdesc ); CHAM_desc_t *HCHAMELEON_uncompress_matrix( HCHAM_desc_t *hdesc ); void HCHAMELEON_lapmr( char forward, char trans, int nrhs, - HCHAM_desc_t *hdesc, - char *Bptr ); + HCHAM_desc_t *hdesc, + char *Bptr ); #endif #ifdef HAVE_HMAT @@ -467,16 +326,15 @@ struct HMAT_desc_s; typedef struct HMAT_desc_s HMAT_desc_t; struct HMAT_desc_s { - hmat_matrix_t *hmatrix; - hmat_clustering_algorithm_t *clustering; - hmat_cluster_tree_t *cluster_tree; - hmat_admissibility_t *admissibilityCondition; - contextTestFEMBEM *myCtx; + hmat_matrix_t *hmatrix; + hmat_clustering_algorithm_t *clustering; + hmat_cluster_tree_t *cluster_tree; + hmat_admissibility_t *admissibilityCondition; + contextTestFEMBEM *myCtx; }; HMAT_desc_t *HMAT_generate_matrix( hmat_interface_t *hi ); void HMAT_destroy_matrix ( hmat_interface_t *hi, - HMAT_desc_t *hdesc ); -#endif -/* ============================================================================================================ */ + HMAT_desc_t *hdesc ); #endif + diff --git a/include/util.h b/include/util.h index 88e14b228183217c269493789b548b25c5d91484..6753c5c7335b31d77a60013b057f381fef35a62b 100644 --- a/include/util.h +++ b/include/util.h @@ -315,6 +315,7 @@ struct mpf_hmat_settings_t { hmat_compress_t compressionMethod; double epsilon; double acaEpsilon; + int max_leaf_size; }; _EXTERN_TEST_FEMBEM_ struct mpf_hmat_settings_t mpf_hmat_settings; struct mpf_hmat_create_compression_args_t { @@ -391,6 +392,11 @@ inline static void mpf_backtrace(){} /*! \brief Evaluates the expression x, if it is false then it displays an error message and aborts the code. */ #define ASSERTA(x) { if (!(x)) { SETERRA(1, "Assert failure %s", #x); }} +#if !defined (MIN) + /*! \brief Minimum value of two elements. */ +#define MIN(a,b) ( ((a)<(b)) ? (a) : (b) ) +#endif + #define MpfCalloc(a,b) calloc(a,b) #define MpfMalloc(a,b) malloc(a) #define MpfRealloc(a,b) realloc(a,b) @@ -415,6 +421,7 @@ int Mpf_printf(MPI_Comm comm, const char *fmt, ...); int MpfArgGetDouble( int *Argc, char **argv, int rflag, const char *name, double *val ) ; int MpfArgHasName( int *Argc, char **argv, int rflag, const char *name ); int MpfArgGetInt( int *Argc, char **argv, int rflag, const char *name, int *val ); +int MPI_AllreduceL(const void *sendbuf, void *recvbuf, ssize_t count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) ; int S_sortAndCompact(SnonZero *mat, int *size); int sortAndCompact(DnonZero *mat, int *size); int C_sortAndCompact(CnonZero *mat, int *size); diff --git a/src/chameleon.c b/src/chameleon.c index d1af80f5d396d60f8a211ac2290e05099980dc18..98c91fe9d8a5ee451358c0b66f8b57510862793b 100644 --- a/src/chameleon.c +++ b/src/chameleon.c @@ -91,7 +91,6 @@ int CHAMELEON_gemm_Tile( CHAM_desc_t *descA, return ierr; } -/* ============================================================================================================ */ // indices are 0 based, bounds included static int CHAMELEON_build_callback_FEMBEM( const CHAM_desc_t *desc, @@ -120,7 +119,7 @@ CHAM_desc_t **descA ) int ierr; int N = nbPts; cham_uplo_t uplo = symMatSolver ? ChamLower : ChamUpperLower; - contextTestFEMBEM *myCtx = (contextTestFEMBEM*)MpfCalloc(1, sizeof(contextTestFEMBEM)) ; CHKPTRQ(myCtx); + contextTestFEMBEM *myCtx = MpfCalloc(1, sizeof(contextTestFEMBEM)) ; CHKPTRQ(myCtx); myCtx->colDim = N; /** diff --git a/src/classic.c b/src/classic.c index 262007eb4aefff6df1fa45fa68cbdbf8404cd6fa..86217fe922a0f07b2f34e266ba33f70cc6e0655a 100644 --- a/src/classic.c +++ b/src/classic.c @@ -31,34 +31,38 @@ int produitClassiqueBEM(int ffar, void *sol, MPI_Comm myComm) { Z_type *z_sol=sol, *z_rhs=rhs ; D_type *d_sol=sol, *d_rhs=rhs ; -#ifdef _OPENMP - if (omp_get_thread_num()==0) -#endif - { - ierr=Mpf_progressBar(myComm, jg+1, nbPtsBEM+1) ; - } - ierr=computeCoordCylinder(j, &(coord_j[0])) ; CHKERRA(ierr); /* Boucle sur les lignes */ for (i=myProc ; i<nbPtsBEM ; i+=nbProcs) { ierr=computeCoordCylinder(i, &(coord_i[0])) ; CHKERRA(ierr); - ierr=computeKernelBEM(&(coord_i[0]), &(coord_j[0]), i==j, &Aij) ; - if (complexALGO) { - for (k=0 ; k<nbRHS ; k++) { + { + ierr=computeKernelBEM(&(coord_i[0]), &(coord_j[0]), i==j, (double _Complex *)&Aij) ; + if (complexALGO) { + for (k=0 ; k<nbRHS ; k++) { #pragma omp atomic - z_sol[i+k*nbPts].r += Aij.r * z_rhs[j+k*nbPts].r - Aij.i * z_rhs[j+k*nbPts].i ; + z_sol[(size_t)k*nbPts+i].r += Aij.r * z_rhs[(size_t)k*nbPts+j].r - Aij.i * z_rhs[(size_t)k*nbPts+j].i ; #pragma omp atomic - z_sol[i+k*nbPts].i += Aij.r * z_rhs[j+k*nbPts].i + Aij.i * z_rhs[j+k*nbPts].r ; - } - } else { - for (k=0 ; k<nbRHS ; k++) { + z_sol[(size_t)k*nbPts+i].i += Aij.r * z_rhs[(size_t)k*nbPts+j].i + Aij.i * z_rhs[(size_t)k*nbPts+j].r ; + } + } else { + for (k=0 ; k<nbRHS ; k++) { #pragma omp atomic - d_sol[i+k*nbPts] += Aij.r * d_rhs[j+k*nbPts] ; + d_sol[(size_t)k*nbPts+i] += Aij.r * d_rhs[(size_t)k*nbPts+j] ; + } } - } + } /* if testDofNeighbour(.. */ } /* for (i=0 */ -#pragma omp atomic - jg+=sparseRHS ; + + int jg_bak; +#pragma omp atomic capture + jg_bak = jg+=sparseRHS ; + +#ifdef _OPENMP + if (omp_get_thread_num()==0) +#endif + { + ierr=Mpf_progressBar(myComm, jg_bak+1, nbPtsBEM+1) ; + } } /* for (j=0 */ ierr=Mpf_progressBar(myComm, nbPtsBEM+1, nbPtsBEM+1) ; @@ -73,7 +77,7 @@ int produitClassique(void **sol, MPI_Comm myComm) { int ierr; double temps_initial, temps_final, temps_cpu; - void *solCLA=(void*)MpfCalloc((size_t)nbPts*nbRHS, complexALGO ? sizeof(Z_type) : sizeof(D_type)) ; CHKPTRQ(solCLA) ; + void *solCLA = MpfCalloc((size_t)nbPts*nbRHS, complexALGO ? sizeof(Z_type) : sizeof(D_type)) ; CHKPTRQ(solCLA) ; if (0) // piece of code to test the relative norms of FEM and BEM parts in the output of the product // because if one of the 2 is negligible, then our test_FEMBEM doesn't realy test it... @@ -82,11 +86,11 @@ int produitClassique(void **sol, MPI_Comm myComm) { // Compare the norms of the BEM and FEM parts ierr = produitClassiqueBEM(2, solCLA, myComm) ; CHKERRQ(ierr) ; ierr = computeVecNorm(solCLA, &normBEM); CHKERRQ(ierr); - memset(solCLA, 0, nbPts*nbRHS* (complexALGO ? sizeof(Z_type) : sizeof(D_type))); + memset(solCLA, 0, (size_t)nbPts*nbRHS* (complexALGO ? sizeof(Z_type) : sizeof(D_type))); ierr = produitClassiqueFEM(solCLA, myComm) ; CHKERRQ(ierr) ; ierr = computeVecNorm(solCLA, &normFEM); CHKERRQ(ierr); - memset(solCLA, 0, nbPts*nbRHS* (complexALGO ? sizeof(Z_type) : sizeof(D_type))); + memset(solCLA, 0, (size_t)nbPts*nbRHS* (complexALGO ? sizeof(Z_type) : sizeof(D_type))); Mpf_printf(myComm, "normBEM=%g normFEM=%g ratio FEM/BEM=%g\n", normBEM, normFEM, normFEM/normBEM); } temps_initial = getTime (); diff --git a/src/compare.c b/src/compare.c index 3fb66af03fc2881cef65287039d408ea0296f3f7..8e2a5f69bb0f8e2dc61a02b8f867a2d18d2127f3 100644 --- a/src/compare.c +++ b/src/compare.c @@ -18,7 +18,7 @@ frobenius_update( double *scale, double *sumsq, const double value ) } int computeRelativeError(void *sol, void *ref, double *eps) { - int i, ierr, rank ; + int ierr, rank ; double normRef[2], normDelta[2]; Z_type *z_sol=sol, *z_ref=ref ; D_type *d_sol=sol, *d_ref=ref ; @@ -30,7 +30,7 @@ int computeRelativeError(void *sol, void *ref, double *eps) { normDelta[1] = 0.; normRef[0] = 1.; normRef[1] = 0.; - for (i=0 ; i<nbPts*nbRHS ; i++) { + for (size_t i=0 ; i<(size_t)nbPts*nbRHS ; i++) { if (complexALGO) { frobenius_update( normDelta, normDelta+1, (z_sol[i].r - z_ref[i].r) ); frobenius_update( normDelta, normDelta+1, (z_sol[i].i - z_ref[i].i) ); @@ -55,7 +55,7 @@ int computeRelativeError(void *sol, void *ref, double *eps) { } int computeVecNorm(void *ref, double *norm) { - int i, ierr, rank ; + int ierr, rank ; double normRef ; Z_type *z_ref=ref ; D_type *d_ref=ref ; @@ -64,7 +64,7 @@ int computeVecNorm(void *ref, double *norm) { *norm=0.; if (rank==0) { normRef=0. ; - for (i=0 ; i<nbPts*nbRHS ; i++) { + for (size_t i=0 ; i<(size_t)nbPts*nbRHS ; i++) { if (complexALGO) { normRef += z_ref[i].r*z_ref[i].r+z_ref[i].i*z_ref[i].i ; } else { @@ -78,12 +78,10 @@ int computeVecNorm(void *ref, double *norm) { } int sommePara(double *buf) { - int ierr, n ; - - n = (complexALGO?2:1)*nbPts*nbRHS ; + size_t n = (size_t)(complexALGO?2:1)*nbPts*nbRHS ; /* En parallele, somme les resultats des differents processeurs vers tous les proc */ /* MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm) */ - ierr=MPI_Allreduce(MPI_IN_PLACE, buf, n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) ; CHKERRQ(ierr) ; + int ierr=MPI_AllreduceL(MPI_IN_PLACE, buf, n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) ; CHKERRQ(ierr) ; return 0 ; } diff --git a/src/cylinder.c b/src/cylinder.c index 1cd0bc3cfe0de457d7a96c422551387d973d1261..d50087c208dbac988bdc620c6e0ca097ab2bb741 100644 --- a/src/cylinder.c +++ b/src/cylinder.c @@ -26,7 +26,6 @@ static int nbPtsLoop=0; /*! \brief Number of points per loops in the BEM mesh (detail cylinder) */ static int nbPtsLoopDetail=0; -/* ============================================================================================================ */ /*! \brief Computes coordinates of point number 'i' The points are computed on a cylinder defined by 'radius' and 'height'. The number of points is given by nbPtsMain. @@ -36,7 +35,8 @@ static int nbPtsLoopDetail=0; \param coord the coordinates x y z of this point (output) \return 0 for success */ -int computeCoordCylinderMain(int i, double *coord) { +inline static int computeCoordCylinderMain(int i, double *coord) { + ASSERTQ(testedModel == _bem); static double angleStep=0. ; static double zStep=0. ; double theta ; @@ -73,6 +73,8 @@ int computeCoordCylinderMain(int i, double *coord) { */ int computeCoordCylinder(int i, double *coord) { ASSERTQ(i<nbPtsBEM && i>=0); + if (testedModel == _fembem) + return computeCoordPipe(i, coord); if (useDetail && i >= nbPtsMain) { i = i - nbPtsMain; return computeCoordCylinderDetail(i, coord); @@ -92,6 +94,7 @@ int computeCoordCylinder(int i, double *coord) { \return 0 for success */ int computeCoordCylinderDetail(int i, double *coord) { + ASSERTQ(testedModel == _bem); static double angleStep = 0. ; static double zStep = 0. ; double theta ; @@ -117,18 +120,18 @@ int computeCoordCylinderDetail(int i, double *coord) { /* Le detail est oriente selon y, perpendiculaire au cylindre principal, a mi-hauteur */ theta = (double)i * angleStep ; coord[0] = radiusD * sin(theta) ; - coord[1] = zStep * (i+1) + radius ; + coord[1] = zStep * (double)(i+1) + radius ; coord[2] = radiusD * cos(theta) + 0.5 * height ; return 0 ; } -/* ============================================================================================================ */ + double* createCylinder(void) { - double* result = (double*) MpfCalloc((size_t)3 * nbPts, sizeof(double)); + double* result = MpfCalloc((size_t)3 * nbPts, sizeof(double)); int i, ierr; #pragma omp parallel for private(ierr) schedule(dynamic,10000) for (i = 0; i < nbPts; i++) { - ierr=computeCoordCylinder(i, &(result[3*i]) ) ; CHKERRA(ierr) ; + ierr=computeCoordCylinder(i, &(result[(size_t)3*i]) ) ; CHKERRA(ierr) ; } int rank; @@ -141,7 +144,12 @@ double* createCylinder(void) { "byte_order=\"LittleEndian\" compressor=\"vtkZLibDataCompressor\">\n"); fprintf(fvtk, "\t<UnstructuredGrid>\n"); - int nbTria = 2*(nbPts-nbPtsLoop-nbPtsLoopDetail-1); + ASSERTA(nbPtsMain-nbPtsLoop-1 < INT_MAX/2); + int nbTria = 2*(nbPtsMain-nbPtsLoop-1); + if (nbPtsLoopDetail>0) { + ASSERTA(nbTria < INT_MAX-2*(nbPts-nbPtsLoopDetail-nbPtsMain-1)); + nbTria += 2*(nbPts-nbPtsLoopDetail-nbPtsMain-1); + } fprintf(fvtk, "\t\t<Piece NumberOfPoints=\"%d\" NumberOfCells=\"%d\">\n", nbPts, nbTria); // Write the vertices fprintf(fvtk, "\t\t\t<Points>\n"); @@ -164,7 +172,7 @@ double* createCylinder(void) { // 2nd Triangle fprintf(fvtk, "%d %d %d\n", iVtk+1, iVtk+nbPtsLoop, iVtk+nbPtsLoop+1); } - /* Loop on the triangles (main cylinder) */ + /* Loop on the triangles (detail) */ for (int iVtk = nbPtsMain; iVtk+nbPtsLoopDetail+1 < nbPts; iVtk++) { // 1st triangle fprintf(fvtk, "%d %d %d\n", iVtk, iVtk+1, iVtk+nbPtsLoopDetail); @@ -195,13 +203,83 @@ double* createCylinder(void) { fprintf(fvtk, "\t\t\t</Cells>\n"); fprintf(fvtk, "\t\t</Piece>\n\t</UnstructuredGrid>\n</VTKFile>\n"); fclose(fvtk); + Mpf_printf(MPI_COMM_WORLD, "Done writing 'mesh.vtu'\n") ; + } + + if (rank==0 && writeMeshUNV) { +#define sNODE_UNV_ID " 2411" +#define sELT_UNV_ID " 2412" +#define sGRP_UNV_ID " 2435" +#define sUNV_SEPARATOR " -1" +#define sNODE_UNV_DESCR " 1 1 11" +#define sELT_TRIA3_DESC " 91 1 1 5 3" +#define sELT_TETRA4_DESC " 111 1 2 9 4" + Mpf_printf(MPI_COMM_WORLD, "Writing 'mesh.unv'... ") ; + + FILE *funv=fopen("mesh.unv", "w"); + /* -------- */ + /* Vertices */ + /* -------- */ + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + fprintf(funv, "%s\n", sNODE_UNV_ID ); + double* coord=result; + for( i = 1; i<=nbPts; i++, coord+=3 ) { + fprintf(funv, "%10d%s\n", i, sNODE_UNV_DESCR ); + fprintf(funv, "%25.16E%25.16E%25.16E\n", coord[0], coord[1], coord[2]) ; + } + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + /* -------- */ + /* Elements */ + /* -------- */ + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + fprintf(funv, "%s\n", sELT_UNV_ID ); + int nbTria=0; + /* Loop on the triangles (main cylinder) */ + for (int iVtk = 1; iVtk+nbPtsLoop+1 <= nbPtsMain; iVtk++) { + // 1st triangle + fprintf(funv, "%10d%s\n", ++nbTria, sELT_TRIA3_DESC ) ; + fprintf(funv, "%10d%10d%10d\n", iVtk, iVtk+1, iVtk+nbPtsLoop) ; + // 2nd Triangle + fprintf(funv, "%10d%s\n", ++nbTria, sELT_TRIA3_DESC ) ; + fprintf(funv, "%10d%10d%10d\n", iVtk+1, iVtk+nbPtsLoop, iVtk+nbPtsLoop+1); + } + /* Loop on the triangles (detail) */ + for (int iVtk = nbPtsMain+1; iVtk+nbPtsLoopDetail+1 <= nbPts; iVtk++) { + // 1st triangle + fprintf(funv, "%10d%s\n", ++nbTria, sELT_TRIA3_DESC ) ; + fprintf(funv, "%10d%10d%10d\n", iVtk, iVtk+1, iVtk+nbPtsLoopDetail) ; + // 2nd Triangle + fprintf(funv, "%10d%s\n", ++nbTria, sELT_TRIA3_DESC ) ; + fprintf(funv, "%10d%10d%10d\n", iVtk+1, iVtk+nbPtsLoopDetail, iVtk+nbPtsLoopDetail+1); + } + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + /* ------ */ + /* Groups */ + /* ------ */ + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + fprintf(funv, "%s\n", sGRP_UNV_ID ); + /* Group C_EXT for all the triangles */ + fprintf(funv, "%10d%10d%10d%10d%10d%10d%10d%10d\n", 1, 0, 0, 0, 0, 0, 0, nbTria) ; + fprintf(funv, "C_EXT\n") ; + for (i=1 ; i<=nbTria ; i++) { + fprintf(funv, "%10d%10d%10d%10d", 8, i, 0, 0) ; + if ( (i%2==0) || (i==nbTria) ) + fprintf(funv, "\n") ; + } + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + fclose(funv); + Mpf_printf(MPI_COMM_WORLD, "Done.\n") ; } + // In case we pass a second time in this routine + writeMesh = 0; + writeMeshUNV = 0; + return result; } -/* ============================================================================================================ */ -/*! \brief Reads command line arguments defining the shape of the cylinder - + +/*! \brief Reads command line arguments defining the shape of the cylinder + \return 0 for success */ int initCylinder(int *argc, char ***argv) { @@ -226,7 +304,6 @@ int initCylinderDetail(int *argc, char ***argv) { double coord[3] ; double step=0; - // ATTENTION : LES MESURES DU CYLINDRE DU DETAIL SONT EN NOMBRE DE LAMBDA if (MpfArgGetDouble(argc, *argv, 1, "-radius_detail", &radiusDetail)) { @@ -255,7 +332,6 @@ int initCylinderDetail(int *argc, char ***argv) { return 0 ; } -/* ============================================================================================================ */ int getMeshStep(double *m) { if (meshStep==0.) SETERRQ(1, "meshStep not yet initialized. Come back later.") ; @@ -271,4 +347,3 @@ int getMeshStepDetail(double *m) { *m = meshStepDetail ; return 0 ; } -/* ============================================================================================================ */ diff --git a/src/hchameleon.c b/src/hchameleon.c index 7d701637f71798a4cad5766a65bae51cd0427089..fad9c0f5367f427462def1ec0099f82dae7601ea 100644 --- a/src/hchameleon.c +++ b/src/hchameleon.c @@ -13,6 +13,7 @@ /** * Detect if the tile is local or not */ +#if 0 inline static int chameleon_desc_islocal( const CHAM_desc_t *A, int m, int n ) { #if defined(CHAMELEON_USE_MPI) @@ -22,6 +23,7 @@ inline static int chameleon_desc_islocal( const CHAM_desc_t *A, int m, int n ) return 1; #endif /* defined(CHAMELEON_USE_MPI) */ } +#endif // indices are 0 based, bounds included /* Initialize a given tile with an h-matrix, given the row and cluster tree */ @@ -146,7 +148,7 @@ hmat_interface_t *hi ) * We exploit the fact that row and column distributions are * identical to initialize only on rows. */ - hdescA->perm = (int *)MpfCalloc( descA->lm, sizeof(int) ); + hdescA->perm = MpfCalloc( descA->lm, sizeof(int) ); hmat_cluster_tree_t *main_cluster = (hmat_cluster_tree_t *) hmat_create_cluster_tree_from_builder( points, 3, nbPts, ct_builder ); @@ -155,7 +157,7 @@ hmat_interface_t *hi ) memcpy( hdescA->perm,hmat_cluster_get_indices( main_cluster ), descA->lm * sizeof(int) ); - hdescA->clusters = (hmat_cluster_tree_t **)MpfCalloc( descA->lmt + 1, sizeof(hmat_cluster_tree_t *) ); + hdescA->clusters = MpfCalloc( descA->lmt + 1, sizeof(hmat_cluster_tree_t *) ); clusters = hdescA->clusters; if ( descA->lmt == 1 ) { *clusters = main_cluster; @@ -215,7 +217,6 @@ HCHAMELEON_destroy_matrix( HCHAM_desc_t *hdescA ) hmat_delete_admissibility( hdescA->admissibilityCondition ); - hdescA->hi->dump_info( hdescA->hmatrix, "testHCHAMELEON_matrix"); /* Destroys the HMAT structure */ @@ -249,7 +250,6 @@ HCHAMELEON_destroy_matrix( HCHAM_desc_t *hdescA ) return 0; } -/* ============================================================================================================ */ /* Provides a flat matrix (array of values) given an hmatrix structure */ typedef int (*core_lacpy_fct_t)( cham_uplo_t, int, int, const void *, int, void *, int ); diff --git a/src/help.c b/src/help.c index 73c55a93d032e9dbda0b37ebdc65516f86d51c1b..9ed0ba4ca2c7079b1dab03a1a55ca5ffff8dca97 100644 --- a/src/help.c +++ b/src/help.c @@ -7,7 +7,7 @@ int printHelp() { printf("\ntest_FEMBEM usage:\n-----------------\n\n" " --bem/--fem/--fembem: Numerical model (bem is dense, fem is sparse). Default: bem.\n" - " -nbpts: Number of unknowns of the global linear system. Default: 16000.\n" + " -nbpts: Number of unknowns of the global linear system. Default: 16000 (can be in floating point notation, like 1.2e4).\n" " -nbrhs: Number of right-hand sides of the global linear system. Default: 1.\n" " -s/-d/-c/-z: Scalar type of the computation (real computation uses 1/R kernel in BEM, complex computation uses oscillatory exp(ikr)/r kernel). Default: z.\n" " -v: Verbose mode, to display values of data (extremely verbose). Default is OFF.\n" @@ -16,6 +16,7 @@ int printHelp() { " -lambda: Wavelength (for oscillatory kernels). Mutually exclusive with -ptsperlambda. Default lambda is set with 10 points per wavelength.\n" " -ptsperlambda: Points per wavelength on the main cylinder (for oscillatory kernels). Default: 10.\n" " --write-mesh: Write a VTK file 'mesh.vtu' of the mesh. Default is OFF.\n" + " --write-mesh-unv: Write a UNV file 'mesh.unv' of the mesh. Default is OFF.\n" " --compute_norm: Compute and display matrix norms during Hmat tests. Default: OFF.\n" "\n" " BEM tests use a surfacic cylinder with an optionnal cylinder detail, defined with:\n" @@ -71,8 +72,10 @@ int printHelp() { " --use_hmat_shuffle: Using Hmat shuffle clustering. Default: NO.\n" " --hmat_divider_min: Value of the Hmat shuffle clustering minimum divider (minimum number of children per box). Default: 2.\n" " --hmat_divider_max: Value of the Hmat shuffle clustering maximum divider (maximum number of children per box). Default: 4.\n" + " --hodlr: Using HODLR. Default: NO.\n" " --hmat-eps-assemb: Compression epsilon. Default: 1e-4.\n" " --hmat-eps-recompr: Recompression epsilon. Default: 1e-4.\n" + " --hmat-leaf-size: Maximum leaf size. Default: -1 (set by hmat itself).\n" " Compression algorithm: default is ACA+, it can be changed with:\n" " --hmat-svd: Using SVD compression.\n" " --hmat-aca-partial: Using ACA with partial pivoting.\n" @@ -124,4 +127,3 @@ int printHelp() { return 0; } - diff --git a/src/hmat.c b/src/hmat.c index 14be0ce11aae56e4553ff68e4eb51a9e7ab37eb2..b32335884d153de89af58ff2d6c82451442e0274 100644 --- a/src/hmat.c +++ b/src/hmat.c @@ -5,7 +5,7 @@ int init_hmat_interface() { if(mpf_hmat_settings.interface_initialized) return 0; for(int i = HMAT_SIMPLE_PRECISION; i <= HMAT_DOUBLE_COMPLEX; i++) { - mpf_hmat_settings.interfaces[i] = (hmat_interface_t *) MpfCalloc(1, sizeof(hmat_interface_t)); + mpf_hmat_settings.interfaces[i] = MpfCalloc(1, sizeof(hmat_interface_t)); switch(mpf_hmat_settings.engine) { case mpf_hmat_seq: hmat_init_default_interface(mpf_hmat_settings.interfaces[i], i); break; #ifdef HMAT_HAVE_TOYRT @@ -70,6 +70,12 @@ int readFlagsTestHMAT(int *argc, char ***argv) { ASSERTQ(divider_max>=divider_min); } + /* Flag to use HODLR storage and algorithms */ + if (MpfArgHasName(argc, *argv, 1, "--hodlr")) { + use_hodlr=1; + Mpf_printf(MPI_COMM_WORLD, "Using HODLR\n") ; + } + return 0; } @@ -77,7 +83,7 @@ int readFlagsTestHMAT(int *argc, char ***argv) { void interactionKernel(void* user_context, int i, int j, void* result) { int ierr; double coord_i[3], coord_j[3]; - Z_type Aij=Mpf_z_zero, Bij=Mpf_z_zero ; + double _Complex Aij = 0, Bij = 0; contextTestFEMBEM *myCtx = (contextTestFEMBEM *)user_context; // Update indices (if we compute only a sub-part of the whole matrix) @@ -93,19 +99,18 @@ void interactionKernel(void* user_context, int i, int j, void* result) { // FEM part if (testedModel == _fembem || testedModel == _fem) { - ierr=computeKernelFEM(i, j, &Bij); CHKERRA(ierr); - Aij.r += Bij.r; - Aij.i += Bij.i; + ierr=computeKernelFEM(i, j, (Z_type*)&Bij); CHKERRA(ierr); + Aij += Bij; } switch(stype) { case SIMPLE_PRECISION: case DOUBLE_PRECISION: - *((double*)result) = Aij.r; + *((double*)result) = Aij; break; case SIMPLE_COMPLEX: case DOUBLE_COMPLEX: - *((Z_type*)result) = Aij; + *((double _Complex*)result) = Aij; break; default: SETERRA(1, "Unknown stype"); @@ -134,11 +139,11 @@ typedef struct { static void free_block_data(void *v) { block_data_t* p = (block_data_t*) v; - if (p->col_points) MpfFree(p->col_points); p->col_points=NULL; - if (p->row_points) MpfFree(p->row_points); p->row_points=NULL; - if (p->dof_col_used) MpfFree(p->dof_col_used); p->dof_col_used=NULL; - if (p->dof_row_used) MpfFree(p->dof_row_used); p->dof_row_used=NULL; - if (p) MpfFree(p); p=NULL; + if (p->col_points) MpfFree(p->col_points); + if (p->row_points) MpfFree(p->row_points); + if (p->dof_col_used) MpfFree(p->dof_col_used); + if (p->dof_row_used) MpfFree(p->dof_row_used); + if (p) MpfFree(p); } /** Callback for hmat block info */ @@ -198,8 +203,8 @@ prepare_hmat(int row_start, info->release_user_data = free_block_data; /* Arrays to store non-null rows and columns */ - bdata->dof_row_used=(int*)MpfCalloc(row_count, sizeof(int)); CHKPTRA(bdata->dof_row_used); - bdata->dof_col_used=(int*)MpfCalloc(col_count, sizeof(int)); CHKPTRA(bdata->dof_col_used); + bdata->dof_row_used = MpfCalloc(row_count, sizeof(int)); CHKPTRA(bdata->dof_row_used); + bdata->dof_col_used = MpfCalloc(col_count, sizeof(int)); CHKPTRA(bdata->dof_col_used); /* precompute and store the coordinates of the points (rows and columns) and tag BEM rows and columns as non-null @@ -285,7 +290,7 @@ advanced_compute_hmat(struct hmat_block_compute_context_t *b) { for (i = 0; i < b->row_count; ++i, dValues+=step, row_point++) { // Idem with dof_row_used[] int row = bdata->row_hmat2client ? bdata->row_hmat2client[i + b->row_start + bdata->row_start] : i + b->row_start + bdata->row_start; int row_g = row + myCtx->rowOffset; - Z_type Aij=Mpf_z_zero, Bij=Mpf_z_zero ; + double _Complex Aij = 0, Bij = 0; // BEM part if (row_g < nbPtsBEM && col_g < nbPtsBEM) { @@ -294,19 +299,18 @@ advanced_compute_hmat(struct hmat_block_compute_context_t *b) { // FEM part if (testedModel == _fembem || testedModel == _fem) { - ierr=computeKernelFEM(row_g, col_g, &Bij); CHKERRA(ierr); - Aij.r += Bij.r; - Aij.i += Bij.i; + ierr=computeKernelFEM(row_g, col_g, (Z_type*)&Bij); CHKERRA(ierr); + Aij += Bij; } switch(stype) { case SIMPLE_PRECISION: case DOUBLE_PRECISION: - *((double*)dValues) = Aij.r; + *((double*)dValues) = Aij; break; case SIMPLE_COMPLEX: case DOUBLE_COMPLEX: - *((Z_type*)dValues) = Aij; + *(double _Complex*)dValues = Aij; break; default: SETERRA(1, "Unknown stype"); @@ -379,13 +383,12 @@ void computeDenseBlockFEMBEM_Cprec(int *LigInf, int *LigSup, int *ColInf, int *C int *___Inf, int *___Sup, int *iloc, int *jloc, int *tailleS, int *tailleL, void *bloc, void *context) { int id, ic, il, inxBloc ; - char *zbloc ; ASSERTA(context); - zbloc=(char*)MpfCalloc((size_t)(*tailleS)*(*___Sup - *___Inf), sizeof(double)*(complexALGO?2:1)); CHKPTRA(zbloc) ; + void * zbloc = MpfCalloc((size_t)(*tailleS)*(*___Sup - *___Inf), sizeof(double)*(complexALGO?2:1)); CHKPTRA(zbloc) ; /* Appelle la routine en double precision */ - computeDenseBlockFEMBEM( LigInf, LigSup, ColInf, ColSup , + computeDenseBlockFEMBEM( LigInf, LigSup, ColInf, ColSup, ___Inf, ___Sup, iloc, jloc, tailleS, tailleL, zbloc, context) ; @@ -404,9 +407,9 @@ void computeDenseBlockFEMBEM_Cprec(int *LigInf, int *LigSup, int *ColInf, int *C } } - if (zbloc) MpfFree(zbloc) ; zbloc=NULL ; + MpfFree(zbloc); } -/* ================================================================================== */ + #ifdef HAVE_HMAT HMAT_desc_t *HMAT_generate_matrix( hmat_interface_t *hi ) { @@ -432,6 +435,10 @@ HMAT_desc_t *HMAT_generate_matrix( hmat_interface_t *hi ) { } else { clustering = hmat_create_clustering_median(); + if(mpf_hmat_settings.max_leaf_size > 0) { + Mpf_printf(MPI_COMM_WORLD, "HMat maximum leaf size: %d\n", mpf_hmat_settings.max_leaf_size); + clustering = hmat_create_clustering_max_dof(clustering, mpf_hmat_settings.max_leaf_size); + } hmat_set_clustering_divider(clustering, divider); ct_builder = hmat_create_cluster_tree_builder( clustering ); } @@ -445,10 +452,15 @@ HMAT_desc_t *HMAT_generate_matrix( hmat_interface_t *hi ) { Mpf_printf(MPI_COMM_WORLD, "ClusterTree node count = %d\n", hmat_tree_nodes_count(cluster_tree)); /* Create the H-matrix with this cluster tree and an admissibility criteria */ - hmat_admissibility_param_t admissibility_param; - hmat_init_admissibility_param(&admissibility_param); - admissibility_param.eta = 3.0; - hmat_admissibility_t *admissibilityCondition = hmat_create_admissibility(&admissibility_param); + hmat_admissibility_t *admissibilityCondition; + if (use_hodlr) + admissibilityCondition = hmat_create_admissibility_hodlr(); + else { + hmat_admissibility_param_t admissibility_param; + hmat_init_admissibility_param(&admissibility_param); + admissibility_param.eta = 3.0; + admissibilityCondition = hmat_create_admissibility(&admissibility_param); + } // hmat_admissibility_t *admissibilityCondition = hmat_create_admissibility_standard(3.0); hmat_matrix_t* hmatrix = hi->create_empty_hmatrix_admissibility(cluster_tree, cluster_tree, symMatSolver, admissibilityCondition); @@ -481,7 +493,7 @@ HMAT_desc_t *HMAT_generate_matrix( hmat_interface_t *hi ) { ctx.prepare = prepare_hmat; ctx.advanced_compute = advanced_compute_hmat; } - contextTestFEMBEM *myCtx = (contextTestFEMBEM*)MpfCalloc(1, sizeof(contextTestFEMBEM)) ; CHKPTRA(myCtx); + contextTestFEMBEM *myCtx = MpfCalloc(1, sizeof(contextTestFEMBEM)) ; CHKPTRA(myCtx); myCtx->colDim = nbPts; ctx.user_context = myCtx; hi->assemble_generic(hmatrix, &ctx); @@ -506,9 +518,8 @@ void HMAT_destroy_matrix( hmat_interface_t *hi, hmat_delete_admissibility(hdesc->admissibilityCondition); hmat_delete_cluster_tree(hdesc->cluster_tree); hmat_delete_clustering(hdesc->clustering); - if (hdesc->myCtx) MpfFree(hdesc->myCtx) ; hdesc->myCtx=NULL ; + if (hdesc->myCtx) MpfFree(hdesc->myCtx); MpfFree(hdesc); } } #endif -/* ================================================================================== */ diff --git a/src/kernel.c b/src/kernel.c index 4ad5e8e74bde55bee462ac1673a814d19be4adfb..5415bd0574e3beb369f1eb6a672b2dc7b1b690f6 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -1,6 +1,6 @@ #include "main.h" extern double meshStep; -int computeKernelBEM(double *coord1, double *coord2, int self, Z_type *kernel) { +int computeKernelBEM(double *coord1, double *coord2, int self, double complex *kernel) { double r, v[3], k=0. ; v[0]=coord1[0]-coord2[0] ; @@ -11,22 +11,18 @@ int computeKernelBEM(double *coord1, double *coord2, int self, Z_type *kernel) { if (complexALGO) { k=2*M_PI/lambda; - - kernel->r=cos(k*r)/(M_PI*4.*r) ; - kernel->i=sin(k*r)/(M_PI*4.*r) ; + *kernel = cexp(I * k * r) / (4 * M_PI * r); } else - kernel->r=1./(M_PI*4.*r) ; + *kernel = 1 / (4 * M_PI * r); if (symMatContent==0) { // non symmetric case // s = prod scal nu.r avec nu=(1,1,1)/sqrt(3) double s = (v[0]+v[1]+v[2])/1.7320508075688772; double fs=1+atan(s)/M_PI; - kernel->r *= fs; - kernel->i *= fs; + *kernel *= fs; } if (symMatContent==2 && self) { // symmetric positive definite case : add 'n.max(A)' times Identity - kernel->r *= (double)nbPts; + *kernel *= nbPts; } return 0; } - diff --git a/src/main.c b/src/main.c index a9a1de37d2912eb452f8bf2cd340d4b4dcdaa179..0d6efdafd3ee245aa59e8fc3808d09191588d615 100644 --- a/src/main.c +++ b/src/main.c @@ -2,18 +2,67 @@ #include "main.h" +// instantiation of globales defined in main.h +enum numModel testedModel = _bem; +int nbRHS = 1; +int sparseRHS = -1; +int newRHS = 1; +int nbPts = 16000; +int tileSize = 1024; +int nbPtsMain; +int nbPtsBEM; +void *rhs = NULL; +void *solFMMfar = NULL; +void *solCLAfar = NULL; +void *solFMMnear = NULL; +void *solCLAnear = NULL; +int nbPtsPerLeaf = 50; +int nbAlgoRuns = 1; +char *sTYPE_ELEMENT_ARRAY_test_fembem[] = {NULL}; +int coupled = 0; +int partial_fembem = 0; +int verboseTest = 0; +int use_ntiles_rec = 0; +int check_result = 0; +int generate_traces = 0; +int useDetail; +int computeNorm; +int symMatContent = 1; +int symMatSolver = 1; +int simplePrec = 0; +int complexALGO = 1; +ScalarType stype = DOUBLE_COMPLEX; +double lambda = -1.; +double ptsPerLambda = 10.; +double ptsPerLambdaDetail = -1.; +double radiusLeaf = 0.; +int writeMesh = 0; +int writeMeshUNV = 0; +enum algo testedAlgo = _undefined; +int use_simple_assembly = 0; +int divider = 2; +int hmat_residual = 0; +int hmat_refine = 0; +int use_hmat_shuffle = 0; +int divider_min = 2; +int divider_max = 4; +int use_hodlr = 0; +size_t nnz_FEM = 0; +ZnonZero *z_matComp_FEM = NULL; +size_t *ptr_ordered_FEM = NULL; + /*! \brief Main routine \return 0 for success */ int main(int argc, char **argv) { int ierr ; - double step ; + double step, max_error = DBL_MAX; + MpfArgGetDouble(&argc, argv, 1, "--max-error", &max_error); if (MpfArgHasName(&argc, argv, 1, "-h") || MpfArgHasName(&argc, argv, 1, "--help") ) { ierr=printHelp() ; return ierr; } - /* --- Choix de l'algo a tester (_undefined par defaut) --- */ if (MpfArgHasName(&argc, argv, 1, "-gemvhmat") > 0) { testedAlgo = _gemvHMAT ; @@ -136,7 +185,11 @@ int main(int argc, char **argv) { verboseTest=1 ; Mpf_printf(MPI_COMM_WORLD, "Mode verbose\n") ; } - if (MpfArgGetInt(&argc, argv, 1, "-nbpts", &nbPts)) { + double nbf; + if (MpfArgGetDouble(&argc, argv, 1, "-nbpts", &nbf)) { + if (nbf<0 || nbf>INT_MAX) + SETERRQ(1, "Incorrect value '-nbpts %.0f' (must be between 0 and %d)", nbf, INT_MAX); + nbPts = (int)nbf; Mpf_printf(MPI_COMM_WORLD, "Reading nbPts = %d\n", nbPts) ; } if (MpfArgHasName(&argc, argv, 1, "-ntilesrec") > 0) { @@ -161,7 +214,12 @@ int main(int argc, char **argv) { /* --- Write a VTK file 'mesh.vtu' of the mesh --- */ if (MpfArgHasName(&argc, argv, 1, "--write-mesh")) { writeMesh=1; - Mpf_printf(MPI_COMM_WORLD, "Write a VTK file 'mesh.vtu' of the mesh\n") ; + Mpf_printf(MPI_COMM_WORLD, "Activate writing of VTK file 'mesh.vtu'\n") ; + } + /* --- Write a UNV file 'mesh.unv' of the mesh --- */ + if (MpfArgHasName(&argc, argv, 1, "--write-mesh-unv")) { + writeMeshUNV=1; + Mpf_printf(MPI_COMM_WORLD, "Activate writing of UNV file 'mesh.unv'\n") ; } switch(testedModel){ @@ -192,7 +250,6 @@ int main(int argc, char **argv) { Mpf_printf(MPI_COMM_WORLD, "Using OLD right-hand sides.\n"); } - /* Added detail on the main cylinder */ if (MpfArgHasName(&argc, argv, 1, "-with_detail")) { useDetail = 1; @@ -292,36 +349,44 @@ int main(int argc, char **argv) { /* Prepare the mesh and RHS used for the FMM/HMAT tests */ ierr = prepareTEST() ; CHKERRQ(ierr) ; - + double relative_error; /* Run the test */ switch(testedAlgo) { case _gemvHMAT: case _solveHMAT: case _inverseHMAT: - ierr = testHMAT() ; CHKERRQ(ierr) ; + ierr = testHMAT(&relative_error) ; CHKERRQ(ierr) ; break; case _gemvCHAMELEON: case _solveCHAMELEON: - ierr = testCHAMELEON(); CHKERRQ(ierr); + ierr = testCHAMELEON(&relative_error); CHKERRQ(ierr); break; case _gemvHLIBPRO: case _solveHLIBPRO: - ierr = testHLIBPRO(); CHKERRQ(ierr); + ierr = testHLIBPRO(&relative_error); CHKERRQ(ierr); break; case _gemvHCHAMELEON: case _gemmHCHAMELEON: case _solveHCHAMELEON: - ierr = testHCHAMELEON(); CHKERRQ(ierr); + ierr = testHCHAMELEON(&relative_error); CHKERRQ(ierr); break; default: SETERRQ(1, "Unknown algorithm=%d", testedAlgo); break; } - + int error_exit = 0; + if(relative_error > max_error) { + error_exit = 1; + Mpf_printf(MPI_COMM_WORLD, "Error is too high (%g > %g), exiting with error.", relative_error, max_error); + } Mpf_printf(MPI_COMM_WORLD, "\ntest_FEMBEM : end of computation\n"); - - ierr=SCAB_Exit(&argc, argv) ; - if (rhs) MpfFree(rhs) ; rhs=NULL; - return ierr ; + if (rhs) { + MpfFree(rhs); + rhs = NULL; + } + + ierr=SCAB_Exit(&argc, argv); CHKERRQ(ierr); + + return error_exit; } diff --git a/src/pipe.c b/src/pipe.c index 8e39cdc5a213d6565cfdd048bbbd505fc3f66397..b96e6f7fded6a816ab724a2278f467fa6c752c0f 100644 --- a/src/pipe.c +++ b/src/pipe.c @@ -1,7 +1,5 @@ #include "main.h" -/* ============================================================================================================ */ - extern double radius, height, meshStep; /*! \brief Number of point on one loop around the pipe */ @@ -14,7 +12,6 @@ static int n_z=0; /* routines de tri dans MAT/IMPLS/SPARSE/matutils_sparse.c */ extern int compare_ZnonZero_Line(const void *pt1, const void *pt2) ; -/* ============================================================================================================ */ /*! \brief Reads command line arguments defining the shape of the cylinder \return 0 for success @@ -37,7 +34,6 @@ int initPipe(int *argc, char ***argv) { return 0; } -/* ============================================================================================================ */ /*! \brief Computes coordinates of point number 'i' The points are computed on a pipe defined by 'radius' and 'height'. The number of points is given by nbPts. @@ -61,7 +57,7 @@ int computeCoordPipe(int i, double *coord) { angleStep*n_l = 2.pi zStep=heigth/(n_z.n_l) */ - double dn_z=pow(height*height*nbPts/M_PI/radius/radius, 1./3.); + double dn_z=pow(height*height*(double)nbPts/M_PI/radius/radius, 1./3.); double dn_r=radius*dn_z/2./height; double dn_l=4.*M_PI*dn_r; n_l=(int)ceil(dn_l); @@ -98,17 +94,20 @@ int computeCoordPipe(int i, double *coord) { return 0; } -/* ============================================================================================================ */ + double* createPipe(void) { - double* result = (double*) MpfCalloc(3 * nbPts, sizeof(double)); CHKPTRA(result); + double* result = MpfCalloc((size_t)3 * nbPts, sizeof(double)); CHKPTRA(result); int i, ierr; #pragma omp parallel for private(ierr) schedule(dynamic,10000) for (i = 0; i < nbPts; i++) { - ierr=computeCoordPipe(i, &(result[3*i]) ) ; CHKERRA(ierr) ; + ierr=computeCoordPipe(i, &(result[(size_t)3*i]) ) ; CHKERRA(ierr) ; } return result; } -/* ============================================================================================================ */ + +/*! \brief allocated size of z_matComp_FEM[] */ +static size_t nbEl_FEM = 0; + static int addValue(int lig, int col, double val) { // With option partial_fembem, we nullify two thirds of the interaction between BEM and FEM unknowns: @@ -131,132 +130,229 @@ static int addValue(int lig, int col, double val) { nnz_FEM++; return 0; } -/* ============================================================================================================ */ + +int freeFemMatrix() { + if (z_matComp_FEM) MpfFree(z_matComp_FEM); + z_matComp_FEM=NULL; + if (ptr_ordered_FEM) MpfFree(ptr_ordered_FEM); + ptr_ordered_FEM=NULL; + nnz_FEM=0; + nbEl_FEM=0; + return 0; +} + int createFemMatrix() { int nbHexa=0, ierr, rank, iVtk; int defTetra[5][4]= { {0,1,2,4}, {1,4,5,7}, {1,2,3,7}, {2,4,6,7}, {1,2,4,7}}; - FILE *fvtk=NULL; + FILE *fvtk=NULL, *funv=NULL; + int nbTetra=0, nbTria=0; ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); - if (rank==0 && writeMesh) { - fvtk=fopen("mesh.vtu", "w"); - // VTK Header - fprintf(fvtk, "<?xml version=\"1.0\"?>\n"); - fprintf(fvtk, "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" " - "byte_order=\"LittleEndian\" compressor=\"vtkZLibDataCompressor\">\n"); - fprintf(fvtk, "\t<UnstructuredGrid>\n"); - fprintf(fvtk, "\t\t<Piece NumberOfPoints=\"%d\" NumberOfCells=\"%d\">\n", - n_r*n_z*n_l, 5*(n_r*n_l*n_z-n_l*n_z-(n_r-1)*(n_l+1))); - // Write the vertices - fprintf(fvtk, "\t\t\t<Points>\n"); - fprintf(fvtk, "\t\t\t\t<DataArray type=\"Float32\"" - " NumberOfComponents=\"3\" format=\"ascii\">\n"); - double coord[3]; - for (iVtk = 0; iVtk < n_r*n_z*n_l; iVtk++) { - ierr = computeCoordPipe(iVtk, coord); CHKERRQ(ierr); - fprintf(fvtk, "%g %g %g\n", (float) (coord[0]), (float)(coord[1]), (float)(coord[2])); + if (rank==0) { + if (writeMesh) { + fvtk=fopen("mesh.vtu", "w"); + // VTK Header + fprintf(fvtk, "<?xml version=\"1.0\"?>\n"); + fprintf(fvtk, "<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" " + "byte_order=\"LittleEndian\" compressor=\"vtkZLibDataCompressor\">\n"); + fprintf(fvtk, "\t<UnstructuredGrid>\n"); + fprintf(fvtk, "\t\t<Piece NumberOfPoints=\"%d\" NumberOfCells=\"%d\">\n", + n_r*n_z*n_l, 5*(n_r*n_l*n_z-n_l*n_z-(n_r-1)*(n_l+1))); + // Write the vertices + fprintf(fvtk, "\t\t\t<Points>\n"); + fprintf(fvtk, "\t\t\t\t<DataArray type=\"Float32\"" + " NumberOfComponents=\"3\" format=\"ascii\">\n"); + double coord[3]; + for (iVtk = 0; iVtk < n_r*n_z*n_l; iVtk++) { + ierr = computeCoordPipe(iVtk, coord); CHKERRQ(ierr); + fprintf(fvtk, "%g %g %g\n", (float) (coord[0]), (float)(coord[1]), (float)(coord[2])); + } + fprintf(fvtk, "\t\t\t\t</DataArray>\n"); + fprintf(fvtk, "\t\t\t</Points>\n"); + // VTK Elements + fprintf(fvtk, "\t\t\t<Cells>\n"); + fprintf(fvtk, "\t\t\t\t<DataArray " + "type=\"Int32\" Name=\"connectivity\" format=\"ascii\">\n"); + } + if (writeMeshUNV) { +#define sNODE_UNV_ID " 2411" +#define sELT_UNV_ID " 2412" +#define sGRP_UNV_ID " 2435" +#define sUNV_SEPARATOR " -1" +#define sNODE_UNV_DESCR " 1 1 11" +#define sELT_TRIA3_DESC " 91 1 1 5 3" +#define sELT_TETRA4_DESC " 111 1 2 9 4" + + Mpf_printf(MPI_COMM_WORLD, "Writing 'mesh.unv'... ") ; + funv=fopen("mesh.unv", "w"); + /* -------- */ + /* Vertices */ + /* -------- */ + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + fprintf(funv, "%s\n", sNODE_UNV_ID ); + double coord[3]; + for(int i = 1; i<=n_r*n_z*n_l; i++ ) { + ierr = computeCoordPipe(i-1, coord); CHKERRQ(ierr); + fprintf(funv, "%10d%s\n", i, sNODE_UNV_DESCR ); + fprintf(funv, "%25.16E%25.16E%25.16E\n", coord[0], coord[1], coord[2]) ; + } + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + /* -------- */ + /* Elements */ + /* -------- */ + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + fprintf(funv, "%s\n", sELT_UNV_ID ); } - fprintf(fvtk, "\t\t\t\t</DataArray>\n"); - fprintf(fvtk, "\t\t\t</Points>\n"); - // VTK Elements - fprintf(fvtk, "\t\t\t<Cells>\n"); - fprintf(fvtk, "\t\t\t\t<DataArray " - "type=\"Int32\" Name=\"connectivity\" format=\"ascii\">\n"); - } - /* Loop on the hexaedre: We loop on the bottom-left-exterior corner of the hexaedre and check + /* Loop on the hexaedre: We loop on the bottom-left-exterior corner of the hexaedre and check that the 7 other vertices are valid. */ - int i_hexa; - for (i_hexa=0 ; i_hexa<nbPts ; i_hexa++) { - div_t res=div(i_hexa, n_z*n_l); + int i_hexa; + for (i_hexa=0 ; i_hexa<nbPts ; i_hexa++) { + div_t res=div(i_hexa, n_z*n_l); - int i_helix = res.quot; - // the hexaedre is based on helices 'i_helix' and 'i_helix+1', so both must be in [0, n_r-1] - if (i_helix+1 > n_r-1) continue; + int i_helix = res.quot; + // the hexaedre is based on helices 'i_helix' and 'i_helix+1', so both must be in [0, n_r-1] + if (i_helix+1 > n_r-1) continue; - int i_face = res.rem; - // the outer face is based on points i_face, i_face+1, i_face+n_l, i_face+n_l+1, they must be in [0, n_z.n_l-1] - if (i_face+n_l+1 > n_z*n_l-1) continue; + int i_face = res.rem; + // the outer face is based on points i_face, i_face+1, i_face+n_l, i_face+n_l+1, they must be in [0, n_z.n_l-1] + if (i_face+n_l+1 > n_z*n_l-1) continue; - nbHexa++; + nbHexa++; - /* Vertices of the current hexaedre, in [0..nbPts[ - Some of these value might exceed nbPts, they correspond to points that wont carry any unknown - and will be skipped during the loop on the unknowns below. + /* Vertices of the current hexaedre, in [0..nbPts[ */ - int i_vert[8]; - i_vert[0] = i_helix * n_z*n_l + i_face; // actually it is i_hexa - i_vert[1] = i_helix * n_z*n_l + i_face+1; - i_vert[2] = i_helix * n_z*n_l + i_face+n_l; - i_vert[3] = i_helix * n_z*n_l + i_face+n_l+1; - i_vert[4] = (i_helix+1) * n_z*n_l + i_face; - i_vert[5] = (i_helix+1) * n_z*n_l + i_face+1; - i_vert[6] = (i_helix+1) * n_z*n_l + i_face+n_l; - i_vert[7] = (i_helix+1) * n_z*n_l + i_face+n_l+1; - - /* Loop on the tetraedron inside this hexaedra (5 tetra per hexa, defTetra[i] gives the vertices + int i_vert[8]; + i_vert[0] = i_helix * n_z*n_l + i_face; // actually it is i_hexa + i_vert[1] = i_helix * n_z*n_l + i_face+1; + i_vert[2] = i_helix * n_z*n_l + i_face+n_l; + i_vert[3] = i_helix * n_z*n_l + i_face+n_l+1; + i_vert[4] = (i_helix+1) * n_z*n_l + i_face; + i_vert[5] = (i_helix+1) * n_z*n_l + i_face+1; + i_vert[6] = (i_helix+1) * n_z*n_l + i_face+n_l; + i_vert[7] = (i_helix+1) * n_z*n_l + i_face+n_l+1; + + /* Loop on the tetraedron inside this hexaedra (5 tetra per hexa, defTetra[i] gives the vertices composing each of them) */ - int i_tetra; - for (i_tetra=0 ; i_tetra<5 ; i_tetra++) { - int i_vert_t[4]; // vertices of the current tetraedron, in [0..nbPts[ - int i, j; - for (i=0 ; i<4 ; i++) - i_vert_t[i] = i_vert[defTetra[i_tetra][i]]; - - // Loop on the unkowns (=vertices) in this tetrahedron - // Each unknown interacts with the other unknown in the same tetrahedron - // Value of the interaction is 0.1 for self, 0.05 otherwise - for (i=0 ; i<4 ; i++) - if (i_vert_t[i] >= 0 && i_vert_t[i] < nbPts) - for (j=0 ; j<4 ; j++) - if (i_vert_t[j] >= 0 && i_vert_t[j] < nbPts) - addValue(i_vert_t[i], i_vert_t[j], i==j ? 0.1 : 0.05); - - if (rank==0 && writeMesh) - fprintf(fvtk, "%d %d %d %d\n", i_vert_t[0], i_vert_t[1], i_vert_t[2], i_vert_t[3]); - + int i_tetra; + for (i_tetra=0 ; i_tetra<5 ; i_tetra++) { + int i_vert_t[4]; // vertices of the current tetraedron, in [0..nbPts[ + int i, j; + for (i=0 ; i<4 ; i++) + i_vert_t[i] = i_vert[defTetra[i_tetra][i]]; + + // Loop on the unkowns (=vertices) in this tetrahedron + // Each unknown interacts with the other unknown in the same tetrahedron + // Value of the interaction is 0.1 for self, 0.05 otherwise + for (i=0 ; i<4 ; i++) + if (i_vert_t[i] >= 0 && i_vert_t[i] < nbPts) + for (j=0 ; j<4 ; j++) + if (i_vert_t[j] >= 0 && i_vert_t[j] < nbPts) + addValue(i_vert_t[i], i_vert_t[j], i==j ? 0.1 : 0.05); + + if (writeMesh) + fprintf(fvtk, "%d %d %d %d\n", i_vert_t[0], i_vert_t[1], i_vert_t[2], i_vert_t[3]); + if (writeMeshUNV) { + fprintf(funv, "%10d%s\n", ++nbTetra, sELT_TETRA4_DESC ) ; + fprintf(funv, "%10d%10d%10d%10d\n", i_vert_t[0]+1, i_vert_t[1]+1, i_vert_t[2]+1, i_vert_t[3]+1); + } + } } - } - if (rank==0 && writeMesh) { - // Check that the number of hexas created matches the one computed at the beginning and written in the vtk header - ASSERTQ(nbHexa == n_r*n_l*n_z-n_l*n_z-(n_r-1)*(n_l+1)); - fprintf(fvtk, "\t\t\t\t</DataArray>\n"); - /* Ecriture des offsets */ - fprintf(fvtk, "\t\t\t\t<DataArray " - "type=\"Int32\" Name=\"offsets\" format=\"ascii\">\n"); - for (iVtk = 0; iVtk < nbHexa*5; iVtk++) { - fprintf(fvtk, "%d ", iVtk*4+4); - if (iVtk % 10 == 9) - fprintf(fvtk, "\n"); + if (writeMesh) { + // Check that the number of hexas created matches the one computed at the beginning and written in the vtk header + ASSERTQ(nbHexa == n_r*n_l*n_z-n_l*n_z-(n_r-1)*(n_l+1)); + fprintf(fvtk, "\t\t\t\t</DataArray>\n"); + /* Ecriture des offsets */ + fprintf(fvtk, "\t\t\t\t<DataArray " + "type=\"Int32\" Name=\"offsets\" format=\"ascii\">\n"); + for (iVtk = 0; iVtk < nbHexa*5; iVtk++) { + fprintf(fvtk, "%d ", iVtk*4+4); + if (iVtk % 10 == 9) + fprintf(fvtk, "\n"); + } + fprintf(fvtk, "\t\t\t\t</DataArray>\n"); + + /* Ecriture des types d'elements */ + fprintf(fvtk, "\t\t\t\t<DataArray " + "type=\"UInt8\" Name=\"types\" format=\"ascii\">\n"); + for (iVtk = 0; iVtk < nbHexa*5; iVtk++) { + fprintf(fvtk, "%d ", 10); // For VTK_TETRA + if (iVtk % 10 == 9) + fprintf(fvtk, "\n"); + } + fprintf(fvtk, "\t\t\t\t</DataArray>\n"); + fprintf(fvtk, "\t\t\t</Cells>\n"); + fprintf(fvtk, "\t\t</Piece>\n\t</UnstructuredGrid>\n</VTKFile>\n"); + fclose(fvtk); + Mpf_printf(MPI_COMM_WORLD, "Done writing 'mesh.vtu'\n") ; } - fprintf(fvtk, "\t\t\t\t</DataArray>\n"); - - /* Ecriture des types d'elements */ - fprintf(fvtk, "\t\t\t\t<DataArray " - "type=\"UInt8\" Name=\"types\" format=\"ascii\">\n"); - for (iVtk = 0; iVtk < nbHexa*5; iVtk++) { - fprintf(fvtk, "%d ", 10); // For VTK_TETRA - if (iVtk % 10 == 9) - fprintf(fvtk, "\n"); + if (writeMeshUNV) { + // Add the triangles on the outer helix + for (int i_face=0 ; i_face<n_z*n_l ; i_face++) { + // the outer face is based on points i_face, i_face+1, i_face+n_l, i_face+n_l+1, they must be in [0, n_z.n_l-1] + if (i_face+n_l+1 > n_z*n_l-1) continue; + + /* Vertices of the current outer triangles in [0..n_z*n_l[ + */ + int i_vert[4]; + i_vert[0] = i_face; + i_vert[1] = i_face+1; + i_vert[2] = i_face+n_l; + i_vert[3] = i_face+n_l+1; + // Add the 2 triangles using vertices 0,1,2 and 2,1,3 (inward normal) + fprintf(funv, "%10d%s\n", ++nbTria + nbTetra, sELT_TRIA3_DESC ) ; + fprintf(funv, "%10d%10d%10d\n", i_vert[0]+1, i_vert[1]+1, i_vert[2]+1); + fprintf(funv, "%10d%s\n", ++nbTria + nbTetra, sELT_TRIA3_DESC ) ; + fprintf(funv, "%10d%10d%10d\n", i_vert[2]+1, i_vert[1]+1, i_vert[3]+1); + } + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + /* ------ */ + /* Groups */ + /* ------ */ + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + fprintf(funv, "%s\n", sGRP_UNV_ID ); + /* Group V_D1 for all the tetrahedra */ + fprintf(funv, "%10d%10d%10d%10d%10d%10d%10d%10d\n", 1, 0, 0, 0, 0, 0, 0, nbTetra) ; + fprintf(funv, "V_D1\n") ; + for (int i=1 ; i<=nbTetra ; i++) { + fprintf(funv, "%10d%10d%10d%10d", 8, i, 0, 0) ; + if ( (i%2==0) || (i==nbTetra) ) + fprintf(funv, "\n") ; + } + /* Group I_D1_EXT for all the triangles */ + fprintf(funv, "%10d%10d%10d%10d%10d%10d%10d%10d\n", 1, 0, 0, 0, 0, 0, 0, nbTria) ; + fprintf(funv, "I_D1_EXT\n") ; + for (int i=1 ; i<=nbTria ; i++) { + fprintf(funv, "%10d%10d%10d%10d", 8, i+nbTetra, 0, 0) ; + if ( (i%2==0) || (i==nbTria) ) + fprintf(funv, "\n") ; + } + fprintf(funv, "%s\n",sUNV_SEPARATOR ) ; + fclose(funv); + Mpf_printf(MPI_COMM_WORLD, "Done.\n") ; } - fprintf(fvtk, "\t\t\t\t</DataArray>\n"); - fprintf(fvtk, "\t\t\t</Cells>\n"); - fprintf(fvtk, "\t\t</Piece>\n\t</UnstructuredGrid>\n</VTKFile>\n"); - fclose(fvtk); + + Mpf_printf(MPI_COMM_WORLD, "Number of hexaedres = %d\n", nbHexa); + Mpf_printf(MPI_COMM_WORLD, "Number of NNZ before sort = %zu\n", nnz_FEM); + // Passer en multithread ? + ierr=Z_sortAndCompactL(z_matComp_FEM, &nnz_FEM) ; CHKERRQ(ierr) ; + z_matComp_FEM=(ZnonZero*)MpfRealloc(z_matComp_FEM,nnz_FEM*sizeof(ZnonZero)); CHKPTRQ(z_matComp_FEM) ; + Mpf_printf(MPI_COMM_WORLD, "Number of NNZ after sort = %zu\n", nnz_FEM); } - Mpf_printf(MPI_COMM_WORLD, "Number of hexaedres = %d\n", nbHexa); - Mpf_printf(MPI_COMM_WORLD, "Number of NNZ before sort = %zu\n", nnz_FEM); - // Passer en multithread ? - ierr=Z_sortAndCompactL(z_matComp_FEM, &nnz_FEM) ; CHKERRQ(ierr) ; - z_matComp_FEM=(ZnonZero*)MpfRealloc(z_matComp_FEM,nnz_FEM*sizeof(ZnonZero)); CHKPTRQ(z_matComp_FEM) ; - Mpf_printf(MPI_COMM_WORLD, "Number of NNZ after sort = %zu\n", nnz_FEM); + // we broadcast the mesh in MPI : z_matComp_FEM, nnz_FEM. + ierr = MPI_Bcast(&nnz_FEM, sizeof(size_t), MPI_BYTE, 0, MPI_COMM_WORLD); CHKERRQ(ierr); + if (rank) { + z_matComp_FEM=(ZnonZero*)MpfRealloc(z_matComp_FEM,nnz_FEM*sizeof(ZnonZero)); CHKPTRQ(z_matComp_FEM) ; + } + for (size_t off=0 ; off<nnz_FEM*sizeof(ZnonZero) ; off += 1e9) { + ierr = MPI_Bcast((char*)z_matComp_FEM + off, MIN(nnz_FEM*sizeof(ZnonZero)-off, 1e9), MPI_BYTE, 0, MPI_COMM_WORLD); CHKERRQ(ierr); + } /* Store in ptr_ordered_FEM the beginning of each line in z_matComp */ - ptr_ordered_FEM = (size_t*)MpfCalloc(nbPts+1,sizeof(size_t)); CHKPTRQ(ptr_ordered_FEM); - + ptr_ordered_FEM = MpfCalloc(nbPts+1,sizeof(size_t)); CHKPTRQ(ptr_ordered_FEM); size_t i, k=0 ; for (i=0;i<nnz_FEM;i++) while (k <= z_matComp_FEM[i].i ) { @@ -268,12 +364,15 @@ int createFemMatrix() { k++ ; } + // In case we pass a second time in this routine + writeMesh = 0; + writeMeshUNV = 0; + return 0; } -/* ============================================================================================================ */ int computeKernelFEM(int i, int j, Z_type *kernel) { - ZnonZero z_cle={i, j, {0.}}; + ZnonZero z_cle={i, j, { 0.}}; ASSERTA(z_matComp_FEM); // Line 'i' begins at position ptr_ordered_FEM[i] and has size ptr_ordered_FEM[i+1]-ptr_ordered_FEM[i] ZnonZero *z_res = (ZnonZero*)bsearch(&z_cle, z_matComp_FEM+ptr_ordered_FEM[i], ptr_ordered_FEM[i+1]-ptr_ordered_FEM[i], sizeof(ZnonZero), compare_ZnonZero_Line); @@ -284,7 +383,6 @@ int computeKernelFEM(int i, int j, Z_type *kernel) { return 0; } -/* ============================================================================================================ */ int produitClassiqueFEM(void *sol, MPI_Comm myComm) { int ierr, nbProcs, myProc; size_t l, jg; @@ -309,13 +407,6 @@ int produitClassiqueFEM(void *sol, MPI_Comm myComm) { size_t i, j, k; Z_type Aij; -#ifdef _OPENMP - if (omp_get_thread_num()==0) -#endif - { - ierr=Mpf_progressBar(myComm, (int)(jg/100)+1, (int)(nnz_FEM/100)+2) ; - } - if (z_matComp_FEM[l].j % sparseRHS == 0) { Aij=z_matComp_FEM[l].v; i=z_matComp_FEM[l].i; @@ -323,18 +414,25 @@ int produitClassiqueFEM(void *sol, MPI_Comm myComm) { for (k=0 ; k<nbRHS ; k++) { if (complexALGO) { #pragma omp atomic - z_sol[i+k*nbPts].r += Aij.r * z_rhs[j+k*nbPts].r - Aij.i * z_rhs[j+k*nbPts].i ; + z_sol[(size_t)i+k*nbPts].r += Aij.r * z_rhs[(size_t)j+k*nbPts].r - Aij.i * z_rhs[(size_t)j+k*nbPts].i ; #pragma omp atomic - z_sol[i+k*nbPts].i += Aij.r * z_rhs[j+k*nbPts].i + Aij.i * z_rhs[j+k*nbPts].r ; + z_sol[(size_t)i+k*nbPts].i += Aij.r * z_rhs[(size_t)j+k*nbPts].i + Aij.i * z_rhs[(size_t)j+k*nbPts].r ; } else { #pragma omp atomic - d_sol[i+k*nbPts] += Aij.r * d_rhs[j+k*nbPts] ; + d_sol[(size_t)i+k*nbPts] += Aij.r * d_rhs[(size_t)j+k*nbPts] ; } } /* for (k=0 ... */ } /* if (z_matComp[l].j ... */ -#pragma omp atomic - jg++ ; + size_t jg_bak; +#pragma omp atomic capture + jg_bak = ++jg ; +#ifdef _OPENMP + if (omp_get_thread_num()==0) +#endif + { + ierr=Mpf_progressBar(myComm, (int)(jg_bak/100)+1, (int)(nnz_FEM/100)+2) ; + } } ierr=Mpf_progressBar(myComm, (int)(nnz_FEM/100)+2, (int)(nnz_FEM/100)+2) ; @@ -343,7 +441,7 @@ int produitClassiqueFEM(void *sol, MPI_Comm myComm) { return 0; } -/* ============================================================================================================ */ + /*! \brief Computes a block for the matrix of FEM interactions. Row and column indices are 1-based and included. @@ -456,7 +554,7 @@ void computeSparseBlockFEMBEM ( int *LigInf, int *LigSup, int *ColInf, int *ColS for (row=LigInf_g-1 ; row<=LigSup_g-1 && row<nbPtsBEM; row++) { ierr=computeCoordCylinder(row, &(coord_i[0])); CHKERRA(ierr); for (col=ColInf_g-1 ; col<=ColSup_g-1 && col<nbPtsBEM; col++) { - Z_type Aij=Mpf_z_zero; + double _Complex Aij = 0; ierr=computeCoordCylinder(col, &(coord_j[0])); CHKERRA(ierr); ierr=computeKernelBEM(&(coord_i[0]), &(coord_j[0]), row==col, &Aij); CHKERRA(ierr); @@ -467,26 +565,25 @@ void computeSparseBlockFEMBEM ( int *LigInf, int *LigSup, int *ColInf, int *ColS case SIMPLE_PRECISION: s_matComp->i=i; s_matComp->j=j; - s_matComp->v=(S_type)(Aij.r); + s_matComp->v = Aij; s_matComp++; break; case DOUBLE_PRECISION: d_matComp->i=i; d_matComp->j=j; - d_matComp->v=(D_type)(Aij.r); + d_matComp->v = Aij; d_matComp++; break; case SIMPLE_COMPLEX: c_matComp->i=i; c_matComp->j=j; - c_matComp->v.r=(S_type)(Aij.r); - c_matComp->v.i=(S_type)(Aij.i); + c_matComp->cv = Aij; c_matComp++; break; case DOUBLE_COMPLEX: z_matComp->i=i; z_matComp->j=j; - z_matComp->v=Aij; + z_matComp->cv = Aij; z_matComp++; break; default: diff --git a/src/prepareTEST.c b/src/prepareTEST.c index b11142f271d2505cec81774349b22fc2239bc951..4c7950d72e2a3005d039fb5eb1b3f585fd4ca793 100644 --- a/src/prepareTEST.c +++ b/src/prepareTEST.c @@ -21,7 +21,7 @@ int prepareTEST(void) { /* Cree le second membre du produit mat-vec */ Mpf_printf(MPI_COMM_WORLD, "Computing RHS...\n") ; - rhs=(void*)MpfCalloc((size_t)nbPts*nbRHS, complexALGO ? sizeof(Z_type) : sizeof(D_type)) ; CHKPTRQ(rhs) ; + rhs = MpfCalloc((size_t)nbPts*nbRHS, complexALGO ? sizeof(Z_type) : sizeof(D_type)) ; CHKPTRQ(rhs) ; ierr = computeRhs() ; CHKERRQ(ierr) ; if (verboseTest) { ierr = displayArray("rhs", rhs, nbPts, nbRHS) ; CHKERRQ(ierr) ; @@ -43,9 +43,9 @@ int displayArray(char *s, void *f, int m, int n) { for (j=0 ; j<n ; j++) for (i=0 ; i<m ; i++) if (complexALGO) - Mpf_printf(MPI_COMM_WORLD, "%s[%d, %d]=(%e, %e)\n", s, i, j, z_f[i+j*m].r, z_f[i+j*m].i) ; + Mpf_printf(MPI_COMM_WORLD, "%s[%d, %d]=(%e, %e)\n", s, i, j, z_f[(size_t)j*m+i].r, z_f[(size_t)j*m+i].i) ; else - Mpf_printf(MPI_COMM_WORLD, "%s[%d, %d]=%e\n", s, i, j, d_f[i+j*m]) ; + Mpf_printf(MPI_COMM_WORLD, "%s[%d, %d]=%e\n", s, i, j, d_f[(size_t)j*m+i]) ; return 0 ; } diff --git a/src/rhs.c b/src/rhs.c index b2c2a60f3d5e29b3b02f3656aaa01a906714ce89..7b9716ad077a393be98d1146eb8f4762b2b25a4f 100644 --- a/src/rhs.c +++ b/src/rhs.c @@ -5,7 +5,7 @@ extern double height; int computeRhs(void) { double coord[3], OP[3] ; int i, j, ierr ; - Z_type *z_rhs=rhs ; + double complex *z_rhs=rhs ; D_type *d_rhs=rhs ; double small = simplePrec ? 1.e-20 : 1.e-100; double k=2.*M_PI/lambda; @@ -19,21 +19,21 @@ int computeRhs(void) { if (newRHS) { if (complexALGO) { // OP = fake incident plane wave coming from direction rotating around the object - ierr = computeCoord( j*nbPts/nbRHS%nbPts , &(OP[0])) ; CHKERRQ(ierr); + ierr = computeCoord( (size_t)j*nbPts/nbRHS%nbPts , &(OP[0])) ; CHKERRQ(ierr); OP[2] -= height/2.; // to have plane waves coming from below too - double k_ps = k*(coord[0]*OP[0]+coord[1]*OP[1]+coord[2]*OP[2]); - z_rhs[i+j*nbPts].r=cos(k_ps); - z_rhs[i+j*nbPts].i=sin(k_ps); + z_rhs[(size_t)j * nbPts + i] = cexp( + I * k * (coord[0] * OP[0] + coord[1] * OP[1] + coord[2] * OP[2])); } else { - d_rhs[i+j*nbPts]=cos(coord[0]/(j+1)+coord[1]+sin(j)*coord[2])+sin(j) ; + d_rhs[(size_t)j*nbPts+i]=cos(coord[0]/(j+1)+coord[1]+sin(j)*coord[2])+sin(j) ; } } else { // Old way of computing RHS. They were actually always rank 2! Not suitable to really test mat-rhs... if (complexALGO) { - z_rhs[i+j*nbPts].r=cos(coord[0]+coord[1]+coord[2])+sin(j) ; - z_rhs[i+j*nbPts].i=coord[0]-coord[1]+coord[2]-cos(j) ; + z_rhs[(size_t)j * nbPts + i] = + cos(coord[0] + coord[1] + coord[2]) + sin(j) + + I * (coord[0] - coord[1] + coord[2] - cos(j)); } else { - d_rhs[i+j*nbPts]=cos(coord[0]+coord[1]+coord[2])+sin(j) ; + d_rhs[(size_t)j*nbPts+i]=cos(coord[0]+coord[1]+coord[2])+sin(j) ; } } } @@ -42,14 +42,13 @@ int computeRhs(void) { // instead of zero, otherwise run-time optimisations produce unrealistic timers with FMM) for (j=0 ; j<nbRHS ; j++) { if (complexALGO) { - z_rhs[i+j*nbPts].r=small ; - z_rhs[i+j*nbPts].i=small ; + z_rhs[(size_t)j * nbPts + i] = small + I * small; } else { - d_rhs[i+j*nbPts]=small ; + d_rhs[(size_t)j*nbPts+i]=small ; } } } - + return 0 ; } @@ -57,7 +56,8 @@ void computeDenseBlockData (int *LigInf, int *LigSup, int *ColInf, int *ColSup, int tailleL__ ; int iloc_____ ; int jloc_____ ; - int il,ic,inxBloc, inxGlob ; + int il,ic,inxBloc; + size_t inxGlob ; S_type *s_bloc=bloc ; D_type *d_bloc=bloc ; C_type *c_bloc=bloc ; @@ -74,7 +74,7 @@ void computeDenseBlockData (int *LigInf, int *LigSup, int *ColInf, int *ColSup, for (ic = ColInf[0]-1; ic < ColSup[0] ; ic++) { for (il = LigInf[0]-1; il < LigSup[0] ; il++) { inxBloc = tailleL__*(ic-ColInf[0]+1) + il-LigInf[0]+1 + iloc_____ + jloc_____ * tailleL__ ; - inxGlob = (ic + myCtx->colOffset) * nbPts + (il + myCtx->rowOffset); + inxGlob = (size_t)(ic + myCtx->colOffset) * nbPts + (il + myCtx->rowOffset); switch (stype) { case SIMPLE_PRECISION: diff --git a/src/testCHAMELEON.c b/src/testCHAMELEON.c index 6314823cbf17811a5494ada5ab3f42bbac67990e..cd0c8588543a048d74b94b29819b123d9ac675b9 100644 --- a/src/testCHAMELEON.c +++ b/src/testCHAMELEON.c @@ -1,25 +1,22 @@ #include "main.h" -/* ============================================================================================================ */ /*! \brief Runs the test using CHAMELEON solver */ -int testCHAMELEON(void) { +int testCHAMELEON(double * relative_error) { #ifdef HAVE_CHAMELEON int ierr; - double temps_initial, temps_final, temps_cpu, eps; + double temps_initial, temps_final, temps_cpu; size_t vectorSize = (size_t)nbPts * nbRHS * (complexALGO ? 2 : 1); - /* ===================================================================================== */ /* Realise le produit matrice vecteur classique : A.rhs -> solCLA */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Computing Classical product...\n") ; void *solCLA=NULL; ierr = produitClassique(&solCLA, MPI_COMM_WORLD) ; CHKERRQ(ierr) ; - /* ===================================================================================== */ /* Cree la Matrice CHAMELEON */ - /* ===================================================================================== */ - testFEMBEM_initChameleon( stype ); + + testFEMBEM_initChameleon( (cham_flttype_t)stype ); /* Get the default NB parameter */ int NB ; @@ -39,7 +36,7 @@ int testCHAMELEON(void) { temps_final = getTime (); temps_cpu = temps_final - temps_initial ; if ( verboseTest ) { - void *lapackA = (void*)MpfCalloc( nbPts * nbPts, sizeof(D_type) ) ; CHKPTRQ(lapackA); + void *lapackA = MpfCalloc( (size_t)nbPts * nbPts, sizeof(D_type) ) ; CHKPTRQ(lapackA); ierr = CHAMELEON_Desc2Lap( ChamUpperLower, descA, lapackA, nbPts ); CHKERRQ(ierr); ierr = displayArray("frA", lapackA, nbPts, nbPts) ; CHKERRQ(ierr) ; @@ -48,15 +45,13 @@ int testCHAMELEON(void) { } Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> TpsCpuCHAMELEONCreation = %f \n", temps_cpu) ; - /* ===================================================================================== */ /* Switch on the various CHAMELEON tests */ - /* ===================================================================================== */ + switch(testedAlgo) { case _gemvCHAMELEON: - /* ===================================================================================== */ /* Appelle le produit mat.vec : A.rhs -> solCHAM */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Computing CHAMELEON product...\n") ; temps_initial = getTime (); @@ -91,7 +86,7 @@ int testCHAMELEON(void) { temps_cpu = temps_final - temps_initial ; Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> TpsCpuGEMVCHAMELEON = %f \n", temps_cpu) ; - void *solCHAM = (void*)MpfCalloc( vectorSize, sizeof(D_type) ); CHKPTRQ(solCHAM); + void *solCHAM = MpfCalloc( vectorSize, sizeof(D_type) ); CHKPTRQ(solCHAM); /* convert back to lapack format */ ierr = CHAMELEON_Desc2Lap( ChamUpperLower, descY, solCHAM, nbPts); CHKERRQ(ierr); @@ -99,6 +94,7 @@ int testCHAMELEON(void) { ierr = CHAMELEON_Desc_Destroy (&descX); CHKERRQ(ierr); ierr = CHAMELEON_Desc_Destroy (&descY); CHKERRQ(ierr); + ASSERTQ(nbPts < INT_MAX/nbRHS); ierr = MPI_Bcast(solCHAM, nbRHS*nbPts, Mpf_mpitype[stype], 0, MPI_COMM_WORLD ); CHKERRQ(ierr); if (simplePrec) { @@ -111,17 +107,18 @@ int testCHAMELEON(void) { } /* Compare the two vectors solCLA and solCHAM */ Mpf_printf(MPI_COMM_WORLD, "\n**** Comparing results...\n") ; - ierr = computeRelativeError( solCHAM, solCLA, &eps); CHKERRQ(ierr); + ierr = computeRelativeError( solCHAM, solCLA, relative_error); CHKERRQ(ierr); - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", eps); - if (solCHAM) MpfFree(solCHAM) ; solCHAM=NULL ; + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", *relative_error); + if (solCHAM) MpfFree(solCHAM) ; + solCHAM=NULL ; break; case _solveCHAMELEON: - /* ===================================================================================== */ + /* Factorize the CHAMELEON Matrix */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n\n**** Factorizing CHAMELEON Mat...\n") ; if ( generate_traces ) { CHAMELEON_Enable( CHAMELEON_PROFILING_MODE ); @@ -138,7 +135,7 @@ int testCHAMELEON(void) { CHAMELEON_Disable( CHAMELEON_PROFILING_MODE ); } if ( verboseTest ) { - void *lapackA = (void*)MpfCalloc( nbPts * nbPts, sizeof(D_type) ) ; CHKPTRQ(lapackA); + void *lapackA = MpfCalloc( (size_t)nbPts * nbPts, sizeof(D_type) ) ; CHKPTRQ(lapackA); ierr = CHAMELEON_Desc2Lap( ChamUpperLower, descA, lapackA, nbPts ); CHKERRQ(ierr); ierr = displayArray("chamA", lapackA, nbPts, nbPts) ; CHKERRQ(ierr) ; @@ -171,6 +168,7 @@ int testCHAMELEON(void) { /* Convert back to lapack */ ierr = CHAMELEON_Desc2Lap( ChamUpperLower, descB, solCLA, nbPts); CHKERRQ(ierr); + ASSERTQ(nbPts < INT_MAX/nbRHS); ierr = MPI_Bcast(solCLA, nbRHS*nbPts, Mpf_mpitype[stype], 0, MPI_COMM_WORLD ); CHKERRQ(ierr); if (simplePrec) simpleToDouble(solCLA, vectorSize); @@ -183,19 +181,18 @@ int testCHAMELEON(void) { /* Compare the two vectors solCLA and rhs */ Mpf_printf(MPI_COMM_WORLD, "\n**** Comparing results...\n") ; - ierr=computeRelativeError(solCLA, rhs, &eps) ; CHKERRQ(ierr) ; - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", eps); + ierr=computeRelativeError(solCLA, rhs, relative_error) ; CHKERRQ(ierr) ; + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", *relative_error); break ; default: break; } - /* ===================================================================================== */ /* Finish execution */ - /* ===================================================================================== */ ierr = CHAMELEON_Desc_Destroy (&descA); CHKERRQ(ierr); - if (solCLA) MpfFree(solCLA) ; solCLA=NULL ; + if (solCLA) MpfFree(solCLA) ; + solCLA=NULL ; #else Mpf_printf(MPI_COMM_WORLD, "No CHAMELEON support.\n") ; @@ -203,4 +200,3 @@ int testCHAMELEON(void) { return 0; } -/* ============================================================================================================ */ diff --git a/src/testHCHAMELEON.c b/src/testHCHAMELEON.c index cfc81db79394ff25edf47490574e233ab9fc2cb6..2baefafc1c14d0302423e0c6cc6e14b041700628 100644 --- a/src/testHCHAMELEON.c +++ b/src/testHCHAMELEON.c @@ -13,10 +13,10 @@ int get_worker_id( void ) /*! \brief Runs the test of H-chameleon matrices : gemv, solve */ -int testHCHAMELEON(void) { +int testHCHAMELEON(double * relative_error) { #if defined(HAVE_CHAMELEON) && defined(CHAMELEON_USE_HMAT) int ierr, rank, NB, np, dims[2] = { 0, 0 }; - double temps_initial, temps_final, temps_cpu, eps; + double temps_initial, temps_final, temps_cpu; size_t vectorSize = (size_t)nbPts * nbRHS * (complexALGO ? 2 : 1); Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> HAcaAccuracy = %.4e \n", mpf_hmat_settings.acaEpsilon) ; @@ -27,9 +27,8 @@ int testHCHAMELEON(void) { ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); CHKERRQ(ierr); - /* ===================================================================================== */ /* Realise le produit matrice vecteur classique : A.rhs -> solCLA */ - /* ===================================================================================== */ + void *solCLA = NULL; if ( check_result ) { Mpf_printf(MPI_COMM_WORLD, "\n**** Computing Classical product...\n") ; @@ -37,10 +36,9 @@ int testHCHAMELEON(void) { ierr = produitClassique(&solCLA, MPI_COMM_WORLD) ; CHKERRQ(ierr) ; } - /* ===================================================================================== */ /* Cree la H-Matrice */ - /* ===================================================================================== */ - testFEMBEM_initChameleon( stype ); + + testFEMBEM_initChameleon( (cham_flttype_t)stype ); CHAMELEON_Enable( CHAMELEON_GENERIC ); // Summa is using lacpy that is not supported for hmat yet /* Get the default NB parameter */ @@ -86,7 +84,7 @@ int testHCHAMELEON(void) { if ( verboseTest ) { CHAM_desc_t *frA = HCHAMELEON_uncompress_matrix( &hdescA ); - void *lapackA = (void*)MpfCalloc( nbPts * nbPts, sizeof(D_type) ) ; CHKPTRQ(lapackA); + void *lapackA = MpfCalloc( (size_t)nbPts * nbPts, sizeof(D_type) ) ; CHKPTRQ(lapackA); ierr = CHAMELEON_Desc2Lap( ChamUpperLower, frA, lapackA, nbPts ); CHKERRQ(ierr); ierr = displayArray("frA", lapackA, nbPts, nbPts) ; CHKERRQ(ierr) ; @@ -111,14 +109,10 @@ int testHCHAMELEON(void) { Mpf_printf(MPI_COMM_WORLD, "Ratio: %g\n", (double)info.compressed_size/info.uncompressed_size); } /* Release the FEM matrix (can be quite large) */ - if (z_matComp_FEM) MpfFree(z_matComp_FEM); z_matComp_FEM=NULL; - if (ptr_ordered_FEM) MpfFree(ptr_ordered_FEM); ptr_ordered_FEM=NULL; - nnz_FEM=0; - nbEl_FEM=0; + ierr=freeFemMatrix();CHKERRQ(ierr); - /* ===================================================================================== */ /* Switch on the various H-CHAMELEON tests */ - /* ===================================================================================== */ + switch(testedAlgo) { case _gemmHCHAMELEON: { @@ -154,15 +148,14 @@ int testHCHAMELEON(void) { case _gemvHCHAMELEON: - /* ===================================================================================== */ /* Appelle le produit mat.vec : A.rhs -> solCHAM */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Computing H-CHAMELEON product...\n") ; void *solCHAM = NULL; temps_initial = getTime (); if ( rank == 0 ) { - solCHAM = (void*)MpfCalloc( vectorSize, sizeof(D_type) ); CHKPTRQ(solCHAM); + solCHAM = MpfCalloc( vectorSize, sizeof(D_type) ); CHKPTRQ(solCHAM); if ( simplePrec ) { doubleToSimple( rhs, vectorSize ); @@ -227,22 +220,22 @@ int testHCHAMELEON(void) { ierr = displayArray("solCHAM", solCHAM, nbPts, nbRHS) ; CHKERRQ(ierr) ; } - /* ===================================================================================== */ /* Compare les 2 produits matrice-vecteur */ - /* ===================================================================================== */ + if ( check_result ) { Mpf_printf(MPI_COMM_WORLD, "\n**** Comparing results...\n") ; - ierr = computeRelativeError( solCHAM, solCLA, &eps); CHKERRQ(ierr); - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", eps); + ierr = computeRelativeError( solCHAM, solCLA, relative_error); CHKERRQ(ierr); + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", *relative_error); } - if ( solCHAM ) MpfFree( solCHAM ); solCHAM = NULL; + if ( solCHAM ) MpfFree( solCHAM ); + solCHAM = NULL; break; case _solveHCHAMELEON: - /* ===================================================================================== */ + /* Factorize the H-matrix */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n\n**** Factorizing H-CHAMELEON...\n") ; if ( generate_traces ) { CHAMELEON_Enable( CHAMELEON_PROFILING_MODE ); @@ -262,7 +255,7 @@ int testHCHAMELEON(void) { if ( verboseTest ) { CHAM_desc_t *frA = HCHAMELEON_uncompress_matrix( &hdescA ); - void *lapackA = (void*)MpfCalloc( nbPts * nbPts, sizeof(D_type) ) ; CHKPTRQ(lapackA); + void *lapackA = MpfCalloc( (size_t)nbPts * nbPts, sizeof(D_type) ) ; CHKPTRQ(lapackA); ierr = CHAMELEON_Desc2Lap( ChamUpperLower, frA, lapackA, nbPts ); CHKERRQ(ierr); ierr = displayArray("hChamA", lapackA, nbPts, nbPts) ; CHKERRQ(ierr) ; @@ -287,9 +280,8 @@ int testHCHAMELEON(void) { Mpf_printf(MPI_COMM_WORLD, "Ratio: %g\n", (double)info.compressed_size/info.uncompressed_size); } - /* ===================================================================================== */ /* Solve the system : A-1.solCLA -> solCLA */ - /* ===================================================================================== */ + if ( check_result ) { Mpf_printf(MPI_COMM_WORLD, "\n**** Solve H-CHAMELEON\n") ; @@ -326,6 +318,7 @@ int testHCHAMELEON(void) { ierr = CHAMELEON_Desc2Lap( ChamUpperLower, descB, solCLA, nbPts); CHKERRQ(ierr); HCHAMELEON_lapmr( 'B', 'N', nbRHS, &hdescA, solCLA ); + ASSERTQ(nbPts < INT_MAX/nbRHS); ierr = MPI_Bcast(solCLA, nbRHS*nbPts, Mpf_mpitype[stype], 0, MPI_COMM_WORLD ); CHKERRQ(ierr); if ( simplePrec ) { simpleToDouble( solCLA, vectorSize ); @@ -339,21 +332,20 @@ int testHCHAMELEON(void) { /* Compare the two vectors solCLA and rhs */ Mpf_printf(MPI_COMM_WORLD, "\n**** Comparing results...\n") ; - ierr=computeRelativeError(solCLA, rhs, &eps) ; CHKERRQ(ierr) ; - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> ForwardError = %.4e \n", eps); - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> ScaledForwardError = %.4e \n", eps / mpf_hmat_settings.acaEpsilon ); + ierr=computeRelativeError(solCLA, rhs, relative_error) ; CHKERRQ(ierr) ; + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> ForwardError = %.4e \n", *relative_error); + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> ScaledForwardError = %.4e \n", *relative_error / mpf_hmat_settings.acaEpsilon ); } break ; default: break; } - /* ===================================================================================== */ /* Finish execution */ - /* ===================================================================================== */ HCHAMELEON_destroy_matrix( &hdescA ); - if (solCLA) MpfFree(solCLA) ; solCLA=NULL ; + if (solCLA) MpfFree(solCLA) ; + solCLA=NULL ; hmat_tracing_dump("testHCHAMELEON_trace.json"); #else @@ -367,4 +359,3 @@ int testHCHAMELEON(void) { return 0; } -/* ============================================================================================================ */ diff --git a/src/testHLIBPRO.c b/src/testHLIBPRO.c index 8889554fedc3f2ed93ad71af5287423631a5ae60..d33c6406ae5fcff2a29e446ed3157799e04d5cf2 100644 --- a/src/testHLIBPRO.c +++ b/src/testHLIBPRO.c @@ -7,14 +7,13 @@ { char buf[1024]; hlib_error_desc( buf, 1024 ); \ printf( "\n%s\n\n", buf ); exit(1); } } -/* ============================================================================================================ */ static double assemblyEpsilon=1e-4; static double recompressionEpsilon=1e-4; static hlib_lrapx_t compressionMethod=HLIB_LRAPX_ACAPLUS; static int maxLeafSize=100; static hmat_factorization_t factorization_type=hmat_factorization_ldlt; static int nbCPUs=12; -/* ============================================================================================================ */ + static void ckernelHLIBpro ( const size_t n, const int *rowidx, const size_t m, const int *colidx, hlib_complex_t *matrix, void *user_context ) { size_t rowi, colj; @@ -26,7 +25,7 @@ static void ckernelHLIBpro ( const size_t n, const int *rowidx, const size_t m, } } } -/* ============================================================================================================ */ + static void kernelHLIBpro ( const size_t n, const int *rowidx, const size_t m, const int *colidx, hlib_real_t *matrix, void *user_context ) { size_t rowi, colj; @@ -38,7 +37,7 @@ static void kernelHLIBpro ( const size_t n, const int *rowidx, const size_t m, c } } } -/* ============================================================================================================ */ + /*! \brief Init parameters for HLIBpro solver */ int initHLIBPRO(int* argc, char*** argv) { @@ -94,12 +93,12 @@ int initHLIBPRO(int* argc, char*** argv) { return 0; } -/* ============================================================================================================ */ + /*! \brief Runs the test using HLIBPRO solver */ -int testHLIBPRO(void) { +int testHLIBPRO(double * relative_error) { int ierr, rank, sSize ; - double temps_initial, temps_final, temps_cpu, eps ; + double temps_initial, temps_final, temps_cpu; Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> HAssemblyAccuracy = %.4e \n", assemblyEpsilon) ; Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> HAssemblyRecompression = %.4e \n", recompressionEpsilon) ; @@ -116,16 +115,14 @@ int testHLIBPRO(void) { ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); CHKERRQ(ierr); - /* ===================================================================================== */ /* Realise le produit matrice vecteur classique : A.rhs -> solCLA */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Computing Classical product...\n") ; void *solCLA=NULL; ierr = produitClassique(&solCLA, MPI_COMM_WORLD) ; CHKERRQ(ierr) ; - /* ===================================================================================== */ /* Cree la H-Matrice */ - /* ===================================================================================== */ + temps_initial = getTime (); Mpf_printf(MPI_COMM_WORLD, "\n**** Creating HLIBpro Matrix...\n") ; @@ -149,9 +146,9 @@ int testHLIBPRO(void) { double *points = testedModel==_bem ? createCylinder() : createPipe(); // HLIBpro demands an array of pointers for the vertices - double**vertices = (double**) MpfCalloc( (size_t)nbPts, sizeof(double*) ); CHKPTRQ(vertices); + double**vertices = MpfCalloc( (size_t)nbPts, sizeof(double*) ); CHKPTRQ(vertices); for ( i = 0; i < nbPts; i++ ) - vertices[i] = &points[3*i]; + vertices[i] = &points[(size_t)3*i]; hlib_coord_t coord = hlib_coord_import( (size_t)nbPts, 3, vertices, NULL, & info ); CHECK_INFO; hlib_clustertree_t ct = hlib_clt_build_bsp( coord, HLIB_BSP_CARD_MAX, maxLeafSize, & info ); CHECK_INFO; hlib_clt_print_ps( ct, "testHLIBpro_ct", & info ); CHECK_INFO; @@ -163,7 +160,7 @@ int testHLIBPRO(void) { Mpf_printf(MPI_COMM_WORLD, "HLIBpro: building BEM matrix using ACA+\n" ); hlib_acc_t acc = hlib_acc_fixed_eps( assemblyEpsilon ); - contextTestFEMBEM *myCtx = (contextTestFEMBEM*)MpfCalloc(1, sizeof(contextTestFEMBEM)) ; CHKPTRQ(myCtx); + contextTestFEMBEM *myCtx = MpfCalloc(1, sizeof(contextTestFEMBEM)) ; CHKPTRQ(myCtx); hlib_matrix_t hmatrix = stype==DOUBLE_COMPLEX ? hlib_matrix_build_ccoeff( bct, ckernelHLIBpro, (void*)myCtx, compressionMethod, acc, symMatSolver, & info ) : hlib_matrix_build_coeff( bct, kernelHLIBpro, (void*)myCtx, compressionMethod, acc, symMatSolver, & info ); CHECK_INFO; @@ -192,25 +189,22 @@ int testHLIBPRO(void) { hlib_vector_t vec_in = stype==DOUBLE_COMPLEX ? hlib_vector_cbuild( nbPts, & info ) : hlib_vector_build( nbPts, & info ); CHECK_INFO; hlib_vector_t vec_out= stype==DOUBLE_COMPLEX ? hlib_vector_cbuild( nbPts, & info ) : hlib_vector_build( nbPts, & info ); CHECK_INFO; - /* ===================================================================================== */ /* Switch on the various HLIBpro tests */ - /* ===================================================================================== */ + switch(testedAlgo) { case _gemvHLIBPRO: - /* ===================================================================================== */ /* Appelle le produit mat.vec : A.rhs -> solHLIBpro */ - /* ===================================================================================== */ Mpf_printf(MPI_COMM_WORLD, "\n**** Computing HLIBpro product...\n") ; - void *solHLIBpro=(void*)MpfCalloc((size_t)nbPts*nbRHS, complexALGO ? sizeof(Z_type) : sizeof(D_type)) ; CHKPTRQ(solHLIBpro) ; + void *solHLIBpro = MpfCalloc((size_t)nbPts*nbRHS, complexALGO ? sizeof(Z_type) : sizeof(D_type)) ; CHKPTRQ(solHLIBpro) ; temps_initial = getTime (); for (i=0 ; i<nbRHS ; i++) { /* import the data into vec_in */ stype==DOUBLE_COMPLEX ? - hlib_vector_cimport( vec_in, (void*)((char*)rhs+i*nbPts*sSize), & info ) : - hlib_vector_import( vec_in, (void*)((char*)rhs+i*nbPts*sSize), & info ); CHECK_INFO; + hlib_vector_cimport( vec_in, (void*)((char*)rhs+(size_t)i*nbPts*sSize), & info ) : + hlib_vector_import( vec_in, (void*)((char*)rhs+(size_t)i*nbPts*sSize), & info ); CHECK_INFO; /* bring into H-ordering */ hlib_vector_permute( vec_in, hlib_clt_perm_e2i( ct, & info ), & info ); CHECK_INFO; /* Do the GEMV */ @@ -219,12 +213,13 @@ int testHLIBPRO(void) { hlib_vector_permute( vec_out, hlib_clt_perm_i2e( ct, & info ), & info ); CHECK_INFO; /* export the data out of vec_out */ stype==DOUBLE_COMPLEX ? - hlib_vector_cexport( vec_out, (void*)((char*)solHLIBpro+i*nbPts*sSize), & info ): - hlib_vector_export( vec_out, (void*)((char*)solHLIBpro+i*nbPts*sSize), & info ); CHECK_INFO; + hlib_vector_cexport( vec_out, (void*)((char*)solHLIBpro+(size_t)i*nbPts*sSize), & info ): + hlib_vector_export( vec_out, (void*)((char*)solHLIBpro+(size_t)i*nbPts*sSize), & info ); CHECK_INFO; } if (rank) { - if (solHLIBpro) MpfFree(solHLIBpro) ; solHLIBpro=NULL ; + if (solHLIBpro) MpfFree(solHLIBpro) ; + solHLIBpro=NULL ; } temps_final = getTime (); temps_cpu = (temps_final - temps_initial) ; @@ -233,19 +228,19 @@ int testHLIBPRO(void) { ierr = displayArray("solHLIBpro", solHLIBpro, nbPts, nbRHS) ; CHKERRQ(ierr) ; } - /* ===================================================================================== */ /* Compare les 2 produits matrice-vecteur */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Comparing results...\n") ; - ierr=computeRelativeError(solHLIBpro, solCLA, &eps) ; CHKERRQ(ierr) ; + ierr=computeRelativeError(solHLIBpro, solCLA, relative_error); CHKERRQ(ierr); - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", eps); - if (solHLIBpro) MpfFree(solHLIBpro) ; solHLIBpro=NULL ; + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", *relative_error); + if (solHLIBpro) MpfFree(solHLIBpro) ; + solHLIBpro=NULL ; break; case _solveHLIBPRO: - /* ===================================================================================== */ + /* Factorize the H-matrix */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Factorizing HLIBpro...\n") ; temps_initial = getTime (); @@ -264,7 +259,6 @@ int testHLIBPRO(void) { default:break; } - temps_final = getTime (); temps_cpu = (temps_final - temps_initial) ; Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> TpsCpuFacto = %f \n", temps_cpu) ; @@ -284,17 +278,16 @@ int testHLIBPRO(void) { Mpf_printf(MPI_COMM_WORLD, "Uncompressed size: %ld\n", fullsize/sSize); Mpf_printf(MPI_COMM_WORLD, "Ratio: %g\n", (double)bytesize/fullsize); - /* ===================================================================================== */ /* Solve the system : A-1.solCLA -> solCLA */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Solve HLIBpro...\n") ; temps_initial = getTime (); for (i=0 ; i<nbRHS ; i++) { /* import the data into vec_in */ stype==DOUBLE_COMPLEX ? - hlib_vector_cimport( vec_in, (void*)((char*)solCLA+i*nbPts*sSize), & info ) : - hlib_vector_import( vec_in, (void*)((char*)solCLA+i*nbPts*sSize), & info ); CHECK_INFO; + hlib_vector_cimport( vec_in, (void*)((char*)solCLA+(size_t)i*nbPts*sSize), & info ) : + hlib_vector_import( vec_in, (void*)((char*)solCLA+(size_t)i*nbPts*sSize), & info ); CHECK_INFO; /* bring into H-ordering */ hlib_vector_permute( vec_in, hlib_clt_perm_e2i( ct, & info ), & info ); CHECK_INFO; /* Do the Solve */ @@ -304,21 +297,20 @@ int testHLIBPRO(void) { hlib_vector_permute( vec_out, hlib_clt_perm_i2e( ct, & info ), & info ); CHECK_INFO; /* export the data out of vec_out */ stype==DOUBLE_COMPLEX ? - hlib_vector_cexport( vec_out, (void*)((char*)solCLA+i*nbPts*sSize), & info ) : - hlib_vector_export( vec_out, (void*)((char*)solCLA+i*nbPts*sSize), & info ); CHECK_INFO; + hlib_vector_cexport( vec_out, (void*)((char*)solCLA+(size_t)i*nbPts*sSize), & info ) : + hlib_vector_export( vec_out, (void*)((char*)solCLA+(size_t)i*nbPts*sSize), & info ); CHECK_INFO; } temps_final = getTime (); temps_cpu = (temps_final - temps_initial) ; Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> TpsCpuSolve = %f \n", temps_cpu) ; - /* ===================================================================================== */ /* Compare the two vectors solCLA and rhs */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Comparing results...\n") ; - ierr=computeRelativeError(solCLA, rhs, &eps) ; CHKERRQ(ierr) ; + ierr=computeRelativeError(solCLA, rhs, relative_error) ; CHKERRQ(ierr) ; - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", eps); + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", *relative_error); break; default: break; @@ -334,19 +326,21 @@ int testHLIBPRO(void) { MpfFree(points); points=NULL; MpfFree(vertices); vertices=NULL; - if (myCtx) MpfFree(myCtx) ; myCtx=NULL ; + if (myCtx) MpfFree(myCtx) ; + myCtx=NULL ; hlib_done( & info ); CHECK_INFO; return 0; } -/* ============================================================================================================ */ + #else int initHLIBPRO(int* argc, char*** argv) { return 0; } -int testHLIBPRO(void) { +int testHLIBPRO(double* relative_error) { Mpf_printf(MPI_COMM_WORLD, "No HLIBpro support.\n") ; + *relative_error = 0; return 0; } #endif diff --git a/src/testHMAT.c b/src/testHMAT.c index 39792b5cbd92d7ffcfd887a7100a61de303e05b8..b3ff1d9b0f4a6c1cc0fdb22ad3f9fe938e286960 100644 --- a/src/testHMAT.c +++ b/src/testHMAT.c @@ -2,11 +2,11 @@ /*! \brief Runs the test of H-matrices : gemv, solve */ -int testHMAT(void) { +int testHMAT(double * relative_error) { #ifdef HAVE_HMAT enter_context("testHMAT()"); int ierr, rank ; - double temps_initial, temps_final, temps_cpu, eps ; + double temps_initial, temps_final, temps_cpu; size_t vectorSize = (size_t)nbPts*nbRHS*(complexALGO?2:1); Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> HAcaAccuracy = %.4e \n", mpf_hmat_settings.acaEpsilon); @@ -17,16 +17,14 @@ int testHMAT(void) { ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); CHKERRQ(ierr); - /* ===================================================================================== */ /* Realise le produit matrice vecteur classique : A.rhs -> solCLA */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Computing Classical product...\n") ; void *solCLA=NULL; ierr = produitClassique(&solCLA, MPI_COMM_WORLD) ; CHKERRQ(ierr) ; - /* ===================================================================================== */ /* Cree la H-Matrice */ - /* ===================================================================================== */ + temps_initial = getTime (); Mpf_printf(MPI_COMM_WORLD, "\n**** Creating HMAT...\n") ; @@ -36,19 +34,28 @@ int testHMAT(void) { // hmat_factorization_none mean the user did not chose a factorization so we // must choose one for him if(mpf_hmat_settings.factorization_type == hmat_factorization_none) { - if(symMatSolver) - // use LDLT for real matrices - mpf_hmat_settings.factorization_type = complexALGO ? hmat_factorization_llt : hmat_factorization_ldlt; - else - mpf_hmat_settings.factorization_type = hmat_factorization_lu; + if (use_hodlr) { + if(symMatSolver) + mpf_hmat_settings.factorization_type = hmat_factorization_hodlrsym; + else + mpf_hmat_settings.factorization_type = hmat_factorization_hodlr; + } else { + if(symMatSolver) + // use LDLT for real matrices + mpf_hmat_settings.factorization_type = complexALGO ? hmat_factorization_llt : hmat_factorization_ldlt; + else + mpf_hmat_settings.factorization_type = hmat_factorization_lu; + } } - /* Check the coherency between the factorization chosen in MPF (with --hmat-lu/llt/ldlt) + /* Check the coherency between the factorization chosen in MPF (with --hmat-lu/llt/ldlt/hodlr/hodlr-sym) and the symmetry of the matrix (chosen with --sym/--nosym/--symcont) */ if (testedAlgo==_solveHMAT) switch(mpf_hmat_settings.factorization_type){ + case hmat_factorization_hodlrsym: case hmat_factorization_ldlt: case hmat_factorization_llt:ASSERTQ(symMatSolver); break; + // No 'hmat_factorization_hodlr' here, because it works on sym matrices case hmat_factorization_lu:ASSERTQ(!symMatSolver); break; default:break; } @@ -109,22 +116,17 @@ int testHMAT(void) { } /* Release the FEM matrix (can be quite large) */ - if (z_matComp_FEM) MpfFree(z_matComp_FEM); z_matComp_FEM=NULL; - if (ptr_ordered_FEM) MpfFree(ptr_ordered_FEM); ptr_ordered_FEM=NULL; - nnz_FEM=0; - nbEl_FEM=0; + ierr=freeFemMatrix();CHKERRQ(ierr); - /* ===================================================================================== */ /* Switch on the various HMAT tests */ - /* ===================================================================================== */ + switch(testedAlgo) { case _gemvHMAT: - /* ===================================================================================== */ /* Appelle le produit mat.vec : A.rhs -> solHMAT */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Computing HMAT product...\n") ; - void *solHMAT=(void*)MpfCalloc(vectorSize, sizeof(D_type)) ; CHKPTRQ(solHMAT) ; + void *solHMAT = MpfCalloc(vectorSize, sizeof(D_type)) ; CHKPTRQ(solHMAT) ; temps_initial = getTime (); if (simplePrec) @@ -139,7 +141,8 @@ int testHMAT(void) { } if (rank) { - if (solHMAT) MpfFree(solHMAT) ; solHMAT=NULL ; + if (solHMAT) MpfFree(solHMAT); + solHMAT=NULL; } temps_final = getTime (); temps_cpu = (temps_final - temps_initial) ; @@ -148,14 +151,14 @@ int testHMAT(void) { ierr = displayArray("solHMAT", solHMAT, nbPts, nbRHS) ; CHKERRQ(ierr) ; } - /* ===================================================================================== */ /* Compare les 2 produits matrice-vecteur */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Comparing results...\n") ; - ierr=computeRelativeError(solHMAT, solCLA, &eps) ; CHKERRQ(ierr) ; + ierr=computeRelativeError(solHMAT, solCLA, relative_error); CHKERRQ(ierr); - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", eps); - if (solHMAT) MpfFree(solHMAT) ; solHMAT=NULL ; + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", *relative_error); + if (solHMAT) MpfFree(solHMAT); + solHMAT=NULL ; break; case _solveHMAT: @@ -169,18 +172,17 @@ int testHMAT(void) { if (hmat_residual) { hmatrix_orig = hi->copy(hmatrix); // ideally, we do a copy with lower accuracy for factorisation /* Vector to duplicate solCLA */ - solCLA_orig=(void*)MpfCalloc(vectorSize, sizeof(D_type)) ; CHKPTRQ(solCLA_orig) ; + solCLA_orig = MpfCalloc(vectorSize, sizeof(D_type)) ; CHKPTRQ(solCLA_orig) ; /* Norm of the originaal RHS */ ierr = computeVecNorm(solCLA, &normOrig); CHKERRQ(ierr); } if (hmat_refine) { /* Vector to accumulate solCLA */ - solCLA_sum=(void*)MpfCalloc(vectorSize, sizeof(D_type)) ; CHKPTRQ(solCLA_sum) ; + solCLA_sum = MpfCalloc(vectorSize, sizeof(D_type)) ; CHKPTRQ(solCLA_sum) ; } - /* ===================================================================================== */ /* Factorize the H-matrix */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Factorizing HMAT...\n") ; temps_initial = getTime (); @@ -189,7 +191,7 @@ int testHMAT(void) { ctx.progress = &mpf_hmat_settings.progress; ctx.progress->user_data = (void*)MPI_COMM_WORLD; ctx.factorization = mpf_hmat_settings.factorization_type; - hi->factorize_generic(hmatrix, &ctx); + ierr = hi->factorize_generic(hmatrix, &ctx); CHKERRQ(ierr); temps_final = getTime (); temps_cpu = (temps_final - temps_initial) ; @@ -246,9 +248,8 @@ int testHMAT(void) { Mpf_printf(MPI_COMM_WORLD, "Ratio: %g\n", (double)info.compressed_size/info.uncompressed_size); } - /* ===================================================================================== */ /* Solve the system : A-1.solCLA -> solCLA */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Solve HMAT%s...\n", hmat_refine ? " with Iterative Refinement" : "") ; if ( verboseTest ) { @@ -267,7 +268,7 @@ _refineLoop: // Iterative Refinement loop if (simplePrec) doubleToSimple(solCLA, vectorSize); - ierr = hi->solve_systems(hmatrix, solCLA, nbRHS); + ierr = hi->solve_systems(hmatrix, solCLA, nbRHS); CHKERRQ(ierr); if (simplePrec) simpleToDouble(solCLA, vectorSize); @@ -276,9 +277,8 @@ _refineLoop: // Iterative Refinement loop temps_cpu = (temps_final - temps_initial) ; Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> TpsCpuSolve%s = %f \n", postfix_async, temps_cpu) ; - /* ===================================================================================== */ /* Compute the residual solCLA_orig-A_orig.solCLA */ - /* ===================================================================================== */ + if (hmat_residual) { temps_initial = getTime (); if (simplePrec) { @@ -310,8 +310,8 @@ _refineLoop: // Iterative Refinement loop for (k=0 ; k<vectorSize ; k++) ((double*)solCLA_sum)[k] += ((double*)solCLA)[k]; } - ierr=computeRelativeError(solCLA_sum, rhs, &eps) ; CHKERRQ(ierr) ; - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error_%d = %.4e \n", nbIter, eps); + ierr=computeRelativeError(solCLA_sum, rhs, relative_error) ; CHKERRQ(ierr) ; + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error_%d = %.4e \n", nbIter, *relative_error); } if (hmat_refine) { /* Copy the updated RHS of the system solCLA_orig into solCLA */ @@ -321,7 +321,8 @@ _refineLoop: // Iterative Refinement loop if (hmatrix_orig) hi->destroy(hmatrix_orig); hmatrix_orig=NULL; - if (solCLA_orig) MpfFree(solCLA_orig) ; solCLA_orig=NULL ; + if (solCLA_orig) MpfFree(solCLA_orig); + solCLA_orig=NULL ; if (solCLA_sum) { // If solCLA_sum has been allocated, I use it as a replacement for solCLA MpfFree(solCLA) ; solCLA=solCLA_sum ; @@ -334,18 +335,17 @@ _refineLoop: // Iterative Refinement loop ierr = displayArray("x", solCLA, nbPts, nbRHS) ; CHKERRQ(ierr) ; } - /* ===================================================================================== */ /* Compare the two vectors solCLA and rhs */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Comparing results...\n") ; - ierr=computeRelativeError(solCLA, rhs, &eps) ; CHKERRQ(ierr) ; + ierr=computeRelativeError(solCLA, rhs, relative_error); CHKERRQ(ierr); - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", eps); + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", *relative_error); break; case _inverseHMAT: - /* ===================================================================================== */ + /* Inverse the H-matrix */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Inversing HMAT...\n") ; temps_initial = getTime (); @@ -355,19 +355,18 @@ _refineLoop: // Iterative Refinement loop temps_cpu = (temps_final - temps_initial) ; Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> TpsCpuInversion%s = %f \n", postfix_async, temps_cpu) ; - /* ===================================================================================== */ /* Compute the gemv : A-1.solCLA -> solCLA */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** GEMV HMAT...\n") ; temps_initial = getTime (); if (simplePrec) doubleToSimple(solCLA, vectorSize); - void *solCLA2=(void*)MpfCalloc(vectorSize, sizeof(D_type)) ; CHKPTRQ(solCLA) ; + void *solCLA2 = MpfCalloc(vectorSize, sizeof(D_type)) ; CHKPTRQ(solCLA) ; memcpy(solCLA2, solCLA, vectorSize*sizeof(D_type)); ierr = hi->gemv('N', Mpf_pone[stype], hmatrix, solCLA2, Mpf_zero[stype], solCLA, nbRHS); - if (solCLA2) MpfFree(solCLA2) ; solCLA2=NULL ; + MpfFree(solCLA2); if (simplePrec) simpleToDouble(solCLA, vectorSize); @@ -376,22 +375,23 @@ _refineLoop: // Iterative Refinement loop temps_cpu = (temps_final - temps_initial) ; Mpf_printf(MPI_COMM_WORLD,"<PERFTESTS> TpsCpuGEMV%s = %f \n", postfix_async, temps_cpu) ; - /* ===================================================================================== */ /* Compare the two vectors solCLA and rhs */ - /* ===================================================================================== */ + Mpf_printf(MPI_COMM_WORLD, "\n**** Comparing results...\n") ; - ierr=computeRelativeError(solCLA, rhs, &eps) ; CHKERRQ(ierr) ; + ierr=computeRelativeError(solCLA, rhs, relative_error); CHKERRQ(ierr); - Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", eps); + Mpf_printf(MPI_COMM_WORLD, "<PERFTESTS> Error = %.4e \n", *relative_error); break; default: break; } /* switch(testedAlgo) */ /* Destroys the HMAT structure */ +#ifdef TESTFEMBEM_DUMP_HMAT hi->dump_info(hmatrix, "testHMAT_matrix"); +#endif HMAT_destroy_matrix( hi, hdesc ); - if (solCLA) MpfFree(solCLA) ; solCLA=NULL ; + MpfFree(solCLA); hmat_tracing_dump("testHMAT_trace.json"); leave_context(); diff --git a/src/util.c b/src/util.c index ea8873279279ecda21cfa10a37b4177adfac26ad..ac7de08ab9a9cbcbb7e25438ffe62913158dbb4f 100644 --- a/src/util.c +++ b/src/util.c @@ -424,6 +424,32 @@ int MpfArgGetString( int *Argc, char **argv, int rflag, const char *name, char * return 1; } /* ================================================================================== */ +/*! \brief Equivalent of MPI_Allreduce() with a 64 bit \a count argument + */ +int MPI_AllreduceL(const void *sendbuf, void *recvbuf, ssize_t count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { + int ierr, scalsize; + size_t pos=0, pak=0; + /* MPI maximum message size (in bytes) */ + static size_t PACKET_SIZE=1e8; + + enter_context("MPI_AllreduceL"); + + ierr = MPI_Type_size(datatype, &scalsize); CHKERRQ(ierr); + pak = PACKET_SIZE/scalsize; // number of 'datatype' to send in each message + while (count>0) { + // We communicate by packet of 100 Mbytes + void *src = sendbuf==MPI_IN_PLACE ? MPI_IN_PLACE : (void*)((char*)sendbuf+pos) ; + void *dest = (void*)((char*)recvbuf+pos) ; + ierr = MPI_Allreduce(src, dest, (int)MIN(pak,count), datatype, op, comm); CHKERRQ(ierr); + pos += pak*scalsize; + count -= pak; + } + + leave_context(); + + return 0; +} +/* ================================================================================== */ /*! \brief Sort the array mat, remove the zeros and sum the elements placed at the same position For simple precision only (S) @@ -842,6 +868,7 @@ int SCAB_Init(int* argc, char*** argv) { mpf_hmat_settings.acaEpsilon = 1e-3; mpf_hmat_settings.compressionMethod = hmat_compress_aca_plus; mpf_hmat_settings.epsilon = 1e-3; + mpf_hmat_settings.max_leaf_size = -1; if(strcmp(hmat_get_version(), HMAT_VERSION)) Mpf_printf(MPI_COMM_WORLD, "***\n*** hmat version %s (compiled with version %s)\n", hmat_get_version(), HMAT_VERSION); else @@ -917,6 +944,9 @@ int SCAB_Init(int* argc, char*** argv) { mpf_hmat_settings.l0size); #endif + if (MpfArgGetInt(argc, *argv, 1, "--hmat-leaf-size", &mpf_hmat_settings.max_leaf_size)) + Mpf_printf(MPI_COMM_WORLD,"[HMat] Maximum leaf size: %d\n", mpf_hmat_settings.max_leaf_size); + if (MpfArgHasName( argc, *argv, 1, "--hmat-coarsening" ) > 0) { Mpf_printf(MPI_COMM_WORLD,"[HMat] Coarsening of the HMatrix\n"); settings.coarsening = 1;