diff --git a/src/chameleon.c b/src/chameleon.c index c6aa22e9d5e9e7c65cbe9839c1f0d5a215d9fcad..0f3a184039269033ed1b3e4817abfb72492c4e71 100644 --- a/src/chameleon.c +++ b/src/chameleon.c @@ -1,5 +1,6 @@ #include "main.h" #ifdef HAVE_CHAMELEON +#include "chameleon.h" cham_flttype_t chameleonType; int (*CHAMELEON_sytrf_Tile)( cham_uplo_t, CHAM_desc_t * ) = NULL; // LLt facto int (*CHAMELEON_sytrs_Tile)( cham_uplo_t, CHAM_desc_t *, CHAM_desc_t * ) = NULL; // LLt solve @@ -14,6 +15,7 @@ testFEMBEM_initChameleon( ScalarType stype ) { /* Set the scalar type and the functions used by CHAMELEON */ switch(stype) { +#if defined(CHAMELEON_PREC_S) case (SIMPLE_PRECISION) : chameleonType = ChamRealFloat; CHAMELEON_sytrf_Tile = CHAMELEON_spotrf_Tile; @@ -22,6 +24,8 @@ testFEMBEM_initChameleon( ScalarType stype ) { CHAMELEON_getrf_nopiv_Tile = CHAMELEON_sgetrf_nopiv_Tile; CHAMELEON_getrs_nopiv_Tile = CHAMELEON_sgetrs_nopiv_Tile; break ; +#endif +#if defined(CHAMELEON_PREC_D) case (DOUBLE_PRECISION) : chameleonType = ChamRealDouble; CHAMELEON_sytrf_Tile = CHAMELEON_dpotrf_Tile; @@ -30,6 +34,8 @@ testFEMBEM_initChameleon( ScalarType stype ) { CHAMELEON_getrf_nopiv_Tile = CHAMELEON_dgetrf_nopiv_Tile; CHAMELEON_getrs_nopiv_Tile = CHAMELEON_dgetrs_nopiv_Tile; break ; +#endif +#if defined(CHAMELEON_PREC_C) case (SIMPLE_COMPLEX) : chameleonType = ChamComplexFloat; CHAMELEON_sytrf_Tile = CHAMELEON_csytrf_Tile; @@ -38,6 +44,8 @@ testFEMBEM_initChameleon( ScalarType stype ) { CHAMELEON_getrf_nopiv_Tile = CHAMELEON_cgetrf_nopiv_Tile; CHAMELEON_getrs_nopiv_Tile = CHAMELEON_cgetrs_nopiv_Tile; break ; +#endif +#if defined(CHAMELEON_PREC_Z) case (DOUBLE_COMPLEX) : chameleonType = ChamComplexDouble; CHAMELEON_sytrf_Tile = CHAMELEON_zsytrf_Tile; @@ -46,6 +54,7 @@ testFEMBEM_initChameleon( ScalarType stype ) { CHAMELEON_getrf_nopiv_Tile = CHAMELEON_zgetrf_nopiv_Tile; CHAMELEON_getrs_nopiv_Tile = CHAMELEON_zgetrs_nopiv_Tile; break ; +#endif default : SETERRQ(1, "testHCHAMELEON : unknown scalar type\n") ; break ; @@ -60,30 +69,38 @@ int CHAMELEON_gemm_Tile( CHAM_desc_t *descA, int ierr = 0; switch(stype) { +#if defined(CHAMELEON_PREC_S) case (SIMPLE_PRECISION) : ierr = CHAMELEON_sgemm_Tile( ChamNoTrans, ChamNoTrans, (float)1., descA, descX, (float)0., descY ); CHKERRQ(ierr); break ; +#endif +#if defined(CHAMELEON_PREC_D) case (DOUBLE_PRECISION) : ierr = CHAMELEON_dgemm_Tile( ChamNoTrans, ChamNoTrans, (double)1., descA, descX, (double)0., descY); CHKERRQ(ierr); break ; +#endif +#if defined(CHAMELEON_PREC_C) case (SIMPLE_COMPLEX) : ierr = CHAMELEON_cgemm_Tile( ChamNoTrans, ChamNoTrans, (CHAMELEON_Complex32_t)1., descA, descX, (CHAMELEON_Complex32_t)0., descY); CHKERRQ(ierr); break ; +#endif +#if defined(CHAMELEON_PREC_Z) case (DOUBLE_COMPLEX) : ierr = CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, (CHAMELEON_Complex64_t)1., descA, descX, (CHAMELEON_Complex64_t)0., descY); CHKERRQ(ierr); break ; +#endif default : SETERRQ(1, "CHAMELEON_gemm_Tile: unknown scalar type\n") ; break ; @@ -93,9 +110,9 @@ int CHAMELEON_gemm_Tile( CHAM_desc_t *descA, // indices are 0 based, bounds included static int -CHAMELEON_build_callback_FEMBEM( const CHAM_desc_t *desc, - cham_uplo_t uplo, int m, int n, - CHAM_tile_t *tile, void *op_args ) +CHAMELEON_build_callback_FEMBEM( void *op_args, + cham_uplo_t uplo, int m, int n, int ndata, + const CHAM_desc_t *desc, CHAM_tile_t *tile, ... ) { ASSERTA(tile); @@ -110,12 +127,15 @@ CHAMELEON_build_callback_FEMBEM( const CHAM_desc_t *desc, &Mpf_i_zero, &Mpf_i_one, &Mpf_i_one, &Mpf_i_one, &size_of_buffer, &(tile->ld), tile->mat, op_args ); + (void)ndata; return 0; } int CHAMELEON_generate_matrix( cham_flttype_t flttype, int NB, int PQ[2], CHAM_desc_t **descA ) { + cham_map_data_t data; + cham_map_operator_t op; int ierr; int N = nbPts; cham_uplo_t uplo = symMatSolver ? ChamLower : ChamUpperLower; @@ -133,8 +153,15 @@ CHAM_desc_t **descA ) CHKERRQ(ierr); // Assembly driven by chameleon and using the callback CHAMELEON_build_callback_FEMBEM() defined above - ierr = CHAMELEON_map_Tile( ChamW, uplo, *descA, - CHAMELEON_build_callback_FEMBEM, myCtx ); + data.access = ChamW; + data.desc = *descA; + + op.name = "BuildFEMBEM"; + op.cpufunc = CHAMELEON_build_callback_FEMBEM; + op.cudafunc = NULL; + op.hipfunc = NULL; + + ierr = CHAMELEON_mapv_Tile( uplo, 1, &data, &op, myCtx ); CHKERRQ(ierr); MpfFree( myCtx ); diff --git a/src/hchameleon.c b/src/hchameleon.c index 04c6c834d92c166169b5427c5c8e4e301182cf98..0fbc09d1716d0c5c4f9fd1eef15acdb257eabd1c 100644 --- a/src/hchameleon.c +++ b/src/hchameleon.c @@ -30,9 +30,9 @@ inline static int chameleon_desc_islocal( const CHAM_desc_t *A, int m, int n ) // indices are 0 based, bounds included /* Initialize a given tile with an h-matrix, given the row and cluster tree */ static int -HCHAMELEON_build_callback_FEMBEM( const CHAM_desc_t *desc, - cham_uplo_t uplo, int m, int n, - CHAM_tile_t *tile, void *op_args ) +HCHAMELEON_build_callback_FEMBEM( void *op_args, + cham_uplo_t uplo, int m, int n, int ndata, + const CHAM_desc_t *desc, CHAM_tile_t *tile, ... ) { contextTestFEMBEM ctx_fembem; struct mpf_hmat_create_compression_args_t compression_ctx; @@ -193,8 +193,19 @@ hmat_interface_t *hi ) */ #if 1 cham_uplo_t uplo = symMatSolver ? ChamLower : ChamUpperLower; - ierr = CHAMELEON_map_Tile( ChamW, uplo, descA, - HCHAMELEON_build_callback_FEMBEM, hdescA ); + cham_map_data_t data; + cham_map_operator_t op; + + data.access = ChamW; + data.desc = descA; + + op.name = "BuildFEMBEM-H"; + op.cpufunc = HCHAMELEON_build_callback_FEMBEM; + op.cudafunc = NULL; + op.hipfunc = NULL; + + ierr = CHAMELEON_mapv_Tile( uplo, 1, &data, &op, hdescA ); + CHKERRQ(ierr); #else tile = descA->tiles; for(j=0; j < descA->lnt; j++) { @@ -253,7 +264,7 @@ HCHAMELEON_destroy_matrix( HCHAM_desc_t *hdescA ) } /* Provides a flat matrix (array of values) given an hmatrix structure */ -typedef int (*core_lacpy_fct_t)( cham_uplo_t, int, int, const void *, int, void *, int ); +typedef void (*core_lacpy_fct_t)( cham_uplo_t, int, int, const void *, int, void *, int ); static void TCORE_uncompress( HCHAM_desc_t *hdescA, @@ -301,23 +312,31 @@ TCORE_uncompress( HCHAM_desc_t *hdescA, free(col_ptr); } else { - int (*CORE_lacpy)( cham_uplo_t, int, int, + void (*CORE_lacpy)( cham_uplo_t, int, int, const void *, int, void *, int ) = NULL; switch( hdescA->super->dtyp ) { +#if defined(CHAMELEON_PREC_S) case ChamRealFloat: CORE_lacpy = (core_lacpy_fct_t)CORE_slacpy; break ; +#endif +#if defined(CHAMELEON_PREC_D) case ChamRealDouble: CORE_lacpy = (core_lacpy_fct_t)CORE_dlacpy; break ; +#endif +#if defined(CHAMELEON_PREC_C) case ChamComplexFloat: CORE_lacpy = (core_lacpy_fct_t)CORE_clacpy; break ; +#endif +#if defined(CHAMELEON_PREC_Z) case ChamComplexDouble: CORE_lacpy = (core_lacpy_fct_t)CORE_zlacpy; break ; +#endif default : - //SETERRQ(1, "HCHAMELEON_uncompress_matrix: unknown scalar type\n") ; + SETERRQ(1, "HCHAMELEON_uncompress_matrix: unknown scalar type\n") ; break ; } CORE_lacpy( ChamUpperLower, tileA->m, tileA->n, @@ -377,9 +396,9 @@ typedef struct getinfo_args_s { } getinfo_args_t; static int -HCHAMELEON_map_getinfo( const CHAM_desc_t *desc, - cham_uplo_t uplo, int m, int n, - CHAM_tile_t *tile, void *op_args ) +HCHAMELEON_map_getinfo( void *op_args, + cham_uplo_t uplo, int m, int n, int ndata, + const CHAM_desc_t *desc, CHAM_tile_t *tile, ... ) { getinfo_args_t *getinfo = (getinfo_args_t*)op_args; hmat_info_t info; @@ -390,6 +409,10 @@ HCHAMELEON_map_getinfo( const CHAM_desc_t *desc, getinfo->uncompress_size += info.uncompressed_size; pthread_mutex_unlock( &(getinfo->mutex) ); + (void)ndata; + (void)uplo; + (void)m; + (void)n; return 0; } @@ -404,9 +427,18 @@ hmat_info_t HCHAMELEON_getinfo( HCHAM_desc_t *hdesc ) .mutex = PTHREAD_MUTEX_INITIALIZER, }; cham_uplo_t uplo = symMatSolver ? ChamLower : ChamUpperLower; + cham_map_data_t data; + cham_map_operator_t op; + + data.access = ChamR; + data.desc = hdesc->super; + + op.name = "HCHAM_getInfo"; + op.cpufunc = HCHAMELEON_map_getinfo; + op.cudafunc = NULL; + op.hipfunc = NULL; - CHAMELEON_map_Tile( ChamR, uplo, hdesc->super, - HCHAMELEON_map_getinfo, &op_args ); + CHAMELEON_mapv_Tile( uplo, 1, &data, &op, &op_args ); MPI_Allreduce( &(op_args.compress_size), &ginfo.compressed_size, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD );