diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 89a192273bf4e635ebb50d25c87263b733dd436a..12e069dadc3a9bfae8a85332cfc0f065c9da1843 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -113,7 +113,7 @@ test_starpu_branches: test_starpu_master: <<: *test_script variables: - TESTS_RESTRICTION: "-R \"test_shm_|test_mpi_\"" + TESTS_RESTRICTION: "-R \"_shm_|_mpi_\"" VERSION: starpu dependencies: - build_starpu @@ -187,7 +187,7 @@ test_quark_branches: test_quark_master: <<: *test_script variables: - TESTS_RESTRICTION: "-R \"test_shm_|test_mpi_\"" + TESTS_RESTRICTION: "-R \"_shm_|_mpi_\"" VERSION: quark dependencies: - build_quark @@ -223,7 +223,7 @@ test_parsec_branches: test_parsec_master: <<: *test_script variables: - TESTS_RESTRICTION: "-R \"test_shm_|test_mpi_\"" + TESTS_RESTRICTION: "-R \"_shm_|_mpi_\"" VERSION: parsec dependencies: - build_parsec diff --git a/control/context.c b/control/context.c index 9464aa92203abcc49b73d3de9f4fff2afec408b6..881abe9746eae4441b2ba387dfc11474970c76e7 100644 --- a/control/context.c +++ b/control/context.c @@ -67,11 +67,11 @@ CHAM_context_t *chameleon_context_create() chamctxt->ncudas = 0; chamctxt->nthreads_per_worker= 1; - chamctxt->warnings_enabled = CHAMELEON_TRUE; - chamctxt->autotuning_enabled = CHAMELEON_TRUE; - chamctxt->parallel_enabled = CHAMELEON_FALSE; - chamctxt->profiling_enabled = CHAMELEON_FALSE; - chamctxt->progress_enabled = CHAMELEON_FALSE; + chamctxt->warnings_enabled = CHAMELEON_TRUE; + chamctxt->autotuning_enabled = CHAMELEON_TRUE; + chamctxt->parallel_enabled = CHAMELEON_FALSE; + chamctxt->profiling_enabled = CHAMELEON_FALSE; + chamctxt->progress_enabled = CHAMELEON_FALSE; chamctxt->householder = ChamFlatHouseholder; chamctxt->translation = ChamOutOfPlace; @@ -260,7 +260,7 @@ int CHAMELEON_Disable(int option) * \retval CHAMELEON_SUCCESS successful exit * */ -int CHAMELEON_Set(int param, int value) +int CHAMELEON_Set( int param, int value ) { CHAM_context_t *chamctxt; diff --git a/control/descriptor.c b/control/descriptor.c index 316c0244f804b6d71b94814ab078527ff07abdb7..7216518491ea24858167d648ab3ff69764b385ff 100644 --- a/control/descriptor.c +++ b/control/descriptor.c @@ -97,11 +97,11 @@ static int nbdesc = 0; * */ CHAM_desc_t chameleon_desc_init_user(cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, - int m, int n, int p, int q, - void* (*get_blkaddr)( const CHAM_desc_t*, int, int ), - int (*get_blkldd) ( const CHAM_desc_t*, int ), - int (*get_rankof) ( const CHAM_desc_t*, int, int )) + int lm, int ln, int i, int j, + int m, int n, int p, int q, + void* (*get_blkaddr)( const CHAM_desc_t*, int, int ), + int (*get_blkldd) ( const CHAM_desc_t*, int ), + int (*get_rankof) ( const CHAM_desc_t*, int, int )) { CHAM_context_t *chamctxt; CHAM_desc_t desc; @@ -195,22 +195,22 @@ CHAM_desc_t chameleon_desc_init_user(cham_flttype_t dtyp, int mb, int nb, int bs * Internal static descriptor initializer */ CHAM_desc_t chameleon_desc_init(cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, - int m, int n, int p, int q) + int lm, int ln, int i, int j, + int m, int n, int p, int q) { return chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q, - chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, chameleon_getrankof_2d); + chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, chameleon_getrankof_2d); } /** * Internal static descriptor initializer for a block diagonal matrix */ CHAM_desc_t chameleon_desc_init_diag(cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, - int m, int n, int p, int q) + int lm, int ln, int i, int j, + int m, int n, int p, int q) { return chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q, - chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, chameleon_getrankof_2d_diag); + chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, chameleon_getrankof_2d_diag); } /** @@ -394,7 +394,7 @@ int chameleon_desc_mat_free( CHAM_desc_t *desc ) * */ int CHAMELEON_Desc_Create(CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, int m, int n, int p, int q) + int lm, int ln, int i, int j, int m, int n, int p, int q) { CHAM_context_t *chamctxt; CHAM_desc_t *desc; @@ -508,10 +508,10 @@ int CHAMELEON_Desc_Create(CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, * */ int CHAMELEON_Desc_Create_User(CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, int m, int n, int p, int q, - void* (*get_blkaddr)( const CHAM_desc_t*, int, int ), - int (*get_blkldd) ( const CHAM_desc_t*, int ), - int (*get_rankof) ( const CHAM_desc_t*, int, int )) + int lm, int ln, int i, int j, int m, int n, int p, int q, + void* (*get_blkaddr)( const CHAM_desc_t*, int, int ), + int (*get_blkldd) ( const CHAM_desc_t*, int ), + int (*get_rankof) ( const CHAM_desc_t*, int, int )) { CHAM_context_t *chamctxt; CHAM_desc_t *desc; @@ -533,7 +533,7 @@ int CHAMELEON_Desc_Create_User(CHAM_desc_t **descptr, void *mat, cham_flttype_t } *desc = chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q, - get_blkaddr, get_blkldd, get_rankof); + get_blkaddr, get_blkldd, get_rankof); /* if the user gives a pointer to the overall data (tiles) we can use it */ desc->use_mat = (mat == NULL) ? 0 : 1; @@ -605,8 +605,8 @@ int CHAMELEON_Desc_Create_User(CHAM_desc_t **descptr, void *mat, cham_flttype_t * */ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, int m, int n, int p, int q, - int (*get_rankof)( const CHAM_desc_t*, int, int )) + int lm, int ln, int i, int j, int m, int n, int p, int q, + int (*get_rankof)( const CHAM_desc_t*, int, int )) { #if !defined (CHAMELEON_SCHED_STARPU) (void)descptr; (void)dtyp; (void)mb; (void)nb; (void)bsiz; @@ -614,7 +614,7 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i (void)get_rankof; chameleon_error("CHAMELEON_Desc_Create_OOC_User", "Only StarPU supports on-demand tile allocation"); - return CHAMELEON_ERR_NOT_INITIALIZED; + return CHAMELEON_ERR_NOT_SUPPORTED; #else CHAM_context_t *chamctxt; CHAM_desc_t *desc; @@ -627,14 +627,15 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i chameleon_error("CHAMELEON_Desc_Create_OOC_User", "CHAMELEON not initialized"); return CHAMELEON_ERR_NOT_INITIALIZED; } + /* Allocate memory and initialize the descriptor */ desc = (CHAM_desc_t*)malloc(sizeof(CHAM_desc_t)); - if (desc == NULL) { + if ( desc == NULL ) { chameleon_error("CHAMELEON_Desc_Create_OOC_User", "malloc() failed"); return CHAMELEON_ERR_OUT_OF_RESOURCES; } - *desc = chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q, - chameleon_getaddr_null, NULL, get_rankof); + *desc = chameleon_desc_init_user( dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q, + chameleon_getaddr_null, NULL, get_rankof ); /* memory of the matrix is completely handled by runtime */ desc->use_mat = 0; @@ -701,11 +702,11 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i * */ int CHAMELEON_Desc_Create_OOC(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz, - int lm, int ln, int i, int j, int m, int n, int p, int q) + int lm, int ln, int i, int j, int m, int n, int p, int q) { return CHAMELEON_Desc_Create_OOC_User( descptr, dtyp, mb, nb, bsiz, - lm, ln, i, j, m, n, p, q, - chameleon_getrankof_2d ); + lm, ln, i, j, m, n, p, q, + chameleon_getrankof_2d ); } /** @@ -722,8 +723,7 @@ int CHAMELEON_Desc_Create_OOC(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Destroy(CHAM_desc_t **desc) @@ -763,8 +763,7 @@ int CHAMELEON_Desc_Destroy(CHAM_desc_t **desc) * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Acquire (CHAM_desc_t *desc) { @@ -787,8 +786,7 @@ int CHAMELEON_Desc_Acquire (CHAM_desc_t *desc) { * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Release (CHAM_desc_t *desc) { @@ -800,7 +798,9 @@ int CHAMELEON_Desc_Release (CHAM_desc_t *desc) { * * @ingroup Descriptor * - * CHAMELEON_Desc_Flush - Flushes the data in the sequence when they won't be reused. This calls cleans up the distributed communication caches, and transfer the data back to the CPU. + * CHAMELEON_Desc_Flush - Flushes the data in the sequence when they won't be + * reused. This calls cleans up the distributed communication caches, and + * transfer the data back to the CPU. * ****************************************************************************** * @@ -809,12 +809,11 @@ int CHAMELEON_Desc_Release (CHAM_desc_t *desc) { * ****************************************************************************** * - * @return - * \retval CHAMELEON_SUCCESS successful exit + * @retval CHAMELEON_SUCCESS successful exit * */ int CHAMELEON_Desc_Flush( CHAM_desc_t *desc, - RUNTIME_sequence_t *sequence ) + RUNTIME_sequence_t *sequence ) { RUNTIME_desc_flush( desc, sequence ); return CHAMELEON_SUCCESS; @@ -838,11 +837,6 @@ int CHAMELEON_Desc_Flush( CHAM_desc_t *desc, * @param[in] user_tag_sep * The new value for tag_sep. * - ****************************************************************************** - * - * @return - * \retval none - * */ void CHAMELEON_user_tag_size(int user_tag_width, int user_tag_sep) { RUNTIME_comm_set_tag_sizes( user_tag_width, user_tag_sep ); diff --git a/doc/orgmode/chapters/using.org b/doc/orgmode/chapters/using.org index eb4c9cdc6bc8e6f20babaed269140971c67bca3f..26ecd8042e62465fcfea82e5a01623d8b92cf816 100644 --- a/doc/orgmode/chapters/using.org +++ b/doc/orgmode/chapters/using.org @@ -290,6 +290,18 @@ (simple and double precision) on /mirage/ and /sirocco/ machines are available for now. Database of models is subject to change. +*** Use out of core support with StarPU + <<sec:ooc>> + + If the matrix can not fit in the main memory, StarPU can automatically evict + tiles to the disk. The following variables need to be set: + * *STARPU_DISK_SWAP* environment variable to a place where to store + evicted tiles, for example: ~STARPU_DISK_SWAP=/tmp~ + * *STARPU_DISK_SWAP_BACKEND* environment variable to the I/O method, + for example: ~STARPU_DISK_SWAP_BACKEND=unistd_o_direct~ + * *STARPU_LIMIT_CPU_MEM* environment variable to the amount of memory + that can be used in MBytes, for example: ~STARPU_LIMIT_CPU_MEM=1000~ + ** Chameleon API Chameleon provides routines to solve dense general systems of diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 64baacbcab27622334cd26a2bf1adc09332e3852..24504bebf430871b91f7604f4df70552a34c1d32 100755 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -27,11 +27,6 @@ endif() if (CHAMELEON_PREC_D) add_subdirectory(lapack_to_chameleon) - if (CHAMELEON_SCHED_STARPU) - if (${STARPU_VERSION_MAJOR} GREATER 0 AND ${STARPU_VERSION_MINOR} GREATER 1) - add_subdirectory(out_of_core) - endif() - endif() else() message(WARNING "CHAMELEON_PREC_D is set to OFF so that lapack_to_chameleon " "and out_core tutorials cannot be built (use only double arithmetic " diff --git a/example/out_of_core/CMakeLists.txt b/example/out_of_core/CMakeLists.txt deleted file mode 100644 index 0b39ead5f5e7c2b0ad0e5b325af16feb8cda777e..0000000000000000000000000000000000000000 --- a/example/out_of_core/CMakeLists.txt +++ /dev/null @@ -1,61 +0,0 @@ -### -# -# @file CMakeLists.txt -# -# @copyright 2009-2014 The University of Tennessee and The University of -# Tennessee Research Foundation. All rights reserved. -# @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, -# Univ. Bordeaux. All rights reserved. -# -### -# -# CHAMELEON example routines -# CHAMELEON is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI, -# University of Bordeaux, Bordeaux INP -# -# @version 1.0.0 -# @author Florent Pruvost -# @date 2016-08-23 -# -### -if (NOT CHAMELEON_SCHED_STARPU) - message(ERROR "This directory should not be included if CHAMELEON_SCHED_STARPU is not enabled") -endif() - -if(CHAMELEON_SIMULATION) - message(ERROR "This directory should not be included if CHAMELEON_SIMULATION is enabled") -endif() - -include_directories(${CMAKE_CURRENT_BINARY_DIR}) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}) - -# list of sources -set(OOC_SOURCES - out_of_core.c - ) - -# Define what libraries we have to link with -# ------------------------------------------ -unset(libs_for_ooc) -# ooc executable depends on chameleon, starpu and lapacke (already chameleon's dependencies) -list(APPEND libs_for_ooc chameleon) -# message(STATUS "libs_for_ooc: ${libs_for_ooc}") - -foreach(_ooc ${OOC_SOURCES}) - get_filename_component(_name_exe ${_ooc} NAME_WE) - add_executable(${_name_exe} ${_ooc}) - set_property(TARGET ${_name_exe} PROPERTY LINKER_LANGUAGE Fortran) - if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - set( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -undefined dynamic_lookup" ) - endif() - target_link_libraries(${_name_exe} ${libs_for_ooc}) - install(TARGETS ${_name_exe} - DESTINATION bin/example/out_of_core) -endforeach() - -#-------- Tests --------- -include(CTestLists.cmake) - -### -### END CMakeLists.txt -### diff --git a/example/out_of_core/CTestLists.cmake b/example/out_of_core/CTestLists.cmake deleted file mode 100644 index 1ca4036bdcaee19a5e752ad4e02f3ddafb465a6f..0000000000000000000000000000000000000000 --- a/example/out_of_core/CTestLists.cmake +++ /dev/null @@ -1,15 +0,0 @@ -# -# Check Example out_of_core -# - -set(TESTLIST - out_of_core - ) - -# OOC tests required to create a directory where the data will be written on disk -file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ooc) - -# define tests -foreach(test ${TESTLIST}) - add_test(example_ooc_${test} ./${test}) -endforeach() diff --git a/example/out_of_core/out_of_core.c b/example/out_of_core/out_of_core.c deleted file mode 100644 index 763a140f124a600f52f04d01b3c0e52eecd2df64..0000000000000000000000000000000000000000 --- a/example/out_of_core/out_of_core.c +++ /dev/null @@ -1,190 +0,0 @@ -/** - * - * @file out_of_core.c - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon out_of_core example - * - * @version 1.0.0 - * @author Florent Pruvost - * @date 2014-10-29 - * - */ -#include "out_of_core.h" - -/* - * @brief ooc is driver example routine to test the out-of-core feature with StarPU - * @details TODO: write some details - */ -int main(int argc, char *argv[]) { - - size_t N; // matrix order - int NB; // number of rows and columns in tiles - int NRHS; // number of RHS vectors - int NCPU; // number of cores to use - int NGPU; // number of gpus (cuda devices) to use - int UPLO = ChamUpper; // where is stored L - - /* descriptors necessary for calling CHAMELEON tile interface */ - CHAM_desc_t *descA = NULL, *descAC = NULL, *descB = NULL, *descX = NULL; - - /* declarations to time the program and evaluate performances */ - double fmuls, fadds, flops, gflops, cpu_time; - - /* variable to check the numerical results */ - double anorm, bnorm, xnorm, eps, res; - int hres; - - /* initialize some parameters with default values */ - int iparam[IPARAM_SIZEOF]; - memset(iparam, 0, IPARAM_SIZEOF*sizeof(int)); - init_iparam(iparam); - - /* read arguments */ - read_args(argc, argv, iparam); - N = iparam[IPARAM_N]; - NB = iparam[IPARAM_NB]; - NRHS = iparam[IPARAM_NRHS]; - - /* compute the algorithm complexity to evaluate performances */ - fadds = (double)( FADDS_POTRF(N) + 2 * FADDS_TRSM(N,NRHS) ); - fmuls = (double)( FMULS_POTRF(N) + 2 * FMULS_TRSM(N,NRHS) ); - flops = 1e-9 * (fmuls + fadds); - - /* initialize the number of thread if not given by the user in argv */ - if ( iparam[IPARAM_THRDNBR] == -1 ) { - get_thread_count( &(iparam[IPARAM_THRDNBR]) ); - } - NCPU = iparam[IPARAM_THRDNBR]; - NGPU = 0; - - /* print informations to user */ - print_header( argv[0], iparam); - - /* check that o direct will work */ - if (iparam[IPARAM_OUTOFCORE] > 0) { - if (! will_o_direct_work(NB)) { - print_o_direct_wont_work(); - return EXIT_FAILURE; - } - char maxMemoryAllowed[32]; - sprintf (maxMemoryAllowed, "%d", iparam[IPARAM_OUTOFCORE]); - setenv ("STARPU_LIMIT_CPU_MEM", maxMemoryAllowed, 1); - } - - /* Initialize CHAMELEON with main parameters */ - if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) { - fprintf(stderr, "Error initializing CHAMELEON library\n"); - return EXIT_FAILURE; - } - CHAMELEON_Set(CHAMELEON_TILE_SIZE, NB); - - /* limit ram memory */ - if (iparam[IPARAM_OUTOFCORE] > 0) { - int new_dd = starpu_disk_register (&starpu_disk_unistd_o_direct_ops, - (void*) "./ooc/", 1024*1024*10); - if (new_dd == -ENOENT){ - fprintf(stderr, "Can't write on ./ooc/\n"); - return EXIT_FAILURE; - } - } - - CHAMELEON_Desc_Create_User(&descA, NULL, ChamRealDouble, - NB, NB, NB*NB, N, N, 0, 0, N, N, 1, 1, - chameleon_getaddr_null, // specific function - chameleon_getblkldd_ccrb, - chameleon_getrankof_2d); - CHAMELEON_Desc_Create(&descB, NULL, ChamRealDouble, - NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS, 1, 1); - CHAMELEON_Desc_Create(&descX, NULL, ChamRealDouble, - NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS, 1, 1); - CHAMELEON_Desc_Create(&descAC, NULL, ChamRealDouble, - NB, NB, NB*NB, N, N, 0, 0, N, N, 1, 1); - - /* generate A matrix with random values such that it is spd */ - CHAMELEON_dplgsy_Tile( (double)N, ChamUpperLower, descA, 51 ); - - /* generate RHS */ - CHAMELEON_dplrnt_Tile( descB, 5673 ); - - /* copy A before facto. in order to check the result */ - CHAMELEON_dlacpy_Tile(ChamUpperLower, descA, descAC); - - /* copy B in X before solving - * same sense as memcpy(X, B, N*NRHS*sizeof(double)) but for descriptors */ - CHAMELEON_dlacpy_Tile(ChamUpperLower, descB, descX); - - /************************************************************/ - /* solve the system AX = B using the Cholesky factorization */ - /************************************************************/ - - cpu_time = -CHAMELEON_timer(); - - /* Cholesky factorization: - * A is replaced by its factorization L or L^T depending on uplo */ - CHAMELEON_dpotrf_Tile( UPLO, descA ); - - /* Solve: - * B is stored in X on entry, X contains the result on exit. - * Forward and back substitutions - */ - CHAMELEON_dpotrs_Tile( UPLO, descA, descX ); - - cpu_time += CHAMELEON_timer(); - - /* print informations to user */ - gflops = flops / cpu_time; - printf( "%9.3f %9.2f\n", cpu_time, gflops); - fflush( stdout ); - - /************************************************************/ - /* check if solve is correct i.e. AX-B = 0 */ - /************************************************************/ - - /* compute norms to check the result */ - anorm = CHAMELEON_dlange_Tile( ChamInfNorm, descAC); - bnorm = CHAMELEON_dlange_Tile( ChamInfNorm, descB); - xnorm = CHAMELEON_dlange_Tile( ChamInfNorm, descX); - - /* compute A*X-B, store the result in B */ - CHAMELEON_dgemm_Tile( ChamNoTrans, ChamNoTrans, - 1.0, descAC, descX, -1.0, descB ); - res = CHAMELEON_dlange_Tile( ChamInfNorm, descB ); - - /* check residual and print a message */ - eps = LAPACKE_dlamch_work( 'e' ); - - /* - * if hres = 0 then the test succeed - * else the test failed - */ - hres = ( (res / N / eps / (anorm * xnorm + bnorm )) > 100.0 ); - printf( " ||Ax-b|| ||A|| ||x|| ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||) RETURN\n"); - if (hres) { - printf( "%8.5e %8.5e %8.5e %8.5e %8.5e FAILURE \n", - res, anorm, xnorm, bnorm, - res / N / eps / (anorm * xnorm + bnorm )); - } - else { - printf( "%8.5e %8.5e %8.5e %8.5e %8.5e SUCCESS \n", - res, anorm, xnorm, bnorm, - res / N / eps / (anorm * xnorm + bnorm )); - } - - /* free descriptors descA, descB, descX, descAC */ - CHAMELEON_Desc_Destroy( &descA ); - CHAMELEON_Desc_Destroy( &descB ); - CHAMELEON_Desc_Destroy( &descX ); - CHAMELEON_Desc_Destroy( &descAC ); - - /* Finalize CHAMELEON */ - CHAMELEON_Finalize(); - - return EXIT_SUCCESS; -} diff --git a/example/out_of_core/out_of_core.h b/example/out_of_core/out_of_core.h deleted file mode 100644 index c9bcea306b9b9be3c00cedd106f56f097fa283cd..0000000000000000000000000000000000000000 --- a/example/out_of_core/out_of_core.h +++ /dev/null @@ -1,196 +0,0 @@ -/** - * - * @file out_of_core.h - * - * @copyright 2009-2014 The University of Tennessee and The University of - * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - *** - * - * @brief Chameleon out_of_core example header - * - * @version 1.0.0 - * @author Florent Pruvost - * @date 2016-08-23 - * - */ -#ifndef _out_of_core_h_ -#define _out_of_core_h_ - -#include <math.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> - -#if defined( _WIN32 ) || defined( _WIN64 ) -#define int64_t __int64 -#endif - -/* Define these so that the Microsoft VC compiler stops complaining - about scanf and friends */ -#define _CRT_SECURE_NO_DEPRECATE -#define _CRT_SECURE_NO_WARNINGS - -#if defined( _WIN32 ) || defined( _WIN64 ) -#include <windows.h> -#else /* Non-Windows */ -#include <unistd.h> -#include <sys/resource.h> -#endif - -#include <starpu.h> -#include "coreblas/lapacke.h" -#include "chameleon.h" -#include "control/common.h" - -/* Common functions for all steps of the tutorial */ - -static void get_thread_count(int *thrdnbr) { -#if defined WIN32 || defined WIN64 - sscanf( getenv( "NUMBER_OF_PROCESSORS" ), "%d", thrdnbr ); -#else - *thrdnbr = sysconf(_SC_NPROCESSORS_ONLN); -#endif -} - -static int startswith(const char *s, const char *prefix) { - size_t n = strlen( prefix ); - if (strncmp( s, prefix, n )) - return 0; - return 1; -} - - -/* define complexity of algorithms - see Lawn 41 page 120 */ -#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) -#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) ) * (double)(__n) - (1. / 6.))) -#define FMULS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) -#define FADDS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) - -/* define some tools to time the program */ -#include <chameleon/timer.h> - -/* Integer parameters */ -enum iparam_ooc { - IPARAM_THRDNBR, /* Number of cores */ - IPARAM_N, /* Number of columns of the matrix */ - IPARAM_NB, /* Number of columns in a tile */ - IPARAM_NRHS, /* Number of RHS */ - IPARAM_OUTOFCORE, /* if > 0 --> how many memory accepted incore */ - /* else --> do not use ooc. */ - /* End */ - IPARAM_SIZEOF -}; - -/* Specific routines */ - -/** - * Initialize integer parameters - */ -static void init_iparam(int iparam[IPARAM_SIZEOF]){ - iparam[IPARAM_THRDNBR ] = -1; - iparam[IPARAM_N ] = 500; - iparam[IPARAM_NB ] = 128; - iparam[IPARAM_NRHS ] = 1; - iparam[IPARAM_OUTOFCORE ] = 2000; - } - -/** - * Print how to use the program - */ -static void show_help(char *prog_name) { - printf( "Usage:\n%s [options]\n\n", prog_name ); - printf( "Options are:\n" - " --help Show this help\n" - "\n" - " --n=X dimension (N). (default: 500)\n" - " --nb=X NB size. (default: 128)\n" - " --nrhs=X number of RHS. (default: 1)\n" - "\n" - " --threads=X Number of CPU workers (default: _SC_NPROCESSORS_ONLN)\n" - " --ooc=N Allow to store N MiB in main memory. (default: )\n" - "\n"); -} - -/** - * Read arguments following ooc program call - */ -static void read_args(int argc, char *argv[], int *iparam){ - int i; - for (i = 1; i < argc && argv[i]; ++i) { - if ( startswith( argv[i], "--help") || startswith( argv[i], "-help") || - startswith( argv[i], "--h") || startswith( argv[i], "-h") ) { - show_help( argv[0] ); - exit(0); - } else if (startswith( argv[i], "--n=" )) { - sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_N]) ); - } else if (startswith( argv[i], "--nb=" )) { - sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NB]) ); - } else if (startswith( argv[i], "--nrhs=" )) { - sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NRHS]) ); - } else if (startswith( argv[i], "--threads=" )) { - sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR]) ); - } else if (startswith( argv[i], "--ooc=" )) { - sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_OUTOFCORE]) ); - } else { - fprintf( stderr, "Unknown option: %s\n", argv[i] ); - } - } -} - -/** - * Print a header message to summarize main parameters - */ -static void print_header(char *prog_name, int * iparam) { -#if defined(CHAMELEON_SIMULATION) - double eps = 0.; -#else - double eps = LAPACKE_dlamch_work( 'e' ); -#endif - - printf( "#\n" - "# CHAMELEON %d.%d.%d, %s\n" - "# Nb threads: %d\n" - "# Nb gpus: %d\n" - "# N: %d\n" - "# NB: %d\n" - "# IB: %d\n" - "# eps: %e\n" - "# ooc: %d\n" - "#\n", - CHAMELEON_VERSION_MAJOR, - CHAMELEON_VERSION_MINOR, - CHAMELEON_VERSION_MICRO, - prog_name, - iparam[IPARAM_THRDNBR], - 0, - iparam[IPARAM_N], - iparam[IPARAM_NB], - 32, - eps, - iparam[IPARAM_OUTOFCORE]); - - printf( "# M N K/NRHS seconds Gflop/s\n"); - printf( "#%7d %7d %7d ", iparam[IPARAM_N], iparam[IPARAM_N], iparam[IPARAM_NRHS]); - fflush( stdout ); - return; -} - -// Checking if all block size is a multiple of 4096 Bytes -static int -will_o_direct_work(int nb) { - if ((nb * nb * sizeof(float)) % 4096 != 0) - return 0; - return 1; -} - -static void -print_o_direct_wont_work(void) { - fprintf(stderr, "\n[chameleon] Using out-of-core in o_direct force your blocks' size to be\n" - "multiples of 4096. Tip : chose 'n' and 'nb' as both multiples of 32.\n"); -} - -#endif /* _out_of_core_h_ */ diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h index 9dde64b0a19ad5add0d7976972792e888f3de805..bbd7e80c659d8e4981950cf6c509adf116e3f865 100644 --- a/include/chameleon/runtime.h +++ b/include/chameleon/runtime.h @@ -56,7 +56,7 @@ RUNTIME_context_destroy( CHAM_context_t *ctxt ); /** * @brief Enable a global option of the runtime. - * @warning Should be called only by RUNTIME_Enable() + * @warning Should be called only by CHAMELEON_Enable() * * @param[in] option * @arg CHAMELEON_PROFILING_MODE: start the profiling mode of the runtime. @@ -66,7 +66,7 @@ RUNTIME_enable( int option ); /** * @brief Disable a global option of the runtime. - * @warning Should be called only by RUNTIME_Disable() + * @warning Should be called only by CHAMELEON_Disable() * * @param[in] option * @arg CHAMELEON_PROFILING_MODE: stop the profiling mode of the runtime. diff --git a/timing/CTestLists.cmake b/timing/CTestLists.cmake index 9fd359c56eb65d8a4882ab33948c6d0dd687167c..529571f4de521913aad8dd35f50baa2829582396 100644 --- a/timing/CTestLists.cmake +++ b/timing/CTestLists.cmake @@ -2,14 +2,14 @@ # Check timing/ # -set(TEST_CMD_shm --n_range=500:2000:500 --nb=320 ) -set(TEST_CMD_shmgpu --n_range=500:2000:500 --nb=320 --gpus=1) -set(TEST_CMD_mpi --n_range=500:2000:500 --nb=320 --P=2) -set(TEST_CMD_mpigpu --n_range=500:2000:500 --nb=320 --P=2 --gpus=1) +set(TEST_CMD_shm --n_range=17:407:39 --nb=32 --ib=7 ) +set(TEST_CMD_shmgpu --n_range=170:4070:390 --nb=320 --ib=48 --gpus=1 ) +set(TEST_CMD_mpi --n_range=17:407:39 --nb=32 --ib=7 --P=2 ) +set(TEST_CMD_mpigpu --n_range=170:4070:390 --nb=320 --ib=48 --P=2 --gpus=1) -set(MPI_CMD_shm ) +set(MPI_CMD_shm ) set(MPI_CMD_shmgpu ) -set(MPI_CMD_mpi mpirun -np 4) +set(MPI_CMD_mpi mpirun -np 4) set(MPI_CMD_mpigpu mpirun -np 4) if (NOT CHAMELEON_SIMULATION) @@ -62,10 +62,29 @@ if (NOT CHAMELEON_SIMULATION) endforeach() endforeach() + if ( CHAMELEON_SCHED_STARPU ) + foreach(cat ${TEST_CATEGORIES}) + foreach(prec ${RP_CHAMELEON_PRECISIONS}) + string(TOUPPER ${prec} PREC) + if (CHAMELEON_PREC_${PREC}) + foreach(test ${TESTLIST}) + add_test(time_ooc_${cat}_${prec}${test} STARPU_DISK_SWAP=/tmp STARPU_LIMIT_CPU_MEM=1 ${MPI_CMD_${cat}} ./time_${prec}${test}_tile ${TEST_CMD_${cat}} --ooc --check) + endforeach() + endif() + endforeach() + foreach(prec ${CHAMELEON_PRECISIONS_ZC}) + string(TOUPPER ${prec} PREC) + if (CHAMELEON_PREC_${PREC}) + foreach(test ${TESTLIST_ZC}) + add_test(time_ooc_${cat}_${prec}${test} STARPU_DISK_SWAP=/tmp STARPU_LIMIT_CPU_MEM=1 ${MPI_CMD_${cat}} ./time_${prec}${test}_tile ${TEST_CMD_${cat}} --ooc --check) + endforeach() + endif() + endforeach() + endforeach() + endif() + if (CHAMELEON_USE_MPI AND MPI_C_FOUND) set( TEST_CATEGORIES mpi ) - set( TEST_CMD_mpi --P=2 --n_range=2000:2000:1 --nb=32) - set( TEST_CMD_mpigpu --P=2 --n_range=2000:2000:1 --nb=32 --gpus=1) #set( TEST_CATEGORIES ${TEST_CATEGORIES} mpi ) #if (CHAMELEON_USE_CUDA AND CUDA_FOUND) # set( TEST_CATEGORIES ${TEST_CATEGORIES} mpigpu )