Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 06beb57c authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Starpu - OOC

parent 75123d32
No related branches found
No related tags found
No related merge requests found
......@@ -113,7 +113,7 @@ test_starpu_branches:
test_starpu_master:
<<: *test_script
variables:
TESTS_RESTRICTION: "-R \"test_shm_|test_mpi_\""
TESTS_RESTRICTION: "-R \"_shm_|_mpi_\""
VERSION: starpu
dependencies:
- build_starpu
......@@ -187,7 +187,7 @@ test_quark_branches:
test_quark_master:
<<: *test_script
variables:
TESTS_RESTRICTION: "-R \"test_shm_|test_mpi_\""
TESTS_RESTRICTION: "-R \"_shm_|_mpi_\""
VERSION: quark
dependencies:
- build_quark
......@@ -223,7 +223,7 @@ test_parsec_branches:
test_parsec_master:
<<: *test_script
variables:
TESTS_RESTRICTION: "-R \"test_shm_|test_mpi_\""
TESTS_RESTRICTION: "-R \"_shm_|_mpi_\""
VERSION: parsec
dependencies:
- build_parsec
......
......@@ -67,11 +67,11 @@ CHAM_context_t *chameleon_context_create()
chamctxt->ncudas = 0;
chamctxt->nthreads_per_worker= 1;
chamctxt->warnings_enabled = CHAMELEON_TRUE;
chamctxt->autotuning_enabled = CHAMELEON_TRUE;
chamctxt->parallel_enabled = CHAMELEON_FALSE;
chamctxt->profiling_enabled = CHAMELEON_FALSE;
chamctxt->progress_enabled = CHAMELEON_FALSE;
chamctxt->warnings_enabled = CHAMELEON_TRUE;
chamctxt->autotuning_enabled = CHAMELEON_TRUE;
chamctxt->parallel_enabled = CHAMELEON_FALSE;
chamctxt->profiling_enabled = CHAMELEON_FALSE;
chamctxt->progress_enabled = CHAMELEON_FALSE;
chamctxt->householder = ChamFlatHouseholder;
chamctxt->translation = ChamOutOfPlace;
......@@ -260,7 +260,7 @@ int CHAMELEON_Disable(int option)
* \retval CHAMELEON_SUCCESS successful exit
*
*/
int CHAMELEON_Set(int param, int value)
int CHAMELEON_Set( int param, int value )
{
CHAM_context_t *chamctxt;
......
......@@ -97,11 +97,11 @@ static int nbdesc = 0;
*
*/
CHAM_desc_t chameleon_desc_init_user(cham_flttype_t dtyp, int mb, int nb, int bsiz,
int lm, int ln, int i, int j,
int m, int n, int p, int q,
void* (*get_blkaddr)( const CHAM_desc_t*, int, int ),
int (*get_blkldd) ( const CHAM_desc_t*, int ),
int (*get_rankof) ( const CHAM_desc_t*, int, int ))
int lm, int ln, int i, int j,
int m, int n, int p, int q,
void* (*get_blkaddr)( const CHAM_desc_t*, int, int ),
int (*get_blkldd) ( const CHAM_desc_t*, int ),
int (*get_rankof) ( const CHAM_desc_t*, int, int ))
{
CHAM_context_t *chamctxt;
CHAM_desc_t desc;
......@@ -195,22 +195,22 @@ CHAM_desc_t chameleon_desc_init_user(cham_flttype_t dtyp, int mb, int nb, int bs
* Internal static descriptor initializer
*/
CHAM_desc_t chameleon_desc_init(cham_flttype_t dtyp, int mb, int nb, int bsiz,
int lm, int ln, int i, int j,
int m, int n, int p, int q)
int lm, int ln, int i, int j,
int m, int n, int p, int q)
{
return chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q,
chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, chameleon_getrankof_2d);
chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, chameleon_getrankof_2d);
}
/**
* Internal static descriptor initializer for a block diagonal matrix
*/
CHAM_desc_t chameleon_desc_init_diag(cham_flttype_t dtyp, int mb, int nb, int bsiz,
int lm, int ln, int i, int j,
int m, int n, int p, int q)
int lm, int ln, int i, int j,
int m, int n, int p, int q)
{
return chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q,
chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, chameleon_getrankof_2d_diag);
chameleon_getaddr_ccrb, chameleon_getblkldd_ccrb, chameleon_getrankof_2d_diag);
}
/**
......@@ -394,7 +394,7 @@ int chameleon_desc_mat_free( CHAM_desc_t *desc )
*
*/
int CHAMELEON_Desc_Create(CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz,
int lm, int ln, int i, int j, int m, int n, int p, int q)
int lm, int ln, int i, int j, int m, int n, int p, int q)
{
CHAM_context_t *chamctxt;
CHAM_desc_t *desc;
......@@ -508,10 +508,10 @@ int CHAMELEON_Desc_Create(CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp,
*
*/
int CHAMELEON_Desc_Create_User(CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz,
int lm, int ln, int i, int j, int m, int n, int p, int q,
void* (*get_blkaddr)( const CHAM_desc_t*, int, int ),
int (*get_blkldd) ( const CHAM_desc_t*, int ),
int (*get_rankof) ( const CHAM_desc_t*, int, int ))
int lm, int ln, int i, int j, int m, int n, int p, int q,
void* (*get_blkaddr)( const CHAM_desc_t*, int, int ),
int (*get_blkldd) ( const CHAM_desc_t*, int ),
int (*get_rankof) ( const CHAM_desc_t*, int, int ))
{
CHAM_context_t *chamctxt;
CHAM_desc_t *desc;
......@@ -533,7 +533,7 @@ int CHAMELEON_Desc_Create_User(CHAM_desc_t **descptr, void *mat, cham_flttype_t
}
*desc = chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q,
get_blkaddr, get_blkldd, get_rankof);
get_blkaddr, get_blkldd, get_rankof);
/* if the user gives a pointer to the overall data (tiles) we can use it */
desc->use_mat = (mat == NULL) ? 0 : 1;
......@@ -605,8 +605,8 @@ int CHAMELEON_Desc_Create_User(CHAM_desc_t **descptr, void *mat, cham_flttype_t
*
*/
int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz,
int lm, int ln, int i, int j, int m, int n, int p, int q,
int (*get_rankof)( const CHAM_desc_t*, int, int ))
int lm, int ln, int i, int j, int m, int n, int p, int q,
int (*get_rankof)( const CHAM_desc_t*, int, int ))
{
#if !defined (CHAMELEON_SCHED_STARPU)
(void)descptr; (void)dtyp; (void)mb; (void)nb; (void)bsiz;
......@@ -614,7 +614,7 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i
(void)get_rankof;
chameleon_error("CHAMELEON_Desc_Create_OOC_User", "Only StarPU supports on-demand tile allocation");
return CHAMELEON_ERR_NOT_INITIALIZED;
return CHAMELEON_ERR_NOT_SUPPORTED;
#else
CHAM_context_t *chamctxt;
CHAM_desc_t *desc;
......@@ -627,14 +627,15 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i
chameleon_error("CHAMELEON_Desc_Create_OOC_User", "CHAMELEON not initialized");
return CHAMELEON_ERR_NOT_INITIALIZED;
}
/* Allocate memory and initialize the descriptor */
desc = (CHAM_desc_t*)malloc(sizeof(CHAM_desc_t));
if (desc == NULL) {
if ( desc == NULL ) {
chameleon_error("CHAMELEON_Desc_Create_OOC_User", "malloc() failed");
return CHAMELEON_ERR_OUT_OF_RESOURCES;
}
*desc = chameleon_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q,
chameleon_getaddr_null, NULL, get_rankof);
*desc = chameleon_desc_init_user( dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q,
chameleon_getaddr_null, NULL, get_rankof );
/* memory of the matrix is completely handled by runtime */
desc->use_mat = 0;
......@@ -701,11 +702,11 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i
*
*/
int CHAMELEON_Desc_Create_OOC(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz,
int lm, int ln, int i, int j, int m, int n, int p, int q)
int lm, int ln, int i, int j, int m, int n, int p, int q)
{
return CHAMELEON_Desc_Create_OOC_User( descptr, dtyp, mb, nb, bsiz,
lm, ln, i, j, m, n, p, q,
chameleon_getrankof_2d );
lm, ln, i, j, m, n, p, q,
chameleon_getrankof_2d );
}
/**
......@@ -722,8 +723,7 @@ int CHAMELEON_Desc_Create_OOC(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb
*
******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* @retval CHAMELEON_SUCCESS successful exit
*
*/
int CHAMELEON_Desc_Destroy(CHAM_desc_t **desc)
......@@ -763,8 +763,7 @@ int CHAMELEON_Desc_Destroy(CHAM_desc_t **desc)
*
******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* @retval CHAMELEON_SUCCESS successful exit
*
*/
int CHAMELEON_Desc_Acquire (CHAM_desc_t *desc) {
......@@ -787,8 +786,7 @@ int CHAMELEON_Desc_Acquire (CHAM_desc_t *desc) {
*
******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* @retval CHAMELEON_SUCCESS successful exit
*
*/
int CHAMELEON_Desc_Release (CHAM_desc_t *desc) {
......@@ -800,7 +798,9 @@ int CHAMELEON_Desc_Release (CHAM_desc_t *desc) {
*
* @ingroup Descriptor
*
* CHAMELEON_Desc_Flush - Flushes the data in the sequence when they won't be reused. This calls cleans up the distributed communication caches, and transfer the data back to the CPU.
* CHAMELEON_Desc_Flush - Flushes the data in the sequence when they won't be
* reused. This calls cleans up the distributed communication caches, and
* transfer the data back to the CPU.
*
******************************************************************************
*
......@@ -809,12 +809,11 @@ int CHAMELEON_Desc_Release (CHAM_desc_t *desc) {
*
******************************************************************************
*
* @return
* \retval CHAMELEON_SUCCESS successful exit
* @retval CHAMELEON_SUCCESS successful exit
*
*/
int CHAMELEON_Desc_Flush( CHAM_desc_t *desc,
RUNTIME_sequence_t *sequence )
RUNTIME_sequence_t *sequence )
{
RUNTIME_desc_flush( desc, sequence );
return CHAMELEON_SUCCESS;
......@@ -838,11 +837,6 @@ int CHAMELEON_Desc_Flush( CHAM_desc_t *desc,
* @param[in] user_tag_sep
* The new value for tag_sep.
*
******************************************************************************
*
* @return
* \retval none
*
*/
void CHAMELEON_user_tag_size(int user_tag_width, int user_tag_sep) {
RUNTIME_comm_set_tag_sizes( user_tag_width, user_tag_sep );
......
......@@ -290,6 +290,18 @@
(simple and double precision) on /mirage/ and /sirocco/ machines are
available for now. Database of models is subject to change.
*** Use out of core support with StarPU
<<sec:ooc>>
If the matrix can not fit in the main memory, StarPU can automatically evict
tiles to the disk. The following variables need to be set:
* *STARPU_DISK_SWAP* environment variable to a place where to store
evicted tiles, for example: ~STARPU_DISK_SWAP=/tmp~
* *STARPU_DISK_SWAP_BACKEND* environment variable to the I/O method,
for example: ~STARPU_DISK_SWAP_BACKEND=unistd_o_direct~
* *STARPU_LIMIT_CPU_MEM* environment variable to the amount of memory
that can be used in MBytes, for example: ~STARPU_LIMIT_CPU_MEM=1000~
** Chameleon API
Chameleon provides routines to solve dense general systems of
......
......@@ -27,11 +27,6 @@ endif()
if (CHAMELEON_PREC_D)
add_subdirectory(lapack_to_chameleon)
if (CHAMELEON_SCHED_STARPU)
if (${STARPU_VERSION_MAJOR} GREATER 0 AND ${STARPU_VERSION_MINOR} GREATER 1)
add_subdirectory(out_of_core)
endif()
endif()
else()
message(WARNING "CHAMELEON_PREC_D is set to OFF so that lapack_to_chameleon "
"and out_core tutorials cannot be built (use only double arithmetic "
......
###
#
# @file CMakeLists.txt
#
# @copyright 2009-2014 The University of Tennessee and The University of
# Tennessee Research Foundation. All rights reserved.
# @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
# Univ. Bordeaux. All rights reserved.
#
###
#
# CHAMELEON example routines
# CHAMELEON is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI,
# University of Bordeaux, Bordeaux INP
#
# @version 1.0.0
# @author Florent Pruvost
# @date 2016-08-23
#
###
if (NOT CHAMELEON_SCHED_STARPU)
message(ERROR "This directory should not be included if CHAMELEON_SCHED_STARPU is not enabled")
endif()
if(CHAMELEON_SIMULATION)
message(ERROR "This directory should not be included if CHAMELEON_SIMULATION is enabled")
endif()
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# list of sources
set(OOC_SOURCES
out_of_core.c
)
# Define what libraries we have to link with
# ------------------------------------------
unset(libs_for_ooc)
# ooc executable depends on chameleon, starpu and lapacke (already chameleon's dependencies)
list(APPEND libs_for_ooc chameleon)
# message(STATUS "libs_for_ooc: ${libs_for_ooc}")
foreach(_ooc ${OOC_SOURCES})
get_filename_component(_name_exe ${_ooc} NAME_WE)
add_executable(${_name_exe} ${_ooc})
set_property(TARGET ${_name_exe} PROPERTY LINKER_LANGUAGE Fortran)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -undefined dynamic_lookup" )
endif()
target_link_libraries(${_name_exe} ${libs_for_ooc})
install(TARGETS ${_name_exe}
DESTINATION bin/example/out_of_core)
endforeach()
#-------- Tests ---------
include(CTestLists.cmake)
###
### END CMakeLists.txt
###
#
# Check Example out_of_core
#
set(TESTLIST
out_of_core
)
# OOC tests required to create a directory where the data will be written on disk
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ooc)
# define tests
foreach(test ${TESTLIST})
add_test(example_ooc_${test} ./${test})
endforeach()
/**
*
* @file out_of_core.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon out_of_core example
*
* @version 1.0.0
* @author Florent Pruvost
* @date 2014-10-29
*
*/
#include "out_of_core.h"
/*
* @brief ooc is driver example routine to test the out-of-core feature with StarPU
* @details TODO: write some details
*/
int main(int argc, char *argv[]) {
size_t N; // matrix order
int NB; // number of rows and columns in tiles
int NRHS; // number of RHS vectors
int NCPU; // number of cores to use
int NGPU; // number of gpus (cuda devices) to use
int UPLO = ChamUpper; // where is stored L
/* descriptors necessary for calling CHAMELEON tile interface */
CHAM_desc_t *descA = NULL, *descAC = NULL, *descB = NULL, *descX = NULL;
/* declarations to time the program and evaluate performances */
double fmuls, fadds, flops, gflops, cpu_time;
/* variable to check the numerical results */
double anorm, bnorm, xnorm, eps, res;
int hres;
/* initialize some parameters with default values */
int iparam[IPARAM_SIZEOF];
memset(iparam, 0, IPARAM_SIZEOF*sizeof(int));
init_iparam(iparam);
/* read arguments */
read_args(argc, argv, iparam);
N = iparam[IPARAM_N];
NB = iparam[IPARAM_NB];
NRHS = iparam[IPARAM_NRHS];
/* compute the algorithm complexity to evaluate performances */
fadds = (double)( FADDS_POTRF(N) + 2 * FADDS_TRSM(N,NRHS) );
fmuls = (double)( FMULS_POTRF(N) + 2 * FMULS_TRSM(N,NRHS) );
flops = 1e-9 * (fmuls + fadds);
/* initialize the number of thread if not given by the user in argv */
if ( iparam[IPARAM_THRDNBR] == -1 ) {
get_thread_count( &(iparam[IPARAM_THRDNBR]) );
}
NCPU = iparam[IPARAM_THRDNBR];
NGPU = 0;
/* print informations to user */
print_header( argv[0], iparam);
/* check that o direct will work */
if (iparam[IPARAM_OUTOFCORE] > 0) {
if (! will_o_direct_work(NB)) {
print_o_direct_wont_work();
return EXIT_FAILURE;
}
char maxMemoryAllowed[32];
sprintf (maxMemoryAllowed, "%d", iparam[IPARAM_OUTOFCORE]);
setenv ("STARPU_LIMIT_CPU_MEM", maxMemoryAllowed, 1);
}
/* Initialize CHAMELEON with main parameters */
if ( CHAMELEON_Init( NCPU, NGPU ) != CHAMELEON_SUCCESS ) {
fprintf(stderr, "Error initializing CHAMELEON library\n");
return EXIT_FAILURE;
}
CHAMELEON_Set(CHAMELEON_TILE_SIZE, NB);
/* limit ram memory */
if (iparam[IPARAM_OUTOFCORE] > 0) {
int new_dd = starpu_disk_register (&starpu_disk_unistd_o_direct_ops,
(void*) "./ooc/", 1024*1024*10);
if (new_dd == -ENOENT){
fprintf(stderr, "Can't write on ./ooc/\n");
return EXIT_FAILURE;
}
}
CHAMELEON_Desc_Create_User(&descA, NULL, ChamRealDouble,
NB, NB, NB*NB, N, N, 0, 0, N, N, 1, 1,
chameleon_getaddr_null, // specific function
chameleon_getblkldd_ccrb,
chameleon_getrankof_2d);
CHAMELEON_Desc_Create(&descB, NULL, ChamRealDouble,
NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS, 1, 1);
CHAMELEON_Desc_Create(&descX, NULL, ChamRealDouble,
NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS, 1, 1);
CHAMELEON_Desc_Create(&descAC, NULL, ChamRealDouble,
NB, NB, NB*NB, N, N, 0, 0, N, N, 1, 1);
/* generate A matrix with random values such that it is spd */
CHAMELEON_dplgsy_Tile( (double)N, ChamUpperLower, descA, 51 );
/* generate RHS */
CHAMELEON_dplrnt_Tile( descB, 5673 );
/* copy A before facto. in order to check the result */
CHAMELEON_dlacpy_Tile(ChamUpperLower, descA, descAC);
/* copy B in X before solving
* same sense as memcpy(X, B, N*NRHS*sizeof(double)) but for descriptors */
CHAMELEON_dlacpy_Tile(ChamUpperLower, descB, descX);
/************************************************************/
/* solve the system AX = B using the Cholesky factorization */
/************************************************************/
cpu_time = -CHAMELEON_timer();
/* Cholesky factorization:
* A is replaced by its factorization L or L^T depending on uplo */
CHAMELEON_dpotrf_Tile( UPLO, descA );
/* Solve:
* B is stored in X on entry, X contains the result on exit.
* Forward and back substitutions
*/
CHAMELEON_dpotrs_Tile( UPLO, descA, descX );
cpu_time += CHAMELEON_timer();
/* print informations to user */
gflops = flops / cpu_time;
printf( "%9.3f %9.2f\n", cpu_time, gflops);
fflush( stdout );
/************************************************************/
/* check if solve is correct i.e. AX-B = 0 */
/************************************************************/
/* compute norms to check the result */
anorm = CHAMELEON_dlange_Tile( ChamInfNorm, descAC);
bnorm = CHAMELEON_dlange_Tile( ChamInfNorm, descB);
xnorm = CHAMELEON_dlange_Tile( ChamInfNorm, descX);
/* compute A*X-B, store the result in B */
CHAMELEON_dgemm_Tile( ChamNoTrans, ChamNoTrans,
1.0, descAC, descX, -1.0, descB );
res = CHAMELEON_dlange_Tile( ChamInfNorm, descB );
/* check residual and print a message */
eps = LAPACKE_dlamch_work( 'e' );
/*
* if hres = 0 then the test succeed
* else the test failed
*/
hres = ( (res / N / eps / (anorm * xnorm + bnorm )) > 100.0 );
printf( " ||Ax-b|| ||A|| ||x|| ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||) RETURN\n");
if (hres) {
printf( "%8.5e %8.5e %8.5e %8.5e %8.5e FAILURE \n",
res, anorm, xnorm, bnorm,
res / N / eps / (anorm * xnorm + bnorm ));
}
else {
printf( "%8.5e %8.5e %8.5e %8.5e %8.5e SUCCESS \n",
res, anorm, xnorm, bnorm,
res / N / eps / (anorm * xnorm + bnorm ));
}
/* free descriptors descA, descB, descX, descAC */
CHAMELEON_Desc_Destroy( &descA );
CHAMELEON_Desc_Destroy( &descB );
CHAMELEON_Desc_Destroy( &descX );
CHAMELEON_Desc_Destroy( &descAC );
/* Finalize CHAMELEON */
CHAMELEON_Finalize();
return EXIT_SUCCESS;
}
/**
*
* @file out_of_core.h
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon out_of_core example header
*
* @version 1.0.0
* @author Florent Pruvost
* @date 2016-08-23
*
*/
#ifndef _out_of_core_h_
#define _out_of_core_h_
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#if defined( _WIN32 ) || defined( _WIN64 )
#define int64_t __int64
#endif
/* Define these so that the Microsoft VC compiler stops complaining
about scanf and friends */
#define _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_WARNINGS
#if defined( _WIN32 ) || defined( _WIN64 )
#include <windows.h>
#else /* Non-Windows */
#include <unistd.h>
#include <sys/resource.h>
#endif
#include <starpu.h>
#include "coreblas/lapacke.h"
#include "chameleon.h"
#include "control/common.h"
/* Common functions for all steps of the tutorial */
static void get_thread_count(int *thrdnbr) {
#if defined WIN32 || defined WIN64
sscanf( getenv( "NUMBER_OF_PROCESSORS" ), "%d", thrdnbr );
#else
*thrdnbr = sysconf(_SC_NPROCESSORS_ONLN);
#endif
}
static int startswith(const char *s, const char *prefix) {
size_t n = strlen( prefix );
if (strncmp( s, prefix, n ))
return 0;
return 1;
}
/* define complexity of algorithms - see Lawn 41 page 120 */
#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.)))
#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) ) * (double)(__n) - (1. / 6.)))
#define FMULS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.))
#define FADDS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.))
/* define some tools to time the program */
#include <chameleon/timer.h>
/* Integer parameters */
enum iparam_ooc {
IPARAM_THRDNBR, /* Number of cores */
IPARAM_N, /* Number of columns of the matrix */
IPARAM_NB, /* Number of columns in a tile */
IPARAM_NRHS, /* Number of RHS */
IPARAM_OUTOFCORE, /* if > 0 --> how many memory accepted incore */
/* else --> do not use ooc. */
/* End */
IPARAM_SIZEOF
};
/* Specific routines */
/**
* Initialize integer parameters
*/
static void init_iparam(int iparam[IPARAM_SIZEOF]){
iparam[IPARAM_THRDNBR ] = -1;
iparam[IPARAM_N ] = 500;
iparam[IPARAM_NB ] = 128;
iparam[IPARAM_NRHS ] = 1;
iparam[IPARAM_OUTOFCORE ] = 2000;
}
/**
* Print how to use the program
*/
static void show_help(char *prog_name) {
printf( "Usage:\n%s [options]\n\n", prog_name );
printf( "Options are:\n"
" --help Show this help\n"
"\n"
" --n=X dimension (N). (default: 500)\n"
" --nb=X NB size. (default: 128)\n"
" --nrhs=X number of RHS. (default: 1)\n"
"\n"
" --threads=X Number of CPU workers (default: _SC_NPROCESSORS_ONLN)\n"
" --ooc=N Allow to store N MiB in main memory. (default: )\n"
"\n");
}
/**
* Read arguments following ooc program call
*/
static void read_args(int argc, char *argv[], int *iparam){
int i;
for (i = 1; i < argc && argv[i]; ++i) {
if ( startswith( argv[i], "--help") || startswith( argv[i], "-help") ||
startswith( argv[i], "--h") || startswith( argv[i], "-h") ) {
show_help( argv[0] );
exit(0);
} else if (startswith( argv[i], "--n=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_N]) );
} else if (startswith( argv[i], "--nb=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NB]) );
} else if (startswith( argv[i], "--nrhs=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NRHS]) );
} else if (startswith( argv[i], "--threads=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR]) );
} else if (startswith( argv[i], "--ooc=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_OUTOFCORE]) );
} else {
fprintf( stderr, "Unknown option: %s\n", argv[i] );
}
}
}
/**
* Print a header message to summarize main parameters
*/
static void print_header(char *prog_name, int * iparam) {
#if defined(CHAMELEON_SIMULATION)
double eps = 0.;
#else
double eps = LAPACKE_dlamch_work( 'e' );
#endif
printf( "#\n"
"# CHAMELEON %d.%d.%d, %s\n"
"# Nb threads: %d\n"
"# Nb gpus: %d\n"
"# N: %d\n"
"# NB: %d\n"
"# IB: %d\n"
"# eps: %e\n"
"# ooc: %d\n"
"#\n",
CHAMELEON_VERSION_MAJOR,
CHAMELEON_VERSION_MINOR,
CHAMELEON_VERSION_MICRO,
prog_name,
iparam[IPARAM_THRDNBR],
0,
iparam[IPARAM_N],
iparam[IPARAM_NB],
32,
eps,
iparam[IPARAM_OUTOFCORE]);
printf( "# M N K/NRHS seconds Gflop/s\n");
printf( "#%7d %7d %7d ", iparam[IPARAM_N], iparam[IPARAM_N], iparam[IPARAM_NRHS]);
fflush( stdout );
return;
}
// Checking if all block size is a multiple of 4096 Bytes
static int
will_o_direct_work(int nb) {
if ((nb * nb * sizeof(float)) % 4096 != 0)
return 0;
return 1;
}
static void
print_o_direct_wont_work(void) {
fprintf(stderr, "\n[chameleon] Using out-of-core in o_direct force your blocks' size to be\n"
"multiples of 4096. Tip : chose 'n' and 'nb' as both multiples of 32.\n");
}
#endif /* _out_of_core_h_ */
......@@ -56,7 +56,7 @@ RUNTIME_context_destroy( CHAM_context_t *ctxt );
/**
* @brief Enable a global option of the runtime.
* @warning Should be called only by RUNTIME_Enable()
* @warning Should be called only by CHAMELEON_Enable()
*
* @param[in] option
* @arg CHAMELEON_PROFILING_MODE: start the profiling mode of the runtime.
......@@ -66,7 +66,7 @@ RUNTIME_enable( int option );
/**
* @brief Disable a global option of the runtime.
* @warning Should be called only by RUNTIME_Disable()
* @warning Should be called only by CHAMELEON_Disable()
*
* @param[in] option
* @arg CHAMELEON_PROFILING_MODE: stop the profiling mode of the runtime.
......
......@@ -2,14 +2,14 @@
# Check timing/
#
set(TEST_CMD_shm --n_range=500:2000:500 --nb=320 )
set(TEST_CMD_shmgpu --n_range=500:2000:500 --nb=320 --gpus=1)
set(TEST_CMD_mpi --n_range=500:2000:500 --nb=320 --P=2)
set(TEST_CMD_mpigpu --n_range=500:2000:500 --nb=320 --P=2 --gpus=1)
set(TEST_CMD_shm --n_range=17:407:39 --nb=32 --ib=7 )
set(TEST_CMD_shmgpu --n_range=170:4070:390 --nb=320 --ib=48 --gpus=1 )
set(TEST_CMD_mpi --n_range=17:407:39 --nb=32 --ib=7 --P=2 )
set(TEST_CMD_mpigpu --n_range=170:4070:390 --nb=320 --ib=48 --P=2 --gpus=1)
set(MPI_CMD_shm )
set(MPI_CMD_shm )
set(MPI_CMD_shmgpu )
set(MPI_CMD_mpi mpirun -np 4)
set(MPI_CMD_mpi mpirun -np 4)
set(MPI_CMD_mpigpu mpirun -np 4)
if (NOT CHAMELEON_SIMULATION)
......@@ -62,10 +62,29 @@ if (NOT CHAMELEON_SIMULATION)
endforeach()
endforeach()
if ( CHAMELEON_SCHED_STARPU )
foreach(cat ${TEST_CATEGORIES})
foreach(prec ${RP_CHAMELEON_PRECISIONS})
string(TOUPPER ${prec} PREC)
if (CHAMELEON_PREC_${PREC})
foreach(test ${TESTLIST})
add_test(time_ooc_${cat}_${prec}${test} STARPU_DISK_SWAP=/tmp STARPU_LIMIT_CPU_MEM=1 ${MPI_CMD_${cat}} ./time_${prec}${test}_tile ${TEST_CMD_${cat}} --ooc --check)
endforeach()
endif()
endforeach()
foreach(prec ${CHAMELEON_PRECISIONS_ZC})
string(TOUPPER ${prec} PREC)
if (CHAMELEON_PREC_${PREC})
foreach(test ${TESTLIST_ZC})
add_test(time_ooc_${cat}_${prec}${test} STARPU_DISK_SWAP=/tmp STARPU_LIMIT_CPU_MEM=1 ${MPI_CMD_${cat}} ./time_${prec}${test}_tile ${TEST_CMD_${cat}} --ooc --check)
endforeach()
endif()
endforeach()
endforeach()
endif()
if (CHAMELEON_USE_MPI AND MPI_C_FOUND)
set( TEST_CATEGORIES mpi )
set( TEST_CMD_mpi --P=2 --n_range=2000:2000:1 --nb=32)
set( TEST_CMD_mpigpu --P=2 --n_range=2000:2000:1 --nb=32 --gpus=1)
#set( TEST_CATEGORIES ${TEST_CATEGORIES} mpi )
#if (CHAMELEON_USE_CUDA AND CUDA_FOUND)
# set( TEST_CATEGORIES ${TEST_CATEGORIES} mpigpu )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment