Commit 3e6305c6 authored by Samuel Thibault's avatar Samuel Thibault Committed by Mathieu Faverge

Add Out-of-Core option

Add MORSE_Desc_Create_OOC, which is like MORSE_Desc_Create, but does not
actually allocate a matrix, thus letting the runtime allocate on-demand the
tiles, possibly pushing them to the disk.

Add a --ooc option to tests to enable this.
parent 94cf0fb4
......@@ -379,6 +379,93 @@ int MORSE_Desc_Create(MORSE_desc_t **desc, void *mat, MORSE_enum dtyp, int mb, i
return MORSE_SUCCESS;
}
/** ***************************************************************************
 *
 * @ingroup Descriptor
 *
 *  MORSE_Desc_Create_OOC - Create a matrix descriptor for a matrix which may
 *  not fit in memory (Out-of-Core).
 *
 *  Unlike MORSE_Desc_Create, no matrix storage is attached to the descriptor:
 *  the tile address callback always returns NULL and the `mat` pointer is
 *  NULL, so that the runtime allocates the tiles on demand and may push them
 *  to disk. Only available when built with CHAMELEON_SCHED_STARPU.
 *
 ******************************************************************************
 *
 * @param[out] desc
 *          On exit, descriptor of the matrix. The descriptor structure itself
 *          is allocated with malloc() by this function.
 *
 * @param[in] dtyp
 *          Data type of the matrix:
 *          @arg MorseRealFloat:     single precision real (S),
 *          @arg MorseRealDouble:    double precision real (D),
 *          @arg MorseComplexFloat:  single precision complex (C),
 *          @arg MorseComplexDouble: double precision complex (Z).
 *
 * @param[in] mb
 *          Number of rows in a tile.
 *
 * @param[in] nb
 *          Number of columns in a tile.
 *
 * @param[in] bsiz
 *          Size of a tile in number of elements (the timing drivers pass
 *          mb*nb).
 *
 * @param[in] lm
 *          Number of rows of the entire matrix.
 *
 * @param[in] ln
 *          Number of columns of the entire matrix.
 *
 * @param[in] i
 *          Row index of the beginning of the submatrix
 *          (presumably, as for MORSE_Desc_Create - TODO confirm).
 *
 * @param[in] j
 *          Column index of the beginning of the submatrix
 *          (presumably, as for MORSE_Desc_Create - TODO confirm).
 *
 * @param[in] m
 *          Number of rows of the submatrix.
 *
 * @param[in] n
 *          Number of columns of the submatrix.
 *
 * @param[in] p
 *          2d-block cyclic partitioning, number of tiles in rows.
 *
 * @param[in] q
 *          2d-block cyclic partitioning, number of tiles in columns.
 *
 * @param[in] (*get_rankof)( const MORSE_desc_t *A, int m, int n)
 *          A function that returns the MPI rank of the tile A(m,n). It is
 *          forwarded unchanged to morse_desc_init_user.
 *
 ******************************************************************************
 *
 * @return
 *          \retval MORSE_SUCCESS successful exit
 *          \retval MORSE_ERR_NOT_INITIALIZED if MORSE is not initialized, or if
 *                  the library was not built with the StarPU scheduler
 *          \retval MORSE_ERR_OUT_OF_RESOURCES if the descriptor could not be
 *                  allocated
 *          \retval the status returned by morse_desc_check() if the resulting
 *                  descriptor is invalid
 *
 *****************************************************************************/
int MORSE_Desc_Create_OOC(MORSE_desc_t **desc, MORSE_enum dtyp, int mb, int nb, int bsiz,
                          int lm, int ln, int i, int j, int m, int n, int p, int q,
                          int (*get_rankof)( const MORSE_desc_t*, int, int ))
{
#if !defined (CHAMELEON_SCHED_STARPU)
    morse_error("MORSE_Desc_Create_OOC", "Only StarPU supports on-demand tile allocation");
    return MORSE_ERR_NOT_INITIALIZED;
#else
    MORSE_context_t *morse;
    int status;

    morse = morse_context_self();
    if (morse == NULL) {
        morse_error("MORSE_Desc_Create_OOC", "MORSE not initialized");
        return MORSE_ERR_NOT_INITIALIZED;
    }

    /* Allocate memory and initialize the descriptor */
    *desc = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
    if (*desc == NULL) {
        morse_error("MORSE_Desc_Create_OOC", "malloc() failed");
        return MORSE_ERR_OUT_OF_RESOURCES;
    }

    /* morse_getaddr_null makes every tile address NULL: no user storage */
    **desc = morse_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q,
                                  morse_getaddr_null, NULL, get_rankof);

    /* memory of the matrix is completely handled by runtime */
    (**desc).use_mat   = 0;
    (**desc).alloc_mat = 0;
    (**desc).mat       = NULL;
    (**desc).ooc       = 1;

    /* Create scheduler structure like registering data */
    RUNTIME_desc_create( *desc );

    status = morse_desc_check(*desc);
    if (status != MORSE_SUCCESS) {
        /* NOTE(review): the descriptor stays allocated in *desc on this path;
         * presumably the caller has to release it - confirm. */
        morse_error("MORSE_Desc_Create_OOC", "invalid descriptor");
        return status;
    }

    return MORSE_SUCCESS;
#endif
}
/** ***************************************************************************
*
* @ingroup Descriptor
......
......@@ -41,6 +41,7 @@ extern "C" {
inline static void* morse_geteltaddr(const MORSE_desc_t *A, int m, int n, int eltsize);
inline static void* morse_getaddr_cm (const MORSE_desc_t *A, int m, int n);
inline static void* morse_getaddr_ccrb (const MORSE_desc_t *A, int m, int n);
inline static void* morse_getaddr_null (const MORSE_desc_t *A, int m, int n);
inline static int morse_getblkldd_cm (const MORSE_desc_t *A, int m);
inline static int morse_getblkldd_ccrb(const MORSE_desc_t *A, int m);
......@@ -120,6 +121,15 @@ inline static void *morse_getaddr_cm(const MORSE_desc_t *A, int m, int n)
return (void*)((intptr_t)A->mat + (offset*eltsize) );
}
/*******************************************************************************
 *  Internal tile-address callback for block (m,n), with m,n = block indices,
 *  used when the runtime allocates the tiles on demand: there is no user
 *  storage to point at, so the answer is always NULL.
 **/
inline static void *morse_getaddr_null(const MORSE_desc_t *A, int m, int n)
{
    /* None of the arguments take part in the result */
    (void)A;
    (void)m;
    (void)n;

    return NULL;
}
/*******************************************************************************
* Internal function to return address of element A(m,n) with m,n = matrix indices
**/
......
......@@ -344,3 +344,26 @@ Database of models is subject to change, it should be enrich in a near future.
One can additionally decide to enable the magma kernels by setting the cmake
option @option{-DCHAMELEON_SIMULATION_MAGMA=ON} .
@node Use out of core support with StarPU
@section Use out of core support with StarPU
If the matrix cannot fit in main memory, StarPU can automatically evict
tiles to disk. The following variables need to be set:
@itemize @bullet
@item @env{STARPU_DISK_SWAP} environment variable to the location where
evicted tiles are to be stored, for example:
@example
@env{STARPU_DISK_SWAP}=/tmp
@end example
@item @env{STARPU_DISK_SWAP_BACKEND} environment variable to the I/O method,
for example:
@example
@env{STARPU_DISK_SWAP_BACKEND}=unistd_o_direct
@end example
@item @env{STARPU_LIMIT_CPU_MEM} environment variable to the amount of memory
that can be used in MBytes, for example:
@example
@env{STARPU_LIMIT_CPU_MEM}=1000
@end example
@end itemize
......@@ -435,6 +435,9 @@ This can be achieved from different ways.
@item Use the existing function @code{MORSE_Desc_Create}: means the
matrix data are considered contiguous in memory as it is considered in PLASMA
(@ref{Tile Data Layout}).
@item Use the existing function @code{MORSE_Desc_Create_OOC}: the matrix data
is allocated on demand in memory, tile by tile, and possibly pushed to disk if
it does not fit in memory.
@item Use the existing function @code{MORSE_Desc_Create_User}: it is more
flexible than @code{Desc_Create} because you can give your own way to access to
tile data so that your tiles can be allocated wherever you want in memory, see
......
......@@ -260,13 +260,4 @@ print_o_direct_wont_work(void) {
"multiples of 4096. Tip : chose 'n' and 'nb' as both multiples of 32.\n");
}
/******************************************************************************
 *  Function to return the address of block (m,n) -> always NULL here, because
 *  the memory is directly handled by StarPU.
 **/
inline static void* morse_getaddr_null(const MORSE_desc_t *A, int m, int n)
{
    /* The descriptor and the block indices are deliberately ignored */
    (void)A;
    (void)m;
    (void)n;

    return NULL;
}
#endif /* OOC_H */
......@@ -85,6 +85,10 @@ int MORSE_Element_Size(int type);
int MORSE_Desc_Create (MORSE_desc_t **desc, void *mat, MORSE_enum dtyp,
int mb, int nb, int bsiz, int lm, int ln,
int i, int j, int m, int n, int p, int q);
int MORSE_Desc_Create_OOC (MORSE_desc_t **desc, MORSE_enum dtyp,
int mb, int nb, int bsiz, int lm, int ln,
int i, int j, int m, int n, int p, int q,
int (*get_rankof)( const MORSE_desc_t*, int, int ));
int MORSE_Desc_Create_User(MORSE_desc_t **desc, void *mat, MORSE_enum dtyp, int mb, int nb, int bsiz,
int lm, int ln, int i, int j, int m, int n, int p, int q,
void* (*get_blkaddr)( const MORSE_desc_t*, int, int ),
......
......@@ -108,6 +108,7 @@ struct morse_desc_s {
int alloc_mat; // 1 if we handle the allocation of mat - else 0
int register_mat; // 1 if we have to register mat - else 0 (handled by the application)
int myrank; // MPI rank of the descriptor
int ooc; // 1 if the matrix is not to fit in memory
void *schedopt; // scheduler (QUARK|StarPU) specific structure
};
......
......@@ -279,6 +279,10 @@ int RUNTIME_desc_getoncpu( MORSE_desc_t *desc )
int lnt = desc->lnt;
int m, n;
if (desc->ooc)
/* May not even fit */
return MORSE_SUCCESS;
for (n = 0; n < lnt; n++)
for (m = 0; m < lmt; m++)
{
......
......@@ -475,6 +475,7 @@ main(int argc, char *argv[]) {
iparam[IPARAM_TRACE ] = 0;
iparam[IPARAM_DAG ] = 0;
iparam[IPARAM_ASYNC ] = 1;
iparam[IPARAM_OOC ] = 0;
iparam[IPARAM_MX ] = -1;
iparam[IPARAM_NX ] = -1;
iparam[IPARAM_RHBLK ] = 0;
......@@ -549,6 +550,10 @@ main(int argc, char *argv[]) {
iparam[IPARAM_ASYNC] = 0;
} else if (startswith( argv[i], "--async" )) {
iparam[IPARAM_ASYNC] = 1;
} else if (startswith( argv[i], "--ooc" )) {
iparam[IPARAM_OOC] = 1;
} else if (startswith( argv[i], "--noooc" )) {
iparam[IPARAM_OOC] = 0;
} else if (startswith( argv[i], "--n_range=" )) {
get_range( strchr( argv[i], '=' ) + 1, &start, &stop, &step );
} else if (startswith( argv[i], "--m=" )) {
......
......@@ -38,6 +38,7 @@ enum iparam_timing {
IPARAM_TRACE, /* Generate trace on the first non warmup run */
IPARAM_DAG, /* Do we require to output the DOT file? */
IPARAM_ASYNC, /* Asynchronous calls */
IPARAM_OOC, /* Out of Core */
IPARAM_MX, /* */
IPARAM_NX, /* */
IPARAM_RHBLK, /* Householder reduction parameter for QR/LQ */
......@@ -97,6 +98,7 @@ enum dparam_timing {
int64_t MT = (M%MB==0) ? (M/MB) : (M/MB+1); \
int64_t NT = (N%NB==0) ? (N/NB) : (N/NB+1); \
int bigmat = iparam[IPARAM_BIGMAT]; \
int ooc = iparam[IPARAM_OOC]; \
int check = iparam[IPARAM_CHECK]; \
int loud = iparam[IPARAM_VERBOSE]; \
(void)M;(void)N;(void)K;(void)NRHS; \
......@@ -109,7 +111,10 @@ enum dparam_timing {
MORSE_desc_t *_desc_ = NULL; \
int status ## _desc_ ; \
if( _cond_ ) { \
if (!bigmat) \
if (ooc) \
status ## _desc_ = MORSE_Desc_Create_OOC(&(_desc_), _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
P, Q, NULL);\
else if (!bigmat) \
status ## _desc_ = MORSE_Desc_Create_User(&(_desc_), NULL, _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
P, Q, morse_getaddr_null, NULL, NULL);\
else \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment