From 3e6305c64d6f1adae85b25f7baeeae922c737d04 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Wed, 15 Feb 2017 19:19:24 +0100
Subject: [PATCH] Add Out-of-Core option

Add MORSE_Desc_Create_OOC, which is like MORSE_Desc_Create but does not
actually allocate a matrix, thus letting the runtime allocate the tiles
on demand, possibly pushing them to disk.

Add a --ooc option to tests to enable this.
---
 control/descriptor.c                        | 87 +++++++++++++++++++++
 control/descriptor.h                        | 10 +++
 docs/texinfo/chapters/configuration.texi    | 23 ++++++
 docs/texinfo/chapters/using.texi            |  3 +
 example/out_of_core/out_of_core.h           |  9 ---
 include/morse.h.in                          |  4 +
 include/morse_struct.h                      |  1 +
 runtime/starpu/control/runtime_descriptor.c |  4 +
 timing/timing.c                             |  5 ++
 timing/timing.h                             |  7 +-
 10 files changed, 143 insertions(+), 10 deletions(-)

diff --git a/control/descriptor.c b/control/descriptor.c
index e5f4146b2..2d330b8c3 100644
--- a/control/descriptor.c
+++ b/control/descriptor.c
@@ -379,6 +379,93 @@ int MORSE_Desc_Create(MORSE_desc_t **desc, void *mat, MORSE_enum dtyp, int mb, i
     return MORSE_SUCCESS;
 }
 
+/** ***************************************************************************
+ *
+ * @ingroup Descriptor
+ *
+ *  MORSE_Desc_Create_OOC - Create a descriptor for a matrix which may not fit in memory
+ *
+ ******************************************************************************
+ *
+ * @param[out] desc
+ *          On exit, descriptor of the matrix.
+ *
+ * @param[in] dtyp
+ *          Data type of the matrix:
+ *          @arg MorseRealFloat:     single precision real (S),
+ *          @arg MorseRealDouble:    double precision real (D),
+ *          @arg MorseComplexFloat:  single precision complex (C),
+ *          @arg MorseComplexDouble: double precision complex (Z).
+ *
+ * @param[in] mb,nb
+ *          Number of rows (mb) and columns (nb) in a tile.
+ *
+ * @param[in] m
+ *          Number of rows of the entire matrix.
+ *
+ * @param[in] n
+ *          Number of columns of the entire matrix.
+ *
+ * @param[in] p
+ *          2d-block cyclic partitioning, number of tiles in rows.
+ *
+ * @param[in] q
+ *          2d-block cyclic partitioning, number of tiles in columns.
+ *
+ * @param[in] (*get_rankof)( const MORSE_desc_t *A, int m, int n)
+ *          A function that returns the MPI rank of the tile A(m,n).
+ *
+ ******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval other a MORSE_ERR_* code, or the morse_desc_check() status, on failure
+ *****************************************************************************/
+int MORSE_Desc_Create_OOC(MORSE_desc_t **desc, MORSE_enum dtyp, int mb, int nb, int bsiz,
+                          int lm, int ln, int i, int j, int m, int n, int p, int q,
+                          int (*get_rankof)( const MORSE_desc_t*, int, int ))
+{
+#if !defined (CHAMELEON_SCHED_STARPU)
+    morse_error("MORSE_Desc_Create_OOC", "Only StarPU supports on-demand tile allocation");
+    return MORSE_ERR_NOT_INITIALIZED;
+#else
+    MORSE_context_t *morse;
+    int status;
+
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_error("MORSE_Desc_Create_OOC", "MORSE not initialized");
+        return MORSE_ERR_NOT_INITIALIZED;
+    }
+    /* Allocate memory and initialize the descriptor */
+    *desc = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+    if (*desc == NULL) {
+        morse_error("MORSE_Desc_Create_OOC", "malloc() failed");
+        return MORSE_ERR_OUT_OF_RESOURCES;
+    }
+    **desc = morse_desc_init_user(dtyp, mb, nb, bsiz, lm, ln, i, j, m, n, p, q,
+        morse_getaddr_null, NULL, get_rankof);
+
+    /* memory of the matrix is completely handled by runtime */
+    (**desc).use_mat = 0;
+    (**desc).alloc_mat = 0;
+    (**desc).mat = NULL;
+
+    (**desc).ooc = 1;
+
+    /* Create scheduler structure like registering data */
+    RUNTIME_desc_create( *desc );
+
+    status = morse_desc_check(*desc);
+    if (status != MORSE_SUCCESS) {
+        morse_error("MORSE_Desc_Create_OOC", "invalid descriptor");
+        return status;
+    }
+
+    return MORSE_SUCCESS;
+#endif
+}
+
 /** ***************************************************************************
  *
  * @ingroup Descriptor
diff --git a/control/descriptor.h b/control/descriptor.h
index 7d1cb2057..0d3d2173b 100644
--- a/control/descriptor.h
+++ b/control/descriptor.h
@@ -41,6 +41,7 @@ extern "C" {
 inline static void* morse_geteltaddr(const MORSE_desc_t *A, int m, int n, int eltsize);
 inline static void* morse_getaddr_cm    (const MORSE_desc_t *A, int m, int n);
 inline static void* morse_getaddr_ccrb  (const MORSE_desc_t *A, int m, int n);
+inline static void* morse_getaddr_null  (const MORSE_desc_t *A, int m, int n);
 inline static int   morse_getblkldd_cm  (const MORSE_desc_t *A, int m);
 inline static int   morse_getblkldd_ccrb(const MORSE_desc_t *A, int m);
 
@@ -120,6 +121,15 @@ inline static void *morse_getaddr_cm(const MORSE_desc_t *A, int m, int n)
     return (void*)((intptr_t)A->mat + (offset*eltsize) );
 }
 
+/*******************************************************************************
+ *  Internal block-address callback that returns NULL for every block (m,n):
+ *  no address is supplied, which lets the runtime allocate tiles on demand.
+ **/
+inline static void *morse_getaddr_null(const MORSE_desc_t *A, int m, int n)
+{
+    return NULL;
+}
+
 /*******************************************************************************
  *  Internal function to return address of element A(m,n) with m,n = matrix indices
  **/
diff --git a/docs/texinfo/chapters/configuration.texi b/docs/texinfo/chapters/configuration.texi
index a147cf1f9..d57a476a9 100644
--- a/docs/texinfo/chapters/configuration.texi
+++ b/docs/texinfo/chapters/configuration.texi
@@ -344,3 +344,26 @@ Database of models is subject to change, it should be enrich in a near future.
 
 One can additionally decide to enable the magma kernels by setting the cmake
 option @option{-DCHAMELEON_SIMULATION_MAGMA=ON} .
+
+@node Use out of core support with StarPU
+@section Use out of core support with StarPU
+
+If the matrix cannot fit in main memory, StarPU can automatically evict
+tiles to disk. The following variables need to be set:
+@itemize @bullet
+@item @env{STARPU_DISK_SWAP} environment variable to a path where evicted
+tiles can be stored, for example:
+  @example
+  @env{STARPU_DISK_SWAP}=/tmp
+  @end example
+@item @env{STARPU_DISK_SWAP_BACKEND} environment variable to the I/O method,
+for example:
+  @example
+  @env{STARPU_DISK_SWAP_BACKEND}=unistd_o_direct
+  @end example
+@item @env{STARPU_LIMIT_CPU_MEM} environment variable to the amount of main
+memory that can be used, in MBytes, for example:
+  @example
+  @env{STARPU_LIMIT_CPU_MEM}=1000
+  @end example
+@end itemize
diff --git a/docs/texinfo/chapters/using.texi b/docs/texinfo/chapters/using.texi
index 47bf96ddf..4cfd8e662 100644
--- a/docs/texinfo/chapters/using.texi
+++ b/docs/texinfo/chapters/using.texi
@@ -435,6 +435,9 @@ This can be achieved from different ways.
 @item Use the existing function @code{MORSE_Desc_Create}: means the
 matrix data are considered contiguous in memory as it is considered in PLASMA
 (@ref{Tile Data Layout}).
+@item Use the existing function @code{MORSE_Desc_Create_OOC}: means the
+matrix data is allocated on demand, tile by tile, by the runtime, and possibly
+pushed to disk if it does not fit in memory.
 @item Use the existing function @code{MORSE_Desc_Create_User}: it is more
 flexible than @code{Desc_Create} because you can give your own way to access to
 tile data so that your tiles can be allocated wherever you want in memory, see
diff --git a/example/out_of_core/out_of_core.h b/example/out_of_core/out_of_core.h
index dcb9e9702..95a25dcbc 100644
--- a/example/out_of_core/out_of_core.h
+++ b/example/out_of_core/out_of_core.h
@@ -260,13 +260,4 @@ print_o_direct_wont_work(void) {
                     "multiples of 4096. Tip : chose 'n' and 'nb' as both multiples of 32.\n");
 }
 
-/******************************************************************************
- *  Ffunction to return address of block (m,n) -> here NULL because memory is
- *  directly handled by StarPU
- **/
-inline static void* morse_getaddr_null(const MORSE_desc_t *A, int m, int n)
-{
-    return (void*)( NULL );
-}
-
 #endif /* OOC_H */
diff --git a/include/morse.h.in b/include/morse.h.in
index 9473b6e1c..35a07e405 100644
--- a/include/morse.h.in
+++ b/include/morse.h.in
@@ -85,6 +85,10 @@ int MORSE_Element_Size(int type);
 int MORSE_Desc_Create  (MORSE_desc_t **desc, void *mat, MORSE_enum dtyp,
                         int mb, int nb, int bsiz, int lm, int ln,
                         int i, int j, int m, int n, int p, int q);
+int MORSE_Desc_Create_OOC (MORSE_desc_t **desc, MORSE_enum dtyp,
+                           int mb, int nb, int bsiz, int lm, int ln,
+                           int i, int j, int m, int n, int p, int q,
+                           int (*get_rankof)( const MORSE_desc_t*, int, int ));
 int MORSE_Desc_Create_User(MORSE_desc_t **desc, void *mat, MORSE_enum dtyp, int mb, int nb, int bsiz,
                            int lm, int ln, int i, int j, int m, int n, int p, int q,
                            void* (*get_blkaddr)( const MORSE_desc_t*, int, int ),
diff --git a/include/morse_struct.h b/include/morse_struct.h
index f307645d2..2aacefcdf 100644
--- a/include/morse_struct.h
+++ b/include/morse_struct.h
@@ -108,6 +108,7 @@ struct morse_desc_s {
     int alloc_mat;    // 1 if we handle the allocation of mat - else 0
     int register_mat; // 1 if we have to register mat - else 0 (handled by the application)
     int myrank;       // MPI rank of the descriptor
+    int ooc;          // 1 if the matrix is not to fit in memory
     void *schedopt;   // scheduler (QUARK|StarPU) specific structure
 };
 
diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c
index 3f6a1e285..dc7d6ba8e 100644
--- a/runtime/starpu/control/runtime_descriptor.c
+++ b/runtime/starpu/control/runtime_descriptor.c
@@ -279,6 +279,10 @@ int RUNTIME_desc_getoncpu( MORSE_desc_t *desc )
     int lnt = desc->lnt;
     int m, n;
 
+    if (desc->ooc)
+        /* May not even fit */
+        return MORSE_SUCCESS;
+
     for (n = 0; n < lnt; n++)
         for (m = 0; m < lmt; m++)
         {
diff --git a/timing/timing.c b/timing/timing.c
index d27120296..d2b73d49b 100644
--- a/timing/timing.c
+++ b/timing/timing.c
@@ -475,6 +475,7 @@ main(int argc, char *argv[]) {
     iparam[IPARAM_TRACE         ] = 0;
     iparam[IPARAM_DAG           ] = 0;
     iparam[IPARAM_ASYNC         ] = 1;
+    iparam[IPARAM_OOC           ] = 0;
     iparam[IPARAM_MX            ] = -1;
     iparam[IPARAM_NX            ] = -1;
     iparam[IPARAM_RHBLK         ] = 0;
@@ -549,6 +550,10 @@ main(int argc, char *argv[]) {
             iparam[IPARAM_ASYNC] = 0;
         } else if (startswith( argv[i], "--async" )) {
             iparam[IPARAM_ASYNC] = 1;
+        } else if (startswith( argv[i], "--ooc" )) {
+            iparam[IPARAM_OOC] = 1;
+        } else if (startswith( argv[i], "--noooc" )) {
+            iparam[IPARAM_OOC] = 0;
         } else if (startswith( argv[i], "--n_range=" )) {
             get_range( strchr( argv[i], '=' ) + 1, &start, &stop, &step );
         } else if (startswith( argv[i], "--m=" )) {
diff --git a/timing/timing.h b/timing/timing.h
index 3d33748a6..e4c0dc6fb 100644
--- a/timing/timing.h
+++ b/timing/timing.h
@@ -38,6 +38,7 @@ enum iparam_timing {
     IPARAM_TRACE,          /* Generate trace on the first non warmup run */
     IPARAM_DAG,            /* Do we require to output the DOT file?      */
     IPARAM_ASYNC,          /* Asynchronous calls                         */
+    IPARAM_OOC,            /* Out of Core                                */
     IPARAM_MX,             /* */
     IPARAM_NX,             /* */
     IPARAM_RHBLK,          /* Householder reduction parameter for QR/LQ  */
@@ -97,6 +98,7 @@ enum dparam_timing {
     int64_t MT    = (M%MB==0) ? (M/MB) : (M/MB+1); \
     int64_t NT    = (N%NB==0) ? (N/NB) : (N/NB+1); \
     int bigmat     = iparam[IPARAM_BIGMAT];         \
+    int ooc       = iparam[IPARAM_OOC];            \
     int check     = iparam[IPARAM_CHECK];          \
     int loud      = iparam[IPARAM_VERBOSE];        \
     (void)M;(void)N;(void)K;(void)NRHS;            \
@@ -109,7 +111,10 @@ enum dparam_timing {
     MORSE_desc_t *_desc_ = NULL;                                        \
     int status ## _desc_ ; \
     if( _cond_ ) {                                                      \
-       if (!bigmat) \
+       if (ooc) \
+           status ## _desc_ = MORSE_Desc_Create_OOC(&(_desc_), _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
+                   P, Q, NULL);\
+       else if (!bigmat) \
            status ## _desc_ = MORSE_Desc_Create_User(&(_desc_), NULL, _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
                           P, Q, morse_getaddr_null, NULL, NULL);\
        else \
-- 
GitLab