From 764be57d352afb5496c29afc0042a120bc1b4fc7 Mon Sep 17 00:00:00 2001 From: Florent Pruvost <florent.pruvost@inria.fr> Date: Thu, 25 Aug 2016 15:45:42 +0000 Subject: [PATCH] chameleon: add a out_of_core example, requires to handle the case where starpu handles itself the allocation of tiles -> we give a NULL pointer through the get_blkaddr function for registering --- example/CMakeLists.txt | 11 +- example/lapack_to_morse/CTestLists.cmake | 2 +- example/lapack_to_morse/step3.c | 2 +- example/out_of_core/CMakeLists.txt | 107 ++++++++ example/out_of_core/CTestLists.cmake | 11 + example/out_of_core/out_of_core.c | 193 ++++++++++++++ example/out_of_core/out_of_core.h | 272 ++++++++++++++++++++ include/runtime.h | 20 +- runtime/starpu/control/runtime_descriptor.c | 17 +- 9 files changed, 614 insertions(+), 21 deletions(-) create mode 100644 example/out_of_core/CMakeLists.txt create mode 100644 example/out_of_core/CTestLists.cmake create mode 100644 example/out_of_core/out_of_core.c create mode 100644 example/out_of_core/out_of_core.h diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 6983ac335..430392799 100755 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -28,11 +28,14 @@ add_subdirectory(basic_zposv) if (CHAMELEON_PREC_D) add_subdirectory(lapack_to_morse) + if (CHAMELEON_SCHED_STARPU) + add_subdirectory(out_of_core) + endif() else() - message(WARNING "CHAMELEON_PREC_D is set to OFF so that lapack_to_morse" - "tutorial cannot be built (use only double arithmetic precision).\n" - "Please set CHAMELEON_PREC_D to ON if you want to build executables of" - "this tutorial.") + message(WARNING "CHAMELEON_PREC_D is set to OFF so that lapack_to_morse " + "and out_core tutorials cannot be built (use only double arithmetic " + "precision).\n Please set CHAMELEON_PREC_D to ON if you want to build " + "executables of this tutorial.") endif() ### diff --git a/example/lapack_to_morse/CTestLists.cmake b/example/lapack_to_morse/CTestLists.cmake index 
d6baba725..202140571 100644 --- a/example/lapack_to_morse/CTestLists.cmake +++ b/example/lapack_to_morse/CTestLists.cmake @@ -1,5 +1,5 @@ # -# Check Example basic_zposv +# Check Example lapack_to_morse # set(TESTLIST diff --git a/example/lapack_to_morse/step3.c b/example/lapack_to_morse/step3.c index 8f77be1a5..4677bd55f 100644 --- a/example/lapack_to_morse/step3.c +++ b/example/lapack_to_morse/step3.c @@ -34,7 +34,7 @@ */ int main(int argc, char *argv[]) { - size_t N; // matrix order + size_t N; // matrix order int NB; // number of rows and columns in tiles int NRHS; // number of RHS vectors int NCPU; // number of cores to use diff --git a/example/out_of_core/CMakeLists.txt b/example/out_of_core/CMakeLists.txt new file mode 100644 index 000000000..1b6c61fc8 --- /dev/null +++ b/example/out_of_core/CMakeLists.txt @@ -0,0 +1,107 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 
+# +### +# +# @file CMakeLists.txt +# +# MORSE example routines +# MORSE is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI, +# University of Bordeaux, Bordeaux INP +# +# @version 1.0.0 +# @author Florent Pruvost +# @date 2016-08-23 +# +### + + +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +# list of sources +set(OOC_SOURCES + out_of_core.c + ) + +# Define what libraries we have to link with +# ------------------------------------------ +unset(libs_for_ooc) +list(APPEND libs_for_ooc + chameleon + chameleon_starpu + ${STARPU_LIBRARIES_DEP} +) +link_directories(${STARPU_LIBRARY_DIRS}) + + +if(NOT CHAMELEON_SIMULATION) + + if(CHAMELEON_USE_CUDA OR CHAMELEON_USE_MAGMA) + list(APPEND libs_for_ooc + cudablas) + endif() + if(CHAMELEON_USE_CUDA) + list(APPEND libs_for_ooc + ${CUDA_LIBRARIES} + ) + link_directories(${CUDA_LIBRARY_DIRS}) + endif() + if(CHAMELEON_USE_MAGMA) + list(APPEND libs_for_ooc + ${MAGMA_LIBRARIES} + ) + link_directories(${MAGMA_LIBRARY_DIRS}) + endif() + + list(APPEND libs_for_ooc + coreblas + ${LAPACKE_LIBRARIES} + ${CBLAS_LIBRARIES} + ${LAPACK_SEQ_LIBRARIES} + ${BLAS_SEQ_LIBRARIES} + ${HWLOC_LIBRARIES} + ${EXTRA_LIBRARIES} + ) + + link_directories(${LAPACKE_LIBRARY_DIRS}) + link_directories(${LAPACK_LIBRARY_DIRS}) + link_directories(${CBLAS_LIBRARY_DIRS}) + link_directories(${BLAS_LIBRARY_DIRS}) + +else() + + list(APPEND libs_for_ooc + coreblas + simulapacke + simucblas + ${HWLOC_LIBRARIES} + ${EXTRA_LIBRARIES} + ) + +endif() + +link_directories(${HWLOC_LIBRARY_DIRS}) + + +# message(STATUS "libs examples: ${libs_for_ooc}") +foreach(_ooc ${OOC_SOURCES}) + get_filename_component(_name_exe ${_ooc} NAME_WE) + add_executable(${_name_exe} ${_ooc}) + set_property(TARGET ${_name_exe} PROPERTY LINKER_LANGUAGE Fortran) + target_link_libraries(${_name_exe} ${libs_for_ooc}) + install(TARGETS ${_name_exe} + DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/chameleon/example/out_of_core) +endforeach() + 
+#-------- Tests --------- +include(CTestLists.cmake) + +### +### END CMakeLists.txt +### diff --git a/example/out_of_core/CTestLists.cmake b/example/out_of_core/CTestLists.cmake new file mode 100644 index 000000000..1d02adac6 --- /dev/null +++ b/example/out_of_core/CTestLists.cmake @@ -0,0 +1,11 @@ +# +# Check Example out_of_core +# + +set(TESTLIST + out_of_core + ) + +foreach(test ${TESTLIST}) + add_test(example_ooc_${test} ./${test}) +endforeach() diff --git a/example/out_of_core/out_of_core.c b/example/out_of_core/out_of_core.c new file mode 100644 index 000000000..254e79410 --- /dev/null +++ b/example/out_of_core/out_of_core.c @@ -0,0 +1,193 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2014 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. + * + **/ + +/** + * + * @file out_of_core.c + * + * MORSE example routines + * MORSE is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI, + * University of Bordeaux, Bordeaux INP + * + * @version 1.0.0 + * @author Florent Pruvost + * @date 2014-10-29 + * + **/ + +#include "out_of_core.h" + +/* + * @brief ooc is a driver example routine to test the out-of-core feature with StarPU + * @details TODO: write some details + */ +int main(int argc, char *argv[]) { + + size_t N; // matrix order + int NB; // number of rows and columns in tiles + int NRHS; // number of RHS vectors + int NCPU; // number of cores to use + int NGPU; // number of gpus (cuda devices) to use + int UPLO = MorseUpper; // where is stored L + + /* descriptors necessary for calling MORSE tile interface */ + MORSE_desc_t *descA = NULL, *descAC = NULL, *descB = NULL, *descX = NULL; + + /* declarations to time the program and evaluate performances */ + double fmuls, fadds, flops, gflops, cpu_time; + + /* variable to check the numerical results 
*/ + double anorm, bnorm, xnorm, eps, res; + int hres; + + /* initialize some parameters with default values */ + int iparam[IPARAM_SIZEOF]; + memset(iparam, 0, IPARAM_SIZEOF*sizeof(int)); + init_iparam(iparam); + + /* read arguments */ + read_args(argc, argv, iparam); + N = iparam[IPARAM_N]; + NB = iparam[IPARAM_NB]; + NRHS = iparam[IPARAM_NRHS]; + + /* compute the algorithm complexity to evaluate performances */ + fadds = (double)( FADDS_POTRF(N) + 2 * FADDS_TRSM(N,NRHS) ); + fmuls = (double)( FMULS_POTRF(N) + 2 * FMULS_TRSM(N,NRHS) ); + flops = 1e-9 * (fmuls + fadds); + gflops = 0.0; + cpu_time = 0.0; + + /* initialize the number of threads if not given by the user in argv */ + if ( iparam[IPARAM_THRDNBR] == -1 ) { + get_thread_count( &(iparam[IPARAM_THRDNBR]) ); + } + NCPU = iparam[IPARAM_THRDNBR]; + NGPU = 0; + + /* print informations to user */ + print_header( argv[0], iparam); + + /* check that o_direct will work, then set STARPU_LIMIT_CPU_MEM from --ooc */ + if (iparam[IPARAM_OUTOFCORE] > 0) { + if (! will_o_direct_work(NB)) { + print_o_direct_wont_work(); + return EXIT_FAILURE; + } + char maxMemoryAllowed[32]; + sprintf (maxMemoryAllowed, "%d", iparam[IPARAM_OUTOFCORE]); + setenv ("STARPU_LIMIT_CPU_MEM", maxMemoryAllowed, 1); + } + + /* Initialize MORSE with main parameters */ + if ( MORSE_Init( NCPU, NGPU ) != MORSE_SUCCESS ) { + fprintf(stderr, "Error initializing MORSE library\n"); + return EXIT_FAILURE; + } + MORSE_Set(MORSE_TILE_SIZE, NB); + + /* register the out-of-core disk storage with StarPU */ + if (iparam[IPARAM_OUTOFCORE] > 0) { + int new_dd = starpu_disk_register (&starpu_disk_unistd_o_direct_ops, + (void*) "/tmp/starpu_ooc/", 1024*1024*10); + } + + MORSE_Desc_Create_User(&descA, NULL, MorseRealDouble, + NB, NB, NB*NB, N, N, 0, 0, N, N, 1, 1, + morse_getaddr_null, + morse_getblkldd_ccrb, + morse_getrankof_2d); + MORSE_Desc_Create(&descB, NULL, MorseRealDouble, + NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS, 1, 1); + MORSE_Desc_Create(&descX, NULL, MorseRealDouble, + NB, NB, NB*NB, N, NRHS, 0, 0, N, NRHS, 1, 1); + 
MORSE_Desc_Create(&descAC, NULL, MorseRealDouble, + NB, NB, NB*NB, N, N, 0, 0, N, N, 1, 1); + + /* generate A matrix with random values such that it is spd */ + MORSE_dplgsy_Tile( (double)N, descA, 51 ); + + /* generate RHS */ + MORSE_dplrnt_Tile( descB, 5673 ); + + /* copy A before facto. in order to check the result */ + MORSE_dlacpy_Tile(MorseUpperLower, descA, descAC); + + /* copy B in X before solving + * same sense as memcpy(X, B, N*NRHS*sizeof(double)) but for descriptors */ + MORSE_dlacpy_Tile(MorseUpperLower, descB, descX); + + /************************************************************/ + /* solve the system AX = B using the Cholesky factorization */ + /************************************************************/ + + cpu_time = -cWtime(); + + /* Cholesky factorization: + * A is replaced by its factorization L or L^T depending on uplo */ + MORSE_dpotrf_Tile( UPLO, descA ); + + /* Solve: + * B is stored in X on entry, X contains the result on exit. + * Forward and back substitutions + */ + MORSE_dpotrs_Tile( UPLO, descA, descX ); + + cpu_time += cWtime(); + + /* print informations to user */ + gflops = flops / cpu_time; + printf( "%9.3f %9.2f\n", cpu_time, gflops); + fflush( stdout ); + + /************************************************************/ + /* check if solve is correct i.e. 
AX-B = 0 */ + /************************************************************/ + + /* compute norms to check the result */ + anorm = MORSE_dlange_Tile( MorseInfNorm, descAC); + bnorm = MORSE_dlange_Tile( MorseInfNorm, descB); + xnorm = MORSE_dlange_Tile( MorseInfNorm, descX); + + /* compute A*X-B, store the result in B */ + MORSE_dgemm_Tile( MorseNoTrans, MorseNoTrans, + 1.0, descAC, descX, -1.0, descB ); + res = MORSE_dlange_Tile( MorseInfNorm, descB ); + + /* check residual and print a message */ + eps = LAPACKE_dlamch_work( 'e' ); + + /* + * if hres = 0 then the test succeed + * else the test failed + */ + hres = 0; + hres = ( res / N / eps / (anorm * xnorm + bnorm ) > 100.0 ); + printf( " ||Ax-b|| ||A|| ||x|| ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||) RETURN\n"); + if (hres) + printf( "%8.5e %8.5e %8.5e %8.5e %8.5e FAILURE \n", + res, anorm, xnorm, bnorm, + res / N / eps / (anorm * xnorm + bnorm )); + else + printf( "%8.5e %8.5e %8.5e %8.5e %8.5e SUCCESS \n", + res, anorm, xnorm, bnorm, + res / N / eps / (anorm * xnorm + bnorm )); + + /* free descriptors descA, descB, descX, descAC */ + MORSE_Desc_Destroy( &descA ); + MORSE_Desc_Destroy( &descB ); + MORSE_Desc_Destroy( &descX ); + MORSE_Desc_Destroy( &descAC ); + + /* Finalize MORSE */ + MORSE_Finalize(); + + return EXIT_SUCCESS; +} diff --git a/example/out_of_core/out_of_core.h b/example/out_of_core/out_of_core.h new file mode 100644 index 000000000..dcb9e9702 --- /dev/null +++ b/example/out_of_core/out_of_core.h @@ -0,0 +1,272 @@ +/** + * + * @copyright (c) 2009-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. + * All rights reserved. + * @copyright (c) 2012-2016 Inria. All rights reserved. + * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 
+ * + **/ + +/** + * + * @file out_of_core.h + * + * MORSE example routines + * MORSE is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI, + * University of Bordeaux, Bordeaux INP + * + * @version 1.0.0 + * @author Florent Pruvost + * @date 2016-08-23 + * + **/ + +#ifndef OOC_H +#define OOC_H + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#if defined( _WIN32 ) || defined( _WIN64 ) +#define int64_t __int64 +#endif + +/* Define these so that the Microsoft VC compiler stops complaining + about scanf and friends */ +#define _CRT_SECURE_NO_DEPRECATE +#define _CRT_SECURE_NO_WARNINGS + +#if defined( _WIN32 ) || defined( _WIN64 ) +#include <windows.h> +#else /* Non-Windows */ +#include <unistd.h> +#include <sys/resource.h> +#endif + +#include <starpu.h> +#include "coreblas/include/lapacke.h" +#include "morse.h" +#include "control/common.h" + +/* Common functions for all steps of the tutorial */ + +static void get_thread_count(int *thrdnbr) { +#if defined WIN32 || defined WIN64 + sscanf( getenv( "NUMBER_OF_PROCESSORS" ), "%d", thrdnbr ); +#else + *thrdnbr = sysconf(_SC_NPROCESSORS_ONLN); +#endif +} + +static int startswith(const char *s, const char *prefix) { + size_t n = strlen( prefix ); + if (strncmp( s, prefix, n )) + return 0; + return 1; +} + + +/* define complexity of algorithms - see Lawn 41 page 120 */ +#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) +#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) ) * (double)(__n) - (1. 
/ 6.))) +#define FMULS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) +#define FADDS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) + +/* define some tools to time the program */ +#if defined( _WIN32 ) || defined( _WIN64 ) +#include <windows.h> +#include <time.h> +#include <sys/timeb.h> +#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) +#define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 +#else +#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL +#endif + +struct timezone +{ + int tz_minuteswest; /* minutes W of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + +int gettimeofday(struct timeval* tv, struct timezone* tz) +{ + FILETIME ft; + unsigned __int64 tmpres = 0; + static int tzflag; + + if (NULL != tv) + { + GetSystemTimeAsFileTime(&ft); + tmpres |= ft.dwHighDateTime; + tmpres <<= 32; + tmpres |= ft.dwLowDateTime; + + /*converting file time to unix epoch*/ + tmpres /= 10; /*convert into microseconds*/ + tmpres -= DELTA_EPOCH_IN_MICROSECS; + + tv->tv_sec = (long)(tmpres / 1000000UL); + tv->tv_usec = (long)(tmpres % 1000000UL); + } + if (NULL != tz) + { + if (!tzflag) + { + _tzset(); + tzflag++; + } + tz->tz_minuteswest = _timezone / 60; + tz->tz_dsttime = _daylight; + } + return 0; +} + +#else /* Non-Windows */ +#include <sys/time.h> +#endif + +/* + * struct timeval {time_t tv_sec; suseconds_t tv_usec;}; + */ +double cWtime(void) +{ + struct timeval tp; + gettimeofday( &tp, NULL ); + return tp.tv_sec + 1e-6 * tp.tv_usec; +} + +/* Integer parameters */ +enum iparam_ooc { + IPARAM_THRDNBR, /* Number of cores */ + IPARAM_N, /* Number of columns of the matrix */ + IPARAM_NB, /* Number of columns in a tile */ + IPARAM_NRHS, /* Number of RHS */ + IPARAM_OUTOFCORE, /* if > 0 --> how much memory is allowed in core */ + /* else --> do not use ooc. 
*/ + /* End */ + IPARAM_SIZEOF +}; + +/* Specific routines */ + +/****************************************************************************** + * Initialize integer parameters + */ +static void init_iparam(int iparam[IPARAM_SIZEOF]){ + iparam[IPARAM_THRDNBR ] = -1; + iparam[IPARAM_N ] = 500; + iparam[IPARAM_NB ] = 128; + iparam[IPARAM_NRHS ] = 1; + iparam[IPARAM_OUTOFCORE ] = 2000; + } + +/****************************************************************************** + * Print how to use the program + */ +static void show_help(char *prog_name) { + printf( "Usage:\n%s [options]\n\n", prog_name ); + printf( "Options are:\n" + " --help Show this help\n" + "\n" + " --n=X dimension (N). (default: 500)\n" + " --nb=X NB size. (default: 128)\n" + " --nrhs=X number of RHS. (default: 1)\n" + "\n" + " --threads=X Number of CPU workers (default: _SC_NPROCESSORS_ONLN)\n" + " --ooc=N Allow to store N MiB in main memory. (default: )\n" + "\n"); +} + +/****************************************************************************** + * Read arguments following ooc program call + */ +static void read_args(int argc, char *argv[], int *iparam){ + int i; + for (i = 1; i < argc && argv[i]; ++i) { + if ( startswith( argv[i], "--help") || startswith( argv[i], "-help") || + startswith( argv[i], "--h") || startswith( argv[i], "-h") ) { + show_help( argv[0] ); + exit(0); + } else if (startswith( argv[i], "--n=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_N]) ); + } else if (startswith( argv[i], "--nb=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NB]) ); + } else if (startswith( argv[i], "--nrhs=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NRHS]) ); + } else if (startswith( argv[i], "--threads=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR]) ); + } else if (startswith( argv[i], "--ooc=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_OUTOFCORE]) ); + } else { + fprintf( stderr, 
"Unknown option: %s\n", argv[i] ); + } + } +} + +/****************************************************************************** + * Print a header message to summarize main parameters + */ +static void print_header(char *prog_name, int * iparam) { +#if defined(CHAMELEON_SIMULATION) + double eps = 0.; +#else + double eps = LAPACKE_dlamch_work( 'e' ); +#endif + + printf( "#\n" + "# CHAMELEON %d.%d.%d, %s\n" + "# Nb threads: %d\n" + "# Nb gpus: %d\n" + "# N: %d\n" + "# NB: %d\n" + "# IB: %d\n" + "# eps: %e\n" + "# ooc: %d\n" + "#\n", + CHAMELEON_VERSION_MAJOR, + CHAMELEON_VERSION_MINOR, + CHAMELEON_VERSION_MICRO, + prog_name, + iparam[IPARAM_THRDNBR], + 0, + iparam[IPARAM_N], + iparam[IPARAM_NB], + 32, + eps, + iparam[IPARAM_OUTOFCORE]); + + printf( "# M N K/NRHS seconds Gflop/s\n"); + printf( "#%7d %7d %7d ", iparam[IPARAM_N], iparam[IPARAM_N], iparam[IPARAM_NRHS]); + fflush( stdout ); + return; +} + +// Checking if all block size is a multiple of 4096 Bytes +static int +will_o_direct_work(int nb) { + if ((nb * nb * sizeof(float)) % 4096 != 0) + return 0; + return 1; +} + +static void +print_o_direct_wont_work(void) { + fprintf(stderr, "\n[chameleon] Using out-of-core in o_direct force your blocks' size to be\n" + "multiples of 4096. 
Tip : chose 'n' and 'nb' as both multiples of 32.\n"); +} + +/****************************************************************************** + * Function to return address of block (m,n) -> here NULL because memory is + * directly handled by StarPU + **/ +inline static void* morse_getaddr_null(const MORSE_desc_t *A, int m, int n) +{ + return (void*)( NULL ); +} + +#endif /* OOC_H */ diff --git a/include/runtime.h b/include/runtime.h index 38b2ed213..6cd6c2a14 100644 --- a/include/runtime.h +++ b/include/runtime.h @@ -60,18 +60,18 @@ void RUNTIME_comm_size (int*); /******************************************************************************* * RUNTIME Descriptor **/ -void* RUNTIME_mat_alloc (size_t); -void RUNTIME_mat_free (void*, size_t); -void RUNTIME_desc_init (MORSE_desc_t*); -void RUNTIME_desc_create (MORSE_desc_t*); -void RUNTIME_desc_destroy (MORSE_desc_t*); -void RUNTIME_desc_submatrix (MORSE_desc_t*); -void* RUNTIME_desc_getaddr (MORSE_desc_t*, int, int); +void* RUNTIME_mat_alloc (size_t); +void RUNTIME_mat_free (void*, size_t); +void RUNTIME_desc_init (MORSE_desc_t*); +void RUNTIME_desc_create (MORSE_desc_t*); +void RUNTIME_desc_destroy (MORSE_desc_t*); +void RUNTIME_desc_submatrix (MORSE_desc_t*); +void* RUNTIME_desc_getaddr (MORSE_desc_t*, int, int); /* Acquire in main memory an up-to-date copy of the data described by the descriptor for read-write access. */ -int RUNTIME_desc_acquire (MORSE_desc_t*); +int RUNTIME_desc_acquire (MORSE_desc_t*); /* Release the data described by the descriptor to be used by the StarPU tasks again. 
*/ -int RUNTIME_desc_release (MORSE_desc_t*); -int RUNTIME_desc_getoncpu (MORSE_desc_t*); +int RUNTIME_desc_release (MORSE_desc_t*); +int RUNTIME_desc_getoncpu (MORSE_desc_t*); /******************************************************************************* * RUNTIME Options diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c index b7926f2ae..313ceff09 100644 --- a/runtime/starpu/control/runtime_descriptor.c +++ b/runtime/starpu/control/runtime_descriptor.c @@ -56,7 +56,7 @@ void *RUNTIME_mat_alloc( size_t size) #else void *mat; - if (starpu_malloc_flags(&mat, size, STARPU_MALLOC_PINNED|FOLDED) != 0) + if (starpu_malloc_flags(&mat, size, STARPU_MALLOC_PINNED|FOLDED|STARPU_MALLOC_COUNT) != 0) return NULL; return mat; #endif @@ -67,7 +67,7 @@ void RUNTIME_mat_free( void *mat, size_t size) #if defined(CHAMELEON_SIMULATION) && !defined(STARPU_MALLOC_SIMULATION_FOLDED) && !defined(CHAMELEON_USE_MPI) return (void*) 1; #else - starpu_free_flags(mat, size, STARPU_MALLOC_PINNED|FOLDED); + starpu_free_flags(mat, size, STARPU_MALLOC_PINNED|FOLDED|STARPU_MALLOC_COUNT); #endif } @@ -297,9 +297,16 @@ void *RUNTIME_desc_getaddr( MORSE_desc_t *desc, int m, int n ) int tempnn = (n == desc->lnt-1) ? (desc->ln - n * desc->nb) : desc->nb; if ( myrank == owner ) { - starpu_matrix_data_register(ptrtile, 0, - (uintptr_t)desc->get_blkaddr(desc, m, n), - BLKLDD(desc, m), tempmm, tempnn, eltsze); + if ( desc->get_blkaddr(desc, m, n) == (void*)NULL ) { + starpu_matrix_data_register(ptrtile, -1, + (uintptr_t) NULL, + BLKLDD(desc, m), tempmm, tempnn, eltsze); + } + else { + starpu_matrix_data_register(ptrtile, 0, + (uintptr_t)desc->get_blkaddr(desc, m, n), + BLKLDD(desc, m), tempmm, tempnn, eltsze); + } } else { starpu_matrix_data_register(ptrtile, -1, -- GitLab