diff --git a/example/link_chameleon/CMakeLists.txt b/example/link_chameleon/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbcea21645eed3c184188d79cbd3b4e47dfced45 --- /dev/null +++ b/example/link_chameleon/CMakeLists.txt @@ -0,0 +1,34 @@ +### Main CMakeLists.txt for project link_chameleon + +cmake_minimum_required(VERSION 2.8) +project(LINK_CHAMELEON Fortran C CXX) + +# Add extra cmake module path and initialize morse cmake modules +# -------------------------------------------------------------- +set( MORSE_DISTRIB_DIR "" CACHE PATH "Directory of MORSE distribution") + +if (MORSE_DISTRIB_DIR) + set( MORSE_CMAKE_MODULE_DIR "${MORSE_DISTRIB_DIR}/cmake_modules/morse" CACHE PATH + "Directory where to find MORSE CMake modules (cmake_modules/morse)") + list(APPEND CMAKE_MODULE_PATH "${MORSE_CMAKE_MODULE_DIR}") + list(APPEND CMAKE_MODULE_PATH "${MORSE_CMAKE_MODULE_DIR}/find") + include(MorseInit) + + # Detect CHAMELEON + #find_package(CHAMELEON REQUIRED COMPONENTS QUARK) + find_package(CHAMELEON REQUIRED) + link_directories(${CHAMELEON_LIBRARY_DIRS}) + include_directories(${CHAMELEON_INCLUDE_DIRS}) + + # link_chameleon exe + add_executable(link_chameleon link_chameleon.c) + target_link_libraries(link_chameleon ${CHAMELEON_LIBRARIES}) +else() + message(STATUS "MORSE_DISTRIB_DIR is not set") + message(STATUS "Please indicate where is located your MORSE distribution directory." + " This is necessary to find cmake_modules.") +endif() + +### +### END CMakeLists.txt +### diff --git a/example/link_chameleon/link_chameleon.c b/example/link_chameleon/link_chameleon.c new file mode 100644 index 0000000000000000000000000000000000000000..bfb45332b392a3a9c6ad90a3a6954b70db941807 --- /dev/null +++ b/example/link_chameleon/link_chameleon.c @@ -0,0 +1,353 @@ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#if defined( _WIN32 ) || defined( _WIN64 ) +#include <windows.h> +#else /* Non-Windows */ +#include <unistd.h> +#include <sys/resource.h> +#endif + + +/* Common functions for all steps of the tutorial */ + +static void get_thread_count(int *thrdnbr) { +#if defined WIN32 || defined WIN64 + sscanf( getenv( "NUMBER_OF_PROCESSORS" ), "%d", thrdnbr ); +#else + *thrdnbr = sysconf(_SC_NPROCESSORS_ONLN); +#endif +} + +static int startswith(const char *s, const char *prefix) { + size_t n = strlen( prefix ); + if (strncmp( s, prefix, n )) + return 0; + return 1; +} + + +/* define complexity of algorithms - see Lawn 41 page 120 */ +#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) +#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) ) * (double)(__n) - (1. / 6.))) +#define FMULS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) +#define FADDS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) + +/* define some tools to time the program */ +#if defined( _WIN32 ) || defined( _WIN64 ) +#include <windows.h> +#include <time.h> +#include <sys/timeb.h> +#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) +#define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 +#else +#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL +#endif + +struct timezone +{ + int tz_minuteswest; /* minutes W of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + +int gettimeofday(struct timeval* tv, struct timezone* tz) +{ + FILETIME ft; + unsigned __int64 tmpres = 0; + static int tzflag; + + if (NULL != tv) + { + GetSystemTimeAsFileTime(&ft); + tmpres |= ft.dwHighDateTime; + tmpres <<= 32; + tmpres |= ft.dwLowDateTime; + + /*converting file time to unix epoch*/ + tmpres /= 10; /*convert into microseconds*/ + tmpres -= DELTA_EPOCH_IN_MICROSECS; + + tv->tv_sec = (long)(tmpres / 1000000UL); + tv->tv_usec = (long)(tmpres % 1000000UL); + } + if (NULL != tz) + { + if (!tzflag) + { + _tzset(); + tzflag++; + } + tz->tz_minuteswest = _timezone / 60; + tz->tz_dsttime = _daylight; + } + return 0; +} + +#else /* Non-Windows */ +#include <sys/time.h> +#endif + +/* + * struct timeval {time_t tv_sec; suseconds_t tv_usec;}; + */ +double cWtime(void) +{ + struct timeval tp; + gettimeofday( &tp, NULL ); + return tp.tv_sec + 1e-6 * tp.tv_usec; +} +#include <lapacke.h> +#include "morse.h" + +/* Integer parameters for step1 */ +enum iparam_step1 { + IPARAM_THRDNBR, /* Number of cores */ + IPARAM_N, /* Number of columns of the matrix */ + IPARAM_NRHS, /* Number of RHS */ + /* End */ + IPARAM_SIZEOF +}; + +/* Specific routines used in step1.c main program */ + +/****************************************************************************** + * Initialize integer parameters + */ +static void init_iparam(int iparam[IPARAM_SIZEOF]){ + iparam[IPARAM_THRDNBR ] = -1; + iparam[IPARAM_N ] = 500; + iparam[IPARAM_NRHS ] = 1; + } + +/****************************************************************************** + * Print how to use the program + */ +static void show_help(char *prog_name) { + printf( "Usage:\n%s [options]\n\n", prog_name ); + printf( "Options are:\n" + " --help Show this help\n" + "\n" + " --n=X dimension (N). (default: 500)\n" + " --nrhs=X number of RHS. (default: 1)\n" + "\n" + " --threads=X Number of CPU workers (default: _SC_NPROCESSORS_ONLN)\n" + "\n"); +} + +/****************************************************************************** + * Read arguments following step1 program call + */ +static void read_args(int argc, char *argv[], int *iparam){ + int i; + for (i = 1; i < argc && argv[i]; ++i) { + if ( startswith( argv[i], "--help") || startswith( argv[i], "-help") || + startswith( argv[i], "--h") || startswith( argv[i], "-h") ) { + show_help( argv[0] ); + exit(0); + } else if (startswith( argv[i], "--n=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_N]) ); + } else if (startswith( argv[i], "--nrhs=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NRHS]) ); + } else if (startswith( argv[i], "--threads=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR]) ); + } else { + fprintf( stderr, "Unknown option: %s\n", argv[i] ); + } + } +} + +/****************************************************************************** + * Print a header message to summarize main parameters + */ +static void print_header(char *prog_name, int * iparam) { +#if defined(MAGMAMORSE_SIMULATION) + double eps = 0.; +#else + double eps = LAPACKE_dlamch_work( 'e' ); +#endif + + printf( "#\n" + "# MORSE %d.%d.%d, %s\n" + "# Nb threads: %d\n" + "# Nb gpus: %d\n" + "# N: %d\n" + "# NB: %d\n" + "# IB: %d\n" + "# eps: %e\n" + "#\n", + CHAMELEON_VERSION_MAJOR, + CHAMELEON_VERSION_MINOR, + CHAMELEON_VERSION_MICRO, + prog_name, + iparam[IPARAM_THRDNBR], + 0, + iparam[IPARAM_N], + 128, + 32, + eps ); + + printf( "# M N K/NRHS seconds Gflop/s\n"); + printf( "#%7d %7d %7d ", iparam[IPARAM_N], iparam[IPARAM_N], iparam[IPARAM_NRHS]); + fflush( stdout ); + return; +} + +/****************************************************************************** + * Macro to allocate a matrix as a 1D array + */ +#define PASTE_CODE_ALLOCATE_MATRIX(_name_, _type_, _m_, _n_) \ + _type_ *_name_ = NULL; \ + _name_ = (_type_*)malloc( (_m_) * (_n_) * sizeof(_type_) ); \ + if ( ! _name_ ) { \ + fprintf(stderr, "Out of Memory for %s\n", #_name_); \ + return -1; \ + } + +/* + * test external application link with magmamorse + */ +int main(int argc, char *argv[]) { + + size_t i, j; + size_t N; // matrix order + size_t NRHS; // number of RHS vectors + int NCPU; // number of cores to use + int NGPU; // number of gpus (cuda devices) to use + int UPLO = MorseUpper; // where is stored L + + /* declarations to time the program and evaluate performances */ + double fmuls, fadds, flops, gflops, cpu_time; + + /* variable to check the numerical results */ + double anorm, bnorm, xnorm, eps, res; + int hres; + + int major, minor, patch; + MORSE_Version(&major, &minor, &patch); + + /* initialize some parameters with default values */ + int iparam[IPARAM_SIZEOF]; + memset(iparam, 0, IPARAM_SIZEOF*sizeof(int)); + init_iparam(iparam); + + /* read arguments */ + read_args(argc, argv, iparam); + N = iparam[IPARAM_N]; + NRHS = iparam[IPARAM_NRHS]; + + /* compute the algorithm complexity to evaluate performances */ + fadds = (double)( FADDS_POTRF(N) + 2 * FADDS_TRSM(N,NRHS) ); + fmuls = (double)( FMULS_POTRF(N) + 2 * FMULS_TRSM(N,NRHS) ); + flops = 1e-9 * (fmuls + fadds); + gflops = 0.0; + cpu_time = 0.0; + + /* initialize the number of thread if not given by the user in argv + * It makes sense only if this program is linked with pthread and + * multithreaded BLAS and LAPACK */ + if ( iparam[IPARAM_THRDNBR] == -1 ) { + get_thread_count( &(iparam[IPARAM_THRDNBR]) ); + } + NCPU = iparam[IPARAM_THRDNBR]; + NGPU = 0; + + /* print informations to user */ + print_header( argv[0], iparam); + + /* Initialize MORSE with main parameters */ + MORSE_Init( NCPU, NGPU ); + + /* + * allocate memory for our data using a C macro (see step1.h) + * - matrix A : size N x N + * - set of RHS vectors B : size N x NRHS + * - set of solutions vectors X : size N x NRHS + */ + PASTE_CODE_ALLOCATE_MATRIX( A, double, N, N ); + PASTE_CODE_ALLOCATE_MATRIX( B, double, N, NRHS ); + PASTE_CODE_ALLOCATE_MATRIX( X, double, N, NRHS ); + PASTE_CODE_ALLOCATE_MATRIX( Acpy, double, N, N ); + + /* generate A matrix with random values such that it is spd */ + MORSE_dplgsy( (double)N, N, A, N, 51 ); + + /* generate RHS */ + MORSE_dplrnt( N, NRHS, B, N, 5673 ); + + /* copy A before facto. in order to check the result */ + memcpy(Acpy, A, N*N*sizeof(double)); + + /* copy B in X before solving */ + memcpy(X, B, N*NRHS*sizeof(double)); + + /************************************************************/ + /* solve the system AX = B using the Cholesky factorization */ + /************************************************************/ + + cpu_time = -cWtime(); + + /* Cholesky facorization: + * A is replaced by its factorization L or L^T depending on uplo */ + MORSE_dpotrf( UPLO, N, A, N ); + + /* Solve: + * B is stored in X on entry, X contains the result on exit. + * Forward and back substitutions + */ + MORSE_dpotrs(UPLO, N, NRHS, A, N, X, N); + + cpu_time += cWtime(); + + /* print informations to user */ + gflops = flops / cpu_time; + printf( "%9.3f %9.2f\n", cpu_time, gflops); + fflush( stdout ); + + /************************************************************/ + /* check if solve is correct i.e. AX-B = 0 */ + /************************************************************/ + + /* compute norms to check the result */ + anorm = MORSE_dlange( MorseInfNorm, N, N, Acpy, N); + bnorm = MORSE_dlange( MorseInfNorm, N, NRHS, B, N); + xnorm = MORSE_dlange( MorseInfNorm, N, NRHS, X, N); + + /* compute A*X-B, store the result in B */ + MORSE_dgemm(MorseNoTrans, MorseNoTrans, + N, NRHS, N, 1.0, Acpy, N, X, N, -1.0, B, N); + res = MORSE_dlange( MorseInfNorm, N, NRHS, B, N); + + /* check residual and print a message */ + eps = LAPACKE_dlamch_work( 'e' ); + + /* + * if hres = 0 then the test succeed + * else the test failed + */ + hres = 0; + hres = ( res / N / eps / (anorm * xnorm + bnorm ) > 100.0 ); + printf( " ||Ax-b|| ||A|| ||x|| ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||) RETURN\n"); + if (hres) + printf( "%8.5e %8.5e %8.5e %8.5e %8.5e FAILURE \n", + res, anorm, xnorm, bnorm, + res / N / eps / (anorm * xnorm + bnorm )); + else + printf( "%8.5e %8.5e %8.5e %8.5e %8.5e SUCCESS \n", + res, anorm, xnorm, bnorm, + res / N / eps / (anorm * xnorm + bnorm )); + + /* deallocate data */ + free(A); + free(Acpy); + free(B); + free(X); + + /* Finalize MORSE */ + MORSE_Finalize(); + + return EXIT_SUCCESS; +}