Commit e0034d2e authored by Mathieu Faverge's avatar Mathieu Faverge

Merge branch 'update-morse-cmake' into 'master'

Update CMake

See merge request solverstack/chameleon!101
parents 8595b23c c7598e6e
This diff is collapsed.
......@@ -110,9 +110,9 @@ MACRO(GENERATE_PKGCONFIG_FILE)
if(CHAMELEON_SCHED_STARPU)
list(APPEND CHAMELEON_PKGCONFIG_LIBS -lchameleon_starpu)
if ( CHAMELEON_USE_MPI )
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED_PRIVATE libstarpumpi)
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED_PRIVATE starpumpi-${STARPU_VERSION})
else()
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED_PRIVATE libstarpu)
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED_PRIVATE starpu-${STARPU_VERSION})
endif()
elseif(CHAMELEON_SCHED_QUARK)
list(APPEND CHAMELEON_PKGCONFIG_LIBS -lchameleon_quark)
......@@ -131,7 +131,7 @@ MACRO(GENERATE_PKGCONFIG_FILE)
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "coreblas")
if(CHAMELEON_USE_CUDA)
list(APPEND CUDABLAS_PKGCONFIG_LIBS_PRIVATE ${CUDA_LIBRARIES})
list(APPEND CUDABLAS_PKGCONFIG_LIBS_PRIVATE ${EXTRA_LIBRARIES_CUDA})
list(APPEND CHAMELEON_PKGCONFIG_REQUIRED "cudablas")
endif()
......
......@@ -4,7 +4,7 @@
#
# @copyright 2009-2014 The University of Tennessee and The University of
# Tennessee Research Foundation. All rights reserved.
# @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
# @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
# Univ. Bordeaux. All rights reserved.
#
###
......@@ -22,6 +22,7 @@
# @date 2014-11-10
#
###
set(dep_message "\nConfiguration of Chameleon:\n"
" BUILDNAME ...........: ${BUILDNAME}\n"
" SITE ................: ${SITE}\n"
......@@ -40,14 +41,16 @@ set(dep_message "${dep_message}"
" Build shared ........: ${BUILD_SHARED_LIBS}\n"
" CFlags ..............: ${CMAKE_C_FLAGS}\n"
" LDFlags .............: ${CMAKE_C_LINK_FLAGS}\n"
" EXE LDFlags .........: ${CMAKE_EXE_LINKER_FLAGS}\n"
"\n"
" Implementation paradigm\n"
" CUDA ................: ${CHAMELEON_USE_CUDA}\n"
" MPI .................: ${CHAMELEON_USE_MPI}\n"
"\n"
" Runtime specific\n"
" PARSEC ..............: ${CHAMELEON_SCHED_PARSEC}\n"
" QUARK ...............: ${CHAMELEON_SCHED_QUARK}\n"
" StarPU ..............: ${CHAMELEON_SCHED_STARPU}\n"
" STARPU ..............: ${CHAMELEON_SCHED_STARPU}\n"
"\n"
" Kernels specific\n"
" BLAS ................: ${BLAS_VENDOR_FOUND}\n"
......@@ -63,7 +66,7 @@ set(dep_message "${dep_message}"
" timing ...............: ${CHAMELEON_ENABLE_TIMING}\n"
"\n"
" CHAMELEON dependencies :\n")
foreach (_dep ${CHAMELEON_DEP})
foreach (_dep ${CHAMELEON_LIBRARIES_DEP})
set(dep_message "${dep_message}"
" ${_dep}\n")
endforeach ()
......
Subproject commit 3f67955ffafb59534a77f80e61776e7b8a850c32
Subproject commit 41874844df7b3327b5ea71e983c9d9471f31eeec
......@@ -294,21 +294,17 @@ set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};chameleon_sources" C
add_library(chameleon ${CHAMELEON_SRCS} ${CHAMELEON_SRCSF})
if(CHAMELEON_SCHED_STARPU)
target_link_libraries(chameleon chameleon_starpu)
list(INSERT CHAMELEON_DEP 0 -lchameleon_starpu)
elseif(CHAMELEON_SCHED_PARSEC)
target_link_libraries(chameleon chameleon_parsec)
list(INSERT CHAMELEON_DEP 0 -lchameleon_parsec)
elseif(CHAMELEON_SCHED_QUARK)
target_link_libraries(chameleon chameleon_quark)
list(INSERT CHAMELEON_DEP 0 -lchameleon_quark)
endif()
if (NOT CHAMELEON_SIMULATION)
add_dependencies(chameleon
coreblas_include)
# Depends on coreblas only for set_coreblas_gemm3m_enabled() (Maybe we should change that)
add_dependencies(chameleon coreblas_include)
target_link_libraries(chameleon coreblas)
endif()
target_link_libraries(chameleon hqr)
list(INSERT CHAMELEON_DEP 0 -lchameleon)
add_dependencies(chameleon
chameleon_include
......
......@@ -114,16 +114,13 @@ set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};coreblas_sources" CA
# Compile step
# ------------
add_library(coreblas ${COREBLAS_SRCS})
list(INSERT CHAMELEON_DEP 0 -lcoreblas)
add_dependencies(coreblas coreblas_include coreblas_sources)
set_property(TARGET coreblas PROPERTY LINKER_LANGUAGE Fortran)
set_property(TARGET coreblas PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
target_link_libraries(coreblas
${LAPACKE_LIBRARIES}
${LAPACK_SEQ_LIBRARIES}
${CBLAS_LIBRARIES}
${BLAS_SEQ_LIBRARIES}
${LAPACKE_LIBRARIES_DEP}
${CBLAS_LIBRARIES_DEP}
)
# Installation
......
......@@ -33,6 +33,7 @@ if (EZTRACE_FOUND AND EZTRACE_DIR_FOUND)
find_path(EZTRACE_eztrace_create_plugin_DIR
NAMES eztrace_create_plugin
HINTS ${EZTRACE_DIR_FOUND}/bin)
mark_as_advanced(EZTRACE_eztrace_create_plugin_DIR)
if (EZTRACE_eztrace_create_plugin_DIR)
......
......@@ -102,7 +102,7 @@ set_property(TARGET cudablas PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/
target_link_libraries(cudablas
coreblas
${CUDA_LIBRARIES})
${EXTRA_LIBRARIES_CUDA})
# installation
# ------------
......
......@@ -33,6 +33,7 @@ if (EZTRACE_FOUND AND EZTRACE_DIR_FOUND)
find_path(EZTRACE_eztrace_create_plugin_DIR
NAMES eztrace_create_plugin
HINTS ${EZTRACE_DIR_FOUND}/bin)
mark_as_advanced(EZTRACE_eztrace_create_plugin_DIR)
if (EZTRACE_eztrace_create_plugin_DIR)
......
......@@ -24,8 +24,6 @@ if (CHAMELEON_SIMULATION)
message(ERROR "example directory should not be included when simulation is enabled")
endif()
add_subdirectory(basic_zposv)
if (CHAMELEON_PREC_D)
add_subdirectory(lapack_to_morse)
if (CHAMELEON_SCHED_STARPU)
......
###
#
# @file CMakeLists.txt
#
# @copyright 2009-2014 The University of Tennessee and The University of
# Tennessee Research Foundation. All rights reserved.
# @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
# Univ. Bordeaux. All rights reserved.
#
###
#
# MORSE example routines
# MORSE is a software package provided by Inria Bordeaux - Sud-Ouest, LaBRI,
# University of Bordeaux, Bordeaux INP
#
# @version 1.0.0
# @author Florent Pruvost
# @date 2014-10-13
#
###
set(EXAMPLES_HDR
basic_posv.h
posv_morse_functions.h
posv_users_functions.h
)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# Generate the morse testing sources for all possible precisions
# --------------------------------------------------------------
set(EXAMPLES "")
set(ZSRC
zposv_morse_functions.c
zposv_users_functions.c
)
precisions_rules_py(EXAMPLES "${ZSRC}"
PRECISIONS "${CHAMELEON_PRECISION}")
# Define what libraries we have to link with
# ------------------------------------------
unset(libs_for_examples)
list(APPEND libs_for_examples
chameleon
)
if(CHAMELEON_SCHED_STARPU)
list(APPEND libs_for_examples
chameleon_starpu
${STARPU_LIBRARIES_DEP}
)
link_directories(${STARPU_LIBRARY_DIRS})
elseif(CHAMELEON_SCHED_PARSEC)
list(APPEND libs_for_examples
chameleon_parsec
${PARSEC_LIBRARIES_DEP}
)
link_directories(${PARSEC_LIBRARY_DIRS_DEP})
elseif(CHAMELEON_SCHED_QUARK)
list(APPEND libs_for_examples
chameleon_quark
${QUARK_LIBRARIES}
)
link_directories(${QUARK_LIBRARY_DIRS})
endif()
if(CHAMELEON_USE_CUDA)
list(APPEND libs_for_examples
cudablas)
endif()
if(CHAMELEON_USE_CUDA)
link_directories(${CUDA_LIBRARY_DIRS})
endif()
list(APPEND libs_for_examples
coreblas
${LAPACKE_LIBRARIES}
${CBLAS_LIBRARIES}
${LAPACK_SEQ_LIBRARIES}
${BLAS_SEQ_LIBRARIES}
${EXTRA_LIBRARIES}
)
link_directories(${LAPACKE_LIBRARY_DIRS})
link_directories(${LAPACK_LIBRARY_DIRS})
link_directories(${CBLAS_LIBRARY_DIRS})
link_directories(${BLAS_LIBRARY_DIRS})
# message(STATUS "libs timings: ${libs_for_examples}")
foreach(_example ${EXAMPLES})
get_filename_component(_name_exe ${_example} NAME_WE)
add_executable(${_name_exe} ${_example})
set_property(TARGET ${_name_exe} PROPERTY LINKER_LANGUAGE Fortran)
target_link_libraries(${_name_exe} ${libs_for_examples})
install(TARGETS ${_name_exe}
DESTINATION bin/example/basic_zposv )
endforeach()
#-------- Tests ---------
include(CTestLists.cmake)
###
### END CMakeLists.txt
###
#
# Check Example basic_zposv
#
set(TESTLIST
posv_morse_functions
posv_users_functions
)
foreach(prec ${RP_CHAMELEON_PRECISIONS})
string(TOUPPER ${prec} PREC)
if (CHAMELEON_PREC_${PREC})
foreach(test ${TESTLIST})
add_test(example_basic_${prec}${test} ./${prec}${test})
endforeach()
endif()
endforeach()
\ No newline at end of file
/**
*
* @file basic_posv.h
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon basic_posv example header
*
* @version 1.0.0
* @author Florent Pruvost
* @date 2014-10-13
*
*/
#ifndef BASIC_POSV_H
#define BASIC_POSV_H
#if defined( _WIN32 ) || defined( _WIN64 )
#define int64_t __int64
#endif
/* Define these so that the Microsoft VC compiler stops complaining
about scanf and friends */
#define _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_WARNINGS
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#if defined( _WIN32 ) || defined( _WIN64 )
#include <windows.h>
#else /* Non-Windows */
#include <unistd.h>
#include <sys/resource.h>
#endif
#include <coreblas/cblas.h>
#include <coreblas/lapacke.h>
#include <morse.h>
#include <coreblas.h>
#if defined(CHAMELEON_USE_MPI)
#include <mpi.h>
#endif
static void
show_help(char *prog_name) {
printf( "Usage:\n%s [options]\n\n", prog_name );
printf( "Options are:\n"
" --help Show this help\n"
"\n"
" --threads=X Number of CPU workers (default: _SC_NPROCESSORS_ONLN)\n"
" --gpus=X Number of GPU workers (default: 0)\n"
"\n"
" --n=X dimension (N) (default: 500)\n"
" --nb=X Nb size. (default: 128)\n"
" --ib=X IB size. (default: 32)\n"
"\n");
}
static void get_thread_count(int *thrdnbr) {
#if defined WIN32 || defined WIN64
sscanf( getenv( "NUMBER_OF_PROCESSORS" ), "%d", thrdnbr );
#else
*thrdnbr = sysconf(_SC_NPROCESSORS_ONLN);
#endif
}
static int startswith(const char *s, const char *prefix) {
size_t n = strlen( prefix );
if (strncmp( s, prefix, n ))
return 0;
return 1;
}
enum iparam_examples {
IPARAM_THRDNBR, /* Number of cores */
IPARAM_THRDNBR_SUBGRP, /* Number of cores in a subgroup (NUMA node) */
IPARAM_SCHEDULER, /* What scheduler do we choose (dyn, stat) */
IPARAM_M, /* Number of rows of the matrix */
IPARAM_N, /* Number of columns of the matrix */
IPARAM_K, /* RHS or K */
IPARAM_LDA, /* Leading dimension of A */
IPARAM_LDB, /* Leading dimension of B */
IPARAM_LDC, /* Leading dimension of C */
IPARAM_IB, /* Inner-blocking size */
IPARAM_NB, /* Number of columns in a tile */
IPARAM_MB, /* Number of rows in a tile */
IPARAM_UPLO, /* Consider upper or lower part of sym. mat. */
IPARAM_NITER, /* Number of iteration of each test */
IPARAM_WARMUP, /* Run one test to load dynamic libraries */
IPARAM_CHECK, /* Checking activated or not */
IPARAM_VERBOSE, /* How much noise do we want? */
IPARAM_AUTOTUNING, /* Disable/enable autotuning */
IPARAM_INPUTFMT, /* Input format (Use only for getmi/gecfi) */
IPARAM_OUTPUTFMT, /* Output format (Use only for getmi/gecfi) */
IPARAM_TRACE, /* Generate trace on the first non warmup run */
IPARAM_DAG, /* Do we require to output the DOT file? */
IPARAM_ASYNC, /* Asynchronous calls */
IPARAM_MX, /* */
IPARAM_NX, /* */
IPARAM_RHBLK, /* Householder reduction parameter for QR/LQ */
IPARAM_INPLACE, /* InPlace/OutOfPlace translation mode */
IPARAM_INVERSE,
IPARAM_NCUDAS,
IPARAM_NMPI,
IPARAM_P, /* Parameter for 2D cyclic distribution */
IPARAM_Q, /* Parameter for 2D cyclic distribution */
/* Added for StarPU version */
IPARAM_PROFILE,
IPARAM_PRINT_ERRORS,
IPARAM_PARALLEL_TASKS,
IPARAM_NO_CPU,
IPARAM_BOUND,
/* End */
IPARAM_SIZEOF
};
enum dparam_examples {
IPARAM_TIME,
IPARAM_ANORM,
IPARAM_BNORM,
IPARAM_XNORM,
IPARAM_RNORM,
IPARAM_AinvNORM,
IPARAM_RES,
/* Begin section for hydra integration tool */
IPARAM_THRESHOLD_CHECK, /* Maximum value accepted for: |Ax-b||/N/eps/(||A||||x||+||b||) */
/* End section for hydra integration tool */
IPARAM_DNBPARAM
};
static void init_iparam(int iparam[IPARAM_SIZEOF]){
iparam[IPARAM_THRDNBR ] = -1;
iparam[IPARAM_THRDNBR_SUBGRP] = 1;
iparam[IPARAM_SCHEDULER ] = 0;
iparam[IPARAM_M ] = -1;
iparam[IPARAM_N ] = 500;
iparam[IPARAM_K ] = 1;
iparam[IPARAM_LDA ] = -1;
iparam[IPARAM_LDB ] = -1;
iparam[IPARAM_LDC ] = -1;
iparam[IPARAM_MB ] = 128;
iparam[IPARAM_UPLO ] = MorseUpper;
iparam[IPARAM_NB ] = 128;
iparam[IPARAM_IB ] = 32;
iparam[IPARAM_NITER ] = 1;
iparam[IPARAM_WARMUP ] = 1;
iparam[IPARAM_CHECK ] = 0;
iparam[IPARAM_VERBOSE ] = 0;
iparam[IPARAM_AUTOTUNING ] = 0;
iparam[IPARAM_INPUTFMT ] = 0;
iparam[IPARAM_OUTPUTFMT ] = 0;
iparam[IPARAM_TRACE ] = 0;
iparam[IPARAM_DAG ] = 0;
iparam[IPARAM_ASYNC ] = 1;
iparam[IPARAM_MX ] = -1;
iparam[IPARAM_NX ] = -1;
iparam[IPARAM_RHBLK ] = 0;
iparam[IPARAM_MX ] = -1;
iparam[IPARAM_NX ] = -1;
iparam[IPARAM_RHBLK ] = 0;
iparam[IPARAM_INPLACE ] = MORSE_OUTOFPLACE;
iparam[IPARAM_INVERSE ] = 0;
iparam[IPARAM_NCUDAS ] = 0;
iparam[IPARAM_NMPI ] = 1;
iparam[IPARAM_P ] = 1;
iparam[IPARAM_Q ] = 1;
iparam[IPARAM_PROFILE ] = 0;
iparam[IPARAM_PRINT_ERRORS ] = 0;
iparam[IPARAM_PARALLEL_TASKS] = 0;
iparam[IPARAM_NO_CPU ] = 0;
iparam[IPARAM_BOUND ] = 0;
}
static void read_args(int argc, char *argv[], int *iparam){
int i;
for (i = 1; i < argc && argv[i]; ++i) {
if ( startswith( argv[i], "--help") || startswith( argv[i], "-help") ||
startswith( argv[i], "--h") || startswith( argv[i], "-h") ) {
show_help( argv[0] );
exit(0);
} else if (startswith( argv[i], "--n=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_N]) );
} else if (startswith( argv[i], "--nb=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NB]) );
} else if (startswith( argv[i], "--threads=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR]) );
} else if (startswith( argv[i], "--gpus=" )) {
sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NCUDAS]) );
} else {
fprintf( stderr, "Unknown option: %s\n", argv[i] );
}
}
}
static void print_header(char *prog_name, int * iparam) {
const char *bound_header = iparam[IPARAM_BOUND] ? " thGflop/s" : "";
const char *check_header = iparam[IPARAM_CHECK] ? " ||Ax-b|| ||A|| ||x|| ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||) RETURN" : "";
const char *inverse_header = iparam[IPARAM_INVERSE] ? " ||I-A*Ainv|| ||A|| ||Ainv|| ||Id - A*Ainv||/((||A|| ||Ainv||).N.eps)" : "";
#if defined(CHAMELEON_SIMULATION)
double eps = 0.;
#else
double eps = LAPACKE_dlamch_work( 'e' );
#endif
printf( "#\n"
"# CHAMELEON %d.%d.%d, %s\n"
"# Nb threads: %d\n"
"# Nb GPUs: %d\n"
"# NB: %d\n"
"# IB: %d\n"
"# eps: %e\n"
"#\n",
CHAMELEON_VERSION_MAJOR,
CHAMELEON_VERSION_MINOR,
CHAMELEON_VERSION_MICRO,
prog_name,
iparam[IPARAM_THRDNBR],
iparam[IPARAM_NCUDAS],
iparam[IPARAM_NB],
iparam[IPARAM_IB],
eps );
printf( "# M N K/NRHS seconds Gflop/s Deviation%s%s\n",
bound_header, iparam[IPARAM_INVERSE] ? inverse_header : check_header);
printf( "# %5.0d %5.0d %5.0d\n", iparam[IPARAM_N], iparam[IPARAM_N], iparam[IPARAM_K]);
return;
}
#endif /* BASIC_POSV_H */
/**
*
* @file posv_morse_functions.h
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon posv_morse_functions example header
*
* @version 1.0.0
* @author Florent Pruvost
* @date 2014-10-13
*
*/
#ifndef POSV_MORSE_FUNCTIONS_H
#define POSV_MORSE_FUNCTIONS_H
#include "basic_posv.h"
#endif /* POSV_MORSE_FUNCTIONS_H */
/**
*
* @file posv_users_functions.h
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon posv_users_functions example header
*
* @version 1.0.0
* @author Florent Pruvost
* @date 2014-10-13
*
*/
#ifndef POSV_USERS_FUNCTIONS_H
#define POSV_USERS_FUNCTIONS_H
#include "basic_posv.h"
/**
* Function to return address of block (m,n)
*/
inline static void* user_getaddr_arrayofpointers(const MORSE_desc_t *A, int m, int n)
{
MORSE_Complex64_t **matA = (MORSE_Complex64_t **)A->mat;
size_t mm = m + A->i / A->mb;
size_t nn = n + A->j / A->nb;
size_t offset = 0;
#if defined(CHAMELEON_USE_MPI)
assert( A->myrank == A->get_rankof( A, mm, nn) );
mm = mm / A->p;
nn = nn / A->q;
#endif
offset = A->mt*nn + mm;
// printf ("Array address in %d %d: %p\n", mm, nn, *(matA+(offset)));
return (void*)( *(matA + offset) );
}
/**
* Function to return the leading dimension of element A(m,*)
*/
inline static int user_getblkldd_arrayofpointers(const MORSE_desc_t *A, int m)
{
(void)m;
return A->mb;
}
/**
* Function to return MPI rank of element A(m,n)
*/
inline static int user_getrankof_2d(const MORSE_desc_t *desc, int m, int n)
{
return (m % desc->p) * desc->q + (n % desc->q);
}
#endif /* POSV_USERS_FUNCTIONS_H */
/**
*
* @file zposv_morse_functions.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zposv_morse_functions example
*
* @version 1.0.0
* @author Florent Pruvost
* @date 2014-10-13
* @precisions normal z -> s d c
*
*/
#include "posv_morse_functions.h"
int main(int argc, char *argv[]) {
int N, NB, NCPU, NCUDA;
MORSE_desc_t *descA = NULL, *descAC = NULL, *descB = NULL, *descX = NULL;
/* initialize some parameters with default values */
int iparam[IPARAM_SIZEOF];
memset(iparam, 0, IPARAM_SIZEOF*sizeof(int));
init_iparam(iparam);
iparam[IPARAM_UPLO] = MorseUpper;
/* read arguments */
read_args(argc, argv, iparam);
N = iparam[IPARAM_N];
NB = iparam[IPARAM_NB];
/* initialize the number of thread if not given by the user in argv*/
if ( iparam[IPARAM_THRDNBR] == -1 ) {
get_thread_count( &(iparam[IPARAM_THRDNBR]) );
iparam[IPARAM_THRDNBR] -= iparam[IPARAM_NCUDAS];
}
NCPU = iparam[IPARAM_THRDNBR];
NCUDA = iparam[IPARAM_NCUDAS];
printf("N, NB: %d %d\n", N, NB);
printf("NCPU, NCUDA: %d %d\n", NCPU, NCUDA);
/* initialize MORSE */
MORSE_Init( NCPU, NCUDA);
MORSE_Set(MORSE_TILE_SIZE, NB );
MORSE_Set(MORSE_INNER_BLOCK_SIZE, iparam[IPARAM_IB] );
print_header( argv[0], iparam);
/*
* initialize the structure required for MORSE sequential task-based algorithms
* MORSE_desc_t is a structure wrapping your data allowing MORSE to get
* pointers to tiles
*/
MORSE_Desc_Create(&descA, NULL, MorseComplexDouble,
NB, NB, NB*NB, N, N, 0, 0, N, N, 1, 1);
/* generate A matrix with random values such that it is hermitian */
MORSE_zplghe_Tile( (double)N, MorseUpperLower, descA, 51 );
/* generate RHS */
MORSE_Desc_Create(&descB, NULL, MorseComplexDouble,
NB, NB, NB*NB, N, 1, 0, 0, N, 1, 1, 1);
MORSE_zplrnt_Tile( descB, 7672 );
/* copy A before facto. in order to check the result */
MORSE_Desc_Create(&descAC, NULL, MorseComplexDouble,
NB, NB, NB*NB, N, N, 0, 0, N, N, 1, 1);
MORSE_zlacpy_Tile(MorseUpperLower, descA, descAC);
/* copy B before solving in order to check the result */
MORSE_Desc_Create(&descX, NULL, MorseComplexDouble,
NB, NB, NB*NB, N, 1, 0, 0, N, 1, 1, 1);
MORSE_zlacpy_Tile(MorseUpperLower, descB, descX);
/* solve the system AX = B using the Cholesky factorization,
* A is replaced by its factorization L or L^T depending on uplo
* B is stored in X on entry, X contains the result on exit */
MORSE_zposv_Tile(iparam[IPARAM_UPLO], descA, descX);
/* note that this call is equivalent to
* MORSE_zpotrf_Tile(uplo, descA) followed by
* MORSE_zpotrs_Tile(uplo, descA, descX )
* */
/* check if solve is correct i.e. AX-B = 0 */
/* compute norms to check the result */
double anorm = MORSE_zlange_Tile(MorseInfNorm, descAC);
double bnorm = MORSE_zlange_Tile(MorseInfNorm, descB);
double xnorm = MORSE_zlange_Tile(MorseInfNorm, descX);
/* compute A*X-B, store the result in B */
MORSE_zgemm_Tile( MorseNoTrans, MorseNoTrans, 1.0, descAC, descX, -1.0, descB );
double res = MORSE_zlange_Tile(MorseInfNorm, descB);
/* check residual and print a message */
#if defined(CHAMELEON_SIMULATION)
double eps = 0.;
#else
double eps = LAPACKE_dlamch_work( 'e' );
#endif
/*
* if hres = 0 then the test succeed
* else the test failed
*/
int hres = 0;
hres = ( res / N / eps / (anorm * xnorm + bnorm ) > 100.0 );
printf( " ||Ax-b|| ||A|| ||x|| ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||) RETURN\n");
if (hres)
printf( "%8.5e %8.5e %8.5e %8.5e %8.5e FAILURE \n",
res, anorm, xnorm, bnorm,
res / N / eps / (anorm * xnorm + bnorm ));
else
printf( "%8.5e %8.5e %8.5e %8.5e %8.5e SUCCESS \n",