From 3559bd6d20a1e051df09afb7748c9d44d80e9d36 Mon Sep 17 00:00:00 2001 From: Lucas Barros de Assis <lucas.barros-de-assis@inria.fr> Date: Thu, 4 Jul 2019 13:40:39 +0200 Subject: [PATCH] New testing/timing structure --- CMakeLists.txt | 1 + cmake_modules/local_subs.py | 1 + include/chameleon/types.h | 2 + new-testing/CMakeLists.txt | 160 +++ new-testing/CTestLists.cmake | 78 ++ new-testing/flops.h | 355 ++++++ new-testing/input/geadd.in | 21 + new-testing/input/gelqf.in | 18 + new-testing/input/gelqf_hqr.in | 26 + new-testing/input/gelqs.in | 18 + new-testing/input/gels.in | 23 + new-testing/input/gels_hqr.in | 31 + new-testing/input/gemm.in | 27 + new-testing/input/geqrf.in | 19 + new-testing/input/geqrf_hqr.in | 26 + new-testing/input/geqrs.in | 18 + new-testing/input/gesv.in | 19 + new-testing/input/getrf.in | 16 + new-testing/input/getrs.in | 18 + new-testing/input/hemm.in | 27 + new-testing/input/her2k.in | 27 + new-testing/input/herk.in | 26 + new-testing/input/lacpy.in | 19 + new-testing/input/lange.in | 18 + new-testing/input/lanhe.in | 20 + new-testing/input/lansy.in | 20 + new-testing/input/lantr.in | 22 + new-testing/input/lascal.in | 18 + new-testing/input/lauum.in | 16 + new-testing/input/ongqr.in | 18 + new-testing/input/orglq.in | 20 + new-testing/input/orglq_hqr.in | 28 + new-testing/input/orgqr.in | 20 + new-testing/input/orgqr_hqr.in | 28 + new-testing/input/ormlq.in | 24 + new-testing/input/ormlq_hqr.in | 32 + new-testing/input/ormqr.in | 24 + new-testing/input/ormqr_hqr.in | 32 + new-testing/input/posv.in | 20 + new-testing/input/potrf.in | 16 + new-testing/input/potri.in | 16 + new-testing/input/potrs.in | 20 + new-testing/input/symm.in | 27 + new-testing/input/syr2k.in | 27 + new-testing/input/syrk.in | 25 + new-testing/input/sysv.in | 20 + new-testing/input/sytrf.in | 16 + new-testing/input/sytrs.in | 20 + new-testing/input/tradd.in | 23 + new-testing/input/trmm.in | 26 + new-testing/input/trsm.in | 27 + new-testing/input/trtri.in | 18 + 
new-testing/input/unglq.in | 20 + new-testing/input/unglq_hqr.in | 28 + new-testing/input/ungqr.in | 20 + new-testing/input/ungqr_hqr.in | 28 + new-testing/input/unmlq.in | 24 + new-testing/input/unmlq_hqr.in | 32 + new-testing/input/unmqr.in | 24 + new-testing/input/unmqr_hqr.in | 32 + new-testing/parameters.c | 373 ++++++ new-testing/run_list.c | 891 ++++++++++++++ new-testing/testing_zauxiliary.c | 524 +++++++++ new-testing/testing_zauxiliary.h | 110 ++ new-testing/testing_zcheck.c | 1856 ++++++++++++++++++++++++++++++ new-testing/testing_zcheck.h | 69 ++ new-testing/testing_zgeadd.c | 133 +++ new-testing/testing_zgelqf.c | 117 ++ new-testing/testing_zgelqf_hqr.c | 128 +++ new-testing/testing_zgelqs.c | 135 +++ new-testing/testing_zgels.c | 151 +++ new-testing/testing_zgels_hqr.c | 161 +++ new-testing/testing_zgemm.c | 137 +++ new-testing/testing_zgeqrf.c | 117 ++ new-testing/testing_zgeqrf_hqr.c | 128 +++ new-testing/testing_zgeqrs.c | 135 +++ new-testing/testing_zgesv.c | 120 ++ new-testing/testing_zgetrf.c | 98 ++ new-testing/testing_zgetrs.c | 110 ++ new-testing/testing_zhemm.c | 129 +++ new-testing/testing_zher2k.c | 131 +++ new-testing/testing_zherk.c | 126 ++ new-testing/testing_zlacpy.c | 129 +++ new-testing/testing_zlange.c | 120 ++ new-testing/testing_zlanhe.c | 121 ++ new-testing/testing_zlansy.c | 121 ++ new-testing/testing_zlantr.c | 121 ++ new-testing/testing_zlascal.c | 124 ++ new-testing/testing_zlauum.c | 102 ++ new-testing/testing_zposv.c | 121 ++ new-testing/testing_zpotrf.c | 96 ++ new-testing/testing_zpotri.c | 99 ++ new-testing/testing_zpotrs.c | 113 ++ new-testing/testing_zsymm.c | 129 +++ new-testing/testing_zsyr2k.c | 131 +++ new-testing/testing_zsyrk.c | 125 ++ new-testing/testing_zsysv.c | 121 ++ new-testing/testing_zsytrf.c | 96 ++ new-testing/testing_zsytrs.c | 113 ++ new-testing/testing_ztradd.c | 168 +++ new-testing/testing_ztrmm.c | 122 ++ new-testing/testing_ztrsm.c | 123 ++ new-testing/testing_ztrtri.c | 97 ++ 
new-testing/testing_zunglq.c | 116 ++ new-testing/testing_zunglq_hqr.c | 127 ++ new-testing/testing_zungqr.c | 116 ++ new-testing/testing_zungqr_hqr.c | 127 ++ new-testing/testing_zunmlq.c | 135 +++ new-testing/testing_zunmlq_hqr.c | 147 +++ new-testing/testing_zunmqr.c | 135 +++ new-testing/testing_zunmqr_hqr.c | 147 +++ new-testing/testings.h | 217 ++++ new-testing/values.c | 616 ++++++++++ testing/CMakeLists.txt | 2 +- timing/CMakeLists.txt | 2 +- tools/analysis.sh | 2 +- 116 files changed, 12082 insertions(+), 3 deletions(-) create mode 100644 new-testing/CMakeLists.txt create mode 100644 new-testing/CTestLists.cmake create mode 100644 new-testing/flops.h create mode 100644 new-testing/input/geadd.in create mode 100644 new-testing/input/gelqf.in create mode 100644 new-testing/input/gelqf_hqr.in create mode 100644 new-testing/input/gelqs.in create mode 100644 new-testing/input/gels.in create mode 100644 new-testing/input/gels_hqr.in create mode 100644 new-testing/input/gemm.in create mode 100644 new-testing/input/geqrf.in create mode 100644 new-testing/input/geqrf_hqr.in create mode 100644 new-testing/input/geqrs.in create mode 100644 new-testing/input/gesv.in create mode 100644 new-testing/input/getrf.in create mode 100644 new-testing/input/getrs.in create mode 100644 new-testing/input/hemm.in create mode 100644 new-testing/input/her2k.in create mode 100644 new-testing/input/herk.in create mode 100644 new-testing/input/lacpy.in create mode 100644 new-testing/input/lange.in create mode 100644 new-testing/input/lanhe.in create mode 100644 new-testing/input/lansy.in create mode 100644 new-testing/input/lantr.in create mode 100644 new-testing/input/lascal.in create mode 100644 new-testing/input/lauum.in create mode 100644 new-testing/input/ongqr.in create mode 100644 new-testing/input/orglq.in create mode 100644 new-testing/input/orglq_hqr.in create mode 100644 new-testing/input/orgqr.in create mode 100644 new-testing/input/orgqr_hqr.in create mode 100644 
new-testing/input/ormlq.in create mode 100644 new-testing/input/ormlq_hqr.in create mode 100644 new-testing/input/ormqr.in create mode 100644 new-testing/input/ormqr_hqr.in create mode 100644 new-testing/input/posv.in create mode 100644 new-testing/input/potrf.in create mode 100644 new-testing/input/potri.in create mode 100644 new-testing/input/potrs.in create mode 100644 new-testing/input/symm.in create mode 100644 new-testing/input/syr2k.in create mode 100644 new-testing/input/syrk.in create mode 100644 new-testing/input/sysv.in create mode 100644 new-testing/input/sytrf.in create mode 100644 new-testing/input/sytrs.in create mode 100644 new-testing/input/tradd.in create mode 100644 new-testing/input/trmm.in create mode 100644 new-testing/input/trsm.in create mode 100644 new-testing/input/trtri.in create mode 100644 new-testing/input/unglq.in create mode 100644 new-testing/input/unglq_hqr.in create mode 100644 new-testing/input/ungqr.in create mode 100644 new-testing/input/ungqr_hqr.in create mode 100644 new-testing/input/unmlq.in create mode 100644 new-testing/input/unmlq_hqr.in create mode 100644 new-testing/input/unmqr.in create mode 100644 new-testing/input/unmqr_hqr.in create mode 100644 new-testing/parameters.c create mode 100644 new-testing/run_list.c create mode 100644 new-testing/testing_zauxiliary.c create mode 100644 new-testing/testing_zauxiliary.h create mode 100644 new-testing/testing_zcheck.c create mode 100644 new-testing/testing_zcheck.h create mode 100644 new-testing/testing_zgeadd.c create mode 100644 new-testing/testing_zgelqf.c create mode 100644 new-testing/testing_zgelqf_hqr.c create mode 100644 new-testing/testing_zgelqs.c create mode 100644 new-testing/testing_zgels.c create mode 100644 new-testing/testing_zgels_hqr.c create mode 100644 new-testing/testing_zgemm.c create mode 100644 new-testing/testing_zgeqrf.c create mode 100644 new-testing/testing_zgeqrf_hqr.c create mode 100644 new-testing/testing_zgeqrs.c create mode 100644 
new-testing/testing_zgesv.c create mode 100644 new-testing/testing_zgetrf.c create mode 100644 new-testing/testing_zgetrs.c create mode 100644 new-testing/testing_zhemm.c create mode 100644 new-testing/testing_zher2k.c create mode 100644 new-testing/testing_zherk.c create mode 100644 new-testing/testing_zlacpy.c create mode 100644 new-testing/testing_zlange.c create mode 100644 new-testing/testing_zlanhe.c create mode 100644 new-testing/testing_zlansy.c create mode 100644 new-testing/testing_zlantr.c create mode 100644 new-testing/testing_zlascal.c create mode 100644 new-testing/testing_zlauum.c create mode 100644 new-testing/testing_zposv.c create mode 100644 new-testing/testing_zpotrf.c create mode 100644 new-testing/testing_zpotri.c create mode 100644 new-testing/testing_zpotrs.c create mode 100644 new-testing/testing_zsymm.c create mode 100644 new-testing/testing_zsyr2k.c create mode 100644 new-testing/testing_zsyrk.c create mode 100644 new-testing/testing_zsysv.c create mode 100644 new-testing/testing_zsytrf.c create mode 100644 new-testing/testing_zsytrs.c create mode 100644 new-testing/testing_ztradd.c create mode 100644 new-testing/testing_ztrmm.c create mode 100644 new-testing/testing_ztrsm.c create mode 100644 new-testing/testing_ztrtri.c create mode 100644 new-testing/testing_zunglq.c create mode 100644 new-testing/testing_zunglq_hqr.c create mode 100644 new-testing/testing_zungqr.c create mode 100644 new-testing/testing_zungqr_hqr.c create mode 100644 new-testing/testing_zunmlq.c create mode 100644 new-testing/testing_zunmlq_hqr.c create mode 100644 new-testing/testing_zunmqr.c create mode 100644 new-testing/testing_zunmqr_hqr.c create mode 100644 new-testing/testings.h create mode 100644 new-testing/values.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e2c41e4a..28887656a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1030,6 +1030,7 @@ endif(CHAMELEON_ENABLE_EXAMPLE AND NOT CHAMELEON_SIMULATION) # Testing executables 
if(CHAMELEON_ENABLE_TESTING AND NOT CHAMELEON_SIMULATION) add_subdirectory(testing) + add_subdirectory(new-testing) endif(CHAMELEON_ENABLE_TESTING AND NOT CHAMELEON_SIMULATION) # Timing executables diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py index ac31d1e83..65ccc98e1 100644 --- a/cmake_modules/local_subs.py +++ b/cmake_modules/local_subs.py @@ -42,6 +42,7 @@ subs = { ('int', 'float', 'double', 'CHAMELEON_Complex32_t', r'\bCHAMELEON_Complex64_t'), ('ChamPattern', 'ChamRealFloat', 'ChamRealDouble', 'ChamComplexFloat', r'\bChamComplexDouble' ), ('ChamPattern', 'ChamRealFloat', 'ChamRealDouble', 'ChamRealFloat', r'\bChamRealDouble' ), + ('int', 'float', 'double', 'complex32', r'\bcomplex64' ), # ----- Additional BLAS ('', 'sTile', 'dTile', 'cTile', 'zTile' ), diff --git a/include/chameleon/types.h b/include/chameleon/types.h index f3c505843..633a87782 100644 --- a/include/chameleon/types.h +++ b/include/chameleon/types.h @@ -122,6 +122,8 @@ static inline int chameleon_min( int a, int b ) { if ( a < b ) return a; else return b; } +typedef double cham_fixdbl_t; + END_C_DECLS #endif /* _chameleon_types_h_ */ diff --git a/new-testing/CMakeLists.txt b/new-testing/CMakeLists.txt new file mode 100644 index 000000000..b228fe0f1 --- /dev/null +++ b/new-testing/CMakeLists.txt @@ -0,0 +1,160 @@ +### +# +# @file CMakeLists.txt +# +# @copyright 2009-2014 The University of Tennessee and The University of +# Tennessee Research Foundation. All rights reserved. +# @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, +# Univ. Bordeaux. All rights reserved. +# +### +# +# @project CHAMELEON +# CHAMELEON is a software package provided by: +# Inria Bordeaux - Sud-Ouest, +# Univ. of Tennessee, +# King Abdullah University of Science and Technology +# Univ. of California Berkeley, +# Univ. of Colorado Denver.
+# +# @version 0.9.2 +# @author Cedric Castagnede +# @author Emmanuel Agullo +# @author Mathieu Faverge +# @author Lucas Barros de Assis +# @date 2014-11-16 +# +### +if (CHAMELEON_SIMULATION) + message(FATAL_ERROR "new-testing directory should not be included when simulation is enabled") +endif() + +# Generate chameleon auxiliary testing sources for all possible precisions +# -------------------------------------------------------------------- +set(NEWTESTING_HDRS_GENERATED "") +set(ZHDR + testing_zauxiliary.h + testing_zcheck.h + ) + +precisions_rules_py(NEWTESTING_HDRS_GENERATED "${ZHDR}" + PRECISIONS "s;d;c;z;ds;zc" ) + +add_custom_target(new-testing_include ALL SOURCES ${NEWTESTING_HDRS_GENERATED}) +set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};new-testing_include" CACHE INTERNAL "List of targets of sources") + +# Generate chameleon testing sources for all possible precisions +# ---------------------------------------------------------- +set(ZSRC + testing_zauxiliary.c + testing_zcheck.c + ################## + # LAPACK + ################## + #testing_zlaset.c + testing_zlacpy.c + testing_zlange.c + testing_zlanhe.c + testing_zlansy.c + testing_zlantr.c + testing_zgeadd.c + testing_ztradd.c + testing_zlascal.c + testing_zgemm.c + testing_zhemm.c + testing_zherk.c + testing_zher2k.c + testing_zsymm.c + testing_zsyrk.c + testing_zsyr2k.c + testing_ztrmm.c + testing_ztrsm.c + testing_zpotrf.c + testing_zpotrs.c + testing_zposv.c + testing_ztrtri.c + testing_zlauum.c + testing_zpotri.c + testing_zsytrf.c + testing_zsytrs.c + testing_zsysv.c + testing_zgetrf.c + testing_zgetrs.c + testing_zgesv.c + testing_zgeqrf.c + testing_zungqr.c + testing_zunmqr.c + testing_zgelqf.c + testing_zunglq.c + testing_zunmlq.c + # testing_zgeqrs.c + # testing_zgelqs.c + testing_zgels.c + testing_zgeqrf_hqr.c + testing_zungqr_hqr.c + testing_zunmqr_hqr.c + testing_zgelqf_hqr.c + testing_zunglq_hqr.c + testing_zunmlq_hqr.c + # testing_zgeqrs_hqr.c + # testing_zgelqs_hqr.c +
testing_zgels_hqr.c + ) + +# Add include and link directories +# -------------------------------- +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +# Define what libraries we have to link with +# ------------------------------------------ +unset(libs_for_tests) +# testing executables depend on chameleon and cblas, lapacke, mpi (already chameleon's dependencies) +list(APPEND libs_for_tests chameleon) +# message(STATUS "libs testings: ${libs_for_tests}") + +# Create one testing per precision with all files +# ----------------------------------------------- +foreach(_precision ${CHAMELEON_PRECISION} ) + + precisions_rules_py(${_precision}SRC_GENERATED "${ZSRC}" + PRECISIONS "${_precision}" ) + + add_executable(${_precision}new-testing + ${${_precision}SRC_GENERATED} + values.c + run_list.c + parameters.c + ) + add_dependencies(${_precision}new-testing + chameleon_include + coreblas_include + control_include + new-testing_include + ) + set_property(TARGET ${_precision}new-testing PROPERTY LINKER_LANGUAGE Fortran) + target_link_libraries(${_precision}new-testing ${libs_for_tests}) + + install(TARGETS ${_precision}new-testing + DESTINATION bin/new-testing) + +endforeach() + +# Force generation of sources +# --------------------------- +set(NEWTESTING_SRCS) +foreach(_precision ${CHAMELEON_PRECISION}) + list(APPEND NEWTESTING_SRCS ${${_precision}SRC_GENERATED}) +endforeach() +add_custom_target(new-testing_sources ALL SOURCES ${NEWTESTING_SRCS}) +set(CHAMELEON_SOURCES_TARGETS "${CHAMELEON_SOURCES_TARGETS};new-testing_sources" CACHE INTERNAL "List of targets of sources") + +#-------- Tests --------- +include(CTestLists.cmake) + +# copy input files +file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/input DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + +### +### END CMakeLists.txt +### diff --git a/new-testing/CTestLists.cmake b/new-testing/CTestLists.cmake new file mode 100644 index 000000000..e8bc3ee26 --- /dev/null +++ 
b/new-testing/CTestLists.cmake @@ -0,0 +1,78 @@ +# +# Check new-testing/ +# +set(NP 2) # Amount of MPI processes +set(THREADS 2) # Amount of threads +set(N_GPUS 0) # Amount of graphic cards +set(TEST_CATEGORIES shm) +if (CHAMELEON_USE_MPI AND MPI_C_FOUND) + set( TEST_CATEGORIES ${TEST_CATEGORIES} mpi ) +endif() +if (CHAMELEON_USE_CUDA AND CUDA_FOUND) + set(N_GPUS 0 1) +endif() + +foreach(prec ${RP_CHAMELEON_PRECISIONS}) + set (CMD ./${prec}new-testing) + + # + # Create the list of test based on precision and runtime + # + set( TESTS lacpy lange lantr lansy ) + if ( ${prec} STREQUAL c OR ${prec} STREQUAL z ) + set( TESTS ${TESTS} lanhe ) + endif() + set( TESTS ${TESTS} + geadd tradd lascal + gemm symm syrk syr2k trmm trsm ) + if ( ${prec} STREQUAL c OR ${prec} STREQUAL z ) + set( TESTS ${TESTS} + hemm herk her2k ) + endif() + set( TESTS ${TESTS} + potrf potrs posv trtri lauum ) + if ( NOT CHAMELEON_SCHED_PARSEC ) + set( TESTS ${TESTS} potri ) + endif() + if ( ${prec} STREQUAL c OR ${prec} STREQUAL z ) + set( TESTS ${TESTS} + sytrf sytrs sysv ) + endif() + set( TESTS ${TESTS} + getrf getrs gesv + geqrf gelqf + geqrf_hqr gelqf_hqr) + if ( ${prec} STREQUAL c OR ${prec} STREQUAL z ) + set( TESTS ${TESTS} + ungqr unglq unmqr unmlq + ungqr_hqr unglq_hqr unmqr_hqr unmlq_hqr) + else() + set( TESTS ${TESTS} + orgqr orglq ormqr ormlq + orgqr_hqr orglq_hqr ormqr_hqr ormlq_hqr) + endif() + set( TESTS ${TESTS} + #geqrs gelqs + #geqrs_hqr gelqs_hqr + gels + gels_hqr ) + + foreach(cat ${TEST_CATEGORIES}) + foreach(gpus ${N_GPUS}) + + if (${gpus} EQUAL 1) + set(cat ${cat}_gpu) + endif() + + if (${cat} MATCHES "^mpi") + set (PREFIX mpiexec --bind-to none -n ${NP}) + else() + set (PREFIX "") + endif() + + foreach(_test ${TESTS}) + add_test(test_${cat}_${prec}${_test} ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -o ${_test} -f input/${_test}.in ) + endforeach() + endforeach() + endforeach() +endforeach() diff --git a/new-testing/flops.h b/new-testing/flops.h new file mode 100644
index 000000000..ce0d8c48f --- /dev/null +++ b/new-testing/flops.h @@ -0,0 +1,355 @@ +/** + * + * @file flops.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * File provided by Univ. of Tennessee, + * + * @version 0.9.2 + * @author Mathieu Faverge + * @author Cedric Castagnede + * @date 2014-11-16 + * + */ +/* + * This file provides the flops formulas for Level 2 and Level 3 BLAS and some + * Lapack routines. Each macro uses the same size parameters as the + * associated function and provides one formula for additions and one + * for multiplications. Example of use through the inline helpers: + * + * flops_zgemm( m, n, k ) + * + * All the formulas are reported in the LAPACK Lawn 41: + * http://www.netlib.org/lapack/lawns/lawn41.ps + */ +#ifndef _flops_h_ +#define _flops_h_ + +/** + * Generic formulas coming from LAWN 41 + */ + +/* + * Level 2 BLAS + */ +#define FMULS_GEMV(__m, __n) ((double)(__m) * (double)(__n) + 2. * (double)(__m)) +#define FADDS_GEMV(__m, __n) ((double)(__m) * (double)(__n) ) + +#define FMULS_SYMV(__n) FMULS_GEMV( (__n), (__n) ) +#define FADDS_SYMV(__n) FADDS_GEMV( (__n), (__n) ) +#define FMULS_HEMV FMULS_SYMV +#define FADDS_HEMV FADDS_SYMV + +/* + * Level 3 BLAS + */ +#define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) +#define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k)) + +#define FMULS_SYMM(__side, __m, __n) ( ( (__side) == ChamLeft ) ? FMULS_GEMM((__m), (__m), (__n)) : FMULS_GEMM((__m), (__n), (__n)) ) +#define FADDS_SYMM(__side, __m, __n) ( ( (__side) == ChamLeft ) ?
FADDS_GEMM((__m), (__m), (__n)) : FADDS_GEMM((__m), (__n), (__n)) ) +#define FMULS_HEMM FMULS_SYMM +#define FADDS_HEMM FADDS_SYMM + +#define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) +#define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.)) +#define FMULS_HERK FMULS_SYRK +#define FADDS_HERK FADDS_SYRK + +#define FMULS_SYR2K(__k, __n) ((double)(__k) * (double)(__n) * (double)(__n) ) +#define FADDS_SYR2K(__k, __n) ((double)(__k) * (double)(__n) * (double)(__n) + (double)(__n)) +#define FMULS_HER2K FMULS_SYR2K +#define FADDS_HER2K FADDS_SYR2K + +#define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) +#define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) + + +#define FMULS_TRMM(__side, __m, __n) ( ( (__side) == ChamLeft ) ? FMULS_TRMM_2((__m), (__n)) : FMULS_TRMM_2((__n), (__m)) ) +#define FADDS_TRMM(__side, __m, __n) ( ( (__side) == ChamLeft ) ? FADDS_TRMM_2((__m), (__n)) : FADDS_TRMM_2((__n), (__m)) ) + +#define FMULS_TRSM FMULS_TRMM +#define FADDS_TRSM FADDS_TRMM /* additions count: must alias the FADDS formula, not FMULS */ + +/* + * Lapack + */ +#define FMULS_GETRF(__m, __n) ( ((__m) < (__n)) ? (0.5 * (double)(__m) * ((double)(__m) * ((double)(__n) - (1./3.) * (__m) - 1. ) + (double)(__n)) + (2. / 3.) * (__m)) \ + : (0.5 * (double)(__n) * ((double)(__n) * ((double)(__m) - (1./3.) * (__n) - 1. ) + (double)(__m)) + (2. / 3.) * (__n)) ) +#define FADDS_GETRF(__m, __n) ( ((__m) < (__n)) ? (0.5 * (double)(__m) * ((double)(__m) * ((double)(__n) - (1./3.) * (__m) ) - (double)(__n)) + (1. / 6.) * (__m)) \ + : (0.5 * (double)(__n) * ((double)(__n) * ((double)(__m) - (1./3.) * (__n) ) - (double)(__m)) + (1. / 6.) * (__n)) ) + +#define FMULS_GETRI(__n) ( (double)(__n) * ((5. / 6.) + (double)(__n) * ((2. / 3.) * (double)(__n) + 0.5)) ) +#define FADDS_GETRI(__n) ( (double)(__n) * ((5. / 6.) + (double)(__n) * ((2. / 3.)
* (double)(__n) - 1.5)) ) + +#define FMULS_GETRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * (double)(__n) ) +#define FADDS_GETRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) - 1. )) + +#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) +#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) ) * (double)(__n) - (1. / 6.))) + +#define FMULS_POTRI(__n) ( (double)(__n) * ((2. / 3.) + (double)(__n) * ((1. / 3.) * (double)(__n) + 1. )) ) +#define FADDS_POTRI(__n) ( (double)(__n) * ((1. / 6.) + (double)(__n) * ((1. / 3.) * (double)(__n) - 0.5)) ) + +#define FMULS_POTRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) + 1. )) +#define FADDS_POTRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) - 1. )) + +//SPBTRF +//SPBTRS + +#define FMULS_SYTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.))) +#define FADDS_SYTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) ) * (double)(__n) - (1. / 6.))) + +//SSYTRI +//SSYTRS + +#define FMULS_GEQRF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 23. / 6.)) \ + : ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + 2.*(double)(__n) + 23. / 6.)) ) +#define FADDS_GEQRF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + 5. / 6.)) \ + : ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + (double)(__n) + 5. / 6.)) ) + +#define FMULS_GEQLF(__m, __n) FMULS_GEQRF(__m, __n) +#define FADDS_GEQLF(__m, __n) FADDS_GEQRF(__m, __n) + +#define FMULS_GERQF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 29. / 6.)) \ + : ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) 
* (double)(__m) + (double)(__n)) + 2.*(double)(__n) + 29. / 6.)) ) +#define FADDS_GERQF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( -0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 5. / 6.)) \ + : ((double)(__m) * ((double)(__m) * ( 0.5-(1./3.) * (double)(__m) + (double)(__n)) + + 5. / 6.)) ) + +#define FMULS_GELQF(__m, __n) FMULS_GERQF(__m, __n) +#define FADDS_GELQF(__m, __n) FADDS_GERQF(__m, __n) + +#define FMULS_UNGQR(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + 2. * (double)(__n) - 5./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) - 1.))) +#define FADDS_UNGQR(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__n) - (double)(__m) + 1./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) ))) +#define FMULS_UNGQL FMULS_UNGQR +#define FMULS_ORGQR FMULS_UNGQR +#define FMULS_ORGQL FMULS_UNGQR +#define FADDS_UNGQL FADDS_UNGQR +#define FADDS_ORGQR FADDS_UNGQR +#define FADDS_ORGQL FADDS_UNGQR + +#define FMULS_UNGRQ(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__m) + (double)(__n) - 2./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) - 1.))) +#define FADDS_UNGRQ(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__m) - (double)(__n) + 1./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) ))) +#define FMULS_UNGLQ FMULS_UNGRQ +#define FMULS_ORGRQ FMULS_UNGRQ +#define FMULS_ORGLQ FMULS_UNGRQ +#define FADDS_UNGLQ FADDS_UNGRQ +#define FADDS_ORGRQ FADDS_UNGRQ +#define FADDS_ORGLQ FADDS_UNGRQ + +#define FMULS_GEQRS(__m, __n, __nrhs) ((double)(__nrhs) * ((double)(__n) * ( 2.* (double)(__m) - 0.5 * (double)(__n) + 2.5))) +#define FADDS_GEQRS(__m, __n, __nrhs) ((double)(__nrhs) * ((double)(__n) * ( 2.* (double)(__m) - 0.5 * (double)(__n) + 0.5))) + +#define FMULS_UNMQR(__side, __m, __n, __k) ( ((__side) == ChamLeft ) ? 
((double)(__k) * (double)(__n) * ( 2.* (double)(__m) - (double)(__k) + 2.)) \ + : ((double)(__k) * ((double)(__m) * ( 2.* (double)(__n) - (double)(__k) + 1.) + (double)(__n) - .5 * (double)(__k) + .5)) ) +#define FADDS_UNMQR(__side, __m, __n, __k) ( ((__side) == ChamLeft ) ? ((double)(__k) * (double)(__n) * ( 2.* (double)(__m) - (double)(__k) + 1.)) \ + : ((double)(__k) * (double)(__m) * ( 2.* (double)(__n) - (double)(__k) + 1.)) ) + +#define FMULS_UNMLQ FMULS_UNMQR +#define FADDS_UNMLQ FADDS_UNMQR + +//UNMQR, UNMLQ, UNMQL, UNMRQ (Left) +//UNMQR, UNMLQ, UNMQL, UNMRQ (Right) + +#define FMULS_TRTRI(__n) ((double)(__n) * ((double)(__n) * ( 1./6. * (double)(__n) + 0.5 ) + 1./3.)) +#define FADDS_TRTRI(__n) ((double)(__n) * ((double)(__n) * ( 1./6. * (double)(__n) - 0.5 ) + 1./3.)) + +#define FMULS_GEHRD(__n) ( (double)(__n) * ((double)(__n) * (5./3. *(double)(__n) + 0.5) - 7./6.) - 13. ) +#define FADDS_GEHRD(__n) ( (double)(__n) * ((double)(__n) * (5./3. *(double)(__n) - 1. ) - 2./3.) - 8. ) + +#define FMULS_SYTRD(__n) ( (double)(__n) * ( (double)(__n) * ( 2./3. * (double)(__n) + 2.5 ) - 1./6. ) - 15.) +#define FADDS_SYTRD(__n) ( (double)(__n) * ( (double)(__n) * ( 2./3. * (double)(__n) + 1. ) - 8./3. ) - 4.) +#define FMULS_HETRD FMULS_SYTRD +#define FADDS_HETRD FADDS_SYTRD + +#define FMULS_GEBRD(__m, __n) ( ((__m) >= (__n)) ? ((double)(__n) * ((double)(__n) * (2. * (double)(__m) - 2./3. * (double)(__n) + 2. ) + 20./3.)) \ + : ((double)(__m) * ((double)(__m) * (2. * (double)(__n) - 2./3. * (double)(__m) + 2. ) + 20./3.)) ) +#define FADDS_GEBRD(__m, __n) ( ((__m) >= (__n)) ? ((double)(__n) * ((double)(__n) * (2. * (double)(__m) - 2./3. * (double)(__n) + 1. ) - (double)(__m) + 5./3.)) \ + : ((double)(__m) * ((double)(__m) * (2. * (double)(__n) - 2./3. * (double)(__m) + 1. ) - (double)(__n) + 5./3.)) ) + + +/** + * Users functions + */ +/* + * Level 2 BLAS + */ +static inline double flops_zgemv( double __m, double __n) { double flops = (6. 
* FMULS_GEMV((__m), (__n)) + 2.0 * FADDS_GEMV((__m), (__n)) ); return flops; } +static inline double flops_cgemv( double __m, double __n) { double flops = (6. * FMULS_GEMV((__m), (__n)) + 2.0 * FADDS_GEMV((__m), (__n)) ); return flops; } +static inline double flops_dgemv( double __m, double __n) { double flops = ( FMULS_GEMV((__m), (__n)) + FADDS_GEMV((__m), (__n)) ); return flops; } +static inline double flops_sgemv( double __m, double __n) { double flops = ( FMULS_GEMV((__m), (__n)) + FADDS_GEMV((__m), (__n)) ); return flops; } + +static inline double flops_zhemv( double __n) { double flops = (6. * FMULS_HEMV((__n)) + 2.0 * FADDS_HEMV((__n)) ); return flops; } +static inline double flops_chemv( double __n) { double flops = (6. * FMULS_HEMV((__n)) + 2.0 * FADDS_HEMV((__n)) ); return flops; } + +static inline double flops_zsymv( double __n) { double flops = (6. * FMULS_SYMV((__n)) + 2.0 * FADDS_SYMV((__n)) ); return flops; } +static inline double flops_csymv( double __n) { double flops = (6. * FMULS_SYMV((__n)) + 2.0 * FADDS_SYMV((__n)) ); return flops; } +static inline double flops_dsymv( double __n) { double flops = ( FMULS_SYMV((__n)) + FADDS_SYMV((__n)) ); return flops; } +static inline double flops_ssymv( double __n) { double flops = ( FMULS_SYMV((__n)) + FADDS_SYMV((__n)) ); return flops; } + +/* + * Level 3 BLAS + */ +static inline double flops_zgemm( double __m, double __n, double __k) { double flops = (6. * FMULS_GEMM((__m), (__n), (__k)) + 2.0 * FADDS_GEMM((__m), (__n), (__k)) ); return flops; } +static inline double flops_cgemm( double __m, double __n, double __k) { double flops = (6. 
* FMULS_GEMM((__m), (__n), (__k)) + 2.0 * FADDS_GEMM((__m), (__n), (__k)) ); return flops; } +static inline double flops_dgemm( double __m, double __n, double __k) { double flops = ( FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k)) ); return flops; } +static inline double flops_sgemm( double __m, double __n, double __k) { double flops = ( FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k)) ); return flops; } + +static inline double flops_zhemm( cham_side_t __side, double __m, double __n) { double flops = (6. * FMULS_HEMM(__side, (__m), (__n)) + 2.0 * FADDS_HEMM(__side, (__m), (__n)) ); return flops; } +static inline double flops_chemm( cham_side_t __side, double __m, double __n) { double flops = (6. * FMULS_HEMM(__side, (__m), (__n)) + 2.0 * FADDS_HEMM(__side, (__m), (__n)) ); return flops; } + +static inline double flops_zsymm( cham_side_t __side, double __m, double __n) { double flops = (6. * FMULS_SYMM(__side, (__m), (__n)) + 2.0 * FADDS_SYMM(__side, (__m), (__n)) ); return flops; } +static inline double flops_csymm( cham_side_t __side, double __m, double __n) { double flops = (6. * FMULS_SYMM(__side, (__m), (__n)) + 2.0 * FADDS_SYMM(__side, (__m), (__n)) ); return flops; } +static inline double flops_dsymm( cham_side_t __side, double __m, double __n) { double flops = ( FMULS_SYMM(__side, (__m), (__n)) + FADDS_SYMM(__side, (__m), (__n)) ); return flops; } +static inline double flops_ssymm( cham_side_t __side, double __m, double __n) { double flops = ( FMULS_SYMM(__side, (__m), (__n)) + FADDS_SYMM(__side, (__m), (__n)) ); return flops; } + +static inline double flops_zherk( double __k, double __n) { double flops = (6. * FMULS_HERK((__k), (__n)) + 2.0 * FADDS_HERK((__k), (__n)) ); return flops; } +static inline double flops_cherk( double __k, double __n) { double flops = (6. * FMULS_HERK((__k), (__n)) + 2.0 * FADDS_HERK((__k), (__n)) ); return flops; } + +static inline double flops_zsyrk( double __k, double __n) { double flops = (6. 
* FMULS_SYRK((__k), (__n)) + 2.0 * FADDS_SYRK((__k), (__n)) ); return flops; } +static inline double flops_csyrk( double __k, double __n) { double flops = (6. * FMULS_SYRK((__k), (__n)) + 2.0 * FADDS_SYRK((__k), (__n)) ); return flops; } +static inline double flops_dsyrk( double __k, double __n) { double flops = ( FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n)) ); return flops; } +static inline double flops_ssyrk( double __k, double __n) { double flops = ( FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n)) ); return flops; } + +static inline double flops_zher2k( double __k, double __n) { double flops = (6. * FMULS_HER2K((__k), (__n)) + 2.0 * FADDS_HER2K((__k), (__n)) ); return flops; } +static inline double flops_cher2k( double __k, double __n) { double flops = (6. * FMULS_HER2K((__k), (__n)) + 2.0 * FADDS_HER2K((__k), (__n)) ); return flops; } + +static inline double flops_zsyr2k( double __k, double __n) { double flops = (6. * FMULS_SYR2K((__k), (__n)) + 2.0 * FADDS_SYR2K((__k), (__n)) ); return flops; } +static inline double flops_csyr2k( double __k, double __n) { double flops = (6. * FMULS_SYR2K((__k), (__n)) + 2.0 * FADDS_SYR2K((__k), (__n)) ); return flops; } +static inline double flops_dsyr2k( double __k, double __n) { double flops = ( FMULS_SYR2K((__k), (__n)) + FADDS_SYR2K((__k), (__n)) ); return flops; } +static inline double flops_ssyr2k( double __k, double __n) { double flops = ( FMULS_SYR2K((__k), (__n)) + FADDS_SYR2K((__k), (__n)) ); return flops; } + +static inline double flops_ztrmm( cham_side_t __side, double __m, double __n) { double flops = (6. * FMULS_TRMM(__side, (__m), (__n)) + 2.0 * FADDS_TRMM(__side, (__m), (__n)) ); return flops; } +static inline double flops_ctrmm( cham_side_t __side, double __m, double __n) { double flops = (6. 
* FMULS_TRMM(__side, (__m), (__n)) + 2.0 * FADDS_TRMM(__side, (__m), (__n)) ); return flops; } +static inline double flops_dtrmm( cham_side_t __side, double __m, double __n) { double flops = ( FMULS_TRMM(__side, (__m), (__n)) + FADDS_TRMM(__side, (__m), (__n)) ); return flops; } +static inline double flops_strmm( cham_side_t __side, double __m, double __n) { double flops = ( FMULS_TRMM(__side, (__m), (__n)) + FADDS_TRMM(__side, (__m), (__n)) ); return flops; } + +static inline double flops_ztrsm( cham_side_t __side, double __m, double __n) { double flops = (6. * FMULS_TRSM(__side, (__m), (__n)) + 2.0 * FADDS_TRSM(__side, (__m), (__n)) ); return flops; } +static inline double flops_ctrsm( cham_side_t __side, double __m, double __n) { double flops = (6. * FMULS_TRSM(__side, (__m), (__n)) + 2.0 * FADDS_TRSM(__side, (__m), (__n)) ); return flops; } +static inline double flops_dtrsm( cham_side_t __side, double __m, double __n) { double flops = ( FMULS_TRSM(__side, (__m), (__n)) + FADDS_TRSM(__side, (__m), (__n)) ); return flops; } +static inline double flops_strsm( cham_side_t __side, double __m, double __n) { double flops = ( FMULS_TRSM(__side, (__m), (__n)) + FADDS_TRSM(__side, (__m), (__n)) ); return flops; } + +/* + * Lapack + */ +static inline double flops_zgetrf( double __m, double __n) { double flops = (6. * FMULS_GETRF((__m), (__n)) + 2.0 * FADDS_GETRF((__m), (__n)) ); return flops; } +static inline double flops_cgetrf( double __m, double __n) { double flops = (6. * FMULS_GETRF((__m), (__n)) + 2.0 * FADDS_GETRF((__m), (__n)) ); return flops; } +static inline double flops_dgetrf( double __m, double __n) { double flops = ( FMULS_GETRF((__m), (__n)) + FADDS_GETRF((__m), (__n)) ); return flops; } +static inline double flops_sgetrf( double __m, double __n) { double flops = ( FMULS_GETRF((__m), (__n)) + FADDS_GETRF((__m), (__n)) ); return flops; } + +static inline double flops_zgetri( double __n) { double flops = (6. 
* FMULS_GETRI((__n)) + 2.0 * FADDS_GETRI((__n)) ); return flops; } +static inline double flops_cgetri( double __n) { double flops = (6. * FMULS_GETRI((__n)) + 2.0 * FADDS_GETRI((__n)) ); return flops; } +static inline double flops_dgetri( double __n) { double flops = ( FMULS_GETRI((__n)) + FADDS_GETRI((__n)) ); return flops; } +static inline double flops_sgetri( double __n) { double flops = ( FMULS_GETRI((__n)) + FADDS_GETRI((__n)) ); return flops; } + +static inline double flops_zgetrs( double __n, double __nrhs) { double flops = (6. * FMULS_GETRS((__n), (__nrhs)) + 2.0 * FADDS_GETRS((__n), (__nrhs)) ); return flops; } +static inline double flops_cgetrs( double __n, double __nrhs) { double flops = (6. * FMULS_GETRS((__n), (__nrhs)) + 2.0 * FADDS_GETRS((__n), (__nrhs)) ); return flops; } +static inline double flops_dgetrs( double __n, double __nrhs) { double flops = ( FMULS_GETRS((__n), (__nrhs)) + FADDS_GETRS((__n), (__nrhs)) ); return flops; } +static inline double flops_sgetrs( double __n, double __nrhs) { double flops = ( FMULS_GETRS((__n), (__nrhs)) + FADDS_GETRS((__n), (__nrhs)) ); return flops; } + +static inline double flops_zpotrf( double __n) { double flops = (6. * FMULS_POTRF((__n)) + 2.0 * FADDS_POTRF((__n)) ); return flops; } +static inline double flops_cpotrf( double __n) { double flops = (6. * FMULS_POTRF((__n)) + 2.0 * FADDS_POTRF((__n)) ); return flops; } +static inline double flops_dpotrf( double __n) { double flops = ( FMULS_POTRF((__n)) + FADDS_POTRF((__n)) ); return flops; } +static inline double flops_spotrf( double __n) { double flops = ( FMULS_POTRF((__n)) + FADDS_POTRF((__n)) ); return flops; } + +static inline double flops_zpotri( double __n) { double flops = (6. * FMULS_POTRI((__n)) + 2.0 * FADDS_POTRI((__n)) ); return flops; } +static inline double flops_cpotri( double __n) { double flops = (6. 
* FMULS_POTRI((__n)) + 2.0 * FADDS_POTRI((__n)) ); return flops; } +static inline double flops_dpotri( double __n) { double flops = ( FMULS_POTRI((__n)) + FADDS_POTRI((__n)) ); return flops; } +static inline double flops_spotri( double __n) { double flops = ( FMULS_POTRI((__n)) + FADDS_POTRI((__n)) ); return flops; } + +static inline double flops_zpotrs( double __n, double __nrhs) { double flops = (6. * FMULS_POTRS((__n), (__nrhs)) + 2.0 * FADDS_POTRS((__n), (__nrhs)) ); return flops; } +static inline double flops_cpotrs( double __n, double __nrhs) { double flops = (6. * FMULS_POTRS((__n), (__nrhs)) + 2.0 * FADDS_POTRS((__n), (__nrhs)) ); return flops; } +static inline double flops_dpotrs( double __n, double __nrhs) { double flops = ( FMULS_POTRS((__n), (__nrhs)) + FADDS_POTRS((__n), (__nrhs)) ); return flops; } +static inline double flops_spotrs( double __n, double __nrhs) { double flops = ( FMULS_POTRS((__n), (__nrhs)) + FADDS_POTRS((__n), (__nrhs)) ); return flops; } + +static inline double flops_zgeqrf( double __m, double __n) { double flops = (6. * FMULS_GEQRF((__m), (__n)) + 2.0 * FADDS_GEQRF((__m), (__n)) ); return flops; } +static inline double flops_cgeqrf( double __m, double __n) { double flops = (6. * FMULS_GEQRF((__m), (__n)) + 2.0 * FADDS_GEQRF((__m), (__n)) ); return flops; } +static inline double flops_dgeqrf( double __m, double __n) { double flops = ( FMULS_GEQRF((__m), (__n)) + FADDS_GEQRF((__m), (__n)) ); return flops; } +static inline double flops_sgeqrf( double __m, double __n) { double flops = ( FMULS_GEQRF((__m), (__n)) + FADDS_GEQRF((__m), (__n)) ); return flops; } + +static inline double flops_zgeqlf( double __m, double __n) { double flops = (6. * FMULS_GEQLF((__m), (__n)) + 2.0 * FADDS_GEQLF((__m), (__n)) ); return flops; } +static inline double flops_cgeqlf( double __m, double __n) { double flops = (6. 
* FMULS_GEQLF((__m), (__n)) + 2.0 * FADDS_GEQLF((__m), (__n)) ); return flops; } +static inline double flops_dgeqlf( double __m, double __n) { double flops = ( FMULS_GEQLF((__m), (__n)) + FADDS_GEQLF((__m), (__n)) ); return flops; } +static inline double flops_sgeqlf( double __m, double __n) { double flops = ( FMULS_GEQLF((__m), (__n)) + FADDS_GEQLF((__m), (__n)) ); return flops; } + +static inline double flops_zgerqf( double __m, double __n) { double flops = (6. * FMULS_GERQF((__m), (__n)) + 2.0 * FADDS_GERQF((__m), (__n)) ); return flops; } +static inline double flops_cgerqf( double __m, double __n) { double flops = (6. * FMULS_GERQF((__m), (__n)) + 2.0 * FADDS_GERQF((__m), (__n)) ); return flops; } +static inline double flops_dgerqf( double __m, double __n) { double flops = ( FMULS_GERQF((__m), (__n)) + FADDS_GERQF((__m), (__n)) ); return flops; } +static inline double flops_sgerqf( double __m, double __n) { double flops = ( FMULS_GERQF((__m), (__n)) + FADDS_GERQF((__m), (__n)) ); return flops; } + +static inline double flops_zgelqf( double __m, double __n) { double flops = (6. * FMULS_GELQF((__m), (__n)) + 2.0 * FADDS_GELQF((__m), (__n)) ); return flops; } +static inline double flops_cgelqf( double __m, double __n) { double flops = (6. * FMULS_GELQF((__m), (__n)) + 2.0 * FADDS_GELQF((__m), (__n)) ); return flops; } +static inline double flops_dgelqf( double __m, double __n) { double flops = ( FMULS_GELQF((__m), (__n)) + FADDS_GELQF((__m), (__n)) ); return flops; } +static inline double flops_sgelqf( double __m, double __n) { double flops = ( FMULS_GELQF((__m), (__n)) + FADDS_GELQF((__m), (__n)) ); return flops; } + +static inline double flops_zungqr( double __m, double __n, double __k) { double flops = (6. * FMULS_UNGQR((__m), (__n), (__k)) + 2.0 * FADDS_UNGQR((__m), (__n), (__k)) ); return flops; } +static inline double flops_cungqr( double __m, double __n, double __k) { double flops = (6. 
* FMULS_UNGQR((__m), (__n), (__k)) + 2.0 * FADDS_UNGQR((__m), (__n), (__k)) ); return flops; } +static inline double flops_dorgqr( double __m, double __n, double __k) { double flops = ( FMULS_UNGQR((__m), (__n), (__k)) + FADDS_UNGQR((__m), (__n), (__k)) ); return flops; } +static inline double flops_sorgqr( double __m, double __n, double __k) { double flops = ( FMULS_UNGQR((__m), (__n), (__k)) + FADDS_UNGQR((__m), (__n), (__k)) ); return flops; } + +static inline double flops_zungql( double __m, double __n, double __k) { double flops = (6. * FMULS_UNGQL((__m), (__n), (__k)) + 2.0 * FADDS_UNGQL((__m), (__n), (__k)) ); return flops; } +static inline double flops_cungql( double __m, double __n, double __k) { double flops = (6. * FMULS_UNGQL((__m), (__n), (__k)) + 2.0 * FADDS_UNGQL((__m), (__n), (__k)) ); return flops; } +static inline double flops_dorgql( double __m, double __n, double __k) { double flops = ( FMULS_UNGQL((__m), (__n), (__k)) + FADDS_UNGQL((__m), (__n), (__k)) ); return flops; } +static inline double flops_sorgql( double __m, double __n, double __k) { double flops = ( FMULS_UNGQL((__m), (__n), (__k)) + FADDS_UNGQL((__m), (__n), (__k)) ); return flops; } + +static inline double flops_zungrq( double __m, double __n, double __k) { double flops = (6. * FMULS_UNGRQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGRQ((__m), (__n), (__k)) ); return flops; } +static inline double flops_cungrq( double __m, double __n, double __k) { double flops = (6. 
* FMULS_UNGRQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGRQ((__m), (__n), (__k)) ); return flops; } +static inline double flops_dorgrq( double __m, double __n, double __k) { double flops = ( FMULS_UNGRQ((__m), (__n), (__k)) + FADDS_UNGRQ((__m), (__n), (__k)) ); return flops; } +static inline double flops_sorgrq( double __m, double __n, double __k) { double flops = ( FMULS_UNGRQ((__m), (__n), (__k)) + FADDS_UNGRQ((__m), (__n), (__k)) ); return flops; } + +static inline double flops_zunglq( double __m, double __n, double __k) { double flops = (6. * FMULS_UNGLQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGLQ((__m), (__n), (__k)) ); return flops; } +static inline double flops_cunglq( double __m, double __n, double __k) { double flops = (6. * FMULS_UNGLQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGLQ((__m), (__n), (__k)) ); return flops; } +static inline double flops_dorglq( double __m, double __n, double __k) { double flops = ( FMULS_UNGLQ((__m), (__n), (__k)) + FADDS_UNGLQ((__m), (__n), (__k)) ); return flops; } +static inline double flops_sorglq( double __m, double __n, double __k) { double flops = ( FMULS_UNGLQ((__m), (__n), (__k)) + FADDS_UNGLQ((__m), (__n), (__k)) ); return flops; } + +static inline double flops_zunmqr( cham_side_t side, double __m, double __n, double __k) { double flops = (6. * FMULS_UNMQR(side, (__m), (__n), (__k)) + 2.0 * FADDS_UNMQR(side, (__m), (__n), (__k)) ); return flops; } +static inline double flops_cunmqr( cham_side_t side, double __m, double __n, double __k) { double flops = (6. 
* FMULS_UNMQR(side, (__m), (__n), (__k)) + 2.0 * FADDS_UNMQR(side, (__m), (__n), (__k)) ); return flops; } +static inline double flops_dormqr( cham_side_t side, double __m, double __n, double __k) { double flops = ( FMULS_UNMQR(side, (__m), (__n), (__k)) + FADDS_UNMQR(side, (__m), (__n), (__k)) ); return flops; } +static inline double flops_sormqr( cham_side_t side, double __m, double __n, double __k) { double flops = ( FMULS_UNMQR(side, (__m), (__n), (__k)) + FADDS_UNMQR(side, (__m), (__n), (__k)) ); return flops; } + +static inline double flops_zunmlq( cham_side_t side, double __m, double __n, double __k) { double flops = (6. * FMULS_UNMLQ(side, (__m), (__n), (__k)) + 2.0 * FADDS_UNMLQ(side, (__m), (__n), (__k)) ); return flops; } +static inline double flops_cunmlq( cham_side_t side, double __m, double __n, double __k) { double flops = (6. * FMULS_UNMLQ(side, (__m), (__n), (__k)) + 2.0 * FADDS_UNMLQ(side, (__m), (__n), (__k)) ); return flops; } +static inline double flops_dormlq( cham_side_t side, double __m, double __n, double __k) { double flops = ( FMULS_UNMLQ(side, (__m), (__n), (__k)) + FADDS_UNMLQ(side, (__m), (__n), (__k)) ); return flops; } +static inline double flops_sormlq( cham_side_t side, double __m, double __n, double __k) { double flops = ( FMULS_UNMLQ(side, (__m), (__n), (__k)) + FADDS_UNMLQ(side, (__m), (__n), (__k)) ); return flops; } + +static inline double flops_zgeqrs( double __m, double __n, double __nrhs) { double flops = (6. * FMULS_GEQRS((__m), (__n), (__nrhs)) + 2.0 * FADDS_GEQRS((__m), (__n), (__nrhs)) ); return flops; } +static inline double flops_cgeqrs( double __m, double __n, double __nrhs) { double flops = (6. 
* FMULS_GEQRS((__m), (__n), (__nrhs)) + 2.0 * FADDS_GEQRS((__m), (__n), (__nrhs)) ); return flops; } +static inline double flops_dgeqrs( double __m, double __n, double __nrhs) { double flops = ( FMULS_GEQRS((__m), (__n), (__nrhs)) + FADDS_GEQRS((__m), (__n), (__nrhs)) ); return flops; } +static inline double flops_sgeqrs( double __m, double __n, double __nrhs) { double flops = ( FMULS_GEQRS((__m), (__n), (__nrhs)) + FADDS_GEQRS((__m), (__n), (__nrhs)) ); return flops; } + +static inline double flops_ztrtri( double __n) { double flops = (6. * FMULS_TRTRI((__n)) + 2.0 * FADDS_TRTRI((__n)) ); return flops; } +static inline double flops_ctrtri( double __n) { double flops = (6. * FMULS_TRTRI((__n)) + 2.0 * FADDS_TRTRI((__n)) ); return flops; } +static inline double flops_dtrtri( double __n) { double flops = ( FMULS_TRTRI((__n)) + FADDS_TRTRI((__n)) ); return flops; } +static inline double flops_strtri( double __n) { double flops = ( FMULS_TRTRI((__n)) + FADDS_TRTRI((__n)) ); return flops; } + +static inline double flops_zgehrd( double __n) { double flops = (6. * FMULS_GEHRD((__n)) + 2.0 * FADDS_GEHRD((__n)) ); return flops; } +static inline double flops_cgehrd( double __n) { double flops = (6. * FMULS_GEHRD((__n)) + 2.0 * FADDS_GEHRD((__n)) ); return flops; } +static inline double flops_dgehrd( double __n) { double flops = ( FMULS_GEHRD((__n)) + FADDS_GEHRD((__n)) ); return flops; } +static inline double flops_sgehrd( double __n) { double flops = ( FMULS_GEHRD((__n)) + FADDS_GEHRD((__n)) ); return flops; } + +static inline double flops_zhetrd( double __n) { double flops = (6. * FMULS_HETRD((__n)) + 2.0 * FADDS_HETRD((__n)) ); return flops; } +static inline double flops_chetrd( double __n) { double flops = (6. * FMULS_HETRD((__n)) + 2.0 * FADDS_HETRD((__n)) ); return flops; } + +static inline double flops_zsytrd( double __n) { double flops = (6. 
* FMULS_SYTRD((__n)) + 2.0 * FADDS_SYTRD((__n)) ); return flops; } +static inline double flops_csytrd( double __n) { double flops = (6. * FMULS_SYTRD((__n)) + 2.0 * FADDS_SYTRD((__n)) ); return flops; } +static inline double flops_dsytrd( double __n) { double flops = ( FMULS_SYTRD((__n)) + FADDS_SYTRD((__n)) ); return flops; } +static inline double flops_ssytrd( double __n) { double flops = ( FMULS_SYTRD((__n)) + FADDS_SYTRD((__n)) ); return flops; } + +static inline double flops_zgebrd( double __m, double __n) { double flops = (6. * FMULS_GEBRD((__m), (__n)) + 2.0 * FADDS_GEBRD((__m), (__n)) ); return flops; } +static inline double flops_cgebrd( double __m, double __n) { double flops = (6. * FMULS_GEBRD((__m), (__n)) + 2.0 * FADDS_GEBRD((__m), (__n)) ); return flops; } +static inline double flops_dgebrd( double __m, double __n) { double flops = ( FMULS_GEBRD((__m), (__n)) + FADDS_GEBRD((__m), (__n)) ); return flops; } +static inline double flops_sgebrd( double __m, double __n) { double flops = ( FMULS_GEBRD((__m), (__n)) + FADDS_GEBRD((__m), (__n)) ); return flops; } + +/* + * Norms + */ +#define FMULS_LANGE(__m, __n) ((double)(__m) * (double)(__n)) +#define FADDS_LANGE(__m, __n) ((double)(__m) * (double)(__n)) + +#endif /* _flops_h_ */ diff --git a/new-testing/input/geadd.in b/new-testing/input/geadd.in new file mode 100644 index 000000000..e5ce4b9e0 --- /dev/null +++ b/new-testing/input/geadd.in @@ -0,0 +1,21 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GEADD +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrix A +# N: Number of columns of matrix B +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B +# trans: Wether the matrix A is not tranposed, transposed or conjugate transposed (0: not transposed, 1: transposed, 2: conjugate transposed) +# alpha: Scalar alpha +# beta: Scalar beta + +nb = 
16, 17 +ib = 8 +m = 15, 17, 33 +n = 13, 19, 35 +lda = 37 +ldb = 39 +trans = 0:2 diff --git a/new-testing/input/gelqf.in b/new-testing/input/gelqf.in new file mode 100644 index 000000000..984f1a0f4 --- /dev/null +++ b/new-testing/input/gelqf.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GELQF + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of the A matrix +# n: Number of columns of the A matrix +# lda: Leading dimension of the A matrix +# qra: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 13, 17, 35 +n = 15, 19, 33 +lda = 41 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/gelqf_hqr.in b/new-testing/input/gelqf_hqr.in new file mode 100644 index 000000000..041ca9683 --- /dev/null +++ b/new-testing/input/gelqf_hqr.in @@ -0,0 +1,26 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GELQF_HQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of the A matrix +# n: Number of columns of the A matrix +# lda: Leading dimension of the A matrix +# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction inside nodes +# hlvl: Tree used for high level reduction between nodes, only if qrp > 1 +# domino: Enable/Disable the domino between upper and lower trees + +nb = 8, 9 +ib = 3 +m = 13, 17, 35 +n = 15, 19, 33 +lda = 41 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 \ No newline at end of file diff --git a/new-testing/input/gelqs.in b/new-testing/input/gelqs.in new file mode 100644 index 000000000..d35251eab --- /dev/null +++ b/new-testing/input/gelqs.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range
start:end[:step] +# Not given parameters will receive default values + +# GELQS + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of the A matrix +# n: Number of columns of the A matrix +# lda: Leading dimension of the A matrix +# qra: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 13, 17, 35 +n = 15, 19, 33 +lda = 41 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/gels.in b/new-testing/input/gels.in new file mode 100644 index 000000000..4b2932b8b --- /dev/null +++ b/new-testing/input/gels.in @@ -0,0 +1,23 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GELS +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrix A +# N: Number of columns of matrix B +# K: Number of columns of matrix A and rows of Matrix B +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B +# trans: Whether the system involves A (ChamNoTrans) or A^H (ChamTransConj) +# qra: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 17, 31, 35 +n = 15, 29, 33 +k = 11, 27, 37 +lda = 43 +ldb = 45 +trans = 0, 2 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/gels_hqr.in b/new-testing/input/gels_hqr.in new file mode 100644 index 000000000..279c400cd --- /dev/null +++ b/new-testing/input/gels_hqr.in @@ -0,0 +1,31 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GELS_HQR +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrix A +# N: Number of columns of matrix B +# K: Number of columns of matrix A and rows of Matrix B +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B +# trans: Whether the system involves A (ChamNoTrans) or A^H (ChamTransConj). Currently only ChamNoTrans is supported.
+# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction inside nodes +# hlvl: Tree used for high level reduction between nodes, only if qrp > 1 +# domino: Enable/Disable the domino between upper and lower trees + +nb = 8, 9 +ib = 3 +m = 17, 31, 35 +n = 15, 29, 33 +k = 11, 27, 37 +lda = 43 +ldb = 45 +trans = 0 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 \ No newline at end of file diff --git a/new-testing/input/gemm.in b/new-testing/input/gemm.in new file mode 100644 index 000000000..edbe7881f --- /dev/null +++ b/new-testing/input/gemm.in @@ -0,0 +1,27 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GEMM +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrix A and C +# N: Number of columns of matrix B and C +# K: Number of columns of matrix A and rows of matrix B +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B +# LDC: Leading dimension of matrix C +# transA: Whether the matrix A is not transposed, transposed or conjugate transposed (0: not transposed, 1: transposed, 2: conjugate transposed) +# transB: Whether the matrix B is not transposed, transposed or conjugate transposed (0: not transposed, 1: transposed, 2: conjugate transposed) +# alpha: Scalar alpha +# beta: Scalar beta + +nb = 16, 17 +ib = 8 +m = 15, 25, 37 +n = 13, 23, 35 +k = 14, 24, 34 +lda = 41 +ldb = 43 +ldc = 41 +transA = 0:2 +transB = 0:2 diff --git a/new-testing/input/geqrf.in b/new-testing/input/geqrf.in new file mode 100644 index 000000000..27f1dd01e --- /dev/null +++ b/new-testing/input/geqrf.in @@ -0,0 +1,19 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GEQRF + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of the A
matrix +# n: Number of columns of the A matrix +# lda: Leading dimension of the A matrix +# qra: Size of each subdomain when using RH + + +nb = 16, 17 +ib = 8 +m = 13, 17, 35 +n = 15, 19, 33 +lda = 41 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/geqrf_hqr.in b/new-testing/input/geqrf_hqr.in new file mode 100644 index 000000000..3196bbdc7 --- /dev/null +++ b/new-testing/input/geqrf_hqr.in @@ -0,0 +1,26 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GEQRF_HQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of the A matrix +# n: Number of columns of the A matrix +# lda: Leading dimension of the A matrix +# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction inside nodes +# hlvl: Tree used for high level reduction between nodes, only if qrp > 1 +# domino: Enable/Disable the domino between upper and lower trees + +nb = 8, 9 +ib = 3 +m = 13, 17, 57 +n = 15, 19, 55 +lda = 59 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 \ No newline at end of file diff --git a/new-testing/input/geqrs.in b/new-testing/input/geqrs.in new file mode 100644 index 000000000..eb4391409 --- /dev/null +++ b/new-testing/input/geqrs.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GEQRS + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of the A matrix +# n: Number of columns of the A matrix +# lda: Leading dimension of the A matrix +# qra: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 13, 17, 35 +n = 15, 19, 33 +lda = 41 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/gesv.in b/new-testing/input/gesv.in new file mode 100644 index 000000000..84e4793c9 --- /dev/null
+++ b/new-testing/input/gesv.in @@ -0,0 +1,19 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GESV + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A and number of rows of matrix B +# nrhs: The number of columns of matrix B +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B + +nb = 16, 17 +ib = 8 +n = 15, 21, 35 +nrhs = 13, 22, 33 +lda = 40 +ldb = 41 +seedA = 4687 \ No newline at end of file diff --git a/new-testing/input/getrf.in b/new-testing/input/getrf.in new file mode 100644 index 000000000..717b14cd4 --- /dev/null +++ b/new-testing/input/getrf.in @@ -0,0 +1,16 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GETRF + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of the matrix A +# n: Number of columns of the matrix A +# lda: Leading dimension of matrix A + +nb = 16, 17 +ib = 8 +m = 13, 17, 35 +n = 15, 19, 33 +lda = 41 \ No newline at end of file diff --git a/new-testing/input/getrs.in b/new-testing/input/getrs.in new file mode 100644 index 000000000..2b9eb0062 --- /dev/null +++ b/new-testing/input/getrs.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GETRS + +# nb: Tile size +# ib: Inner tile size +# m: Order of the matrix A and number of rows of matrix B +# n: The number of columns of matrix B +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B + +nb = 16, 17 +ib = 8 +m = 13, 17, 35 +n = 15, 19, 33 +lda = 41 +ldb = 42 \ No newline at end of file diff --git a/new-testing/input/hemm.in b/new-testing/input/hemm.in new file mode 100644 index 000000000..d9149be51 --- /dev/null +++ 
b/new-testing/input/hemm.in @@ -0,0 +1,27 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# HEMM +# nb: Tile size +# ib: Inner tile size +# side: whether the Hermitian matrix A appears on the left or right side of the multiplication operation (0: left, 1: right) +# uplo: matrix part to be used (0: Upper, 1: Lower) +# M: Number of rows of matrix A and C +# N: Number of columns of matrix B and C +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B +# LDC: Leading dimension of matrix C +# alpha: Scalar alpha +# beta: Scalar beta +# bump: bump value for Hermitian matrices + +nb = 16, 17 +ib = 8 +side = 0:1 +uplo = 0:1 +m = 15, 25, 37 +n = 13, 23, 35 +lda = 41 +ldb = 43 +ldc = 41 +bump = 0 diff --git a/new-testing/input/her2k.in b/new-testing/input/her2k.in new file mode 100644 index 000000000..3adce390e --- /dev/null +++ b/new-testing/input/her2k.in @@ -0,0 +1,27 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# HER2K +# nb: Tile size +# ib: Inner tile size +# N: Number of columns and rows of matrix C and number of rows of matrices A and B +# K: Number of columns of matrix A and B +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B +# LDC: Leading dimension of matrix C +# uplo: Matrix part to be considered (0: Upper, 1: Lower) +# trans: Whether the matrices A and B are not transposed or conjugate transposed (0: not transposed, 2: conjugate transposed) +# alpha: Scalar alpha +# beta: Scalar beta +# bump: Bump value for Hermitian matrices + +nb = 16, 17 +ib = 8 +n = 15, 21, 33 +k = 13, 23, 35 +lda = 37 +ldb = 39 +ldc = 40 +uplo = 0:1 +trans = 0,2 +bump = 0 diff --git a/new-testing/input/herk.in b/new-testing/input/herk.in new file mode 100644 index 000000000..a3f3f70f0 --- /dev/null +++ b/new-testing/input/herk.in @@ -0,0 +1,26 @@ +# You
can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# HERK +# nb: Tile size +# ib: Inner tile size +# N: Order of the matrix C +# K: Number of columns of the matrix op(A) +# LDA: Leading dimension of matrix A +# LDC: Leading dimension of matrix C +# uplo: Matrix part to be considered (0: Upper, 1: Lower) +# trans: Whether the matrix A is not transposed or conjugate transposed (0: not transposed, 2: conjugate transposed) +# alpha: Scalar alpha +# beta: Scalar beta +# bump: Bump value for Hermitian matrices + +nb = 16, 17 +ib = 8 +n = 15, 21, 33 +k = 13, 23, 35 +lda = 37 +ldc = 40 +side = 0:1 +uplo = 0:1 +trans = 0,2 +bump = 0 diff --git a/new-testing/input/lacpy.in b/new-testing/input/lacpy.in new file mode 100644 index 000000000..2354df8f8 --- /dev/null +++ b/new-testing/input/lacpy.in @@ -0,0 +1,19 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# LACPY +# nb: Tile size +# ib: Inner tile size +# uplo: Part of the matrix to be copied (0 for Upper, 1 for Lower and 2 for UpperLower) +# M: Number of rows of matrices A and B +# N: Number of columns of matrices A and B +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B + +nb = 16, 17 +ib = 8 +uplo = 0:2 +m = 13:45:16 +n = 15:52:16 +lda = 65 +ldb = 66 \ No newline at end of file diff --git a/new-testing/input/lange.in b/new-testing/input/lange.in new file mode 100644 index 000000000..1844435db --- /dev/null +++ b/new-testing/input/lange.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# LANGE +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrix A +# N: Number of columns of matrix A +# LDA: Leading dimension of matrix A +# norm: norm type
to be calculated (0 for Max|1 for One|2 for Infinity|3 for Frobenius) + + +nb = 16, 17 +ib = 8 +m = 15, 21, 33 +n = 13, 23, 35 +lda = 45 +norm = MaxNorm, OneNorm, InfNorm, FrobeniusNorm diff --git a/new-testing/input/lanhe.in b/new-testing/input/lanhe.in new file mode 100644 index 000000000..5797df08f --- /dev/null +++ b/new-testing/input/lanhe.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# LANHE +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrix A +# N: Number of columns of matrix A +# LDA: Leading dimension of matrix A +# norm: norm type to be calculated (0 for Max|1 for One|2 for Infinity|3 for Frobenius) +# uplo: matrix part to be considered (0: Upper, 1: Lower) +# bump: bump value for Hermitian matrices + +nb = 16, 17 +ib = 8 +m = 15, 19, 32 +n = 13, 21, 33 +lda = 43 +uplo= 0,1 +norm = MaxNorm, OneNorm, InfNorm, FrobeniusNorm diff --git a/new-testing/input/lansy.in b/new-testing/input/lansy.in new file mode 100644 index 000000000..3348d4afd --- /dev/null +++ b/new-testing/input/lansy.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# LANSY +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrix A +# N: Number of columns of matrix A +# LDA: Leading dimension of matrix A +# norm: norm type to be calculated (0 for Max|1 for One|2 for Infinity|3 for Frobenius) +# uplo: matrix part to be considered (0: Upper, 1: Lower) +# bump: bump value for symmetric matrices + +nb = 16, 17 +ib = 8 +m = 15, 19, 32 +n = 13, 21, 33 +lda = 43 +uplo = 0,1 +norm = MaxNorm, OneNorm, InfNorm, FrobeniusNorm diff --git a/new-testing/input/lantr.in b/new-testing/input/lantr.in new file mode 100644 index 000000000..234b042cb --- /dev/null +++ b/new-testing/input/lantr.in @@
-0,0 +1,22 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# LANTR +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrices A and C +# N: Number of columns of matrices B and C +# LDA: Leading dimension of matrix A +# norm: norm type to be calculated (0 for Max|1 for One|2 for Infinity|3 for Frobenius) +# uplo: matrix part to be considered (0: Upper, 1: Lower) +# bump: +# diag: whether or not A is unit triangular (0: non unit, 1: unit) + +nb = 16, 17 +ib = 8 +m = 15, 19, 32 +n = 13, 21, 33 +lda = 43 +uplo= 0,1 +diag = 0,1 +norm = MaxNorm, OneNorm, InfNorm, FrobeniusNorm diff --git a/new-testing/input/lascal.in b/new-testing/input/lascal.in new file mode 100644 index 000000000..404061a6f --- /dev/null +++ b/new-testing/input/lascal.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# LASCAL +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrices A and C +# N: Number of columns of matrices B and C +# LDA: Leading dimension of matrix A +# uplo: Part of the matrix to be scaled (0 for Upper, 1 for Lower and 2 for UpperLower) +# alpha: Scale to apply + +nb = 16, 17 +ib = 8 +m = 15, 19, 33 +n = 14, 21, 31 +lda = 41 +uplo = 0:2 diff --git a/new-testing/input/lauum.in b/new-testing/input/lauum.in new file mode 100644 index 000000000..a50f75225 --- /dev/null +++ b/new-testing/input/lauum.in @@ -0,0 +1,16 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# LAUUM + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A +# lda: Leading dimension of matrix A +# uplo: Matrix part to be considered (0: Upper, 1: Lower) + +nb = 16, 17 +ib = 8 +n = 15, 31,
33 +lda = 35 +uplo = 0,1 diff --git a/new-testing/input/ongqr.in b/new-testing/input/ongqr.in new file mode 100644 index 000000000..e4ca0c755 --- /dev/null +++ b/new-testing/input/ongqr.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# UNGQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix Q and A +# n: Number of columns of matrix Q and A +# k: Number of reflectors of matrix A +# lda: Leading dimension of matrix Q and A + +nb = 16, 17 +ib = 8 +m = 15, 19, 33 +n = 13, 17, 35 +k = 14, 18, 37 +lda = 41 \ No newline at end of file diff --git a/new-testing/input/orglq.in b/new-testing/input/orglq.in new file mode 100644 index 000000000..cc7079fd1 --- /dev/null +++ b/new-testing/input/orglq.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# ORGLQ + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix Q and A +# n: Number of columns of matrix Q and A +# k: Number of reflectors of matrix A +# lda: Leading dimension of matrix Q and A +# RH: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 15, 19, 33 +n = 13, 17, 35 +k = 9, 15, 17 +lda = 41 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/orglq_hqr.in b/new-testing/input/orglq_hqr.in new file mode 100644 index 000000000..e14831fdb --- /dev/null +++ b/new-testing/input/orglq_hqr.in @@ -0,0 +1,28 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# ORGLQ_HQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix Q and A +# n: Number of columns of matrix Q and A +# k: Number of reflectors of matrix A +# lda: Leading dimension of 
matrix Q and A +# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction insides nodes +# hlv: Tree used for high level reduction between nodes, only if qrp > 1 +# domino: Enable/Disable the domino between upper and lower trees + +nb = 8, 9 +ib = 3 +m = 15, 19, 33 +n = 13, 17, 35 +k = 9, 15, 17 +lda = 41 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 \ No newline at end of file diff --git a/new-testing/input/orgqr.in b/new-testing/input/orgqr.in new file mode 100644 index 000000000..9f172fd27 --- /dev/null +++ b/new-testing/input/orgqr.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# ORGQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix Q and A +# n: Number of columns of matrix Q and A +# k: Number of reflectors of matrix A +# lda: Leading dimension of matrix Q and A +# RH: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 15, 19, 33 +n = 13, 17, 35 +k = 9, 15, 17 +lda = 41 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/orgqr_hqr.in b/new-testing/input/orgqr_hqr.in new file mode 100644 index 000000000..45080ddd2 --- /dev/null +++ b/new-testing/input/orgqr_hqr.in @@ -0,0 +1,28 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# ORGQR_HQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix Q and A +# n: Number of columns of matrix Q and A +# k: Number of reflectors of matrix A +# lda: Leading dimension of matrix Q and A +# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction insides nodes +# hlv: Tree used for high level reduction between nodes, only if qrp > 1 +# domino: Enable/Disable 
the domino between upper and lower trees + +nb = 8, 9 +ib = 3 +m = 15, 19, 33 +n = 13, 17, 35 +k = 9, 15, 17 +lda = 41 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 \ No newline at end of file diff --git a/new-testing/input/ormlq.in b/new-testing/input/ormlq.in new file mode 100644 index 000000000..69ea2b7a3 --- /dev/null +++ b/new-testing/input/ormlq.in @@ -0,0 +1,24 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# ORMLQ + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix A +# n: Number of columns of matrix A +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# side: Whether Q appears on the left or on the right side of the multiplication +# trans: Whether the matrix Q is transposed or conjugate transposed +# RH: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 15, 20, 33 +n = 13, 21, 34 +lda = 41 +ldb = 42 +side = 0:1 +trans = 0,1 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/ormlq_hqr.in b/new-testing/input/ormlq_hqr.in new file mode 100644 index 000000000..93054a713 --- /dev/null +++ b/new-testing/input/ormlq_hqr.in @@ -0,0 +1,32 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# ORMLQ_HQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix A +# n: Number of columns of matrix A +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# side: Whether Q appears on the left or on the right side of the multiplication +# trans: Whether the matrix Q is transposed or conjugate transposed +# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction insides nodes +# hlv: Tree used for high level reduction between nodes, only if 
qrp > 1 +# domino: Enable/Disable the domino between upper and lower trees + +nb = 16, 17 +ib = 3 +n = 15, 20, 32 +m = 13, 21, 34 +lda = 41 +ldb = 42 +side = 0:1 +trans = 0,1 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 diff --git a/new-testing/input/ormqr.in b/new-testing/input/ormqr.in new file mode 100644 index 000000000..e9a7c15c2 --- /dev/null +++ b/new-testing/input/ormqr.in @@ -0,0 +1,24 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# ORMQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix A +# n: Number of columns of matrix A +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# side: Whether Q appears on the left or on the right side of the multiplication +# trans: Whether the matrix Q is transposed or conjugate transposed +# RH: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 15, 20, 33 +n = 13, 21, 34 +lda = 41 +ldb = 42 +side = 0:1 +trans = 0,1 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/ormqr_hqr.in b/new-testing/input/ormqr_hqr.in new file mode 100644 index 000000000..f24a54f2d --- /dev/null +++ b/new-testing/input/ormqr_hqr.in @@ -0,0 +1,32 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# ORMQR_HQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix A +# n: Number of columns of matrix A +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# side: Whether Q appears on the left or on the right side of the multiplication +# trans: Whether the matrix Q is transposed or conjugate transposed +# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction insides nodes +# hlv: Tree used for high level 
reduction between nodes, only if qrp > 1 +# domino: Enable/Disable the domino between upper and lower trees + +nb = 16, 17 +ib = 3 +m = 15, 20, 32 +n = 13, 21, 34 +lda = 41 +ldc = 42 +side = 0:1 +trans = 0,1 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 diff --git a/new-testing/input/posv.in b/new-testing/input/posv.in new file mode 100644 index 000000000..0d4a05328 --- /dev/null +++ b/new-testing/input/posv.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# POSV + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A and number of rows of matrix B +# nrhs: The number of columns of matrix B +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# uplo: Matrix part to be considered (0: Upper, 1: Lower) + +nb = 16, 17 +ib = 8 +n = 15, 21, 35 +nrhs = 13, 22, 33 +lda = 37 +ldb = 39 +uplo = 0,1 diff --git a/new-testing/input/potrf.in b/new-testing/input/potrf.in new file mode 100644 index 000000000..acb8ca94d --- /dev/null +++ b/new-testing/input/potrf.in @@ -0,0 +1,16 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# POTRF + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A +# lda: Leading dimension of matrix A +# uplo: Matrix part to be considered (0: Upper, 1: Lower) + +nb = 16, 17 +ib = 8 +n = 15, 19, 37 +lda = 41 +uplo = 0,1 diff --git a/new-testing/input/potri.in b/new-testing/input/potri.in new file mode 100644 index 000000000..7ec3e8747 --- /dev/null +++ b/new-testing/input/potri.in @@ -0,0 +1,16 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# POTRI + +# nb: Tile size +# ib: Inner tile size +# n: Order of the 
matrix A +# lda: Leading dimension of matrix A +# uplo: Matrix part to be considered (0: Upper, 1: Lower) + +nb = 16, 17 +ib = 8 +n = 15, 19, 37 +lda = 41 +uplo = 0,1 diff --git a/new-testing/input/potrs.in b/new-testing/input/potrs.in new file mode 100644 index 000000000..615674d7e --- /dev/null +++ b/new-testing/input/potrs.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# POTRS + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A and number of rows of matrix B +# nrhs: The number of columns of matrix B +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# uplo: Matrix part to be considered (0: Upper, 1: Lower) + +nb = 16, 17 +ib = 8 +n = 15, 19, 37 +nrhs = 13, 21, 35 +lda = 41 +ldb = 43 +uplo = 0,1 diff --git a/new-testing/input/symm.in b/new-testing/input/symm.in new file mode 100644 index 000000000..164f0f5f8 --- /dev/null +++ b/new-testing/input/symm.in @@ -0,0 +1,27 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# SYMM +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrix A and C +# N: Number of columns of matrix B and C +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B +# LDC: Leading dimension of matrix C +# uplo: matrix part to be used (0: Upper, 1: Lower) +# side: whether the symmetric matrix A appears on the left or right side of the multiplication operation (0: left, 1: right) +# alpha: Scalar alpha +# beta: Scalar beta +# bump: bump value for Hermitian matrices + +nb = 16, 17 +ib = 8 +side = 0:1 +uplo = 0:1 +m = 15, 25, 37 +n = 13, 23, 35 +lda = 41 +ldb = 43 +ldc = 41 +bump = 0 diff --git a/new-testing/input/syr2k.in b/new-testing/input/syr2k.in new file mode 100644 index 000000000..54fe34112 --- 
/dev/null +++ b/new-testing/input/syr2k.in @@ -0,0 +1,27 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# SYR2K +# nb: Tile size +# ib: Inner tile size +# N: Number of columns and rows of matrix C and number of row of matrix A and B +# K: Number of columns of matrix A and B +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B +# LDC: Leading dimension of matrix C +# uplo: Matrix part to be considered (0: Upper, 1: Lower) +# trans: Whether the matrix A is transposed or conjugate transposed +# alpha: Scalar alpha +# beta: Scalar beta +# bump: Bump value for symmetric matrices + +nb = 16, 17 +ib = 8 +n = 15, 21, 35 +k = 13, 23, 33 +lda = 42 +ldb = 43 +ldc = 44 +uplo = 0:1 +trans = 0,1 +bump = 0 diff --git a/new-testing/input/syrk.in b/new-testing/input/syrk.in new file mode 100644 index 000000000..cad50f1aa --- /dev/null +++ b/new-testing/input/syrk.in @@ -0,0 +1,25 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# SYRK +# nb: Tile size +# ib: Inner tile size +# N: Order of the matrix C +# K: Number of columns of the matrix op(A) +# LDA: Leading dimension of matrix A +# LDC: Leading dimension of matrix C +# uplo: Matrix part to be considered (0: Upper, 1: Lower) +# trans: Whether the matrix A is transposed or conjugate transposed +# alpha: Scalar alpha +# beta: Scalar beta +# bump: Bump value for symmetric matrices + +nb = 16, 17 +ib = 8 +n = 15, 21, 35 +k = 13, 23, 33 +lda = 38 +ldc = 39 +side = 0:1 +uplo = 0:1 +bump = 0 diff --git a/new-testing/input/sysv.in b/new-testing/input/sysv.in new file mode 100644 index 000000000..0d4a05328 --- /dev/null +++ b/new-testing/input/sysv.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range 
start:end[:step] +# Not given parameters will receive default values + +# SYSV + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A and number of rows of matrix B +# nrhs: The number of columns of matrix B +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# uplo: Matrix part to be considered (0: Upper, 1: Lower) + +nb = 16, 17 +ib = 8 +n = 15, 21, 35 +nrhs = 13, 22, 33 +lda = 37 +ldb = 39 +uplo = 0,1 diff --git a/new-testing/input/sytrf.in b/new-testing/input/sytrf.in new file mode 100644 index 000000000..acb8ca94d --- /dev/null +++ b/new-testing/input/sytrf.in @@ -0,0 +1,16 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# SYTRF + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A +# lda: Leading dimension of matrix A +# uplo: Matrix part to be considered (0: Upper, 1: Lower) + +nb = 16, 17 +ib = 8 +n = 15, 19, 37 +lda = 41 +uplo = 0,1 diff --git a/new-testing/input/sytrs.in b/new-testing/input/sytrs.in new file mode 100644 index 000000000..615674d7e --- /dev/null +++ b/new-testing/input/sytrs.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# SYTRS + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A and number of rows of matrix B +# nrhs: The number of columns of matrix B +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# uplo: Matrix part to be considered (0: Upper, 1: Lower) + +nb = 16, 17 +ib = 8 +n = 15, 19, 37 +nrhs = 13, 21, 35 +lda = 41 +ldb = 43 +uplo = 0,1 diff --git a/new-testing/input/tradd.in b/new-testing/input/tradd.in new file mode 100644 index 000000000..d56c130ac --- /dev/null +++ b/new-testing/input/tradd.in @@ -0,0 +1,23 @@ +# You can enumerate each parameter's values as an
explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# TRADD +# nb: Tile size +# ib: Inner tile size +# M: Number of rows of matrix A +# N: Number of columns of matrix B +# LDA: Leading dimension of matrix A +# LDB: Leading dimension of matrix B +# trans: Whether the matrix A is not transposed, transposed or conjugate transposed (0: not transposed, 1: transposed, 2: conjugate transposed) +# uplo: matrix part to be considered (0: Upper, 1: Lower) +# alpha: Scalar alpha +# beta: Scalar beta + +nb = 16, 17 +ib = 8 +m = 15, 29, 33 +n = 13, 27, 35 +lda = 39 +ldb = 41 +trans = 0:2 +uplo = 0, 1 diff --git a/new-testing/input/trmm.in b/new-testing/input/trmm.in new file mode 100644 index 000000000..f7a21e7f4 --- /dev/null +++ b/new-testing/input/trmm.in @@ -0,0 +1,26 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# TRMM +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A +# k: the number of columns of the matrix B if side = ChamLeft and the number of rows of the matrix B if side = ChamRight +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# uplo: Matrix part to be considered (0: Upper, 1: Lower) +# side: Whether A appears on the left or on the right of B +# trans: Whether the matrix A is transposed or conjugate transposed +# diag: Whether or not A is unit triangular +# alpha: Scalar alpha + +nb = 16, 17 +ib = 8 +n = 15, 19, 35 +k = 13, 21, 33 +lda = 38 +ldb = 37 +uplo = 0,1 +side = 0:1 +trans = 0:2 +diag = 0,1 diff --git a/new-testing/input/trsm.in b/new-testing/input/trsm.in new file mode 100644 index 000000000..8a376a0be --- /dev/null +++ b/new-testing/input/trsm.in @@ -0,0 +1,27 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive
default values + +# TRSM + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A +# k: the number of columns of the matrix B if side = ChamLeft and the number of rows of the matrix B if side = ChamRight +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# uplo: Matrix part to be considered (0: Upper, 1: Lower) +# side: Whether A appears on the left or on the right of B +# trans: Whether the matrix A is transposed or conjugate transposed +# diag: Whether or not A is unit triangular +# alpha: Scalar alpha + +nb = 16, 17 +ib = 8 +n = 15, 27, 35 +k = 13, 25, 33 +lda = 41 +ldb = 42 +uplo = 0,1 +side = 0:1 +trans = 0:2 +diag = 0,1 diff --git a/new-testing/input/trtri.in b/new-testing/input/trtri.in new file mode 100644 index 000000000..ef19e366d --- /dev/null +++ b/new-testing/input/trtri.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# TRTRI + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A +# lda: Leading dimension of matrix A +# uplo: Matrix part to be considered (0: Upper, 1: Lower) +# diag: Whether the matrix A is unit triangular (0: non unit, 1: unit) + +nb = 16, 17 +ib = 8 +n = 15, 19, 33 +lda = 38 +uplo = 0,1 +diag = 0,1 diff --git a/new-testing/input/unglq.in b/new-testing/input/unglq.in new file mode 100644 index 000000000..869f70420 --- /dev/null +++ b/new-testing/input/unglq.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# UNGLQ + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix Q and A +# n: Number of columns of matrix Q and A +# k: Number of reflectors of matrix A +# lda: Leading dimension of matrix Q and A +# RH: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 15, 19, 33 +n =
13, 17, 35 +k = 9, 15, 17 +lda = 41 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/unglq_hqr.in b/new-testing/input/unglq_hqr.in new file mode 100644 index 000000000..dbfd88788 --- /dev/null +++ b/new-testing/input/unglq_hqr.in @@ -0,0 +1,28 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# UNGLQ_HQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix Q and A +# n: Number of columns of matrix Q and A +# k: Number of reflectors of matrix A +# lda: Leading dimension of matrix Q and A +# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction insides nodes +# hlv: Tree used for high level reduction between nodes, only if qrp > 1 +# domino: Enable/Disable the domino between upper and lower trees + +nb = 8, 9 +ib = 3 +m = 15, 19, 33 +n = 13, 17, 35 +k = 9, 15, 17 +lda = 41 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 \ No newline at end of file diff --git a/new-testing/input/ungqr.in b/new-testing/input/ungqr.in new file mode 100644 index 000000000..1e9be0084 --- /dev/null +++ b/new-testing/input/ungqr.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# UNGQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix Q and A +# n: Number of columns of matrix Q and A +# k: Number of reflectors of matrix A +# lda: Leading dimension of matrix Q and A +# RH: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 15, 19, 33 +n = 13, 17, 35 +k = 9, 15, 17 +lda = 41 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/ungqr_hqr.in b/new-testing/input/ungqr_hqr.in new file mode 100644 index 000000000..78f787899 --- /dev/null +++ 
b/new-testing/input/ungqr_hqr.in @@ -0,0 +1,28 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# UNGQR_HQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix Q and A +# n: Number of columns of matrix Q and A +# k: Number of reflectors of matrix A +# lda: Leading dimension of matrix Q and A +# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction insides nodes +# hlv: Tree used for high level reduction between nodes, only if qrp > 1 +# domino: Enable/Disable the domino between upper and lower trees + +nb = 8, 9 +ib = 3 +m = 15, 19, 33 +n = 13, 17, 35 +k = 9, 15, 17 +lda = 41 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 \ No newline at end of file diff --git a/new-testing/input/unmlq.in b/new-testing/input/unmlq.in new file mode 100644 index 000000000..3f520a0e2 --- /dev/null +++ b/new-testing/input/unmlq.in @@ -0,0 +1,24 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# UNMLQ + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix A +# n: Number of columns of matrix A +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# side: Whether Q appears on the left or on the right side of the multiplication +# trans: Whether the matrix Q is transposed or conjugate transposed +# RH: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 15, 20, 33 +n = 13, 21, 34 +lda = 41 +ldb = 42 +side = 0:1 +trans = 0,2 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/unmlq_hqr.in b/new-testing/input/unmlq_hqr.in new file mode 100644 index 000000000..6a0ea24ee --- /dev/null +++ b/new-testing/input/unmlq_hqr.in @@ -0,0 +1,32 @@ +# You can enumerate each parameter's values as 
an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# UNMLQ_HQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix A +# n: Number of columns of matrix A +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# side: Whether Q appears on the left or on the right side of the multiplication +# trans: Whether the matrix Q is transposed or conjugate transposed +# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction insides nodes +# hlv: Tree used for high level reduction between nodes, only if qrp > 1 +# domino: Enable/Disable the domino between upper and lower trees + +nb = 8, 9 +ib = 3 +m = 15, 20, 33 +n = 13, 21, 34 +lda = 41 +ldb = 42 +side = 0:1 +trans = 0,2 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 \ No newline at end of file diff --git a/new-testing/input/unmqr.in b/new-testing/input/unmqr.in new file mode 100644 index 000000000..24407e858 --- /dev/null +++ b/new-testing/input/unmqr.in @@ -0,0 +1,24 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# UNMQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix A +# n: Number of columns of matrix A +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# side: Whether Q appears on the left or on the right side of the multiplication +# trans: Whether the matrix Q is transposed or conjugate transposed +# RH: Size of each subdomain when using RH + +nb = 16, 17 +ib = 8 +m = 15, 20, 33 +n = 13, 21, 34 +lda = 41 +ldb = 42 +side = 0:1 +trans = 0,2 +qra = 0, 3 \ No newline at end of file diff --git a/new-testing/input/unmqr_hqr.in b/new-testing/input/unmqr_hqr.in new file mode 100644 index 000000000..a4a74c1d0 --- /dev/null +++ b/new-testing/input/unmqr_hqr.in @@ -0,0 
+1,32 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# UNMQR_HQR + +# nb: Tile size +# ib: Inner tile size +# m: Number of rows of matrix A +# n: Number of columns of matrix A +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B +# side: Whether Q appears on the left or on the right side of the multiplication +# trans: Whether the matrix Q is transposed or conjugate transpose +# qra: Size of TS domain +# qrp: Size of high level tree for distributed mode +# llvl: Tree used for low level reduction insides nodes +# hlv: Tree used for high level reduction between nodes, only if qrp > 1 +# domino: Enable/Disable the domino between upper and lower trees + +nb = 8, 9 +ib = 3 +m = 15, 20 +n = 13, 21, 34 +lda = 41 +ldb = 42 +side = 0:1 +trans = 0,2 +qra = 2 +qrp = -1 +llvl = -1 +hlvl = -1 +domino = -1 \ No newline at end of file diff --git a/new-testing/parameters.c b/new-testing/parameters.c new file mode 100644 index 000000000..c1d437223 --- /dev/null +++ b/new-testing/parameters.c @@ -0,0 +1,373 @@ +/** + * + * @file parameters.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + *** + * + * @brief Chameleon auxiliary routines for testing structures + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-07-18 + * + */ +#include "testings.h" + +/** + ******************************************************************************** + * + * @brief Get the list of values associated to a given parameter + * + ******************************************************************************* + * + * @param[in] name + * The name of the parameter we are interested in. + * + * @return NULL if no parameter exists with this name, otherwise the pointer to + * the list of values associated to this parameter. 
+ *
+ *******************************************************************************
+ */
+vallist_t *
+parameters_getlist( const char *name )
+{
+    parameter_t *param = parameters_getbyname( name );
+
+    /* No parameter is registered under this name: there is no list to return */
+    if ( param == NULL ) {
+        return NULL;
+    }
+    else {
+        return param->vallist;
+    }
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Parses a list in form A1, A2, ..., An and inserts the values in an
+ * argument list.
+ *
+ * The input string is duplicated before being tokenized, so liststr itself is
+ * never modified. Tokens are separated by commas, spaces or newlines, and each
+ * token is converted through the param->read callback. New values are appended
+ * after the values already registered for the parameter. If a memory
+ * allocation fails, an error is reported on stderr and the values parsed so
+ * far are kept.
+ *
+ *******************************************************************************
+ *
+ * @param[inout] param
+ *          The parameter associated to the list.
+ *          On exit, the list of values are added to the parameter list of
+ *          possible values.
+ *
+ * @param[in] liststr
+ *          The string that holds the list
+ *
+ *******************************************************************************
+ */
+void
+parameters_read_list( parameter_t *param,
+                      const char  *liststr )
+{
+    const char *delim = ", \n";
+    char       *str   = strdup( liststr );
+    char       *token, *saveptr;
+    vallist_t  *previous, *current;
+
+    /* strtok_r() must not be started on a NULL string */
+    if ( str == NULL ) {
+        fprintf( stderr, "parameters_read_list: failed to duplicate the list of values of %s\n",
+                 param->name );
+        return;
+    }
+
+    /* Initialize the list items */
+    previous = NULL;
+    current  = param->vallist;
+
+    /* Move to the end of the list if some parameters have already been registered */
+    while( current != NULL ) {
+        previous = current;
+        current  = current->next;
+    }
+
+    fprintf( stderr, "%s (list): ", param->name );
+
+    token = strtok_r( str, delim, &saveptr );
+    while ( token != NULL ) {
+        assert( current == NULL );
+        current = calloc( 1, sizeof(vallist_t) );
+        if ( current == NULL ) {
+            /* Keep what has been parsed so far and give up on the remaining tokens */
+            fprintf( stderr, " [out of memory: remaining values skipped]" );
+            break;
+        }
+
+        /* Read the value */
+        current->value = param->read( token );
+
+        /* Insert at the end of the list */
+        if ( previous != NULL ) {
+            previous->next = current;
+        }
+        else {
+            /* Nothing was in the list */
+            param->vallist = current;
+        }
+
+        previous = current;
+        current  = NULL;
+
+        /* Move to the next token */
+        token = strtok_r( NULL, delim, &saveptr );
+    }
+
+    fprintf( stderr, "\n" );
+    free( str );
+}
+
+/**
+
********************************************************************************
+ *
+ * @brief Parses a range in the form start:end[:step] and appends the values
+ * start, start+step, ... that are not larger than end to the list of values of
+ * a parameter.
+ *
+ * Nothing is added to the list, and a message is printed on stderr, if the
+ * syntax is incorrect, if the step is not strictly positive, or if the bounds
+ * fit neither in [min:max] nor, as 0-based offsets, in [0:max-min].
+ *
+ *******************************************************************************
+ *
+ * @param[inout] param
+ *          The parameter associated to the list.
+ *          On exit, the range of values are added to the parameter list of
+ *          possible values.
+ *
+ * @param[in] rangestr
+ *          The string that holds the range. The step defaults to 1 when it is
+ *          not given.
+ *
+ * @param[in] min
+ *          The minimum value available
+ *
+ * @param[in] max
+ *          The maximum value available. -1 stands for INT32_MAX.
+ *
+ *******************************************************************************
+ */
+void
+parameters_read_intrange( parameter_t *param,
+                          const char  *rangestr,
+                          int min, int max )
+{
+    int start, end, step, count;
+    vallist_t *previous, *current;
+
+    max = (max == -1) ? INT32_MAX : max;
+
+    count = sscanf( rangestr, "%d:%d:%d", &start, &end, &step );
+    if ( count < 2 ) {
+        fprintf(stderr, "Incorrect range syntax (%s): data skipped\n", rangestr );
+        return;
+    }
+    else if (count == 2) {
+        step = 1;
+    }
+
+    /* A null or negative step would never reach the end of the range */
+    if ( step <= 0 ) {
+        fprintf( stderr, "Incorrect range step (%d), it must be strictly positive: data skipped\n",
+                 step );
+        return;
+    }
+
+    /* Check the range */
+    if ( (start < min) || (start > max) ||
+         (end   < min) || (end   > max) )
+    {
+        /*
+         * The bounds may have been given as 0-based offsets: shift them by min
+         * only if they fit afterwards. The test is done before the shift to
+         * avoid overflowing on large inputs (assumes min <= max).
+         */
+        if ( (start >= 0) && (start <= (max - min)) &&
+             (end   >= 0) && (end   <= (max - min)) )
+        {
+            start += min;
+            end   += min;
+        }
+        else {
+            fprintf( stderr, "Incorrect range values outside the possible ranges [%d:%d]",
+                     min, max );
+            if ( min > 0 ) {
+                fprintf( stderr, " or [%d:%d]\n", 0, max-min );
+            }
+            else {
+                fprintf( stderr, "\n" );
+            }
+            /* Do not register values that are known to be invalid */
+            return;
+        }
+    }
+
+    /* Initialize the list items */
+    previous = NULL;
+    current  = param->vallist;
+
+    /* Move to the end of the list if some parameters have already been registered */
+    while( current != NULL ) {
+        previous = current;
+        current  = current->next;
+    }
+
+    fprintf( stderr, "%s (range): ", param->name );
+    while ( start <= end ) {
+        current = calloc( 1, sizeof(vallist_t) );
+
+        /* Store the value */
+        current->value.ival = start;
+        fprintf( stderr, " %d", start );
+
+        /* Insert at the end of the list */
+        if ( previous != NULL ) {
+            previous->next = current;
+        }
+        else {
+            /* Nothing was in the list */
+            param->vallist = current;
+        }
+        previous = current;
+
+        /* Stop here if the next value is past the end: start += step could overflow */
+        if ( (end - start) < step ) {
+            break;
+        }
+        start += step;
+    }
+    fprintf( stderr, "\n" );
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Wrapper to parse a list or range of values associated to a parameter.
+ *
+ * The string is handled as a range as soon as it contains a ':', and as a list
+ * otherwise.
+ *
+ *******************************************************************************
+ *
+ * @param[inout] param
+ *          The parameter associated to the list.
+ *          On exit, the range of values are added to the parameter list of
+ *          possible values.
+ *
+ * @param[in] values
+ *          The string that holds the range or list of values
+ *
+ *******************************************************************************
+ */
+void
+parameters_read( parameter_t *param,
+                 const char  *values )
+{
+    int range = ( strchr( values, ':' ) != NULL );
+
+    /* If we have a range of integer values */
+    if ( range )
+    {
+        /* The bounds of the range depend on the type of the parameter */
+        switch ( param->valtype ) {
+        case TestValInt:
+            parameters_read_intrange( param, values, 0, -1 );
+            break;
+        case TestTrans:
+            parameters_read_intrange( param, values, ChamNoTrans, ChamConjTrans );
+            break;
+        case TestUplo:
+            parameters_read_intrange( param, values, ChamUpper, ChamUpperLower );
+            break;
+        case TestDiag:
+            parameters_read_intrange( param, values, ChamNonUnit, ChamUnit );
+            break;
+        case TestSide:
+            parameters_read_intrange( param, values, ChamLeft, ChamRight );
+            break;
+        case TestNormtype:
+            parameters_read_intrange( param, values, ChamOneNorm, ChamMaxNorm );
+            break;
+        default:
+            fprintf( stderr, "parameters_read: range is not available for this datatype (%d)\n",
+                     param->valtype );
+        }
+        return;
+    }
+
+    parameters_read_list( param, values );
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Registers the value(s) given for a parameter, according to the kind
+ * of argument it takes.
+ *
+ *******************************************************************************
+ *
+ * @param[inout] param
+ *          The parameter that will receive the value
+ *          On exit, the value(s) (switch, list, range, ...) is/are added to the
+ *          parameter list of possible values
+ *
+ * @param[in] values
+ *          The string that holds the values (list, range, NULL if switch)
+ *
+ *******************************************************************************
+ */
+void
+parameters_addvalues( parameter_t *param,
+                      const char  *values )
+{
+    switch ( param->has_arg ) {
+    case 0:
+        /* Switch: being present is enough to enable it */
+        fprintf( stderr, "%s: enabled\n", param->name );
+        param->value.ival = 1;
+        break;
+
+    case 1:
+        /* Single value stored directly in the parameter */
+        param->value = param->read( values );
+        break;
+
+    default:
+        /* List or range of values */
+        parameters_read( param, values );
+    }
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Parses an input test file.
+ *
+ *******************************************************************************
+ *
+ * @param[in] filename
+ *          The name of the input file.
+ *
+ *******************************************************************************
+ */
+void
+parameters_read_file( const char *filename )
+{
+    /*
+     * Each useful line has the form "name = values". Lines that are empty, or
+     * whose first non blank character is '#', are ignored. Lines naming an
+     * unknown parameter, or missing a value the parameter needs, are reported
+     * on stderr and skipped. The program exits if the file cannot be opened.
+     */
+    FILE        *fp;
+    const char  *delim = " =";
+    char        *saveptr;
+    char        *line_read = NULL;
+    char        *line;
+    char        *name, *values;
+    size_t       len = 0;
+    parameter_t *param;
+
+    fp = fopen( filename, "r" );
+    if ( fp == NULL ) {
+        fprintf( stderr, "Error reading input file %s\n", filename );
+        perror("fopen");
+        exit(1);
+    }
+
+    /* getline() allocates, and grows, the buffer when it starts from NULL/0 */
+    while ( getline( &line_read, &len, fp ) != -1 )
+    {
+        line = line_read;
+
+        /* Removes possible extra spaces before looking at the content */
+        while ( line[0] == ' ' ) {
+            line++;
+        }
+
+        /* Ignores comments and empty lines */
+        if ( (line[0] == '#' ) ||
+             (line[0] == '\n') ||
+             (line[0] == '\0') )
+        {
+            continue;
+        }
+
+        /* Reads the parameter name and values */
+        name   = strtok_r( line, delim, &saveptr );
+        values = strtok_r( NULL, "", &saveptr );
+
+        /* The line holds nothing but separators */
+        if ( name == NULL ) {
+            continue;
+        }
+
+        /* Goes for the listed values, if any: strtok_r returns NULL without them */
+        while ( (values != NULL) &&
+                ((values[0] == ' ') ||
+                 (values[0] == '=')) )
+        {
+            values++;
+        }
+
+        //fprintf( stderr, "%s: %s", name, values );
+        param = parameters_getbyname( name );
+        if ( param == NULL ) {
+            fprintf( stderr, "Parameter %s is not know. We skip it\n", name );
+            continue;
+        }
+
+        /* Only the switches can be registered without any value */
+        if ( (values == NULL) && (param->has_arg != 0) ) {
+            fprintf( stderr, "Parameter %s has no value in %s. We skip it\n", name, filename );
+            continue;
+        }
+        parameters_addvalues( param, values );
+    }
+
+    free(line_read);
+    fclose(fp);
+}
+
diff --git a/new-testing/run_list.c b/new-testing/run_list.c
new file mode 100644
index 000000000..771813d69
--- /dev/null
+++ b/new-testing/run_list.c
@@ -0,0 +1,891 @@
+/**
+ *
+ * @file run_list.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ ***
+ *
+ * @brief Chameleon auxiliary routines for testing structures
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-07-18
+ *
+ */
+#include "testings.h"
+
+/**
+ ********************************************************************************
+ *
+ * @brief Searches for a specific value by its name.
+ *
+ *******************************************************************************
+ *
+ * @param[in] arglist
+ *          The list of arguments.
+ *
+ * @param[in] name
+ *          The name of the argument to look for. The comparison does not
+ *          depend on the case.
+ *
+ * @retval The argument structure of the argument, NULL if not found.
+ *
+ *******************************************************************************
+ */
+const run_arg_t *
+run_arg_get_byname( const run_arg_list_t *arglist, const char *name )
+{
+    const run_arg_t *cursor;
+
+    for ( cursor = arglist->head; cursor != NULL; cursor = cursor->next ) {
+        if ( strcasecmp( name, cursor->param->name ) != 0 ) {
+            continue;
+        }
+        /* First argument whose parameter has the requested name */
+        return cursor;
+    }
+
+    /* The whole list has been visited without any match */
+    return NULL;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Returns the value of an argument found by its name, and registers the
+ * default value in the list if the argument is missing.
+ *
+ *******************************************************************************
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ *
+ *******************************************************************************
+ */
+val_t
+run_arg_get( run_arg_list_t *arglist, const char *name, val_t defval )
+{
+    run_arg_t *cursor;
+    run_arg_t *added;
+
+    /* The argument is already registered: its value wins over the default */
+    for ( cursor = arglist->head; cursor != NULL; cursor = cursor->next ) {
+        if ( strcasecmp( name, cursor->param->name ) == 0 ) {
+            return cursor->value;
+        }
+    }
+
+    /* Otherwise, build a new argument that holds the default value */
+    added = calloc( 1, sizeof(run_arg_t) );
+
+    added->param = parameters_getbyname( name );
+    if ( added->param == NULL ) {
+        fprintf( stderr, "Argument %s is not registered\n", name );
+        exit(1);
+    }
+    added->value = defval;
+
+    /* And append it at the end of the list */
+    if ( arglist->head != NULL ) {
+        assert( arglist->tail != NULL );
+        assert( arglist->tail->next == NULL );
+        arglist->tail->next = added;
+    }
+    else {
+        assert( arglist->tail == NULL );
+        arglist->head = added;
+    }
+    arglist->tail = added;
+
+    return defval;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Adds a single generic argument value by name
+ *
+ *******************************************************************************
+ *
+ * @param[inout] arglist
+ *          The list of arguments to update.
+ *          On exit, the argument _name_ of value _value_ is added to the list.
+ *
+ * @param[in] name
+ *          The name of the argument to add in the list.
+ *
+ * @param[in] value
+ *          The value of the argument to add.
+ *
+ * @retval 0 for success, -1 if it fails to find a parameter named _name_.
+ *
+ *******************************************************************************
+ */
+int
+run_arg_add( run_arg_list_t *arglist, const char *name, val_t value )
+{
+    run_arg_t *item;
+
+    assert( arglist );
+
+    item        = calloc( 1, sizeof(run_arg_t) );
+    item->param = parameters_getbyname( name );
+
+    /* Only the registered parameters can be given a value */
+    if ( item->param == NULL ) {
+        fprintf( stderr, "Argument %s does not exist\n", name );
+        free( item );
+        return -1;
+    }
+    item->value = value;
+
+    /* Append the new argument at the end of the list */
+    if ( arglist->head != NULL ) {
+        assert( arglist->tail != NULL );
+        assert( arglist->tail->next == NULL );
+        arglist->tail->next = item;
+    }
+    else {
+        assert( arglist->tail == NULL );
+        arglist->head = item;
+    }
+    arglist->tail = item;
+
+    return 0;
+}
+
+/**
+ * @brief Adds a single int argument value by name
+ *
+ * Stores the value in the int field of a generic value, and forwards it to
+ * run_arg_add().
+ *
+ * @param[inout] arglist
+ *          The list of arguments to update.
+ *          On exit, the argument _name_ of value _value_ is added to the list.
+ *
+ * @param[in] name
+ *          The name of the argument to add in the list.
+ *
+ * @param[in] value
+ *          The value of the argument to add.
+ *
+ * @retval 0 for success, -1 if it fails to find a parameter named _name_.
+ */
+int
+run_arg_add_int( run_arg_list_t *arglist, const char *name, int value )
+{
+    val_t wrapped;
+
+    wrapped.ival = value;
+
+    return run_arg_add( arglist, name, wrapped );
+}
+
+/**
+ * @brief Adds a single double argument value by name
+ *
+ * Stores the value in the double field of a generic value, and forwards it to
+ * run_arg_add().
+ *
+ * @param[inout] arglist
+ *          The list of arguments to update.
+ *          On exit, the argument _name_ of value _value_ is added to the list.
+ *
+ * @param[in] name
+ *          The name of the argument to add in the list.
+ *
+ * @param[in] value
+ *          The value of the argument to add.
+ *
+ * @retval 0 for success, -1 if it fails to find a parameter named _name_.
+ */
+int
+run_arg_add_double( run_arg_list_t *arglist, const char *name, double value )
+{
+    val_t wrapped;
+
+    wrapped.dval = value;
+
+    return run_arg_add( arglist, name, wrapped );
+}
+
+/**
+ * @brief Searches for a specific int value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+int
+run_arg_get_int( run_arg_list_t *arglist, const char *name, int defval )
+{
+    val_t fallback;
+
+    /* Wrap the default into a generic value, and read back the same field */
+    fallback.ival = defval;
+
+    return run_arg_get( arglist, name, fallback ).ival;
+}
+
+/**
+ * @brief Searches for a specific float value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+float
+run_arg_get_float( run_arg_list_t *arglist, const char *name, float defval )
+{
+    val_t fallback;
+
+    /* Wrap the default into a generic value, and read back the same field */
+    fallback.sval = defval;
+
+    return run_arg_get( arglist, name, fallback ).sval;
+}
+
+/**
+ * @brief Searches for a specific double value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+double
+run_arg_get_double( run_arg_list_t *arglist, const char *name, double defval )
+{
+    val_t fallback;
+
+    /* Wrap the default into a generic value, and read back the same field */
+    fallback.dval = defval;
+
+    return run_arg_get( arglist, name, fallback ).dval;
+}
+
+/**
+ * @brief Searches for a specific single complex value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+CHAMELEON_Complex32_t
+run_arg_get_Complex32( run_arg_list_t *arglist, const char *name, CHAMELEON_Complex32_t defval )
+{
+    val_t fallback;
+
+    /* Wrap the default into a generic value, and read back the same field */
+    fallback.cval = defval;
+
+    return run_arg_get( arglist, name, fallback ).cval;
+}
+
+/**
+ * @brief Searches for a double complex value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+CHAMELEON_Complex64_t
+run_arg_get_Complex64( run_arg_list_t *arglist, const char *name, CHAMELEON_Complex64_t defval )
+{
+    val_t fallback;
+
+    /* Wrap the default into a generic value, and read back the same field */
+    fallback.zval = defval;
+
+    return run_arg_get( arglist, name, fallback ).zval;
+}
+
+/**
+ * @brief Searches for a cham_trans_t value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+cham_trans_t
+run_arg_get_trans( run_arg_list_t *arglist, const char *name, cham_trans_t defval )
+{
+    val_t fallback;
+
+    /* Wrap the default into a generic value, and read back the same field */
+    fallback.trans = defval;
+
+    return run_arg_get( arglist, name, fallback ).trans;
+}
+
+/**
+ * @brief Searches for a cham_uplo_t value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+cham_uplo_t
+run_arg_get_uplo( run_arg_list_t *arglist, const char *name, cham_uplo_t defval )
+{
+    val_t fallback;
+
+    /* Wrap the default into a generic value, and read back the same field */
+    fallback.uplo = defval;
+
+    return run_arg_get( arglist, name, fallback ).uplo;
+}
+
+/**
+ * @brief Searches for a cham_diag_t value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+cham_diag_t
+run_arg_get_diag( run_arg_list_t *arglist, const char *name, cham_diag_t defval )
+{
+    val_t fallback;
+
+    /* Wrap the default into a generic value, and read back the same field */
+    fallback.diag = defval;
+
+    return run_arg_get( arglist, name, fallback ).diag;
+}
+
+/**
+ * @brief Searches for a cham_side_t value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+cham_side_t
+run_arg_get_side( run_arg_list_t *arglist, const char *name, cham_side_t defval )
+{
+    val_t fallback;
+
+    /* Wrap the default into a generic value, and read back the same field */
+    fallback.side = defval;
+
+    return run_arg_get( arglist, name, fallback ).side;
+}
+
+/**
+ * @brief Searches for a cham_normtype_t value by its name.
+ *
+ * @param[inout] arglist
+ *          The list of arguments.
+ *          On exit, if the argument was not in the list, the default value is
+ *          stored in it.
+ *
+ * @param[in] name
+ *          The name of the argument to look for.
+ *
+ * @param[in] defval
+ *          The default value if no argument is found with this name. This value
+ *          is added to the list if not found.
+ *
+ * @retval The value of the argument _name_.
+ */
+cham_normtype_t
+run_arg_get_ntype( run_arg_list_t *arglist, const char *name, cham_normtype_t defval )
+{
+    val_t fallback;
+
+    /* Wrap the default into a generic value, and read back the same field */
+    fallback.ntype = defval;
+
+    return run_arg_get( arglist, name, fallback ).ntype;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Frees all of memory allocated for an argument list.
+ *
+ *******************************************************************************
+ *
+ * @param[inout] arglist
+ *          The list of arguments to free.
+ *
+ *******************************************************************************
+ */
+void run_arg_destroy( run_arg_list_t *arglist )
+{
+    run_arg_t *cursor = arglist->head;
+    run_arg_t *following;
+
+    /* Save the successor before releasing each element */
+    while ( cursor != NULL ) {
+        following = cursor->next;
+        free( cursor );
+        cursor = following;
+    }
+
+    /* The list itself is not freed: it is left empty */
+    arglist->head = NULL;
+    arglist->tail = NULL;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Add a single run argument list to the list of runs to perform
+ *
+ *******************************************************************************
+ *
+ * @param[inout] runlist
+ *          The list of all the runs to perform.
+ *          On exit, a new run is appended at the end of the list.
+ *
+ * @param[in] arglist
+ *          The first element of the chain of running arguments. Each element
+ *          of the chain is duplicated into the new run, in the same order.
+ *
+ *******************************************************************************
+ */
+void
+run_list_add_one( run_list_t *runlist,
+                  run_arg_t  *arglist )
+{
+    const run_arg_t *source;
+    run_arg_t       *duplicate;
+    run_list_elt_t  *run = malloc( sizeof( run_list_elt_t ) );
+
+    run->args.head = NULL;
+    run->args.tail = NULL;
+    run->next      = NULL;
+
+    /* Duplicate the chain of arguments into the new run */
+    for ( source = arglist; source != NULL; source = source->next ) {
+        duplicate  = malloc( sizeof( run_arg_t ) );
+        *duplicate = *source;
+
+        if ( run->args.head != NULL ) {
+            /* Replaces the next field copied from the original chain */
+            run->args.tail->next = duplicate;
+        }
+        else {
+            run->args.head = duplicate;
+        }
+        run->args.tail = duplicate;
+    }
+
+    /* Append the run at the end of the list of runs */
+    if ( runlist->head != NULL ) {
+        assert( runlist->tail->next == NULL );
+        runlist->tail->next = run;
+    }
+    else {
+        assert( runlist->tail == NULL );
+        runlist->head = run;
+    }
+    runlist->tail = run;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Recursive function to generate the list of runs from the cartesian
+ * product of the parameter values
+ *
+ *******************************************************************************
+
*
+ * @param[in] test_params
+ *          The NULL terminated list of the names of the parameters that remain
+ *          to be considered in the test.
+ *
+ * @param[inout] runlist
+ *          The list of all the runs generated by the cartesian product.
+ *
+ * @param[in] arglist
+ *          The current list of running arguments.
+ *
+ *******************************************************************************
+ */
+void
+run_list_generate_rec( const char **test_params,
+                       run_list_t  *runlist,
+                       run_arg_t   *arglist )
+{
+    parameter_t *param  = NULL;
+    vallist_t   *item;
+    run_arg_t    level;
+    int          usable = 0;
+
+    /* No parameter left: the current arguments define one complete run */
+    if ( *test_params == NULL ) {
+        run_list_add_one( runlist, arglist );
+        return;
+    }
+
+    /*
+     * Look for the next parameter that is known, flagged as an input, and that
+     * has at least one value registered.
+     */
+    while ( !usable && (*test_params != NULL) )
+    {
+        param = parameters_getbyname( *test_params );
+        test_params++;
+
+        usable = ( param != NULL ) &&
+                 (param->flags & PARAM_INPUT) &&
+                 ( param->vallist != NULL );
+    }
+
+    if ( !usable ) {
+        /* The end of the list is reached: recurse once more to register the run */
+        run_list_generate_rec( test_params, runlist, arglist );
+        return;
+    }
+
+    /* Chain one argument in front of the current ones for each value */
+    level.param = param;
+    level.next  = arglist;
+    for ( item = param->vallist; item != NULL; item = item->next ) {
+        level.value = item->value;
+        run_list_generate_rec( test_params, runlist, &level );
+    }
+
+    return;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Generate the list of runs from the cartesian product of the parameter
+ * values.
+ *
+ *******************************************************************************
+ *
+ * @param[in] params
+ *          The list of parameters that are considered in the test
+ *
+ * @return The list of all the runs generated by the cartesian product.
+ *
+ *******************************************************************************
+ */
+run_list_t *
+run_list_generate( const char **params )
+{
+    run_list_t *runs = calloc( 1, sizeof(run_list_t) );
+
+    /* Start the recursion without any argument selected */
+    run_list_generate_rec( params, runs, NULL );
+
+    return runs;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Frees one run of the run list, together with its arguments
+ *
+ *******************************************************************************
+ *
+ * @param[inout] run
+ *          The run to free. The runs that follow it in the list are not
+ *          touched.
+ *
+ *******************************************************************************
+ */
+void
+run_list_destroy( run_list_elt_t *run )
+{
+    run_arg_destroy( &run->args );
+    free( run );
+}
+
+/**
+ * @brief The common input parameters to all tests
+ */
+const char *common_input[]  = { "threads", "gpus", "P", "Q", NULL };
+
+/**
+ * @brief The common output parameters to all tests
+ */
+const char *common_output[] = { "time", "gflops", NULL };
+
+/**
+ ********************************************************************************
+ *
+ * @brief Print into a string the header associated to a list of parameters
+ *
+ *******************************************************************************
+ *
+ * @param[in] list
+ *          The list of parameters that will be printed.
+ *
+ * @param[in] human
+ *          Boolean to switch between human readable and csv outputs.
+ *
+ * @param[in] str
+ *          Pointer to the string that will store the printing. No size is
+ *          given: it must be large enough to hold all the names.
+ *
+ * @return The pointer to the end of the string
+ *
+ *******************************************************************************
+ */
+char *
+run_print_header_partial( const char **list, int human, char *str )
+{
+    const char  **entry;
+    parameter_t  *param;
+    int           written;
+
+    for ( entry = list; *entry != NULL; entry++ ) {
+        if ( !human ) {
+            /* csv: the names are simply separated by semicolons */
+            written = sprintf( str, ";%s", *entry );
+        }
+        else {
+            /* human: each name is padded to the print size of its parameter */
+            param = parameters_getbyname( *entry );
+            assert( param != NULL );
+            written = sprintf( str, " %*s", param->psize, *entry );
+        }
+        assert( written > 0 );
+        str += written;
+    }
+    return str;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Print the header associated to a test
+ *
+ *******************************************************************************
+ *
+ * @param[in] test
+ *          The test that is used.
+ *
+ * @param[in] check
+ *          Tells if check parameters should be printed or not.
+ *
+ * @param[in] human
+ *          Boolean to switch between human readable and csv outputs.
+ *
+ *******************************************************************************
+ */
+void
+run_print_header( const testing_t *test,
+                  int check, int human )
+{
+    int   rank = CHAMELEON_Comm_rank();
+    int   written;
+    char  line[2048];
+    char *cursor = line;
+
+    /* Only the process of rank 0 prints the header */
+    if ( rank != 0 ) {
+        return;
+    }
+
+    /* The first two columns are the same for all the tests */
+    if ( !human ) {
+        written = sprintf( cursor, "%s;%s",
+                           "Id", "Function" );
+    }
+    else {
+        written = sprintf( cursor, "%3s %12s",
+                           "Id", "Function" );
+    }
+    cursor += written;
+
+    /* Inputs: the common ones, then the ones of the test */
+    cursor = run_print_header_partial( common_input, human, cursor );
+    cursor = run_print_header_partial( test->params, human, cursor );
+
+    /* Outputs: the common ones, then the ones of the test */
+    cursor = run_print_header_partial( common_output, human, cursor );
+    cursor = run_print_header_partial( test->output,  human, cursor );
+
+    /* Outputs of the check, only if it is enabled */
+    if ( check ) {
+        cursor = run_print_header_partial( test->outchk, human, cursor );
+    }
+
+    fprintf( stdout, "%s\n", line );
+    return;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Print into a string the data associated to a list of parameters
+ *
+ *******************************************************************************
+ *
+ * @param[in] list
+ *          The list of parameters that will be printed.
+ *
+ * @param[in] arglist
+ *          The argument list in which to find the values of the parameters.
+ *
+ * @param[in] human
+ *          Boolean to switch between human readable and csv outputs.
+ *
+ * @param[in] str
+ *          Pointer to the string that will store the printing
+ *
+ * @return The pointer to the end of the string
+ *
+ *******************************************************************************
+ */
+char *
+run_print_line_partial( const char **list, const run_arg_list_t *arglist,
+                        int human, char *str )
+{
+    const char      **entry;
+    const run_arg_t  *arg;
+    parameter_t      *param;
+    val_t             value;
+
+    for ( entry = list; *entry != NULL; entry++ ) {
+        arg = run_arg_get_byname( arglist, *entry );
+
+        if ( arg != NULL ) {
+            /* The value comes from the arguments of the run */
+            param = arg->param;
+            assert( param != NULL );
+            value = arg->value;
+        }
+        else {
+            /* Should be a common parameter: use the value it stores itself */
+            param = parameters_getbyname( *entry );
+            assert( param != NULL );
+
+            value = param->value;
+        }
+
+        /* The printer of the parameter returns the new end of the string */
+        str = param->sprint( value, human, param->psize, str );
+    }
+    return str;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @brief Print the data associated to one run of a test
+ *
+ *******************************************************************************
+ *
+ * @param[in] test
+ *          The test that is used.
+ *
+ * @param[in] arglist
+ *          The argument list in which to find the values of the parameters.
+ *
+ * @param[in] check
+ *          Tells if check parameters should be printed or not.
+ *
+ * @param[in] human
+ *          Boolean to switch between human readable and csv outputs.
+ *
+ * @param[in] id
+ *          The id of the run
+ *
+ *******************************************************************************
+ */
+void
+run_print_line( const testing_t *test, const run_arg_list_t *arglist,
+                int check, int human, int id )
+{
+    int   rank = CHAMELEON_Comm_rank();
+    int   written;
+    char  line[2048];
+    char *cursor = line;
+
+    /* Only the process of rank 0 prints the results */
+    if ( rank != 0 ) {
+        return;
+    }
+
+    /* The first two columns are the id of the run and the name of the test */
+    if ( !human ) {
+        written = sprintf( cursor, "%d;%s",
+                           id, test->name );
+    }
+    else {
+        written = sprintf( cursor, "%3d %12s",
+                           id, test->name );
+    }
+    cursor += written;
+
+    /* Inputs: the common ones, then the ones of the test */
+    cursor = run_print_line_partial( common_input, arglist, human, cursor );
+    cursor = run_print_line_partial( test->params, arglist, human, cursor );
+
+    /* Outputs: the common ones, then the ones of the test */
+    cursor = run_print_line_partial( common_output, arglist, human, cursor );
+    cursor = run_print_line_partial( test->output,  arglist, human, cursor );
+
+    /* Outputs of the check, only if it is enabled */
+    if ( check ) {
+        cursor = run_print_line_partial( test->outchk, arglist, human, cursor );
+    }
+
+    fprintf( stdout, "%s\n", line );
+    return;
+}
diff --git a/new-testing/testing_zauxiliary.c b/new-testing/testing_zauxiliary.c
new file mode 100644
index 000000000..76d69f43e
--- /dev/null
+++ b/new-testing/testing_zauxiliary.c
@@ -0,0 +1,524 @@
+/**
+ *
+ * @file testing_zauxiliary.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon CHAMELEON_Complex64_t auxiliary testings routines + * + * @version 0.9.2 + * @author Mathieu Faverge + * @author Cédric Castagnède + * @author Lucas Barros de Assis + * @date 2014-11-16 + * @precisions normal z -> c d s + * + */ +#include "testings.h" +#if defined(CHAMELEON_HAVE_GETOPT_LONG) +#include <getopt.h> +#else +struct option; +#endif + +/** + * @brief Defines all the parameters of the testings + */ +static parameter_t parameters[] = { + { "id", "Id of the run", 0, PARAM_OUTPUT, 0, 3, TestValInt, {0}, NULL, NULL, sprint_int }, + + { NULL, "Options", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, + { "help", "Show this help", 'h', PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "check", "Enable checking of the result", 'c', PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "human", "Enable human readable mode", 'H', PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int }, + + { NULL, "Machine parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, + { "threads", "Number of CPU workers per node", 't', PARAM_OPTION | PARAM_OUTPUT, 1, 7, TestValInt, {1}, NULL, pread_int, sprint_int }, + { "gpus", "Number of GPU workers per node", 'g', PARAM_OPTION | PARAM_OUTPUT, 1, 4, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "P", "Rows (P) in the PxQ process grid", 'P', PARAM_OPTION | PARAM_OUTPUT, 1, 2, TestValInt, {1}, NULL, pread_int, sprint_int }, + { "Q", "Columns (Q) in the PxQ process grid", 'Q', PARAM_OUTPUT, 1, 2, TestValInt, {1}, NULL, pread_int, sprint_int }, + + { NULL, "Main input parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, + { "op", "Operation to test/time", 'o', PARAM_OPTION | PARAM_OUTPUT, 1, 1, TestString, {0}, NULL, pread_string, sprint_string }, + { "file", "Input file", 'f', PARAM_OPTION, 1, 1, TestString, {0}, NULL, pread_string, sprint_string }, + + { NULL, "Matrix definition parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, 
NULL, NULL, NULL }, + { "m", "Dimension M of the operation", 'm', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "n", "Dimension N of the operation", 'n', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "k", "Dimension K of the operation", 'k', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "nrhs", "Dimension NRHS of the operation", 'r', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestValInt, {0}, NULL, pread_int, sprint_int }, + + { "nb", "Tile size nb", 'b', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "ib", "Inner tile size ib", 'i', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 2, TestValInt, {0}, NULL, pread_int, sprint_int }, + + { "lda", "Leading dimension of the matrix A", 'A', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "ldb", "Leading dimension of the matrix B", 'B', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "ldc", "Leading dimension of the matrix C", 'C', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestValInt, {0}, NULL, pread_int, sprint_int }, + + { "seedA", "Seed for the matrix A random generation", 'X', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 11, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "seedB", "Seed for the matrix B random generation", 'Y', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 11, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "seedC", "Seed for the matrix C random generation", 'Z', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 11, TestValInt, {0}, NULL, pread_int, sprint_int }, + + { NULL, "Operation specific parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, + { "trans", "Value of the trans parameter", -11, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 9, TestTrans, 
{0}, NULL, pread_trans, sprint_trans }, + { "transA", "Value of the transA parameter", -12, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 9, TestTrans, {0}, NULL, pread_trans, sprint_trans }, + { "transB", "Value of the transB parameter", -13, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 9, TestTrans, {0}, NULL, pread_trans, sprint_trans }, + { "uplo", "Value of the uplo parameter", -14, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 7, TestUplo, {0}, NULL, pread_uplo, sprint_uplo }, + { "diag", "Value of the diag parameter", -15, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 7, TestDiag, {0}, NULL, pread_diag, sprint_diag }, + { "side", "Value of the side parameter", -16, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestSide, {0}, NULL, pread_side, sprint_side }, + { "norm", "Value of the norm parameter", -17, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestNormtype, {0}, NULL, pread_norm, sprint_norm }, + + { NULL, "Operation specific scalar", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, + { "alpha", "Value of the scalar alpha", 'x', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 12, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 }, + { "beta", "Value of the scalar beta", 'y', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 12, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 }, + { "bump", "Bump value to make a matrix diagonal dominant", 'z', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 12, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 }, + + { NULL, "QR/LQ parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, + { "qra", "Size of TS domain (=RH for householder trees)", -20, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "qrp", "Size of high level tree for distributed", -21, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "llvl", "Tree used for low level reduction insides nodes", -22, 
PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "hlvl", "Tree used for high level reduction between nodes", -23, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "domino", "Enable/Disable the domino between upper and lower trees", -24, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 6, TestValInt, {0}, NULL, pread_int, sprint_int }, + + { "time", "Time in s", 1000, PARAM_OUTPUT, 2, 12, TestValFixdbl, {0}, NULL, pread_fixdbl, sprint_fixdbl }, + { "gflops", "GFlop/s", 1001, PARAM_OUTPUT, 2, 12, TestValFixdbl, {0}, NULL, pread_fixdbl, sprint_fixdbl }, + { "RETURN", "Result of the testing: SUCCESS/FAILED", 1002, PARAM_OUTPUT, 2, 7, TestValInt, {0}, NULL, pread_int, sprint_check }, + { "||Ax-b||", "Norm of the residual", 1003, PARAM_OUTPUT, 2, 12, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||A-fact(A)||", "Norm of the residual", 1004, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||A||", "Norm of the matrix A", 1005, PARAM_OUTPUT, 2, 12, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||B||", "Norm of the matrix B", 1006, PARAM_OUTPUT, 2, 12, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||C||", "Norm of the matrix C", 1007, PARAM_OUTPUT, 2, 12, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||b||", "Norm of the vector b", 1008, PARAM_OUTPUT, 2, 12, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||x||", "Norm of the vector x", 1009, PARAM_OUTPUT, 2, 12, TestValDouble, {0}, NULL, pread_double, sprint_double }, + { "||Ax-b||/N/eps/(||A||||x||+||b||", "", 1010, PARAM_OUTPUT, 2, 22, TestValDouble, {0}, NULL, pread_double, sprint_double }, +}; + +#define STR_MAX_LENGTH 256 + +void print_usage( const char* prog_name ) +{ + int rank = CHAMELEON_My_Mpi_Rank(); + + if (rank == 0) { + parameter_t *param = parameters; + int i, nbparams = sizeof( parameters ) / 
sizeof( parameter_t ); + printf( "Usage:\n" + " %s -o|--op operation_name [options]\n" + " %s -f|--file input_file [options]\n", + prog_name, prog_name ); + + for (i=0; i<nbparams; i++, param++) { + char str[STR_MAX_LENGTH]; + + /* This is not an option, we skip it */ + if ( !(param->flags & PARAM_OPTION) ) { + continue; + } + + /* This is an option header */ + if ( param->name == NULL ) { + printf( "\n %s:\n", param->helper ); + continue; + } + + if ( param->shname > 0 ) { + snprintf( str, STR_MAX_LENGTH, "-%c, --%s", + param->shname, param->name ); + } + else { + snprintf( str, STR_MAX_LENGTH, " --%s", + param->name ); + } + + /* If an argument is needed, add " x" */ + if ( param->has_arg > 0 ) { + int len = strlen(str); + assert( len < (STR_MAX_LENGTH-3) ); + + str[ len ] = ' '; + str[ len+1 ] = 'x'; + str[ len+2 ] = '\0'; + } + printf( " %-23s %s\n", + str, param->helper ); + } + + printf( "\n" ); + } +} + +/** + * @brief List of all the testings available. + */ +static testing_t *testings = NULL; + +/** + * @brief Function to register a new testing + */ +void +testing_register( testing_t *test ) +{ + assert( test->next == NULL ); + test->next = testings; + testings = test; +} + +/** + * @brief Get the testing structure associated to a test + */ +testing_t * +testing_gettest( const char *prog_name, + const char *func_name ) +{ + testing_t *test = testings; + int rank = CHAMELEON_Comm_rank(); + + if ( func_name == NULL ) { + print_usage( prog_name ); + exit(1); + } + + while( test != NULL ) { + /* Check the name without the precision */ + if ( strcasecmp( func_name, test->name + 1 ) == 0 ) { + break; + } + test = test->next; + } + + if ( test == NULL ) { + if ( rank == 0 ) { + printf( "Operation %s not found\n", func_name ); + printf( "The available operations are:\n" ); + test = testings; + while( test != NULL ) { + printf( " %-10s %s\n", test->name, test->helper ); + test = test->next; + } + } + exit(1); + } + + return test; +} + +int +parameters_compute_q( 
int p ) +{ + parameter_t *param; + int np = CHAMELEON_Comm_size(); + + if ( (np % p) != 0 ) { + fprintf( stderr, "ERROR: The number of processes (%d) must be a multiple of P (%d)\n", np, p ); + exit(1); + } + + param = parameters_get( 'Q' ); + param->value.ival = np / p; + return param->value.ival; +} + +void +parameters_getopt_init( char *optstring, + struct option **longopts ) +{ + parameter_t *param = parameters; + int i, nbparams = sizeof( parameters ) / sizeof( parameter_t ); + int nboptions = 0; + int strpos = 0; + + for ( i=0; i<nbparams; i++, param++ ) { + /* This is not an option, we skip it */ + if ( !(param->flags & PARAM_OPTION) || + (param->name == NULL) ) + { + continue; + } + + nboptions++; + + if ( param->shname < 0 ) { + continue; + } + + optstring[strpos] = param->shname; + strpos++; + assert( strpos < STR_MAX_LENGTH ); + + if ( param->has_arg > 0 ) { + optstring[strpos] = ':'; + strpos++; + assert( strpos < STR_MAX_LENGTH ); + } + } + optstring[strpos] = '\0'; + + /* Now, let's generate the long opt if needed */ +#if defined(CHAMELEON_HAVE_GETOPT_LONG) + if ( longopts != NULL ) { + struct option *opt; + *longopts = calloc( nboptions+1, sizeof( struct option ) ); + + opt = *longopts; + param = parameters; + + for ( i=0; i<nboptions; i++, opt++, param++ ) { + + /* Look for a valid option */ + while ( !(param->flags & PARAM_OPTION) || + (param->name == NULL) ) + { + param++; + } + + opt->name = param->name; + opt->has_arg = ( param->has_arg > 0 ) ? 
1 : 0; + opt->flag = NULL; + opt->val = param->shname; + } + } +#endif +} + +parameter_t * +parameters_get( int shname ) +{ + parameter_t *param = parameters; + int i, nbparams = sizeof( parameters ) / sizeof( parameter_t ); + + for ( i=0; i<nbparams; i++, param++ ) { + /* This is not an option, we skip it */ + if ( param->name == NULL ) { + continue; + } + + if ( shname == param->shname ) { + return param; + } + } + + fprintf( stderr, "parameters_get could not find parameter %d(%c)\n", shname, shname ); + return NULL; +} + +int +parameters_getvalue_int( const char *name ) +{ + parameter_t *param = parameters; + int i, nbparams = sizeof( parameters ) / sizeof( parameter_t ); + + for ( i=0; i<nbparams; i++, param++ ) { + /* This is not an option, we skip it */ + if ( param->name == NULL ) { + continue; + } + + if ( strcasecmp( name, param->name ) != 0 ) { + continue; + } + + if ( param->has_arg > 1 ) { + fprintf( stderr, "parameters_getvalue_int should not be called with parameter %s\n", name ); + return -1; + } + + if ( param->valtype != TestValInt ) { + fprintf( stderr, "parameters_getvalue_int has been called with a non integer parameter (%s)\n", name ); + return -1; + } + + return param->value.ival; + } + + fprintf( stderr, "parameters_getvalue_int could not find parameter %s\n", name ); + return -1; +} + +char * +parameters_getvalue_str( const char *name ) +{ + parameter_t *param = parameters; + int i, nbparams = sizeof( parameters ) / sizeof( parameter_t ); + + for ( i=0; i<nbparams; i++, param++ ) { + /* This is not an option, we skip it */ + if ( param->name == NULL ) { + continue; + } + + if ( strcasecmp( name, param->name ) != 0 ) { + continue; + } + + if ( param->has_arg > 1 ) { + fprintf( stderr, "parameters_getvalue_str should not be called with parameter %s\n", name ); + return NULL; + } + + if ( param->valtype != TestString ) { + fprintf( stderr, "parameters_getvalue_str has been called with a non string parameter (%s)\n", name ); + return NULL; + } + 
+ return param->value.str; + } + + fprintf( stderr, "parameters_getvalue_str could not find parameter %s\n", name ); + return NULL; +} + +parameter_t * +parameters_getbyname( const char *name ) +{ + parameter_t *param = parameters; + int i, nbparams = sizeof( parameters ) / sizeof( parameter_t ); + + for ( i=0; i<nbparams; i++, param++ ) { + /* This is not an option, we skip it */ + if ( param->name == NULL ) { + continue; + } + + if ( strcasecmp( name, param->name ) != 0 ) { + continue; + } + + /* if ( param->has_arg < 2 ) { */ + /* fprintf( stderr, "parameters_getbyname should not be called with parameter %s\n", name ); */ + /* return NULL; */ + /* } */ + + return param; + } + + fprintf( stderr, "parameters_getbyname could not find parameter %s\n", name ); + return NULL; +} + +void parameters_parser( int argc, char **argv ) +{ + int opt; + char optstring[STR_MAX_LENGTH]; + struct option *longopts = NULL; + parameter_t *param; + + parameters_getopt_init( optstring, &longopts ); + +#if defined(CHAMELEON_HAVE_GETOPT_LONG) + while ((opt = getopt_long(argc, argv, optstring, longopts, NULL)) != -1) +#else + while ((opt = getopt(argc, argv, optstring)) != -1) +#endif + { + switch(opt) { + case 'h': + print_usage(argv[0]); + exit(0); + + case '?': /* error from getopt[_long] */ + exit(1); + break; + + default: + param = parameters_get( opt ); + if ( param == NULL ) { + print_usage(argv[0]); + exit(1); + } + parameters_addvalues( param, optarg ); + } + } + + if ( longopts != NULL ) { + free( longopts ); + } +} + +void +parameters_destroy() +{ + parameter_t *param = parameters; + int i, nbparams = sizeof( parameters ) / sizeof( parameter_t ); + vallist_t *current, *next; + + for ( i=0; i<nbparams; i++, param++ ) { + /* This is not an option, we skip it */ + if ( param->has_arg < 2 ) { + continue; + } + + current = param->vallist; + while ( current != NULL ) + { + next = current->next; + free( current ); + current = next; + } + } + return; +} + +int main (int argc, char 
**argv) { + + int ncores, ngpus, human, check; + int rc, info = 0; + int run_id = 0; + char *func_name; + char *input_file; + run_list_t *runlist; + testing_t * test; + run_list_elt_t *run, *next; + + /* Reads the arguments from command line */ + parameters_parser( argc, argv ); + input_file = parameters_getvalue_str( "file" ); + if ( input_file != NULL ) { + parameters_read_file( input_file ); + free(input_file); + } + ncores = parameters_getvalue_int( "threads" ); + ngpus = parameters_getvalue_int( "gpus" ); + check = parameters_getvalue_int( "check" ); + human = parameters_getvalue_int( "human" ); + func_name = parameters_getvalue_str( "op" ); + + CHAMELEON_Init( ncores, ngpus ); + + /* Binds the right function to be called and builds the parameters combinations */ + test = testing_gettest( argv[0], func_name ); + free(func_name); + + /* Generate the cartesian product of the parameters */ + runlist = run_list_generate( test->params ); + + /* Executes the tests */ + run_print_header( test, check, human ); + run = runlist->head; + while ( run != NULL ) { + rc = test->fptr( &(run->args), check ); + + /* If rc < 0, we skipped the test */ + if ( rc >= 0 ) { + run_arg_add_int( &(run->args), "RETURN", rc ); + run_print_line( test, &(run->args), check, human, run_id ); + run_id++; + info += rc; + } + + /* Move to next run */ + next = run->next; + run_list_destroy( run ); + run = next; + } + free( runlist ); + + CHAMELEON_Finalize(); + parameters_destroy(); + + return info; +} diff --git a/new-testing/testing_zauxiliary.h b/new-testing/testing_zauxiliary.h new file mode 100644 index 000000000..2be0d88c5 --- /dev/null +++ b/new-testing/testing_zauxiliary.h @@ -0,0 +1,110 @@ +/** + * + * @file testing_zauxiliary.h + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon CHAMELEON_Complex64_t auxiliary testings header + * + * @version 0.9.2 + * @author Mathieu Faverge + * @author Cédric Castagnède + * @date 2014-11-16 + * @precisions normal z -> c d s + * + */ +#ifndef _testing_zauxiliary_h_ +#define _testing_zauxiliary_h_ + +#include "testings.h" + +/** + * + * Macro for trace generation + * + */ +#define START_TRACING() \ + RUNTIME_start_stats(); \ + if(iparam[IPARAM_TRACE] == 2) { \ + RUNTIME_start_profiling(); \ + } \ + if(iparam[IPARAM_BOUND]) { \ + CHAMELEON_Enable(CHAMELEON_BOUND); \ + } + +#define STOP_TRACING() \ + RUNTIME_stop_stats(); \ + if(iparam[IPARAM_TRACE] == 2) { \ + RUNTIME_stop_profiling(); \ + } \ + if(iparam[IPARAM_BOUND]) { \ + CHAMELEON_Disable(CHAMELEON_BOUND); \ + } + +/** + * + * Macro for DAG generation + * + */ +#if 0 +#define START_DAG() \ + if ( iparam[IPARAM_DAG] == 2 ) \ + CHAMELEON_Enable(CHAMELEON_DAG); + +#define STOP_DAG() \ + if ( iparam[IPARAM_DAG] == 2 ) \ + CHAMELEON_Disable(CHAMELEON_DAG); +#else +#define START_DAG() do {} while(0); +#define STOP_DAG() do {} while(0); +#endif + +/** + * + * Synchro for distributed computations + * + */ +#if defined(CHAMELEON_USE_MPI) +#define START_DISTRIBUTED() CHAMELEON_Distributed_start(); +#define STOP_DISTRIBUTED() CHAMELEON_Distributed_stop(); +#else +#define START_DISTRIBUTED() do {} while(0); +#define STOP_DISTRIBUTED() do {} while(0); +#endif + +/** + * + * General Macros for timing + * + */ +/* #define START_TIMING() \ */ +/* START_DAG(); \ */ +/* START_TRACING(); \ */ +/* START_DISTRIBUTED(); \ */ +/* t = -RUNTIME_get_time(); */ + +/* #define STOP_TIMING() \ */ +/* STOP_DISTRIBUTED(); \ */ +/* t += RUNTIME_get_time(); \ */ +/* STOP_TRACING(); \ */ +/* STOP_DAG(); \ */ +/* if (iparam[IPARAM_PROFILE] == 2) { \ */ +/* RUNTIME_kernelprofile_display(); \ */ +/* RUNTIME_schedprofile_display(); \ */ +/* } \ */ +/* *t_ = t; */ + +#define START_TIMING( _t_ ) \ + START_DISTRIBUTED(); \ + (_t_) = RUNTIME_get_time(); + 
+#define STOP_TIMING( _t_ )                     \
+    STOP_DISTRIBUTED();                        \
+    (_t_) = RUNTIME_get_time() - (_t_);
+
+#endif /* _testing_zauxiliary_h_ */
diff --git a/new-testing/testing_zcheck.c b/new-testing/testing_zcheck.c
new file mode 100644
index 000000000..43fc59187
--- /dev/null
+++ b/new-testing/testing_zcheck.c
@@ -0,0 +1,1856 @@
+/**
+ *
+ * @file testing_zcheck.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon CHAMELEON_Complex64_t auxiliary testings routines
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-07-16
+ * @precisions normal z -> c d s
+ *
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <chameleon.h>
+#include <coreblas/cblas.h>
+#include <coreblas/lapacke.h>
+#include <coreblas.h>
+#if defined(CHAMELEON_USE_MPI)
+#include <mpi.h>
+#endif
+#include "../control/common.h"
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+
+#ifndef max
+#define max( _a_, _b_ ) ( (_a_) > (_b_) ? (_a_) : (_b_) )
+#endif
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Compares two matrices by their norms.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Whether it is an upper triangular matrix, a lower triangular matrix or a general matrix.
+ *
+ * @param[in] descA
+ *          The first matrix descriptor.
+ *
+ * @param[in] descB
+ *          The second matrix descriptor.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the matrices differ by more than the tolerance, or the residual
+ *           is NaN/Inf
+ *
+ *******************************************************************************
+ */
+int check_zmatrices( cham_uplo_t uplo, CHAM_desc_t *descA, CHAM_desc_t *descB )
+{
+    int info_solution = 0;
+    int M   = descA->m;
+    int N   = descB->n;
+    int LDA = descA->m;
+    int rank = CHAMELEON_Comm_rank();
+    double Anorm, Rnorm, result;
+    double eps = LAPACKE_dlamch_work('e');
+    CHAMELEON_Complex64_t *A = NULL;
+    CHAMELEON_Complex64_t *B = NULL;
+
+    /* Only the main process holds the LAPACK copies */
+    if ( rank == 0 ) {
+        A = (CHAMELEON_Complex64_t *)malloc(LDA*N*sizeof(CHAMELEON_Complex64_t));
+        B = (CHAMELEON_Complex64_t *)malloc(LDA*N*sizeof(CHAMELEON_Complex64_t));
+    }
+
+    /* Converts the matrices to LAPACK layout in order to compare them on the main process */
+    CHAMELEON_Tile_to_Lapack( descA, A, LDA );
+    CHAMELEON_Tile_to_Lapack( descB, B, LDA );
+
+    if ( rank == 0 ) {
+        double *work = (double *)malloc(LDA*N*sizeof(double));
+
+        /*
+         * Computes the max norm of the reference matrix A. Both branches
+         * must read A: B is overwritten by the difference right below and
+         * is only used for the residual.
+         */
+        if ( uplo == ChamUpperLower ) {
+            Anorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, A, LDA, work );
+        }
+        else {
+            Anorm = LAPACKE_zlantr_work( LAPACK_COL_MAJOR, 'M', chameleon_lapack_const(uplo), 'N',
+                                         M, N, A, LDA, work );
+        }
+
+        /* Computes the difference with the core function: B <- A - B */
+        CORE_zgeadd( ChamNoTrans, M, N, 1, A, LDA, -1, B, LDA );
+
+        /* Computes the residual's norm */
+        if ( uplo == ChamUpperLower ) {
+            Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, B, LDA, work );
+        }
+        else {
+            Rnorm = LAPACKE_zlantr_work( LAPACK_COL_MAJOR, 'M', chameleon_lapack_const(uplo), 'N',
+                                         M, N, B, LDA, work );
+        }
+
+        /*
+         * A zero reference (e.g. a matrix scaled by 0) must not turn the
+         * ratio into NaN: fall back to the absolute residual in that case.
+         */
+        if ( Anorm != 0. ) {
+            result = Rnorm / (Anorm * eps);
+        }
+        else {
+            result = Rnorm;
+        }
+
+        /* Verifies if the result is inside a threshold */
+        if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
+            info_solution = 1;
+        }
+        else {
+            info_solution = 0;
+        }
+
+        free(work);
+        free(A);
+        free(B);
+    }
+
+    /* Broadcasts the result from the main process */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Bcast( &info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD );
+#endif
+
+    return info_solution;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Compares the Chameleon computed norm with a Lapack computed one.
+ *
+ * The matrix is converted to LAPACK layout, the main process evaluates the
+ * reference norm and the scaled relative difference, and the verdict is
+ * broadcast to every process when MPI is enabled.
+ *
+ *******************************************************************************
+ *
+ * @param[in] matrix_type
+ *          Whether it is a general, hermitian, symmetric or triangular matrix.
+ *
+ * @param[in] norm_type
+ *          Whether it should compare a Max, One, Inf or Frobenius norm.
+ *
+ * @param[in] uplo
+ *          Whether it is an upper triangular matrix, a lower triangular matrix or a general matrix.
+ *
+ * @param[in] diag
+ *          Whether it is a unitary diagonal matrix or not.
+ *
+ * @param[in] norm_cham
+ *          The Chameleon computed norm.
+ *
+ * @param[in] descA
+ *          The matrix descriptor.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the norms differ by more than the tolerance, or matrix_type is
+ *           not supported
+ *
+ *******************************************************************************
+ */
+int check_znorm( cham_mtxtype_t matrix_type, cham_normtype_t norm_type, cham_uplo_t uplo,
+                 cham_diag_t diag, double norm_cham, CHAM_desc_t *descA )
+{
+    int info_solution = 0;
+    int M   = descA->m;
+    int N   = descA->n;
+    int LDA = descA->m;
+    int rank = CHAMELEON_Comm_rank();
+    CHAMELEON_Complex64_t *A = NULL;
+
+    if ( rank == 0 ) {
+        A = (CHAMELEON_Complex64_t *)malloc(N*LDA*sizeof(CHAMELEON_Complex64_t));
+    }
+
+    /* Converts the matrix to LAPACK layout in order to use the LAPACK norm function */
+    CHAMELEON_Tile_to_Lapack( descA, A, LDA );
+
+    if ( rank == 0 ) {
+        double *work = (double*) malloc(chameleon_max(M, N)*sizeof(double));
+        double norm_lapack = 0.;
+        double result;
+        double eps = LAPACKE_dlamch_work('e');
+        int    supported = 1;
+
+        /* Computes the norm with the LAPACK function */
+        switch (matrix_type) {
+        case ChamGeneral:
+            norm_lapack = LAPACKE_zlange_work( LAPACK_COL_MAJOR, chameleon_lapack_const(norm_type), M, N, A, LDA, work );
+            break;
+#if defined(PRECISION_z) || defined(PRECISION_c)
+        case ChamHermitian:
+            norm_lapack = LAPACKE_zlanhe_work( LAPACK_COL_MAJOR, chameleon_lapack_const(norm_type), chameleon_lapack_const(uplo), M, A, LDA, work );
+            break;
+#endif
+        case ChamSymmetric:
+            norm_lapack = LAPACKE_zlansy_work( LAPACK_COL_MAJOR, chameleon_lapack_const(norm_type), chameleon_lapack_const(uplo), M, A, LDA, work );
+            break;
+        case ChamTriangular:
+            norm_lapack = LAPACKE_zlantr_work( LAPACK_COL_MAJOR, chameleon_lapack_const(norm_type), chameleon_lapack_const(uplo), chameleon_lapack_const(diag), M, N, A, LDA, work );
+            break;
+        default:
+            /*
+             * Do not return from here: the buffers still have to be
+             * released and the other processes are waiting on the
+             * broadcast below.
+             */
+            fprintf(stderr, "check_znorm: mtxtype(%d) unsupported\n", matrix_type );
+            supported = 0;
+            break;
+        }
+
+        if ( supported ) {
+            /* Compares the norms */
+            result = fabs( norm_cham - norm_lapack ) / ( norm_lapack * eps );
+
+            switch(norm_type) {
+            case ChamMaxNorm:
+                /* result should be perfectly equal */
+                break;
+            case ChamInfNorm:
+                /* Sum order on the line can differ */
+                result = result / (double)N;
+                break;
+            case ChamOneNorm:
+                /* Sum order on the column can differ */
+                result = result / (double)M;
+                break;
+            case ChamFrobeniusNorm:
+                /* Sum order on every element can differ */
+                result = result / ((double)M * (double)N);
+                break;
+            }
+
+            info_solution = ( result < 1 ) ? 0 : 1;
+        }
+        else {
+            info_solution = 1;
+        }
+
+        free(work);
+        free(A);
+    }
+
+    /* Broadcasts the result from the main process */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Bcast( &info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD );
+#endif
+
+    return info_solution;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Compares a Chameleon computed sum with a core function computed one.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Whether it is an upper triangular matrix, a lower triangular matrix or
+ *          a general matrix.
+ *
+ * @param[in] trans
+ *          Whether the first matrix is transposed, conjugate transposed or not
+ *          transposed.
+ *
+ * @param[in] descA
+ *          The descriptor of the matrix A.
+ *
+ * @param[in] alpha
+ *          The scalar alpha applied to op(A).
+ *
+ * @param[in] descBref
+ *          The descriptor of the matrix B before the operation.
+ *
+ * @param[in] beta
+ *          The scalar beta applied to B.
+ *
+ * @param[in] descBcham
+ *          The matrix descriptor of the Chameleon computed result.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the residual exceeds the tolerance or is NaN/Inf
+ *
+ *******************************************************************************
+ */
+int check_zsum ( cham_uplo_t uplo, cham_trans_t trans,
+                 CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA,
+                 CHAMELEON_Complex64_t beta, CHAM_desc_t *descBref, CHAM_desc_t *descBcham )
+{
+    int info_solution;
+    int is_root    = ( CHAMELEON_Comm_rank() == 0 );
+    int is_general = ( uplo == ChamUpperLower );
+    int M   = descBref->m;
+    int N   = descBref->n;
+    int LDB = M;
+    int LDA, An;
+    double normA, normB, normR, ratio;
+    CHAMELEON_Complex64_t *Alap     = NULL;
+    CHAMELEON_Complex64_t *Bexpect  = NULL;
+    CHAMELEON_Complex64_t *Bresult  = NULL;
+    CHAMELEON_Complex64_t minus_one = -1.0;
+
+    /* A is stored N-by-M when it enters the sum transposed */
+    if ( trans == ChamNoTrans ) {
+        LDA = M;
+        An  = N;
+    }
+    else {
+        LDA = N;
+        An  = M;
+    }
+
+    /* LAPACK copies only live on the main process */
+    if ( is_root ) {
+        Alap    = malloc(An*LDA*sizeof(CHAMELEON_Complex64_t));
+        Bexpect = malloc(N*LDB*sizeof(CHAMELEON_Complex64_t));
+        Bresult = malloc(N*LDB*sizeof(CHAMELEON_Complex64_t));
+    }
+
+    /* Max norms of the two operands, used to scale the residual */
+    if ( is_general ) {
+        normA = CHAMELEON_zlange_Tile( ChamMaxNorm, descA );
+        normB = CHAMELEON_zlange_Tile( ChamMaxNorm, descBref );
+    }
+    else {
+        /* A transposed triangle lives in the opposite half of A */
+        cham_uplo_t uploA = uplo;
+        if ( trans != ChamNoTrans ) {
+            uploA = (uplo == ChamUpper) ? ChamLower : ChamUpper;
+        }
+        normA = CHAMELEON_zlantr_Tile( ChamMaxNorm, uploA, ChamNonUnit, descA );
+        normB = CHAMELEON_zlantr_Tile( ChamMaxNorm, uplo, ChamNonUnit, descBref );
+    }
+
+    /* Brings the three matrices back to LAPACK layout */
+    CHAMELEON_Tile_to_Lapack( descA, Alap, LDA );
+    CHAMELEON_Tile_to_Lapack( descBref, Bexpect, LDB );
+    CHAMELEON_Tile_to_Lapack( descBcham, Bresult, LDB );
+
+    if ( is_root ) {
+        double eps = LAPACKE_dlamch_work('e');
+        double *work = malloc(chameleon_max(M, N)* sizeof(double));
+
+        /* Reference sum with the core kernel, stored in Bexpect */
+        if ( is_general ) {
+            CORE_zgeadd( trans, M, N,
+                         alpha, Alap,    LDA,
+                         beta,  Bexpect, LDB );
+        }
+        else {
+            CORE_ztradd( uplo, trans, M, N,
+                         alpha, Alap,    LDA,
+                         beta,  Bexpect, LDB );
+        }
+
+        /* Bexpect <- Bexpect - Bresult */
+        cblas_zaxpy( LDB*N, CBLAS_SADDR(minus_one), Bresult, 1, Bexpect, 1 );
+
+        /* Max norm of the difference */
+        if ( is_general ) {
+            normR = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Bexpect, LDB, work );
+        }
+        else {
+            normR = LAPACKE_zlantr_work( LAPACK_COL_MAJOR, 'M', chameleon_lapack_const(uplo), 'N',
+                                         M, N, Bexpect, LDB, work );
+        }
+        ratio = normR / (max(normA, normB) * eps);
+
+        /* Fails on a non finite value or when the scaled residual is above 10 */
+        info_solution = ( isnan(normR) || isinf(normR) ||
+                          isnan(ratio) || isinf(ratio) || (ratio > 10.0) ) ? 1 : 0;
+
+        free(work);
+        free(Alap);
+        free(Bexpect);
+        free(Bresult);
+    }
+
+    /* Shares the verdict of the main process with everyone */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Bcast( &info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD );
+#endif
+
+    return info_solution;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Compares a Chameleon computed scale with a core function computed one.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Whether it is an upper triangular matrix, a lower triangular matrix or a general matrix.
+ *
+ * @param[in] alpha
+ *          The scalar alpha.
+ *
+ * @param[in] descA1
+ *          The original matrix descriptor.
+ *
+ * @param[in] descA2
+ *          The scaled matrix descriptor.
+ *
+ * @retval 0 successful comparison
+ *
+ *******************************************************************************
+ */
+int check_zscale( cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA1, CHAM_desc_t *descA2 )
+{
+    int is_root = ( CHAMELEON_Comm_rank() == 0 );
+    int M   = descA1->m;
+    int N   = descA1->n;
+    int LDA = M;
+    int info_solution;
+    CHAM_desc_t *descRef;
+    CHAMELEON_Complex64_t *Aref = NULL;
+
+    /* The LAPACK copy of the original matrix is held by the main process */
+    if ( is_root ) {
+        Aref = (CHAMELEON_Complex64_t *)malloc(M*N*sizeof(CHAMELEON_Complex64_t));
+    }
+    CHAMELEON_Tile_to_Lapack( descA1, Aref, LDA );
+
+    /* Reference scaling with the core kernel */
+    if ( is_root ) {
+        CORE_zlascal( uplo, M, N, alpha, Aref, LDA );
+    }
+
+    /* Back to a tile descriptor so that check_zmatrices can do the comparison */
+    descRef = CHAMELEON_Desc_CopyOnZero( descA1, NULL );
+    CHAMELEON_Lapack_to_Tile( Aref, LDA, descRef );
+
+    info_solution = check_zmatrices( uplo, descA2, descRef );
+
+    /* Aref is NULL everywhere but on the main process, where free is a no-op */
+    free( Aref );
+    CHAMELEON_Desc_Destroy( &descRef );
+
+    return info_solution;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Compares a Chameleon computed product with a core function computed one.
+ *
+ *******************************************************************************
+ *
+ * @param[in] transA
+ *          Whether the first product element is transposed, conjugate transposed or not transposed.
+ *
+ * @param[in] transB
+ *          Whether the second product element is transposed, conjugate transposed or not transposed.
+ *
+ * @param[in] alpha
+ *          The scalar alpha.
+ *
+ * @param[in] descA
+ *          The descriptor of the matrix A.
+ *
+ * @param[in] descB
+ *          The descriptor of the matrix B.
+ *
+ * @param[in] beta
+ *          The scalar beta.
+ *
+ * @param[in] descCref
+ *          The descriptor of the matrix C before the operation.
+ *
+ * @param[in] descC
+ *          The matrix descriptor of the Chameleon computed result alpha*A*B+beta*C.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the residual exceeds the tolerance or is NaN/Inf
+ *
+ *******************************************************************************
+ */
+int check_zgemm( cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA,
+                 CHAM_desc_t *descB, CHAMELEON_Complex64_t beta, CHAM_desc_t *descCref, CHAM_desc_t *descC )
+{
+    int An, LDA, Bn, LDB;
+    int info_solution = 0;
+    int M = descC->m;
+    int N = descC->n;
+    int K = (transA != ChamNoTrans)? descA->m : descA->n;
+    int LDC = descC->m;
+    int rank = CHAMELEON_Comm_rank();
+    double Anorm, Bnorm, Crefnorm, Rnorm, result;
+    CHAMELEON_Complex64_t *A = NULL;
+    CHAMELEON_Complex64_t *B = NULL;
+    CHAMELEON_Complex64_t *C = NULL;
+    CHAMELEON_Complex64_t *Cref = NULL;
+    CHAMELEON_Complex64_t mzone = -1.0;
+
+    /* Calculates the dimensions according to the transposition */
+    if ( transA == ChamNoTrans ) {
+        Anorm = CHAMELEON_zlange_Tile(ChamInfNorm, descA);
+        LDA = M;
+        An  = K;
+    } else {
+        Anorm = CHAMELEON_zlange_Tile(ChamOneNorm, descA);
+        LDA = K;
+        An  = M;
+    }
+    if ( transB == ChamNoTrans ) {
+        Bnorm = CHAMELEON_zlange_Tile(ChamInfNorm, descB);
+        LDB = K;
+        Bn  = N;
+    } else {
+        Bnorm = CHAMELEON_zlange_Tile(ChamOneNorm, descB);
+        LDB = N;
+        Bn  = K;
+    }
+
+    /* Computes the norms for comparing */
+    Crefnorm = CHAMELEON_zlange_Tile(ChamMaxNorm, descCref);
+
+    /* Creates the LAPACK version of the matrices */
+    if ( rank == 0 ) {
+        A    = (CHAMELEON_Complex64_t *)malloc(An*LDA*sizeof(CHAMELEON_Complex64_t));
+        B    = (CHAMELEON_Complex64_t *)malloc(Bn*LDB*sizeof(CHAMELEON_Complex64_t));
+        Cref = (CHAMELEON_Complex64_t *)malloc(N *LDC*sizeof(CHAMELEON_Complex64_t));
+        C    = (CHAMELEON_Complex64_t *)malloc(N *LDC*sizeof(CHAMELEON_Complex64_t));
+    }
+
+    CHAMELEON_Tile_to_Lapack(descA, A, LDA);
+    CHAMELEON_Tile_to_Lapack(descB, B, LDB);
+    CHAMELEON_Tile_to_Lapack(descCref, Cref, LDC);
+    CHAMELEON_Tile_to_Lapack(descC, C, LDC);
+
+    if ( rank == 0 ) {
+        double eps = LAPACKE_dlamch_work('e');
+
+        /* Makes the multiplication with the core function */
+        cblas_zgemm( CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K,
+                     CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC );
+
+        /* Cref <- Cref - C */
+        cblas_zaxpy(LDC * N, CBLAS_SADDR(mzone), C, 1, Cref, 1);
+
+        /*
+         * Calculates the norm of the difference. No workspace is needed
+         * for the max norm, so none is allocated.
+         */
+        Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL );
+
+        result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * K * eps);
+
+        /* Verifies if the result is inside a threshold */
+        if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
+            info_solution = 1;
+        }
+        else {
+            info_solution = 0;
+        }
+
+        free(A);
+        free(B);
+        free(C);
+        free(Cref);
+    }
+
+    /* Broadcasts the result from the main process */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Bcast(&info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD);
+#endif
+
+    return info_solution;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Compares a Chameleon computed hermitian product with a core function computed one.
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *          Whether the hermitian matrix A appears on the left or right in the operation.
+ *
+ * @param[in] uplo
+ *          Whether the upper or the lower triangular part of A is referenced.
+ *
+ * @param[in] alpha
+ *          The scalar alpha.
+ *
+ * @param[in] descA
+ *          The descriptor of the hermitian (or symmetric) matrix A.
+ *
+ * @param[in] descB
+ *          The descriptor of the M-by-N matrix B.
+ *
+ * @param[in] beta
+ *          The scalar beta.
+ *
+ * @param[in] descCref
+ *          The descriptor of the M-by-N matrix C before the operation.
+ *
+ * @param[in] descC
+ *          The matrix descriptor of the Chameleon computed result alpha*A*B+beta*C or alpha*B*A+beta*C.
+ *
+ * @param[in] matrix_type
+ *          First argument of the function: ChamHermitian checks against
+ *          zhemm/zlanhe, any other value against zsymm/zlansy.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the residual exceeds the tolerance or is NaN/Inf
+ *
+ *******************************************************************************
+ */
+int check_zsymm( cham_mtxtype_t matrix_type, cham_side_t side, cham_uplo_t uplo,
+                 CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, CHAM_desc_t *descB,
+                 CHAMELEON_Complex64_t beta, CHAM_desc_t *descCref, CHAM_desc_t *descC )
+{
+    int info_solution = 0;
+    int An, LDA;
+    int M = descC->m;
+    int N = descC->n;
+    int LDB = M;
+    int LDC = M;
+    int rank = CHAMELEON_Comm_rank();
+    double Anorm, Bnorm, Crefnorm, Rnorm, result;
+    CHAMELEON_Complex64_t *A = NULL;
+    CHAMELEON_Complex64_t *B = NULL;
+    CHAMELEON_Complex64_t *Cref = NULL;
+    CHAMELEON_Complex64_t *C = NULL;
+    CHAMELEON_Complex64_t mzone = -1.0;
+
+    /* A is M-by-M when applied from the left, N-by-N from the right */
+    if ( side == ChamLeft ) {
+        if ( matrix_type == ChamHermitian ) {
+            Anorm = CHAMELEON_zlanhe_Tile(ChamInfNorm, uplo, descA);
+        }
+        else {
+            Anorm = CHAMELEON_zlansy_Tile(ChamInfNorm, uplo, descA);
+        }
+        Bnorm = CHAMELEON_zlange_Tile(ChamOneNorm, descB);
+        LDA = M;
+        An  = M;
+    } else {
+        if ( matrix_type == ChamHermitian ) {
+            Anorm = CHAMELEON_zlanhe_Tile(ChamOneNorm, uplo, descA);
+        }
+        else {
+            Anorm = CHAMELEON_zlansy_Tile(ChamOneNorm, uplo, descA);
+        }
+        Bnorm = CHAMELEON_zlange_Tile(ChamInfNorm, descB);
+        LDA = N;
+        An  = N;
+    }
+
+    /* Computes the norm of the initial C, used to scale the residual */
+    Crefnorm = CHAMELEON_zlange_Tile( ChamMaxNorm, descCref );
+
+    if ( rank == 0 ) {
+        A    = (CHAMELEON_Complex64_t *)malloc(LDA * An * sizeof(CHAMELEON_Complex64_t));
+        B    = (CHAMELEON_Complex64_t *)malloc(LDB * N  * sizeof(CHAMELEON_Complex64_t));
+        Cref = (CHAMELEON_Complex64_t *)malloc(LDC * N  * sizeof(CHAMELEON_Complex64_t));
+        C    = (CHAMELEON_Complex64_t *)malloc(LDC * N  * sizeof(CHAMELEON_Complex64_t));
+    }
+
+    /* Creates the LAPACK version of the matrices */
+    CHAMELEON_Tile_to_Lapack( descA,    A,    LDA );
+    CHAMELEON_Tile_to_Lapack( descB,    B,    LDB );
+    CHAMELEON_Tile_to_Lapack( descCref, Cref, LDC );
+    CHAMELEON_Tile_to_Lapack( descC,    C,    LDC );
+
+    if ( rank == 0 ) {
+        double eps = LAPACKE_dlamch_work('e');
+
+        /* Makes the multiplication with the core function */
+        if ( matrix_type == ChamHermitian ) {
+            cblas_zhemm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
+                         M, N, CBLAS_SADDR(alpha),
+                         A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC );
+        }
+        else {
+            cblas_zsymm( CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
+                         M, N, CBLAS_SADDR(alpha),
+                         A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC );
+        }
+
+        /* Cref <- Cref - C */
+        cblas_zaxpy(LDC * N, CBLAS_SADDR(mzone), C, 1, Cref, 1);
+
+        /* Computes the max norm of the difference, evaluated once */
+        Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL );
+
+        result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * An * eps);
+
+        /* Verifies if the result is inside a threshold */
+        if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
+            info_solution = 1;
+        }
+        else {
+            info_solution = 0;
+        }
+
+        free(A);
+        free(B);
+        free(C);
+        free(Cref);
+    }
+
+    /* Broadcasts the result from the main process */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Bcast(&info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD);
+#endif
+
+    return info_solution;
+}
+
+/**
+
********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Compares a Chameleon computed matrix rank k operation with a core function computed one.
+ *
+ *******************************************************************************
+ *
+ * @param[in] matrix_type
+ *          ChamHermitian to check zherk/zher2k; any other value checks
+ *          zsyrk/zsyr2k.
+ *
+ * @param[in] uplo
+ *          Whether the upper or lower triangular part of C is referenced.
+ *
+ * @param[in] trans
+ *          Whether the first product element is transposed, conjugate transposed or not transposed.
+ *
+ * @param[in] alpha
+ *          The scalar alpha.
+ *
+ * @param[in] descA
+ *          The descriptor of the matrix A.
+ *
+ * @param[in] descB
+ *          The descriptor of the matrix B - only used for her2k and syr2k.
+ *          NULL selects the rank k (herk/syrk) check.
+ *
+ * @param[in] beta
+ *          The scalar beta.
+ *
+ * @param[in] descCref
+ *          The descriptor of the initial matrix C, on which the reference
+ *          update is computed.
+ *
+ * @param[in] descC
+ *          The matrix descriptor of the Chameleon computed result.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 a norm is infinite, or the scaled residual is NaN, infinite or
+ *           larger than 10
+ *
+ *******************************************************************************
+ */
+int check_zsyrk( cham_mtxtype_t matrix_type, cham_uplo_t uplo, cham_trans_t trans,
+                 CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, CHAM_desc_t *descB,
+                 CHAMELEON_Complex64_t beta, CHAM_desc_t *descCref, CHAM_desc_t *descC )
+{
+    int info_solution = 0;
+    int N        = descC->n;
+    int LDC      = N;
+    int rank     = CHAMELEON_Comm_rank();
+    int is_herm  = ( matrix_type == ChamHermitian );
+    int is_rank2 = ( descB != NULL );
+    int K, LDA, An;
+    /* A and B are measured with the same norm type, chosen from trans */
+    cham_normtype_t normAB = ( trans == ChamNoTrans ) ? ChamInfNorm : ChamOneNorm;
+    double Anorm, Bnorm, Crefnorm, Cchamnorm, Clapacknorm, Rnorm, result;
+    CHAMELEON_Complex64_t *A    = NULL;
+    CHAMELEON_Complex64_t *B    = NULL;
+    CHAMELEON_Complex64_t *Cref = NULL;
+    CHAMELEON_Complex64_t *C    = NULL;
+
+    Anorm = CHAMELEON_zlange_Tile( normAB, descA );
+    Bnorm = is_rank2 ? CHAMELEON_zlange_Tile( normAB, descB ) : 0.;
+
+    /* Dimensions of the LAPACK layout copy of A (and of B when present) */
+    if ( trans == ChamNoTrans ) {
+        K   = descA->n;
+        LDA = N;
+        An  = K;
+    }
+    else {
+        K   = descA->m;
+        LDA = K;
+        An  = N;
+    }
+
+    /* Computes the norms for comparing */
+    if ( is_herm ) {
+        Crefnorm  = CHAMELEON_zlanhe_Tile( ChamInfNorm, uplo, descCref );
+        Cchamnorm = CHAMELEON_zlanhe_Tile( ChamInfNorm, uplo, descC );
+    }
+    else {
+        Crefnorm  = CHAMELEON_zlansy_Tile( ChamInfNorm, uplo, descCref );
+        Cchamnorm = CHAMELEON_zlansy_Tile( ChamInfNorm, uplo, descC );
+    }
+
+    /* Only rank 0 receives the LAPACK layout copies */
+    if ( rank == 0 ) {
+        A = (CHAMELEON_Complex64_t *)malloc(LDA * An * sizeof(CHAMELEON_Complex64_t));
+        if ( is_rank2 ) {
+            B = (CHAMELEON_Complex64_t *)malloc(LDA * An * sizeof(CHAMELEON_Complex64_t));
+        }
+        Cref = (CHAMELEON_Complex64_t *)malloc(LDC * N * sizeof(CHAMELEON_Complex64_t));
+        C    = (CHAMELEON_Complex64_t *)malloc(LDC * N * sizeof(CHAMELEON_Complex64_t));
+    }
+
+    /* Creates the LAPACK version of the matrices */
+    CHAMELEON_Tile_to_Lapack( descA,    A,    LDA );
+    CHAMELEON_Tile_to_Lapack( descCref, Cref, LDC );
+    CHAMELEON_Tile_to_Lapack( descC,    C,    LDC );
+    if ( is_rank2 ) {
+        CHAMELEON_Tile_to_Lapack( descB, B, LDA );
+    }
+
+    if ( rank == 0 ) {
+        double  eps    = LAPACKE_dlamch_work('e');
+        /* Norm estimate of the product term: ||A||^2 (rank k) or 2*||A||*||B|| (rank 2k) */
+        double  ABnorm = is_rank2 ? 2. * Anorm * Bnorm : Anorm * Anorm;
+        double *work   = malloc(sizeof(double)*N);
+
+        /* Makes the multiplication with the core function */
+        if ( is_herm ) {
+            if ( is_rank2 ) {
+                cblas_zher2k( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
+                              N, K, CBLAS_SADDR(alpha), A, LDA, B, LDA, creal(beta), Cref, LDC );
+            }
+            else {
+                cblas_zherk( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
+                             N, K, creal(alpha), A, LDA, creal(beta), Cref, LDC );
+            }
+            Clapacknorm = LAPACKE_zlanhe_work( LAPACK_COL_MAJOR, 'I', chameleon_lapack_const(uplo), N, Cref, LDC, work );
+        }
+        else {
+            if ( is_rank2 ) {
+                cblas_zsyr2k( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
+                              N, K, CBLAS_SADDR(alpha), A, LDA, B, LDA, CBLAS_SADDR(beta), Cref, LDC );
+            }
+            else {
+                cblas_zsyrk( CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
+                             N, K, CBLAS_SADDR(alpha), A, LDA, CBLAS_SADDR(beta), Cref, LDC );
+            }
+            Clapacknorm = LAPACKE_zlansy_work( LAPACK_COL_MAJOR, 'I', chameleon_lapack_const(uplo), N, Cref, LDC, work );
+        }
+
+        /* Cref <- Cref - C on the uplo triangle */
+        CORE_ztradd( uplo, ChamNoTrans, N, N,
+                     -1., C,    LDC,
+                      1., Cref, LDC );
+
+        /* Computes the norm with the core function's result */
+        if ( is_herm ) {
+            Rnorm = LAPACKE_zlanhe_work( LAPACK_COL_MAJOR, 'M', chameleon_lapack_const(uplo), N, Cref, LDC, NULL );
+        }
+        else {
+            Rnorm = LAPACKE_zlansy_work( LAPACK_COL_MAJOR, 'M', chameleon_lapack_const(uplo), N, Cref, LDC, NULL );
+        }
+        result = Rnorm / ((ABnorm + Crefnorm) * K * eps);
+
+        /* Verifies if the result is inside a threshold */
+        info_solution = ( isinf(Clapacknorm) || isinf(Cchamnorm) ||
+                          isnan(result) || isinf(result) || (result > 10.0) ) ? 1 : 0;
+
+        /* B is NULL when descB is NULL, which free() accepts */
+        free(work);
+        free(A);
+        free(B);
+        free(C);
+        free(Cref);
+    }
+
+    /* Broadcasts the result from the main process */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Bcast(&info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD);
+#endif
+
+    return info_solution;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Compares a Chameleon computed matrix triangular product with a core function computed one.
+ *
+ *******************************************************************************
+ *
+ * @param[in] check_func
+ *          CHECK_TRMM to check a triangular product (ztrmm reference); any
+ *          other value checks a triangular solve (ztrsm reference).
+ *
+ * @param[in] side
+ *          Whether A appears on the left or on the right of the product.
+ *
+ * @param[in] uplo
+ *          Whether A is an upper triangular matrix or a lower triangular matrix.
+ *
+ * @param[in] trans
+ *          Whether A is transposed, conjugate transposed or not transposed.
+ *
+ * @param[in] diag
+ *          Whether A has a unit diagonal or not.
+ *
+ * @param[in] alpha
+ *          The scalar alpha.
+ *
+ * @param[in] descA
+ *          The descriptor of the matrix A.
+ *
+ * @param[in] descB
+ *          The descriptor of the Chameleon computed result.
+ *
+ * @param[in] descBref
+ *          The descriptor of the initial matrix B, on which the reference
+ *          operation is computed.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the scaled residual is NaN, infinite or larger than 10
+ *
+ *******************************************************************************
+ */
+int check_ztrmm( int check_func, cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, cham_diag_t diag,
+                 CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, CHAM_desc_t *descB, CHAM_desc_t *descBref )
+{
+    int info_solution = 0;
+    int M       = descB->m;
+    int N       = descB->n;
+    int is_left = ( side == ChamLeft );
+    /* A is square; its order is the dimension of B on the side it multiplies */
+    int An      = is_left ? M : N;
+    int LDA     = An;
+    int LDB     = M;
+    int rank    = CHAMELEON_Comm_rank();
+    cham_normtype_t normA, normB;
+    double Anorm, Bnorm, Brefnorm, Rnorm, result;
+    CHAMELEON_Complex64_t *A    = NULL;
+    CHAMELEON_Complex64_t *Bref = NULL;
+    CHAMELEON_Complex64_t *B    = NULL;
+    CHAMELEON_Complex64_t mzone = -1.0;
+
+    /* Selects the norm types from the side and the transposition of A */
+    if ( (is_left && (trans == ChamNoTrans)) ||
+         ((side == ChamRight) && (trans != ChamNoTrans)) ) {
+        normA = ChamInfNorm;
+    }
+    else {
+        normA = ChamOneNorm;
+    }
+    normB = is_left ? ChamOneNorm : ChamInfNorm;
+
+    /* Computes the norms for comparing */
+    Anorm    = CHAMELEON_zlantr_Tile( normA, uplo, diag, descA );
+    Brefnorm = CHAMELEON_zlange_Tile( normB, descBref );
+    Bnorm    = CHAMELEON_zlange_Tile( normB, descB );
+
+    /* Only rank 0 receives the LAPACK layout copies */
+    if ( rank == 0 ) {
+        A    = (CHAMELEON_Complex64_t *)malloc(An*LDA*sizeof(CHAMELEON_Complex64_t));
+        Bref = (CHAMELEON_Complex64_t *)malloc(N *LDB*sizeof(CHAMELEON_Complex64_t));
+        B    = (CHAMELEON_Complex64_t *)malloc(N *LDB*sizeof(CHAMELEON_Complex64_t));
+    }
+
+    /* Creates the LAPACK version of the matrices */
+    CHAMELEON_Tile_to_Lapack(descA,    A,    LDA);
+    CHAMELEON_Tile_to_Lapack(descB,    B,    LDB);
+    CHAMELEON_Tile_to_Lapack(descBref, Bref, LDB);
+
+    if ( rank == 0 ) {
+        double eps = LAPACKE_dlamch_work('e');
+
+        /* Applies the core function in place on the reference matrix */
+        if (check_func == CHECK_TRMM) {
+            cblas_ztrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
+                        (CBLAS_DIAG)diag, M, N, CBLAS_SADDR(alpha), A, LDA, Bref, LDB);
+        }
+        else {
+            cblas_ztrsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
+                        (CBLAS_DIAG)diag, M, N, CBLAS_SADDR(alpha), A, LDA, Bref, LDB);
+        }
+
+        /* Bref <- Bref - B, then takes the max norm of the difference */
+        cblas_zaxpy( LDB * N, CBLAS_SADDR(mzone), B, 1, Bref, 1 );
+        Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Bref, LDB, NULL );
+
+        result = Rnorm / ((Anorm + Brefnorm) * An * eps);
+
+        /* Verifies if the result is inside a threshold */
+        info_solution = ( isnan(Rnorm) || isinf(Rnorm) ||
+                          isnan(result) || isinf(result) || (result > 10.0) ) ? 1 : 0;
+
+        free(A);
+        free(B);
+        free(Bref);
+    }
+
+    /* Broadcasts the result from the main process */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Bcast(&info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD);
+#endif
+
+    /* The norm of the Chameleon result is computed but does not enter the verdict */
+    (void)Bnorm;
+    return info_solution;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Compares a Chameleon computed product U*U' or L'*L result with a core function computed one.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Whether the upper or lower triangle of A is stored.
+ *
+ * @param[in] descA
+ *          The descriptor of the A matrix.
+ *
+ * @param[in] descAAt
+ *          The descriptor of the Chameleon computed result matrix.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the scaled residual is NaN, infinite or larger than 60
+ *
+ *******************************************************************************
+ */
+int check_zlauum( cham_uplo_t uplo, CHAM_desc_t *descA, CHAM_desc_t *descAAt )
+{
+    int info_local, info_global;
+    int N = descA->n;
+    double eps = LAPACKE_dlamch_work('e');
+    double result, Anorm, AAtnorm, Rnorm;
+    CHAM_desc_t *descAt;
+    /* Any uplo other than ChamUpper is handled as the lower case */
+    cham_uplo_t uplo_keep = ( uplo == ChamUpper ) ? ChamUpper : ChamLower;
+    cham_uplo_t uplo_zero = ( uplo == ChamUpper ) ? ChamLower : ChamUpper;
+    cham_side_t side      = ( uplo == ChamUpper ) ? ChamRight : ChamLeft;
+
+    Anorm   = CHAMELEON_zlantr_Tile( ChamOneNorm, uplo, ChamNonUnit, descA );
+    AAtnorm = CHAMELEON_zlantr_Tile( ChamOneNorm, uplo, ChamNonUnit, descAAt );
+
+    /* Copies the stored triangle of A and zeroes the opposite one */
+    descAt = CHAMELEON_Desc_Copy( descA, NULL );
+    CHAMELEON_zlaset_Tile( uplo_zero, 0., 0., descAt );
+    CHAMELEON_zlacpy_Tile( uplo_keep, descA, descAt );
+
+    /* Computes U * U' (upper, A on the right) or L' * L (lower, A on the left) */
+    CHAMELEON_ztrmm_Tile( side, uplo_keep, ChamConjTrans, ChamNonUnit, 1., descA, descAt );
+
+    /* Computes the difference between the reference product and AAt */
+    CHAMELEON_ztradd_Tile( uplo, ChamNoTrans, -1., descAAt, 1., descAt );
+
+    Rnorm = CHAMELEON_zlantr_Tile( ChamMaxNorm, uplo, ChamNonUnit, descAt );
+
+    CHAMELEON_Desc_Destroy( &descAt );
+
+    /* Compares the residual's norm */
+    result = Rnorm / ( Anorm * Anorm * N * eps );
+    info_local = ( isnan(AAtnorm) || isinf(AAtnorm) ||
+                   isnan(result) || isinf(result) || (result > 60.0) ) ? 1 : 0;
+
+    /* Reduces the result on all processes */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Allreduce( &info_local, &info_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );
+#else
+    info_global = info_local;
+#endif
+
+    return info_global;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Checks if a Chameleon computed factorization is correct.
+ *
+ *******************************************************************************
+ *
+ * @param[inout] args
+ *          The run argument list; the values "||A||" and "||A-fact(A)||" are
+ *          added to it.
+ *
+ * @param[in] mtxtype
+ *          The type of the factorized matrix: ChamGeneral, ChamHermitian
+ *          (complex precisions only) or ChamSymmetric.
+ *
+ * @param[in] uplo
+ *          Whether it is an upper triangular matrix or a lower triangular matrix.
+ *          Any other value takes both factors from descLU (unit lower L,
+ *          upper U).
+ *
+ * @param[inout] descA
+ *          The descriptor of the original matrix. It is overwritten by the
+ *          residual A - L*U.
+ *
+ * @param[in] descLU
+ *          The descriptor of the Chameleon factorized matrix.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the scaled residual is NaN, infinite or larger than 60, or
+ *           mtxtype is not supported
+ *
+ *******************************************************************************
+ */
+int check_zxxtrf( run_arg_list_t *args, cham_mtxtype_t mtxtype, cham_uplo_t uplo,
+                  CHAM_desc_t *descA, CHAM_desc_t *descLU )
+{
+    int info_local, info_global;
+    int M = descA->m;
+    int N = descA->n;
+    double Anorm, Rnorm, result;
+    double eps = LAPACKE_dlamch_work('e');
+
+    CHAM_desc_t *descL, *descU;
+    cham_trans_t transL = ChamNoTrans;
+    cham_trans_t transU = ChamNoTrans;
+
+    descL = CHAMELEON_Desc_Copy( descA, NULL );
+    descU = CHAMELEON_Desc_Copy( descA, NULL );
+
+    CHAMELEON_zlaset_Tile( ChamUpperLower, 0., 0., descL );
+    CHAMELEON_zlaset_Tile( ChamUpperLower, 0., 0., descU );
+
+    /* Extracts the two factors of the product from descLU */
+    switch ( uplo ) {
+    case ChamUpper:
+        /* Both factors are the upper triangle; the left one is (conjugate) transposed */
+#if defined(PRECISION_z) || defined(PRECISION_c)
+        transL = (mtxtype == ChamHermitian) ? ChamConjTrans : ChamTrans;
+#else
+        transL = ChamTrans;
+#endif
+        CHAMELEON_zlacpy_Tile( ChamUpper, descLU, descL );
+        CHAMELEON_zlacpy_Tile( ChamUpper, descLU, descU );
+        break;
+    case ChamLower:
+        /* Both factors are the lower triangle; the right one is (conjugate) transposed */
+#if defined(PRECISION_z) || defined(PRECISION_c)
+        transU = (mtxtype == ChamHermitian) ? ChamConjTrans : ChamTrans;
+#else
+        transU = ChamTrans;
+#endif
+        CHAMELEON_zlacpy_Tile( ChamLower, descLU, descL );
+        CHAMELEON_zlacpy_Tile( ChamLower, descLU, descU );
+        break;
+    case ChamUpperLower:
+    default:
+        /* L is the lower triangle with a unit diagonal, U the upper triangle */
+        CHAMELEON_zlacpy_Tile( ChamLower, descLU, descL );
+        CHAMELEON_zlaset_Tile( ChamUpper, 0., 1., descL );
+        CHAMELEON_zlacpy_Tile( ChamUpper, descLU, descU );
+    }
+
+    /* Computes the residual A - op(L) * op(U) in place in descA */
+    switch ( mtxtype ) {
+    case ChamGeneral: {
+        CHAM_desc_t *subL, *subU;
+        subL = chameleon_desc_submatrix( descL, 0, 0, M, chameleon_min(M, N) );
+        subU = chameleon_desc_submatrix( descU, 0, 0, chameleon_min(M, N), N );
+
+        Anorm = CHAMELEON_zlange_Tile( ChamOneNorm, descA );
+        CHAMELEON_zgemm_Tile( transL, transU, -1., subL, subU, 1., descA );
+        Rnorm = CHAMELEON_zlange_Tile( ChamOneNorm, descA );
+
+        free( subL );
+        free( subU );
+    }
+        break;
+
+#if defined(PRECISION_z) || defined(PRECISION_c)
+    case ChamHermitian:
+        Anorm = CHAMELEON_zlanhe_Tile( ChamOneNorm, uplo, descA );
+        CHAMELEON_zgemm_Tile( transL, transU, -1., descL, descU, 1., descA );
+        Rnorm = CHAMELEON_zlanhe_Tile( ChamOneNorm, uplo, descA );
+        break;
+#endif
+
+    case ChamSymmetric:
+        Anorm = CHAMELEON_zlansy_Tile( ChamOneNorm, uplo, descA );
+        CHAMELEON_zgemm_Tile( transL, transU, -1., descL, descU, 1., descA );
+        Rnorm = CHAMELEON_zlansy_Tile( ChamOneNorm, uplo, descA );
+        break;
+
+    default:
+        fprintf(stderr, "check_zxxtrf: mtxtype(%d) unsupported\n", mtxtype );
+        /* Releases the temporary descriptors before the early exit */
+        CHAMELEON_Desc_Destroy( &descL );
+        CHAMELEON_Desc_Destroy( &descU );
+        return 1;
+    }
+
+    result = Rnorm / ( Anorm * N * eps );
+    run_arg_add_double( args, "||A||", Anorm );
+    run_arg_add_double( args, "||A-fact(A)||", Rnorm );
+
+    info_local = ( isnan(result) || isinf(result) || (result > 60.0) ) ? 1 : 0;
+
+    CHAMELEON_Desc_Destroy( &descL );
+    CHAMELEON_Desc_Destroy( &descU );
+
+    /* Reduces the result on all processes */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Allreduce( &info_local, &info_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );
+#else
+    info_global = info_local;
+#endif
+
+    return info_global;
+}
+
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Checks if the linear solution of op(A) * x = b is correct.
+ *
+ *******************************************************************************
+ *
+ * @param[in] mtxtype
+ *          The type of the matrix A: ChamGeneral, ChamHermitian (complex
+ *          precisions only) or ChamSymmetric.
+ *
+ * @param[in] trans
+ *          Whether the A matrix is non transposed, transposed or conjugate transposed.
+ *          Only applied to the product when mtxtype is ChamGeneral.
+ *
+ * @param[in] uplo
+ *          Forwarded to the hermitian/symmetric norm and product routines;
+ *          not used when mtxtype is ChamGeneral.
+ *
+ * @param[in] descA
+ *          The descriptor of the A matrix.
+ *
+ * @param[in] descX
+ *          The descriptor of the X matrix.
+ *
+ * @param[inout] descB
+ *          The descriptor of the B = A*X matrix. On exit, it contains the remainder B - A*X.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the scaled residual is NaN, infinite or larger than 60, or
+ *           mtxtype is not supported
+ *
+ *******************************************************************************
+ */
+int check_zsolve( cham_mtxtype_t mtxtype, cham_trans_t trans, cham_uplo_t uplo,
+                  CHAM_desc_t *descA, CHAM_desc_t *descX, CHAM_desc_t *descB )
+{
+    int info_local, info_global;
+    int M = descA->m;
+    int N = descA->n;
+    double Anorm, Bnorm, Xnorm, Rnorm, result;
+    double eps = LAPACKE_dlamch_work('e');
+    cham_normtype_t norm = (trans == ChamNoTrans) ? ChamOneNorm : ChamInfNorm;
+
+    /* Computes the norms */
+    Bnorm = CHAMELEON_zlange_Tile( ChamOneNorm, descB );
+    Xnorm = CHAMELEON_zlange_Tile( ChamOneNorm, descX );
+
+    /* Computes B - op(A) * X in place in descB */
+    switch ( mtxtype ) {
+    case ChamGeneral:
+        Anorm = CHAMELEON_zlange_Tile( norm, descA );
+        CHAMELEON_zgemm_Tile( trans, ChamNoTrans, -1., descA, descX, 1., descB );
+        break;
+
+#if defined(PRECISION_z) || defined(PRECISION_c)
+    case ChamHermitian:
+        Anorm = CHAMELEON_zlanhe_Tile( norm, uplo, descA );
+        CHAMELEON_zhemm_Tile( ChamLeft, uplo, -1., descA, descX, 1., descB );
+        break;
+#endif
+
+    case ChamSymmetric:
+        Anorm = CHAMELEON_zlansy_Tile( norm, uplo, descA );
+        CHAMELEON_zsymm_Tile( ChamLeft, uplo, -1., descA, descX, 1., descB );
+        break;
+
+    default:
+        fprintf(stderr, "check_zsolve: mtxtype(%d) unsupported\n", mtxtype );
+        return 1;
+    }
+
+    Rnorm  = CHAMELEON_zlange_Tile( ChamOneNorm, descB );
+    result = Rnorm / ( Anorm * Xnorm * chameleon_max( M, N ) * eps );
+
+    info_local = ( isnan(Xnorm) || isinf(Xnorm) ||
+                   isnan(result) || isinf(result) || (result > 60.0) ) ? 1 : 0;
+
+    /* Reduces the result on all processes */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Allreduce( &info_local, &info_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );
+#else
+    info_global = info_local;
+#endif
+
+    /* The initial norm of B is computed but does not enter the verdict */
+    (void)Bnorm;
+    return info_global;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Checks if the Ai matrix is the inverse of A0.
+ *
+ *******************************************************************************
+ *
+ * @param[in] matrix_type
+ *          The type of the matrices: hermitian, symmetric, triangular or general.
+ *
+ * @param[in] uplo
+ *          Whether they are upper triangular matrices or lower triangular matrices.
+ *
+ * @param[in] diag
+ *          Whether they are unit diagonal matrices or not.
+ *
+ * @param[in] descA0
+ *          The descriptor of the original matrix A.
+ *
+ * @param[in] descAi
+ *          The descriptor of the computed inverse of A.
+ *
+ * @retval 0 successful comparison
+ *
+ *******************************************************************************
+ */
+int check_ztrtri( cham_mtxtype_t matrix_type, cham_uplo_t uplo, cham_diag_t diag,
+                  CHAM_desc_t *descA0, CHAM_desc_t *descAi )
+{
+    int info_local, info_global;
+    int N = descA0->m;
+    /* Triangle opposite to uplo; only used by the non general cases */
+    cham_uplo_t uplo_inv = ( uplo == ChamUpper ) ? ChamLower : ChamUpper;
+    CHAM_desc_t *descI, *descB = NULL;
+    double Rnorm, Anorm, Ainvnorm, result;
+    double eps = LAPACKE_dlamch_work('e');
+
+    /* Creates an identity matrix */
+    descI = CHAMELEON_Desc_Copy( descA0, NULL );
+    CHAMELEON_zlaset_Tile( ChamUpperLower, 0., 1., descI );
+
+    /* Calculates the residual I - A*(A**-1) in place in descI */
+    switch ( matrix_type ) {
+#if defined(PRECISION_z) || defined(PRECISION_c)
+    case ChamHermitian:
+        /* Ainv comes from potri and is hermitian */
+        assert( uplo != ChamUpperLower );
+
+        Anorm    = CHAMELEON_zlanhe_Tile( ChamOneNorm, uplo, descA0 );
+        Ainvnorm = CHAMELEON_zlanhe_Tile( ChamOneNorm, uplo, descAi );
+
+        /*
+         * Expand Ainv into a full matrix (conjugate transpose in the
+         * opposite triangle) and call ZHEMM to multiply it on the left by A.
+         */
+        descB = CHAMELEON_Desc_Copy( descAi, NULL );
+        CHAMELEON_ztradd_Tile( uplo_inv, ChamConjTrans, 1., descAi, 0., descB );
+        CHAMELEON_zlacpy_Tile( uplo, descAi, descB );
+
+        CHAMELEON_zhemm_Tile( ChamLeft, uplo, -1., descA0, descB, 1., descI );
+        break;
+#endif
+
+    case ChamSymmetric:
+        /* Ainv comes from potri and is symmetric */
+        assert( uplo != ChamUpperLower );
+
+        Anorm    = CHAMELEON_zlansy_Tile( ChamOneNorm, uplo, descA0 );
+        Ainvnorm = CHAMELEON_zlansy_Tile( ChamOneNorm, uplo, descAi );
+
+        /*
+         * Expand Ainv into a full matrix (transpose in the opposite
+         * triangle) and call ZSYMM to multiply it on the left by A.
+         */
+        descB = CHAMELEON_Desc_Copy( descAi, NULL );
+        CHAMELEON_ztradd_Tile( uplo_inv, ChamTrans, 1., descAi, 0., descB );
+        CHAMELEON_zlacpy_Tile( uplo, descAi, descB );
+
+        CHAMELEON_zsymm_Tile( ChamLeft, uplo, -1., descA0, descB, 1., descI );
+        break;
+
+    case ChamTriangular:
+        /* Ainv comes from trtri */
+        assert( uplo != ChamUpperLower );
+
+        Anorm    = CHAMELEON_zlantr_Tile( ChamOneNorm, uplo, diag, descA0 );
+        Ainvnorm = CHAMELEON_zlantr_Tile( ChamOneNorm, uplo, diag, descAi );
+
+        /*
+         * Builds a full copy of Ainv: the uplo triangle comes from descAi and
+         * the opposite triangle is set to zero. The order of the two calls
+         * decides which of them writes the diagonal last.
+         */
+        descB = CHAMELEON_Desc_Copy( descAi, NULL );
+
+        if ( diag == ChamUnit ) {
+            /* laset runs last: the diagonal is set to one */
+            CHAMELEON_zlacpy_Tile( uplo, descAi, descB );
+            CHAMELEON_zlaset_Tile( uplo_inv, 0., 1., descB );
+        }
+        else {
+            /* lacpy runs last: the diagonal comes from Ainv */
+            CHAMELEON_zlaset_Tile( uplo_inv, 0., 1., descB );
+            CHAMELEON_zlacpy_Tile( uplo, descAi, descB );
+        }
+
+        /* Computes - A * A^-1 */
+        CHAMELEON_ztrmm_Tile( ChamLeft, uplo, ChamNoTrans, diag, -1., descA0, descB );
+        /* Computes I - A * A^-1 */
+        CHAMELEON_zgeadd_Tile( ChamNoTrans, 1., descB, 1., descI );
+        break;
+
+    case ChamGeneral:
+    default:
+        /* Ainv comes from getri */
+        assert( uplo == ChamUpperLower );
+
+        Anorm    = CHAMELEON_zlange_Tile( ChamOneNorm, descA0 );
+        Ainvnorm = CHAMELEON_zlange_Tile( ChamOneNorm, descAi );
+
+        CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, -1., descA0, descAi, 1., descI );
+        break;
+    }
+
+    Rnorm = CHAMELEON_zlange_Tile( ChamOneNorm, descI );
+
+    /* Compares the residual's norm */
+    result = Rnorm / ( Anorm * Ainvnorm * N * eps );
+    info_local = ( isnan(Ainvnorm) || isinf(Ainvnorm) ||
+                   isnan(result) || isinf(result) || (result > 60.0) ) ? 1 : 0;
+
+    CHAMELEON_Desc_Destroy( &descI );
+    if ( descB != NULL ) {
+        CHAMELEON_Desc_Destroy( &descB );
+    }
+
+    /* Reduces the result on all processes */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Allreduce( &info_local, &info_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );
+#else
+    info_global = info_local;
+#endif
+
+    return info_global;
+}
+
+/**
+ * @brief Checks the orthogonality of Q through the norm of Id - Q'Q
+ *        (M >= N) or Id - QQ' (M < N).
+ *
+ * @param[in] descQ
+ *          The descriptor of the Q matrix.
+ *
+ * @retval 0 successful check
+ * @retval 1 the scaled residual is NaN, infinite or larger than 60
+ */
+int check_zortho( CHAM_desc_t *descQ )
+{
+    int info_local, info_global;
+    int M     = descQ->m;
+    int N     = descQ->n;
+    int minMN = chameleon_min(M, N);
+    /* The product is formed on the smallest dimension of Q */
+    cham_trans_t trans = ( M >= N ) ? ChamConjTrans : ChamNoTrans;
+    double result, normR;
+    double eps = LAPACKE_dlamch_work('e');
+    CHAM_desc_t *descI, *subI;
+
+    /* Builds the identity */
+    descI = CHAMELEON_Desc_Copy( descQ, NULL );
+    subI  = chameleon_desc_submatrix( descI, 0, 0, minMN, minMN );
+    CHAMELEON_zlaset_Tile( ChamUpperLower, 0., 1., subI );
+
+    /* Performs Id - Q'Q (or Id - QQ') on the upper triangle */
+    CHAMELEON_zherk_Tile( ChamUpper, trans, -1., descQ, 1., subI );
+
+    /* Verifies the residual's norm */
+    normR  = CHAMELEON_zlansy_Tile( ChamOneNorm, ChamUpper, subI );
+    result = normR / ( (double)minMN * eps );
+
+    info_local = ( isnan(result) || isinf(result) || (result > 60.0) ) ? 1 : 0;
+
+    free( subI );
+    CHAMELEON_Desc_Destroy( &descI );
+
+    /* Reduces the result on all processes */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Allreduce( &info_local, &info_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );
+#else
+    info_global = info_local;
+#endif
+
+    return info_global;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Checks if a Chameleon computed LQ factorization is correct.
+ *
+ *******************************************************************************
+ *
+ * @param[in] descA
+ *          The descriptor of the initial matrix A.
+ *
+ * @param[in] descAF
+ *          The descriptor of the factorized matrix A.
+ *
+ * @param[in] descQ
+ *          The descriptor of the Q matrix generated from the factorized
+ *          matrix A (descAF).
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the scaled residual is NaN, infinite or larger than 60
+ *
+ *******************************************************************************
+ */
+int check_zgelqf( CHAM_desc_t *descA, CHAM_desc_t *descAF, CHAM_desc_t *descQ )
+{
+    int info_local, info_global;
+    int M = descQ->m;
+    int N = descQ->n;
+    int K = chameleon_min( descA->m, descA->n );
+    double result, Anorm, Rnorm;
+    double eps = LAPACKE_dlamch_work('e');
+    CHAM_desc_t *descL;
+    CHAM_desc_t *subL = NULL, *subAF = NULL;
+    CHAM_desc_t *L, *AF;
+
+    descL = CHAMELEON_Desc_Copy( descA, NULL );
+
+    if ( (K < chameleon_min( M, N )) && (N >= M) ) {
+        /*
+         * LAPACK zlqt02.f case: works on L(1:k,1:m)
+         */
+        subL  = chameleon_desc_submatrix( descL,  0, 0, K, M );
+        subAF = chameleon_desc_submatrix( descAF, 0, 0, K, M );
+        L  = subL;
+        AF = subAF;
+    }
+    else {
+        /*
+         * LAPACK zlqt01.f case: works on the whole L
+         */
+        L  = descL;
+        AF = descAF;
+    }
+
+    /* Copy L */
+    CHAMELEON_zlaset_Tile( ChamUpperLower, 0., 0., L );
+    CHAMELEON_zlacpy_Tile( ChamLower, AF, L );
+
+    /* Compute L - A * Q' */
+    CHAMELEON_zgemm_Tile( ChamNoTrans, ChamConjTrans, -1., descA, descQ, 1., L );
+
+    Rnorm = CHAMELEON_zlange_Tile( ChamOneNorm, L );
+
+    /* The sub-matrix views are NULL in the zlqt01 case, which free() accepts */
+    free( subL );
+    free( subAF );
+
+    CHAMELEON_Desc_Destroy(&descL);
+
+    Anorm  = CHAMELEON_zlange_Tile( ChamOneNorm, descA );
+    result = Rnorm / ( (double)N * Anorm * eps );
+
+    info_local = ( isnan(result) || isinf(result) || (result > 60.0) ) ? 1 : 0;
+
+    /* Reduces the result on all processes */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Allreduce( &info_local, &info_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );
+#else
+    info_global = info_local;
+#endif
+
+    return info_global;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup new_testing
+ *
+ * @brief Checks if a Chameleon computed QR factorization is correct.
+ *
+ *******************************************************************************
+ *
+ * @param[in] descA
+ *          The descriptor of the initial matrix A.
+ *
+ * @param[in] descAF
+ *          The descriptor of the factorized matrix A.
+ *
+ * @param[in] descQ
+ *          The descriptor of the Q matrix generated with a call to ungqr and
+ *          the factorized matrix A (descAF).
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the scaled residual is NaN, infinite or larger than 60
+ *
+ *******************************************************************************
+ */
+int check_zgeqrf( CHAM_desc_t *descA, CHAM_desc_t *descAF, CHAM_desc_t *descQ )
+{
+    int info_local, info_global;
+    int M = descQ->m;
+    int N = descQ->n;
+    int K = chameleon_min( descA->m, descA->n );
+    double result, Anorm, Rnorm;
+    double eps = LAPACKE_dlamch_work('e');
+    CHAM_desc_t *descR;
+    CHAM_desc_t *subR = NULL, *subAF = NULL;
+    CHAM_desc_t *R, *AF;
+
+    descR = CHAMELEON_Desc_Copy( descA, NULL );
+
+    if ( (K < chameleon_min( M, N )) && (M >= N) ) {
+        /*
+         * LAPACK zqrt02.f case: works on R(1:n,1:k)
+         */
+        subR  = chameleon_desc_submatrix( descR,  0, 0, N, K );
+        subAF = chameleon_desc_submatrix( descAF, 0, 0, N, K );
+        R  = subR;
+        AF = subAF;
+    }
+    else {
+        /*
+         * LAPACK zqrt01.f case: works on the whole R
+         */
+        R  = descR;
+        AF = descAF;
+    }
+
+    /* Copy R */
+    CHAMELEON_zlaset_Tile( ChamUpperLower, 0., 0., R );
+    CHAMELEON_zlacpy_Tile( ChamUpper, AF, R );
+
+    /* Compute R - Q'*A */
+    CHAMELEON_zgemm_Tile( ChamConjTrans, ChamNoTrans, -1., descQ, descA, 1., R );
+
+    Rnorm = CHAMELEON_zlange_Tile( ChamOneNorm, R );
+
+    /* The sub-matrix views are NULL in the zqrt01 case, which free() accepts */
+    free( subR );
+    free( subAF );
+
+    CHAMELEON_Desc_Destroy(&descR);
+
+    Anorm  = CHAMELEON_zlange_Tile( ChamOneNorm, descA );
+    result = Rnorm / ( (double)M * Anorm * eps );
+
+    info_local = ( isnan(result) || isinf(result) || (result > 60.0) ) ? 1 : 0;
+
+    /* Reduces the result on all processes */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Allreduce( &info_local, &info_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );
+#else
+    info_global = info_local;
+#endif
+
+    return info_global;
+}
+
+/**
+ * @brief Checks the application of Q to C: descCC is compared with
+ *        op(Q) * C (side == ChamLeft) or C * op(Q) (otherwise).
+ *
+ * @param[in] side
+ *          Whether Q is applied on the left or on the right of C.
+ *
+ * @param[in] trans
+ *          The operation applied to Q in the product.
+ *
+ * @param[in] descC
+ *          The descriptor of the initial matrix C.
+ *
+ * @param[in] descQ
+ *          The descriptor of the Q matrix.
+ *
+ * @param[inout] descCC
+ *          The descriptor of the computed product. On exit, it contains the
+ *          difference with the reference product.
+ *
+ * @retval 0 successful comparison
+ * @retval 1 the scaled residual is NaN, infinite or larger than 60
+ */
+int check_zqc( cham_side_t side, cham_trans_t trans,
+               CHAM_desc_t *descC, CHAM_desc_t *descQ, CHAM_desc_t *descCC )
+{
+    int info_local, info_global;
+    int M = descQ->m;
+    double Cnorm, Qnorm, CCnorm, Rnorm, result;
+    double eps = LAPACKE_dlamch_work('e');
+
+    Cnorm  = CHAMELEON_zlange_Tile( ChamOneNorm, descC );
+    Qnorm  = CHAMELEON_zlange_Tile( ChamOneNorm, descQ );
+    CCnorm = CHAMELEON_zlange_Tile( ChamOneNorm, descCC );
+
+    /* CC <- CC - op(Q) * C  or  CC <- CC - C * op(Q) */
+    if ( side == ChamLeft ) {
+        CHAMELEON_zgemm_Tile( trans, ChamNoTrans, -1., descQ, descC, 1., descCC );
+    }
+    else {
+        CHAMELEON_zgemm_Tile( ChamNoTrans, trans, -1., descC, descQ, 1., descCC );
+    }
+
+    Rnorm  = CHAMELEON_zlange_Tile( ChamOneNorm, descCC );
+    result = Rnorm / ( M * Cnorm * eps );
+
+    info_local = ( isnan(CCnorm) || isinf(CCnorm) ||
+                   isnan(result) || isinf(result) || (result > 60.0) ) ? 1 : 0;
+
+    /* Reduces the result on all processes */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Allreduce( &info_local, &info_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );
+#else
+    info_global = info_local;
+#endif
+
+    /* The norm of Q is computed but does not enter the verdict */
+    (void)Qnorm;
+    return info_global;
+}
+
+/**
+ * @brief Checks the residual of a least squares solution computed through a
+ *        QR factorization (only implemented for trans == ChamNoTrans).
+ *
+ * @param[in] trans
+ *          The operation applied to A. Any value other than ChamNoTrans
+ *          prints a message and returns 0 without checking anything.
+ *
+ * @param[in] descA
+ *          The descriptor of the matrix A.
+ *
+ * @param[in] Bnorm
+ *          The norm of the right hand side B, used to scale the residual.
+ *
+ * @param[in] descR
+ *          The descriptor of the residual matrix R.
+ *
+ * @retval 0 successful check, or check not implemented
+ * @retval 1 the scaled residual is NaN, infinite or larger than 60
+ */
+int check_zgeqrs( cham_trans_t trans, CHAM_desc_t *descA, double Bnorm, CHAM_desc_t *descR )
+{
+    int info_local, info_global, nb;
+    int M      = descA->m;
+    int N      = descA->n;
+    int NRHS   = descR->n;
+    int maxMNK = chameleon_max( M, chameleon_max( N, NRHS ) );
+    double Rnorm, result;
+    double Anorm = CHAMELEON_zlange_Tile( ChamOneNorm, descA );
+    double eps   = LAPACKE_dlamch_work('e');
+    CHAM_desc_t *descRR;
+
+    CHAMELEON_Get( CHAMELEON_TILE_SIZE, &nb );
+
+    if ( trans != ChamNoTrans ) {
+        /*
+         * To implement this test, we need to look at LAPACK working note 41, page 29
+         * and more especially to lapack/testings/lin/[sdcz]qrt14.f
+         */
+        fprintf(stderr, "GEQRS testing not implemented with M >= N when transA = ChamConjTrans\n");
+        return 0;
+    }
+
+    /*
+     * Corresponds to lapack/testings/lin/[sdcz]qrt17.f
+     *
+     * ZQRT17 computes the ratio
+     *
+     *    || R'*op(A) ||/(||A||*alpha*max(M,N,NRHS)*eps)
+     *
+     * where R = op(A)*X - B, op(A) is A or A', and alpha = ||B||
+     *
+     */
+    CHAMELEON_Desc_Create( &descRR, NULL, ChamComplexDouble, nb, nb, nb*nb,
+                           NRHS, N, 0, 0, NRHS, N, descA->p, descA->q );
+
+    CHAMELEON_zgemm_Tile( ChamConjTrans, trans, 1., descR, descA, 0., descRR );
+
+    Rnorm  = CHAMELEON_zlange_Tile( ChamOneNorm, descRR );
+    result = Rnorm / (Anorm * Bnorm * eps * maxMNK);
+    CHAMELEON_Desc_Destroy( &descRR );
+
+    info_local = ( isnan(Rnorm) || isinf(Rnorm) ||
+                   isnan(result) || isinf(result) || (result > 60.0) ) ? 1 : 0;
+
+    /* Reduces the result on all processes */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Allreduce( &info_local, &info_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );
+#else
+    info_global = info_local;
+#endif
+
+    return info_global;
+}
+
+/**
+ * @brief Checks the residual of a least squares solution computed through an
+ *        LQ factorization (only implemented for trans != ChamNoTrans).
+ *
+ * @param[in] trans
+ *          The operation applied to A. ChamNoTrans prints a message and
+ *          returns 0 without checking anything.
+ *
+ * @param[in] descA
+ *          The descriptor of the matrix A.
+ *
+ * @param[in] Bnorm
+ *          The norm of the right hand side B, used to scale the residual.
+ *
+ * @param[in] descR
+ *          The descriptor of the residual matrix R.
+ *
+ * @retval 0 successful check, or check not implemented
+ * @retval 1 the scaled residual is NaN, infinite or larger than 60
+ */
+int check_zgelqs( cham_trans_t trans, CHAM_desc_t *descA, double Bnorm, CHAM_desc_t *descR )
+{
+    int info_local, info_global, nb;
+    int M      = descA->m;
+    int N      = descA->n;
+    int NRHS   = descR->n;
+    int maxMNK = chameleon_max( M, chameleon_max( N, NRHS ) );
+    double Rnorm, result;
+    double Anorm = CHAMELEON_zlange_Tile( ChamOneNorm, descA );
+    double eps   = LAPACKE_dlamch_work('e');
+    CHAM_desc_t *descRR;
+
+    CHAMELEON_Get( CHAMELEON_TILE_SIZE, &nb );
+
+    if ( trans == ChamNoTrans ) {
+        /*
+         * To implement this test, we need to look at LAPACK working note 41, page 29
+         * and more especially to lapack/testings/lin/[sdcz]lqt14.f
+         */
+        fprintf(stderr, "GELQS testing not implemented with N > M when transA = ChamNoTrans\n");
+        return 0;
+    }
+
+    /*
+     * Corresponds to lapack/testings/lin/[sdcz]qrt17.f
+     *
+     * ZQRT17 computes the ratio
+     *
+     *    || R'*op(A) ||/(||A||*alpha*max(M,N,NRHS)*eps)
+     *
+     * where R = op(A)*X - B, op(A) is A or A', and alpha = ||B||
+     *
+     */
+    CHAMELEON_Desc_Create( &descRR, NULL, ChamComplexDouble, nb, nb, nb*nb,
+                           NRHS, M, 0, 0, NRHS, M, descA->p, descA->q );
+
+    CHAMELEON_zgemm_Tile( ChamConjTrans, trans, 1., descR, descA, 0., descRR );
+
+    Rnorm  = CHAMELEON_zlange_Tile( ChamOneNorm, descRR );
+    result = Rnorm / (Anorm * Bnorm * eps * maxMNK);
+    CHAMELEON_Desc_Destroy( &descRR );
+
+    info_local = ( isnan(Rnorm) || isinf(Rnorm) ||
+                   isnan(result) || isinf(result) || (result > 60.0) ) ? 1 : 0;
+
+    /* Reduces the result on all processes */
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Allreduce( &info_local, &info_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD );
+#else
+    info_global = info_local;
+#endif
+
+    return info_global;
+}
+
+/**
+ * @brief Checks a least squares solution of op(A) * X = B.
+ *
+ * @param[in] trans
+ *          The operation applied to A.
+ *
+ * @param[in] descA
+ *          The descriptor of the matrix A.
+ *
+ * @param[in] descX
+ *          The descriptor of the computed solution X.
+ *
+ * @param[inout] descB
+ *          The descriptor of the right hand side B. It is handed to
+ *          check_zsolve, which overwrites it with the residual, and then to
+ *          the QR or LQ residual check.
+ *
+ * @retval The value returned by check_zgeqrs (M >= N) or check_zgelqs (M < N).
+ */
+int check_zgels( cham_trans_t trans, CHAM_desc_t *descA, CHAM_desc_t *descX, CHAM_desc_t *descB )
+{
+    int info_solution;
+    int M = descA->m;
+    int N = descA->n;
+    /* Taken before check_zsolve overwrites descB */
+    double Bnorm = CHAMELEON_zlange_Tile( ChamInfNorm, descB );
+
+    /*
+     * The verdict of check_zsolve is replaced by the one of the residual
+     * check below.
+     */
+    info_solution = check_zsolve( ChamGeneral, trans, ChamUpperLower,
+                                  descA, descX, descB );
+
+    info_solution = ( M >= N ) ? check_zgeqrs( trans, descA, Bnorm, descB )
+                               : check_zgelqs( trans, descA, Bnorm, descB );
+
+#if defined(CHAMELEON_USE_MPI)
+    MPI_Bcast( &info_solution, 1, MPI_INT, 0, MPI_COMM_WORLD );
+#endif
+
+    return info_solution;
+}
+
+
diff --git a/new-testing/testing_zcheck.h b/new-testing/testing_zcheck.h
new file mode 100644
index 000000000..08e98f2e3
--- /dev/null
+++ b/new-testing/testing_zcheck.h
@@ -0,0 +1,69 @@
+/**
+ *
+ * @file testing_zcheck.h
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon CHAMELEON_Complex64_t auxiliary testings header + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-07-16 + * @precisions normal z -> c d s + * + */ + +#ifndef _testing_zcheck_h_ +#define _testing_zcheck_h_ + +#include "testings.h" +#include <math.h> + +#ifdef WIN32 +#include <float.h> +#define isnan _isnan +#endif + +#define CHECK_TRMM 3 +#define CHECK_TRSM 4 + +void print_zmatrix ( int M, int N, CHAMELEON_Complex64_t *A, int LDA ); +void print_zdesc_matrix ( CHAM_desc_t *descA ); +void zsabotage ( CHAM_desc_t *descA ); +void potri_product ( cham_uplo_t uplo, CHAM_desc_t *descA1, CHAM_desc_t *descA2 ); + +int check_zmatrices ( cham_uplo_t uplo, CHAM_desc_t *descA, CHAM_desc_t *descB ); +int check_znorm ( cham_mtxtype_t mtxtype, cham_normtype_t norm_type, cham_uplo_t uplo, + cham_diag_t diag, double norm_cham, CHAM_desc_t *descA ); +int check_zsum ( cham_uplo_t uplo, cham_trans_t trans, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, + CHAMELEON_Complex64_t beta, CHAM_desc_t *descBref, CHAM_desc_t *descBcham ); +int check_zscale ( cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA1, CHAM_desc_t *descA2 ); +int check_zgemm ( cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, + CHAM_desc_t *descB, CHAMELEON_Complex64_t beta, CHAM_desc_t *descCref, CHAM_desc_t *descC ); +int check_zsymm ( cham_mtxtype_t mtxtype, cham_side_t side, cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, CHAM_desc_t *descB, + CHAMELEON_Complex64_t beta, CHAM_desc_t *descCref, CHAM_desc_t *descC ); +int check_zsyrk ( cham_mtxtype_t mtxtype, cham_uplo_t uplo, cham_trans_t trans, CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, + CHAM_desc_t *descB, CHAMELEON_Complex64_t beta, CHAM_desc_t *descCref, CHAM_desc_t *descC ); +int check_ztrmm ( int check_func, cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, cham_diag_t diag, + 
CHAMELEON_Complex64_t alpha, CHAM_desc_t *descA, CHAM_desc_t *descB, CHAM_desc_t *descBref ); +int check_zlauum ( cham_uplo_t uplo, CHAM_desc_t *descA1, CHAM_desc_t *descA2 ); +int check_zxxtrf ( run_arg_list_t *args, cham_mtxtype_t mtxtype, cham_uplo_t uplo, CHAM_desc_t *descA1, CHAM_desc_t *descA2 ); +int check_zsolve ( cham_mtxtype_t mtxtype, cham_trans_t trans, cham_uplo_t uplo, + CHAM_desc_t *descA, CHAM_desc_t *descX, CHAM_desc_t *descB ); +int check_ztrtri ( cham_mtxtype_t mtxtype, cham_uplo_t uplo, cham_diag_t diag, + CHAM_desc_t *descA, CHAM_desc_t *descAi ); + +/* Using QR factorization */ +int check_zortho ( CHAM_desc_t *descQ ); +int check_zgeqrf ( CHAM_desc_t *descA, CHAM_desc_t *descAF, CHAM_desc_t *descQ ); +int check_zgelqf ( CHAM_desc_t *descA, CHAM_desc_t *descAF, CHAM_desc_t *descQ ); +int check_zgels ( cham_trans_t trans, CHAM_desc_t *descA, CHAM_desc_t *descX, CHAM_desc_t *descB ); +int check_zgeqrs ( cham_trans_t trans, CHAM_desc_t *descA, double Bnorm, CHAM_desc_t *descR ); +int check_zgelqs ( cham_trans_t trans, CHAM_desc_t *descA, double Bnorm, CHAM_desc_t *descR ); +int check_zqc ( cham_side_t side, cham_trans_t trans, CHAM_desc_t *descC, CHAM_desc_t *descQ, CHAM_desc_t *descCC ); + +#endif + diff --git a/new-testing/testing_zgeadd.c b/new-testing/testing_zgeadd.c new file mode 100644 index 000000000..3f9cdb749 --- /dev/null +++ b/new-testing/testing_zgeadd.c @@ -0,0 +1,133 @@ +/** + * + * @file testing_zgeadd.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgeadd testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-06 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +static cham_fixdbl_t +flops_zgeadd( int M, int N ) +{ + cham_fixdbl_t flops = 0.; +#if defined( PRECISION_z ) || defined( PRECISION_c ) + /* 2 multiplications and 1 addition per element */ + flops = ( 2. * 6. + 2. ) * M * N; +#else + flops = ( 2. + 1. ) * M * N; +#endif + + return flops; +} + +int +testing_zgeadd( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + int Am, An; + CHAM_desc_t *descA, *descB; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", ( ( trans == ChamNoTrans ) ? 
M : N ) ); + int LDB = run_arg_get_int( args, "LDB", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + CHAMELEON_Complex64_t alpha = testing_zalea(); + CHAMELEON_Complex64_t beta = testing_zalea(); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgeadd( M, N ); + + alpha = run_arg_get_Complex64( args, "alpha", alpha ); + beta = run_arg_get_Complex64( args, "beta", beta ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + if ( trans != ChamNoTrans ) { + Am = N; + An = M; + } + else { + Am = M; + An = N; + } + + /* Create the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, An, 0, 0, Am, An, P, Q ); + CHAMELEON_Desc_Create( + &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, N, 0, 0, M, N, P, Q ); + + /* Fill the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + /* Compute the sum */ + START_TIMING( t ); + hres = CHAMELEON_zgeadd_Tile( trans, alpha, descA, beta, descB ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Check the solution */ + if ( check ) { + CHAM_desc_t *descB0 = CHAMELEON_Desc_Copy( descB, NULL ); + CHAMELEON_zplrnt_Tile( descB0, seedB ); + + hres += check_zsum( ChamUpperLower, trans, alpha, descA, beta, descB0, descB ); + + CHAMELEON_Desc_Destroy( &descB0 ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descB ); + + run_id++; + return hres; +} + +testing_t test_zgeadd; +const char *zgeadd_params[] = { "nb", "trans", "m", "n", "lda", "ldb", + "alpha", "beta", "seedA", "seedB", NULL }; +const char *zgeadd_output[] = { NULL }; +const char *zgeadd_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgeadd_init( void ) __attribute__( ( constructor ) ); +void 
+testing_zgeadd_init( void ) +{ + test_zgeadd.name = "zgeadd"; + test_zgeadd.helper = "zgeadd"; + test_zgeadd.params = zgeadd_params; + test_zgeadd.output = zgeadd_output; + test_zgeadd.outchk = zgeadd_outchk; + test_zgeadd.params_list = "nb;P;trans;m;n;lda;ldb;alpha;beta;seedA;seedB"; + test_zgeadd.fptr = testing_zgeadd; + test_zgeadd.next = NULL; + + testing_register( &test_zgeadd ); +} diff --git a/new-testing/testing_zgelqf.c b/new-testing/testing_zgelqf.c new file mode 100644 index 000000000..cf1fba3bd --- /dev/null +++ b/new-testing/testing_zgelqf.c @@ -0,0 +1,117 @@ +/** + * + * @file testing_zgelqf.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgelqf testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-09-10 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zgelqf( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA, *descT; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int RH = run_arg_get_int( args, "qra", 4 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgelqf( M, N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + if ( RH > 0 ) { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder ); + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH ); + } + else { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder ); + } + + /* Creates the matrices 
*/ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descT, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + + /* Calculates the solution */ + START_TIMING( t ); + hres = CHAMELEON_zgelqf_Tile( descA, descT ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the factorisation and orthogonality */ + if ( check ) { + CHAM_desc_t *descQ; + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + + CHAMELEON_Desc_Create( + &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, N, N, 0, 0, N, N, P, Q ); + CHAMELEON_zplrnt_Tile( descA0, seedA ); + + CHAMELEON_zunglq_Tile( descA, descT, descQ ); + + hres += check_zgelqf( descA0, descA, descQ ); + hres += check_zortho( descQ ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descQ ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descT ); + + run_id++; + return hres; +} + +testing_t test_zgelqf; +const char *zgelqf_params[] = { "nb", "ib", "m", "n", "lda", "qra", "seedA", NULL }; +const char *zgelqf_output[] = { NULL }; +const char *zgelqf_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgelqf_init( void ) __attribute__( ( constructor ) ); +void +testing_zgelqf_init( void ) +{ + test_zgelqf.name = "zgelqf"; + test_zgelqf.helper = "zgelqf"; + test_zgelqf.params = zgelqf_params; + test_zgelqf.output = zgelqf_output; + test_zgelqf.outchk = zgelqf_outchk; + test_zgelqf.params_list = "nb;ib;P;m;n;lda;rh;seedA"; + test_zgelqf.fptr = testing_zgelqf; + test_zgelqf.next = NULL; + + testing_register( &test_zgelqf ); +} diff --git a/new-testing/testing_zgelqf_hqr.c b/new-testing/testing_zgelqf_hqr.c new file mode 100644 index 000000000..10796b061 --- /dev/null +++ b/new-testing/testing_zgelqf_hqr.c @@ 
-0,0 +1,128 @@ +/** + * + * @file testing_zgelqf_hqr.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgelqf_param testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-09-10 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zgelqf_hqr( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA, *descTS, *descTT; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int qr_a = run_arg_get_int( args, "qra", -1 ); + int qr_p = run_arg_get_int( args, "qrp", -1 ); + int llvl = run_arg_get_int( args, "llvl", -1 ); + int hlvl = run_arg_get_int( args, "hlvl", -1 ); + int domino = run_arg_get_int( args, "domino", -1 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgelqf( M, N ); + + libhqr_tree_t qrtree; + libhqr_matrix_t matrix; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTT, P, Q ); + + /* Initialize matrix tree */ + matrix.mt = descTS->mt; + matrix.nt = descTS->nt; + matrix.nodes = P * Q; + matrix.p = P; + + libhqr_init_hqr( &qrtree, LIBHQR_LQ, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( 
descA, seedA ); + + /* Calculates the solution */ + START_TIMING( t ); + hres = CHAMELEON_zgelqf_param_Tile( &qrtree, descA, descTS, descTT ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the factorisation and orthogonality */ + if ( check ) { + CHAM_desc_t *descQ; + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + + CHAMELEON_Desc_Create( + &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, N, N, 0, 0, N, N, P, Q ); + CHAMELEON_zplrnt_Tile( descA0, seedA ); + + CHAMELEON_zunglq_param_Tile( &qrtree, descA, descTS, descTT, descQ ); + + hres += check_zgelqf( descA0, descA, descQ ); + hres += check_zortho( descQ ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descQ ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descTS ); + CHAMELEON_Desc_Destroy( &descTT ); + libhqr_finalize( &qrtree ); + + run_id++; + return hres; +} + +testing_t test_zgelqf_hqr; +const char *zgelqf_hqr_params[] = { "nb", "ib", "m", "n", "lda", "qra", + "qrp", "llvl", "hlvl", "domino", "seedA", NULL }; +const char *zgelqf_hqr_output[] = { NULL }; +const char *zgelqf_hqr_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgelqf_hqr_init( void ) __attribute__( ( constructor ) ); +void +testing_zgelqf_hqr_init( void ) +{ + test_zgelqf_hqr.name = "zgelqf_hqr"; + test_zgelqf_hqr.helper = "zgelqf_hqr"; + test_zgelqf_hqr.params = zgelqf_hqr_params; + test_zgelqf_hqr.output = zgelqf_hqr_output; + test_zgelqf_hqr.outchk = zgelqf_hqr_outchk; + test_zgelqf_hqr.params_list = "nb;ib;P;m;n;lda;qra;qrp;llvl;hlvl;domino;seedA"; + test_zgelqf_hqr.fptr = testing_zgelqf_hqr; + test_zgelqf_hqr.next = NULL; + + testing_register( &test_zgelqf_hqr ); +} diff --git a/new-testing/testing_zgelqs.c b/new-testing/testing_zgelqs.c new file mode 100644 index 000000000..77e704575 --- /dev/null +++ 
b/new-testing/testing_zgelqs.c @@ -0,0 +1,141 @@ +/** + * + * @file testing_zgelqs.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgelqs testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-09-10 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +#include "control/common.h" + +static cham_fixdbl_t +flops_zgelqs() +{ + cham_fixdbl_t flops = 0.; /* flop count not implemented: the reported gflops is always 0 */ + return flops; +} +/* Factorizes a random M-by-N matrix (N > M required) with CHAMELEON_zgelqf_Tile, solves with CHAMELEON_zgelqs_Tile, records time/gflops in args and, when check is nonzero, adds the check_zsolve status to the returned value. Returns -1 when M >= N (test skipped). */ +int +testing_zgelqs( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA, *descX, *descT; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", M ); + int LDB = run_arg_get_int( args, "LDB", N ); + int RH = run_arg_get_int( args, "qra", 0 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgelqs(); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + if ( M >= N ) { + if ( CHAMELEON_Comm_rank() == 0 ) { + fprintf( stderr, "SKIPPED: The LQ solution is performed only when N > M\n" ); + } + return -1; + } + + if ( RH > 0 ) { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder ); + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH ); + } + else { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder ); + } + + /* Creates the matrices; X gets N rows because the check below reads an N-by-NRHS solution from it (N > M here) */ + CHAMELEON_Desc_Create( + &descA, NULL, 
ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Desc_Create( + &descX, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, N, NRHS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descT, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descX, seedB ); + + /* Factorizes A, then calculates the solution; only the solve is timed */ + hres = CHAMELEON_zgelqf_Tile( descA, descT ); + START_TIMING( t ); + hres += CHAMELEON_zgelqs_Tile( descA, descT, descX ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the solution against A and B regenerated from their seeds */ + if ( check ) { + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAM_desc_t *descB = CHAMELEON_Desc_Copy( descX, NULL ); + CHAM_desc_t *subX = chameleon_desc_submatrix( descX, 0, 0, N, NRHS ); + CHAM_desc_t *subB = chameleon_desc_submatrix( descB, 0, 0, M, NRHS ); + + CHAMELEON_zplrnt_Tile( descA0, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + hres += check_zsolve( ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, subX, subB ); + + free( subB ); + free( subX ); + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descX ); + CHAMELEON_Desc_Destroy( &descT ); + + run_id++; + return hres; +} + +testing_t test_zgelqs; +const char *zgelqs_params[] = { "nb", "ib", "m", "n", "k", "lda", + "ldb", "qra", "seedA", "seedB", NULL }; +const char *zgelqs_output[] = { NULL }; +const char *zgelqs_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgelqs_init( void ) __attribute__( ( constructor ) ); +void +testing_zgelqs_init( void ) +{ + test_zgelqs.name = "zgelqs"; + test_zgelqs.helper = "zgelqs"; + test_zgelqs.params = zgelqs_params; + test_zgelqs.output = zgelqs_output; + test_zgelqs.outchk = zgelqs_outchk; + test_zgelqs.params_list = "nb;ib;P;m;n;k;lda;ldb;rh;seedA;seedB"; + test_zgelqs.fptr = testing_zgelqs; + test_zgelqs.next = NULL; + + testing_register( &test_zgelqs ); +} diff --git a/new-testing/testing_zgels.c 
b/new-testing/testing_zgels.c new file mode 100644 index 000000000..c5f88d3d5 --- /dev/null +++ b/new-testing/testing_zgels.c @@ -0,0 +1,151 @@ +/** + * + * @file testing_zgels.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgels testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-28 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +#include "../control/common.h" + +static cham_fixdbl_t +flops_zgels( cham_trans_t trans, int M, int N, int NRHS ) +{ + cham_fixdbl_t flops = 0.; /* flop count not implemented: the reported gflops is always 0 */ + return flops; +} +/* Runs CHAMELEON_zgels_Tile on a random M-by-N matrix A and a random right-hand side, records time/gflops in args and, when check is nonzero, adds the check_zgels status to the returned value. */ +int +testing_zgels( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA, *descX, *descT; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int maxMN = chameleon_max( M, N ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", M ); + int LDB = run_arg_get_int( args, "LDB", maxMN ); + int RH = run_arg_get_int( args, "qra", 4 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgels( trans, M, N, NRHS ); + + /* Make sure trans is only Notrans or ConjTrans */ + trans = ( trans == ChamNoTrans ) ? 
trans : ChamConjTrans; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + if ( RH > 0 ) { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder ); + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH ); + } + else { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder ); + } + + /* Creates the matrices; X has max(M,N) rows so that it can hold both the right-hand side and the solution */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Desc_Create( + &descX, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, maxMN, NRHS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descT, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descX, seedB ); + + /* Computes the solution */ + START_TIMING( t ); + hres = CHAMELEON_zgels_Tile( trans, descA, descT, descX ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + if ( check ) { + CHAM_desc_t *descA0, *descB; + CHAM_desc_t *subX, *subB; + + CHAMELEON_Desc_Create( + &descA0, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Desc_Create( + &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, maxMN, NRHS, P, Q ); + + CHAMELEON_zplrnt_Tile( descA0, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + if ( trans == ChamNoTrans ) { + subX = chameleon_desc_submatrix( descX, 0, 0, N, NRHS ); + subB = chameleon_desc_submatrix( descB, 0, 0, M, NRHS ); + } + else { + subX = chameleon_desc_submatrix( descX, 0, 0, M, NRHS ); + subB = chameleon_desc_submatrix( descB, 0, 0, N, NRHS ); + } + + /* Check the residual; accumulate so that a failure of CHAMELEON_zgels_Tile is not masked */ + hres += check_zgels( trans, descA0, subX, subB ); // A(Ax-B) == 0? 
+ + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + + free( subB ); + free( subX ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descT ); + CHAMELEON_Desc_Destroy( &descX ); + + run_id++; + return hres; +} + +testing_t test_zgels; +const char *zgels_params[] = { "nb", "ib", "trans", "m", "n", "k", + "lda", "ldb", "qra", "seedA", "seedB", NULL }; +const char *zgels_output[] = { NULL }; +const char *zgels_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgels_init( void ) __attribute__( ( constructor ) ); +void +testing_zgels_init( void ) +{ + test_zgels.name = "zgels"; + test_zgels.helper = "zgels"; + test_zgels.params = zgels_params; + test_zgels.output = zgels_output; + test_zgels.outchk = zgels_outchk; + test_zgels.params_list = "nb;ib;P;trans;m;n;k;lda;ldb;rh;seedA;seedB"; + test_zgels.fptr = testing_zgels; + test_zgels.next = NULL; + + testing_register( &test_zgels ); +} diff --git a/new-testing/testing_zgels_hqr.c b/new-testing/testing_zgels_hqr.c new file mode 100644 index 000000000..9ae361b25 --- /dev/null +++ b/new-testing/testing_zgels_hqr.c @@ -0,0 +1,161 @@ +/** + * + * @file testing_zgels_hqr.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgels_hqr testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-28 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +#include "../control/common.h" + +static cham_fixdbl_t +flops_zgels_hqr( cham_trans_t trans, int M, int N, int NRHS ) +{ + cham_fixdbl_t flops = 0.; /* flop count not implemented: the reported gflops is always 0 */ + return flops; +} +/* Runs CHAMELEON_zgels_param_Tile with a libhqr reduction tree on a random M-by-N matrix A and a random right-hand side, records time/gflops in args and, when check is nonzero, adds the check_zgels status to the returned value. */ +int +testing_zgels_hqr( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA, *descX, *descTS, *descTT; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int maxMN = chameleon_max( M, N ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", M ); + int LDB = run_arg_get_int( args, "LDB", maxMN ); + int qr_a = run_arg_get_int( args, "qra", -1 ); + int qr_p = run_arg_get_int( args, "qrp", -1 ); + int llvl = run_arg_get_int( args, "llvl", -1 ); + int hlvl = run_arg_get_int( args, "hlvl", -1 ); + int domino = run_arg_get_int( args, "domino", -1 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgels_hqr( trans, M, N, NRHS ); + + libhqr_tree_t qrtree; + libhqr_matrix_t matrix; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* Creates the matrices; X has max(M,N) rows so that it can hold both the right-hand side and the solution */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Desc_Create( + &descX, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, maxMN, NRHS, P, Q 
); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTT, P, Q ); + + /* Initialize matrix tree; a QR tree is built when M >= N and an LQ tree otherwise */ + matrix.mt = descTS->mt; + matrix.nt = descTS->nt; + matrix.nodes = P * Q; + matrix.p = P; + + libhqr_init_hqr( + &qrtree, ( M >= N ) ? LIBHQR_QR : LIBHQR_LQ, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descX, seedB ); + + /* Computes the solution */ + START_TIMING( t ); + hres = CHAMELEON_zgels_param_Tile( &qrtree, trans, descA, descTS, descTT, descX ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + if ( check ) { + CHAM_desc_t *descA0, *descB; + CHAM_desc_t *subX, *subB; + + CHAMELEON_Desc_Create( + &descA0, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Desc_Create( + &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, maxMN, NRHS, P, Q ); + + CHAMELEON_zplrnt_Tile( descA0, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + if ( trans == ChamNoTrans ) { + subX = chameleon_desc_submatrix( descX, 0, 0, N, NRHS ); + subB = chameleon_desc_submatrix( descB, 0, 0, M, NRHS ); + } + else { + subX = chameleon_desc_submatrix( descX, 0, 0, M, NRHS ); + subB = chameleon_desc_submatrix( descB, 0, 0, N, NRHS ); + } + + /* Check the residual; accumulate so that a failure of CHAMELEON_zgels_param_Tile is not masked */ + hres += check_zgels( trans, descA0, subX, subB ); // A(Ax-B) == 0? 
+ + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + + free( subB ); + free( subX ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descTS ); + CHAMELEON_Desc_Destroy( &descTT ); + CHAMELEON_Desc_Destroy( &descX ); + libhqr_finalize( &qrtree ); + + run_id++; + return hres; +} + +testing_t test_zgels_hqr; +const char *zgels_hqr_params[] = { "nb", "ib", "trans", "m", "n", "k", + "lda", "ldb", "qra", "qrp", "llvl", + "hlvl", "domino", "seedA", "seedB", NULL }; +const char *zgels_hqr_output[] = { NULL }; +const char *zgels_hqr_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgels_hqr_init( void ) __attribute__( ( constructor ) ); +void +testing_zgels_hqr_init( void ) +{ + test_zgels_hqr.name = "zgels_hqr"; + test_zgels_hqr.helper = "zgels_hqr"; + test_zgels_hqr.params = zgels_hqr_params; + test_zgels_hqr.output = zgels_hqr_output; + test_zgels_hqr.outchk = zgels_hqr_outchk; + test_zgels_hqr.params_list = + "nb;ib;P;trans;m;n;k;lda;ldb;rh;qra;qrp;llvl;hlvl;domino;seedA;seedB"; + test_zgels_hqr.fptr = testing_zgels_hqr; + test_zgels_hqr.next = NULL; + + testing_register( &test_zgels_hqr ); +} diff --git a/new-testing/testing_zgemm.c b/new-testing/testing_zgemm.c new file mode 100644 index 000000000..8e2aa0f59 --- /dev/null +++ b/new-testing/testing_zgemm.c @@ -0,0 +1,137 @@ +/** + * + * @file testing_zgemm.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgemm testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-07 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +/* testing_zgemm: reads its parameters from args, fills A, B and C with CHAMELEON_zplrnt_Tile, times one CHAMELEON_zgemm_Tile call and stores "time" and "gflops" in args. When check is non-zero the result of check_zgemm is added to the status. Returns hres. */ +int +testing_zgemm( run_arg_list_t *args, int check ) +{ + static int run_id = 0; /* incremented once per call; not read anywhere in this function */ + int Am, An, Bm, Bn; + int hres = 0; + CHAM_desc_t *descA, *descB, *descC, *descCinit; + + /* Read arguments (the last parameter of each run_arg_get_* call is presumably the fallback used when the argument is absent -- TODO confirm in run_list.c) */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_trans_t transA = run_arg_get_trans( args, "transA", ChamNoTrans ); + cham_trans_t transB = run_arg_get_trans( args, "transB", ChamNoTrans ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = run_arg_get_int( args, "K", N ); + int LDA = run_arg_get_int( args, "LDA", ( ( transA == ChamNoTrans ) ? M : K ) ); + int LDB = run_arg_get_int( args, "LDB", ( ( transB == ChamNoTrans ) ? K : N ) ); + int LDC = run_arg_get_int( args, "LDC", M ); + CHAMELEON_Complex64_t alpha = testing_zalea(); + CHAMELEON_Complex64_t beta = testing_zalea(); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int seedC = run_arg_get_int( args, "seedC", random() ); + int Q = parameters_compute_q( P ); /* Q is derived from P and passed with it to every CHAMELEON_Desc_Create call below */ + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgemm( M, N, K ); + + alpha = run_arg_get_Complex64( args, "alpha", alpha ); + beta = run_arg_get_Complex64( args, "beta", beta ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Calculate the dimensions according to the transposition: A is M x K and B is K x N unless transposed, in which case the two dimensions are swapped */ + if ( transA == ChamNoTrans ) { + Am = M; + An = K; + } + else { + Am = K; + An = M; + } + if ( transB == ChamNoTrans ) { + Bm = K; + Bn = N; + } + else { + Bm = N; + Bn = K; + } + + /* Create the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, An, 0, 0, Am, An, P, Q ); + 
CHAMELEON_Desc_Create( + &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, Bn, 0, 0, Bm, Bn, P, Q ); + CHAMELEON_Desc_Create( + &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q ); + + /* Fill the matrices with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + CHAMELEON_zplrnt_Tile( descC, seedC ); + + /* Calculate the product (only this call sits between START_TIMING and STOP_TIMING) */ + START_TIMING( t ); + hres = CHAMELEON_zgemm_Tile( transA, transB, alpha, descA, descB, beta, descC ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; /* flops_zgemm( M, N, K ) scaled by 1e-9 and divided by the measured t */ + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Check the solution: the initial C is regenerated from seedC so that check_zgemm receives both the initial and the computed C */ + if ( check ) { + CHAMELEON_Desc_Create( + &descCinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q ); + CHAMELEON_zplrnt_Tile( descCinit, seedC ); + + hres += check_zgemm( transA, transB, alpha, descA, descB, beta, descCinit, descC ); + + CHAMELEON_Desc_Destroy( &descCinit ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descB ); + CHAMELEON_Desc_Destroy( &descC ); + + run_id++; + return hres; +} + +testing_t test_zgemm; /* descriptor filled in and registered by testing_zgemm_init() */ +const char *zgemm_params[] = { "nb", "transA", "transB", "m", "n", "k", "lda", "ldb", + "ldc", "alpha", "beta", "seedA", "seedB", "seedC", NULL }; +const char *zgemm_output[] = { NULL }; +const char *zgemm_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zgemm and passes it to testing_register(); declared with the constructor attribute + */ +void testing_zgemm_init( void ) __attribute__( ( constructor ) ); +void +testing_zgemm_init( void ) +{ + test_zgemm.name = "zgemm"; + test_zgemm.helper = "zgemm"; + test_zgemm.params = zgemm_params; + test_zgemm.output = zgemm_output; + test_zgemm.outchk = zgemm_outchk; + test_zgemm.params_list = "nb;P;transA;transB;m;n;k;lda;ldb;ldc;alpha;beta;seedA;seedB;seedC"; + test_zgemm.fptr = testing_zgemm; + test_zgemm.next = NULL; + + testing_register( &test_zgemm ); +} diff --git a/new-testing/testing_zgeqrf.c b/new-testing/testing_zgeqrf.c new file
mode 100644 index 000000000..bd2e3a159 --- /dev/null +++ b/new-testing/testing_zgeqrf.c @@ -0,0 +1,117 @@ +/** + * + * @file testing_zgeqrf.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgeqrf testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-09-09 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +/* testing_zgeqrf: times one CHAMELEON_zgeqrf_Tile call on a matrix filled by CHAMELEON_zplrnt_Tile and stores "time" and "gflops" in args. When check is non-zero, Q is built with CHAMELEON_zungqr_Tile and the results of check_zgeqrf and check_zortho are added to the status. Returns hres. */ +int +testing_zgeqrf( run_arg_list_t *args, int check ) +{ + static int run_id = 0; /* incremented once per call; not read anywhere in this function */ + int hres = 0; + CHAM_desc_t *descA, *descT; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int RH = run_arg_get_int( args, "qra", 4 ); /* NOTE(review): RH is read from "qra" while params_list in the registration function advertises "rh" -- confirm which name is intended */ + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgeqrf( M, N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* A positive RH selects ChamTreeHouseholder with size RH; otherwise ChamFlatHouseholder is used */ if ( RH > 0 ) { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder ); + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH ); + } + else { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder ); + } + + /* Creates the matrices (descT comes from CHAMELEON_Alloc_Workspace_zgels) */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descT, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + + /* Calculates the solution (only this call sits between START_TIMING and STOP_TIMING) */ + START_TIMING( t ); + hres = CHAMELEON_zgeqrf_Tile( descA, descT ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, 
"gflops", gflops ); + + /* Checks the factorisation and orthogonality: descA0 is the input matrix regenerated from seedA, descQ is an M x M matrix filled by CHAMELEON_zungqr_Tile */ + if ( check ) { + CHAM_desc_t *descQ; + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + + CHAMELEON_Desc_Create( + &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, M, M, 0, 0, M, M, P, Q ); + CHAMELEON_zplrnt_Tile( descA0, seedA ); + + CHAMELEON_zungqr_Tile( descA, descT, descQ ); + + hres += check_zgeqrf( descA0, descA, descQ ); + hres += check_zortho( descQ ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descQ ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descT ); + + run_id++; + return hres; +} + +testing_t test_zgeqrf; /* descriptor filled in and registered by testing_zgeqrf_init() */ +const char *zgeqrf_params[] = { "nb", "ib", "m", "n", "lda", "qra", "seedA", NULL }; +const char *zgeqrf_output[] = { NULL }; +const char *zgeqrf_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zgeqrf and passes it to testing_register(); declared with the constructor attribute + */ +void testing_zgeqrf_init( void ) __attribute__( ( constructor ) ); +void +testing_zgeqrf_init( void ) +{ + test_zgeqrf.name = "zgeqrf"; + test_zgeqrf.helper = "zgeqrf"; + test_zgeqrf.params = zgeqrf_params; + test_zgeqrf.output = zgeqrf_output; + test_zgeqrf.outchk = zgeqrf_outchk; + test_zgeqrf.params_list = "nb;ib;P;m;n;lda;rh;seedA"; + test_zgeqrf.fptr = testing_zgeqrf; + test_zgeqrf.next = NULL; + + testing_register( &test_zgeqrf ); +} diff --git a/new-testing/testing_zgeqrf_hqr.c b/new-testing/testing_zgeqrf_hqr.c new file mode 100644 index 000000000..ffd0a4149 --- /dev/null +++ b/new-testing/testing_zgeqrf_hqr.c @@ -0,0 +1,128 @@ +/** + * + * @file testing_zgeqrf_hqr.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgeqrf_hqr testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-09-09 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +/* testing_zgeqrf_hqr: builds a libhqr tree with libhqr_init_hqr, times one CHAMELEON_zgeqrf_param_Tile call and stores "time" and "gflops" in args. When check is non-zero, Q is built with CHAMELEON_zungqr_param_Tile and the results of check_zgeqrf and check_zortho are added to the status. Returns hres. */ +int +testing_zgeqrf_hqr( run_arg_list_t *args, int check ) +{ + static int run_id = 0; /* incremented once per call; not read anywhere in this function */ + int hres = 0; + CHAM_desc_t *descA, *descTS, *descTT; + + /* Reads arguments (qra, qrp, llvl, hlvl and domino are forwarded unchanged to libhqr_init_hqr) */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int qr_a = run_arg_get_int( args, "qra", -1 ); + int qr_p = run_arg_get_int( args, "qrp", -1 ); + int llvl = run_arg_get_int( args, "llvl", -1 ); + int hlvl = run_arg_get_int( args, "hlvl", -1 ); + int domino = run_arg_get_int( args, "domino", -1 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgeqrf( M, N ); + + libhqr_tree_t qrtree; + libhqr_matrix_t matrix; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* Creates the matrices (descTS and descTT both come from CHAMELEON_Alloc_Workspace_zgels) */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descTT, P, Q ); + + /* Initialize matrix tree: the tile counts are taken from descTS and the node count is P * Q */ + matrix.mt = descTS->mt; + matrix.nt = descTS->nt; + matrix.nodes = P * Q; + matrix.p = P; + + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + + /* Calculates the solution (only this call sits between START_TIMING and STOP_TIMING) */ + START_TIMING( t ); + hres = CHAMELEON_zgeqrf_param_Tile( &qrtree, descA, descTS, descTT ); + STOP_TIMING( t ); + gflops 
= flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the factorisation and orthogonality: descA0 is the input matrix regenerated from seedA, descQ is an M x M matrix filled by CHAMELEON_zungqr_param_Tile */ + if ( check ) { + CHAM_desc_t *descQ; + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + + CHAMELEON_Desc_Create( + &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, M, M, 0, 0, M, M, P, Q ); + CHAMELEON_zplrnt_Tile( descA0, seedA ); + + CHAMELEON_zungqr_param_Tile( &qrtree, descA, descTS, descTT, descQ ); + + hres += check_zgeqrf( descA0, descA, descQ ); + hres += check_zortho( descQ ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descQ ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descTS ); + CHAMELEON_Desc_Destroy( &descTT ); + libhqr_finalize( &qrtree ); /* the tree is still used by the check above, so it is finalized last */ + + run_id++; + return hres; +} + +testing_t test_zgeqrf_hqr; /* descriptor filled in and registered by testing_zgeqrf_hqr_init() */ +const char *zgeqrf_hqr_params[] = { "nb", "ib", "m", "n", "lda", "qra", + "qrp", "llvl", "hlvl", "domino", "seedA", NULL }; +const char *zgeqrf_hqr_output[] = { NULL }; +const char *zgeqrf_hqr_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zgeqrf_hqr and passes it to testing_register(); declared with the constructor attribute + */ +void testing_zgeqrf_hqr_init( void ) __attribute__( ( constructor ) ); +void +testing_zgeqrf_hqr_init( void ) +{ + test_zgeqrf_hqr.name = "zgeqrf_hqr"; + test_zgeqrf_hqr.helper = "zgeqrf_hqr"; + test_zgeqrf_hqr.params = zgeqrf_hqr_params; + test_zgeqrf_hqr.output = zgeqrf_hqr_output; + test_zgeqrf_hqr.outchk = zgeqrf_hqr_outchk; + test_zgeqrf_hqr.params_list = "nb;ib;P;m;n;lda;qra;qrp;llvl;hlvl;domino;seedA"; + test_zgeqrf_hqr.fptr = testing_zgeqrf_hqr; + test_zgeqrf_hqr.next = NULL; + + testing_register( &test_zgeqrf_hqr ); +} diff --git a/new-testing/testing_zgeqrs.c b/new-testing/testing_zgeqrs.c new file mode 100644 index 000000000..d33730f8f --- /dev/null +++ b/new-testing/testing_zgeqrs.c @@ -0,0 +1,135 @@ +/** + * + * @file testing_zgeqrs.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgeqrs testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-09-10 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +#include "control/common.h" +/* testing_zgeqrs: factorizes A with CHAMELEON_zgeqrf_Tile (untimed), then times one CHAMELEON_zgeqrs_Tile call and stores "time" and "gflops" in args. Returns -1 without running anything when N > M; otherwise returns the factorization status plus the solve status plus, when check is non-zero, the check_zsolve result. */ +int +testing_zgeqrs( run_arg_list_t *args, int check ) +{ + static int run_id = 0; /* incremented once per completed call; not read anywhere in this function */ + int hres = 0; + CHAM_desc_t *descA, *descX, *descT; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", M ); + int LDB = run_arg_get_int( args, "LDB", M ); + int RH = run_arg_get_int( args, "qra", 0 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgeqrs( M, N, NRHS ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* Nothing has been allocated yet, so the early return below leaks nothing */ if ( N > M ) { + if ( CHAMELEON_Comm_rank() == 0 ) { + fprintf( stderr, "SKIPPED: The QR solution is performed only when M >= N\n" ); + } + return -1; + } + + if ( RH > 0 ) { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder ); + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH ); + } + else { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder ); + } + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Desc_Create( + &descX, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, M, NRHS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( M, N, &descT, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + 
CHAMELEON_zplrnt_Tile( descX, seedB ); + + /* Calculates the solution: the factorization runs outside the timed region */ + hres = CHAMELEON_zgeqrf_Tile( descA, descT ); + + START_TIMING( t ); + hres += CHAMELEON_zgeqrs_Tile( descA, descT, descX ); /* accumulate so that a factorization failure is not overwritten by the solve status */ + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the residue of the solve: A and B are regenerated from their seeds and compared through check_zsolve on the N x NRHS part of X and the M x NRHS part of B */ + if ( check ) { + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAM_desc_t *descB = CHAMELEON_Desc_Copy( descX, NULL ); + CHAM_desc_t *subX = chameleon_desc_submatrix( descX, 0, 0, N, NRHS ); + CHAM_desc_t *subB = chameleon_desc_submatrix( descB, 0, 0, M, NRHS ); + + CHAMELEON_zplrnt_Tile( descA0, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + hres += check_zsolve( ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, subX, subB ); + + free( subB ); + free( subX ); + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descX ); + CHAMELEON_Desc_Destroy( &descT ); + + run_id++; + return hres; +} + +testing_t test_zgeqrs; /* descriptor filled in and registered by testing_zgeqrs_init() */ +const char *zgeqrs_params[] = { "nb", "ib", "m", "n", "k", "lda", + "ldb", "qra", "seedA", "seedB", NULL }; +const char *zgeqrs_output[] = { NULL }; +const char *zgeqrs_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zgeqrs and passes it to testing_register(); declared with the constructor attribute + */ +void testing_zgeqrs_init( void ) __attribute__( ( constructor ) ); +void +testing_zgeqrs_init( void ) +{ + test_zgeqrs.name = "zgeqrs"; + test_zgeqrs.helper = "zgeqrs"; + test_zgeqrs.params = zgeqrs_params; + test_zgeqrs.output = zgeqrs_output; + test_zgeqrs.outchk = zgeqrs_outchk; + test_zgeqrs.params_list = "nb;ib;P;m;n;k;lda;ldb;rh;seedA;seedB"; + test_zgeqrs.fptr = testing_zgeqrs; + test_zgeqrs.next = NULL; + + testing_register( &test_zgeqrs ); +} diff --git a/new-testing/testing_zgesv.c b/new-testing/testing_zgesv.c new file mode 100644 index 000000000..b429f4014 --- /dev/null +++ 
b/new-testing/testing_zgesv.c @@ -0,0 +1,120 @@ +/** + * + * @file testing_zgesv.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgesv testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-12 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +/* flops_zgesv: flop count used for the gflops figure, computed as flops_zgetrf( N, N ) plus flops_zgetrs( N, NRHS ). */ +static cham_fixdbl_t +flops_zgesv( int N, int NRHS ) +{ + cham_fixdbl_t flops = flops_zgetrf( N, N ) + flops_zgetrs( N, NRHS ); + return flops; +} +/* testing_zgesv: times one CHAMELEON_zgesv_nopiv_Tile call on an N x N matrix and an N x NRHS right-hand side filled by CHAMELEON_zplrnt_Tile, and stores "time" and "gflops" in args. When check is non-zero the results of check_zxxtrf and check_zsolve are added to the status. Returns hres. */ +int +testing_zgesv( run_arg_list_t *args, int check ) +{ + static int run_id = 0; /* incremented once per call; not read anywhere in this function */ + int hres = 0; + CHAM_desc_t *descA, *descX; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgesv( N, NRHS ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + CHAMELEON_Desc_Create( + &descX, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, N, NRHS, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descX, seedB ); + + /* Calculates the solution (only this call sits between START_TIMING and STOP_TIMING) */ + START_TIMING( t ); + hres = CHAMELEON_zgesv_nopiv_Tile( descA, descX ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the factorisation and residue 
*/ + if ( check ) { + CHAM_desc_t *descA0, *descB; + + /* Check the factorization: descA0 is the input matrix regenerated from seedA */ + descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAMELEON_zplrnt_Tile( descA0, seedA ); + + hres += check_zxxtrf( args, ChamGeneral, ChamUpperLower, descA0, descA ); + + /* Check the solve: descA0 is regenerated from seedA a second time before check_zsolve (NOTE(review): presumably because check_zxxtrf may modify it -- confirm) */ + descB = CHAMELEON_Desc_Copy( descX, NULL ); + CHAMELEON_zplrnt_Tile( descA0, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + hres += check_zsolve( ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, descX, descB ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descX ); + + run_id++; + return hres; +} + +testing_t test_zgesv; /* descriptor filled in and registered by testing_zgesv_init() */ +const char *zgesv_params[] = { "nb", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; +const char *zgesv_output[] = { NULL }; +const char *zgesv_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zgesv and passes it to testing_register(); declared with the constructor attribute + */ +void testing_zgesv_init( void ) __attribute__( ( constructor ) ); +void +testing_zgesv_init( void ) +{ + test_zgesv.name = "zgesv"; + test_zgesv.helper = "zgesv"; + test_zgesv.params = zgesv_params; + test_zgesv.output = zgesv_output; + test_zgesv.outchk = zgesv_outchk; + test_zgesv.params_list = "nb;P;n;nrhs;lda;ldb;seedA;seedB"; + test_zgesv.fptr = testing_zgesv; + test_zgesv.next = NULL; + + testing_register( &test_zgesv ); +} diff --git a/new-testing/testing_zgetrf.c b/new-testing/testing_zgetrf.c new file mode 100644 index 000000000..8bedc058d --- /dev/null +++ b/new-testing/testing_zgetrf.c @@ -0,0 +1,98 @@ +/** + * + * @file testing_zgetrf.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgetrf testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-09-09 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +/* testing_zgetrf: times one CHAMELEON_zgetrf_nopiv_Tile call on an M x N matrix filled by CHAMELEON_zplrnt_Tile and stores "time" and "gflops" in args. When check is non-zero the result of check_zxxtrf is added to the status. Returns hres. */ +int +testing_zgetrf( run_arg_list_t *args, int check ) +{ + static int run_id = 0; /* incremented once per call; not read anywhere in this function */ + int hres = 0; + CHAM_desc_t *descA; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgetrf( M, N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + + /* Calculates the solution (only this call sits between START_TIMING and STOP_TIMING) */ + START_TIMING( t ); + hres = CHAMELEON_zgetrf_nopiv_Tile( descA ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* The factorization status is reported only through the return value; nothing is printed here */ + + /* Checks the factorisation: descA0 is the input matrix regenerated from seedA */ + if ( check ) { + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAMELEON_zplrnt_Tile( descA0, seedA ); + + hres += check_zxxtrf( args, ChamGeneral, ChamUpperLower, descA0, descA ); + + CHAMELEON_Desc_Destroy( &descA0 ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + run_id++; + return hres; +} + +testing_t test_zgetrf; /* descriptor filled in and registered by testing_zgetrf_init() */ +const char *zgetrf_params[] = { "nb", "m", "n", "lda", "seedA", NULL }; +const char *zgetrf_output[] = { NULL }; +const char *zgetrf_outchk[] = { "||A||", "||A-fact(A)||", "RETURN", NULL }; + +/** + * @brief Testing registration function + 
*/ +void testing_zgetrf_init( void ) __attribute__( ( constructor ) ); +void +testing_zgetrf_init( void ) +{ + test_zgetrf.name = "zgetrf"; + test_zgetrf.helper = "zgetrf"; + test_zgetrf.params = zgetrf_params; + test_zgetrf.output = zgetrf_output; + test_zgetrf.outchk = zgetrf_outchk; + test_zgetrf.params_list = "nb;P;m;n;lda;seedA"; + test_zgetrf.fptr = testing_zgetrf; + test_zgetrf.next = NULL; + + testing_register( &test_zgetrf ); +} diff --git a/new-testing/testing_zgetrs.c b/new-testing/testing_zgetrs.c new file mode 100644 index 000000000..084b61f38 --- /dev/null +++ b/new-testing/testing_zgetrs.c @@ -0,0 +1,110 @@ +/** + * + * @file testing_zgetrs.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgetrs testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-09-09 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include <assert.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +/* testing_zgetrs: factorizes A with CHAMELEON_zgetrf_nopiv_Tile (untimed), then times one CHAMELEON_zgetrs_nopiv_Tile call and stores "time" and "gflops" in args. Returns the factorization status plus the solve status plus, when check is non-zero, the check_zsolve result. */ +int +testing_zgetrs( run_arg_list_t *args, int check ) +{ + static int run_id = 0; /* incremented once per call; not read anywhere in this function */ + int hres = 0; + CHAM_desc_t *descA, *descX; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zgetrs( N, NRHS ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + CHAMELEON_Desc_Create( + &descX, NULL, ChamComplexDouble, nb, nb, nb * 
nb, LDB, NRHS, 0, 0, N, NRHS, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descX, seedB ); + + hres = CHAMELEON_zgetrf_nopiv_Tile( descA ); /* untimed factorization needed by the solve */ + assert( hres == 0 ); /* compiled out when NDEBUG is defined, hence the accumulation below */ + + /* Calculates the solution */ + START_TIMING( t ); + hres += CHAMELEON_zgetrs_nopiv_Tile( descA, descX ); /* accumulate so that a factorization failure is not overwritten by the solve status */ + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the residue of the solve: A and B are regenerated from their seeds and passed to check_zsolve together with the computed X */ + if ( check ) { + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAM_desc_t *descB = CHAMELEON_Desc_Copy( descX, NULL ); + + CHAMELEON_zplrnt_Tile( descA0, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + hres += check_zsolve( ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, descX, descB ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descX ); + + run_id++; + return hres; +} + +testing_t test_zgetrs; /* descriptor filled in and registered by testing_zgetrs_init() */ +const char *zgetrs_params[] = { "nb", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; +const char *zgetrs_output[] = { NULL }; +const char *zgetrs_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zgetrs and passes it to testing_register(); declared with the constructor attribute + */ +void testing_zgetrs_init( void ) __attribute__( ( constructor ) ); +void +testing_zgetrs_init( void ) +{ + test_zgetrs.name = "zgetrs"; + test_zgetrs.helper = "zgetrs"; + test_zgetrs.params = zgetrs_params; + test_zgetrs.output = zgetrs_output; + test_zgetrs.outchk = zgetrs_outchk; + test_zgetrs.params_list = "nb;P;n;nrhs;lda;ldb;seedA;seedB"; + test_zgetrs.fptr = testing_zgetrs; + test_zgetrs.next = NULL; + + testing_register( &test_zgetrs ); +} diff --git a/new-testing/testing_zhemm.c b/new-testing/testing_zhemm.c new file mode 100644 index 000000000..291bdc3bd --- /dev/null +++ b/new-testing/testing_zhemm.c @@ -0,0 +1,129 @@ +/** + * + * @file testing_zhemm.c + * + * @copyright 2019-2019 
Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zhemm testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-08 + * @precisions normal z -> c + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +/* testing_zhemm: fills A with CHAMELEON_zplghe_Tile and B, C with CHAMELEON_zplrnt_Tile, times one CHAMELEON_zhemm_Tile call and stores "time" and "gflops" in args. When check is non-zero the result of check_zsymm (called with ChamHermitian) is added to the status. Returns hres. */ +int +testing_zhemm( run_arg_list_t *args, int check ) +{ + static int run_id = 0; /* incremented once per call; not read anywhere in this function */ + int Am; + int hres = 0; + CHAM_desc_t *descA, *descB, *descC, *descCinit; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_side_t side = run_arg_get_uplo( args, "side", ChamLeft ); /* NOTE(review): "side" is read with run_arg_get_uplo although its type is cham_side_t -- confirm whether a side-specific getter should be used here */ + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", ( ( side == ChamLeft ) ? M : N ) ); + int LDB = run_arg_get_int( args, "LDB", M ); + int LDC = run_arg_get_int( args, "LDC", M ); + CHAMELEON_Complex64_t alpha = testing_zalea(); + CHAMELEON_Complex64_t beta = testing_zalea(); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int seedC = run_arg_get_int( args, "seedC", random() ); + double bump = testing_dalea(); + bump = run_arg_get_double( args, "bump", bump ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zhemm( side, M, N ); + + alpha = run_arg_get_Complex64( args, "alpha", alpha ); + beta = run_arg_get_Complex64( args, "beta", beta ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Calculate the dimensions according to the side: A is square, of order M when side is ChamLeft and of order N otherwise */ + if ( side == ChamLeft ) { + Am = M; + } + else { + Am = N; + } + + /* Create the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, Am, 0, 0, Am, Am, P, Q ); + CHAMELEON_Desc_Create( + &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, N, 
0, 0, M, N, P, Q ); + CHAMELEON_Desc_Create( + &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q ); + + /* Fills the matrix with random values (A through CHAMELEON_zplghe_Tile with bump and uplo, B and C through CHAMELEON_zplrnt_Tile) */ + CHAMELEON_zplghe_Tile( bump, uplo, descA, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + CHAMELEON_zplrnt_Tile( descC, seedC ); + + /* Calculates the product (only this call sits between START_TIMING and STOP_TIMING) */ + START_TIMING( t ); + hres = CHAMELEON_zhemm_Tile( side, uplo, alpha, descA, descB, beta, descC ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the solution: the initial C is regenerated from seedC so that check_zsymm receives both the initial and the computed C */ + if ( check ) { + CHAMELEON_Desc_Create( + &descCinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q ); + CHAMELEON_zplrnt_Tile( descCinit, seedC ); + + hres += + check_zsymm( ChamHermitian, side, uplo, alpha, descA, descB, beta, descCinit, descC ); + + CHAMELEON_Desc_Destroy( &descCinit ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descB ); + CHAMELEON_Desc_Destroy( &descC ); + + run_id++; + return hres; +} + +testing_t test_zhemm; /* descriptor filled in and registered by testing_zhemm_init() */ +const char *zhemm_params[] = { "nb", "side", "uplo", "m", "n", "lda", "ldb", "ldc", + "alpha", "beta", "seedA", "seedB", "seedC", "bump", NULL }; +const char *zhemm_output[] = { NULL }; +const char *zhemm_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zhemm and passes it to testing_register(); declared with the constructor attribute + */ +void testing_zhemm_init( void ) __attribute__( ( constructor ) ); +void +testing_zhemm_init( void ) +{ + test_zhemm.name = "zhemm"; + test_zhemm.helper = "zhemm"; + test_zhemm.params = zhemm_params; + test_zhemm.output = zhemm_output; + test_zhemm.outchk = zhemm_outchk; + test_zhemm.params_list = "nb;P;side;uplo;m;n;lda;ldb;ldc;alpha;beta;seedA;seedB;seedC;bump"; + test_zhemm.fptr = testing_zhemm; + test_zhemm.next = NULL; + + testing_register( &test_zhemm ); +} diff --git a/new-testing/testing_zher2k.c b/new-testing/testing_zher2k.c new file mode 100644 index 000000000..2dc6a560c --- /dev/null +++ 
b/new-testing/testing_zher2k.c @@ -0,0 +1,131 @@ +/** + * + * @file testing_zher2k.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zher2k testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-09 + * @precisions normal z -> z c + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" +/* testing_zher2k: fills A, B with CHAMELEON_zplrnt_Tile and C with CHAMELEON_zplghe_Tile, times one CHAMELEON_zher2k_Tile call and stores "time" and "gflops" in args. When check is non-zero the result of check_zsyrk (called with ChamHermitian) is added to the status. Returns hres. */ +int +testing_zher2k( run_arg_list_t *args, int check ) +{ + static int run_id = 0; /* incremented once per call; not read anywhere in this function */ + int Am, An; + int hres = 0; + CHAM_desc_t *descA, *descB, *descC, *descCinit; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int K = run_arg_get_int( args, "K", N ); + int LDA = run_arg_get_int( args, "LDA", ( ( trans == ChamNoTrans ) ? N : K ) ); + int LDB = run_arg_get_int( args, "LDB", ( ( trans == ChamNoTrans ) ? 
N : K ) ); + int LDC = run_arg_get_int( args, "LDC", N ); + CHAMELEON_Complex64_t alpha = testing_zalea(); + double beta = testing_dalea(); /* beta is a real value here, unlike alpha */ + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int seedC = run_arg_get_int( args, "seedC", random() ); + double bump = testing_dalea(); + bump = run_arg_get_double( args, "bump", bump ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zher2k( K, N ); + + alpha = run_arg_get_Complex64( args, "alpha", alpha ); + beta = run_arg_get_double( args, "beta", beta ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Calculate the dimensions according to the transposition: A and B are N x K unless transposed, in which case they are K x N */ + if ( trans == ChamNoTrans ) { + Am = N; + An = K; + } + else { + Am = K; + An = N; + } + + /* Create the matrices (A and B share the same dimensions, C is N x N) */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, An, 0, 0, Am, An, P, Q ); + CHAMELEON_Desc_Create( + &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, An, 0, 0, Am, An, P, Q ); + CHAMELEON_Desc_Create( + &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q ); + + /* Fill the matrix with random values (C through CHAMELEON_zplghe_Tile with bump and uplo) */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + CHAMELEON_zplghe_Tile( bump, uplo, descC, seedC ); + + /* Calculate the product (only this call sits between START_TIMING and STOP_TIMING) */ + START_TIMING( t ); + hres = CHAMELEON_zher2k_Tile( uplo, trans, alpha, descA, descB, beta, descC ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Check the solution: the initial C is regenerated with the same bump, uplo and seedC so that check_zsyrk receives both the initial and the computed C */ + if ( check ) { + CHAMELEON_Desc_Create( + &descCinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q ); + CHAMELEON_zplghe_Tile( bump, uplo, descCinit, seedC ); + + hres += + check_zsyrk( ChamHermitian, uplo, trans, alpha, descA, descB, beta, descCinit, descC ); + + CHAMELEON_Desc_Destroy( &descCinit ); + } + + 
CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descB ); + CHAMELEON_Desc_Destroy( &descC ); + + run_id++; + return hres; +} + +testing_t test_zher2k; /* descriptor filled in and registered by testing_zher2k_init() */ +const char *zher2k_params[] = { "nb", "trans", "uplo", "n", "k", "lda", "ldb", "ldc", + "alpha", "beta", "seedA", "seedB", "seedC", "bump", NULL }; +const char *zher2k_output[] = { NULL }; +const char *zher2k_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zher2k and passes it to testing_register(); declared with the constructor attribute + */ +void testing_zher2k_init( void ) __attribute__( ( constructor ) ); +void +testing_zher2k_init( void ) +{ + test_zher2k.name = "zher2k"; + test_zher2k.helper = "zher2k"; + test_zher2k.params = zher2k_params; + test_zher2k.output = zher2k_output; + test_zher2k.outchk = zher2k_outchk; + test_zher2k.params_list = "nb;P;trans;uplo;n;k;lda;ldb;ldc;alpha;beta;seedA;seedB;seedC;bump"; + test_zher2k.fptr = testing_zher2k; + test_zher2k.next = NULL; + + testing_register( &test_zher2k ); +} diff --git a/new-testing/testing_zherk.c b/new-testing/testing_zherk.c new file mode 100644 index 000000000..b7f650b8b --- /dev/null +++ b/new-testing/testing_zherk.c @@ -0,0 +1,126 @@ +/** + * + * @file testing_zherk.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zherk testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-09 + * @precisions normal z -> z c + * + */ +#include <chameleon.h> +#include "flops.h" +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" /* NOTE(review): flops.h is already included above; this repeat looks redundant */ + +int +testing_zherk( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int Am, An; /* dimensions of A, set from trans below */ + int hres = 0; + CHAM_desc_t *descA, *descC, *descCinit; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int K = run_arg_get_int( args, "K", N ); + int LDA = run_arg_get_int( args, "LDA", ( ( trans == ChamNoTrans ) ? N : K ) ); + int LDC = run_arg_get_int( args, "LDC", N ); + double alpha = testing_dalea(); + double beta = testing_dalea(); + double bump = testing_dalea(); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedC = run_arg_get_int( args, "seedC", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zherk( K, N ); + + alpha = run_arg_get_double( args, "alpha", alpha ); + beta = run_arg_get_double( args, "beta", beta ); + bump = run_arg_get_double( args, "bump", bump ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Calculates the dimensions according to the transposition */ + if ( trans == ChamNoTrans ) { + Am = N; + An = K; + } + else { + Am = K; + An = N; + } + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, An, 0, 0, Am, An, P, Q ); + CHAMELEON_Desc_Create( + &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q ); + + /* Fills A with zplrnt and C with zplghe, using the requested seeds */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplghe_Tile( bump, uplo, descC, seedC ); + + /* 
Calculates the product */ + START_TIMING( t ); + hres = CHAMELEON_zherk_Tile( uplo, trans, alpha, descA, beta, descC ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the solution against descCinit, regenerated with the same bump and seedC as the initial C */ + if ( check ) { + CHAMELEON_Desc_Create( + &descCinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q ); + CHAMELEON_zplghe_Tile( bump, uplo, descCinit, seedC ); + + hres += + check_zsyrk( ChamHermitian, uplo, trans, alpha, descA, NULL, beta, descCinit, descC ); + + CHAMELEON_Desc_Destroy( &descCinit ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descC ); + + run_id++; + return hres; +} + +testing_t test_zherk; +const char *zherk_params[] = { "nb", "trans", "uplo", "n", "k", "lda", "ldc", + "alpha", "beta", "seedA", "seedC", "bump", NULL }; +const char *zherk_output[] = { NULL }; +const char *zherk_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zherk and registers it (constructor attribute) + */ +void testing_zherk_init( void ) __attribute__( ( constructor ) ); +void +testing_zherk_init( void ) +{ + test_zherk.name = "zherk"; + test_zherk.helper = "zherk"; + test_zherk.params = zherk_params; + test_zherk.output = zherk_output; + test_zherk.outchk = zherk_outchk; + test_zherk.params_list = "nb;P;trans;uplo;n;k;lda;ldc;alpha;beta;seedA;seedC;bump"; + test_zherk.fptr = testing_zherk; + test_zherk.next = NULL; + + testing_register( &test_zherk ); +} diff --git a/new-testing/testing_zlacpy.c b/new-testing/testing_zlacpy.c new file mode 100644 index 000000000..4892f2cc5 --- /dev/null +++ b/new-testing/testing_zlacpy.c @@ -0,0 +1,129 @@ +/** + * + * @file testing_zlacpy.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zlacpy testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-07-04 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +static cham_fixdbl_t +flops_zlacpy( cham_uplo_t uplo, int M, int N ) +{ + cham_fixdbl_t flops; /* number of copied elements, scaled to bytes below; every product is formed in cham_fixdbl_t so large M, N cannot overflow int */ + + switch ( uplo ) { + case ChamUpper: + if ( N > M ) { + flops = ( (cham_fixdbl_t)M * ( M + 1 ) / 2 ) + (cham_fixdbl_t)M * ( N - M ); + } + else { + flops = (cham_fixdbl_t)N * ( N + 1 ) / 2; + } + break; + case ChamLower: + if ( M > N ) { + flops = ( (cham_fixdbl_t)N * ( N + 1 ) / 2 ) + (cham_fixdbl_t)N * ( M - N ); + } + else { + flops = (cham_fixdbl_t)M * ( M + 1 ) / 2; + } + break; + case ChamUpperLower: + default: + flops = (cham_fixdbl_t)M * N; + } + flops *= sizeof( CHAMELEON_Complex64_t ); + + return flops; +} + +int +testing_zlacpy( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA, *descB; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int LDB = run_arg_get_int( args, "LDB", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zlacpy( uplo, M, N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates two different matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_Desc_Create( + &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, N, 0, 0, M, N, P, Q ); + + /* Fills each matrix with different random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + /* We use seedA + 1, just to create a variation in B */ + CHAMELEON_zplrnt_Tile( descB, seedA + 1 ); + + /* Makes a copy of descA to 
descB */ + START_TIMING( t ); + hres = CHAMELEON_zlacpy_Tile( uplo, descA, descB ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks their differences */ + if ( check ) { + hres += check_zmatrices( uplo, descA, descB ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descB ); + + run_id++; + return hres; +} + +testing_t test_zlacpy; +const char *zlacpy_params[] = { "nb", "uplo", "m", "n", "lda", "ldb", "seedA", NULL }; +const char *zlacpy_output[] = { NULL }; +const char *zlacpy_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zlacpy and registers it (constructor attribute) + */ +void testing_zlacpy_init( void ) __attribute__( ( constructor ) ); +void +testing_zlacpy_init( void ) +{ + test_zlacpy.name = "zlacpy"; + test_zlacpy.helper = "zlacpy"; + test_zlacpy.params = zlacpy_params; + test_zlacpy.output = zlacpy_output; + test_zlacpy.outchk = zlacpy_outchk; + test_zlacpy.params_list = "nb;P;uplo;m;n;lda;ldb;seedA"; + test_zlacpy.fptr = testing_zlacpy; + test_zlacpy.next = NULL; + + testing_register( &test_zlacpy ); +} diff --git a/new-testing/testing_zlange.c b/new-testing/testing_zlange.c new file mode 100644 index 000000000..fc2ff6b4e --- /dev/null +++ b/new-testing/testing_zlange.c @@ -0,0 +1,120 @@ +/** + * + * @file testing_zlange.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zlange testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-07-13 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +static cham_fixdbl_t +flops_zlange( cham_normtype_t ntype, int M, int N ) +{ + cham_fixdbl_t flops = 0.; /* stays 0 for norm types not listed below */ + double coefabs = 1.; /* cost of one absolute value: 1 in real, 3 in complex precisions */ +#if defined( PRECISION_z ) || defined( PRECISION_c ) + coefabs = 3.; +#endif + + switch ( ntype ) { + case ChamMaxNorm: + flops = coefabs * M * N; + break; + case ChamOneNorm: /* N column sums of M terms each; the product is formed in cham_fixdbl_t to avoid int overflow */ + flops = coefabs * M * N + (cham_fixdbl_t)N * ( M - 1 ); + break; + case ChamInfNorm: /* M row sums of N terms each */ + flops = coefabs * M * N + (cham_fixdbl_t)M * ( N - 1 ); + break; + case ChamFrobeniusNorm: + flops = ( coefabs + 1. ) * M * N; + break; + default:; + } + return flops; +} + +int +testing_zlange( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + double norm; + CHAM_desc_t *descA; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_normtype_t norm_type = run_arg_get_ntype( args, "norm", ChamMaxNorm ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zlange( norm_type, M, N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + + /* Calculates the norm */ + START_TIMING( t ); + norm = CHAMELEON_zlange_Tile( norm_type, descA ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the solution 
*/ + if ( check ) { + hres = check_znorm( ChamGeneral, norm_type, ChamUpperLower, ChamNonUnit, norm, descA ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + run_id++; + return hres; +} + +testing_t test_zlange; +const char *zlange_params[] = { "nb", "norm", "m", "n", "lda", "seedA", NULL }; +const char *zlange_output[] = { NULL }; +const char *zlange_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zlange_init( void ) __attribute__( ( constructor ) ); +void +testing_zlange_init( void ) +{ + test_zlange.name = "zlange"; + test_zlange.helper = "zlange"; + test_zlange.params = zlange_params; + test_zlange.output = zlange_output; + test_zlange.outchk = zlange_outchk; + test_zlange.params_list = "nb;P;norm;m;n;lda;seedA"; + test_zlange.fptr = testing_zlange; + test_zlange.next = NULL; + + testing_register( &test_zlange ); +} diff --git a/new-testing/testing_zlanhe.c b/new-testing/testing_zlanhe.c new file mode 100644 index 000000000..8ca447cc8 --- /dev/null +++ b/new-testing/testing_zlanhe.c @@ -0,0 +1,121 @@ +/** + * + * @file testing_zlanhe.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zlanhe testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-07-17 + * @precisions normal z -> c + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +static cham_fixdbl_t +flops_zlanhe( cham_normtype_t ntype, cham_uplo_t uplo, int N ) +{ + cham_fixdbl_t flops = 0.; /* stays 0 for norm types not listed below; N * ( N +/- 1 ) is formed in cham_fixdbl_t to avoid int overflow for large N */ + double coefabs = 1.; +#if defined( PRECISION_z ) || defined( PRECISION_c ) + coefabs = 3.; +#endif + + switch ( ntype ) { + case ChamMaxNorm: + flops = coefabs * ( (cham_fixdbl_t)N * ( N + 1 ) ) / 2.; + break; + case ChamOneNorm: + case ChamInfNorm: + flops = coefabs * ( (cham_fixdbl_t)N * ( N + 1 ) ) / 2. + (cham_fixdbl_t)N * ( N - 1 ); + break; + case ChamFrobeniusNorm: + flops = ( coefabs + 1. 
) * ( (cham_fixdbl_t)N * ( N + 1 ) ) / 2.; + break; + default:; + } + return flops; +} + +int +testing_zlanhe( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + double norm; + CHAM_desc_t *descA; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_normtype_t norm_type = run_arg_get_ntype( args, "norm", ChamMaxNorm ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + double bump = testing_dalea(); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zlanhe( norm_type, uplo, N ); + + bump = run_arg_get_double( args, "bump", bump ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplghe_Tile( bump, uplo, descA, seedA ); + + /* Calculates the norm */ + START_TIMING( t ); + norm = CHAMELEON_zlanhe_Tile( norm_type, uplo, descA ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the solution */ + if ( check ) { + hres = check_znorm( ChamHermitian, norm_type, uplo, ChamNonUnit, norm, descA ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + run_id++; + return hres; +} + +testing_t test_zlanhe; +const char *zlanhe_params[] = { "nb", "norm", "uplo", "n", "lda", "seedA", "bump", NULL }; +const char *zlanhe_output[] = { NULL }; +const char *zlanhe_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zlanhe_init( void ) __attribute__( ( constructor ) ); +void +testing_zlanhe_init( void ) +{ + test_zlanhe.name = "zlanhe"; + test_zlanhe.helper = "zlanhe"; + 
test_zlanhe.params = zlanhe_params; + test_zlanhe.output = zlanhe_output; + test_zlanhe.outchk = zlanhe_outchk; + test_zlanhe.params_list = "nb;P;norm;uplo;n;lda;seedA;bump"; + test_zlanhe.fptr = testing_zlanhe; + test_zlanhe.next = NULL; + + testing_register( &test_zlanhe ); +} diff --git a/new-testing/testing_zlansy.c b/new-testing/testing_zlansy.c new file mode 100644 index 000000000..b55e0b761 --- /dev/null +++ b/new-testing/testing_zlansy.c @@ -0,0 +1,121 @@ +/** + * + * @file testing_zlansy.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zlansy testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-07-17 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +static cham_fixdbl_t +flops_zlansy( cham_normtype_t ntype, cham_uplo_t uplo, int N ) +{ + cham_fixdbl_t flops = 0.; /* stays 0 for norm types not listed below; N * ( N +/- 1 ) is formed in cham_fixdbl_t to avoid int overflow for large N */ + double coefabs = 1.; +#if defined( PRECISION_z ) || defined( PRECISION_c ) + coefabs = 3.; +#endif + + switch ( ntype ) { + case ChamMaxNorm: + flops = coefabs * ( (cham_fixdbl_t)N * ( N + 1 ) ) / 2.; + break; + case ChamOneNorm: + case ChamInfNorm: + flops = coefabs * ( (cham_fixdbl_t)N * ( N + 1 ) ) / 2. + (cham_fixdbl_t)N * ( N - 1 ); + break; + case ChamFrobeniusNorm: + flops = ( coefabs + 1. 
) * ( (cham_fixdbl_t)N * ( N + 1 ) ) / 2.; + break; + default:; + } + return flops; +} + +int +testing_zlansy( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + double norm; + CHAM_desc_t *descA; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_normtype_t norm_type = run_arg_get_ntype( args, "norm", ChamMaxNorm ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + CHAMELEON_Complex64_t bump = testing_zalea(); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zlansy( norm_type, uplo, N ); + + bump = run_arg_get_Complex64( args, "bump", bump ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplgsy_Tile( bump, uplo, descA, seedA ); + + /* Calculates the norm */ + START_TIMING( t ); + norm = CHAMELEON_zlansy_Tile( norm_type, uplo, descA ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the solution */ + if ( check ) { + hres = check_znorm( ChamSymmetric, norm_type, uplo, ChamNonUnit, norm, descA ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + run_id++; + return hres; +} + +testing_t test_zlansy; +const char *zlansy_params[] = { "nb", "norm", "uplo", "n", "lda", "seedA", "bump", NULL }; +const char *zlansy_output[] = { NULL }; +const char *zlansy_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zlansy_init( void ) __attribute__( ( constructor ) ); +void +testing_zlansy_init( void ) +{ + test_zlansy.name = "zlansy"; + test_zlansy.helper 
= "zlansy"; + test_zlansy.params = zlansy_params; + test_zlansy.output = zlansy_output; + test_zlansy.outchk = zlansy_outchk; + test_zlansy.params_list = "nb;P;norm;uplo;n;lda;seedA;bump"; + test_zlansy.fptr = testing_zlansy; + test_zlansy.next = NULL; + + testing_register( &test_zlansy ); +} diff --git a/new-testing/testing_zlantr.c b/new-testing/testing_zlantr.c new file mode 100644 index 000000000..b1d0e2da3 --- /dev/null +++ b/new-testing/testing_zlantr.c @@ -0,0 +1,121 @@ +/** + * + * @file testing_zlantr.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zlantr testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-07-17 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +static cham_fixdbl_t +flops_zlantr( cham_normtype_t ntype, cham_uplo_t uplo, cham_diag_t diag, int M, int N ) +{ + /* TODO: update formula: uplo, diag and M are currently ignored and the count only depends on N; products are formed in cham_fixdbl_t to avoid int overflow for large N */ + cham_fixdbl_t flops = 0.; + double coefabs = 1.; +#if defined( PRECISION_z ) || defined( PRECISION_c ) + coefabs = 3.; +#endif + + switch ( ntype ) { + case ChamMaxNorm: + flops = coefabs * ( (cham_fixdbl_t)N * ( N + 1 ) ) / 2.; + break; + case ChamOneNorm: + case ChamInfNorm: + flops = coefabs * ( (cham_fixdbl_t)N * ( N + 1 ) ) / 2. + (cham_fixdbl_t)N * ( N - 1 ); + break; + case ChamFrobeniusNorm: + flops = ( coefabs + 1. 
) * ( (cham_fixdbl_t)N * ( N + 1 ) ) / 2.; + break; + default:; + } + return flops; +} + +int +testing_zlantr( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + double norm; + CHAM_desc_t *descA; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_normtype_t norm_type = run_arg_get_ntype( args, "norm", ChamMaxNorm ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + cham_diag_t diag = run_arg_get_diag( args, "diag", ChamNonUnit ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zlantr( norm_type, uplo, diag, M, N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + + /* Calculates the norm */ + START_TIMING( t ); + norm = CHAMELEON_zlantr_Tile( norm_type, uplo, diag, descA ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the solution */ + if ( check ) { + hres = check_znorm( ChamTriangular, norm_type, uplo, diag, norm, descA ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + run_id++; + return hres; +} + +testing_t test_zlantr; +const char *zlantr_params[] = { "nb", "norm", "uplo", "diag", "m", "n", "lda", "seedA", NULL }; +const char *zlantr_output[] = { NULL }; +const char *zlantr_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zlantr_init( void ) __attribute__( ( constructor ) ); +void +testing_zlantr_init( void ) +{ + test_zlantr.name = "zlantr"; + 
test_zlantr.helper = "zlantr"; + test_zlantr.params = zlantr_params; + test_zlantr.output = zlantr_output; + test_zlantr.outchk = zlantr_outchk; + test_zlantr.params_list = "nb;P;norm;uplo;diag;m;n;lda;seedA"; + test_zlantr.fptr = testing_zlantr; + test_zlantr.next = NULL; + + testing_register( &test_zlantr ); +} diff --git a/new-testing/testing_zlascal.c b/new-testing/testing_zlascal.c new file mode 100644 index 000000000..21abe8987 --- /dev/null +++ b/new-testing/testing_zlascal.c @@ -0,0 +1,124 @@ +/** + * + * @file testing_zlascal.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zlascal testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-07-13 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +static cham_fixdbl_t +flops_zlascal( cham_uplo_t uplo, int M, int N ) +{ + cham_fixdbl_t flops = 0.; /* number of scaled elements; products are formed in cham_fixdbl_t so large M, N cannot overflow int */ + int minMN = chameleon_min( M, N ); + switch ( uplo ) { + case ChamUpper: + flops = ( (cham_fixdbl_t)minMN * ( minMN + 1 ) / 2 ) + (cham_fixdbl_t)M * chameleon_max( 0, N - M ); + break; + case ChamLower: + flops = ( (cham_fixdbl_t)minMN * ( minMN + 1 ) / 2 ) + (cham_fixdbl_t)N * chameleon_max( 0, M - N ); + break; + case ChamUpperLower: + default: + flops = (cham_fixdbl_t)M * N; + } + +#if defined( PRECISION_z ) || defined( PRECISION_c ) + /* 1 complex multiplication per element, counted as 6 flops */ + flops *= 6.; +#endif + + return flops; +} + +int +testing_zlascal( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA, *descAinit; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + CHAMELEON_Complex64_t alpha = run_arg_get_Complex64( 
args, "alpha", 1. ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zlascal( uplo, M, N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + + /* Scales the matrix; only this call is timed */ + START_TIMING( t ); + hres = CHAMELEON_zlascal_Tile( uplo, alpha, descA ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the solution against descAinit, regenerated from seedA */ + if ( check ) { + CHAMELEON_Desc_Create( + &descAinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + CHAMELEON_zplrnt_Tile( descAinit, seedA ); + + hres += check_zscale( uplo, alpha, descAinit, descA ); + + CHAMELEON_Desc_Destroy( &descAinit ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + run_id++; + return hres; +} + +testing_t test_zlascal; +const char *zlascal_params[] = { "nb", "uplo", "m", "n", "lda", "alpha", "seedA", NULL }; +const char *zlascal_output[] = { NULL }; +const char *zlascal_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zlascal and registers it (constructor attribute) + */ +void testing_zlascal_init( void ) __attribute__( ( constructor ) ); +void +testing_zlascal_init( void ) +{ + test_zlascal.name = "zlascal"; + test_zlascal.helper = "zlascal"; + test_zlascal.params = zlascal_params; + test_zlascal.output = zlascal_output; + test_zlascal.outchk = zlascal_outchk; + test_zlascal.params_list = "nb;P;uplo;m;n;lda;alpha;seedA"; + test_zlascal.fptr = testing_zlascal; + test_zlascal.next = NULL; + + testing_register( &test_zlascal ); +} diff --git a/new-testing/testing_zlauum.c b/new-testing/testing_zlauum.c new file mode 100644 index 000000000..4a9b16557 --- /dev/null +++ b/new-testing/testing_zlauum.c @@ -0,0 +1,102 
@@ +/** + * + * @file testing_zlauum.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zlauum testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-26 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +static cham_fixdbl_t +flops_zlauum( int N ) +{ + cham_fixdbl_t flops = flops_zpotri( N ) - flops_ztrtri( N ); /* zlauum count taken as the zpotri count minus the ztrtri count */ + return flops; +} + +int +testing_zlauum( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zlauum( N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + + /* Initialises the matrix with random values (zplghe, bump 0) */ + CHAMELEON_zplghe_Tile( 0., uplo, descA, seedA ); + + /* Calculates the matrix product */ + START_TIMING( t ); + hres = CHAMELEON_zlauum_Tile( uplo, descA ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + if ( check ) { /* descA0 is regenerated with the same bump and seed as the initial descA */ + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAMELEON_zplghe_Tile( 0., uplo, descA0, seedA ); + + hres += check_zlauum( uplo, descA0, descA ); + + CHAMELEON_Desc_Destroy( &descA0 ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + run_id++; + return hres; +} + +testing_t test_zlauum; +const char 
*zlauum_params[] = { "nb", "uplo", "n", "lda", "seedA", NULL }; +const char *zlauum_output[] = { NULL }; +const char *zlauum_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zlauum and registers it (constructor attribute) + */ +void testing_zlauum_init( void ) __attribute__( ( constructor ) ); +void +testing_zlauum_init( void ) +{ + test_zlauum.name = "zlauum"; + test_zlauum.helper = "zlauum"; + test_zlauum.params = zlauum_params; + test_zlauum.output = zlauum_output; + test_zlauum.outchk = zlauum_outchk; + test_zlauum.params_list = "nb;P;uplo;n;lda;seedA"; + test_zlauum.fptr = testing_zlauum; + test_zlauum.next = NULL; + + testing_register( &test_zlauum ); +} diff --git a/new-testing/testing_zposv.c b/new-testing/testing_zposv.c new file mode 100644 index 000000000..0e1e47559 --- /dev/null +++ b/new-testing/testing_zposv.c @@ -0,0 +1,121 @@ +/** + * + * @file testing_zposv.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zposv testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-12 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +static cham_fixdbl_t +flops_zposv( int N, int NRHS ) +{ + cham_fixdbl_t flops = flops_zpotrf( N ) + flops_zpotrs( N, NRHS ); /* factorization count plus solve count */ + return flops; +} + +int +testing_zposv( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA, *descX; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( 
args, "seedB", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zposv( N, NRHS ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + CHAMELEON_Desc_Create( + &descX, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, N, NRHS, P, Q ); + + /* Fills A (zplghe, bump N) and X (zplrnt) with random values */ + CHAMELEON_zplghe_Tile( (double)N, uplo, descA, seedA ); + CHAMELEON_zplrnt_Tile( descX, seedB ); + + /* Calculates the solution */ + START_TIMING( t ); + hres = CHAMELEON_zposv_Tile( uplo, descA, descX ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the factorisation and residue */ + if ( check ) { + CHAM_desc_t *descA0, *descB; + + /* Check the factorization */ + descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAMELEON_zplghe_Tile( (double)N, uplo, descA0, seedA ); + + hres += check_zxxtrf( args, ChamHermitian, uplo, descA0, descA ); + + /* Check the solve */ + descB = CHAMELEON_Desc_Copy( descX, NULL ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + CHAMELEON_zplghe_Tile( (double)N, uplo, descA0, seedA ); /* descA0 is regenerated again before the solve check */ + hres += check_zsolve( ChamHermitian, ChamNoTrans, uplo, descA0, descX, descB ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descX ); + + run_id++; + return hres; +} + +testing_t test_zposv; +const char *zposv_params[] = { "nb", "uplo", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; +const char *zposv_output[] = { NULL }; +const char *zposv_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zposv and registers it (constructor attribute) + */ +void testing_zposv_init( void ) __attribute__( ( constructor ) ); +void +testing_zposv_init( void ) +{ + test_zposv.name = "zposv"; + test_zposv.helper = 
"zposv"; + test_zposv.params = zposv_params; + test_zposv.output = zposv_output; + test_zposv.outchk = zposv_outchk; + test_zposv.params_list = "nb;P;uplo;n;nrhs;lda;ldb;seedA;seedB"; + test_zposv.fptr = testing_zposv; + test_zposv.next = NULL; + + testing_register( &test_zposv ); +} diff --git a/new-testing/testing_zpotrf.c b/new-testing/testing_zpotrf.c new file mode 100644 index 000000000..11b0c45b2 --- /dev/null +++ b/new-testing/testing_zpotrf.c @@ -0,0 +1,96 @@ +/** + * + * @file testing_zpotrf.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zpotrf testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-12 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zpotrf( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zpotrf( N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrix */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplghe_Tile( (double)N, uplo, descA, seedA ); + + /* Calculates the solution */ + START_TIMING( t ); + hres = CHAMELEON_zpotrf_Tile( uplo, descA ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* 
Checks the factorisation and residue */ + if ( check ) { + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAMELEON_zplghe_Tile( (double)N, uplo, descA0, seedA ); + + hres += check_zxxtrf( args, ChamHermitian, uplo, descA0, descA ); + + CHAMELEON_Desc_Destroy( &descA0 ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + run_id++; + return hres; +} + +testing_t test_zpotrf; +const char *zpotrf_params[] = { "nb", "uplo", "n", "lda", "seedA", NULL }; +const char *zpotrf_output[] = { NULL }; +const char *zpotrf_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function: fills test_zpotrf and registers it (constructor attribute) + */ +void testing_zpotrf_init( void ) __attribute__( ( constructor ) ); +void +testing_zpotrf_init( void ) +{ + test_zpotrf.name = "zpotrf"; + test_zpotrf.helper = "zpotrf"; + test_zpotrf.params = zpotrf_params; + test_zpotrf.output = zpotrf_output; + test_zpotrf.outchk = zpotrf_outchk; + test_zpotrf.params_list = "nb;P;uplo;n;lda;seedA"; + test_zpotrf.fptr = testing_zpotrf; + test_zpotrf.next = NULL; + + testing_register( &test_zpotrf ); +} diff --git a/new-testing/testing_zpotri.c b/new-testing/testing_zpotri.c new file mode 100644 index 000000000..1372d0b65 --- /dev/null +++ b/new-testing/testing_zpotri.c @@ -0,0 +1,99 @@ +/** + * + * @file testing_zpotri.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zpotri testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-13 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include <assert.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zpotri( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zpotri( N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Create the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + + /* Initialise the matrix with the random values */ + CHAMELEON_zplghe_Tile( (double)N, uplo, descA, seedA ); + + /* Calculates the inversed matrix */ + START_TIMING( t ); + hres = CHAMELEON_zpotrf_Tile( uplo, descA ); + assert( hres == 0 ); + hres = CHAMELEON_zpotri_Tile( uplo, descA ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Check the inverse */ + if ( check ) { + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAMELEON_zplghe_Tile( (double)N, uplo, descA0, seedA ); + + hres += check_ztrtri( ChamHermitian, uplo, ChamNonUnit, descA0, descA ); + + CHAMELEON_Desc_Destroy( &descA0 ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + run_id++; + return hres; +} + +testing_t test_zpotri; +const char *zpotri_params[] = { "nb", "uplo", "n", "lda", "seedA", NULL }; +const char *zpotri_output[] = { NULL }; +const char *zpotri_outchk[] 
= { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zpotri_init( void ) __attribute__( ( constructor ) ); +void +testing_zpotri_init( void ) +{ + test_zpotri.name = "zpotri"; + test_zpotri.helper = "zpotri"; + test_zpotri.params = zpotri_params; + test_zpotri.output = zpotri_output; + test_zpotri.outchk = zpotri_outchk; + test_zpotri.params_list = "nb;P;uplo;n;lda;seedA"; + test_zpotri.fptr = testing_zpotri; + test_zpotri.next = NULL; + + testing_register( &test_zpotri ); +} diff --git a/new-testing/testing_zpotrs.c b/new-testing/testing_zpotrs.c new file mode 100644 index 000000000..84936b5fb --- /dev/null +++ b/new-testing/testing_zpotrs.c @@ -0,0 +1,113 @@ +/** + * + * @file testing_zpotrs.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zpotrs testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-13 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include <assert.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zpotrs( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA, *descX; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zpotrs( N, NRHS ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + hres = CHAMELEON_Desc_Create( + 
&descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + assert( hres == 0 ); + hres = CHAMELEON_Desc_Create( + &descX, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, N, NRHS, P, Q ); + assert( hres == 0 ); + + /* Fills the matrix with random values */ + CHAMELEON_zplghe_Tile( (double)N, uplo, descA, seedA ); + CHAMELEON_zplrnt_Tile( descX, seedB ); + + hres = CHAMELEON_zpotrf_Tile( uplo, descA ); + assert( hres == 0 ); + + /* Calculates the solution */ + START_TIMING( t ); + hres = CHAMELEON_zpotrs_Tile( uplo, descA, descX ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the factorisation and residue */ + if ( check ) { + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAM_desc_t *descB = CHAMELEON_Desc_Copy( descX, NULL ); + + CHAMELEON_zplghe_Tile( (double)N, uplo, descA0, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + hres += check_zsolve( ChamHermitian, ChamNoTrans, uplo, descA0, descX, descB ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descX ); + + run_id++; + return hres; +} + +testing_t test_zpotrs; +const char *zpotrs_params[] = { "nb", "uplo", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; +const char *zpotrs_output[] = { NULL }; +const char *zpotrs_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zpotrs_init( void ) __attribute__( ( constructor ) ); +void +testing_zpotrs_init( void ) +{ + test_zpotrs.name = "zpotrs"; + test_zpotrs.helper = "zpotrs"; + test_zpotrs.params = zpotrs_params; + test_zpotrs.output = zpotrs_output; + test_zpotrs.outchk = zpotrs_outchk; + test_zpotrs.params_list = "nb;P;uplo;n;nrhs;lda;ldb;seedA;seedB"; + test_zpotrs.fptr = testing_zpotrs; + test_zpotrs.next = NULL; + + testing_register( &test_zpotrs ); +} diff --git 
a/new-testing/testing_zsymm.c b/new-testing/testing_zsymm.c new file mode 100644 index 000000000..2e43fa2f7 --- /dev/null +++ b/new-testing/testing_zsymm.c @@ -0,0 +1,129 @@ +/** + * + * @file testing_zsymm.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zsymm testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-08 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zsymm( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int Am; + int hres = 0; + CHAM_desc_t *descA, *descB, *descC, *descCinit; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_side_t side = run_arg_get_uplo( args, "side", ChamLeft ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", ( ( side == ChamLeft ) ? 
M : N ) ); + int LDB = run_arg_get_int( args, "LDB", M ); + int LDC = run_arg_get_int( args, "LDC", M ); + CHAMELEON_Complex64_t alpha = testing_zalea(); + CHAMELEON_Complex64_t beta = testing_zalea(); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int seedC = run_arg_get_int( args, "seedC", random() ); + double bump = testing_dalea(); + bump = run_arg_get_double( args, "bump", bump ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zsymm( side, M, N ); + + alpha = run_arg_get_Complex64( args, "alpha", alpha ); + beta = run_arg_get_Complex64( args, "beta", beta ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Calculate the dimensions according to the side */ + if ( side == ChamLeft ) { + Am = M; + } + else { + Am = N; + } + + /* Create the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, Am, 0, 0, Am, Am, P, Q ); + CHAMELEON_Desc_Create( + &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, N, 0, 0, M, N, P, Q ); + CHAMELEON_Desc_Create( + &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplgsy_Tile( bump, uplo, descA, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + CHAMELEON_zplrnt_Tile( descC, seedC ); + + /* Calculates the product */ + START_TIMING( t ); + hres = CHAMELEON_zsymm_Tile( side, uplo, alpha, descA, descB, beta, descC ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the solution */ + if ( check ) { + CHAMELEON_Desc_Create( + &descCinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q ); + CHAMELEON_zplrnt_Tile( descCinit, seedC ); + + hres += + check_zsymm( ChamSymmetric, side, uplo, alpha, descA, descB, beta, descCinit, descC ); + + CHAMELEON_Desc_Destroy( &descCinit 
); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descB ); + CHAMELEON_Desc_Destroy( &descC ); + + run_id++; + return hres; +} + +testing_t test_zsymm; +const char *zsymm_params[] = { "nb", "side", "uplo", "m", "n", "lda", "ldb", "ldc", + "alpha", "beta", "seedA", "seedB", "seedC", "bump", NULL }; +const char *zsymm_output[] = { NULL }; +const char *zsymm_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zsymm_init( void ) __attribute__( ( constructor ) ); +void +testing_zsymm_init( void ) +{ + test_zsymm.name = "zsymm"; + test_zsymm.helper = "zsymm"; + test_zsymm.params = zsymm_params; + test_zsymm.output = zsymm_output; + test_zsymm.outchk = zsymm_outchk; + test_zsymm.params_list = "nb;P;side;uplo;m;n;lda;ldb;ldc;alpha;beta;seedA;seedB;seedC;bump"; + test_zsymm.fptr = testing_zsymm; + test_zsymm.next = NULL; + + testing_register( &test_zsymm ); +} diff --git a/new-testing/testing_zsyr2k.c b/new-testing/testing_zsyr2k.c new file mode 100644 index 000000000..134452e2c --- /dev/null +++ b/new-testing/testing_zsyr2k.c @@ -0,0 +1,131 @@ +/** + * + * @file testing_zsyr2k.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zsyr2k testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-09 + * @precisions normal z -> z c d s + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zsyr2k( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int Am, An; + int hres = 0; + CHAM_desc_t *descA, *descB, *descC, *descCinit; + + /* Read arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int K = run_arg_get_int( args, "K", N ); + int LDA = run_arg_get_int( args, "LDA", ( ( trans == ChamNoTrans ) ? N : K ) ); + int LDB = run_arg_get_int( args, "LDB", ( ( trans == ChamNoTrans ) ? N : K ) ); + int LDC = run_arg_get_int( args, "LDC", N ); + CHAMELEON_Complex64_t alpha = testing_zalea(); + CHAMELEON_Complex64_t beta = testing_zalea(); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int seedC = run_arg_get_int( args, "seedC", random() ); + double bump = testing_dalea(); + bump = run_arg_get_double( args, "bump", bump ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zher2k( K, N ); + + alpha = run_arg_get_Complex64( args, "alpha", alpha ); + beta = run_arg_get_Complex64( args, "beta", beta ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Calculate the dimensions according to the transposition */ + if ( trans == ChamNoTrans ) { + Am = N; + An = K; + } + else { + Am = K; + An = N; + } + + /* Create the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, An, 0, 0, Am, An, P, Q ); + CHAMELEON_Desc_Create( + &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, An, 
0, 0, Am, An, P, Q ); + CHAMELEON_Desc_Create( + &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q ); + + /* Fill the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + CHAMELEON_zplgsy_Tile( bump, uplo, descC, seedC ); + + /* Calculate the product */ + START_TIMING( t ); + hres = CHAMELEON_zsyr2k_Tile( uplo, trans, alpha, descA, descB, beta, descC ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Check the solution */ + if ( check ) { + CHAMELEON_Desc_Create( + &descCinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q ); + CHAMELEON_zplgsy_Tile( bump, uplo, descCinit, seedC ); + + hres += + check_zsyrk( ChamSymmetric, uplo, trans, alpha, descA, descB, beta, descCinit, descC ); + + CHAMELEON_Desc_Destroy( &descCinit ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descB ); + CHAMELEON_Desc_Destroy( &descC ); + + run_id++; + return hres; +} + +testing_t test_zsyr2k; +const char *zsyr2k_params[] = { "nb", "trans", "uplo", "n", "k", "lda", "ldb", "ldc", + "alpha", "beta", "seedA", "seedB", "seedC", "bump", NULL }; +const char *zsyr2k_output[] = { NULL }; +const char *zsyr2k_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zsyr2k_init( void ) __attribute__( ( constructor ) ); +void +testing_zsyr2k_init( void ) +{ + test_zsyr2k.name = "zsyr2k"; + test_zsyr2k.helper = "zsyr2k"; + test_zsyr2k.params = zsyr2k_params; + test_zsyr2k.output = zsyr2k_output; + test_zsyr2k.outchk = zsyr2k_outchk; + test_zsyr2k.params_list = "nb;P;trans;uplo;n;k;lda;ldb;ldc;alpha;beta;seedA;seedB;seedC;bump"; + test_zsyr2k.fptr = testing_zsyr2k; + test_zsyr2k.next = NULL; + + testing_register( &test_zsyr2k ); +} diff --git a/new-testing/testing_zsyrk.c b/new-testing/testing_zsyrk.c new file mode 100644 index 
000000000..e236637a8
--- /dev/null
+++ b/new-testing/testing_zsyrk.c
@@ -0,0 +1,125 @@
+/**
+ *
+ * @file testing_zsyrk.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zsyrk testing: times zsyrk and optionally checks the update
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-08-09
+ * @precisions normal z -> z c d s
+ *
+ */
+#include <chameleon.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+
+int
+testing_zsyrk( run_arg_list_t *args, int check )
+{
+    static int   run_id = 0;
+    int          Am, An;
+    int          hres = 0;
+    CHAM_desc_t *descA, *descC, *descCinit;
+
+    /* Reads arguments; alpha, beta and bump start from random draws */
+    int                   nb    = run_arg_get_int( args, "nb", 320 );
+    int                   P     = parameters_getvalue_int( "P" );
+    cham_trans_t          trans = run_arg_get_trans( args, "trans", ChamNoTrans );
+    cham_uplo_t           uplo  = run_arg_get_uplo( args, "uplo", ChamUpper );
+    int                   N     = run_arg_get_int( args, "N", 1000 );
+    int                   K     = run_arg_get_int( args, "K", N );
+    int                   LDA   = run_arg_get_int( args, "LDA", ( ( trans == ChamNoTrans ) ? N : K ) );
+    int                   LDC   = run_arg_get_int( args, "LDC", N );
+    CHAMELEON_Complex64_t alpha = testing_zalea();
+    CHAMELEON_Complex64_t beta  = testing_zalea();
+    CHAMELEON_Complex64_t bump  = testing_zalea();
+    int                   seedA = run_arg_get_int( args, "seedA", random() );
+    int                   seedC = run_arg_get_int( args, "seedC", random() );
+    int                   Q     = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zsyrk( K, N );
+
+    alpha = run_arg_get_Complex64( args, "alpha", alpha );
+    beta  = run_arg_get_Complex64( args, "beta", beta );
+    bump  = run_arg_get_Complex64( args, "bump", bump );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* A is N-by-K when not transposed, K-by-N otherwise */
+    if ( trans == ChamNoTrans ) {
+        Am = N;
+        An = K;
+    }
+    else {
+        Am = K;
+        An = N;
+    }
+
+    /* Creates the matrices: A is Am-by-An and C is N-by-N */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, An, 0, 0, Am, An, P, Q );
+    CHAMELEON_Desc_Create(
+        &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q );
+
+    /* Fills the matrices from their seeds (zplrnt for A, zplgsy for C) */
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+    CHAMELEON_zplgsy_Tile( bump, uplo, descC, seedC );
+
+    /* Calculates the product; only the zsyrk call is timed */
+    START_TIMING( t );
+    hres = CHAMELEON_zsyrk_Tile( uplo, trans, alpha, descA, beta, descC );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks the solution against the initial C regenerated from seedC */
+    if ( check ) {
+        CHAMELEON_Desc_Create(
+            &descCinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, N, N, P, Q );
+        CHAMELEON_zplgsy_Tile( bump, uplo, descCinit, seedC );
+
+        hres +=
+            check_zsyrk( ChamSymmetric, uplo, trans, alpha, descA, NULL, beta, descCinit, descC );
+
+        CHAMELEON_Desc_Destroy( &descCinit );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descC );
+
+    run_id++;
+    return hres;
+}
+
+testing_t   test_zsyrk;
+const char *zsyrk_params[] = { "nb", "trans", "uplo", "n", "k", "lda", "ldc",
+                               "alpha", "beta", "seedA", "seedC", "bump", NULL };
+const char *zsyrk_output[] = { NULL };
+const char *zsyrk_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function, run at load time (constructor attribute)
+ */
+void testing_zsyrk_init( void ) __attribute__( ( constructor ) );
+void
+testing_zsyrk_init( void )
+{
+    test_zsyrk.name        = "zsyrk";
+    test_zsyrk.helper      = "zsyrk";
+    test_zsyrk.params      = zsyrk_params;
+    test_zsyrk.output      = zsyrk_output;
+    test_zsyrk.outchk      = zsyrk_outchk;
+    test_zsyrk.params_list = "nb;P;trans;uplo;n;k;lda;ldc;alpha;beta;seedA;seedC;bump";
+    test_zsyrk.fptr        = testing_zsyrk;
+    test_zsyrk.next        = NULL;
+
+    testing_register( &test_zsyrk );
+}
diff --git a/new-testing/testing_zsysv.c b/new-testing/testing_zsysv.c
new file mode 100644
index 000000000..e05e45c68
--- /dev/null
+++ b/new-testing/testing_zsysv.c
@@ -0,0 +1,121 @@
+/**
+ *
+ * @file testing_zsysv.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zsysv testing: times zsysv and optionally checks the solve
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-08-12
+ * @precisions normal z -> c
+ *
+ */
+#include <chameleon.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+
+static cham_fixdbl_t
+flops_zsysv( int N, int NRHS )
+{
+    cham_fixdbl_t flops = flops_zpotrf( N ) + flops_zpotrs( N, NRHS );
+    return flops;
+}
+
+int
+testing_zsysv( run_arg_list_t *args, int check )
+{
+    static int   run_id = 0;
+    int          hres   = 0;
+    CHAM_desc_t *descA, *descX;
+
+    /* Reads arguments */
+    int           nb    = run_arg_get_int( args, "nb", 320 );
+    int           P     = parameters_getvalue_int( "P" );
+    cham_uplo_t   uplo  = run_arg_get_uplo( args, "uplo", ChamUpper );
+    int           N     = run_arg_get_int( args, "N", 1000 );
+    int           NRHS  = run_arg_get_int( args, "NRHS", 1 );
+    int           LDA   = run_arg_get_int( args, "LDA", N );
+    int           LDB   = run_arg_get_int( args, "LDB", N );
+    int           seedA = run_arg_get_int( args, "seedA", random() );
+    int           seedB = run_arg_get_int( args, "seedB", random() );
+    int           Q     = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zsysv( N, NRHS );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Creates the matrices: A is N-by-N and X is N-by-NRHS */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q );
+    CHAMELEON_Desc_Create(
+        &descX, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, N, NRHS, P, Q );
+
+    /* Fills the matrices from their seeds (zplgsy for A, zplrnt for X) */
+    CHAMELEON_zplgsy_Tile( (double)N, uplo, descA, seedA );
+    CHAMELEON_zplrnt_Tile( descX, seedB );
+
+    /* Calculates the solution; flops uses the zpotrf + zpotrs counts */
+    START_TIMING( t );
+    hres = CHAMELEON_zsysv_Tile( uplo, descA, descX );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks the factorisation and residue */
+    if ( check ) {
+        CHAM_desc_t *descA0, *descB;
+
+        /* Check the factorization against A regenerated from seedA */
+        descA0 = CHAMELEON_Desc_Copy( descA, NULL );
+        CHAMELEON_zplgsy_Tile( (double)N, uplo, descA0, seedA );
+
+        hres += check_zxxtrf( args, ChamSymmetric, uplo, descA0, descA );
+
+        /* Check the solve against the right-hand side regenerated from seedB */
+        descB = CHAMELEON_Desc_Copy( descX, NULL );
+        CHAMELEON_zplrnt_Tile( descB, seedB );
+        /* NOTE(review): A0 is regenerated, presumably because check_zxxtrf alters it - confirm */
+        CHAMELEON_zplgsy_Tile( (double)N, uplo, descA0, seedA );
+        hres += check_zsolve( ChamSymmetric, ChamNoTrans, uplo, descA0, descX, descB );
+
+        CHAMELEON_Desc_Destroy( &descA0 );
+        CHAMELEON_Desc_Destroy( &descB );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descX );
+
+    run_id++;
+    return hres;
+}
+
+testing_t   test_zsysv;
+const char *zsysv_params[] = { "nb", "uplo", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL };
+const char *zsysv_output[] = { NULL };
+const char *zsysv_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function, run at load time (constructor attribute)
+ */
+void testing_zsysv_init( void ) __attribute__( ( constructor ) );
+void
+testing_zsysv_init( void )
+{
+    test_zsysv.name        = "zsysv";
+    test_zsysv.helper      = "zsysv";
+    test_zsysv.params      = zsysv_params;
+    test_zsysv.output      = zsysv_output;
+    test_zsysv.outchk      = zsysv_outchk;
+    test_zsysv.params_list = "nb;P;uplo;n;nrhs;lda;ldb;seedA;seedB";
+    test_zsysv.fptr        = testing_zsysv;
+    test_zsysv.next        = NULL;
+
+    testing_register( &test_zsysv );
+}
diff --git a/new-testing/testing_zsytrf.c b/new-testing/testing_zsytrf.c
new file mode 100644
index 000000000..6252f929c
--- /dev/null
+++ b/new-testing/testing_zsytrf.c
@@ -0,0 +1,96 @@
+/**
+ *
+ * @file testing_zsytrf.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zsytrf testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-12 + * @precisions normal z -> c + * + */ +#include <chameleon.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zsytrf( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zpotrf( N ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplgsy_Tile( (double)N, uplo, descA, seedA ); + + /* Calculates the solution */ + START_TIMING( t ); + hres = CHAMELEON_zsytrf_Tile( uplo, descA ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the factorisation and residue */ + if ( check ) { + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAMELEON_zplgsy_Tile( (double)N, uplo, descA0, seedA ); + + hres += check_zxxtrf( args, ChamSymmetric, uplo, descA0, descA ); + + CHAMELEON_Desc_Destroy( &descA0 ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + run_id++; + return hres; +} + +testing_t test_zsytrf; +const char *zsytrf_params[] = { "nb", "uplo", "n", "lda", "seedA", NULL }; +const char *zsytrf_output[] = { NULL }; +const char *zsytrf_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void 
testing_zsytrf_init( void ) __attribute__( ( constructor ) ); +void +testing_zsytrf_init( void ) +{ + test_zsytrf.name = "zsytrf"; + test_zsytrf.helper = "zsytrf"; + test_zsytrf.params = zsytrf_params; + test_zsytrf.output = zsytrf_output; + test_zsytrf.outchk = zsytrf_outchk; + test_zsytrf.params_list = "nb;P;uplo;n;lda;seedA"; + test_zsytrf.fptr = testing_zsytrf; + test_zsytrf.next = NULL; + + testing_register( &test_zsytrf ); +} diff --git a/new-testing/testing_zsytrs.c b/new-testing/testing_zsytrs.c new file mode 100644 index 000000000..1bd4cfcdd --- /dev/null +++ b/new-testing/testing_zsytrs.c @@ -0,0 +1,113 @@ +/** + * + * @file testing_zsytrs.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zsytrs testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-08-13 + * @precisions normal z -> c + * + */ +#include <chameleon.h> +#include <assert.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zsytrs( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int hres = 0; + CHAM_desc_t *descA, *descX; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedB = run_arg_get_int( args, "seedB", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = 0; // flops_zsytrs( N, NRHS ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + hres = CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q ); + 
assert( hres == 0 ); + hres = CHAMELEON_Desc_Create( + &descX, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, NRHS, 0, 0, N, NRHS, P, Q ); + assert( hres == 0 ); + + /* Fills the matrix with random values */ + CHAMELEON_zplgsy_Tile( (double)N, uplo, descA, seedA ); + CHAMELEON_zplrnt_Tile( descX, seedB ); + + hres = CHAMELEON_zsytrf_Tile( uplo, descA ); + assert( hres == 0 ); + + /* Calculates the solution */ + START_TIMING( t ); + hres = CHAMELEON_zsytrs_Tile( uplo, descA, descX ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the factorisation and residue */ + if ( check ) { + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + CHAM_desc_t *descB = CHAMELEON_Desc_Copy( descX, NULL ); + + CHAMELEON_zplgsy_Tile( (double)N, uplo, descA0, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + hres += check_zsolve( ChamSymmetric, ChamNoTrans, uplo, descA0, descX, descB ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descX ); + + run_id++; + return hres; +} + +testing_t test_zsytrs; +const char *zsytrs_params[] = { "nb", "uplo", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; +const char *zsytrs_output[] = { NULL }; +const char *zsytrs_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zsytrs_init( void ) __attribute__( ( constructor ) ); +void +testing_zsytrs_init( void ) +{ + test_zsytrs.name = "zsytrs"; + test_zsytrs.helper = "zsytrs"; + test_zsytrs.params = zsytrs_params; + test_zsytrs.output = zsytrs_output; + test_zsytrs.outchk = zsytrs_outchk; + test_zsytrs.params_list = "nb;P;uplo;n;nrhs;lda;ldb;seedA;seedB"; + test_zsytrs.fptr = testing_zsytrs; + test_zsytrs.next = NULL; + + testing_register( &test_zsytrs ); +} diff --git a/new-testing/testing_ztradd.c b/new-testing/testing_ztradd.c new file mode 
100644
index 000000000..761a4e0c1
--- /dev/null
+++ b/new-testing/testing_ztradd.c
@@ -0,0 +1,168 @@
+/**
+ *
+ * @file testing_ztradd.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon ztradd testing (times CHAMELEON_ztradd_Tile, optionally checks it)
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-08-06
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+/* Flop count of the sum on the uplo part of an M-by-N matrix (kept in floating point). */
+static cham_fixdbl_t
+flops_ztradd( cham_uplo_t uplo, int M, int N )
+{
+    cham_fixdbl_t flops = 0.;
+    int minMN = chameleon_min( M, N );
+    switch ( uplo ) { /* number of referenced elements; casts avoid int overflow on large sizes */
+        case ChamUpper:
+            flops = ( (cham_fixdbl_t)minMN * ( minMN + 1 ) / 2. ) + (cham_fixdbl_t)M * chameleon_max( 0, N - M );
+            break;
+        case ChamLower:
+            flops = ( (cham_fixdbl_t)minMN * ( minMN + 1 ) / 2. ) + (cham_fixdbl_t)N * chameleon_max( 0, M - N );
+            break;
+        case ChamUpperLower:
+        default:
+            flops = (cham_fixdbl_t)M * N;
+    }
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+    /* 2 multiplications and 1 addition per element */
+    flops *= ( 2. * 6. + 2. );
+#else
+    flops *= ( 2. + 1. );
+#endif
+
+    return flops;
+}
+/* Runs one ztradd case described by args; returns the kernel status plus the check status. */
+int
+testing_ztradd( run_arg_list_t *args, int check )
+{
+    static int run_id = 0;
+    int hres = 0;
+    int Am, An;
+    CHAM_desc_t *descA, *descB;
+
+    /* Reads arguments (the last argument of each getter is the default passed to it) */
+    int nb = run_arg_get_int( args, "nb", 320 );
+    int P = parameters_getvalue_int( "P" );
+    cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans );
+    cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper );
+    int N = run_arg_get_int( args, "N", 1000 );
+    int M = run_arg_get_int( args, "M", N );
+    int LDA = run_arg_get_int( args, "LDA", ( ( trans == ChamNoTrans ) ? M : N ) );
+    int LDB = run_arg_get_int( args, "LDB", M );
+    CHAMELEON_Complex64_t alpha = testing_zalea();
+    CHAMELEON_Complex64_t beta = testing_zalea();
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int seedB = run_arg_get_int( args, "seedB", random() );
+    int Q = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_ztradd( uplo, M, N );
+
+    alpha = run_arg_get_Complex64( args, "alpha", alpha );
+    beta = run_arg_get_Complex64( args, "beta", beta );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    if ( trans != ChamNoTrans ) { /* A is stored N-by-M when it is used transposed */
+        Am = N;
+        An = M;
+    }
+    else {
+        Am = M;
+        An = N;
+    }
+
+    /* Creates the matrices: A is Am-by-An, B is M-by-N */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, An, 0, 0, Am, An, P, Q );
+    CHAMELEON_Desc_Create(
+        &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, N, 0, 0, M, N, P, Q );
+
+    /* Fills the matrices with random values */
+    switch ( uplo ) {
+        case ChamUpper:
+        case ChamLower:
+            if ( trans == ChamNoTrans ) {
+                CHAMELEON_zplgsy_Tile( 0., uplo, descA, seedA );
+            }
+            else {
+                cham_uplo_t uplo_inv = ( uplo == ChamUpper ) ? ChamLower : ChamUpper;
+                CHAMELEON_zplgsy_Tile( 0., uplo_inv, descA, seedA );
+            }
+            CHAMELEON_zplgsy_Tile( 0., uplo, descB, seedB );
+            break;
+        case ChamUpperLower:
+        default:
+            CHAMELEON_zplrnt_Tile( descA, seedA );
+            CHAMELEON_zplrnt_Tile( descB, seedB );
+            break;
+    }
+
+    /* Calculates the sum (timed section) */
+    START_TIMING( t );
+    hres = CHAMELEON_ztradd_Tile( uplo, trans, alpha, descA, beta, descB );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks the solution against a copy of B regenerated from the same seed */
+    if ( check ) {
+        CHAM_desc_t *descB0 = CHAMELEON_Desc_Copy( descB, NULL );
+
+        if ( uplo == ChamUpperLower ) {
+            CHAMELEON_zplrnt_Tile( descB0, seedB );
+        }
+        else {
+            CHAMELEON_zplgsy_Tile( 0., uplo, descB0, seedB );
+        }
+        hres += check_zsum( uplo, trans, alpha, descA, beta, descB0, descB );
+
+        CHAMELEON_Desc_Destroy( &descB0 );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descB );
+
+    run_id++;
+    return hres;
+}
+
+testing_t test_ztradd;
+const char *ztradd_params[] = { "nb", "trans", "uplo", "m", "n", "lda",
+                                "ldb", "alpha", "beta", "seedA", "seedB", NULL };
+const char *ztradd_output[] = { NULL };
+const char *ztradd_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function (runs at load time through the constructor attribute)
+ */
+void testing_ztradd_init( void ) __attribute__( ( constructor ) );
+void
+testing_ztradd_init( void )
+{
+    test_ztradd.name = "ztradd";
+    test_ztradd.helper = "ztradd";
+    test_ztradd.params = ztradd_params;
+    test_ztradd.output = ztradd_output;
+    test_ztradd.outchk = ztradd_outchk;
+    test_ztradd.params_list = "nb;P;trans;uplo;m;n;lda;ldb;alpha;beta;seedA;seedB";
+    test_ztradd.fptr = testing_ztradd;
+    test_ztradd.next = NULL;
+
+    testing_register( &test_ztradd );
+}
diff --git a/new-testing/testing_ztrmm.c b/new-testing/testing_ztrmm.c
new file mode 100644
index 000000000..c14d20ee0
--- /dev/null
+++ b/new-testing/testing_ztrmm.c
@@ -0,0 +1,122 @@
+/**
+ *
+ * @file testing_ztrmm.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon ztrmm testing (times CHAMELEON_ztrmm_Tile, optionally checks it)
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-08-12
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+/* Runs one ztrmm case described by args; returns the kernel status plus the check status. */
+int
+testing_ztrmm( run_arg_list_t *args, int check )
+{
+    static int run_id = 0;
+    int Bm, Bn;
+    int hres = 0;
+    CHAM_desc_t *descA, *descB, *descBinit;
+
+    /* Reads arguments (the last argument of each getter is the default passed to it) */
+    int nb = run_arg_get_int( args, "nb", 320 );
+    int P = parameters_getvalue_int( "P" );
+    cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans );
+    cham_side_t side = run_arg_get_uplo( args, "side", ChamLeft ); /* NOTE(review): side is read with the uplo getter - confirm */
+    cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper );
+    cham_diag_t diag = run_arg_get_diag( args, "diag", ChamNonUnit );
+    int N = run_arg_get_int( args, "N", 1000 );
+    int K = run_arg_get_int( args, "K", N );
+    int LDA = run_arg_get_int( args, "LDA", N );
+    int LDB = run_arg_get_int( args, "LDB", ( side == ChamLeft ) ? N : K ); /* defaults to the row count of B */
+    CHAMELEON_Complex64_t alpha = testing_zalea();
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int seedB = run_arg_get_int( args, "seedB", random() );
+    int Q = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_ztrmm( side, N, K );
+
+    alpha = run_arg_get_Complex64( args, "alpha", alpha );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Calculates the dimensions of B according to the side (A is always N-by-N) */
+    if ( side == ChamLeft ) {
+        Bm = N;
+        Bn = K;
+    }
+    else {
+        Bm = K;
+        Bn = N;
+    }
+
+    /* Creates the matrices */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q );
+    CHAMELEON_Desc_Create(
+        &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, Bn, 0, 0, Bm, Bn, P, Q );
+
+    /* Fills the matrices with random values */
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+    CHAMELEON_zplrnt_Tile( descB, seedB );
+
+    /* Calculates the product (timed section) */
+    START_TIMING( t );
+    hres = CHAMELEON_ztrmm_Tile( side, uplo, trans, diag, alpha, descA, descB );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks the solution against the initial B regenerated from the same seed */
+    if ( check ) {
+        CHAMELEON_Desc_Create(
+            &descBinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, Bn, 0, 0, Bm, Bn, P, Q );
+        CHAMELEON_zplrnt_Tile( descBinit, seedB );
+
+        hres += check_ztrmm( CHECK_TRMM, side, uplo, trans, diag, alpha, descA, descB, descBinit );
+
+        CHAMELEON_Desc_Destroy( &descBinit );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descB );
+
+    run_id++;
+    return hres;
+}
+
+testing_t test_ztrmm;
+const char *ztrmm_params[] = { "nb", "trans", "side", "uplo", "diag", "n", "k",
+                               "lda", "ldb", "alpha", "seedA", "seedB", NULL };
+const char *ztrmm_output[] = { NULL };
+const char *ztrmm_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function (runs at load time through the constructor attribute)
+ */
+void testing_ztrmm_init( void ) __attribute__( ( constructor ) );
+void
+testing_ztrmm_init( void )
+{
+    test_ztrmm.name = "ztrmm";
+    test_ztrmm.helper = "ztrmm";
+    test_ztrmm.params = ztrmm_params;
+    test_ztrmm.output = ztrmm_output;
+    test_ztrmm.outchk = ztrmm_outchk;
+    test_ztrmm.params_list = "nb;P;trans;side;uplo;diag;n;k;lda;ldb;alpha;seedA;seedB";
+    test_ztrmm.fptr = testing_ztrmm;
+    test_ztrmm.next = NULL;
+
+    testing_register( &test_ztrmm );
+}
diff --git a/new-testing/testing_ztrsm.c b/new-testing/testing_ztrsm.c
new file mode 100644
index 000000000..db2e149b5
--- /dev/null
+++ b/new-testing/testing_ztrsm.c
@@ -0,0 +1,123 @@
+/**
+ *
+ * @file testing_ztrsm.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon ztrsm testing (times CHAMELEON_ztrsm_Tile, optionally checks it)
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-08-12
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+/* Runs one ztrsm case described by args; returns the kernel status plus the check status. */
+int
+testing_ztrsm( run_arg_list_t *args, int check )
+{
+    static int run_id = 0;
+    int Bm, Bn;
+    int hres = 0;
+    CHAM_desc_t *descA, *descB, *descBinit;
+
+    /* Reads arguments (the last argument of each getter is the default passed to it) */
+    int nb = run_arg_get_int( args, "nb", 320 );
+    int P = parameters_getvalue_int( "P" );
+    cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans );
+    cham_side_t side = run_arg_get_uplo( args, "side", ChamLeft ); /* NOTE(review): side is read with the uplo getter - confirm */
+    cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper );
+    cham_diag_t diag = run_arg_get_diag( args, "diag", ChamNonUnit );
+    int N = run_arg_get_int( args, "N", 1000 );
+    int K = run_arg_get_int( args, "K", N );
+    int LDA = run_arg_get_int( args, "LDA", N );
+    int LDB = run_arg_get_int( args, "LDB", ( side == ChamLeft ) ? N : K ); /* defaults to the row count of B */
+    CHAMELEON_Complex64_t alpha = testing_zalea();
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int seedB = run_arg_get_int( args, "seedB", random() );
+    int Q = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_ztrsm( side, N, K );
+
+    alpha = run_arg_get_Complex64( args, "alpha", alpha );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Calculates the dimensions of B according to the side (A is always N-by-N) */
+    if ( side == ChamLeft ) {
+        Bm = N;
+        Bn = K;
+    }
+    else {
+        Bm = K;
+        Bn = N;
+    }
+
+    /* Creates the matrices */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q );
+    CHAMELEON_Desc_Create(
+        &descB, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, Bn, 0, 0, Bm, Bn, P, Q );
+
+    /* Fills the matrices with random values */
+    /* We bump the diagonal of A a little to keep the solve stable */
+    CHAMELEON_zplgsy_Tile( 2., uplo, descA, seedA );
+    CHAMELEON_zplrnt_Tile( descB, seedB );
+
+    /* Solves the triangular system (timed section) */
+    START_TIMING( t );
+    hres = CHAMELEON_ztrsm_Tile( side, uplo, trans, diag, alpha, descA, descB );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks the solution against the initial B regenerated from the same seed */
+    if ( check ) {
+        CHAMELEON_Desc_Create(
+            &descBinit, NULL, ChamComplexDouble, nb, nb, nb * nb, LDB, Bn, 0, 0, Bm, Bn, P, Q );
+        CHAMELEON_zplrnt_Tile( descBinit, seedB );
+
+        hres += check_ztrmm( CHECK_TRSM, side, uplo, trans, diag, alpha, descA, descB, descBinit );
+
+        CHAMELEON_Desc_Destroy( &descBinit );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descB );
+
+    run_id++;
+    return hres;
+}
+
+testing_t test_ztrsm;
+const char *ztrsm_params[] = { "nb", "trans", "side", "uplo", "diag", "n", "k",
+                               "lda", "ldb", "alpha", "seedA", "seedB", NULL };
+const char *ztrsm_output[] = { NULL };
+const char *ztrsm_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function (runs at load time through the constructor attribute)
+ */
+void testing_ztrsm_init( void ) __attribute__( ( constructor ) );
+void
+testing_ztrsm_init( void )
+{
+    test_ztrsm.name = "ztrsm";
+    test_ztrsm.helper = "ztrsm";
+    test_ztrsm.params = ztrsm_params;
+    test_ztrsm.output = ztrsm_output;
+    test_ztrsm.outchk = ztrsm_outchk;
+    test_ztrsm.params_list = "nb;P;trans;side;uplo;diag;n;k;lda;ldb;alpha;seedA;seedB";
+    test_ztrsm.fptr = testing_ztrsm;
+    test_ztrsm.next = NULL;
+
+    testing_register( &test_ztrsm );
+}
diff --git a/new-testing/testing_ztrtri.c b/new-testing/testing_ztrtri.c
new file mode 100644
index 000000000..f168221e1
--- /dev/null
+++ b/new-testing/testing_ztrtri.c
@@ -0,0 +1,97 @@
+/**
+ *
+ * @file testing_ztrtri.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon ztrtri testing (times CHAMELEON_ztrtri_Tile, optionally checks it)
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-08-14
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+/* Runs one ztrtri case described by args; returns the kernel status plus the check status. */
+int
+testing_ztrtri( run_arg_list_t *args, int check )
+{
+    static int run_id = 0;
+    int hres = 0;
+    CHAM_desc_t *descA;
+
+    /* Reads arguments (the last argument of each getter is the default passed to it) */
+    int nb = run_arg_get_int( args, "nb", 320 );
+    int P = parameters_getvalue_int( "P" );
+    cham_uplo_t uplo = run_arg_get_uplo( args, "uplo", ChamUpper );
+    cham_diag_t diag = run_arg_get_diag( args, "diag", ChamNonUnit );
+    int N = run_arg_get_int( args, "N", 1000 );
+    int LDA = run_arg_get_int( args, "LDA", N );
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int Q = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_ztrtri( N );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Creates the matrix */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q );
+
+    /* Initialises the matrix (zplghe called with a bump of N) */
+    CHAMELEON_zplghe_Tile( (double)N, uplo, descA, seedA );
+
+    /* Calculates the inverse in place (timed section) */
+    START_TIMING( t );
+    hres = CHAMELEON_ztrtri_Tile( uplo, diag, descA );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks the inverse against the initial matrix regenerated from the same seed */
+    if ( check ) {
+        CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL );
+        CHAMELEON_zplghe_Tile( (double)N, uplo, descA0, seedA );
+
+        hres += check_ztrtri( ChamTriangular, uplo, diag, descA0, descA );
+
+        CHAMELEON_Desc_Destroy( &descA0 );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+
+    run_id++;
+    return hres;
+}
+
+testing_t test_ztrtri;
+const char *ztrtri_params[] = { "nb", "uplo", "diag", "n", "lda", "seedA", NULL };
+const char *ztrtri_output[] = { NULL };
+const char *ztrtri_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function (runs at load time through the constructor attribute)
+ */
+void testing_ztrtri_init( void ) __attribute__( ( constructor ) );
+void
+testing_ztrtri_init( void )
+{
+    test_ztrtri.name = "ztrtri";
+    test_ztrtri.helper = "ztrtri";
+    test_ztrtri.params = ztrtri_params;
+    test_ztrtri.output = ztrtri_output;
+    test_ztrtri.outchk = ztrtri_outchk;
+    test_ztrtri.params_list = "nb;P;uplo;diag;n;lda;seedA";
+    test_ztrtri.fptr = testing_ztrtri;
+    test_ztrtri.next = NULL;
+
+    testing_register( &test_ztrtri );
+}
diff --git a/new-testing/testing_zunglq.c b/new-testing/testing_zunglq.c
new file mode 100644
index 000000000..d1882ffc1
--- /dev/null
+++ b/new-testing/testing_zunglq.c
@@ -0,0 +1,116 @@
+/**
+ *
+ * @file testing_zunglq.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zunglq testing (times CHAMELEON_zunglq_Tile, optionally checks Q)
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-09-09
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+/* Runs one zunglq case described by args; returns -1 when skipped, else the summed statuses. */
+int
+testing_zunglq( run_arg_list_t *args, int check )
+{
+    static int run_id = 0;
+    int hres = 0;
+    CHAM_desc_t *descA, *descT, *descQ;
+
+    /* Reads arguments (the last argument of each getter is the default passed to it) */
+    int nb = run_arg_get_int( args, "nb", 320 );
+    int ib = run_arg_get_int( args, "ib", 48 );
+    int P = parameters_getvalue_int( "P" );
+    int N = run_arg_get_int( args, "N", 1000 );
+    int M = run_arg_get_int( args, "M", N );
+    int K = run_arg_get_int( args, "K", chameleon_min( M, N ) );
+    int LDA = run_arg_get_int( args, "LDA", M );
+    int RH = run_arg_get_int( args, "qra", 0 );
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int Q = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zunglq( M, N, K );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    if ( K > chameleon_min( M, N ) ) { /* nothing is allocated yet, so returning here leaks nothing */
+        if ( CHAMELEON_Comm_rank() == 0 ) {
+            fprintf( stderr, "SKIPPED: Incorrect parameters for unglq (K > min(M,N))\n" );
+        }
+        return -1;
+    }
+
+    if ( RH > 0 ) { /* a positive "qra" selects the tree Householder mode with that size */
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder );
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH );
+    }
+    else {
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder );
+    }
+
+    /* Creates the matrices: A is K-by-N, Q is M-by-N */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, K, N, P, Q );
+    CHAMELEON_Desc_Create(
+        &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( K, N, &descT, P, Q );
+
+    /* Fills the matrix with random values and factorizes it (not timed) */
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+    hres = CHAMELEON_zgelqf_Tile( descA, descT );
+
+    /* Generates Q (timed section); its status is accumulated instead of being dropped */
+    START_TIMING( t );
+    hres += CHAMELEON_zunglq_Tile( descA, descT, descQ );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks the orthogonality of Q */
+    if ( check ) {
+        hres += check_zortho( descQ );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descT );
+    CHAMELEON_Desc_Destroy( &descQ );
+
+    run_id++;
+    return hres;
+}
+
+testing_t test_zunglq;
+const char *zunglq_params[] = { "nb", "ib", "m", "n", "k", "lda", "qra", "seedA", NULL };
+const char *zunglq_output[] = { NULL };
+const char *zunglq_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function (runs at load time through the constructor attribute)
+ */
+void testing_zunglq_init( void ) __attribute__( ( constructor ) );
+void
+testing_zunglq_init( void )
+{
+    test_zunglq.name = "zunglq";
+    test_zunglq.helper = "zunglq";
+    test_zunglq.params = zunglq_params;
+    test_zunglq.output = zunglq_output;
+    test_zunglq.outchk = zunglq_outchk;
+    test_zunglq.params_list = "nb;ib;P;m;n;k;lda;rh;seedA"; /* NOTE(review): "rh" here, "qra" in zunglq_params - confirm */
+    test_zunglq.fptr = testing_zunglq;
+    test_zunglq.next = NULL;
+
+    testing_register( &test_zunglq );
+}
diff
--git a/new-testing/testing_zunglq_hqr.c b/new-testing/testing_zunglq_hqr.c
new file mode 100644
index 000000000..12109b3b4
--- /dev/null
+++ b/new-testing/testing_zunglq_hqr.c
@@ -0,0 +1,127 @@
+/**
+ *
+ * @file testing_zunglq_hqr.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zunglq_hqr testing (times CHAMELEON_zunglq_param_Tile, optionally checks Q)
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-09-09
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+/* Runs one zunglq_hqr case described by args; returns -1 when skipped, else the summed statuses. */
+int
+testing_zunglq_hqr( run_arg_list_t *args, int check )
+{
+    static int run_id = 0;
+    int hres = 0;
+    CHAM_desc_t *descA, *descTS, *descTT, *descQ;
+
+    /* Reads arguments (the last argument of each getter is the default passed to it) */
+    int nb = run_arg_get_int( args, "nb", 320 );
+    int ib = run_arg_get_int( args, "ib", 48 );
+    int P = parameters_getvalue_int( "P" );
+    int N = run_arg_get_int( args, "N", 1000 );
+    int M = run_arg_get_int( args, "M", N );
+    int K = run_arg_get_int( args, "K", chameleon_min( M, N ) );
+    int LDA = run_arg_get_int( args, "LDA", M );
+    int qr_a = run_arg_get_int( args, "qra", -1 );
+    int qr_p = run_arg_get_int( args, "qrp", -1 );
+    int llvl = run_arg_get_int( args, "llvl", -1 );
+    int hlvl = run_arg_get_int( args, "hlvl", -1 );
+    int domino = run_arg_get_int( args, "domino", -1 );
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int Q = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zunglq( M, N, K );
+
+    libhqr_tree_t qrtree;
+    libhqr_matrix_t matrix;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    if ( K > chameleon_min( M, N ) ) { /* nothing is allocated yet, so returning here leaks nothing */
+        if ( CHAMELEON_Comm_rank() == 0 ) {
+            fprintf( stderr, "SKIPPED: Incorrect parameters for unglq_hqr (K > min(M,N))\n" );
+        }
+        return -1;
+    }
+
+    /* Creates the matrices: A is K-by-N, Q is M-by-N */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, K, N, P, Q );
+    CHAMELEON_Desc_Create(
+        &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( K, N, &descTS, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( K, N, &descTT, P, Q );
+
+    /* Initialize matrix tree from the tile counts of the TS workspace */
+    matrix.mt = descTS->mt;
+    matrix.nt = descTS->nt;
+    matrix.nodes = P * Q;
+    matrix.p = P;
+
+    libhqr_init_hqr( &qrtree, LIBHQR_LQ, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 );
+
+    /* Fills the matrix with random values and factorizes it (not timed) */
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+    hres = CHAMELEON_zgelqf_param_Tile( &qrtree, descA, descTS, descTT );
+
+    /* Generates Q (timed section); its status is accumulated instead of being dropped */
+    START_TIMING( t );
+    hres += CHAMELEON_zunglq_param_Tile( &qrtree, descA, descTS, descTT, descQ );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks the orthogonality of Q */
+    if ( check ) {
+        hres += check_zortho( descQ );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descTS );
+    CHAMELEON_Desc_Destroy( &descTT );
+    CHAMELEON_Desc_Destroy( &descQ );
+    libhqr_finalize( &qrtree );
+
+    run_id++;
+    return hres;
+}
+
+testing_t test_zunglq_hqr;
+const char *zunglq_hqr_params[] = { "nb", "ib", "m", "n", "k", "lda", "qra",
+                                    "qrp", "llvl", "hlvl", "domino", "seedA", NULL };
+const char *zunglq_hqr_output[] = { NULL };
+const char *zunglq_hqr_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function (runs at load time through the constructor attribute)
+ */
+void testing_zunglq_hqr_init( void ) __attribute__( ( constructor ) );
+void
+testing_zunglq_hqr_init( void )
+{
+    test_zunglq_hqr.name = "zunglq_hqr";
+    test_zunglq_hqr.helper = "zunglq_hqr";
+    test_zunglq_hqr.params = zunglq_hqr_params;
+    test_zunglq_hqr.output = zunglq_hqr_output;
+    test_zunglq_hqr.outchk = zunglq_hqr_outchk;
+    test_zunglq_hqr.params_list = "nb;ib;P;m;n;k;lda;qra;qrp;llvl;hlvl;domino;seedA";
+    test_zunglq_hqr.fptr = testing_zunglq_hqr;
+    test_zunglq_hqr.next = NULL;
+
+    testing_register( &test_zunglq_hqr );
+}
diff --git a/new-testing/testing_zungqr.c b/new-testing/testing_zungqr.c
new file mode 100644
index 000000000..d41176458
--- /dev/null
+++ b/new-testing/testing_zungqr.c
@@ -0,0 +1,116 @@
+/**
+ *
+ * @file testing_zungqr.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zungqr testing (times CHAMELEON_zungqr_Tile, optionally checks Q)
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-09-09
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+/* Runs one zungqr case described by args; returns -1 when skipped, else the summed statuses. */
+int
+testing_zungqr( run_arg_list_t *args, int check )
+{
+    static int run_id = 0;
+    int hres = 0;
+    CHAM_desc_t *descA, *descT, *descQ;
+
+    /* Reads arguments (the last argument of each getter is the default passed to it) */
+    int nb = run_arg_get_int( args, "nb", 320 );
+    int ib = run_arg_get_int( args, "ib", 48 );
+    int P = parameters_getvalue_int( "P" );
+    int N = run_arg_get_int( args, "N", 1000 );
+    int M = run_arg_get_int( args, "M", N );
+    int K = run_arg_get_int( args, "K", chameleon_min( M, N ) );
+    int LDA = run_arg_get_int( args, "LDA", M );
+    int RH = run_arg_get_int( args, "qra", 0 );
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int Q = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zungqr( M, N, K );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    if ( K > chameleon_min( M, N ) ) { /* nothing is allocated yet, so returning here leaks nothing */
+        if ( CHAMELEON_Comm_rank() == 0 ) {
+            fprintf( stderr, "SKIPPED: Incorrect parameters for ungqr (K > min(M,N))\n" );
+        }
+        return -1;
+    }
+
+    if ( RH > 0 ) { /* a positive "qra" selects the tree Householder mode with that size */
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder );
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH );
+    }
+    else {
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder );
+    }
+
+    /* Creates the matrices: A is M-by-K, Q is M-by-N */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, K, 0, 0, M, K, P, Q );
+    CHAMELEON_Desc_Create(
+        &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( M, K, &descT, P, Q );
+
+    /* Fills the matrix with random values and factorizes it (not timed) */
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+    hres = CHAMELEON_zgeqrf_Tile( descA, descT );
+
+    /* Generates Q (timed section); its status is accumulated instead of being dropped */
+    START_TIMING( t );
+    hres += CHAMELEON_zungqr_Tile( descA, descT, descQ );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks the orthogonality of Q */
+    if ( check ) {
+        hres += check_zortho( descQ );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descT );
+    CHAMELEON_Desc_Destroy( &descQ );
+
+    run_id++;
+    return hres;
+}
+
+testing_t test_zungqr;
+const char *zungqr_params[] = { "nb", "ib", "m", "n", "k", "lda", "qra", "seedA", NULL };
+const char *zungqr_output[] = { NULL };
+const char *zungqr_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function (runs at load time through the constructor attribute)
+ */
+void testing_zungqr_init( void ) __attribute__( ( constructor ) );
+void
+testing_zungqr_init( void )
+{
+    test_zungqr.name = "zungqr";
+    test_zungqr.helper = "zungqr";
+    test_zungqr.params = zungqr_params;
+    test_zungqr.output = zungqr_output;
+    test_zungqr.outchk = zungqr_outchk;
+    test_zungqr.params_list = "nb;ib;P;m;n;k;lda;rh;seedA"; /* NOTE(review): "rh" here, "qra" in zungqr_params - confirm */
+    test_zungqr.fptr = testing_zungqr;
+    test_zungqr.next = NULL;
+
+    testing_register( &test_zungqr );
+}
diff --git a/new-testing/testing_zungqr_hqr.c b/new-testing/testing_zungqr_hqr.c
new file mode 100644
index 000000000..bf89f256e
--- /dev/null
+++ b/new-testing/testing_zungqr_hqr.c
@@ -0,0 +1,127 @@
+/**
+ *
+ * @file testing_zungqr_hqr.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zungqr_hqr testing (times CHAMELEON_zungqr_param_Tile, optionally checks Q)
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-09-09
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+/* Runs one zungqr_hqr case described by args; returns -1 when skipped, else the summed statuses. */
+int
+testing_zungqr_hqr( run_arg_list_t *args, int check )
+{
+    static int run_id = 0;
+    int hres = 0;
+    CHAM_desc_t *descA, *descTS, *descTT, *descQ;
+
+    /* Reads arguments (the last argument of each getter is the default passed to it) */
+    int nb = run_arg_get_int( args, "nb", 320 );
+    int ib = run_arg_get_int( args, "ib", 48 );
+    int P = parameters_getvalue_int( "P" );
+    int N = run_arg_get_int( args, "N", 1000 );
+    int M = run_arg_get_int( args, "M", N );
+    int K = run_arg_get_int( args, "K", chameleon_min( M, N ) );
+    int LDA = run_arg_get_int( args, "LDA", M );
+    int qr_a = run_arg_get_int( args, "qra", -1 );
+    int qr_p = run_arg_get_int( args, "qrp", -1 );
+    int llvl = run_arg_get_int( args, "llvl", -1 );
+    int hlvl = run_arg_get_int( args, "hlvl", -1 );
+    int domino = run_arg_get_int( args, "domino", -1 );
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int Q = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zungqr( M, N, K );
+
+    libhqr_tree_t qrtree;
+    libhqr_matrix_t matrix;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    if ( K > chameleon_min( M, N ) ) { /* nothing is allocated yet, so returning here leaks nothing */
+        if ( CHAMELEON_Comm_rank() == 0 ) {
+            fprintf( stderr, "SKIPPED: Incorrect parameters for ungqr_hqr (K > min(M,N))\n" );
+        }
+        return -1;
+    }
+
+    /* Creates the matrices: A is M-by-K, Q is M-by-N */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, K, 0, 0, M, K, P, Q );
+    CHAMELEON_Desc_Create(
+        &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( M, K, &descTS, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( M, K, &descTT, P, Q );
+
+    /* Initialize matrix tree from the tile counts of the TS workspace */
+    matrix.mt = descTS->mt;
+    matrix.nt = descTS->nt;
+    matrix.nodes = P * Q;
+    matrix.p = P;
+
+    libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 );
+
+    /* Fills the matrix with random values and factorizes it (not timed) */
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+    hres = CHAMELEON_zgeqrf_param_Tile( &qrtree, descA, descTS, descTT );
+
+    /* Generates Q (timed section); its status is accumulated instead of being dropped */
+    START_TIMING( t );
+    hres += CHAMELEON_zungqr_param_Tile( &qrtree, descA, descTS, descTT, descQ );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks the orthogonality of Q */
+    if ( check ) {
+        hres += check_zortho( descQ );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descTS );
+    CHAMELEON_Desc_Destroy( &descTT );
+    CHAMELEON_Desc_Destroy( &descQ );
+    libhqr_finalize( &qrtree );
+
+    run_id++;
+    return hres;
+}
+
+testing_t test_zungqr_hqr;
+const char *zungqr_hqr_params[] = { "nb", "ib", "m", "n", "k", "lda", "qra",
+                                    "qrp", "llvl", "hlvl", "domino", "seedA", NULL };
+const char *zungqr_hqr_output[] = { NULL };
+const char *zungqr_hqr_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function (runs at load time through the constructor attribute)
+ */
+void testing_zungqr_hqr_init( void ) __attribute__( ( constructor ) );
+void
+testing_zungqr_hqr_init( void )
+{
+    test_zungqr_hqr.name = "zungqr_hqr";
+    test_zungqr_hqr.helper = "zungqr_hqr";
+    test_zungqr_hqr.params = zungqr_hqr_params;
+    test_zungqr_hqr.output = zungqr_hqr_output;
+    test_zungqr_hqr.outchk = zungqr_hqr_outchk;
+    test_zungqr_hqr.params_list = "nb;ib;P;m;n;k;lda;qra;qrp;llvl;hlvl;domino;seedA";
+    test_zungqr_hqr.fptr = testing_zungqr_hqr;
+    test_zungqr_hqr.next = NULL;
+
+    testing_register( &test_zungqr_hqr );
+}
diff --git a/new-testing/testing_zunmlq.c b/new-testing/testing_zunmlq.c
new file mode 100644
index 000000000..500110f19
--- /dev/null
+++ b/new-testing/testing_zunmlq.c
@@ -0,0 +1,135 @@
+/**
+ *
+ * @file testing_zunmlq.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR
5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zunmlq testing (times CHAMELEON_zunmlq_Tile, optionally checks the product)
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-11-09
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include <assert.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+/* Runs one zunmlq case described by args; returns the kernel status plus the check status. */
+int
+testing_zunmlq( run_arg_list_t *args, int check )
+{
+    static int run_id = 0;
+    int An;
+    int hres = 0;
+    CHAM_desc_t *descA, *descT, *descC;
+
+    /* Reads arguments (the last argument of each getter is the default passed to it) */
+    int nb = run_arg_get_int( args, "nb", 320 );
+    int ib = run_arg_get_int( args, "ib", 48 );
+    int P = parameters_getvalue_int( "P" );
+    cham_side_t side = run_arg_get_uplo( args, "side", ChamLeft ); /* NOTE(review): side is read with the uplo getter - confirm */
+    cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans );
+    int N = run_arg_get_int( args, "N", 1000 );
+    int M = run_arg_get_int( args, "M", N );
+    int K = run_arg_get_int( args, "K", N );
+    int LDA = run_arg_get_int( args, "LDA", K );
+    int LDC = run_arg_get_int( args, "LDC", M );
+    int RH = run_arg_get_int( args, "qra", 4 );
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int seedC = run_arg_get_int( args, "seedC", random() );
+    int Q = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zunmlq( side, M, N, K );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    if ( RH > 0 ) { /* a positive "qra" selects the tree Householder mode with that size */
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder );
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH );
+    }
+    else {
+        CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder );
+    }
+
+    /* Calculates the number of columns of A according to the side */
+    An = ( side == ChamLeft ) ? M : N;
+
+    /* Creates the matrices: A is K-by-An, C is M-by-N */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, An, 0, 0, K, An, P, Q );
+    CHAMELEON_Desc_Create(
+        &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( K, An, &descT, P, Q );
+
+    /* Fills the matrices with random values */
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+    CHAMELEON_zplrnt_Tile( descC, seedC );
+
+    /* Computes the factorization (not timed) */
+    hres = CHAMELEON_zgelqf_Tile( descA, descT );
+    assert( hres == 0 );
+
+    /* Computes unmlq (timed section) */
+    START_TIMING( t );
+    hres = CHAMELEON_zunmlq_Tile( side, trans, descA, descT, descC );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks C against the initial C and the explicitly generated Q */
+    if ( check ) {
+        CHAM_desc_t *descC0 = CHAMELEON_Desc_Copy( descC, NULL );
+        CHAM_desc_t *descQ;
+
+        CHAMELEON_zplrnt_Tile( descC0, seedC );
+
+        CHAMELEON_Desc_Create(
+            &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, An, An, 0, 0, An, An, P, Q );
+        CHAMELEON_zunglq_Tile( descA, descT, descQ );
+
+        hres += check_zqc( side, trans, descC0, descQ, descC ); /* accumulate: keep the kernel status */
+
+        CHAMELEON_Desc_Destroy( &descC0 );
+        CHAMELEON_Desc_Destroy( &descQ );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descT );
+    CHAMELEON_Desc_Destroy( &descC );
+
+    run_id++;
+    return hres;
+}
+
+testing_t test_zunmlq;
+const char *zunmlq_params[] = { "nb", "ib", "side", "trans", "m", "n", "k",
+                                "lda", "ldc", "qra", "seedA", "seedC", NULL };
+const char *zunmlq_output[] = { NULL };
+const char *zunmlq_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function (runs at load time through the constructor attribute)
+ */
+void testing_zunmlq_init( void ) __attribute__( ( constructor ) );
+void
+testing_zunmlq_init( void )
+{
+    test_zunmlq.name = "zunmlq";
+    test_zunmlq.helper = "zunmlq";
+    test_zunmlq.params = zunmlq_params;
+    test_zunmlq.output = zunmlq_output;
+    test_zunmlq.outchk = zunmlq_outchk;
+    test_zunmlq.params_list = "nb;ib;P;side;trans;m;n;k;lda;ldc;rh;seedA;seedC"; /* NOTE(review): "rh" here, "qra" in zunmlq_params - confirm */
+    test_zunmlq.fptr = testing_zunmlq;
+    test_zunmlq.next = NULL;
+
+    testing_register( &test_zunmlq );
+}
diff --git a/new-testing/testing_zunmlq_hqr.c b/new-testing/testing_zunmlq_hqr.c
new file mode 100644
index 000000000..7214546eb
--- /dev/null
+++ b/new-testing/testing_zunmlq_hqr.c
@@ -0,0 +1,147 @@
+/**
+ *
+ * @file testing_zunmlq_hqr.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zunmlq_hqr testing (times CHAMELEON_zunmlq_param_Tile, optionally checks it)
+ *
+ * @version 0.9.2
+ * @author Lucas Barros de Assis
+ * @date 2019-11-09
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include <assert.h>
+#include "testing_zauxiliary.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+/* Runs one zunmlq_hqr case described by args; returns the kernel status plus the check status. */
+int
+testing_zunmlq_hqr( run_arg_list_t *args, int check )
+{
+    static int run_id = 0;
+    int An;
+    int hres = 0;
+    CHAM_desc_t *descA, *descTS, *descTT, *descC;
+
+    /* Reads arguments (the last argument of each getter is the default passed to it) */
+    int nb = run_arg_get_int( args, "nb", 320 );
+    int ib = run_arg_get_int( args, "ib", 48 );
+    int P = parameters_getvalue_int( "P" );
+    cham_side_t side = run_arg_get_uplo( args, "side", ChamLeft ); /* NOTE(review): side is read with the uplo getter - confirm */
+    cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans );
+    int N = run_arg_get_int( args, "N", 1000 );
+    int M = run_arg_get_int( args, "M", N );
+    int K = run_arg_get_int( args, "K", N );
+    int LDA = run_arg_get_int( args, "LDA", K );
+    int LDC = run_arg_get_int( args, "LDC", M );
+    int qr_a = run_arg_get_int( args, "qra", -1 );
+    int qr_p = run_arg_get_int( args, "qrp", -1 );
+    int llvl = run_arg_get_int( args, "llvl", -1 );
+    int hlvl = run_arg_get_int( args, "hlvl", -1 );
+    int domino = run_arg_get_int( args, "domino", -1 );
+    int seedA = run_arg_get_int( args, "seedA", random() );
+    int seedC = run_arg_get_int( args, "seedC", random() );
+    int Q = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zunmlq( side, M, N, K );
+
+    libhqr_tree_t qrtree;
+    libhqr_matrix_t matrix;
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+    CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib );
+
+    /* Calculates the number of columns of A according to the side */
+    An = ( side == ChamLeft ) ? M : N;
+
+    /* Creates the matrices: A is K-by-An, C is M-by-N */
+    CHAMELEON_Desc_Create(
+        &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, An, 0, 0, K, An, P, Q );
+    CHAMELEON_Desc_Create(
+        &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( K, An, &descTS, P, Q );
+    CHAMELEON_Alloc_Workspace_zgels( K, An, &descTT, P, Q );
+
+    /* Initialize matrix tree from the tile counts of the TS workspace */
+    matrix.mt = descTS->mt;
+    matrix.nt = descTS->nt;
+    matrix.nodes = P * Q;
+    matrix.p = P;
+
+    libhqr_init_hqr( &qrtree, LIBHQR_LQ, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 );
+
+    /* Fills the matrices with random values */
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+    CHAMELEON_zplrnt_Tile( descC, seedC );
+
+    /* Computes the factorization (not timed) */
+    hres = CHAMELEON_zgelqf_param_Tile( &qrtree, descA, descTS, descTT );
+    assert( hres == 0 );
+
+    /* Computes unmlq_hqr (timed section) */
+    START_TIMING( t );
+    hres = CHAMELEON_zunmlq_param_Tile( &qrtree, side, trans, descA, descTS, descTT, descC );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", gflops );
+
+    /* Checks C against the initial C and the explicitly generated Q */
+    if ( check ) {
+        CHAM_desc_t *descC0 = CHAMELEON_Desc_Copy( descC, NULL );
+        CHAM_desc_t *descQ;
+
+        CHAMELEON_zplrnt_Tile( descC0, seedC );
+
+        CHAMELEON_Desc_Create(
+            &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, An, An, 0, 0, An, An, P, Q );
+        CHAMELEON_zunglq_param_Tile( &qrtree, descA, descTS, descTT, descQ );
+
+        hres += check_zqc( side, trans, descC0, descQ, descC ); /* accumulate: keep the kernel status */
+
+        CHAMELEON_Desc_Destroy( &descC0 );
+        CHAMELEON_Desc_Destroy( &descQ );
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+    CHAMELEON_Desc_Destroy( &descTS );
+    CHAMELEON_Desc_Destroy( &descTT );
+    CHAMELEON_Desc_Destroy( &descC );
+    libhqr_finalize( &qrtree );
+
+    run_id++;
+    return hres;
+}
+
+testing_t test_zunmlq_hqr;
+const char *zunmlq_hqr_params[] = { "nb", "ib", "side", "trans", "m", "n",
+                                    "k", "lda", "ldc", "qra", "qrp", "llvl",
+                                    "hlvl", "domino", "seedA", "seedC", NULL };
+const char *zunmlq_hqr_output[] = { NULL };
+const char *zunmlq_hqr_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function (runs at load time through the constructor attribute)
+ */
+void testing_zunmlq_hqr_init( void ) __attribute__( ( constructor ) );
+void
+testing_zunmlq_hqr_init( void )
+{
+    test_zunmlq_hqr.name = "zunmlq_hqr";
+    test_zunmlq_hqr.helper = "zunmlq_hqr";
+    test_zunmlq_hqr.params = zunmlq_hqr_params;
+    test_zunmlq_hqr.output = zunmlq_hqr_output;
+    test_zunmlq_hqr.outchk = zunmlq_hqr_outchk;
+    test_zunmlq_hqr.params_list =
+        "nb;ib;P;side;trans;m;n;k;lda;ldc;qra;qrp;llvl;hlvl;domino;seedA;seedC";
+    test_zunmlq_hqr.fptr = testing_zunmlq_hqr;
+    test_zunmlq_hqr.next = NULL;
+
+    testing_register( &test_zunmlq_hqr );
+}
diff --git a/new-testing/testing_zunmqr.c b/new-testing/testing_zunmqr.c
new file mode 100644
index 000000000..3c9211600
--- /dev/null
+++ b/new-testing/testing_zunmqr.c
@@ -0,0 +1,135 @@
+/**
+ *
+ * @file testing_zunmqr.c
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ * + *** + * + * @brief Chameleon zunmqr testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-11-09 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include <assert.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zunmqr( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int Am; + int hres = 0; + CHAM_desc_t *descA, *descT, *descC; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + cham_side_t side = run_arg_get_uplo( args, "side", ChamLeft ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = run_arg_get_int( args, "K", chameleon_min( M, N ) ); + int LDA = run_arg_get_int( args, "LDA", ( side == ChamLeft ) ? M : N ); + int LDC = run_arg_get_int( args, "LDC", M ); + int RH = run_arg_get_int( args, "qra", 4 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedC = run_arg_get_int( args, "seedC", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zunmqr( side, M, N, K ); + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + if ( RH > 0 ) { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamTreeHouseholder ); + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_SIZE, RH ); + } + else { + CHAMELEON_Set( CHAMELEON_HOUSEHOLDER_MODE, ChamFlatHouseholder ); + } + + /* Calculates the dimensions according to the transposition and the side */ + Am = ( side == ChamLeft ) ? 
M : N; + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, K, 0, 0, Am, K, P, Q ); + CHAMELEON_Desc_Create( + &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( Am, K, &descT, P, Q ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descC, seedC ); + + /* Computes the factorization */ + hres = CHAMELEON_zgeqrf_Tile( descA, descT ); + assert( hres == 0 ); + + /* Computes unmqr */ + START_TIMING( t ); + hres = CHAMELEON_zunmqr_Tile( side, trans, descA, descT, descC ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the factorisation and orthogonality */ + if ( check ) { + CHAM_desc_t *descC0 = CHAMELEON_Desc_Copy( descC, NULL ); + CHAM_desc_t *descQ; + + CHAMELEON_zplrnt_Tile( descC0, seedC ); + + CHAMELEON_Desc_Create( + &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, Am, Am, 0, 0, Am, Am, P, Q ); + CHAMELEON_zungqr_Tile( descA, descT, descQ ); + + hres = check_zqc( side, trans, descC0, descQ, descC ); + + CHAMELEON_Desc_Destroy( &descC0 ); + CHAMELEON_Desc_Destroy( &descQ ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descT ); + CHAMELEON_Desc_Destroy( &descC ); + + run_id++; + return hres; +} + +testing_t test_zunmqr; +const char *zunmqr_params[] = { "nb", "ib", "side", "trans", "m", "n", "k", + "lda", "ldc", "qra", "seedA", "seedC", NULL }; +const char *zunmqr_output[] = { NULL }; +const char *zunmqr_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zunmqr_init( void ) __attribute__( ( constructor ) ); +void +testing_zunmqr_init( void ) +{ + test_zunmqr.name = "zunmqr"; + test_zunmqr.helper = "zunmqr"; + test_zunmqr.params = zunmqr_params; + test_zunmqr.output = zunmqr_output; + test_zunmqr.outchk = 
zunmqr_outchk; + test_zunmqr.params_list = "nb;ib;P;side;trans;m;n;k;lda;ldc;rh;seedA;seedC"; + test_zunmqr.fptr = testing_zunmqr; + test_zunmqr.next = NULL; + + testing_register( &test_zunmqr ); +} diff --git a/new-testing/testing_zunmqr_hqr.c b/new-testing/testing_zunmqr_hqr.c new file mode 100644 index 000000000..791cc9d7a --- /dev/null +++ b/new-testing/testing_zunmqr_hqr.c @@ -0,0 +1,147 @@ +/** + * + * @file testing_zunmqr_hqr.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zunmqr_hqr testing + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-11-09 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include <assert.h> +#include "testing_zauxiliary.h" +#include "testing_zcheck.h" +#include "flops.h" + +int +testing_zunmqr_hqr( run_arg_list_t *args, int check ) +{ + static int run_id = 0; + int Am; + int hres = 0; + CHAM_desc_t *descA, *descTS, *descTT, *descC; + + /* Reads arguments */ + int nb = run_arg_get_int( args, "nb", 320 ); + int ib = run_arg_get_int( args, "ib", 48 ); + int P = parameters_getvalue_int( "P" ); + cham_side_t side = run_arg_get_uplo( args, "side", ChamLeft ); + cham_trans_t trans = run_arg_get_trans( args, "trans", ChamNoTrans ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int K = run_arg_get_int( args, "K", chameleon_min( M, N ) ); + int LDA = run_arg_get_int( args, "LDA", ( side == ChamLeft ) ? 
M : N ); + int LDC = run_arg_get_int( args, "LDC", M ); + int qr_a = run_arg_get_int( args, "qra", -1 ); + int qr_p = run_arg_get_int( args, "qrp", -1 ); + int llvl = run_arg_get_int( args, "llvl", -1 ); + int hlvl = run_arg_get_int( args, "hlvl", -1 ); + int domino = run_arg_get_int( args, "domino", -1 ); + int seedA = run_arg_get_int( args, "seedA", random() ); + int seedC = run_arg_get_int( args, "seedC", random() ); + int Q = parameters_compute_q( P ); + cham_fixdbl_t t, gflops; + cham_fixdbl_t flops = flops_zunmqr( side, M, N, K ); + + libhqr_tree_t qrtree; + libhqr_matrix_t matrix; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* Calculates the dimensions according to the transposition and the side */ + Am = ( side == ChamLeft ) ? M : N; + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, NULL, ChamComplexDouble, nb, nb, nb * nb, LDA, K, 0, 0, Am, K, P, Q ); + CHAMELEON_Desc_Create( + &descC, NULL, ChamComplexDouble, nb, nb, nb * nb, LDC, N, 0, 0, M, N, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( Am, K, &descTS, P, Q ); + CHAMELEON_Alloc_Workspace_zgels( Am, K, &descTT, P, Q ); + + /* Initialize matrix tree */ + matrix.mt = descTS->mt; + matrix.nt = descTS->nt; + matrix.nodes = P * Q; + matrix.p = P; + + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, 0 ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descC, seedC ); + + /* Computes the factorization */ + hres = CHAMELEON_zgeqrf_param_Tile( &qrtree, descA, descTS, descTT ); + assert( hres == 0 ); + + /* Computes unmqr_hqr */ + START_TIMING( t ); + hres = CHAMELEON_zunmqr_param_Tile( &qrtree, side, trans, descA, descTS, descTT, descC ); + STOP_TIMING( t ); + gflops = flops * 1.e-9 / t; + run_arg_add_fixdbl( args, "time", t ); + run_arg_add_fixdbl( args, "gflops", gflops ); + + /* Checks the factorisation and orthogonality */ + if ( check ) { + 
CHAM_desc_t *descC0 = CHAMELEON_Desc_Copy( descC, NULL ); + CHAM_desc_t *descQ; + + CHAMELEON_zplrnt_Tile( descC0, seedC ); + + CHAMELEON_Desc_Create( + &descQ, NULL, ChamComplexDouble, nb, nb, nb * nb, Am, Am, 0, 0, Am, Am, P, Q ); + CHAMELEON_zungqr_param_Tile( &qrtree, descA, descTS, descTT, descQ ); + + hres = check_zqc( side, trans, descC0, descQ, descC ); + + CHAMELEON_Desc_Destroy( &descC0 ); + CHAMELEON_Desc_Destroy( &descQ ); + } + + CHAMELEON_Desc_Destroy( &descA ); + CHAMELEON_Desc_Destroy( &descTS ); + CHAMELEON_Desc_Destroy( &descTT ); + CHAMELEON_Desc_Destroy( &descC ); + libhqr_finalize( &qrtree ); + + run_id++; + return hres; +} + +testing_t test_zunmqr_hqr; +const char *zunmqr_hqr_params[] = { "nb", "ib", "side", "trans", "m", "n", + "k", "lda", "ldc", "qra", "qrp", "llvl", + "hlvl", "domino", "seedA", "seedC", NULL }; +const char *zunmqr_hqr_output[] = { NULL }; +const char *zunmqr_hqr_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zunmqr_hqr_init( void ) __attribute__( ( constructor ) ); +void +testing_zunmqr_hqr_init( void ) +{ + test_zunmqr_hqr.name = "zunmqr_hqr"; + test_zunmqr_hqr.helper = "zunmqr_hqr"; + test_zunmqr_hqr.params = zunmqr_hqr_params; + test_zunmqr_hqr.output = zunmqr_hqr_output; + test_zunmqr_hqr.outchk = zunmqr_hqr_outchk; + test_zunmqr_hqr.params_list = + "nb;ib;P;side;trans;m;n;k;lda;ldc;qra;qrp;llvl;hlvl;domino;seedA;seedC"; + test_zunmqr_hqr.fptr = testing_zunmqr_hqr; + test_zunmqr_hqr.next = NULL; + + testing_register( &test_zunmqr_hqr ); +} diff --git a/new-testing/testings.h b/new-testing/testings.h new file mode 100644 index 000000000..b0ec579bb --- /dev/null +++ b/new-testing/testings.h @@ -0,0 +1,217 @@ +/** + * + * @file testings.h + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ *** + * + * @brief Chameleon auxiliary routines for testing structures + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @date 2019-07-18 + * + */ +#ifndef _testings_h_ +#define _testings_h_ + +#include <chameleon.h> +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <math.h> +#include <string.h> +#include <strings.h> + +typedef enum valtype_ { + TestValInt, + TestValFloat, + TestValDouble, + TestValComplex32, + TestValComplex64, + TestTrans, + TestUplo, + TestDiag, + TestSide, + TestNormtype, + TestString, +} valtype_e; + +/* Define to avoid conversion */ +#define TestValFixdbl TestValDouble + +union val_u { + int ival; + cham_trans_t trans; + cham_uplo_t uplo; + cham_diag_t diag; + cham_side_t side; + cham_normtype_t ntype; + CHAMELEON_Complex64_t zval; + CHAMELEON_Complex32_t cval; + double dval; + float sval; + char *str; +}; +typedef union val_u val_t; + +/** + * @brief Defines a values that can be taken by any parameter + */ +struct vallist_s; +typedef struct vallist_s vallist_t; + +struct vallist_s { + val_t value; /**< Value of the parameter */ + vallist_t *next; /**< Pointer to the next parameter */ +}; + +#define PARAM_OPTION (1 << 0) +#define PARAM_INPUT (1 << 1) +#define PARAM_OUTPUT (1 << 2) + +typedef val_t (*read_fct_t)( const char * ); +typedef char *(*sprint_fct_t)( val_t, int, int, char * ); + +/** + * @brief Defines the possible testing parameters used to define the tests + */ +typedef struct parameter_s { + const char *name; /**< Name of the option */ + const char *helper; /**< Helper string for the usage function */ + int shname; /**< The associated short option */ + int flags; /**< Flags to define if it is: an option, an input, an output */ + int has_arg; /**< Defines the number of arguments: 0- no arguments, 1- A single argument for all test, 2- multiple arguments possible */ + int psize; /**< Number of characters to printed in the human readable format */ + valtype_e valtype; /**< Type of the argument if 
has_arg > 1, unread otherwise */ + val_t value; /**< Default value if has_arg < 2 */ + vallist_t *vallist; /**< List of values for input parameters (has_arg >= 2) */ + read_fct_t read; /**< Parser for one argument */ + sprint_fct_t sprint; /**< Sprint function for one argument */ +} parameter_t; + +/** + * @brief Defines a single parameter for one test case. The parameters are + * chained together through a NULL terminated list. + */ +struct run_arg_s; +typedef struct run_arg_s run_arg_t; + +struct run_arg_s { + parameter_t *param; /**< Pointer to the testing parameter information */ + val_t value; /**< Value of the parameter */ + run_arg_t *next; /**< Pointer to the next parameter */ +}; + +typedef struct run_arg_list_s { + run_arg_t *head; + run_arg_t *tail; +} run_arg_list_t; + +/** + * @brief Defines a single run to perform. The tests are chained together by a + * null terminated list. + */ +struct run_list_elt_s; +typedef struct run_list_elt_s run_list_elt_t; + +struct run_list_elt_s { + run_arg_list_t args; /**< List of parameters defining the run */ + run_list_elt_t *next; /**< Pointer to the next run description */ +}; + +typedef struct run_list_s { + run_list_elt_t *head; /**< List of parameters defining the run */ + run_list_elt_t *tail; /**< Pointer to the next run description */ +} run_list_t; + +/** + * @brief Defines the possible testing available for the given precision + * + * This is a NULL terminated list. 
+ */ +struct testing_; +typedef struct testing_ testing_t; +typedef int (*test_fct_t)( run_arg_list_t *, int ); + +typedef struct testing_ { + const char *name; /**< Name of the operation tested */ + const char *helper; /**< Helper of the function tested for the usage */ + const char **params; /**< Parameters used by the operation */ + const char **output; /**< Parameters used by the operation */ + const char **outchk; /**< Parameters used by the operation */ + const char *params_list; /**< Parameters used by the operation */ + test_fct_t fptr; /**< Function performing the test */ + testing_t *next; /**< Pointer to following test */ +} testing_t; + +val_t pread_int ( const char *str ); +val_t pread_float ( const char *str ); +val_t pread_double ( const char *str ); +val_t pread_complex32( const char *str ); +val_t pread_complex64( const char *str ); +val_t pread_trans ( const char *str ); +val_t pread_uplo ( const char *str ); +val_t pread_diag ( const char *str ); +val_t pread_side ( const char *str ); +val_t pread_norm ( const char *str ); +val_t pread_string ( const char *str ); + +#define pread_fixdbl pread_double + +char *sprint_int ( val_t val, int human, int nbchar, char *str_in ); +char *sprint_float ( val_t val, int human, int nbchar, char *str_in ); +char *sprint_double ( val_t val, int human, int nbchar, char *str_in ); +char *sprint_complex32( val_t val, int human, int nbchar, char *str_in ); +char *sprint_complex64( val_t val, int human, int nbchar, char *str_in ); +char *sprint_trans ( val_t val, int human, int nbchar, char *str_in ); +char *sprint_uplo ( val_t val, int human, int nbchar, char *str_in ); +char *sprint_diag ( val_t val, int human, int nbchar, char *str_in ); +char *sprint_side ( val_t val, int human, int nbchar, char *str_in ); +char *sprint_norm ( val_t val, int human, int nbchar, char *str_in ); +char *sprint_string ( val_t val, int human, int nbchar, char *str_in ); +char *sprint_check ( val_t val, int human, int nbchar, char 
*str_in ); + +#define sprint_fixdbl sprint_double + +float testing_salea(); +double testing_dalea(); +CHAMELEON_Complex32_t testing_calea(); +CHAMELEON_Complex64_t testing_zalea(); + +const run_arg_t *run_arg_get_byname( const run_arg_list_t *arglist, const char *name ); + +int run_arg_get_int ( run_arg_list_t *arglist, const char *name, int defval ); +float run_arg_get_float ( run_arg_list_t *arglist, const char *name, float defval ); +double run_arg_get_double ( run_arg_list_t *arglist, const char *name, double defval ); +CHAMELEON_Complex32_t run_arg_get_Complex32( run_arg_list_t *arglist, const char *name, CHAMELEON_Complex32_t defval ); +CHAMELEON_Complex64_t run_arg_get_Complex64( run_arg_list_t *arglist, const char *name, CHAMELEON_Complex64_t defval ); +cham_trans_t run_arg_get_trans ( run_arg_list_t *arglist, const char *name, cham_trans_t defval ); +cham_uplo_t run_arg_get_uplo ( run_arg_list_t *arglist, const char *name, cham_uplo_t defval ); +cham_diag_t run_arg_get_diag ( run_arg_list_t *arglist, const char *name, cham_diag_t defval ); +cham_side_t run_arg_get_side ( run_arg_list_t *arglist, const char *name, cham_side_t defval ); +cham_normtype_t run_arg_get_ntype ( run_arg_list_t *arglist, const char *name, cham_normtype_t defval ); + +int run_arg_add_int ( run_arg_list_t *arglist, const char *name, int defval ); +int run_arg_add_double( run_arg_list_t *arglist, const char *name, double defval ); +#define run_arg_add_fixdbl run_arg_add_double + +void run_print_header( const testing_t *test, int check, int human ); +void run_print_line( const testing_t *test, const run_arg_list_t *arglist, + int check, int human, int id ); + +void parameters_read( parameter_t *param, const char *values ); +void parameters_read_file( const char *filename ); +parameter_t *parameters_getbyname( const char *name ); +void parameters_addvalues( parameter_t *param, const char *values ); +int parameters_getvalue_int( const char *name ); +int parameters_compute_q( int p ); 
+parameter_t *parameters_get( int shname ); + +run_list_t *run_list_generate( const char **params ); +void run_list_destroy( run_list_elt_t *run ); + +void testing_register( testing_t *test ); + +#endif /* _testings_h_ */ diff --git a/new-testing/values.c b/new-testing/values.c new file mode 100644 index 000000000..4fe9e6141 --- /dev/null +++ b/new-testing/values.c @@ -0,0 +1,616 @@ +/** + * + * @file values.c + * + * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + *** + * + * @brief Chameleon testing values toutine to read/print the parameters + * + * @version 0.9.2 + * @author Lucas Barros de Assis + * @author Mathieu Faverge + * @date 2019-07-18 + * + */ +#include "testings.h" +#include <coreblas.h> + +/** + * @brief Convert the input string to an integer + * @param[in] str + * The input string + * @return The integer read. + */ +val_t pread_int( const char *str ) +{ + val_t val; + val.ival = atoi( str ); + fprintf( stderr, " %d", val.ival ); + return val; +} + +/** + * @brief Convert the input string to a float + * @param[in] str + * The input string + * @return The float read. + */ +val_t pread_float( const char *str ) +{ + val_t val; + val.sval = strtof( str, NULL ); + fprintf( stderr, " %e", val.sval ); + return val; +} + +/** + * @brief Convert the input string to a double + * @param[in] str + * The input string + * @return The double read. + */ +val_t pread_double( const char *str ) +{ + val_t val; + val.dval = strtod( str, NULL ); + fprintf( stderr, " %le", val.dval ); + return val; +} + +/** + * @brief Convert the input string to a complex single precision + * @param[in] str + * The input string + * @return The complex single precision read. 
+ */ +val_t pread_complex32( const char *str ) +{ + float re, im; + val_t val; + int rc; + + rc = sscanf( str, "%e,%e", &re, &im ); + + if ( rc == 2 ) { + val.cval = re + I * im; + } + else if (rc == 1){ + val.cval = re; + } + else { + val.cval = nan("NaN"); + } + + fprintf( stderr, " (%le,%le)", crealf(val.cval), cimagf(val.cval) ); + return val; +} + +/** + * @brief Convert the input string to a complex double precision + * @param[in] str + * The input string + * @return The complex double precision read. + */ +val_t pread_complex64( const char *str ) +{ + double re, im; + val_t val; + int rc; + + rc = sscanf( str, "%le,%le", &re, &im ); + + if ( rc == 2 ) { + val.zval = re + I * im; + } + else if (rc == 1){ + val.zval = re; + } + else { + val.zval = nan("NaN"); + } + + fprintf( stderr, " (%le,%le)", crealf(val.zval), cimagf(val.zval) ); + return val; +} + +/** + * @brief Convert the input string to a cham_trans_t + * @param[in] str + * The input string + * @return The cham_trans_t read. + */ +val_t pread_trans( const char *str ) +{ + val_t val; + val.trans = ChamNoTrans; + + if ( ( strcasecmp( "ChamConjTrans", str ) == 0 ) || + ( strcasecmp( "ConjTrans", str ) == 0 ) ) + { + val.trans = ChamConjTrans; + } + else if ( ( strcasecmp( "ChamTrans", str ) == 0 ) || + ( strcasecmp( "Trans", str ) == 0 ) ) + { + val.trans = ChamTrans; + } + else if ( ( strcasecmp( "ChamNoTrans", str ) == 0 ) || + ( strcasecmp( "NoTrans", str ) == 0 ) ) + { + val.trans = ChamNoTrans; + } + else { + int v = atoi( str ); + if ( (v == ChamConjTrans) || (v == (ChamConjTrans-ChamNoTrans)) ) { + val.trans = ChamConjTrans; + } + else if ( (v == ChamTrans) || (v == (ChamTrans-ChamNoTrans)) ) { + val.trans = ChamTrans; + } + else { + val.trans = ChamNoTrans; + } + } + fprintf( stderr, " %d", val.trans ); + return val; +} + +/** + * @brief Convert the input string to a cham_uplo_t + * @param[in] str + * The input string + * @return The cham_uplo_t read. 
+ */ +val_t pread_uplo( const char *str ) +{ + val_t val; + val.uplo = ChamUpperLower; + + if ( ( strcasecmp( "ChamUpper", str ) == 0 ) || + ( strcasecmp( "Upper", str ) == 0 ) ) + { + val.uplo = ChamUpper; + } + else if ( ( strcasecmp( "ChamLower", str ) == 0 ) || + ( strcasecmp( "Lower", str ) == 0 ) ) + { + val.uplo = ChamLower; + } + else if ( ( strcasecmp( "ChamUpperLower", str ) == 0 ) || + ( strcasecmp( "UpperLower", str ) == 0 ) || + ( strcasecmp( "General", str ) == 0 ) ) + { + val.uplo = ChamUpperLower; + } + else { + int v = atoi( str ); + if ( (v == ChamUpper) || (v == 0) ) { + val.uplo = ChamUpper; + } + else if ( (v == ChamLower) || (v == (ChamLower-ChamUpper)) ) { + val.uplo = ChamLower; + } + else { + val.uplo = ChamUpperLower; + } + } + fprintf( stderr, " %d", val.uplo ); + return val; +} + +/** + * @brief Convert the input string to a cham_diag_t + * @param[in] str + * The input string + * @return The cham_diag_t read. + */ +val_t pread_diag( const char *str ) +{ + val_t val; + val.diag = ChamNonUnit; + + if ( ( strcasecmp( "ChamNonUnit", str ) == 0 ) || + ( strcasecmp( "NonUnit", str ) == 0 ) ) + { + val.diag = ChamNonUnit; + } + else if ( ( strcasecmp( "ChamUnit", str ) == 0 ) || + ( strcasecmp( "Unit", str ) == 0 ) ) + { + val.diag = ChamUnit; + } + else { + int v = atoi( str ); + if ( (v == ChamUnit) || (v == (ChamUnit-ChamNonUnit)) ) { + val.diag = ChamUnit; + } + else { + val.diag = ChamNonUnit; + } + } + fprintf( stderr, " %d", val.diag ); + return val; +} + +/** + * @brief Convert the input string to a cham_side_t + * @param[in] str + * The input string + * @return The cham_side_t read. 
+ */ +val_t pread_side( const char *str ) +{ + val_t val; + val.side = ChamLeft; + + if ( ( strcasecmp( "ChamLeft", str ) == 0 ) || + ( strcasecmp( "Left", str ) == 0 ) ) + { + val.side = ChamLeft; + } + else if ( ( strcasecmp( "ChamRight", str ) == 0 ) || + ( strcasecmp( "Right", str ) == 0 ) ) + { + val.side = ChamRight; + } + else { + int v = atoi( str ); + if ( (v == ChamRight) || (v == (ChamRight-ChamLeft)) ) { + val.side = ChamRight; + } + else { + val.side = ChamLeft; + } + } + fprintf( stderr, " %d", val.side ); + return val; +} + +/** + * @brief Convert the input string to a cham_normtype_t + * @param[in] str + * The input string + * @return The cham_normtype_t read. + */ +val_t pread_norm( const char *str ) +{ + val_t val; + val.ntype = ChamOneNorm; + + if ( ( strcasecmp( "ChamOneNorm", str ) == 0 ) || + ( strcasecmp( "OneNorm", str ) == 0 ) ) + { + val.ntype = ChamOneNorm; + } + else if ( ( strcasecmp( "ChamFrobeniusNorm", str ) == 0 ) || + ( strcasecmp( "FrobeniusNorm", str ) == 0 ) ) + { + val.ntype = ChamFrobeniusNorm; + } + else if ( ( strcasecmp( "ChamInfNorm", str ) == 0 ) || + ( strcasecmp( "InfNorm", str ) == 0 ) ) + { + val.ntype = ChamInfNorm; + } + else if ( ( strcasecmp( "ChamMaxNorm", str ) == 0 ) || + ( strcasecmp( "MaxNorm", str ) == 0 ) ) + { + val.ntype = ChamMaxNorm; + } + else { + int v = atoi( str ); + if ( (v == ChamMaxNorm) || (v == (ChamMaxNorm-ChamOneNorm)) ) { + val.ntype = ChamMaxNorm; + } + else if ( (v == ChamInfNorm) || (v == (ChamInfNorm-ChamOneNorm)) ) { + val.ntype = ChamInfNorm; + } + else if ( (v == ChamFrobeniusNorm) || (v == (ChamFrobeniusNorm-ChamOneNorm)) ) { + val.ntype = ChamFrobeniusNorm; + } + else { + val.ntype = ChamOneNorm; + } + } + fprintf( stderr, " %d", val.ntype ); + return val; +} + +/** + * @brief Convert the input string to a string + * @param[in] str + * The input string + * @return The string read. 
+ */ +val_t pread_string( const char *str ) +{ + val_t val; + val.str = strdup( str ); + fprintf( stderr, " %s", val.str ); + return val; +} + +/** + * @brief Convert the input string to an integer + * @param[in] str + * The input string + * @return The integer read. + */ +char *sprint_int( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + rc = sprintf( str_in, " %*d", nbchar, val.ival ); + } + else { + rc = sprintf( str_in, ";%d", val.ival ); + } + return str_in+rc; +} + +/** + * @brief Convert the input string to a float + * @param[in] str + * The input string + * @return The float read. + */ +char *sprint_float( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + rc = sprintf( str_in, " %*e", nbchar, val.sval ); + } + else { + rc = sprintf( str_in, ";%e", val.sval ); + } + return str_in+rc; +} + +/** + * @brief Convert the input string to a double + * @param[in] str + * The input string + * @return The double read. + */ +char *sprint_double( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + rc = sprintf( str_in, " %*e", nbchar, val.dval ); + } + else { + rc = sprintf( str_in, ";%e", val.dval ); + } + return str_in+rc; +} + +/** + * @brief Convert the input string to a complex single precision + * @param[in] str + * The input string + * @return The complex single precision read. + */ +char *sprint_complex32( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + rc = sprintf( str_in, " %e,%e", crealf(val.cval), cimagf(val.cval) ); + } + else { + rc = sprintf( str_in, ";%e,%e", crealf(val.cval), cimagf(val.cval) ); + } + return str_in+rc; +} + +/** + * @brief Convert the input string to a complex double precision + * @param[in] str + * The input string + * @return The complex double precision read. 
+ */ +char *sprint_complex64( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; /* nbchar is not referenced in this function: no field width is applied */ + if ( human ) { + rc = sprintf( str_in, " %e,%e", creal(val.zval), cimag(val.zval) ); + } + else { + rc = sprintf( str_in, ";%e,%e", creal(val.zval), cimag(val.zval) ); + } + return str_in+rc; +} + +/** + * @brief Print a cham_trans_t value into a string buffer + * @param[in] val + * The value to print; only val.trans is read. + * @param[in] human + * If non-zero, the name is printed after a leading space, left-justified + * in a field of at least nbchar characters: ConjTrans for ChamConjTrans, + * Trans for ChamTrans and NoTrans for any other value. + * If zero, a semicolon followed by the value printed with %d is written. + * @param[in] nbchar + * Minimum field width; only used when human is non-zero. + * @param[in,out] str_in + * Destination buffer. It is written with sprintf, so no bound is checked + * here and the caller has to provide enough room. + * @return Pointer to the terminating null character written in str_in, + * i.e. str_in advanced by the number of characters printed. + */ +char *sprint_trans( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + rc = sprintf( str_in, " %-*s", nbchar, + (val.trans == ChamConjTrans) ? "ConjTrans" : + ((val.trans == ChamTrans) ? "Trans" : "NoTrans") ); + } + else { + rc = sprintf( str_in, ";%d", val.trans ); + } + return str_in+rc; +} + +/** + * @brief Print a cham_uplo_t value into a string buffer + * @param[in] val + * The value to print; only val.uplo is read. + * @param[in] human + * If non-zero, the name is printed after a leading space, left-justified + * in a field of at least nbchar characters: Upper for ChamUpper, Lower + * for ChamLower and General for any other value. + * If zero, a semicolon followed by the value printed with %d is written. + * @param[in] nbchar + * Minimum field width; only used when human is non-zero. + * @param[in,out] str_in + * Destination buffer. It is written with sprintf, so no bound is checked + * here and the caller has to provide enough room. + * @return Pointer to the terminating null character written in str_in, + * i.e. str_in advanced by the number of characters printed. + */ +char *sprint_uplo( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + rc = sprintf( str_in, " %-*s", nbchar, + (val.uplo == ChamUpper) ? "Upper" : + ((val.uplo == ChamLower) ? "Lower" : "General") ); + } + else { + rc = sprintf( str_in, ";%d", val.uplo ); + } + return str_in+rc; +} + +/** + * @brief Print a cham_diag_t value into a string buffer + * @param[in] val + * The value to print; only val.diag is read. + * @param[in] human + * If non-zero, the name is printed after a leading space, left-justified + * in a field of at least nbchar characters: Unit for ChamUnit and + * NonUnit for any other value. + * If zero, a semicolon followed by the value printed with %d is written. + * @param[in] nbchar + * Minimum field width; only used when human is non-zero. + * @param[in,out] str_in + * Destination buffer. It is written with sprintf, so no bound is checked + * here and the caller has to provide enough room. + * @return Pointer to the terminating null character written in str_in, + * i.e. str_in advanced by the number of characters printed. + */ +char *sprint_diag( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + rc = sprintf( str_in, " %-*s", nbchar, + (val.diag == ChamUnit) ? "Unit" : "NonUnit" ); + } + else { + rc = sprintf( str_in, ";%d", val.diag ); + } + return str_in+rc; +} + +/** + * @brief Print a cham_side_t value into a string buffer + * @param[in] val + * The value to print; only val.side is read. + * @param[in] human + * If non-zero, the name is printed after a leading space, left-justified + * in a field of at least nbchar characters: Left for ChamLeft and Right + * for any other value. + * If zero, a semicolon followed by the value printed with %d is written. + * @param[in] nbchar + * Minimum field width; only used when human is non-zero. + * @param[in,out] str_in + * Destination buffer. It is written with sprintf, so no bound is checked + * here and the caller has to provide enough room. + * @return Pointer to the terminating null character written in str_in, + * i.e. str_in advanced by the number of characters printed. + */ +char *sprint_side( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + rc = sprintf( str_in, " %-*s", nbchar, + (val.side == ChamLeft) ?
"Left" : "Right" ); + } + else { + rc = sprintf( str_in, ";%d", val.side ); + } + return str_in+rc; +} + +/** + * @brief Print a cham_normtype_t value into a string buffer + * @param[in] val + * The value to print; only val.ntype is read. + * @param[in] human + * If non-zero, a three letter name is printed after a leading space, + * left-justified in a field of at least nbchar characters: Max for + * ChamMaxNorm, One for ChamOneNorm, Inf for ChamInfNorm, Frb for + * ChamFrobeniusNorm and ERR for any other value. + * If zero, a semicolon followed by the value printed with %d is written. + * @param[in] nbchar + * Minimum field width; only used when human is non-zero. + * @param[in,out] str_in + * Destination buffer. It is written with sprintf, so no bound is checked + * here and the caller has to provide enough room. + * @return Pointer to the terminating null character written in str_in, + * i.e. str_in advanced by the number of characters printed. + */ +char *sprint_norm( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + char *name; + switch( val.ntype ) { + case ChamMaxNorm: + name = "Max"; + break; + case ChamOneNorm: + name = "One"; + break; + case ChamInfNorm: + name = "Inf"; + break; + case ChamFrobeniusNorm: + name = "Frb"; + break; + default: + name = "ERR"; + } + rc = sprintf( str_in, " %-*s", nbchar, name ); + } + else { + rc = sprintf( str_in, ";%d", val.ntype ); + } + return str_in+rc; +} + +/** + * @brief Print the result of a check as SUCCESS or FAILED into a string buffer + * @param[in] val + * The check result; only val.ival is read. Zero is printed as SUCCESS, + * any other value as FAILED. + * @param[in] human + * If non-zero, the word is printed after a leading space, right-justified + * in a field of at least nbchar characters (the format has no - flag, + * unlike the other name printers of this file). + * If zero, a semicolon followed by the word is written. + * @param[in] nbchar + * Minimum field width; only used when human is non-zero. + * @param[in,out] str_in + * Destination buffer. It is written with sprintf, so no bound is checked + * here and the caller has to provide enough room. + * @return Pointer to the terminating null character written in str_in, + * i.e. str_in advanced by the number of characters printed. + */ +char *sprint_check( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + rc = sprintf( str_in, " %*s", nbchar, ( val.ival == 0 ) ? "SUCCESS" : "FAILED" ); + } + else { + rc = sprintf( str_in, ";%s", ( val.ival == 0 ) ? "SUCCESS" : "FAILED" ); + } + return str_in+rc; +} + +/** + * @brief Print a string value into a string buffer + * @param[in] val + * The value to print; only val.str is read and it is passed to %s as is, + * so it is assumed to be a valid null-terminated string (no NULL check + * is done here). + * @param[in] human + * If non-zero, the string is printed after a leading space, left-justified + * in a field of at least nbchar characters. + * If zero, a semicolon followed by the string is written. + * @param[in] nbchar + * Minimum field width; only used when human is non-zero. + * @param[in,out] str_in + * Destination buffer. It is written with sprintf, so no bound is checked + * here and the caller has to provide enough room. + * @return Pointer to the terminating null character written in str_in, + * i.e. str_in advanced by the number of characters printed.
+ */ +char *sprint_string( val_t val, int human, int nbchar, char *str_in ) +{ + int rc; + if ( human ) { + rc = sprintf( str_in, " %-*s", nbchar, val.str ); + } + else { + rc = sprintf( str_in, ";%s", val.str ); + } + return str_in+rc; +} + +/** + * @brief Generate a random float + * + * A single value is filled by CORE_splrnt, which receives the result of + * random() as its last argument (presumably the seed - to be confirmed + * against the coreblas prototype). + * @return The generated value. + */ +float +testing_salea() +{ + float val; + CORE_splrnt( 1, 1, &val, 1, 1, 1, 0, random() ); + return val; +} + +/** + * @brief Generate a random double + * + * A single value is filled by CORE_dplrnt, which receives the result of + * random() as its last argument (presumably the seed - to be confirmed + * against the coreblas prototype). + * @return The generated value. + */ +double +testing_dalea() +{ + double val; + CORE_dplrnt( 1, 1, &val, 1, 1, 1, 0, random() ); + return val; +} + +/** + * @brief Generate a random complex single precision + * + * A single value is filled by CORE_cplrnt, which receives the result of + * random() as its last argument (presumably the seed - to be confirmed + * against the coreblas prototype). + * @return The generated value. + */ +CHAMELEON_Complex32_t +testing_calea() +{ + CHAMELEON_Complex32_t val; + CORE_cplrnt( 1, 1, &val, 1, 1, 1, 0, random() ); + return val; +} + +/** + * @brief Generate a random complex double precision + * + * A single value is filled by CORE_zplrnt, which receives the result of + * random() as its last argument (presumably the seed - to be confirmed + * against the coreblas prototype). + * @return The generated value. + */ +CHAMELEON_Complex64_t +testing_zalea() +{ + CHAMELEON_Complex64_t val; + CORE_zplrnt( 1, 1, &val, 1, 1, 1, 0, random() ); + return val; +} diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 4ffbae9ea..3fb42f5da 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -152,7 +152,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/chameleon_testing.py DESTINATION bin/testing ) #-------- Tests --------- -include(CTestLists.cmake) +#include(CTestLists.cmake) ### ### END CMakeLists.txt diff --git a/timing/CMakeLists.txt b/timing/CMakeLists.txt index 8167545fd..eb70fd776 100644 --- a/timing/CMakeLists.txt +++ b/timing/CMakeLists.txt @@ -187,7 +187,7 @@ foreach(_timing ${TIMINGS}) endforeach() #-------- Tests --------- -include(CTestLists.cmake) +#include(CTestLists.cmake) ### ### END CMakeLists.txt diff --git a/tools/analysis.sh b/tools/analysis.sh index c4057ca04..b619a807d 100755 --- a/tools/analysis.sh +++ b/tools/analysis.sh @@ -47,7 +47,7 @@ sonar.projectDescription=Dense linear algebra subroutines for heterogeneous and sonar.projectVersion=0.9 sonar.language=c -sonar.sources=build-openmp/runtime/openmp, build-parsec/runtime/parsec,
build-quark/runtime/quark, build-starpu, compute, control, coreblas, example, include, runtime, testing, timing +sonar.sources=build-openmp/runtime/openmp, build-parsec/runtime/parsec, build-quark/runtime/quark, build-starpu, compute, control, coreblas, example, include, runtime, new-testing sonar.inclusions=`cat filelist.txt | sed ':a;N;$!ba;s/\n/, /g'` sonar.c.includeDirectories=$(echo | gcc -E -Wp,-v - 2>&1 | grep "^ " | tr '\n' ',').,$(find . -type f -name '*.h' | sed -r 's|/[^/]+$||' |sort |uniq | xargs echo | sed -e 's/ /,/g'),$PARSEC_DIR/include,$QUARK_DIR/include,$STARPU_DIR/include/starpu/1.2,$SIMGRID_DIR/include sonar.sourceEncoding=UTF-8 -- GitLab