diff --git a/CMakeLists.txt b/CMakeLists.txt index 766390a783946fc8e069b7fce9dd82fdad3f40b3..df0a1d955014bad9adf73ae4fe136590df1f726d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,18 +38,33 @@ if ( NOT BUILD_SUBPROJECT ) endif() set(hdrs - include/common.h - include/libdraw.h include/libhqr.h - include/queue.h + include/libhqr_internal.h + include/libhqr_queue.h ) set(srcs - src/libhqr.c - src/libhqr_dbg.c - src/libhqr_systolic.c + # Low level tree functions + src/low_flat.c + src/low_binary.c + src/low_fibonacci.c + src/low_greedy.c + src/low_greedy1p.c + src/low_adaptiv.c + # High level tree functions + src/high_flat.c + src/high_binary.c + src/high_fibonacci.c + src/high_greedy.c + # Direct access tree functions + src/systolic.c + src/svd.c + src/hqr.c + src/mtxtree.c + # Others + src/check.c + src/gendot.c + src/gensvg.c src/queue.c - src/treedraw.c - src/treewalk.c ) include_directories(include) @@ -60,13 +75,14 @@ add_subdirectory(testings) install(FILES include/libhqr.h - include/libhqr_common.h - # include/libhqr_dbg.h DESTINATION include ) -install(TARGETS hqr DESTINATION lib) +install(TARGETS hqr + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) -#generate_hqr_pkgconfig_file() +generate_hqr_pkgconfig_file() #-- Add a custom target to generate tags add_custom_target (tags diff --git a/FindHQR.cmake b/FindHQR.cmake new file mode 100644 index 0000000000000000000000000000000000000000..14863c2b8e5e6e31499cf968d3903598475a07bc --- /dev/null +++ b/FindHQR.cmake @@ -0,0 +1,307 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2017 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find HQR include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(HQR +# [REQUIRED]) # Fail with error if hqr is not found +# +# This module finds headers and hqr library. +# Results are reported in variables: +# HQR_FOUND - True if headers and requested libraries were found +# HQR_INCLUDE_DIRS - hqr include directories +# HQR_LIBRARY_DIRS - Link directories for hqr libraries +# HQR_LIBRARIES - hqr component libraries to be linked +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DHQR_DIR=path/to/hqr): +# HQR_DIR - Where to find the base directory of hqr +# HQR_INCDIR - Where to find the header files +# HQR_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: HQR_DIR, HQR_INCDIR, HQR_LIBDIR + +#============================================================================= +# Copyright 2012-2017 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2017 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +if (NOT HQR_FOUND) + set(HQR_DIR "" CACHE PATH "Installation directory of HQR library") + if (NOT HQR_FIND_QUIETLY) + message(STATUS "A cache variable, namely HQR_DIR, has been set to specify the install directory of HQR") + endif() +endif() + +set(ENV_HQR_DIR "$ENV{HQR_DIR}") +set(ENV_HQR_INCDIR "$ENV{HQR_INCDIR}") +set(ENV_HQR_LIBDIR "$ENV{HQR_LIBDIR}") +set(HQR_GIVEN_BY_USER "FALSE") +if ( HQR_DIR OR ( HQR_INCDIR AND HQR_LIBDIR) OR ENV_HQR_DIR OR (ENV_HQR_INCDIR AND ENV_HQR_LIBDIR) ) + set(HQR_GIVEN_BY_USER "TRUE") +endif() + +# Optionally use pkg-config to detect include/library dirs (if pkg-config is available) +# ------------------------------------------------------------------------------------- +include(FindPkgConfig) +find_package(PkgConfig QUIET) +if(PKG_CONFIG_EXECUTABLE AND NOT HQR_GIVEN_BY_USER) + + pkg_search_module(HQR hqr) + if (NOT HQR_FIND_QUIETLY) + if (HQR_FOUND AND HQR_LIBRARIES) + message(STATUS "Looking for HQR - found using PkgConfig") + #if(NOT HQR_INCLUDE_DIRS) + # message("${Magenta}HQR_INCLUDE_DIRS is empty using PkgConfig." + # "Perhaps the path to hqr headers is already present in your" + # "C(PLUS)_INCLUDE_PATH environment variable.${ColourReset}") + #endif() + else() + message(STATUS "${Magenta}Looking for HQR - not found using PkgConfig." + "\n Perhaps you should add the directory containing hqr.pc to the" + "\n PKG_CONFIG_PATH environment variable.${ColourReset}") + endif() + endif() + +endif(PKG_CONFIG_EXECUTABLE AND NOT HQR_GIVEN_BY_USER) + +if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT HQR_FOUND) OR (HQR_GIVEN_BY_USER) ) + + if (NOT HQR_FIND_QUIETLY) + message(STATUS "Looking for HQR - PkgConfig not used") + endif() + + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + set(ENV_HQR_DIR "$ENV{HQR_DIR}") + set(ENV_HQR_INCDIR "$ENV{HQR_INCDIR}") + if(ENV_HQR_INCDIR) + list(APPEND _inc_env "${ENV_HQR_INCDIR}") + elseif(ENV_HQR_DIR) + list(APPEND _inc_env "${ENV_HQR_DIR}") + list(APPEND _inc_env "${ENV_HQR_DIR}/include") + else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) + + # Try to find the hqr header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(HQR_INCDIR) + set(HQR_HQR.h_DIRS "HQR_libhqr.h_DIRS-NOTFOUND") + find_path(HQR_libhqr.h_DIRS + NAMES libhqr.h + HINTS ${HQR_INCDIR}) + else() + if(HQR_DIR) + set(HQR_libhqr.h_DIRS "HQR_libhqr.h_DIRS-NOTFOUND") + find_path(HQR_libhqr.h_DIRS + NAMES libhqr.h + HINTS ${HQR_DIR} + else() + set(HQR_libhqr.h_DIRS "HQR_libhqr.h_DIRS-NOTFOUND") + find_path(HQR_libhqr.h_DIRS + NAMES libhqr.h + HINTS ${_inc_env} + PATH_SUFFIXES "hqr") + endif() + endif() + mark_as_advanced(HQR_libhqr.h_DIRS) + + # Add path to cmake variable + # ------------------------------------ + if (HQR_libhqr.h_DIRS) + set(HQR_INCLUDE_DIRS "${HQR_libhqr.h_DIRS}") + else () + set(HQR_INCLUDE_DIRS "HQR_INCLUDE_DIRS-NOTFOUND") + if(NOT HQR_FIND_QUIETLY) + message(STATUS "Looking for hqr -- libhqr.h not found") + endif() + endif () + + if (HQR_INCLUDE_DIRS) + list(REMOVE_DUPLICATES HQR_INCLUDE_DIRS) + endif () + + # Looking for lib + # --------------- + + # Add system library paths to search lib + # -------------------------------------- + unset(_lib_env) + set(ENV_HQR_LIBDIR "$ENV{HQR_LIBDIR}") + if(ENV_HQR_LIBDIR) + list(APPEND _lib_env "${ENV_HQR_LIBDIR}") + elseif(ENV_HQR_DIR) + list(APPEND _lib_env "${ENV_HQR_DIR}") + list(APPEND _lib_env "${ENV_HQR_DIR}/lib") + else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + endif() + list(REMOVE_DUPLICATES _lib_env) + + # Try to find the hqr lib in the given paths + # ---------------------------------------------- + + # call cmake macro to find the lib path + if(HQR_LIBDIR) + set(HQR_hqr_LIBRARY "HQR_hqr_LIBRARY-NOTFOUND") + find_library(HQR_hqr_LIBRARY + NAMES hqr + HINTS ${HQR_LIBDIR}) + else() + if(HQR_DIR) + set(HQR_hqr_LIBRARY "HQR_hqr_LIBRARY-NOTFOUND") + find_library(HQR_hqr_LIBRARY + NAMES hqr + HINTS ${HQR_DIR} + PATH_SUFFIXES lib lib32 lib64) + else() + set(HQR_hqr_LIBRARY "HQR_hqr_LIBRARY-NOTFOUND") + find_library(HQR_hqr_LIBRARY + NAMES hqr + HINTS ${_lib_env}) + endif() + endif() + mark_as_advanced(HQR_hqr_LIBRARY) + + # If found, add path to cmake variable + # ------------------------------------ + if (HQR_hqr_LIBRARY) + get_filename_component(hqr_lib_path ${HQR_hqr_LIBRARY} PATH) + # set cmake variables (respects naming convention) + set(HQR_LIBRARIES "${HQR_hqr_LIBRARY}") + set(HQR_LIBRARY_DIRS "${hqr_lib_path}") + else () + set(HQR_LIBRARIES "HQR_LIBRARIES-NOTFOUND") + set(HQR_LIBRARY_DIRS "HQR_LIBRARY_DIRS-NOTFOUND") + if(NOT HQR_FIND_QUIETLY) + message(STATUS "Looking for hqr -- lib hqr not found") + endif() + endif () + + if (HQR_LIBRARY_DIRS) + list(REMOVE_DUPLICATES HQR_LIBRARY_DIRS) + endif () + + # check a function to validate the find + if(HQR_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # HQR + if (HQR_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${HQR_INCLUDE_DIRS}") + endif() + if (HQR_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${HQR_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${HQR_LIBRARIES}") + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(HQR_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(libhqr_hqr_init HQR_WORKS) + mark_as_advanced(HQR_WORKS) + + if(NOT HQR_WORKS) + if(NOT HQR_FIND_QUIETLY) + message(STATUS "Looking for hqr : test of libhqr_hqr_init with hqr library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) + endif(HQR_LIBRARIES) + +endif( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT HQR_FOUND) OR (HQR_GIVEN_BY_USER) ) + +if (HQR_LIBRARIES) + if (HQR_LIBRARY_DIRS) + list(GET HQR_LIBRARY_DIRS 0 first_lib_path) + else() + list(GET HQR_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + endif() + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(HQR_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of HQR library" FORCE) + else() + set(HQR_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of HQR library" FORCE) + endif() +endif() +mark_as_advanced(HQR_DIR) +mark_as_advanced(HQR_DIR_FOUND) + +# check that HQR has been found +# ------------------------------- +include(FindPackageHandleStandardArgs) +if (PKG_CONFIG_EXECUTABLE AND HQR_FOUND) + find_package_handle_standard_args(HQR DEFAULT_MSG + HQR_LIBRARIES) +else() + find_package_handle_standard_args(HQR DEFAULT_MSG + HQR_LIBRARIES + HQR_WORKS) +endif() diff --git a/FindLIBHQR.cmake b/FindLIBHQR.cmake deleted file mode 100644 index 912d71d8758326cd81a82cd51ddc52b1e3ef54f6..0000000000000000000000000000000000000000 --- a/FindLIBHQR.cmake +++ /dev/null @@ -1,35 +0,0 @@ -# - Try to find LibHQR -# Once done this will define -# LIBHQR_FOUND - System has LibHQR -# LIBHQR_INCLUDE_DIRS - The LibHQR include directories -# LIBHQR_LIBRARIES - The libraries needed to use LibHQR -# LIBHQR_DEFINITIONS - Compiler switches required for using LIBHQR - -find_package(PkgConfig) -pkg_check_modules(PC_LIBHQR QUIET libhqr) -set(LIBHQR_DEFINITIONS ${PC_LIBHQR_CFLAGS_OTHER}) - -find_path( - LIBHQR_INCLUDE_DIR - libhqr.h - HINTS ${PC_LIBHQR_INCLUDEDIR} - ${PC_LIBHQR_INCLUDE_DIRS} - ) - -find_library( - LIBHQR_LIBRARY - NAMES hqr - HINTS ${PC_LIBHQR_LIBDIR} ${PC_LIBHQR_LIBRARY_DIRS} - ) - -include(FindPackageHandleStandardArgs) -# handle the QUIETLY and REQUIRED arguments -# and set LIBHQR_FOUND to TRUE -# if all listed variables are TRUE -find_package_handle_standard_args( - LIBHQR DEFAULT_MSG LIBHQR_LIBRARY LIBHQR_INCLUDE_DIR) - -mark_as_advanced(LIBHQR_INCLUDE_DIR LIBHQR_LIBRARY ) - -set(LIBHQR_LIBRARIES ${LIBHQR_LIBRARY} ) -set(LIBHQR_INCLUDE_DIRS ${LIBHQR_INCLUDE_DIR} ) diff --git a/cmake_modules/GenHQRPkgConfig.cmake b/cmake_modules/GenHQRPkgConfig.cmake index b20623ea042623b74bcd616ed9327b28a74f491d..efe61507bbb837a0a25ad010d704d31274d5d917 100644 --- a/cmake_modules/GenHQRPkgConfig.cmake +++ b/cmake_modules/GenHQRPkgConfig.cmake @@ -33,26 +33,26 @@ # used in CLEAN_LIB_LIST # ### -macro(CONVERT_LIBSTYLE_TO_PKGCONFIG _liblist) - set(${_liblist}_CPY "${${_liblist}}") - set(${_liblist} "") - foreach(_dep ${${_liblist}_CPY}) - if (${_dep} MATCHES "^/") - get_filename_component(dep_libname ${_dep} NAME) - get_filename_component(dep_libdir ${_dep} DIRECTORY) - string(REPLACE "lib" "" dep_libname "${dep_libname}") - string(REPLACE ".so" "" dep_libname "${dep_libname}") - string(REPLACE ".a" "" dep_libname "${dep_libname}") - string(REPLACE ".dylib" "" dep_libname "${dep_libname}") - string(REPLACE ".dll" "" dep_libname "${dep_libname}") - list(APPEND ${_liblist} -L${dep_libdir} -l${dep_libname}) - elseif(NOT ${_dep} MATCHES "^-") - list(APPEND ${_liblist} "-l${_dep}") - else() - list(APPEND ${_liblist} ${_dep}) - endif() - endforeach() -endmacro(CONVERT_LIBSTYLE_TO_PKGCONFIG) +# macro(CONVERT_LIBSTYLE_TO_PKGCONFIG _liblist) +# set(${_liblist}_CPY "${${_liblist}}") +# set(${_liblist} "") +# foreach(_dep ${${_liblist}_CPY}) +# if (${_dep} MATCHES "^/") +# get_filename_component(dep_libname ${_dep} NAME) +# get_filename_component(dep_libdir ${_dep} DIRECTORY) +# string(REPLACE "lib" "" dep_libname "${dep_libname}") +# string(REPLACE ".so" "" dep_libname "${dep_libname}") +# string(REPLACE ".a" "" dep_libname "${dep_libname}") +# string(REPLACE ".dylib" "" dep_libname "${dep_libname}") +# string(REPLACE ".dll" "" dep_libname "${dep_libname}") +# list(APPEND ${_liblist} -L${dep_libdir} -l${dep_libname}) +# elseif(NOT ${_dep} MATCHES "^-") +# list(APPEND ${_liblist} "-l${_dep}") +# else() +# list(APPEND ${_liblist} ${_dep}) +# endif() +# endforeach() +# endmacro(CONVERT_LIBSTYLE_TO_PKGCONFIG) ### # @@ -60,18 +60,18 @@ endmacro(CONVERT_LIBSTYLE_TO_PKGCONFIG) # used in GENERATE_PKGCONFIG_FILE # ### -macro(CLEAN_LIB_LIST _package) - list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_LIBS) - list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_LIBS_PRIVATE) - list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_REQUIRED) - list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_REQUIRED_PRIVATE) - convert_libstyle_to_pkgconfig(${_package}_PKGCONFIG_LIBS) - convert_libstyle_to_pkgconfig(${_package}_PKGCONFIG_LIBS_PRIVATE) - string(REPLACE ";" " " ${_package}_PKGCONFIG_LIBS "${${_package}_PKGCONFIG_LIBS}") - string(REPLACE ";" " " ${_package}_PKGCONFIG_LIBS_PRIVATE "${${_package}_PKGCONFIG_LIBS_PRIVATE}") - string(REPLACE ";" " " ${_package}_PKGCONFIG_REQUIRED "${${_package}_PKGCONFIG_REQUIRED}") - string(REPLACE ";" " " ${_package}_PKGCONFIG_REQUIRED_PRIVATE "${${_package}_PKGCONFIG_REQUIRED_PRIVATE}") -endmacro(CLEAN_LIB_LIST) +#macro(CLEAN_LIB_LIST _package) +# list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_LIBS) +# list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_LIBS_PRIVATE) +# list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_REQUIRED) +# list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_REQUIRED_PRIVATE) +# convert_libstyle_to_pkgconfig(${_package}_PKGCONFIG_LIBS) +# convert_libstyle_to_pkgconfig(${_package}_PKGCONFIG_LIBS_PRIVATE) +# string(REPLACE ";" " " ${_package}_PKGCONFIG_LIBS "${${_package}_PKGCONFIG_LIBS}") +# string(REPLACE ";" " " ${_package}_PKGCONFIG_LIBS_PRIVATE "${${_package}_PKGCONFIG_LIBS_PRIVATE}") +# string(REPLACE ";" " " ${_package}_PKGCONFIG_REQUIRED "${${_package}_PKGCONFIG_REQUIRED}") +# string(REPLACE ";" " " ${_package}_PKGCONFIG_REQUIRED_PRIVATE "${${_package}_PKGCONFIG_REQUIRED_PRIVATE}") +#endmacro(CLEAN_LIB_LIST) ### # @@ -85,7 +85,7 @@ macro(GENERATE_HQR_PKGCONFIG_FILE) set(HQR_PKGCONFIG_REQUIRED "") set(HQR_PKGCONFIG_REQUIRED_PRIVATE "") - clean_lib_list(LIBHQR) + #clean_lib_list(LIBHQR) set(_output_libhqr_file "${CMAKE_BINARY_DIR}/hqr.pc") configure_file( diff --git a/include/libhqr.h b/include/libhqr.h index f255fb55d0c3336e9a01e22a096e5b3c2096a49b..16122b2b4ba823f56d045de7d217feb999773ba3 100644 --- a/include/libhqr.h +++ b/include/libhqr.h @@ -18,23 +18,24 @@ #ifndef _LIBHQR_H_ #define _LIBHQR_H_ -#include <stdio.h> -#include "libhqr_common.h" +#ifdef BEGIN_C_DECLS +#undef BEGIN_C_DECLS +#endif + +#ifdef END_C_DECLS +#undef END_C_DECLS +#endif + +#if defined(c_plusplus) || defined(__cplusplus) +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else +# define BEGIN_C_DECLS /* empty */ +# define END_C_DECLS /* empty */ +#endif BEGIN_C_DECLS -static inline int libhqr_imin(int a, int b){ - return (a > b) ? b : a; -} - -static inline int libhqr_imax(int a, int b){ - return (a > b) ? a : b; -} - -static inline int libhqr_iceil(int a, int b){ - return (a + b - 1) / b; -} - /** * @brief Define which tree level the tile belongs to. * @@ -50,8 +51,8 @@ static inline int libhqr_iceil(int a, int b){ * matrices. * * @remark LIBHQR_KILLED_BY_TS needs to be 0 for all variant of QR - * factorizations in order to distinguish TT kernels from TS kernels in - * compuations + * factorizations in order to easily distinguish TT kernels from TS kernels in + * computations. * */ typedef enum libhqr_type_ { @@ -74,47 +75,42 @@ typedef enum libhqr_tree_ { LIBHQR_FIBONACCI_TREE = 2, LIBHQR_BINARY_TREE = 3, LIBHQR_GREEDY1P_TREE = 4, -} libqr_tree_e; +} libhqr_tree_e; /** * @brief Define the type of factorization to apply: QR or LQ. */ -typedef enum libhqr_typefacto_ { +typedef enum libhqr_facto_ { LIBHQR_QR = 0, /**< QR factorization will be performed, and A shape is considered */ LIBHQR_LQ = 1, /**< LQ factorization will be performed, and A^t shape is considered */ -} libhqr_typefacto_e; +} libhqr_facto_e; /** - * @brief Minimal structure to define the shape of the matrix to factorize. + * @brief Minimal matrix description stucture. + * + * This structure describes the dimension of the matrix to factorize in number + * of tiles: mt-by-nt. When provided, nodes and p provides the total number of + * nodes involved in the computation, and the P parameter of 2D bloc-cyclic + * distributions of the tiles. LibHQR doesn't support any other distribtions for + * now. */ -typedef struct libhqr_tiledesc_s{ - int mt; /**< The number of rows of tiles */ - int nt; /**< The number of columns of tiles */ +typedef struct libhqr_matrix_s { + int mt; /**< The number of rows of tiles in the matrix */ + int nt; /**< The number of columns of tiles in the matrix */ int nodes; /**< The number of nodes involved in the data distribution */ int p; /**< The number of nodes per column in the data distribution */ -} libhqr_tiledesc_t; - +} libhqr_matrix_t; /** - * @brief Minimal structure to stock the information for each tile + * @brief Hierarchical tree structure for QR/LQ factorization like kernels. + * + * This structure holds the functions that allows one to traverse the tree, or + * to know form each node which ones are its neigbours in the reduction trees. + * */ -typedef struct libhqr_tileinfo_s{ - int type; /**< The type of the tile */ - int currpiv; /**< Number of the time annihilating the current tile */ - int nextpiv; /**< The next tile killed by the tile */ - int prevpiv; /**< The previous tile killed by the tile */ - int first_nextpiv; /**< The first next tile killed */ - int first_prevpiv; /**< The first previous tile killed */ -} libhqr_tileinfo_t; - struct libhqr_tree_s; typedef struct libhqr_tree_s libhqr_tree_t; -typedef struct libhqr_context_s libhqr_context_t; - -/** - * @brief Hierarchical tree structure for QR/LQ factorization like kernels. - */ struct libhqr_tree_s { /** * @brief Return the number of geqrt/gelqt kernels in the column/row k @@ -186,6 +182,7 @@ struct libhqr_tree_s { */ int (*prevpiv)(const libhqr_tree_t *qrtree, int k, int p, int m); + int init; /**< Internal variable */ int mt; /**< Number of rows of tile A if QR factorization, and in A^t if LQ factorization */ int nt; /**< Number of tile reflectors to compute. This is equal to @@ -199,53 +196,58 @@ struct libhqr_tree_s { tree: hqr, systolic, svd, ...) */ }; -int libhqr_systolic_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, - int p, int q ); -void libhqr_systolic_finalize( libhqr_tree_t *qrtree ); +/** + * @name Initialization/Finalization functions + * @{ + */ +int libhqr_init_sys( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int p, int q ); -int libhqr_svd_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, +int libhqr_init_svd( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, int type_hlvl, int p, int nbcores_per_node, int ratio ); -int libhqr_hqr_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, +int libhqr_init_hqr( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, int type_llvl, int type_hlvl, int a, int p, int domino, int tsrr ); -void libhqr_hqr_finalize( libhqr_tree_t *qrtree ); +void libhqr_finalize( libhqr_tree_t *qrtree ); - -void libhqr_matrix_init(libhqr_tree_t *qrtree, const libhqr_tree_t *qrtree_init); -int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m); -int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m); -int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m); -int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m); -void libhqr_matrix_finalize(libhqr_tree_t *qrtree); - -/* - * function for drawing the tree +/** + * @} + * @name Function to walk the tree in correct order for STF programing model + * @{ */ +void libhqr_walk_stepk( const libhqr_tree_t *qrtree, int k, int *tiles ); -void libhqr_treewalk(const libhqr_tree_t *qrtree, int k, int *tiles); -void draw_rectangle(int k, int p, int m, int step_m, FILE *file); -void draw_lines(const libhqr_tree_t *qrtree, int k, int *tiles, int *step, FILE *file); -void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *file); -void draw_vertical_line(int k, int p, int m, int step_m, FILE *file); -void libhqr_print_tree(const libhqr_tree_t *qrtree, libhqr_tiledesc_t *matrix); +/** + * @} + * @name Drawing functions + * @{ + */ +void libhqr_print_dag( const libhqr_tree_t *qrtree, + const char *filename ); +void libhqr_print_svg( const libhqr_tree_t *qrtree, + const char *filename ); -/* - * Debugging functions +/** + * @} + * @name Checking/Debugging functions + * @{ */ +int libhqr_check ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree ); +void libhqr_print_type ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree ); +void libhqr_print_pivot ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree ); +void libhqr_print_nbgeqrt( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree ); +void libhqr_print_perm ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int *perm ); +void libhqr_print_next_k ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k ); +void libhqr_print_prev_k ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k ); +void libhqr_print_geqrt_k( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k ); -int libhqr_tree_check ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); -void libhqr_tree_print_dag ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, char *filename ); -void libhqr_tree_print_type ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); -void libhqr_tree_print_pivot ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); -void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ); -void libhqr_tree_print_perm ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *perm ); -void libhqr_tree_print_next_k ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ); -void libhqr_tree_print_prev_k ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ); -void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ); +/** + * @} + */ END_C_DECLS diff --git a/include/libhqr_common.h b/include/libhqr_common.h deleted file mode 100644 index 1ae8a4312f30cdf4195b857a0373f7cd0f083494..0000000000000000000000000000000000000000 --- a/include/libhqr_common.h +++ /dev/null @@ -1,29 +0,0 @@ -/** - * - * @file libhqr_common.h - * - * Header file for common macro - * - * @copyright 2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - * @version 1.0.0 - * @author Raphael Boucherie - * @author Mathieu Faverge - * @date 2017-04-05 - * - */ -#ifndef _LIBHQR_COMMON_H_ -#define _LIBHQR_COMMON_H_ - -#undef BEGIN_C_DECLS -#undef END_C_DECLS -#if defined(c_plusplus) || defined(__cplusplus) -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else -# define BEGIN_C_DECLS /* empty */ -# define END_C_DECLS /* empty */ -#endif - -#endif /* _LIBHQR_COMMON_H_ */ diff --git a/include/libhqr_draw.h b/include/libhqr_draw.h deleted file mode 100644 index 596151dcc7ba7afe7ca22ec0761a40aff20f1de8..0000000000000000000000000000000000000000 --- a/include/libhqr_draw.h +++ /dev/null @@ -1,46 +0,0 @@ -/** - * - * @file libhqr_draw.h - * - * Header file for all the functions of drawing. - * - * @copyright 2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - * @version 1.0.0 - * @author Raphael Boucherie - * @author Mathieu Faverge - * @date 2017-04-04 - * - **/ - -#ifndef _LIBHQR_DRAW_H_ -#define _LIBHQR_DRAW_H_ - -#include "libhqr_common.h" - -BEGIN_C_DECLS - -#define WIDTH 50 -#define HEIGHT 50 -#define SIZE 100 - -/* - * Clobal array for color - */ - -extern char *color[4]; - -/* - * function for treedraw - */ - -void libhqr_writeheader(FILE *file); -void libhqr_writeend(FILE *file); -void libhqr_drawTT(int x, int y, int w, int h, int k, FILE *file); -void libhqr_drawTS(int x, int y, int w, int h, int k, FILE *file); -void libhqr_drawline(int x1, int y1, int x2, int y2, int k, FILE *file); - -END_C_DECLS - -#endif /* _LIBHQR_DRAW_H_ */ diff --git a/include/libhqr_internal.h b/include/libhqr_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..a51c60701b640203764f255cc9e1ab8a04f8d43c --- /dev/null +++ b/include/libhqr_internal.h @@ -0,0 +1,181 @@ +/** + * + * @file libhqr.h + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + */ +#ifndef _LIBHQR_INTERNAL_H_ +#define _LIBHQR_INTERNAL_H_ + +#include "libhqr.h" +#include <stdio.h> +#include <assert.h> + +#define PRINT_PIVGEN 0 +#ifdef PRINT_PIVGEN +#define myassert( test ) {if ( ! (test) ) return -1;} +#else +#define myassert(test) {assert((test)); return -1;} +#endif + +typedef enum qrtree_type_ { + LIBHQR_QRTREE_UNSET = 0, + LIBHQR_QRTREE_HQR, + LIBHQR_QRTREE_SVD, + LIBHQR_QRTREE_SYS, + LIBHQR_QRTREE_MTX, +} qrtree_type_e; + +struct hqr_args_s; +typedef struct hqr_args_s hqr_args_t; + +struct hqr_subpiv_s; +typedef struct hqr_subpiv_s hqr_subpiv_t; + +/** + * @brief jhj + */ +struct hqr_args_s { + int domino; /**< Switch to enable/disable the domino tree linking high and lw level reduction trees */ + int tsrr; /**< Switch to enable/disable round-robin on TS to optimise pipelining between TS and local tree */ + hqr_subpiv_t *llvl; + hqr_subpiv_t *hlvl; + int *perm; +}; + +struct hqr_subpiv_s { + /** + * currpiv + * @param[in] arg pointer to the qr_piv structure + * @param[in] k step in the factorization + * @param[in] m line you want to eliminate + * + * @return the annihilator p used with m at step k + */ + int (*currpiv)(const hqr_subpiv_t *arg, int k, int m); + /* + * nextpiv + * @param[in] arg pointer to the qr_piv structure + * @param[in] k step in the factorization + * @param[in] p line currently used as an annihilator + * @param[in] m line actually annihilated. + * m = MT to find the first time p is used as an annihilator during step k + * + * @return the next line m' using the line p as annihilator during step k + * mt if p will never be used again as an annihilator. + */ + int (*nextpiv)(const hqr_subpiv_t *arg, int k, int p, int m); + /* + * prevpiv + * @param[in] arg pointer to the qr_piv structure + * @param[in] k step in the factorization + * @param[in] p line currently used as an annihilator + * @param[in] m line actually annihilated. + * m = p to find the last time p has been used as an annihilator during step k + * + * @return the previous line m' using the line p as annihilator during step k + * mt if p has never been used before that as an annihilator. + */ + int (*prevpiv)(const hqr_subpiv_t *arg, int k, int p, int m); + int *ipiv; + int minMN; + int ldd; + int a; + int p; + int domino; +}; + +/** + * @brief Minimal structure to store the information related to each tile. + */ +typedef struct libhqr_tile_info_s { + int type; /**< The type of the reduction applied to the tile (@sa libhqr_type_e) */ + int currpiv; /**< The row index of the pivot for this tile */ + int nextpiv; /**< The next tile for which the currpiv is a pivot */ + int prevpiv; /**< The previous tile for which currpiv was a pivot */ + int first_nextpiv; /**< If type != 0, the first tile for which this tile is a pivot */ + int first_prevpiv; /**< If type != 0, the last tile for which this tile is a pivot */ +} libhqr_tile_info_t; + +static inline int libhqr_imin(int a, int b){ + return (a > b) ? b : a; +} + +static inline int libhqr_imax(int a, int b){ + return (a > b) ? a : b; +} + +static inline int libhqr_iceil(int a, int b){ + return (a + b - 1) / b; +} + + +/* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */ +#define nbextra1_formula (( (k % pa) > (pa - p) ) ? (-k)%pa + pa : 0) + +/* + * Common functions + */ +/* int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); */ +/* int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ); */ +/* int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ); */ +/* int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ); */ + +/* + * Subtree for low-level + */ +void libhqr_matrix_init(libhqr_tree_t *qrtree, const libhqr_tree_t *qrtree_init); +int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m); +int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m); +int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m); +int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m); +void libhqr_matrix_finalize(libhqr_tree_t *qrtree); + +/* + * function for drawing the tree + */ +void draw_rectangle(int k, int p, int m, int step_m, FILE *file); +void draw_lines(const libhqr_tree_t *qrtree, int k, int *tiles, int *step, FILE *file); +void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *file); +void draw_vertical_line( int k, int p, int m, int step_m, FILE *file); + +/** + * @name Low level trees + * @{ + */ +void hqr_low_flat_init ( hqr_subpiv_t *arg ); +void hqr_low_binary_init ( hqr_subpiv_t *arg ); +void hqr_low_fibonacci_init( hqr_subpiv_t *arg, int minMN ); +void hqr_low_greedy_init ( hqr_subpiv_t *arg, int minMN ); +void hqr_low_greedy1p_init ( hqr_subpiv_t *arg, int minMN ); +void svd_low_adaptiv_init ( hqr_subpiv_t *arg, int gmt, int gnt, int nbcores, int ratio ); + +/** + * @} + * + * @name High level trees + * @{ + */ +void hqr_high_flat_init ( hqr_subpiv_t *arg ); +void hqr_high_binary_init ( hqr_subpiv_t *arg ); +void hqr_high_fibonacci_init( hqr_subpiv_t *arg ); +void hqr_high_greedy_init ( hqr_subpiv_t *arg, int minMN ); +void hqr_high_greedy1p_init ( hqr_subpiv_t *arg ); +/** + * @} + */ + +void libhqr_fct_to_mtx( const libhqr_tree_t *in, libhqr_tree_t *out ); + +#endif /* _LIBHQR_INTERNAL_H_ */ diff --git a/include/libhqr_queue.h b/include/libhqr_queue.h index 1448ce88877c1c309b48fefd873f00037c1ef679..9c968901feba7936566c1dbf131762654ed92038 100644 --- a/include/libhqr_queue.h +++ b/include/libhqr_queue.h @@ -16,10 +16,6 @@ #ifndef _LIBHQR_QUEUE_H_ #define _LIBHQR_QUEUE_H_ -#include "libhqr_common.h" - -BEGIN_C_DECLS - typedef struct libhqr_queue_tile_s { struct libhqr_queue_tile_s *prev; struct libhqr_queue_tile_s *next; @@ -27,11 +23,9 @@ typedef struct libhqr_queue_tile_s { } libhqr_queue_tile_t; libhqr_queue_tile_t *libhqr_queue_tile_new (void); -void libhqr_queue_tile_push (libhqr_queue_tile_t ** queue_tile, int numero); -int libhqr_queue_tile_head (libhqr_queue_tile_t ** queue_tile); -int libhqr_queue_tile_pop (libhqr_queue_tile_t ** queue_tile); -void libhqr_queue_tile_delete(libhqr_queue_tile_t ** queue_tile); - -END_C_DECLS +void libhqr_queue_tile_push (libhqr_queue_tile_t **queue_tile, int numero); +int libhqr_queue_tile_head (libhqr_queue_tile_t **queue_tile); +int libhqr_queue_tile_pop (libhqr_queue_tile_t **queue_tile); +void libhqr_queue_tile_delete(libhqr_queue_tile_t **queue_tile); #endif /* _LIBHQR_QUEUE_H_ */ diff --git a/lib/pkgconfig/libhqr.pc.in b/lib/pkgconfig/hqr.pc.in similarity index 74% rename from lib/pkgconfig/libhqr.pc.in rename to lib/pkgconfig/hqr.pc.in index 051384e9ec5aba1aafbd53abcb7b322087a0f502..31864a4f4c6bae665805552565ee1e7cf327381a 100644 --- a/lib/pkgconfig/libhqr.pc.in +++ b/lib/pkgconfig/hqr.pc.in @@ -1,10 +1,10 @@ prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix} libdir=${exec_prefix}/lib -includedir=${exec_prefix}/include/libhqr +includdir=${exec_prefix}/include -Name: libhqr -Description: Build and Visualize Tree of Hierachical QR Factorization +Name: HQR +Description: Build and Visualize Trees for Hierachical QR Factorizations Version: @LIBHQR_VERSION_MAJOR@.@LIBHQR_VERSION_MINOR@.@LIBHQR_VERSION_MICRO@ Cflags: -I${includedir} Libs: -L${libdir} @LIBHQR_PKGCONFIG_LIBS@ diff --git a/src/libhqr_dbg.c b/src/check.c similarity index 66% rename from src/libhqr_dbg.c rename to src/check.c index cc333f41fbafe64b3061bd13356f7dfc53c8f959..e2d9dfb49a253f683a6cc41b5957412b19bcf1d3 100644 --- a/src/libhqr_dbg.c +++ b/src/check.c @@ -1,6 +1,6 @@ /** * - * @file libhqr_dbg.c + * @file check.c * * @copyright 2010-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -15,7 +15,7 @@ * @date 2017-03-21 * */ -#include "libhqr.h" +#include "libhqr_internal.h" #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -30,7 +30,8 @@ return ret; \ } -int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) +int +libhqr_check( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree) { int minMN = libhqr_imin(A->mt, A->nt ); int i, m, k, nb; @@ -43,8 +44,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) * Check Formula for NB geqrt */ { - /* libhqr_tree_print_type( A, qrtree ); */ - /* libhqr_tree_print_nbgeqrt( A, qrtree ); */ + /* libhqr_print_type( A, qrtree ); */ + /* libhqr_print_nbgeqrt( A, qrtree ); */ check = 1; for (k=0; k<minMN; k++) { nb = 0; @@ -73,7 +74,7 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) int prevm = -1; check = 1; for (k=0; k<minMN; k++) { - /* libhqr_tree_print_geqrt_k( A, qrtree, k ); */ + /* libhqr_print_geqrt_k( A, qrtree, k ); */ nb = qrtree->getnbgeqrf( qrtree, k ); prevm = -1; for (i=0; i < nb; i++) { @@ -134,8 +135,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) nb++; } if ( nb > 1 ) { - libhqr_tree_print_next_k( A, qrtree, k); - libhqr_tree_print_prev_k( A, qrtree, k); + libhqr_print_next_k( A, qrtree, k); + libhqr_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -146,8 +147,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) return 3; } else if ( nb == 0 ) { - libhqr_tree_print_next_k( A, qrtree, k); - libhqr_tree_print_prev_k( A, qrtree, k); + libhqr_print_next_k( A, qrtree, k); + libhqr_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -177,8 +178,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) nb++; } if ( nb > 1 ) { - libhqr_tree_print_next_k( A, qrtree, k); - libhqr_tree_print_prev_k( A, qrtree, k); + libhqr_print_next_k( A, qrtree, k); + libhqr_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -189,8 +190,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) return 3; } else if ( nb == 0 ) { - libhqr_tree_print_next_k( A, qrtree, k); - libhqr_tree_print_prev_k( A, qrtree, k); + libhqr_print_next_k( A, qrtree, k); + libhqr_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -224,8 +225,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) prev = qrtree->prevpiv(qrtree, k, m, next); if ( start != prev ) { - libhqr_tree_print_next_k( A, qrtree, k); - libhqr_tree_print_prev_k( A, qrtree, k); + libhqr_print_next_k( A, qrtree, k); + libhqr_print_prev_k( A, qrtree, k); printf(" ----------------------------------------------------\n" " - a = %d, p = %d, M = %d, N = %d\n" @@ -246,7 +247,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree) return 0; } -void libhqr_tree_print_type( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) +void +libhqr_print_type( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree ) { int minMN = libhqr_imin(A->mt, A->nt ); int m, k; @@ -282,7 +284,8 @@ void libhqr_tree_print_type( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) } } -void libhqr_tree_print_pivot( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) +void +libhqr_print_pivot( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree ) { int minMN = libhqr_imin(A->mt, A->nt ); int m, k; @@ -317,7 +320,8 @@ void libhqr_tree_print_pivot( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) } } -void libhqr_tree_print_next_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ) +void +libhqr_print_next_k( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k ) { int m, s; printf("\n------------ Next (k = %d)--------------\n", k); @@ -336,7 +340,8 @@ void libhqr_tree_print_next_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int } } -void libhqr_tree_print_prev_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ) +void +libhqr_print_prev_k( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k ) { int m, s; printf("\n------------ Prev (k = %d)--------------\n", k); @@ -355,7 +360,8 @@ void libhqr_tree_print_prev_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int } } -void libhqr_tree_print_perm( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *perm ) +void +libhqr_print_perm( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int *perm ) { int minMN = libhqr_imin(A->mt, A->nt ); int m, k; @@ -380,7 +386,8 @@ void libhqr_tree_print_perm( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *p printf( "\n" ); } -void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) +void +libhqr_print_nbgeqrt( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree ) { int minMN = libhqr_imin(A->mt, A->nt ); int m, k, nb; @@ -408,7 +415,8 @@ void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree ) printf( "\n" ); } -void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k ) +void +libhqr_print_geqrt_k( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k ) { int i, m, nb; (void)A; @@ -426,125 +434,3 @@ void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int } printf( "\n" ); } - - -/* static int libhqr_tree_getinon0( const libhqr_tree_t *qrtree, */ -/* const int k, int i, int mt ) */ -/* { */ -/* int j; */ -/* for(j=k; j<mt; j++) { */ -/* if ( libhqr_tree_gettype( qrtree, k, j ) != 0 ) */ -/* i--; */ -/* if ( i == -1 ) */ -/* break; */ -/* } */ -/* return qrtree->perm[k*(qrtree->desc->mt+1) + j]; */ -/* } */ - -#define DAG_HEADER "digraph G { orientation=portrait; \n" -#define DAG_FOOTER "} // close graph\n" -#define DAG_LABELNODE "%d [label=\"%d\",color=white,pos=\"-1.,-%d.!\"]\n" -#define DAG_LENGTHNODE "l%d [label=\"%d\",color=white,pos=\"%d.,0.5!\"]\n" -#define DAG_INVISEDGE "%d->%d [style=\"invis\"];\n" -#define DAG_STARTNODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n" -#define DAG_NODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n" -#define DAG_FIRSTEDGE_PIV "%d->p%d_m%d_k0\n" -#define DAG_FIRSTEDGE_TS "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n" -#define DAG_FIRSTEDGE_TT "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n" -#define DAG_EDGE_PIV "p%d_m%d_k%d->p%d_m%d_k%d [width=0.1,color=\"%s\"]\n" -#define DAG_EDGE_TS "p%d_m%d_k%d->p%d_m%d_k%d [style=dotted, width=0.1,color=\"%s\"]\n" -#define DAG_EDGE_TT "p%d_m%d_k%d->p%d_m%d_k%d [style=dashed, width=0.1,color=\"%s\"]\n" - -char *color[] = { - "red", - "blue", - "green", - "orange", - "cyan", - "purple", - "yellow", -}; -#define DAG_NBCOLORS 7 - -void libhqr_tree_print_dag( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, char *filename ) -{ - int *pos, *next, *done; - int k, m, n, lpos, prev, length; - int minMN = libhqr_imin( A->mt, A->nt ); - FILE *f = fopen( filename, "w" ); - - done = (int*)malloc( A->mt * sizeof(int) ); - pos = (int*)malloc( A->mt * sizeof(int) ); - next = (int*)malloc( A->mt * sizeof(int) ); - memset(pos, 0, A->mt * sizeof(int) ); - memset(next, 0, A->mt * sizeof(int) ); - - /* Print header */ - fprintf(f, DAG_HEADER ); /*, A->mt+2, minMN+2 );*/ - for(m=0; m < A->mt; m++) { - fprintf(f, DAG_LABELNODE, m, m, m); - } - - for(k=0; k<minMN; k++ ) { - int nb2reduce = A->mt - k - 1; - - for(m=k; m < A->mt; m++) { - fprintf(f, DAG_STARTNODE, m, A->mt, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]); - next[m] = qrtree->nextpiv( qrtree, k, m, A->mt); - } - - while( nb2reduce > 0 ) { - memset(done, 0, A->mt * sizeof(int) ); - for(m=A->mt-1; m > (k-1); m--) { - n = next[m]; - if ( next[n] != A->mt ) - continue; - if ( n != A->mt ) { - lpos = libhqr_imax( pos[m], pos[n] ); - lpos++; - pos[m] = lpos; - pos[n] = lpos; - - fprintf(f, DAG_NODE, m, n, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]); - - prev = qrtree->prevpiv( qrtree, k, m, n ); - fprintf(f, DAG_EDGE_PIV, - m, prev, k, - m, n, k, - color[ (m%qrtree->p) % DAG_NBCOLORS ]); - - prev = qrtree->prevpiv( qrtree, k, n, n ); - if ( qrtree->gettype(qrtree, k, n) == 0 ) - fprintf(f, DAG_EDGE_TS, - n, prev, k, - m, n, k, - color[ (m%qrtree->p) % DAG_NBCOLORS ]); - else - fprintf(f, DAG_EDGE_TT, - n, prev, k, - m, n, k, - color[ (m%qrtree->p) % DAG_NBCOLORS ]); - - next[m] = qrtree->nextpiv( qrtree, k, m, n); - done[m] = done[n] = 1; - nb2reduce--; - } - } - } - } - - length = 0; - for(m=0; m < A->mt; m++) { - length = libhqr_imax(length, pos[m]); - } - length++; - for(k=0; k<length; k++) - fprintf(f, DAG_LENGTHNODE, k, k, k); - fprintf(f, DAG_FOOTER); - - printf("Tic Max = %d\n", length-1); - - fclose( f ); - free(pos); - free(next); -} diff --git a/src/gendot.c b/src/gendot.c new file mode 100644 index 0000000000000000000000000000000000000000..50cd4cddfcf364bdbf6a92cbfe6b9fc8152f2e9e --- /dev/null +++ b/src/gendot.c @@ -0,0 +1,132 @@ +/** + * + * @file gendot.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + */ +#include "libhqr_internal.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#define DAG_HEADER "digraph G { orientation=portrait; \n" +#define DAG_FOOTER "} // close graph\n" +#define DAG_LABELNODE "%d [label=\"%d\",color=white,pos=\"-1.,-%d.!\"]\n" +#define DAG_LENGTHNODE "l%d [label=\"%d\",color=white,pos=\"%d.,0.5!\"]\n" +#define DAG_INVISEDGE "%d->%d [style=\"invis\"];\n" +#define DAG_STARTNODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n" +#define DAG_NODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n" +#define DAG_FIRSTEDGE_PIV "%d->p%d_m%d_k0\n" +#define DAG_FIRSTEDGE_TS "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n" +#define DAG_FIRSTEDGE_TT "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n" +#define DAG_EDGE_PIV "p%d_m%d_k%d->p%d_m%d_k%d [width=0.1,color=\"%s\"]\n" +#define DAG_EDGE_TS "p%d_m%d_k%d->p%d_m%d_k%d [style=dotted, width=0.1,color=\"%s\"]\n" +#define DAG_EDGE_TT "p%d_m%d_k%d->p%d_m%d_k%d [style=dashed, width=0.1,color=\"%s\"]\n" + +char *color[] = { + "red", + "blue", + "green", + "orange", + "cyan", + "purple", + "yellow", +}; +#define DAG_NBCOLORS 7 + +void +libhqr_print_dot( const libhqr_tree_t *qrtree, + const char *filename ) +{ + int *pos, *next, *done; + int k, m, n, lpos, prev, length; + int minMN = libhqr_imin( qrtree->mt, qrtree->nt ); + FILE *f = fopen( filename, "w" ); + + done = (int*)malloc( qrtree->mt * sizeof(int) ); + pos = (int*)malloc( qrtree->mt * sizeof(int) ); + next = (int*)malloc( qrtree->mt * sizeof(int) ); + memset(pos, 0, qrtree->mt * sizeof(int) ); + memset(next, 0, qrtree->mt * sizeof(int) ); + + /* Print header */ + fprintf(f, DAG_HEADER ); /*, A->mt+2, minMN+2 );*/ + for(m=0; m < qrtree->mt; m++) { + fprintf(f, DAG_LABELNODE, m, m, m); + } + + for(k=0; k<minMN; k++ ) { + int nb2reduce = qrtree->mt - k - 1; + + for(m=k; m < qrtree->mt; m++) { + fprintf(f, DAG_STARTNODE, m, qrtree->mt, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]); + next[m] = qrtree->nextpiv( qrtree, k, m, qrtree->mt); + } + + while( nb2reduce > 0 ) { + memset(done, 0, qrtree->mt * sizeof(int) ); + for(m=qrtree->mt-1; m > (k-1); m--) { + n = next[m]; + if ( next[n] != qrtree->mt ) + continue; + if ( n != qrtree->mt ) { + lpos = libhqr_imax( pos[m], pos[n] ); + lpos++; + pos[m] = lpos; + pos[n] = lpos; + + fprintf(f, DAG_NODE, m, n, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]); + + prev = qrtree->prevpiv( qrtree, k, m, n ); + fprintf(f, DAG_EDGE_PIV, + m, prev, k, + m, n, k, + color[ (m%qrtree->p) % DAG_NBCOLORS ]); + + prev = qrtree->prevpiv( qrtree, k, n, n ); + if ( qrtree->gettype(qrtree, k, n) == 0 ) + fprintf(f, DAG_EDGE_TS, + n, prev, k, + m, n, k, + color[ (m%qrtree->p) % DAG_NBCOLORS ]); + else + fprintf(f, DAG_EDGE_TT, + n, prev, k, + m, n, k, + color[ (m%qrtree->p) % DAG_NBCOLORS ]); + + next[m] = qrtree->nextpiv( qrtree, k, m, n); + done[m] = done[n] = 1; + nb2reduce--; + } + } + } + } + + length = 0; + for(m=0; m < qrtree->mt; m++) { + length = libhqr_imax(length, pos[m]); + } + length++; + for(k=0; k<length; k++) + fprintf(f, DAG_LENGTHNODE, k, k, k); + fprintf(f, DAG_FOOTER); + + printf("Tic Max = %d\n", length-1); + + fclose( f ); + free(pos); + free(next); +} diff --git a/src/gensvg.c b/src/gensvg.c new file mode 100644 index 0000000000000000000000000000000000000000..6d52666719eea56ef1101f1bfc943b20ef15f96e --- /dev/null +++ b/src/gensvg.c @@ -0,0 +1,242 @@ +/** + * + * @file gensvg.c + * + * File for algorithm of treewalking. + * + * @copyright 2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + */ +#include "libhqr_internal.h" +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#define WIDTH 50 +#define HEIGHT 50 +#define SIZE 100 + +/* + * Global array for color + */ +char *colortree[] = {"red", "blue", "green", "orange", "cyan", "purple", "yellow" }; +#define NBCOLORS (sizeof( colortree ) / sizeof( char* )) + +/* + * functions writing in the svg file + */ +static void +drawsvg_header( FILE *file ) +{ + int rc; + + rc = fprintf(file, + "<?xml version=\"1.0\" standalone=\"no\"?>\n" + "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\" \n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n" + "<svg width=\"2000\" height=\"2000\" version=\"1.1\" \n xmlns=\"http://www.w3.org/2000/svg\">\n"); + + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_header)\n"); + } + return; +} + +static void +drawsvg_top_TS( FILE *file, int k, int x, int y, int w, int h ) +{ + int rc; + rc = fprintf(file,"<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\" fill=\"%s\" /> \n", x, y, w, h, colortree[k%NBCOLORS]); + + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_top_TS)\n"); + } + return; +} + +static void +drawsvg_bot_TS( FILE *file, int k, int x, int y, int w, int h ) +{ + int rc, x2, y2, w2, h2; + + rc = fprintf(file,"<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\" fill=\"%s\" /> \n", x, y, w, h, colortree[k%NBCOLORS]); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TS)\n"); + return; + } + + x2 = x + (w / 4); + y2 = y + (h / 4); + w2 = (w / 2); + h2 = (h / 2); + + rc = fprintf(file,"<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\" fill =\"white\"/> \n", x2, y2, w2, h2); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TS)\n"); + } + return; +} + +static void +drawsvg_top_TT( FILE *file, int k, int x, int y, int w, int h ) +{ + int rc; + rc = fprintf( file,"<circle cx=\"%d\" cy=\"%d\" r=\"%d\" fill=\"%s\" /> \n", + x + w / 2, y + h / 2, w / 2, colortree[k%NBCOLORS] ); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_top_TT)\n"); + } + return; +} + +static void +drawsvg_bot_TT( FILE *file, int k, int x, int y, int w, int h ) +{ + int rc; + rc = fprintf( file,"<circle cx=\"%d\" cy=\"%d\" r=\"%d\" fill=\"%s\" /> \n", + x + w / 2, y + h / 2, w / 2, colortree[k%NBCOLORS] ); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TT)\n"); + return; + } + + rc = fprintf( file,"<circle cx=\"%d\" cy=\"%d\" r=\"%d\" fill=\"white\" /> \n", + x + w / 2, y + h / 2, w / 4 ); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TT)\n"); + } + return; +} + +static void +drawsvg_line( FILE *file, int k, int x1, int y1, int x2, int y2 ) +{ + int rc; + rc = fprintf(file,"<line x1=\"%d\" y1=\"%d\" x2=\"%d\" y2=\"%d\" style=\"fill:none;stroke:%s;stroke-width:2px;\"/> \n", x1, y1, x2, y2, colortree[k%NBCOLORS]); + + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_line)\n"); + } + return; +} + +static void +drawsvg_footer( FILE *file ) +{ + int rc; + rc = fprintf(file, "</svg>\n"); + if (rc < 0) { + fprintf(stderr, "Something wrong happened while writing the file (drawsvg_footer)\n"); + } + return; +} + + +static void +drawsvg_lines_rowm( FILE *file, int k, + int p, int m, int beg_p, int beg_m, int end ) +{ + int yp, ym; + int x, xp, xm; + + /* Row of the tiles */ + ym = SIZE + SIZE * m; + yp = SIZE + SIZE * p; + + /* Starting position of the tiles */ + xm = (SIZE + (SIZE / 4)) + SIZE * beg_m; + xp = (SIZE + (SIZE / 4)) + SIZE * beg_p; + + /* Final position of the tiles */ + x = SIZE + SIZE * end; + + /* Horizontal lines */ + drawsvg_line( file, k, xm, ym, x + (SIZE / 4), ym ); + drawsvg_line( file, k, xp, yp, x + (SIZE / 4), yp ); + + /* Vertical line */ + drawsvg_line( file, k, x, ym, x, yp ); +} + +static void +drawsvg_lines_stepk( const libhqr_tree_t *qrtree, FILE *file, + int k, int *tiles, int *step ) +{ + int i, m, p, end; + + /* Get order for step k */ + libhqr_walk_stepk( qrtree, k, tiles+(k+1) ); + + for(i = k+1; i < qrtree->mt; i++){ + m = tiles[i]; + p = qrtree->currpiv(qrtree, k, m); + + end = libhqr_imax( step[p], step[m] ) + 1; + + /* Draw horizontal lines for rows p and m */ + drawsvg_lines_rowm( file, k, p, m, step[p], step[m], end ); + + /* Update last time the rows p and m have been modified for future lines */ + step[m] = end; + step[p] = end; + } +} + +static void +drawsvg_nodes_rowm( FILE *file, int k, + int type, int p, int m, int step_m ) +{ + int x, yp, ym; + x = ((SIZE * 3) / 4) + SIZE * step_m; + ym = ((SIZE * 3) / 4) + SIZE * m; + yp = ((SIZE * 3) / 4) + SIZE * p; + + if ( type == LIBHQR_KILLED_BY_TS ) { + drawsvg_top_TS(file, k, x, yp, WIDTH, HEIGHT ); + drawsvg_bot_TS(file, k, x, ym, WIDTH, HEIGHT ); + } + else { + drawsvg_top_TT(file, k, x, yp, WIDTH, HEIGHT ); + drawsvg_bot_TT(file, k, x, ym, WIDTH, HEIGHT ); + } +} + +void +libhqr_print_svg( const libhqr_tree_t *qrtree, + const char *filename ) +{ + FILE *file; + int *tiles, *steps; + int minMN, k, i; + + minMN = libhqr_imin( qrtree->mt, qrtree->nt ); + + tiles = (int*)calloc( qrtree->mt, sizeof(int)); + steps = (int*)calloc( qrtree->mt, sizeof(int)); + + file = fopen(filename,"w+"); + + drawsvg_header(file); + for (k = 0; k < minMN; k++) { + /* Drawing the lines */ + drawsvg_lines_stepk( qrtree, file, k, tiles, steps ); + + /* Drawing the rectangles */ + for (i = k+1; i < qrtree->mt; i++) { + drawsvg_nodes_rowm( file, k, + qrtree->gettype(qrtree, k, i), + qrtree->currpiv(qrtree, k, i), i, steps[i] ); + } + } + drawsvg_footer(file); + fclose(file); + + free(tiles); + free(steps); +} diff --git a/src/high_binary.c b/src/high_binary.c new file mode 100644 index 0000000000000000000000000000000000000000..ef9d20b570aea371ec010b2f5239f490151f4312 --- /dev/null +++ b/src/high_binary.c @@ -0,0 +1,101 @@ +/** + * + * @file high_binary.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for high level binary tree + * + */ +#include "libhqr_internal.h" +#include <math.h> + +static int +hqr_high_binary_currpiv(const hqr_subpiv_t *arg, int k, int m) +{ + int tmp1 = m - k; + int tmp2 = 1; + (void)arg; + + if ( tmp1 == 0) + return 0; + while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) { + tmp1 = tmp1 >> 1; + tmp2 = tmp2 << 1; + } + assert( m - tmp2 >= k ); + return m - tmp2; +} + +static int +hqr_high_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + int tmpp, bit; + myassert( (start == arg->ldd) || (hqr_high_binary_currpiv( arg, k, start ) == p) ); + + if ( start <= p ) + return arg->ldd; + + int offset = p - k; + bit = 0; + if (start != arg->ldd) { + while( ( (start - k) & (1 << bit ) ) == 0 ) + bit++; + bit++; + } + + tmpp = offset | (1 << bit); + if ( ( tmpp != offset ) && (tmpp < arg->p) && ( tmpp+k < arg->ldd ) ) + return tmpp + k; + else + return arg->ldd; +} + +static int +hqr_high_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + int offset = p - k; + + myassert( start >= p && ( start == p || hqr_high_binary_currpiv( arg, k, start ) == p ) ); + + if ( (start == p) && ( offset%2 == 0 ) ) { + int i, bit, tmp; + if ( offset == 0 ) + bit = (int)( log( (double)( libhqr_imin(arg->p, arg->ldd - k) ) ) / log( 2. ) ); + else { + bit = 0; + while( (offset & (1 << bit )) == 0 ) + bit++; + } + for( i=bit; i>-1; i--){ + tmp = offset | (1 << i); + if ( ( offset != tmp ) && ( tmp < arg->p ) && (tmp+k < arg->ldd) ) + return tmp+k; + } + return arg->ldd; + } + + if ( (start - p) > 1 ) + return p + ( (start - p) >> 1 ); + else { + return arg->ldd; + } +} + +void +hqr_high_binary_init(hqr_subpiv_t *arg) { + arg->currpiv = hqr_high_binary_currpiv; + arg->nextpiv = hqr_high_binary_nextpiv; + arg->prevpiv = hqr_high_binary_prevpiv; + arg->ipiv = NULL; +} + diff --git a/src/high_fibonacci.c b/src/high_fibonacci.c new file mode 100644 index 0000000000000000000000000000000000000000..a41ec888a85218bd3ac8ef958ae08beeac84fa18 --- /dev/null +++ b/src/high_fibonacci.c @@ -0,0 +1,152 @@ +/** + * + * @file high_fibonacci.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for high level fibonacci tree, and init for duplicated greedy. + * + */ +#include "libhqr_internal.h" +#include <stdlib.h> + +/**************************************************** + * HQR_HIGH_FIBONACCI_TREE + ***************************************************/ +/* Return the pivot to use for the row m at step k */ +static inline int +hqr_high_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { + return (qrpiv->ipiv)[ m-k ] + k; +} + +/* Return the last row which has used the row m as a pivot in step k before the row start */ +static inline int +hqr_high_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) { + int i; + myassert( p >= k && start >= p && start-k <= qrpiv->p); + + int lp = p - k; + int lstart= start - k; + int end = libhqr_imin(qrpiv->ldd-k, qrpiv->p); + for( i=lstart+1; i<end; i++ ) + if ( (qrpiv->ipiv)[i] == lp ) + return i+k; + return qrpiv->ldd; +} + +/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ +static inline int +hqr_high_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) { + int i; + myassert( p>=k && (start == qrpiv->ldd || start-k <= qrpiv->p) ); + + for( i=libhqr_imin(start-k-1, qrpiv->p-1); i>0; i-- ) + if ( (qrpiv->ipiv)[i] == (p-k) ) + return i + k; + return (qrpiv->ldd); +} + +void +hqr_high_fibonacci_init(hqr_subpiv_t *arg) { + int *ipiv; + int p; + + arg->currpiv = hqr_high_fibonacci_currpiv; + arg->nextpiv = hqr_high_fibonacci_nextpiv; + arg->prevpiv = hqr_high_fibonacci_prevpiv; + + p = arg->p; + + arg->ipiv = (int*)calloc( p, sizeof(int) ); + ipiv = arg->ipiv; + + /* + * Fibonacci of order 1: + * u_(n+1) = u_(n) + 1 + */ + { + int f1, k, m; + + /* Fill in the first column */ + f1 = 1; + for (m=1; m < p; ) { + for (k=0; (k < f1) && (m < p); k++, m++) { + ipiv[m] = m - f1; + } + f1++; + } + } +} + +/**************************************************** + * HQR_HIGH_GREEDY_TREE (1 panel duplicated) + ***************************************************/ +void hqr_high_greedy1p_init(hqr_subpiv_t *arg){ + int *ipiv; + int mt, p; + + arg->currpiv = hqr_high_fibonacci_currpiv; + arg->nextpiv = hqr_high_fibonacci_nextpiv; + arg->prevpiv = hqr_high_fibonacci_prevpiv; + + mt = arg->ldd; + p = arg->p; + + arg->ipiv = (int*)calloc( p, sizeof(int) ); + ipiv = arg->ipiv; + + { + int minMN = 1; + int j, k, height, start, end, firstk = 0; + int *nT = (int*)calloc(minMN, sizeof(int)); + int *nZ = (int*)calloc(minMN, sizeof(int)); + + nT[0] = mt; + nZ[0] = libhqr_imax( mt - p, 0 ); + for(k=1; k<minMN; k++) { + height = libhqr_imax(mt-k-p, 0); + nT[k] = height; + nZ[k] = height; + } + + k = 0; + while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) && + ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) + && ( firstk < minMN ) ) { + height = (nT[k] - nZ[k]) / 2; + if ( height == 0 ) { + while ( (firstk < minMN) && + ( nT[firstk] == mt - firstk ) && + ( nZ[firstk]+1 == nT[firstk] ) ) { + firstk++; + } + k = firstk; + continue; + } + + start = mt - nZ[k] - 1; + end = start - height; + nZ[k] += height; + if (k < minMN-1) nT[k+1] = nZ[k]; + + for( j=start; j > end; j-- ) { + ipiv[ k*p + j-k ] = (j - height); + } + + k++; + if (k > minMN-1) k = firstk; + } + + free(nT); + free(nZ); + } +} diff --git a/src/high_flat.c b/src/high_flat.c new file mode 100644 index 0000000000000000000000000000000000000000..29003463fa6a5573b7ad5865c0faed7cfb5d242e --- /dev/null +++ b/src/high_flat.c @@ -0,0 +1,61 @@ +/** + * + * @file high_flat.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for high level flat tree + * + */ +#include "libhqr_internal.h" + +static int +hqr_high_flat_currpiv(const hqr_subpiv_t *arg, int k, int m) +{ + (void)arg; + (void)m; + return k; +} + +static int +hqr_high_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + if ( p == k && arg->ldd > 1 ) { + if ( start == arg->ldd ) + return p+1; + else if ( start < arg->ldd && (start-k < arg->p-1) ) + return start+1; + } + return arg->ldd; +} + +static int +hqr_high_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + assert( arg->p > 1 ); + if ( p == k && arg->ldd > 1 ) { + if ( start == p && p != arg->ldd-1 ) + return libhqr_imin( p + arg->p - 1, arg->ldd - 1 ); + else if ( start > p + 1 && (start-k < arg->p)) + return start-1; + } + return arg->ldd; +} + +void +hqr_high_flat_init(hqr_subpiv_t *arg) { + arg->currpiv = hqr_high_flat_currpiv; + arg->nextpiv = hqr_high_flat_nextpiv; + arg->prevpiv = hqr_high_flat_prevpiv; + arg->ipiv = NULL; +} + diff --git a/src/high_greedy.c b/src/high_greedy.c new file mode 100644 index 0000000000000000000000000000000000000000..d16b4c5ce3802f972738312ad7eb836c3dd000b6 --- /dev/null +++ b/src/high_greedy.c @@ -0,0 +1,110 @@ +/** + * + * @file high_greedy.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for high level greedy tree + * + */ +#include "libhqr_internal.h" +#include <stdlib.h> + +static int +hqr_high_greedy_currpiv(const hqr_subpiv_t *arg, int k, int m) +{ + myassert( m >= k && m < k+arg->p ); + return (arg->ipiv)[ k * (arg->p) + (m - k) ]; +} + +static int +hqr_high_greedy_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + int i; + myassert( (start >= k && start < k+arg->p) || start == arg->ldd ); + for( i=libhqr_imin(start-1, k+arg->p-1); i > k; i-- ) + if ( (arg->ipiv)[i-k + k* (arg->p)] == p ) + return i; + return (arg->ldd); +} + +static int +hqr_high_greedy_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) +{ + int i; + myassert( (start >= k && start < k+arg->p) || start == p ); + for( i=start-k+1; i<arg->p; i++ ) + if ( (arg->ipiv)[i + k * (arg->p)] == p ) + return k+i; + return arg->ldd; +} + +void +hqr_high_greedy_init(hqr_subpiv_t *arg, int minMN) { + int *ipiv; + int mt, p; + + arg->currpiv = hqr_high_greedy_currpiv; + arg->nextpiv = hqr_high_greedy_nextpiv; + arg->prevpiv = hqr_high_greedy_prevpiv; + + mt = arg->ldd; + p = arg->p; + + arg->ipiv = (int*)calloc( p * minMN, sizeof(int) ); + ipiv = arg->ipiv; + + { + int j, k, height, start, end, firstk = 0; + int *nT = (int*)calloc(minMN, sizeof(int)); + int *nZ = (int*)calloc(minMN, sizeof(int)); + + nT[0] = mt; + nZ[0] = libhqr_imax( mt - p, 0 ); + for(k=1; k<minMN; k++) { + height = libhqr_imax(mt-k-p, 0); + nT[k] = height; + nZ[k] = height; + } + + k = 0; + while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) && + ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) + && ( firstk < minMN ) ) { + height = (nT[k] - nZ[k]) / 2; + if ( height == 0 ) { + while ( (firstk < minMN) && + ( nT[firstk] == mt - firstk ) && + ( nZ[firstk]+1 == nT[firstk] ) ) { + firstk++; + } + k = firstk; + continue; + } + + start = mt - nZ[k] - 1; + end = start - height; + nZ[k] += height; + if (k < minMN-1) nT[k+1] = nZ[k]; + + for( j=start; j > end; j-- ) { + ipiv[ k*p + j-k ] = (j - height); + } + + k++; + if (k > minMN-1) k = firstk; + } + + free(nT); + free(nZ); + } +} diff --git a/src/hqr.c b/src/hqr.c new file mode 100644 index 0000000000000000000000000000000000000000..d1b856d1b3d2d47bfabc97e937438f7d87fc9c42 --- /dev/null +++ b/src/hqr.c @@ -0,0 +1,1105 @@ +/** + * + * @file hqr.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * This file contains all the function to describe the dependencies + * used in the Xgeqrf_param.jdf file. + * The QR factorization done with this file relies on three levels: + * - the first one is using a flat tree with TS kernels. The + * height of this tree is defined by the parameter 'a'. If 'a' + * is set to A->mt, the factorization is identical to the one + * perform by PLASMA_zgeqrf. + * For all subdiagonal "macro-tiles", the line reduced is always + * the first. For all diagonal "macro-tiles", the factorization + * performed is identical to the one performed by PLASMA_zgeqrf. + * + * - the third level is using a reduction tree of size 'p'. By + * default, the parameter 'p' should be equal to the number of + * processors used for the computation, but can be set + * differently. (see further example). The type of tree used at + * this level is defined by the hlvl parameter. It can be flat + * or greedy. + * CODE DETAILS: This tree and all the function related to it + * are performing a QR factorization on a band matrix with 'p' + * the size of the band. All the functions take global indices + * as input and return global indices as output. + * + * - Finally, a second 'low' level of reduction tree is applied. + * The size of this tree is induced by the parameters 'a' and 'p' + * from the first and third levels and is A->mt / ( p * a ). This + * tree is reproduced p times for each subset of tiles + * S_k = {i in [0, A->mt-1] \ i%p*a = k } with k in [0, p-1]. + * The tree used for the reduction is defined by the llvl + * parameter and can be: flat, greedy, fibonacci or binary. + * CODE DETAILS: For commodity, the size of this tree is always + * ceil(A->mt / (p * a) ) inducing some extra tests in the code. + * All the functions related to this level of tree take as input + * the local indices in the A->mt / (p*a) matrix and the global + * k. They return the local index. The reductions are so + * performed on a trapezoidal matrices where the step is defined + * by a: + * <- min( lhlvl_mt, min( mt, nt ) ) -> + * __a__ a a + * | |_____ + * | |_____ + * | |_____ + * llvl_mt = ceil(MT/ (a*p)) | |_____ + * | |_____ + * |___________________________________| + * + * + * + * At each step of the factorization, the lines of tiles are divided + * in 4 types: + * - QRPARAM_TILE_TS: They are the lines annihilated by a TS + * kernel, these lines are never used as an annihilator. They are + * the lines i, with 1 < (i/p)%a < a and i > (k+1)*p + * - QRPARAM_TILE_LOCALTT: They are the lines used as annhilitor + * in the TS kernels annihiling the QRPARAM_TILE_TS lines. They + * are themselves annihilated by the TT kernel of the low level + * reduction tree. The roots of the local trees are the lines i, + * with i/p = k. + * - QRPARAM_TILE_DOMINO: These are the lines that are + * annhilihated with a domino effect in the band defined by (i/p) + * <= k and i >= k + * - QRPARAM_TILE_DISTTT: These are the lines annihilated by the + * high level tree to reduce communications. + * These lines are defined by (i-k)/p = 0. + */ +#include "libhqr_internal.h" +#include "libhqr_queue.h" +#include <stdlib.h> + +/* + * Common functions + */ +static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); +static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ); +static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ); +static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ); + +/* Permutation */ +static void hqr_genperm ( libhqr_tree_t *qrtree ); +static int hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m ); + +/**************************************************** + * Common ipiv + *************************************************** + * + * Common prameters to the 4 following functions: + * a - Parameter a for the tunable QR + * p - Parameter p for the tunable QR + * k - Step k of the QR factorization + * + */ + +/* + * Extra parameter: + * gmt - Global number of tiles in a column of the complete distributed matrix + * Return: + * The number of geqrt to execute in the panel k + */ +static int +hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) +{ + int a = qrtree->a; + int p = qrtree->p; + int gmt = qrtree->mt; + int domino = qrtree->domino; + int pa = p * a; + int nb_1, nb_2, nb_3; + int nb_11, nb_12; + + /* Number of tasks of type 3 */ + nb_3 = p; + + /* Number of tasks of type 2 */ + if ( domino ) { + nb_2 = k * (p - 1); + + /* First multiple of p*a under the diagonal of step k */ + nb_11 = ( ( (p * (k+1)) + pa - 1 ) / pa ) * pa; + } + else { + /* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */ + nb_2 = nbextra1_formula; + + /* First multiple of p*a under the diagonal of step 1 */ + nb_11 = ( (k + p + pa - 1 ) / pa ) * pa; + } + + /* Last multiple of p*a lower than A->mt */ + nb_12 = ( gmt / pa ) * pa; + + /* Number of tasks of type 1 between nb_11 and nb_12 */ + nb_1 = (nb_12 - nb_11) / a; + + /* Add leftover */ + nb_1 += libhqr_imin( p, gmt - nb_12 ); + + return libhqr_imin( nb_1 + nb_2 + nb_3, gmt - k); +} + +/* + * Extra parameter: + * i - indice of the geqrt in the continuous space + * Return: + * The global indice m of the i th geqrt in the panel k + */ +static int +hqr_getm( const libhqr_tree_t *qrtree, int k, int i ) +{ + int a = qrtree->a; + int p = qrtree->p; + int domino = qrtree->domino; + + int pos1, j, pa = p * a; + int nbextra1 = nbextra1_formula; + int nb23 = p + (domino ? k*(p-1) : nbextra1 ); + + /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ + if ( i < nb23 ) + return k+i; + /* Tile of type 1 */ + else { + j = i - nb23; + pa = p * a; + if ( domino ) + pos1 = ( ( (p * (k+1)) + pa - 1 ) / pa ) * pa; + else + pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; + return pos1 + (j/p) * pa + j%p; + } +} + +/* + * Extra parameter: + * m - The global indice m of a geqrt in the panel k + * Return: + * The index i of the geqrt in the panel k + */ +static int +hqr_geti( const libhqr_tree_t *qrtree, int k, int m ) +{ + int a = qrtree->a; + int p = qrtree->p; + int domino = qrtree->domino; + int lm = m; + + int pos1, j, pa = p * a; + int nbextra1 = nbextra1_formula; + int nb23 = p + ( domino ? k*(p-1) : nbextra1 ); + int end2 = p + ( domino ? k*p : k + nbextra1 ); + + /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ + if ( lm < end2 ) + return lm-k; + /* Tile of type 1 */ + else { + if ( domino ) + pos1 = ( ( (p * (k+1)) + pa - 1 ) / pa ) * pa; + else + pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; + j = lm - pos1; + return nb23 + (j / pa) * p + j%pa; + } +} + +/* + * Extra parameter: + * m - The global indice m of the row in the panel k + * Return + * -1 - Error + * 0 - if m is reduced thanks to a TS kernel + * 1 - if m is reduced thanks to the low level tree + * 2 - if m is reduced thanks to the bubble tree + * 3 - if m is reduced in distributed + */ +static int +hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ) +{ + int a = qrtree->a; + int p = qrtree->p; + int domino = qrtree->domino; + + int lm = m; + myassert( lm >= k ); + + /* Element to be reduce in distributed */ + if (lm < k + p) { + return 3; + } + /* Element on the local diagonal */ + else if ( domino && lm < p * (k+1) ) + return 2; + /* Lower triangle of the matrix */ + else { + if( (lm / p) % a == 0 ) + return 1; + else + return 0; + } +} + +/**************************************************** + * + * Generic functions currpiv,prevpiv,nextpiv + * + ***************************************************/ +static int +hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, tmpk; + int lm, rank; + int a = qrtree->a; + int p = qrtree->p; + int domino = qrtree->domino; + int gmt = qrtree->mt; + + lm = m / p; /* Local index in the distribution over p domains */ + rank = m % p; /* Staring index in this distribution */ + + /* TS level common to every case */ + if ( domino ) { + switch( hqr_gettype( qrtree, k, m ) ) + { + case 0: + tmp = lm / a; + if ( tmp == k / a ) + return k * p + rank ; /* Below to the first bloc including the diagonal */ + else + return tmp * a * p + rank ; + break; + case 1: + tmp = arg->llvl->currpiv(arg->llvl, k, m); + return ( tmp == k / a ) ? k * p + rank : tmp * a * p + rank; + break; + case 2: + return m - p; + break; + case 3: + if ( arg->hlvl != NULL ) + return arg->hlvl->currpiv(arg->hlvl, k, m); + default: + return gmt; + } + } + else { + switch( hqr_gettype( qrtree, k, m ) ) + { + case 0: + tmp = lm / a; + /* tmpk = (k + p - 1 - m%p) / p / a; */ + tmpk = k / (p * a); + return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ; + break; + case 1: + tmp = arg->llvl->currpiv(arg->llvl, k, m); + /* tmpk = (k + p - 1 - m%p) / p / a; */ + tmpk = k / (p * a); + return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ; + break; + case 2: + return m - p; + break; + case 3: + if ( arg->hlvl != NULL ) + return arg->hlvl->currpiv(arg->hlvl, k, m); + default: + return gmt; + } + } +} + +/** + * hqr_nextpiv - Computes the next row killed by the row p, after + * it has kill the row start. + * + * @param[in] k + * Factorization step + * + * @param[in] pivot + * Line used as killer + * + * @param[in] start + * Starting point to search the next line killed by p after start + * start must be equal to A.mt to find the first row killed by p. + * if start != A.mt, start must be killed by p + * + * @return: + * - -1 if start doesn't respect the previous conditions + * - m, the following row killed by p if it exists, A->mt otherwise + */ +static int +hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, ls, lp, nextp; + int lpivot, rpivot, lstart; + int a = qrtree->a; + int p = qrtree->p; + int gmt = qrtree->mt; + + lpivot = pivot / p; /* Local index in the distribution over p domains */ + rpivot = pivot % p; /* Staring index in this distribution */ + + /* Local index in the distribution over p domains */ + lstart = ( start == gmt ) ? arg->llvl->ldd * a : start / p; + + myassert( start > pivot && pivot >= k ); + myassert( start == gmt || pivot == hqr_currpiv( qrtree, k, start ) ); + + /* TS level common to every case */ + ls = (start < gmt) ? hqr_gettype( qrtree, k, start ) : -1; + lp = hqr_gettype( qrtree, k, pivot ); + + switch( ls ) + { + case -1: + + if ( lp == LIBHQR_KILLED_BY_TS ) { + myassert( start == gmt ); + return gmt; + } + + case LIBHQR_KILLED_BY_TS: + + /* If the tile is over the diagonal of step k, skip directly to type 2 */ + if ( arg->domino && lpivot < k ) + goto next_2; + + if ( start == gmt ) + nextp = pivot + p; + else + nextp = start + p; + + if ( ( nextp < gmt ) && + ( nextp < pivot + a*p ) && + ( (nextp/p)%a != 0 ) ) + return nextp; + start = gmt; + lstart = arg->llvl->ldd * a; + + case LIBHQR_KILLED_BY_LOCALTREE: + + /* If the tile is over the diagonal of step k, skip directly to type 2 */ + if ( arg->domino && lpivot < k ) + goto next_2; + + /* Get the next pivot for the low level tree */ + tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a ); + + if ( (tmp * a * p + rpivot >= gmt) + && (tmp == arg->llvl->ldd-1) ) + tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp); + + if ( tmp != arg->llvl->ldd ) + return tmp * a * p + rpivot; + + next_2: + /* no next of type 1, we reset start to search the next 2 */ + start = gmt; + lstart = arg->llvl->ldd * a; + + case LIBHQR_KILLED_BY_DOMINO: + + if ( lp < LIBHQR_KILLED_BY_DOMINO ) { + return gmt; + } + + /* Type 2 are killed only once if they are strictly in the band */ + if ( arg->domino && + (start == gmt) && + (lpivot < k) && + (pivot+p < gmt) ) { + return pivot+p; + } + + /* no next of type 2, we reset start to search the next 3 */ + start = gmt; + lstart = arg->llvl->ldd * a; + + case LIBHQR_KILLED_BY_DISTTREE: + + if ( lp < LIBHQR_KILLED_BY_DISTTREE ) { + return gmt; + } + + if( arg->hlvl != NULL ) { + tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start ); + if ( tmp != gmt ) + return tmp; + } + + default: + return gmt; + } +} + +/** + * hqr_prevpiv - Computes the previous row killed by the row p, before + * to kill the row start. + * + * @param[in] k + * Factorization step + * + * @param[in] pivot + * Line used as killer + * + * @param[in] start + * Starting point to search the previous line killed by p before start + * start must be killed by p, and start must be greater or equal to p + * + * @return: + * - -1 if start doesn't respect the previous conditions + * - m, the previous row killed by p if it exists, A->mt otherwise + */ +static int +hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, ls, lp, nextp; + int lpivot, rpivot, lstart; + int a = qrtree->a; + int p = qrtree->p; + int gmt = qrtree->mt; + + lpivot = pivot / p; /* Local index in the distribution over p domains */ + rpivot = pivot % p; /* Staring index in this distribution */ + lstart = start / p; /* Local index in the distribution over p domains */ + + myassert( start >= pivot && pivot >= k && start < gmt ); + myassert( start == pivot || pivot == hqr_currpiv( qrtree, k, start ) ); + + /* T Slevel common to every case */ + ls = hqr_gettype( qrtree, k, start ); + lp = hqr_gettype( qrtree, k, pivot ); + + if ( lp == LIBHQR_KILLED_BY_TS ) + return gmt; + + myassert( lp >= ls ); + switch( ls ) + { + case LIBHQR_KILLED_BY_DISTTREE: + if( arg->hlvl != NULL ) { + tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); + if ( tmp != gmt ) + return tmp; + } + + start = pivot; + lstart = pivot / p; + + case LIBHQR_KILLED_BY_DOMINO: + /* If the tile is over the diagonal of step k, process it as type 2 */ + if ( arg->domino && lpivot < k ) { + + if ( ( start == pivot ) && + (start+p < gmt ) ) + return start+p; + + if ( lp > LIBHQR_KILLED_BY_LOCALTREE ) + return gmt; + } + + start = pivot; + lstart = pivot / p; + + /* If it is the 'local' diagonal block, we go to 1 */ + + case LIBHQR_KILLED_BY_LOCALTREE: + /* If the tile is over the diagonal of step k and is of type 2, + it cannot annihilate type 0 or 1 */ + if ( arg->domino && lpivot < k ) + return gmt; + + tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a); + + if ( (tmp * a * p + rpivot >= gmt) + && (tmp == arg->llvl->ldd-1) ) + tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp); + + if ( tmp != arg->llvl->ldd ) + return tmp * a * p + rpivot; + + start = pivot; + + case LIBHQR_KILLED_BY_TS: + /* Search for predecessor in TS tree */ + /* if ( ( start+p < gmt ) && */ + /* ( (((start+p) / p) % a) != 0 ) ) */ + /* return start + p; */ + + if ( start == pivot ) { + tmp = lpivot + a - 1 - lpivot%a; + nextp = tmp * p + rpivot; + + while( pivot < nextp && nextp >= gmt ) + nextp -= p; + } else { + nextp = start - p; /*(lstart - 1) * p + rpivot;*/ + } + assert(nextp < gmt); + if ( pivot < nextp ) + return nextp; + + default: + return gmt; + } +} + +/**************************************************** + * + * Generate the permutation required for the round-robin on TS + * + ***************************************************/ +static void +hqr_genperm( libhqr_tree_t *qrtree ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int m = qrtree->mt; + int n = qrtree->nt; + int a = qrtree->a; + int p = qrtree->p; + int domino = arg->domino; + int minMN = libhqr_imin( m, n ); + int pa = p * a; + int i, j, k; + int nbextra1; + int end2; + int mpa = m % pa; + int endpa = m - mpa; + int *perm; + + arg->perm = (int*)malloc( (m+1) * minMN * sizeof(int) ); + perm = arg->perm; + + if ( arg->tsrr ) { + for(k=0; k<minMN; k++) { + for( i=0; i<m+1; i++) { + perm[i] = -1; + } + perm += m+1; + } + perm = arg->perm; + for(k=0; k<minMN; k++) { + nbextra1 = nbextra1_formula; + + end2 = p + ( domino ? k*p : k + nbextra1 ); + end2 = (( end2 + pa - 1 ) / pa ) * pa; + end2 = libhqr_imin( end2, m ); + + /* + * All tiles of type 3, 2 and: + * - 1 when domino is disabled + * - 0 before the first multiple of pa under the considered diagonal + */ + for( i=k; i<end2; i++) { + perm[i] = i; + } + + /* All permutations in groups of pa tiles */ + assert( i%pa == 0 || i>=endpa); + for( ; i<endpa; i+=pa ) { + for(j=0; j<pa; j++) { + perm[i+j] = i + ( j + p * (k%a) )%pa; + } + } + + /* Last group of tiles */ + for( ; i<m; i++) { + perm[i] = i; + } + perm[m] = m; + perm += m+1; + } + } + else { + for(k=0; k<minMN; k++) { + for( i=0; i<m+1; i++) { + perm[i] = i; + } + perm += m+1; + } + } +} + +static inline int +hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m ) +{ + int gmt = qrtree->mt + 1; + int a = qrtree->a; + int p = qrtree->p; + int pa = p * a; + int start = m / pa * pa; + int stop = libhqr_imin( start + pa, gmt ) - start; + int i; + + if (a == 1) + return m; + + for ( i=m%p; i < stop; i+=p ) { + //if( perm[i] == m ) + if( i == m ) + return i+start; + } + + /* We should never arrive here */ + myassert( 0 ); + (void)k; +} + +/** + ******************************************************************************* + * + * @ingroup dplasma + * + * libhqr_hqr_init - Creates the tree structure that will describes the + * operation performed during QR/LQ factorization with parameterized QR/LQ + * algorithms family. + * + * Trees available parameters are described below. It is recommended to: + * - set p to the same value than the P-by-Q process grid used to distribute + * the data. (P for QR factorization, Q for LQ factorization). + * - set the low level tree to LIBHQR_GREEDY_TREE. + * - set the high level tree to: + * 1) LIBHQR_FLAT_TREE when the problem is square, because it divides + * by two the volume of communication of any other tree. + * 2) LIBHQR_FIBONACCI_TREE when the problem is tall and skinny (QR) or + * small and fat (LQ), because it reduces the critical path length. + * - Disable the domino effect when problem is square, to keep high efficiency + * kernel proportion high. + * - Enable the domino effect when problem is tall and skinny (QR) or + * small and fat (LQ) to increase parallelism and reduce critical path length. + * - Round-robin on TS domain (tsrr) option should be disabled. It is + * experimental and is not safe. + * + * These are the default when a parameter is set to -1; + * + * See http://www.netlib.org/lapack/lawnspdf/lawn257.pdf + * + ******************************************************************************* + * + * @param[inout] qrtree + * On entry, an allocated structure uninitialized. + * On exit, the structure initialized according to the parameter given. + * + * @param[in] trans + * @arg QR: Structure is initialized for QR factorization. + * @arg LQ: Structure is initialized for LQ factorization. + * + * @param[in] A + * Descriptor of the distributed matrix A to be factorized, on which + * QR/LQ factorization will be performed. + * The descriptor is untouched and only mt/nt/P parameters are used. + * + * @param[in] type_llvl + * Defines the tree used to reduce the main tiles of each local domain + * together. The matrix of those tiles has a lower triangular structure + * with a diagonal by step a. + * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the local + * tiles. + * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the local + * tiles. + * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the + * local tiles. + * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the local + * tiles. + * @arg -1: The default is used (LIBHQR_GREEDY_TREE) + * + * @param[in] type_hlvl + * Defines the tree used to reduce the main tiles of each domain. This + * is a band lower diagonal matrix of width p. + * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the tiles. + * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the tiles. + * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the + * tiles. + * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the tiles. + * @arg LIBHQR_GREEDY1P_TREE: A Greedy tree is computed for the first + * column and then duplicated on all others. + * @arg -1: The default is used (LIBHQR_FIBONACCI_TREE) + * + * @param[in] a + * Defines the size of the local domains on which a classic flat TS + * tree is performed. If a==1, then all local tiles are reduced by the + * type_lllvl tree. If a is larger than mt/p, then no local reduction + * tree is performed and type_llvl is ignored. + * If a == -1, it is set to 4 by default. + * + * @param[in] p + * Defines the number of distributed domains, ie the width of the high + * level reduction tree. If p == 1, no high level reduction tree is + * used. If p == mt, a and type_llvl are ignored since only high level + * reduction are performed. + * By default, it is recommended to set p to P if trans == + * PlasmaNoTrans, Q otherwise, where P-by-Q is the process grid used to + * distributed the data. (p > 0) + * + * @param[in] domino + * Enable/disable the domino effect that connects the high and low + * level reduction trees. Enabling the domino increases the proportion + * of TT (Triangle on top of Triangle) kernels that are less efficient, + * but increase the pipeline effect between independent factorization + * steps, reducin the critical path length. + * If disabled, it keeps the proprotion of more efficient TS (Triangle + * on top of Square) kernels high, but deteriorates the pipeline + * effect, and the critical path length. + * If domino == -1, it is enable when ration between M and N is lower + * than 1/2, and disabled otherwise. + * + * @param[in] tsrr + * Enable/Disable a round robin selection of the killer in local + * domains reduced by TS kernels. Enabling a round-robin selection of + * the killer allows to take benefit of the good pipelining of the flat + * trees and the high efficient TS kernels, while having other trees on + * top of it to reduce critical path length. + * WARNING: This option is under development and should not be enabled + * due to problem in corner cases. + * + ******************************************************************************* + * + * @retval -i if the ith parameters is incorrect. + * @retval 0 on success. + * + ******************************************************************************* + * + * @sa libhqr_hqr_finalize + * @sa libhqr_systolic_init + * + ******************************************************************************/ +int +libhqr_initfct_hqr( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_llvl, int type_hlvl, + int a, int p, + int domino, int tsrr ) +{ + double ratio = 0.0; + int low_mt, minMN; + hqr_args_t *arg; + + if (qrtree == NULL) { + fprintf(stderr, "libhqr_hqr_init, illegal value of qrtree"); + return -1; + } + if ((trans != LIBHQR_QR) && + (trans != LIBHQR_LQ)) { + fprintf(stderr, "libhqr_hqr_init, illegal value of trans"); + return -2; + } + if (A == NULL) { + fprintf(stderr, "libhqr_hqr_init, illegal value of A"); + return -3; + } + + /* Compute parameters */ + if ( a == -1) { + a = 4; /* TODO: add automatic computation */ + } + else { + a = libhqr_imax( a, 1 ); + } + + if ( p == -1 ) { + if ( trans == LIBHQR_QR ) { + p = A->p; + } + else { + p = A->nodes / A->p; + } + } + else { + p = libhqr_imax( p, 1 ); + } + + /* Domino */ + if ( domino >= 0 ) { + domino = domino ? 1 : 0; + } + else { + if (trans == LIBHQR_QR) { + ratio = ((double)(A->nt) / (double)(A->mt)); + } else { + ratio = ((double)(A->mt) / (double)(A->nt)); + } + if ( ratio >= 0.5 ) { + domino = 0; + } else { + domino = 1; + } + } + + minMN = libhqr_imin(A->mt, A->nt); + + qrtree->domino = domino; + qrtree->getnbgeqrf = hqr_getnbgeqrf; + qrtree->getm = hqr_getm; + qrtree->geti = hqr_geti; + qrtree->gettype = hqr_gettype; + qrtree->currpiv = hqr_currpiv; + qrtree->nextpiv = hqr_nextpiv; + qrtree->prevpiv = hqr_prevpiv; + + qrtree->init = LIBHQR_QRTREE_HQR; + + qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt; + qrtree->nt = minMN; + + a = libhqr_imin( a, qrtree->mt ); + + qrtree->a = a; + qrtree->p = p; + qrtree->args = NULL; + + arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) ); + arg->domino = domino; + arg->tsrr = tsrr; + arg->perm = NULL; + + arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + arg->hlvl = NULL; + + low_mt = (qrtree->mt + p * a - 1) / ( p * a ); + + arg->llvl->minMN = minMN; + arg->llvl->ldd = low_mt; + arg->llvl->a = a; + arg->llvl->p = p; + arg->llvl->domino = domino; + + switch( type_llvl ) { + case LIBHQR_FLAT_TREE : + hqr_low_flat_init(arg->llvl); + break; + case LIBHQR_FIBONACCI_TREE : + hqr_low_fibonacci_init(arg->llvl, minMN); + break; + case LIBHQR_BINARY_TREE : + hqr_low_binary_init(arg->llvl); + break; + case LIBHQR_GREEDY1P_TREE : + hqr_low_greedy1p_init(arg->llvl, minMN); + break; + case LIBHQR_GREEDY_TREE : + default: + hqr_low_greedy_init(arg->llvl, minMN); + } + + if ( p > 1 ) { + arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + + arg->llvl->minMN = minMN; + arg->hlvl->ldd = qrtree->mt; + arg->hlvl->a = a; + arg->hlvl->p = p; + arg->hlvl->domino = domino; + + switch( type_hlvl ) { + case LIBHQR_FLAT_TREE : + hqr_high_flat_init(arg->hlvl); + break; + case LIBHQR_GREEDY_TREE : + hqr_high_greedy_init(arg->hlvl, minMN); + break; + case LIBHQR_GREEDY1P_TREE : + hqr_high_greedy1p_init(arg->hlvl); + break; + case LIBHQR_BINARY_TREE : + hqr_high_binary_init(arg->hlvl); + break; + case LIBHQR_FIBONACCI_TREE : + hqr_high_fibonacci_init(arg->hlvl); + break; + default: + if ( ratio >= 0.5 ) { + hqr_high_flat_init(arg->hlvl); + } else { + hqr_high_fibonacci_init(arg->hlvl); + } + } + } + + qrtree->args = (void*)arg; + hqr_genperm( qrtree ); + + return 0; +} + +int +libhqr_initmtx_hqr( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_llvl, int type_hlvl, + int a, int p, + int domino, int tsrr ) +{ + libhqr_tree_t qrtreefct; + int rc; + + rc = libhqr_initfct_hqr( &qrtreefct, trans, A, + type_llvl, type_hlvl, a, p, + domino, tsrr ); + if ( rc != 0 ) { + return rc; + } + + libhqr_fct_to_mtx( &qrtreefct, qrtree ); + + /* Free the initial qrtree */ + libhqr_finalize( &qrtreefct ); + + return 0; +} + +int +libhqr_init_hqr( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_llvl, int type_hlvl, + int a, int p, + int domino, int tsrr ) +{ + return libhqr_initmtx_hqr( qrtree, trans, A, + type_llvl, type_hlvl, a, p, + domino, tsrr ); + +} + +/** + * libhqr_walk_stepk + * + * @param[in] arg + * Arguments specific to the reduction tree used + * + * @param[in] k + * Factorization step + * + * @param[in] tiles + * Array of size qrtree->mt that stores in the first entries the tiles + * to kill in the right order to walk through the tree. + * + */ +void +libhqr_walk_stepk( const libhqr_tree_t *qrtree, + int k, int *tiles ) +{ + int tsid = -1, ttid = -1; + int p = k; + int pivot = k; + int m = 0; + libhqr_queue_tile_t *tt = libhqr_queue_tile_new(); + libhqr_queue_tile_t *ts = libhqr_queue_tile_new(); + libhqr_queue_tile_push(&tt, k); + + while( tt ) + { + /* Stack all elements killed on the way down */ + p = qrtree->prevpiv(qrtree, k, pivot, p); + while( p != qrtree->mt ) { + /* If it's a TS tile, or a TT at the bottom of the tree that kills noone */ + if( (qrtree->gettype(qrtree, k, p) == 0) || + (qrtree->prevpiv(qrtree, k, p, p) != qrtree->mt) ) + { + libhqr_queue_tile_push(&tt,p); + /* printf("PUSH TT: %d\n", p); */ + } + libhqr_queue_tile_push(&ts, p); + /* printf("PUSH TS: %d\n", p); */ + p = qrtree->prevpiv(qrtree, k, pivot, p); + assert( p != -1 ); + } + + tsid = libhqr_queue_tile_head(&ts); + ttid = libhqr_queue_tile_pop(&tt); + + /* Unstack all element killed by TS, or by TT and already discovered */ + while( (tsid != -1) && + (tsid != ttid) ) + { + /* printf( "TS=%d, TT=%d\n", tsid, ttid ); */ + + tsid = libhqr_queue_tile_pop(&ts); + /* printf("POP TS: %d\n", tsid); */ + /* printf("Call function on (%d, %d)\n", + qrtree->currpiv(qrtree, k, tsid), tsid ); */ + assert(m < (qrtree->mt-1)); + tiles[m] = tsid; + m++; + tsid = libhqr_queue_tile_head(&ts); + } + pivot = p = ttid; + } + + //assert(m == (qrtree->mt-1)); + assert(ts == NULL); + assert(tt == NULL); +} + +/** + ******************************************************************************* + * + * @ingroup dplasma + * + * libhqr_hqr_finalize - Cleans the qrtree data structure allocated by call to + * libhqr_hqr_init(). + * + ******************************************************************************* + * + * @param[in,out] qrtree + * On entry, an allocated structure to destroy. + * On exit, the structure is destroy and cannot be used. + * + ******************************************************************************* + * + * @sa libhqr_hqr_init + * + ******************************************************************************/ +void +libhqr_finalize( libhqr_tree_t *qrtree ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + + if ( (qrtree->init != LIBHQR_QRTREE_HQR) && + (qrtree->init != LIBHQR_QRTREE_SVD) && + (qrtree->init != LIBHQR_QRTREE_SYS) && + (qrtree->init != LIBHQR_QRTREE_MTX) ) + { + fprintf(stderr, "libhqr_finalize: qrtree not initialized\n" ); + return; + } + + if (arg != NULL) { + + if ( (qrtree->init == LIBHQR_QRTREE_HQR) || + (qrtree->init == LIBHQR_QRTREE_HQR) ) + { + if ( arg->llvl != NULL) { + if ( arg->llvl->ipiv != NULL ) + free( arg->llvl->ipiv ); + free( arg->llvl ); + } + + if ( arg->hlvl != NULL) { + if ( arg->hlvl->ipiv != NULL ) + free( arg->hlvl->ipiv ); + free( arg->hlvl ); + } + + if ( arg->perm != NULL ) + free(arg->perm); + } + + free(arg); + } +} diff --git a/src/libhqr.c b/src/libhqr.c deleted file mode 100644 index f064e31feeef814ee6132201bb7b97eeae902bbc..0000000000000000000000000000000000000000 --- a/src/libhqr.c +++ /dev/null @@ -1,2856 +0,0 @@ -/** - * - * @file libhqr.c - * - * @copyright 2010-2017 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - * @version 1.0.0 - * @author Raphael Boucherie - * @author Mathieu Faverge - * @date 2017-03-21 - * - * This file contains all the function to describe the dependencies - * used in the Xgeqrf_param.jdf file. - * The QR factorization done with this file relies on three levels: - * - the first one is using a flat tree with TS kernels. The - * height of this tree is defined by the parameter 'a'. If 'a' - * is set to A->mt, the factorization is identical to the one - * perform by PLASMA_zgeqrf. - * For all subdiagonal "macro-tiles", the line reduced is always - * the first. For all diagonal "macro-tiles", the factorization - * performed is identical to the one performed by PLASMA_zgeqrf. - * - * - the third level is using a reduction tree of size 'p'. By - * default, the parameter 'p' should be equal to the number of - * processors used for the computation, but can be set - * differently. (see further example). The type of tree used at - * this level is defined by the hlvl parameter. It can be flat - * or greedy. - * CODE DETAILS: This tree and all the function related to it - * are performing a QR factorization on a band matrix with 'p' - * the size of the band. All the functions take global indices - * as input and return global indices as output. - * - * - Finally, a second 'low' level of reduction tree is applied. - * The size of this tree is induced by the parameters 'a' and 'p' - * from the first and third levels and is A->mt / ( p * a ). This - * tree is reproduced p times for each subset of tiles - * S_k = {i in [0, A->mt-1] \ i%p*a = k } with k in [0, p-1]. - * The tree used for the reduction is defined by the llvl - * parameter and can be: flat, greedy, fibonacci or binary. - * CODE DETAILS: For commodity, the size of this tree is always - * ceil(A->mt / (p * a) ) inducing some extra tests in the code. - * All the functions related to this level of tree take as input - * the local indices in the A->mt / (p*a) matrix and the global - * k. They return the local index. The reductions are so - * performed on a trapezoidal matrices where the step is defined - * by a: - * <- min( lhlvl_mt, min( mt, nt ) ) -> - * __a__ a a - * | |_____ - * | |_____ - * | |_____ - * llvl_mt = ceil(MT/ (a*p)) | |_____ - * | |_____ - * |___________________________________| - * - * - * - * At each step of the factorization, the lines of tiles are divided - * in 4 types: - * - QRPARAM_TILE_TS: They are the lines annihilated by a TS - * kernel, these lines are never used as an annihilator. They are - * the lines i, with 1 < (i/p)%a < a and i > (k+1)*p - * - QRPARAM_TILE_LOCALTT: They are the lines used as annhilitor - * in the TS kernels annihiling the QRPARAM_TILE_TS lines. They - * are themselves annihilated by the TT kernel of the low level - * reduction tree. The roots of the local trees are the lines i, - * with i/p = k. - * - QRPARAM_TILE_DOMINO: These are the lines that are - * annhilihated with a domino effect in the band defined by (i/p) - * <= k and i >= k - * - QRPARAM_TILE_DISTTT: These are the lines annihilated by the - * high level tree to reduce communications. - * These lines are defined by (i-k)/p = 0. - */ -#include "libhqr.h" -#include <assert.h> -#include <stdlib.h> -#include <stdio.h> -#include <math.h> -#include <string.h> - -#define PRINT_PIVGEN 0 -#ifdef PRINT_PIVGEN -#define myassert( test ) {if ( ! (test) ) return -1;} -#else -#define myassert(test) {assert((test)); return -1;} -#endif - -struct hqr_args_s; -typedef struct hqr_args_s hqr_args_t; - -struct hqr_subpiv_s; -typedef struct hqr_subpiv_s hqr_subpiv_t; - -/** - * @brief jhj - */ -struct hqr_args_s { - int domino; /**< Switch to enable/disable the domino tree linking high and lw level reduction trees */ - int tsrr; /**< Switch to enable/disable round-robin on TS to optimise pipelining between TS and local tree */ - hqr_subpiv_t *llvl; - hqr_subpiv_t *hlvl; - int *perm; -}; - -struct hqr_subpiv_s { - /** - * currpiv - * @param[in] arg pointer to the qr_piv structure - * @param[in] k step in the factorization - * @param[in] m line you want to eliminate - * - * @return the annihilator p used with m at step k - */ - int (*currpiv)(const hqr_subpiv_t *arg, int k, int m); - /* - * nextpiv - * @param[in] arg pointer to the qr_piv structure - * @param[in] k step in the factorization - * @param[in] p line currently used as an annihilator - * @param[in] m line actually annihilated. - * m = MT to find the first time p is used as an annihilator during step k - * - * @return the next line m' using the line p as annihilator during step k - * mt if p will never be used again as an annihilator. - */ - int (*nextpiv)(const hqr_subpiv_t *arg, int k, int p, int m); - /* - * prevpiv - * @param[in] arg pointer to the qr_piv structure - * @param[in] k step in the factorization - * @param[in] p line currently used as an annihilator - * @param[in] m line actually annihilated. - * m = p to find the last time p has been used as an annihilator during step k - * - * @return the previous line m' using the line p as annihilator during step k - * mt if p has never been used before that as an annihilator. - */ - int (*prevpiv)(const hqr_subpiv_t *arg, int k, int p, int m); - int *ipiv; - int minMN; - int ldd; - int a; - int p; - int domino; -}; - -/* - * Common functions - */ -static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); -static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ); -static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ); -static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ); - -/* Permutation */ -static void hqr_genperm ( libhqr_tree_t *qrtree ); -static int hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m ); - -/* - * Subtree for low-level - */ -static void hqr_low_flat_init( hqr_subpiv_t *arg); -static void hqr_low_greedy_init( hqr_subpiv_t *arg, int minMN); -static void hqr_low_binary_init( hqr_subpiv_t *arg); -static void hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN); - - -/**************************************************** - * Reading functions - ************************************************** - * - * Common parameters for the following functions - * qrtree - tree used for the factorization - * k - Step k of the QR factorization - * m - line anhilated - */ - -int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m){ - int id; - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - id = (k * qrtree->mt) + m; - return arg[id].type; -} - -int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m){ - int id, perm_m, p, a; - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - perm_m = m; - p = qrtree->p; - a = qrtree->a; - myassert( (p==1) || (perm_m / (p*a)) == (m / (p*a)) ); - myassert( (p==1) || (perm_m % p) == (m % p) ); - id = (k * qrtree->mt) + m; - return arg[id].currpiv; -} - -/* - * Extra parameter: - * p - line used as pivot - */ - -int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ - int id; - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - int gmt = qrtree->mt; - myassert( m > p && p >= k ); - myassert( m == gmt || p == rdmtx_currpiv( qrtree, k, m ) ); - if(m == qrtree->mt){ - id = (k * qrtree->mt) + p; - return arg[id].first_nextpiv; - } - else{ - id = (k * qrtree->mt) + m; - return arg[id].nextpiv; - } -} - -int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ - int id; - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - int gmt = qrtree->mt; - myassert( m >= p && p >= k && m < gmt ); - myassert( m == p || p == rdmtx_currpiv( qrtree, k, m ) ); - - if(m == p){ - id = (k * qrtree->mt) + p; - return arg[id].first_prevpiv; - } - else{ - id = (k * qrtree->mt) + m; - return arg[id].prevpiv; - } -} -/**************************************************** - * Common ipiv - *************************************************** - * - * Common prameters to the 4 following functions: - * a - Parameter a for the tunable QR - * p - Parameter p for the tunable QR - * k - Step k of the QR factorization - * - */ -#define nbextra1_formula ( (k % pa) > (pa - p) ) ? (-k)%pa + pa : 0 - -/* - * Extra parameter: - * gmt - Global number of tiles in a column of the complete distributed matrix - * Return: - * The number of geqrt to execute in the panel k - */ -static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { - int a = qrtree->a; - int p = qrtree->p; - int gmt = qrtree->mt; - int domino = qrtree->domino; - int pa = p * a; - int nb_1, nb_2, nb_3; - int nb_11, nb_12; - - /* Number of tasks of type 3 */ - nb_3 = p; - - /* Number of tasks of type 2 */ - if ( domino ) { - nb_2 = k * (p - 1); - - /* First multiple of p*a under the diagonal of step k */ - nb_11 = ( ( (p * (k+1)) + pa - 1 ) / pa ) * pa; - } - else { - /* Number of extra tile of type 1 between the the tile of type 3 and the first of nb11 */ - nb_2 = nbextra1_formula; - - /* First multiple of p*a under the diagonal of step 1 */ - nb_11 = ( (k + p + pa - 1 ) / pa ) * pa; - } - - /* Last multiple of p*a lower than A->mt */ - nb_12 = ( gmt / pa ) * pa; - - /* Number of tasks of type 1 between nb_11 and nb_12 */ - nb_1 = (nb_12 - nb_11) / a; - - /* Add leftover */ - nb_1 += libhqr_imin( p, gmt - nb_12 ); - - return libhqr_imin( nb_1 + nb_2 + nb_3, gmt - k); -} - -/* - * Extra parameter: - * i - indice of the geqrt in the continuous space - * Return: - * The global indice m of the i th geqrt in the panel k - */ -static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ) -{ - int a = qrtree->a; - int p = qrtree->p; - int domino = qrtree->domino; - - int pos1, j, pa = p * a; - int nbextra1 = nbextra1_formula; - int nb23 = p + (domino ? k*(p-1) : nbextra1 ); - - /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ - if ( i < nb23 ) - return k+i; - /* Tile of type 1 */ - else { - j = i - nb23; - pa = p * a; - if ( domino ) - pos1 = ( ( (p * (k+1)) + pa - 1 ) / pa ) * pa; - else - pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; - return pos1 + (j/p) * pa + j%p; - } -} - -/* - * Extra parameter: - * m - The global indice m of a geqrt in the panel k - * Return: - * The index i of the geqrt in the panel k - */ -static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ) -{ - int a = qrtree->a; - int p = qrtree->p; - int domino = qrtree->domino; - int lm = m; - - int pos1, j, pa = p * a; - int nbextra1 = nbextra1_formula; - int nb23 = p + ( domino ? k*(p-1) : nbextra1 ); - int end2 = p + ( domino ? k*p : k + nbextra1 ); - - /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ - if ( lm < end2 ) - return lm-k; - /* Tile of type 1 */ - else { - if ( domino ) - pos1 = ( ( (p * (k+1)) + pa - 1 ) / pa ) * pa; - else - pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; - j = lm - pos1; - return nb23 + (j / pa) * p + j%pa; - } -} - -/* - * Extra parameter: - * m - The global indice m of the row in the panel k - * Return - * -1 - Error - * 0 - if m is reduced thanks to a TS kernel - * 1 - if m is reduced thanks to the low level tree - * 2 - if m is reduced thanks to the bubble tree - * 3 - if m is reduced in distributed - */ -static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ) { - int a = qrtree->a; - int p = qrtree->p; - int domino = qrtree->domino; - - int lm = m; - myassert( lm >= k ); - - /* Element to be reduce in distributed */ - if (lm < k + p) { - return 3; - } - /* Element on the local diagonal */ - else if ( domino && lm < p * (k+1) ) - return 2; - /* Lower triangle of the matrix */ - else { - if( (lm / p) % a == 0 ) - return 1; - else - return 0; - } -} - -/**************************************************** - * HQR_LOW_FLAT_TREE - ***************************************************/ -static int hqr_low_flat_currpiv(const hqr_subpiv_t *arg, int k, int m) -{ - (void)m; - if ( arg->domino ) - return k / arg->a; - else - return (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a ; -}; - -static int hqr_low_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) -{ - int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; - int p_pa = (p / arg->p ) / arg->a; - -#ifdef FLAT_UP - if ( ( p_pa == k_a ) && (start_pa > k_a+1 ) ) - return start_pa-1; - else -#else /* FLAT_DOWN */ - if ( start_pa <= p_pa ) - return arg->ldd; - - if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) { - if ( start_pa == arg->ldd ) - return p_pa+1; - else if ( start_pa < arg->ldd ) - return start_pa+1; - } -#endif - return arg->ldd; -} - -static int hqr_low_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) -{ - int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; - int p_pa = (p / arg->p ) / arg->a; - -#ifdef FLAT_UP - if ( p_pa == k_a && (start_pa+1 < arg->ldd) ) - return start_pa+1; - else -#else - if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) { - if ( start_pa == p_pa ) - return arg->ldd - 1; - else if ( start_pa > p_pa + 1 ) - return start_pa-1; - } -#endif - return arg->ldd; -}; - -static void hqr_low_flat_init(hqr_subpiv_t *arg){ - arg->currpiv = hqr_low_flat_currpiv; - arg->nextpiv = hqr_low_flat_nextpiv; - arg->prevpiv = hqr_low_flat_prevpiv; - arg->ipiv = NULL; -}; - -/**************************************************** - * HQR_LOW_BINARY_TREE - ***************************************************/ -static int hqr_low_binary_currpiv(const hqr_subpiv_t *arg, int k, int m) -{ - int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a; - int m_pa = (m / arg->p ) / arg->a; - - int tmp1 = m_pa - k_a; - int tmp2 = 1; - (void)arg; - - if ( tmp1 == 0) - return 0; - while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) { - tmp1 = tmp1 >> 1; - tmp2 = tmp2 << 1; - } - assert( m_pa - tmp2 >= k_a ); - return m_pa - tmp2; -}; - -static int hqr_low_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) -{ - int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; - int p_pa = (p / arg->p ) / arg->a; - - int tmpp, bit; - myassert( (start_pa == arg->ldd) || (hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa || !arg->domino) ); - - if ( start_pa <= p_pa ) - return arg->ldd; - - int offset = p_pa-k_a; - bit = 0; - if (start_pa != arg->ldd) { - while( ( (start_pa-k_a) & (1 << bit ) ) == 0 ) - bit++; - bit++; - } - - tmpp = offset | (1 << bit); - if ( ( tmpp != offset ) && ( tmpp+k_a < arg->ldd ) ) - return tmpp + k_a; - else - return arg->ldd; -}; - -static int hqr_low_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) -{ - int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; - int p_pa = (p / arg->p ) / arg->a; - int offset = p_pa - k_a; - - myassert( start_pa >= p_pa && ( start_pa == p_pa || !arg->domino || - hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa ) ); - - if ( (start_pa == p_pa) && ( offset%2 == 0 ) ) { - int i, bit, tmp; - if ((p_pa - k_a) == 0) - bit = (int)( log( (double)(arg->ldd - k_a) ) / log( 2. ) ); - else { - bit = 0; - while( (offset & (1 << bit )) == 0 ) - bit++; - } - for( i=bit; i>-1; i--){ - tmp = offset | (1 << i); - if ( ( offset != tmp ) && ( tmp+k_a < arg->ldd ) ) - return tmp+k_a; - } - return arg->ldd; - } - - if ( (start_pa - p_pa) > 1 ) - return p_pa + ( (start_pa - p_pa) >> 1 ); - else { - return arg->ldd; - } -}; - -static void hqr_low_binary_init(hqr_subpiv_t *arg){ - arg->currpiv = hqr_low_binary_currpiv; - arg->nextpiv = hqr_low_binary_nextpiv; - arg->prevpiv = hqr_low_binary_prevpiv; - arg->ipiv = NULL; -}; - -/**************************************************** - * HQR_LOW_FIBONACCI_TREE - ***************************************************/ -/* Return the pivot to use for the row m at step k */ -inline static int hqr_low_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { - int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - m%(qrpiv->p)) / qrpiv->p / qrpiv->a; - return (qrpiv->ipiv)[ k_a * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; -} - -/* Return the last row which has used the row m as a pivot in step k before the row start */ -inline static int hqr_low_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; - int p_pa = (p / qrpiv->p ) / qrpiv->a; - - for( i=start_pa+1; i<(qrpiv->ldd); i++ ) - if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa ) - return i; - return i; - } - -/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ -inline static int hqr_low_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; - int p_pa = (p / qrpiv->p ) / qrpiv->a; - - for( i=start_pa-1; i>k_a; i-- ) - if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa ) - return i; - return (qrpiv->ldd); -} - -static void hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN){ - int *ipiv; - int mt; - - arg->currpiv = hqr_low_fibonacci_currpiv; - arg->nextpiv = hqr_low_fibonacci_nextpiv; - arg->prevpiv = hqr_low_fibonacci_prevpiv; - - mt = arg->ldd; - - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, mt*minMN*sizeof(int)); - - /* - * Fibonacci of order 1: - * u_(n+1) = u_(n) + 1 - */ - { - int f1, k, m; - - /* Fill in the first column */ - f1 = 1; - for (m=1; m < mt; ) { - for (k=0; (k < f1) && (m < mt); k++, m++) { - ipiv[m] = m - f1; - } - f1++; - } - - for( k=1; k<minMN; k++) { - for(m=k+1; m < mt; m++) { - ipiv[ k * mt + m ] = ipiv[ (k-1) * mt + m - 1 ] + 1; - } - } - } -}; - -/**************************************************** - * HQR_LOW_GREEDY_TREE - ***************************************************/ -/* Return the pivot to use for the row m at step k */ -inline static int hqr_low_greedy_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { - if (qrpiv->domino) - return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; - else - return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd) - + ( ( m / qrpiv->p ) / qrpiv->a ) ]; -} - -/* Return the last row which has used the row m as a pivot in step k before the row start */ -inline static int hqr_low_greedy_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int p_pa = p / qrpiv->p / qrpiv->a; - int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; - - for( i=start_pa+1; i<(qrpiv->ldd); i++ ) - if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) - return i; - return i; - } - -/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ -inline static int hqr_low_greedy_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int pa = qrpiv->p * qrpiv->a; - int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; - int p_pa = p / pa; - int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; - - for( i=start_pa-1; i> k_a; i-- ) - if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) - return i; - - return (qrpiv->ldd); -} - -static void hqr_low_greedy_init(hqr_subpiv_t *arg, int minMN){ - int *ipiv; - int mt, a, p, pa, domino; - - arg->currpiv = hqr_low_greedy_currpiv; - arg->nextpiv = hqr_low_greedy_nextpiv; - arg->prevpiv = hqr_low_greedy_prevpiv; - - mt = arg->ldd; - a = arg->a; - p = arg->p; - pa = p * a; - domino = arg->domino; - - if ( domino ) - { - int j, k, height, start, end, firstk = 0; - int *nT, *nZ; - - arg->minMN = libhqr_imin( minMN, mt*a ); - minMN = arg->minMN; - - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, mt*minMN*sizeof(int)); - - nT = (int*)malloc(minMN*sizeof(int)); - nZ = (int*)malloc(minMN*sizeof(int)); - - memset( nT, 0, minMN*sizeof(int)); - memset( nZ, 0, minMN*sizeof(int)); - - nT[0] = mt; - - k = 0; - while ( (!( ( nT[minMN-1] == mt - ( (minMN - 1) / a ) ) && - ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) - && ( firstk < minMN ) ) { - height = (nT[k] - nZ[k]) / 2; - if ( height == 0 ) { - while ( (firstk < minMN) && - ( nT[firstk] == mt - ( firstk / a ) ) && - ( nZ[firstk]+1 == nT[firstk] ) ) { - if ( (( firstk % a) != a-1 ) - && ( firstk < minMN-1 ) ) - nT[firstk+1]++; - firstk++; - } - k = firstk; - continue; - } - - if (k < minMN-1) nT[k+1] += height; - start = mt - nZ[k] - 1; - end = start - height; - nZ[k] += height; - - for( j=start; j > end; j-- ) { - ipiv[ k*mt + j ] = (j - height); - } - - k++; - if (k > minMN-1) k = firstk; - } - - free(nT); - free(nZ); - } - else - { - int j, k, myrank, height, start, end, firstk = 0; - int *nT2DO = (int*)malloc(minMN*sizeof(int)); - int *nT = (int*)malloc(minMN*sizeof(int)); - int *nZ = (int*)malloc(minMN*sizeof(int)); - - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) * p ); - ipiv = arg->ipiv; - - memset( ipiv, 0, minMN*sizeof(int)*mt*p); - - for ( myrank=0; myrank<p; myrank++ ) { - int lminMN = minMN; - - memset( nT2DO, 0, minMN*sizeof(int)); - memset( nT, 0, minMN*sizeof(int)); - memset( nZ, 0, minMN*sizeof(int)); - - nT[0] = mt; - - for(k=0; k<lminMN; k++) { - nT2DO[k] = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); - if ( nT2DO[k] == 0 ) { - lminMN = k; - break; - } - } - - k = 0; firstk = 0; - while ( (!( ( nT[lminMN-1] == nT2DO[lminMN-1] ) && - ( nZ[lminMN-1]+1 == nT[lminMN-1] ) ) ) - && ( firstk < lminMN ) ) { - height = (nT[k] - nZ[k]) / 2; - if ( height == 0 ) { - while ( (firstk < lminMN) && - ( nT[firstk] == nT2DO[firstk] ) && - ( nZ[firstk]+1 == nT[firstk] ) ) { - if ( ( firstk < lminMN-1 ) && - (( (firstk) % pa) != ((a-1)*p+myrank) ) ) - nT[firstk+1]++; - firstk++; - } - k = firstk; - continue; - } - - if (k < lminMN-1) nT[k+1] += height; - start = mt - nZ[k] - 1; - end = start - height; - nZ[k] += height; - - for( j=start; j > end; j-- ) { - ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height); - } - - k++; - if (k > lminMN-1) k = firstk; - } - } - - free(nT2DO); - free(nT); - free(nZ); - } - -#if 0 - { - int m, k; - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - printf( "%3d ", ipiv[k*mt + m] ); - } - printf("\n"); - } - } - if (!arg->domino) { - int m, k, myrank; - for ( myrank=1; myrank<p; myrank++ ) { - ipiv += mt*minMN; - printf("-------- rank %d ---------\n", myrank ); - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - int k_a = (k + p - 1 - myrank) / p / a; - if ( m >= k_a ) - printf( "%3d ", ipiv[k*mt + m] ); - else - printf( "--- " ); - } - printf("\n"); - } - } - } -#endif -}; - -/**************************************************** - * HQR_LOW_GREEDY1P_TREE - ***************************************************/ -/* Return the pivot to use for the row m at step k */ -inline static int hqr_low_greedy1p_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { - if (qrpiv->domino) - return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; - else - return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd) - + ( ( m / qrpiv->p ) / qrpiv->a ) ]; -} - -/* Return the last row which has used the row m as a pivot in step k before the row start */ -inline static int hqr_low_greedy1p_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int p_pa = p / qrpiv->p / qrpiv->a; - int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; - - for( i=start_pa+1; i<(qrpiv->ldd); i++ ) - if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) - return i; - return i; - } - -/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ -inline static int hqr_low_greedy1p_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { - int i; - int pa = qrpiv->p * qrpiv->a; - int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; - int p_pa = p / pa; - int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; - - for( i=start_pa-1; i> k_a; i-- ) - if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) - return i; - - return (qrpiv->ldd); -} - -static void hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN){ - int *ipiv; - int mt, a, p, pa, domino; - int j, k, height, start, end, nT, nZ; - - arg->currpiv = hqr_low_greedy1p_currpiv; - arg->nextpiv = hqr_low_greedy1p_nextpiv; - arg->prevpiv = hqr_low_greedy1p_prevpiv; - - mt = arg->ldd; - a = arg->a; - p = arg->p; - pa = p * a; - domino = arg->domino; - - /* This section has not been coded yet, and will perform a classic greedy */ - if ( domino ) - { - arg->minMN = libhqr_imin( minMN, mt*a ); - minMN = arg->minMN; - - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, mt*minMN*sizeof(int)); - - /** - * Compute the local greedy tree of each column, on each node - */ - for(k=0; k<minMN; k++) { - /* Number of tiles to factorized in this column on this rank */ - nT = libhqr_imax( mt - (k / a), 0 ); - /* Number of tiles already killed */ - nZ = 0; - - while( nZ < (nT-1) ) { - height = (nT - nZ) / 2; - start = mt - nZ - 1; - end = start - height; - nZ += height; - - for( j=start; j > end; j-- ) { - ipiv[ k*mt + j ] = (j - height); - } - } - assert( nZ+1 == nT ); - } - } - else - { - int myrank; - end = 0; - - arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) * p ); - ipiv = arg->ipiv; - - memset( ipiv, 0, minMN*sizeof(int)*mt*p); - - for ( myrank=0; myrank<p; myrank++ ) { - - /** - * Compute the local greedy tree of each column, on each node - */ - for(k=0; k<minMN; k++) { - /* Number of tiles to factorized in this column on this rank */ - nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); - /* Number of tiles already killed */ - nZ = 0; - - /* No more computations on this node */ - if ( nT == 0 ) { - break; - } - - while( nZ < (nT-1) ) { - height = (nT - nZ) / 2; - start = mt - nZ - 1; - end = start - height; - nZ += height; - - for( j=start; j > end; j-- ) { - ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height); - } - } - assert( nZ+1 == nT ); - } - } - } - -#if 0 - { - int m, k; - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - printf( "%3d ", ipiv[k*mt + m] ); - } - printf("\n"); - } - } - if (!arg->domino) { - int m, k, myrank; - for ( myrank=1; myrank<p; myrank++ ) { - ipiv += mt*minMN; - printf("-------- rank %d ---------\n", myrank ); - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - int k_a = (k + p - 1 - myrank) / p / a; - if ( m >= k_a ) - printf( "%3d ", ipiv[k*mt + m] ); - else - printf( "--- " ); - } - printf("\n"); - } - } - } -#endif -}; - -/**************************************************** - * HQR_HIGH_FLAT_TREE - ***************************************************/ -static int hqr_high_flat_currpiv(const hqr_subpiv_t *arg, int k, int m) -{ - (void)arg; - (void)m; - return k; -}; - -static int hqr_high_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - if ( p == k && arg->ldd > 1 ) { - if ( start == arg->ldd ) - return p+1; - else if ( start < arg->ldd && (start-k < arg->p-1) ) - return start+1; - } - return arg->ldd; -}; - -static int hqr_high_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - assert( arg->p > 1 ); - if ( p == k && arg->ldd > 1 ) { - if ( start == p && p != arg->ldd-1 ) - return libhqr_imin( p + arg->p - 1, arg->ldd - 1 ); - else if ( start > p + 1 && (start-k < arg->p)) - return start-1; - } - return arg->ldd; -}; - -static void hqr_high_flat_init(hqr_subpiv_t *arg){ - arg->currpiv = hqr_high_flat_currpiv; - arg->nextpiv = hqr_high_flat_nextpiv; - arg->prevpiv = hqr_high_flat_prevpiv; - arg->ipiv = NULL; -}; - -/**************************************************** - * HQR_HIGH_BINARY_TREE - ***************************************************/ -static int hqr_high_binary_currpiv(const hqr_subpiv_t *arg, int k, int m) -{ - int tmp1 = m - k; - int tmp2 = 1; - (void)arg; - - if ( tmp1 == 0) - return 0; - while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) { - tmp1 = tmp1 >> 1; - tmp2 = tmp2 << 1; - } - assert( m - tmp2 >= k ); - return m - tmp2; -}; - -static int hqr_high_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - int tmpp, bit; - myassert( (start == arg->ldd) || (hqr_high_binary_currpiv( arg, k, start ) == p) ); - - if ( start <= p ) - return arg->ldd; - - int offset = p - k; - bit = 0; - if (start != arg->ldd) { - while( ( (start - k) & (1 << bit ) ) == 0 ) - bit++; - bit++; - } - - tmpp = offset | (1 << bit); - if ( ( tmpp != offset ) && (tmpp < arg->p) && ( tmpp+k < arg->ldd ) ) - return tmpp + k; - else - return arg->ldd; -}; - - -static int hqr_high_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - int offset = p - k; - - myassert( start >= p && ( start == p || hqr_high_binary_currpiv( arg, k, start ) == p ) ); - - if ( (start == p) && ( offset%2 == 0 ) ) { - int i, bit, tmp; - if ( offset == 0 ) - bit = (int)( log( (double)( libhqr_imin(arg->p, arg->ldd - k) ) ) / log( 2. ) ); - else { - bit = 0; - while( (offset & (1 << bit )) == 0 ) - bit++; - } - for( i=bit; i>-1; i--){ - tmp = offset | (1 << i); - if ( ( offset != tmp ) && ( tmp < arg->p ) && (tmp+k < arg->ldd) ) - return tmp+k; - } - return arg->ldd; - } - - if ( (start - p) > 1 ) - return p + ( (start - p) >> 1 ); - else { - return arg->ldd; - } -}; - -static void hqr_high_binary_init(hqr_subpiv_t *arg){ - arg->currpiv = hqr_high_binary_currpiv; - arg->nextpiv = hqr_high_binary_nextpiv; - arg->prevpiv = hqr_high_binary_prevpiv; - arg->ipiv = NULL; -}; - -/**************************************************** - * HQR_HIGH_FIBONACCI_TREE - ***************************************************/ -/* Return the pivot to use for the row m at step k */ -inline static int hqr_high_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { - return (qrpiv->ipiv)[ m-k ] + k; -} - -/* Return the last row which has used the row m as a pivot in step k before the row start */ -inline static int hqr_high_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) { - int i; - myassert( p >= k && start >= p && start-k <= qrpiv->p); - - int lp = p - k; - int lstart= start - k; - int end = libhqr_imin(qrpiv->ldd-k, qrpiv->p); - for( i=lstart+1; i<end; i++ ) - if ( (qrpiv->ipiv)[i] == lp ) - return i+k; - return qrpiv->ldd; -} - -/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ -inline static int hqr_high_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) { - int i; - myassert( p>=k && (start == qrpiv->ldd || start-k <= qrpiv->p) ); - - for( i=libhqr_imin(start-k-1, qrpiv->p-1); i>0; i-- ) - if ( (qrpiv->ipiv)[i] == (p-k) ) - return i + k; - return (qrpiv->ldd); -} - -static void hqr_high_fibonacci_init(hqr_subpiv_t *arg){ - int *ipiv; - int p; - - arg->currpiv = hqr_high_fibonacci_currpiv; - arg->nextpiv = hqr_high_fibonacci_nextpiv; - arg->prevpiv = hqr_high_fibonacci_prevpiv; - - p = arg->p; - - arg->ipiv = (int*)malloc( p * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, p*sizeof(int)); - - /* - * Fibonacci of order 1: - * u_(n+1) = u_(n) + 1 - */ - { - int f1, k, m; - - /* Fill in the first column */ - f1 = 1; - for (m=1; m < p; ) { - for (k=0; (k < f1) && (m < p); k++, m++) { - ipiv[m] = m - f1; - } - f1++; - } - } -}; - -/**************************************************** - * HQR_HIGH_GREEDY_TREE (1 panel duplicated) - ***************************************************/ -static void hqr_high_greedy1p_init(hqr_subpiv_t *arg){ - int *ipiv; - int mt, p; - - arg->currpiv = hqr_high_fibonacci_currpiv; - arg->nextpiv = hqr_high_fibonacci_nextpiv; - arg->prevpiv = hqr_high_fibonacci_prevpiv; - - mt = arg->ldd; - p = arg->p; - - arg->ipiv = (int*)malloc( p * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, p*sizeof(int)); - - { - int minMN = 1; - int j, k, height, start, end, firstk = 0; - int *nT = (int*)malloc(minMN*sizeof(int)); - int *nZ = (int*)malloc(minMN*sizeof(int)); - memset( nT, 0, minMN*sizeof(int)); - memset( nZ, 0, minMN*sizeof(int)); - - nT[0] = mt; - nZ[0] = libhqr_imax( mt - p, 0 ); - for(k=1; k<minMN; k++) { - height = libhqr_imax(mt-k-p, 0); - nT[k] = height; - nZ[k] = height; - } - - k = 0; - while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) && - ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) - && ( firstk < minMN ) ) { - height = (nT[k] - nZ[k]) / 2; - if ( height == 0 ) { - while ( (firstk < minMN) && - ( nT[firstk] == mt - firstk ) && - ( nZ[firstk]+1 == nT[firstk] ) ) { - firstk++; - } - k = firstk; - continue; - } - - start = mt - nZ[k] - 1; - end = start - height; - nZ[k] += height; - if (k < minMN-1) nT[k+1] = nZ[k]; - - for( j=start; j > end; j-- ) { - ipiv[ k*p + j-k ] = (j - height); - } - - k++; - if (k > minMN-1) k = firstk; - } - - free(nT); - free(nZ); - } -}; - -/**************************************************** - * HQR_HIGH_GREEDY_TREE - ***************************************************/ -static int hqr_high_greedy_currpiv(const hqr_subpiv_t *arg, int k, int m) -{ - myassert( m >= k && m < k+arg->p ); - return (arg->ipiv)[ k * (arg->p) + (m - k) ]; -}; - -static int hqr_high_greedy_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - int i; - myassert( (start >= k && start < k+arg->p) || start == arg->ldd ); - for( i=libhqr_imin(start-1, k+arg->p-1); i > k; i-- ) - if ( (arg->ipiv)[i-k + k* (arg->p)] == p ) - return i; - return (arg->ldd); -}; - -static int hqr_high_greedy_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start) -{ - int i; - myassert( (start >= k && start < k+arg->p) || start == p ); - for( i=start-k+1; i<arg->p; i++ ) - if ( (arg->ipiv)[i + k * (arg->p)] == p ) - return k+i; - return arg->ldd; -}; - -static void hqr_high_greedy_init(hqr_subpiv_t *arg, int minMN){ - int *ipiv; - int mt, p; - - arg->currpiv = hqr_high_greedy_currpiv; - arg->nextpiv = hqr_high_greedy_nextpiv; - arg->prevpiv = hqr_high_greedy_prevpiv; - - mt = arg->ldd; - p = arg->p; - - arg->ipiv = (int*)malloc( p * minMN * sizeof(int) ); - ipiv = arg->ipiv; - memset(ipiv, 0, p*minMN*sizeof(int)); - - { - int j, k, height, start, end, firstk = 0; - int *nT = (int*)malloc(minMN*sizeof(int)); - int *nZ = (int*)malloc(minMN*sizeof(int)); - memset( nT, 0, minMN*sizeof(int)); - memset( nZ, 0, minMN*sizeof(int)); - - nT[0] = mt; - nZ[0] = libhqr_imax( mt - p, 0 ); - for(k=1; k<minMN; k++) { - height = libhqr_imax(mt-k-p, 0); - nT[k] = height; - nZ[k] = height; - } - - k = 0; - while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) && - ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) - && ( firstk < minMN ) ) { - height = (nT[k] - nZ[k]) / 2; - if ( height == 0 ) { - while ( (firstk < minMN) && - ( nT[firstk] == mt - firstk ) && - ( nZ[firstk]+1 == nT[firstk] ) ) { - firstk++; - } - k = firstk; - continue; - } - - start = mt - nZ[k] - 1; - end = start - height; - nZ[k] += height; - if (k < minMN-1) nT[k+1] = nZ[k]; - - for( j=start; j > end; j-- ) { - ipiv[ k*p + j-k ] = (j - height); - } - - k++; - if (k > minMN-1) k = firstk; - } - - free(nT); - free(nZ); - } -}; - -/**************************************************** - * - * Generic functions currpiv,prevpiv,nextpiv - * - ***************************************************/ -static int hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, tmpk; - int lm, rank; - int a = qrtree->a; - int p = qrtree->p; - int domino = qrtree->domino; - int gmt = qrtree->mt; - - lm = m / p; /* Local index in the distribution over p domains */ - rank = m % p; /* Staring index in this distribution */ - - /* TS level common to every case */ - if ( domino ) { - switch( hqr_gettype( qrtree, k, m ) ) - { - case 0: - tmp = lm / a; - if ( tmp == k / a ) - return k * p + rank ; /* Below to the first bloc including the diagonal */ - else - return tmp * a * p + rank ; - break; - case 1: - tmp = arg->llvl->currpiv(arg->llvl, k, m); - return ( tmp == k / a ) ? k * p + rank : tmp * a * p + rank; - break; - case 2: - return m - p; - break; - case 3: - if ( arg->hlvl != NULL ) - return arg->hlvl->currpiv(arg->hlvl, k, m); - default: - return gmt; - } - } - else { - switch( hqr_gettype( qrtree, k, m ) ) - { - case 0: - tmp = lm / a; - /* tmpk = (k + p - 1 - m%p) / p / a; */ - tmpk = k / (p * a); - return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ; - break; - case 1: - tmp = arg->llvl->currpiv(arg->llvl, k, m); - /* tmpk = (k + p - 1 - m%p) / p / a; */ - tmpk = k / (p * a); - return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ; - break; - case 2: - return m - p; - break; - case 3: - if ( arg->hlvl != NULL ) - return arg->hlvl->currpiv(arg->hlvl, k, m); - default: - return gmt; - } - } -}; - -/** - * hqr_nextpiv - Computes the next row killed by the row p, after - * it has kill the row start. - * - * @param[in] k - * Factorization step - * - * @param[in] pivot - * Line used as killer - * - * @param[in] start - * Starting point to search the next line killed by p after start - * start must be equal to A.mt to find the first row killed by p. - * if start != A.mt, start must be killed by p - * - * @return: - * - -1 if start doesn't respect the previous conditions - * - m, the following row killed by p if it exists, A->mt otherwise - */ -static int hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, ls, lp, nextp; - int lpivot, rpivot, lstart; - int a = qrtree->a; - int p = qrtree->p; - int gmt = qrtree->mt; - - lpivot = pivot / p; /* Local index in the distribution over p domains */ - rpivot = pivot % p; /* Staring index in this distribution */ - - /* Local index in the distribution over p domains */ - lstart = ( start == gmt ) ? arg->llvl->ldd * a : start / p; - - myassert( start > pivot && pivot >= k ); - myassert( start == gmt || pivot == hqr_currpiv( qrtree, k, start ) ); - - /* TS level common to every case */ - ls = (start < gmt) ? hqr_gettype( qrtree, k, start ) : -1; - lp = hqr_gettype( qrtree, k, pivot ); - - switch( ls ) - { - case -1: - - if ( lp == LIBHQR_KILLED_BY_TS ) { - myassert( start == gmt ); - return gmt; - } - - case LIBHQR_KILLED_BY_TS: - - /* If the tile is over the diagonal of step k, skip directly to type 2 */ - if ( arg->domino && lpivot < k ) - goto next_2; - - if ( start == gmt ) - nextp = pivot + p; - else - nextp = start + p; - - if ( ( nextp < gmt ) && - ( nextp < pivot + a*p ) && - ( (nextp/p)%a != 0 ) ) - return nextp; - start = gmt; - lstart = arg->llvl->ldd * a; - - case LIBHQR_KILLED_BY_LOCALTREE: - - /* If the tile is over the diagonal of step k, skip directly to type 2 */ - if ( arg->domino && lpivot < k ) - goto next_2; - - /* Get the next pivot for the low level tree */ - tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a ); - - if ( (tmp * a * p + rpivot >= gmt) - && (tmp == arg->llvl->ldd-1) ) - tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp); - - if ( tmp != arg->llvl->ldd ) - return tmp * a * p + rpivot; - - next_2: - /* no next of type 1, we reset start to search the next 2 */ - start = gmt; - lstart = arg->llvl->ldd * a; - - case LIBHQR_KILLED_BY_DOMINO: - - if ( lp < LIBHQR_KILLED_BY_DOMINO ) { - return gmt; - } - - /* Type 2 are killed only once if they are strictly in the band */ - if ( arg->domino && - (start == gmt) && - (lpivot < k) && - (pivot+p < gmt) ) { - return pivot+p; - } - - /* no next of type 2, we reset start to search the next 3 */ - start = gmt; - lstart = arg->llvl->ldd * a; - - case LIBHQR_KILLED_BY_DISTTREE: - - if ( lp < LIBHQR_KILLED_BY_DISTTREE ) { - return gmt; - } - - if( arg->hlvl != NULL ) { - tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start ); - if ( tmp != gmt ) - return tmp; - } - - default: - return gmt; - } -} - -/** - * hqr_prevpiv - Computes the previous row killed by the row p, before - * to kill the row start. - * - * @param[in] k - * Factorization step - * - * @param[in] pivot - * Line used as killer - * - * @param[in] start - * Starting point to search the previous line killed by p before start - * start must be killed by p, and start must be greater or equal to p - * - * @return: - * - -1 if start doesn't respect the previous conditions - * - m, the previous row killed by p if it exists, A->mt otherwise - */ -static int -hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, ls, lp, nextp; - int lpivot, rpivot, lstart; - int a = qrtree->a; - int p = qrtree->p; - int gmt = qrtree->mt; - - lpivot = pivot / p; /* Local index in the distribution over p domains */ - rpivot = pivot % p; /* Staring index in this distribution */ - lstart = start / p; /* Local index in the distribution over p domains */ - - myassert( start >= pivot && pivot >= k && start < gmt ); - myassert( start == pivot || pivot == hqr_currpiv( qrtree, k, start ) ); - - /* T Slevel common to every case */ - ls = hqr_gettype( qrtree, k, start ); - lp = hqr_gettype( qrtree, k, pivot ); - - if ( lp == LIBHQR_KILLED_BY_TS ) - return gmt; - - myassert( lp >= ls ); - switch( ls ) - { - case LIBHQR_KILLED_BY_DISTTREE: - if( arg->hlvl != NULL ) { - tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); - if ( tmp != gmt ) - return tmp; - } - - start = pivot; - lstart = pivot / p; - - case LIBHQR_KILLED_BY_DOMINO: - /* If the tile is over the diagonal of step k, process it as type 2 */ - if ( arg->domino && lpivot < k ) { - - if ( ( start == pivot ) && - (start+p < gmt ) ) - return start+p; - - if ( lp > LIBHQR_KILLED_BY_LOCALTREE ) - return gmt; - } - - start = pivot; - lstart = pivot / p; - - /* If it is the 'local' diagonal block, we go to 1 */ - - case LIBHQR_KILLED_BY_LOCALTREE: - /* If the tile is over the diagonal of step k and is of type 2, - it cannot annihilate type 0 or 1 */ - if ( arg->domino && lpivot < k ) - return gmt; - - tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a); - - if ( (tmp * a * p + rpivot >= gmt) - && (tmp == arg->llvl->ldd-1) ) - tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp); - - if ( tmp != arg->llvl->ldd ) - return tmp * a * p + rpivot; - - start = pivot; - - case LIBHQR_KILLED_BY_TS: - /* Search for predecessor in TS tree */ - /* if ( ( start+p < gmt ) && */ - /* ( (((start+p) / p) % a) != 0 ) ) */ - /* return start + p; */ - - if ( start == pivot ) { - tmp = lpivot + a - 1 - lpivot%a; - nextp = tmp * p + rpivot; - - while( pivot < nextp && nextp >= gmt ) - nextp -= p; - } else { - nextp = start - p; /*(lstart - 1) * p + rpivot;*/ - } - assert(nextp < gmt); - if ( pivot < nextp ) - return nextp; - - default: - return gmt; - } -}; - -/**************************************************** - * - * Generate the permutation required for the round-robin on TS - * - ***************************************************/ -static void -hqr_genperm( libhqr_tree_t *qrtree ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int m = qrtree->mt; - int n = qrtree->nt; - int a = qrtree->a; - int p = qrtree->p; - int domino = arg->domino; - int minMN = libhqr_imin( m, n ); - int pa = p * a; - int i, j, k; - int nbextra1; - int end2; - int mpa = m % pa; - int endpa = m - mpa; - int *perm; - - arg->perm = (int*)malloc( (m+1) * minMN * sizeof(int) ); - perm = arg->perm; - - if ( arg->tsrr ) { - for(k=0; k<minMN; k++) { - for( i=0; i<m+1; i++) { - perm[i] = -1; - } - perm += m+1; - } - perm = arg->perm; - for(k=0; k<minMN; k++) { - nbextra1 = nbextra1_formula; - - end2 = p + ( domino ? k*p : k + nbextra1 ); - end2 = (( end2 + pa - 1 ) / pa ) * pa; - end2 = libhqr_imin( end2, m ); - - /* - * All tiles of type 3, 2 and: - * - 1 when domino is disabled - * - 0 before the first multiple of pa under the considered diagonal - */ - for( i=k; i<end2; i++) { - perm[i] = i; - } - - /* All permutations in groups of pa tiles */ - assert( i%pa == 0 || i>=endpa); - for( ; i<endpa; i+=pa ) { - for(j=0; j<pa; j++) { - perm[i+j] = i + ( j + p * (k%a) )%pa; - } - } - - /* Last group of tiles */ - for( ; i<m; i++) { - perm[i] = i; - } - perm[m] = m; - perm += m+1; - } - } - else { - for(k=0; k<minMN; k++) { - for( i=0; i<m+1; i++) { - perm[i] = i; - } - perm += m+1; - } - } -} - -static inline int -hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m ) -{ - int gmt = qrtree->mt + 1; - int a = qrtree->a; - int p = qrtree->p; - int pa = p * a; - int start = m / pa * pa; - int stop = libhqr_imin( start + pa, gmt ) - start; - int i; - - if (a == 1) - return m; - - for ( i=m%p; i < stop; i+=p ) { - //if( perm[i] == m ) - if( i == m ) - return i+start; - } - - /* We should never arrive here */ - myassert( 0 ); -} - -/** - ******************************************************************************* - * - * @ingroup dplasma - * - * libhqr_hqr_init - Creates the tree structure that will describes the - * operation performed during QR/LQ factorization with parameterized QR/LQ - * algorithms family. - * - * Trees available parameters are described below. It is recommended to: - * - set p to the same value than the P-by-Q process grid used to distribute - * the data. (P for QR factorization, Q for LQ factorization). - * - set the low level tree to LIBHQR_GREEDY_TREE. - * - set the high level tree to: - * 1) LIBHQR_FLAT_TREE when the problem is square, because it divides - * by two the volume of communication of any other tree. - * 2) LIBHQR_FIBONACCI_TREE when the problem is tall and skinny (QR) or - * small and fat (LQ), because it reduces the critical path length. - * - Disable the domino effect when problem is square, to keep high efficiency - * kernel proportion high. - * - Enable the domino effect when problem is tall and skinny (QR) or - * small and fat (LQ) to increase parallelism and reduce critical path length. - * - Round-robin on TS domain (tsrr) option should be disabled. It is - * experimental and is not safe. - * - * These are the default when a parameter is set to -1; - * - * See http://www.netlib.org/lapack/lawnspdf/lawn257.pdf - * - ******************************************************************************* - * - * @param[inout] qrtree - * On entry, an allocated structure uninitialized. - * On exit, the structure initialized according to the parameter given. - * - * @param[in] trans - * @arg QR: Structure is initialized for QR factorization. - * @arg LQ: Structure is initialized for LQ factorization. - * - * @param[in] A - * Descriptor of the distributed matrix A to be factorized, on which - * QR/LQ factorization will be performed. - * The descriptor is untouched and only mt/nt/P parameters are used. - * - * @param[in] type_llvl - * Defines the tree used to reduce the main tiles of each local domain - * together. The matrix of those tiles has a lower triangular structure - * with a diagonal by step a. - * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the local - * tiles. - * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the local - * tiles. - * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the - * local tiles. - * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the local - * tiles. - * @arg -1: The default is used (LIBHQR_GREEDY_TREE) - * - * @param[in] type_hlvl - * Defines the tree used to reduce the main tiles of each domain. This - * is a band lower diagonal matrix of width p. - * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the tiles. - * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the tiles. - * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the - * tiles. - * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the tiles. - * @arg LIBHQR_GREEDY1P_TREE: A Greedy tree is computed for the first - * column and then duplicated on all others. - * @arg -1: The default is used (LIBHQR_FIBONACCI_TREE) - * - * @param[in] a - * Defines the size of the local domains on which a classic flat TS - * tree is performed. If a==1, then all local tiles are reduced by the - * type_lllvl tree. If a is larger than mt/p, then no local reduction - * tree is performed and type_llvl is ignored. - * If a == -1, it is set to 4 by default. - * - * @param[in] p - * Defines the number of distributed domains, ie the width of the high - * level reduction tree. If p == 1, no high level reduction tree is - * used. If p == mt, a and type_llvl are ignored since only high level - * reduction are performed. - * By default, it is recommended to set p to P if trans == - * PlasmaNoTrans, Q otherwise, where P-by-Q is the process grid used to - * distributed the data. (p > 0) - * - * @param[in] domino - * Enable/disable the domino effect that connects the high and low - * level reduction trees. Enabling the domino increases the proportion - * of TT (Triangle on top of Triangle) kernels that are less efficient, - * but increase the pipeline effect between independent factorization - * steps, reducin the critical path length. - * If disabled, it keeps the proprotion of more efficient TS (Triangle - * on top of Square) kernels high, but deteriorates the pipeline - * effect, and the critical path length. - * If domino == -1, it is enable when ration between M and N is lower - * than 1/2, and disabled otherwise. - * - * @param[in] tsrr - * Enable/Disable a round robin selection of the killer in local - * domains reduced by TS kernels. Enabling a round-robin selection of - * the killer allows to take benefit of the good pipelining of the flat - * trees and the high efficient TS kernels, while having other trees on - * top of it to reduce critical path length. - * WARNING: This option is under development and should not be enabled - * due to problem in corner cases. - * - ******************************************************************************* - * - * @retval -i if the ith parameters is incorrect. - * @retval 0 on success. - * - ******************************************************************************* - * - * @sa libhqr_hqr_finalize - * @sa libhqr_systolic_init - * - ******************************************************************************/ -int -libhqr_hqr_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, - int type_llvl, int type_hlvl, - int a, int p, - int domino, int tsrr ) -{ - libhqr_tree_t qrtree_init; - double ratio = 0.0; - int low_mt, minMN; - hqr_args_t *arg; - - if (qrtree == NULL) { - fprintf(stderr, "libhqr_hqr_init, illegal value of qrtree"); - return -1; - } - if ((trans != LIBHQR_QR) && - (trans != LIBHQR_LQ)) { - fprintf(stderr, "libhqr_hqr_init, illegal value of trans"); - return -2; - } - if (A == NULL) { - fprintf(stderr, "libhqr_hqr_init, illegal value of A"); - return -3; - } - - /* Compute parameters */ - a = (a == -1) ? 4 : libhqr_imax( a, 1 ); - p = libhqr_imax( p, 1 ); - - /* Domino */ - if ( domino >= 0 ) { - domino = domino ? 1 : 0; - } - else { - if (trans == LIBHQR_QR) { - ratio = ((double)(A->nt) / (double)(A->mt)); - } else { - ratio = ((double)(A->mt) / (double)(A->nt)); - } - if ( ratio >= 0.5 ) { - domino = 0; - } else { - domino = 1; - } - } - - minMN = libhqr_imin(A->mt, A->nt); - - /* Create a temporary qrtree structure based on the functions */ - { - qrtree_init.domino = domino; - qrtree_init.getnbgeqrf = hqr_getnbgeqrf; - qrtree_init.getm = hqr_getm; - qrtree_init.geti = hqr_geti; - qrtree_init.gettype = hqr_gettype; - qrtree_init.currpiv = hqr_currpiv; - qrtree_init.nextpiv = hqr_nextpiv; - qrtree_init.prevpiv = hqr_prevpiv; - - qrtree_init.mt = (trans == LIBHQR_QR) ? A->mt : A->nt; - qrtree_init.nt = minMN; - - a = libhqr_imin( a, qrtree_init.mt ); - - qrtree_init.a = a; - qrtree_init.p = p; - qrtree_init.args = NULL; - - arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) ); - arg->domino = domino; - arg->tsrr = tsrr; - arg->perm = NULL; - - arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); - arg->hlvl = NULL; - - low_mt = (qrtree_init.mt + p * a - 1) / ( p * a ); - - arg->llvl->minMN = minMN; - arg->llvl->ldd = low_mt; - arg->llvl->a = a; - arg->llvl->p = p; - arg->llvl->domino = domino; - - switch( type_llvl ) { - case LIBHQR_FLAT_TREE : - hqr_low_flat_init(arg->llvl); - break; - case LIBHQR_FIBONACCI_TREE : - hqr_low_fibonacci_init(arg->llvl, minMN); - break; - case LIBHQR_BINARY_TREE : - hqr_low_binary_init(arg->llvl); - break; - case LIBHQR_GREEDY1P_TREE : - hqr_low_greedy1p_init(arg->llvl, minMN); - break; - case LIBHQR_GREEDY_TREE : - default: - hqr_low_greedy_init(arg->llvl, minMN); - } - - if ( p > 1 ) { - arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); - - arg->llvl->minMN = minMN; - arg->hlvl->ldd = qrtree_init.mt; - arg->hlvl->a = a; - arg->hlvl->p = p; - arg->hlvl->domino = domino; - - switch( type_hlvl ) { - case LIBHQR_FLAT_TREE : - hqr_high_flat_init(arg->hlvl); - break; - case LIBHQR_GREEDY_TREE : - hqr_high_greedy_init(arg->hlvl, minMN); - break; - case LIBHQR_GREEDY1P_TREE : - hqr_high_greedy1p_init(arg->hlvl); - break; - case LIBHQR_BINARY_TREE : - hqr_high_binary_init(arg->hlvl); - break; - case LIBHQR_FIBONACCI_TREE : - hqr_high_fibonacci_init(arg->hlvl); - break; - default: - if ( ratio >= 0.5 ) { - hqr_high_flat_init(arg->hlvl); - } else { - hqr_high_fibonacci_init(arg->hlvl); - } - } - } - - qrtree_init.args = (void*)arg; - hqr_genperm( &qrtree_init ); - } - - /* Initialize the final QR tree */ - memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) ); - qrtree->getnbgeqrf = hqr_getnbgeqrf; - qrtree->getm = hqr_getm; - qrtree->geti = hqr_geti; - qrtree->gettype = rdmtx_gettype; - qrtree->currpiv = rdmtx_currpiv; - qrtree->nextpiv = rdmtx_nextpiv; - qrtree->prevpiv = rdmtx_prevpiv; - qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tileinfo_t) ); - - /* Initialize the matrix */ - libhqr_matrix_init(qrtree, &qrtree_init); - - /* Free the initial qrtree */ - libhqr_hqr_finalize( &qrtree_init ); - - return 0; -} - -/** - ******************************************************************************* - * - * @ingroup dplasma - * - * libhqr_hqr_finalize - Cleans the qrtree data structure allocated by call to - * libhqr_hqr_init(). - * - ******************************************************************************* - * - * @param[in,out] qrtree - * On entry, an allocated structure to destroy. - * On exit, the structure is destroy and cannot be used. - * - ******************************************************************************* - * - * @sa libhqr_hqr_init - * - ******************************************************************************/ -void -libhqr_hqr_finalize( libhqr_tree_t *qrtree ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - - if (arg != NULL) { - if ( arg->llvl != NULL) { - if ( arg->llvl->ipiv != NULL ) - free( arg->llvl->ipiv ); - free( arg->llvl ); - } - - if ( arg->hlvl != NULL) { - if ( arg->hlvl->ipiv != NULL ) - free( arg->hlvl->ipiv ); - free( arg->hlvl ); - } - - if ( arg->perm != NULL ) - free(arg->perm); - - free(arg); - } -} - - -/* - * Common functions - */ -static int svd_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); -static int svd_getm( const libhqr_tree_t *qrtree, int k, int i ); -static int svd_geti( const libhqr_tree_t *qrtree, int k, int m ); -static int svd_gettype( const libhqr_tree_t *qrtree, int k, int m ); - -#define svd_getipiv( __qrtree, _k ) ((__qrtree)->llvl->ipiv + ((__qrtree)->llvl->ldd) * (_k) ) -#define svd_geta( __qrtree, _k ) ( (svd_getipiv( (__qrtree), (_k) ))[0] ) - -/* - * Extra parameter: - * gmt - Global number of tiles in a column of the complete distributed matrix - * Return: - * The number of geqrt to execute in the panel k - */ -static int -svd_getnbgeqrf( const libhqr_tree_t *qrtree, - int k ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int p = qrtree->p; - int gmt = qrtree->mt; - int a = svd_geta(arg, k); - int pa = p * a; - int nb_1, nb_2, nb_3; - int nb_11, nb_12; - - /* Number of tasks of type 3 */ - nb_3 = p; - - /* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */ - nb_2 = nbextra1_formula; - - /* First multiple of p*a under the diagonal of step 1 */ - nb_11 = ( (k + p + pa - 1 ) / pa ) * pa; - - /* Last multiple of p*a lower than A->mt */ - nb_12 = ( gmt / pa ) * pa; - - /* Number of tasks of type 1 between nb_11 and nb_12 */ - nb_1 = (nb_12 - nb_11) / a; - - /* Add leftover */ - nb_1 += libhqr_imin( p, gmt - nb_12 ); - - return libhqr_imin( nb_1 + nb_2 + nb_3, gmt - k); -} - -/* - * Extra parameter: - * i - indice of the geqrt in the continuous space - * Return: - * The global indice m of the i th geqrt in the panel k - */ -static int -svd_getm( const libhqr_tree_t *qrtree, - int k, int i ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int p = qrtree->p; - int a = svd_geta(arg, k); - - int pos1, j, pa = p * a; - int nbextra1 = nbextra1_formula; - int nb23 = p + nbextra1; - - /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ - if ( i < nb23 ) - return k+i; - /* Tile of type 1 */ - else { - j = i - nb23; - pa = p * a; - pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; - return pos1 + (j/p) * pa + j%p; - } -} - -/* - * Extra parameter: - * m - The global indice m of a geqrt in the panel k - * Return: - * The index i of the geqrt in the panel k - */ -static int -svd_geti( const libhqr_tree_t *qrtree, - int k, int m ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int p = qrtree->p; - int a = svd_geta(arg, k); - - int pos1, j, pa = p * a; - int nbextra1 = nbextra1_formula; - int nb23 = p + nbextra1; - int end2 = p + k + nbextra1; - - /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ - if ( m < end2 ) - return m-k; - /* Tile of type 1 */ - else { - pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; - j = m - pos1; - return nb23 + (j / pa) * p + j%pa; - } -} - -/* - * Extra parameter: - * m - The global indice m of the row in the panel k - * Return - * -1 - Error - * 0 - if m is reduced thanks to a TS kernel - * 1 - if m is reduced thanks to the low level tree - * 2 - if m is reduced thanks to the bubble tree - * 3 - if m is reduced in distributed - */ -static int -svd_gettype( const libhqr_tree_t *qrtree, - int k, int m ) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int p = qrtree->p; - int a = svd_geta(arg, k); - - /* Element to be reduce in distributed */ - if (m < k + p) { - return 3; - } - /* Lower triangle of the matrix */ - else { - if( (m / p) % a == 0 ) - return 1; - else - return 0; - } -} - -/**************************************************** - * SVD_LOW_ADAPTIV_TREE - ***************************************************/ -/* Return the pivot to use for the row m at step k */ -inline static int -svd_low_adaptiv_currpiv( const hqr_subpiv_t *arg, - int k, int m ) -{ - int *ipiv = arg->ipiv + (m%arg->p * arg->minMN + k) * arg->ldd; - int a = ipiv[0]; - - ipiv+=2; - return ipiv[ ( m / arg->p ) / a ]; -} - -/* Return the last row which has used the row m as a pivot in step k before the row start */ -inline static int -svd_low_adaptiv_prevpiv( const hqr_subpiv_t *arg, - int k, int p, int start_pa ) -{ - int i; - int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k) * arg->ldd; - int a = ipiv[0]; - int ldd = ipiv[1]; - int p_pa = p / arg->p / a; - - ipiv+=2; - for( i=start_pa+1; i<ldd; i++ ) - if ( ipiv[i] == p_pa ) - return i; - return i; -} - -/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ -inline static int -svd_low_adaptiv_nextpiv( const hqr_subpiv_t *arg, - int k, int p, int start_pa ) -{ - int i; - int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k ) * arg->ldd; - int a = ipiv[0]; - int ldd = ipiv[1]; - int pa = arg->p * a; - int k_a = (k + arg->p - 1 - p%(arg->p)) / arg->p / a; - int p_pa = p / pa; - - ipiv+=2; - for( i=start_pa-1; i> k_a; i-- ) - if ( ipiv[i] == p_pa ) - return i; - - return ldd; -} - -static void -svd_low_adaptiv_init(hqr_subpiv_t *arg, - int gmt, int gnt, int nbcores, int ratio) -{ - int *ipiv; - int mt, a, p, pa, maxmt, myrank; - int j, k, height, start, end, nT, nZ; - int minMN = libhqr_imin(gmt, gnt); - - arg->currpiv = svd_low_adaptiv_currpiv; - arg->nextpiv = svd_low_adaptiv_nextpiv; - arg->prevpiv = svd_low_adaptiv_prevpiv; - - p = arg->p; - - end = 0; - - /** - * Compute the local greedy tree of each column, on each node - */ - maxmt = 1; - for(k=0; k<minMN; k++) { - /** - * The objective is to have at least two columns of TS to reduce per - * core, so it must answer the following inequality: - * ((gmt-k) / p / a ) * (gnt-k) >= ( ratio * nbcores ); - * so, - * a <= mt * (gnt-k) / (ratio * nbcores ) - */ - height = libhqr_iceil( gmt-k, p ); - a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 ); - - /* Now let's make sure all sub-parts are equilibrate */ - j = libhqr_iceil( height, a ); - a = libhqr_iceil( gmt-k, j ); - - /* Compute max dimension of the tree */ - mt = libhqr_iceil( gmt, p * a ); - maxmt = libhqr_imax( mt, maxmt ); - } - - arg->ldd = maxmt + 2; - arg->ipiv = (int*)malloc( arg->ldd * minMN * sizeof(int) * p ); - ipiv = arg->ipiv; - - memset( ipiv, 0, minMN*sizeof(int)*arg->ldd*p ); - - for ( myrank=0; myrank<p; myrank++ ) { - - /** - * Compute the local greedy tree of each column, on each node - */ - for(k=0; k<minMN; k++, ipiv += arg->ldd) { - /** - * The objective is to have at least two columns of TS to reduce per - * core, so it must answer the following inequality: - * (ldd / a ) * (gnt-k) >= ( ratio * nbcores ); - * so, - * a <= mt * (gnt-k) / (ratio * nbcores ) - */ - height = libhqr_iceil( gmt-k, p ); - a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 ); - - /* Now let's make sure all sub-parts are equilibrate */ - j = libhqr_iceil( height, a ); - a = libhqr_iceil( gmt-k, j ); - - pa = p * a; - mt = libhqr_iceil( gmt, pa ); - ipiv[0] = a; - ipiv[1] = mt; - - assert( a > 0 ); - assert( mt < arg->ldd-1 ); - - /* Number of tiles to factorized in this column on this rank */ - nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); - /* Number of tiles already killed */ - nZ = 0; - - assert( nT <= mt ); - - /* No more computations on this node */ - if ( nT == 0 ) { - continue; - } - - while( nZ < (nT-1) ) { - height = (nT - nZ) / 2; - start = mt - nZ - 1; - end = start - height; - nZ += height; - - for( j=start; j > end; j-- ) { - ipiv[ j+2 ] = (j - height); - } - } - assert( nZ+1 == nT ); - } - } - -#if 0 - { - int m, k; - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - printf( "%3d ", ipiv[k*(arg->ldd) + m] ); - } - printf("\n"); - } - } - if (!arg->domino) { - int m, k, myrank; - for ( myrank=1; myrank<p; myrank++ ) { - ipiv += arg->ldd * minMN; - printf("-------- rank %d ---------\n", myrank ); - for(m=0; m<mt; m++) { - printf("%3d | ", m); - for (k=0; k<minMN; k++) { - int k_a = (k + p - 1 - myrank) / p / a; - if ( m >= k_a ) - printf( "%3d ", ipiv[k * arg->ldd + m] ); - else - printf( "--- " ); - } - printf("\n"); - } - } - } -#endif -}; - -/**************************************************** - * - * Generic functions currpiv,prevpiv,nextpiv - * - ***************************************************/ -static int svd_currpiv(const libhqr_tree_t *qrtree, int k, int m) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, tmpk; - int lm, rank; - int a = svd_geta( arg, k ); - int p = qrtree->p; - int gmt = qrtree->mt; - - lm = m / p; /* Local index in the distribution over p domains */ - rank = m % p; /* Staring index in this distribution */ - - /* TS level common to every case */ - switch( svd_gettype( qrtree, k, m ) ) - { - case 0: - tmp = lm / a; - /* tmpk = (k + p - 1 - m%p) / p / a; */ - tmpk = k / (p * a); - return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank; - break; - case 1: - tmp = arg->llvl->currpiv(arg->llvl, k, m); - /* tmpk = (k + p - 1 - m%p) / p / a; */ - tmpk = k / (p * a); - return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank; - break; - case 2: - assert(0); - break; - case 3: - if ( arg->hlvl != NULL ) - return arg->hlvl->currpiv(arg->hlvl, k, m); - default: - return gmt; - } - return -1; -}; - -/** - * svd_nextpiv - Computes the next row killed by the row p, after - * it has kill the row start. - * - * @param[in] k - * Factorization step - * - * @param[in] pivot - * Line used as killer - * - * @param[in] start - * Starting point to search the next line killed by p after start - * start must be equal to A.mt to find the first row killed by p. - * if start != A.mt, start must be killed by p - * - * @return: - * - -1 if start doesn't respect the previous conditions - * - m, the following row killed by p if it exists, A->mt otherwise - */ -static int svd_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, ls, lp, nextp; - int rpivot, lstart; - int *ipiv = svd_getipiv( arg, k ); - int a = ipiv[0]; - int ldd = ipiv[1]; - int p = qrtree->p; - int gmt = qrtree->mt; - - rpivot = pivot % p; /* Staring index in this distribution */ - - /* Local index in the distribution over p domains */ - lstart = ( start == gmt ) ? ldd * a : start / p; - - myassert( start > pivot && pivot >= k ); - myassert( start == gmt || pivot == svd_currpiv( qrtree, k, start ) ); - - /* TS level common to every case */ - ls = (start < gmt) ? svd_gettype( qrtree, k, start ) : -1; - lp = svd_gettype( qrtree, k, pivot ); - - switch( ls ) - { - case LIBHQR_KILLED_BY_DOMINO: - assert(0); - case -1: - - if ( lp == LIBHQR_KILLED_BY_TS ) { - myassert( start == gmt ); - return gmt; - } - - case LIBHQR_KILLED_BY_TS: - if ( start == gmt ) - nextp = pivot + p; - else - nextp = start + p; - - if ( ( nextp < gmt ) && - ( nextp < pivot + a*p ) && - ( (nextp/p)%a != 0 ) ) - return nextp; - start = gmt; - lstart = ldd * a; - - case LIBHQR_KILLED_BY_LOCALTREE: - /* Get the next pivot for the low level tree */ - tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a ); - - if ( (tmp * a * p + rpivot >= gmt) - && (tmp == ldd-1) ) - tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp); - - if ( tmp != ldd ) - return tmp * a * p + rpivot; - - /* no next of type 1, we reset start to search the next 2 */ - start = gmt; - lstart = ldd * a; - - case LIBHQR_KILLED_BY_DISTTREE: - - if ( lp < LIBHQR_KILLED_BY_DISTTREE ) { - return gmt; - } - - if( arg->hlvl != NULL ) { - tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start ); - if ( tmp != gmt ) - return tmp; - } - - default: - return gmt; - } -} - -/** - * svd_prevpiv - Computes the previous row killed by the row p, before - * to kill the row start. - * - * @param[in] k - * Factorization step - * - * @param[in] pivot - * Line used as killer - * - * @param[in] start - * Starting point to search the previous line killed by p before start - * start must be killed by p, and start must be greater or equal to p - * - * @return: - * - -1 if start doesn't respect the previous conditions - * - m, the previous row killed by p if it exists, A->mt otherwise - */ -static int -svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) -{ - hqr_args_t *arg = (hqr_args_t*)(qrtree->args); - int tmp, ls, lp, nextp; - int lpivot, rpivot, lstart; - int *ipiv = svd_getipiv( arg, k ); - int a = ipiv[0]; - int ldd = ipiv[1]; - int p = qrtree->p; - int gmt = qrtree->mt; - - lpivot = pivot / p; /* Local index in the distribution over p domains */ - rpivot = pivot % p; /* Staring index in this distribution */ - lstart = start / p; /* Local index in the distribution over p domains */ - - myassert( start >= pivot && pivot >= k && start < gmt ); - myassert( start == pivot || pivot == svd_currpiv( qrtree, k, start ) ); - - /* TS level common to every case */ - ls = svd_gettype( qrtree, k, start ); - lp = svd_gettype( qrtree, k, pivot ); - - if ( lp == LIBHQR_KILLED_BY_TS ) - return gmt; - - myassert( lp >= ls ); - switch( ls ) - { - case LIBHQR_KILLED_BY_DOMINO: - assert(0); - case LIBHQR_KILLED_BY_DISTTREE: - if( arg->hlvl != NULL ) { - tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); - if ( tmp != gmt ) - return tmp; - } - - start = pivot; - lstart = pivot / p; - - case LIBHQR_KILLED_BY_LOCALTREE: - tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a); - - if ( (tmp * a * p + rpivot >= gmt) - && (tmp == ldd-1) ) - tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp); - - if ( tmp != ldd ) - return tmp * a * p + rpivot; - - start = pivot; - - case LIBHQR_KILLED_BY_TS: - /* Search for predecessor in TS tree */ - /* if ( ( start+p < gmt ) && */ - /* ( (((start+p) / p) % a) != 0 ) ) */ - /* return perm[start + p]; */ - - if ( start == pivot ) { - tmp = lpivot + a - 1 - lpivot%a; - nextp = tmp * p + rpivot; - - while( pivot < nextp && nextp >= gmt ) - nextp -= p; - } else { - nextp = start - p; /*(lstart - 1) * p + rpivot;*/ - } - assert(nextp < gmt); - if ( pivot < nextp ) - return nextp; - - default: - return gmt; - } -}; - -/** - ******************************************************************************* - * - * @ingroup libhqr - * - * libhqr_svd_init - Create the tree structures that will describes the - * operation performed during QR/LQ reduction step of the gebrd_ge2gb operation. - * - * Trees available parameters are described below. It is recommended to: - * - set p to the same value than the P-by-Q process grid used to distribute - * the data. (P for QR factorization, Q for LQ factorization). - * - set the low level tree to LIBHQR_GREEDY_TREE. - * - set the high level tree to: - * 1) LIBHQR_FLAT_TREE when the problem is square, because it divides - * by two the volume of communication of any other tree. - * 2) LIBHQR_FIBONACCI_TREE when the problem is tall and skinny (QR) or - * small and fat (LQ), because it reduces the critical path length. - * - Disable the domino effect when problem is square, to keep high efficiency - * kernel proportion high. - * - Enable the domino effect when problem is tall and skinny (QR) or - * small and fat (LQ) to increase parallelism and reduce critical path length. - * - Round-robin on TS domain (tsrr) option should be disabled. It is - * experimental and is not safe. - * - * These are the default when a parameter is set to -1; - * - * See http://www.netlib.org/lapack/lawnspdf/lawn257.pdf - * - ******************************************************************************* - * - * @param[in,out] qrtree - * On entry, an allocated structure uninitialized. - * On exit, the structure initialized according to the given parameters. - * - * @param[in] trans - * @arg LIBHQR_QR: Structure is initialized for the QR steps. - * @arg LIBHQR_LQ: Structure is initialized for the LQ steps. - * - * @param[in,out] A - * Descriptor of the distributed matrix A to be factorized, on which - * QR/LQ reduction steps will be performed. In case, of - * R-bidiagonalization, don't forget to provide the square submatrix - * that is concerned by those operations. - * The descriptor is untouched and only mt/nt/P parameters are used. - * - * @param[in] type_hlvl - * Defines the tree used to reduce the main tiles of each domain. This - * is a band lower diagonal matrix of width p. - * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the tiles. - * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the tiles. - * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the - * tiles. - * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the tiles. - * @arg LIBHQR_GREEDY1P_TREE: A Greedy tree is computed for the first - * column and then duplicated on all others. - * @arg -1: The default is used (LIBHQR_FIBONACCI_TREE) - * - * @param[in] p - * Defines the number of distributed domains, ie the width of the high - * level reduction tree. If p == 1, no high level reduction tree is - * used. If p == mt, this enforce the high level reduction tree to be - * performed on the full matrix. - * By default, it is recommended to set p to P if trans == - * LIBHQR_QR, and to Q otherwise, where P-by-Q is the process grid - * used to distributed the data. (p > 0) - * - * @param[in] nbthread_per_node - * Define the number of working threads per node to configure the - * adaptativ local tree to provide at least (ratio * nbthread_per_node) - * tasks per step when possible by creating the right amount of TS and - * TT kernels. - * - * @param[in] ratio - * Define the minimal number of tasks per thread that the adaptiv tree - * must provide at the lowest level of the tree. - * - ******************************************************************************* - * - * @return - * \retval -i if the ith parameters is incorrect. - * \retval 0 on success. - * - ******************************************************************************* - * - * @sa libhqr_hqr_finalize - * @sa libhqr_hqr_init - * @sa dplasma_zgeqrf_param - * @sa dplasma_cgeqrf_param - * @sa dplasma_dgeqrf_param - * @sa dplasma_sgeqrf_param - * - ******************************************************************************/ -int -libhqr_svd_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, - int type_hlvl, int p, int nbthread_per_node, int ratio ) -{ - int low_mt, minMN, a = -1; - hqr_args_t *arg; - libhqr_tree_t qrtree_init; - - if (qrtree == NULL) { - fprintf(stderr,"libhqr_svd_init, illegal value of qrtree"); - return -1; - } - if ((trans != LIBHQR_QR) && - (trans != LIBHQR_LQ)) { - fprintf(stderr, "libhqr_svd_init, illegal value of trans"); - return -2; - } - if (A == NULL) { - fprintf(stderr, "libhqr_svd_init, illegal value of A"); - return -3; - } - - /* Compute parameters */ - p = libhqr_imax( p, 1 ); - - /* Create a temporary qrtree structure based on the functions */ - { - qrtree_init.getnbgeqrf = svd_getnbgeqrf; - qrtree_init.getm = svd_getm; - qrtree_init.geti = svd_geti; - qrtree_init.gettype = svd_gettype; - qrtree_init.currpiv = svd_currpiv; - qrtree_init.nextpiv = svd_nextpiv; - qrtree_init.prevpiv = svd_prevpiv; - - qrtree_init.mt = (trans == LIBHQR_QR) ? A->mt : A->nt; - qrtree_init.nt = (trans == LIBHQR_QR) ? A->nt : A->mt; - - qrtree_init.a = a; - qrtree_init.p = p; - qrtree_init.args = NULL; - - arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) ); - arg->domino = 0; - arg->tsrr = 0; - arg->perm = NULL; - - arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); - arg->hlvl = NULL; - - minMN = libhqr_imin(A->mt, A->nt); - low_mt = (qrtree_init.mt + p - 1) / ( p ); - - arg->llvl->minMN = minMN; - arg->llvl->ldd = low_mt; - arg->llvl->a = a; - arg->llvl->p = p; - arg->llvl->domino = 0; - - svd_low_adaptiv_init(arg->llvl, qrtree_init.mt, qrtree_init.nt, - nbthread_per_node * (A->nodes / p), ratio ); - - if ( p > 1 ) { - arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); - - arg->llvl->minMN = minMN; - arg->hlvl->ldd = qrtree_init.mt; - arg->hlvl->a = a; - arg->hlvl->p = p; - arg->hlvl->domino = 0; - - switch( type_hlvl ) { - case LIBHQR_FLAT_TREE : - hqr_high_flat_init(arg->hlvl); - break; - case LIBHQR_GREEDY_TREE : - hqr_high_greedy_init(arg->hlvl, minMN); - break; - case LIBHQR_GREEDY1P_TREE : - hqr_high_greedy1p_init(arg->hlvl); - break; - case LIBHQR_BINARY_TREE : - hqr_high_binary_init(arg->hlvl); - break; - case LIBHQR_FIBONACCI_TREE : - hqr_high_fibonacci_init(arg->hlvl); - break; - default: - hqr_high_fibonacci_init(arg->hlvl); - } - } - - qrtree_init.args = (void*)arg; - hqr_genperm( &qrtree_init ); - } - - /* Initialize the final QR tree */ - memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) ); - qrtree->getnbgeqrf = svd_getnbgeqrf; - qrtree->getm = svd_getm; - qrtree->geti = svd_geti; - qrtree->gettype = rdmtx_gettype; - qrtree->currpiv = rdmtx_currpiv; - qrtree->nextpiv = rdmtx_nextpiv; - qrtree->prevpiv = rdmtx_prevpiv; - qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tileinfo_t) ); - - /* Initialize the matrix */ - libhqr_matrix_init(qrtree, &qrtree_init); - - /* Free the initial qrtree */ - libhqr_hqr_finalize( &qrtree_init ); - - return 0; -} - -void libhqr_matrix_init(libhqr_tree_t *qrtree, const libhqr_tree_t *qrtree_init){ - int i, minMN, tile_id, p, k; - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - minMN = libhqr_imin(qrtree->nt, qrtree->mt); - for (k = 0; k < minMN; k++){ - for (i = 0; i < qrtree->mt; i++){ - tile_id = (k * qrtree->mt) + i; - - arg[tile_id].type = qrtree_init->gettype(qrtree_init, k, i); - assert( (i < k) || (arg[tile_id].type >= 0) ); - arg[tile_id].currpiv = qrtree_init->currpiv(qrtree_init, k, i); - p = arg[tile_id].currpiv; - arg[tile_id].first_nextpiv = qrtree_init->nextpiv(qrtree_init, k, i, qrtree->mt); - arg[tile_id].first_prevpiv = qrtree_init->prevpiv(qrtree_init, k, i, i); - arg[tile_id].nextpiv = qrtree_init->nextpiv(qrtree_init, k, p, i); - arg[tile_id].prevpiv = qrtree_init->prevpiv(qrtree_init, k, p, i); - assert( (arg[tile_id].nextpiv <= qrtree->mt && arg[tile_id].nextpiv >= k) || arg[tile_id].nextpiv==-1 ); - assert( (arg[tile_id].prevpiv <= qrtree->mt && arg[tile_id].prevpiv >= k) || arg[tile_id].prevpiv==-1 ); - assert( (arg[tile_id].first_nextpiv <= qrtree->mt && arg[tile_id].first_nextpiv >= k) || arg[tile_id].first_nextpiv==-1 ); - assert( (arg[tile_id].first_prevpiv <= qrtree->mt && arg[tile_id].first_prevpiv >= k) || arg[tile_id].first_prevpiv==-1 ); - } - } - qrtree->args = (void*)arg; -} - -void libhqr_matrix_finalize(libhqr_tree_t *qrtree){ - libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args); - free(arg); -} diff --git a/src/low_adaptiv.c b/src/low_adaptiv.c new file mode 100644 index 0000000000000000000000000000000000000000..ba6f30f8ec88472445307e33743168d1cddf3563 --- /dev/null +++ b/src/low_adaptiv.c @@ -0,0 +1,202 @@ +/** + * + * @file low_adaptiv.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level adaptiv tree for SVD/EVD reduction to band. + * + */ +#include "libhqr_internal.h" +#include <stdlib.h> + +/* Return the pivot to use for the row m at step k */ +inline static int +svd_low_adaptiv_currpiv( const hqr_subpiv_t *arg, + int k, int m ) +{ + int *ipiv = arg->ipiv + (m%arg->p * arg->minMN + k) * arg->ldd; + int a = ipiv[0]; + + ipiv+=2; + return ipiv[ ( m / arg->p ) / a ]; +} + +/* Return the last row which has used the row m as a pivot in step k before the row start */ +inline static int +svd_low_adaptiv_prevpiv( const hqr_subpiv_t *arg, + int k, int p, int start_pa ) +{ + int i; + int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k) * arg->ldd; + int a = ipiv[0]; + int ldd = ipiv[1]; + int p_pa = p / arg->p / a; + + ipiv+=2; + for( i=start_pa+1; i<ldd; i++ ) + if ( ipiv[i] == p_pa ) + return i; + return i; +} + +/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ +inline static int +svd_low_adaptiv_nextpiv( const hqr_subpiv_t *arg, + int k, int p, int start_pa ) +{ + int i; + int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k ) * arg->ldd; + int a = ipiv[0]; + int ldd = ipiv[1]; + int pa = arg->p * a; + int k_a = (k + arg->p - 1 - p%(arg->p)) / arg->p / a; + int p_pa = p / pa; + + ipiv+=2; + for( i=start_pa-1; i> k_a; i-- ) + if ( ipiv[i] == p_pa ) + return i; + + return ldd; +} + +void +svd_low_adaptiv_init( hqr_subpiv_t *arg, + int gmt, int gnt, int nbcores, int ratio ) +{ + int *ipiv; + int mt, a, p, pa, maxmt, myrank; + int j, k, height, start, end, nT, nZ; + int minMN = libhqr_imin(gmt, gnt); + + arg->currpiv = svd_low_adaptiv_currpiv; + arg->nextpiv = svd_low_adaptiv_nextpiv; + arg->prevpiv = svd_low_adaptiv_prevpiv; + + p = arg->p; + + end = 0; + + /** + * Compute the local greedy tree of each column, on each node + */ + maxmt = 1; + for(k=0; k<minMN; k++) { + /** + * The objective is to have at least two columns of TS to reduce per + * core, so it must answer the following inequality: + * ((gmt-k) / p / a ) * (gnt-k) >= ( ratio * nbcores ); + * so, + * a <= mt * (gnt-k) / (ratio * nbcores ) + */ + height = libhqr_iceil( gmt-k, p ); + a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 ); + + /* Now let's make sure all sub-parts are equilibrate */ + j = libhqr_iceil( height, a ); + a = libhqr_iceil( gmt-k, j ); + + /* Compute max dimension of the tree */ + mt = libhqr_iceil( gmt, p * a ); + maxmt = libhqr_imax( mt, maxmt ); + } + + arg->ldd = maxmt + 2; + arg->ipiv = (int*)calloc( arg->ldd * minMN * p, sizeof(int) ); + ipiv = arg->ipiv; + + for ( myrank=0; myrank<p; myrank++ ) { + + /** + * Compute the local greedy tree of each column, on each node + */ + for(k=0; k<minMN; k++, ipiv += arg->ldd) { + /** + * The objective is to have at least two columns of TS to reduce per + * core, so it must answer the following inequality: + * (ldd / a ) * (gnt-k) >= ( ratio * nbcores ); + * so, + * a <= mt * (gnt-k) / (ratio * nbcores ) + */ + height = libhqr_iceil( gmt-k, p ); + a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 ); + + /* Now let's make sure all sub-parts are equilibrate */ + j = libhqr_iceil( height, a ); + a = libhqr_iceil( gmt-k, j ); + + pa = p * a; + mt = libhqr_iceil( gmt, pa ); + ipiv[0] = a; + ipiv[1] = mt; + + assert( a > 0 ); + assert( mt < arg->ldd-1 ); + + /* Number of tiles to factorized in this column on this rank */ + nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); + /* Number of tiles already killed */ + nZ = 0; + + assert( nT <= mt ); + + /* No more computations on this node */ + if ( nT == 0 ) { + continue; + } + + while( nZ < (nT-1) ) { + height = (nT - nZ) / 2; + start = mt - nZ - 1; + end = start - height; + nZ += height; + + for( j=start; j > end; j-- ) { + ipiv[ j+2 ] = (j - height); + } + } + assert( nZ+1 == nT ); + } + } + +#if 0 + { + int m, k; + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + printf( "%3d ", ipiv[k*(arg->ldd) + m] ); + } + printf("\n"); + } + } + if (!arg->domino) { + int m, k, myrank; + for ( myrank=1; myrank<p; myrank++ ) { + ipiv += arg->ldd * minMN; + printf("-------- rank %d ---------\n", myrank ); + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + int k_a = (k + p - 1 - myrank) / p / a; + if ( m >= k_a ) + printf( "%3d ", ipiv[k * arg->ldd + m] ); + else + printf( "--- " ); + } + printf("\n"); + } + } + } +#endif +} diff --git a/src/low_binary.c b/src/low_binary.c new file mode 100644 index 0000000000000000000000000000000000000000..0b05a8f66c778735ffc4f3acde559aecfd5d6f8a --- /dev/null +++ b/src/low_binary.c @@ -0,0 +1,109 @@ +/** + * + * @file low_binary.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level binary tree + * + */ +#include "libhqr_internal.h" +#include <math.h> + +static inline int +hqr_low_binary_currpiv(const hqr_subpiv_t *arg, int k, int m) +{ + int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a; + int m_pa = (m / arg->p ) / arg->a; + + int tmp1 = m_pa - k_a; + int tmp2 = 1; + (void)arg; + + if ( tmp1 == 0) + return 0; + while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) { + tmp1 = tmp1 >> 1; + tmp2 = tmp2 << 1; + } + assert( m_pa - tmp2 >= k_a ); + return m_pa - tmp2; +} + +static inline int +hqr_low_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) +{ + int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; + int p_pa = (p / arg->p ) / arg->a; + + int tmpp, bit; + myassert( (start_pa == arg->ldd) || (hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa || !arg->domino) ); + + if ( start_pa <= p_pa ) + return arg->ldd; + + int offset = p_pa-k_a; + bit = 0; + if (start_pa != arg->ldd) { + while( ( (start_pa-k_a) & (1 << bit ) ) == 0 ) + bit++; + bit++; + } + + tmpp = offset | (1 << bit); + if ( ( tmpp != offset ) && ( tmpp+k_a < arg->ldd ) ) + return tmpp + k_a; + else + return arg->ldd; +} + +static inline int +hqr_low_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) +{ + int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; + int p_pa = (p / arg->p ) / arg->a; + int offset = p_pa - k_a; + + myassert( start_pa >= p_pa && ( start_pa == p_pa || !arg->domino || + hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa ) ); + + if ( (start_pa == p_pa) && ( offset%2 == 0 ) ) { + int i, bit, tmp; + if ((p_pa - k_a) == 0) + bit = (int)( log( (double)(arg->ldd - k_a) ) / log( 2. ) ); + else { + bit = 0; + while( (offset & (1 << bit )) == 0 ) + bit++; + } + for( i=bit; i>-1; i--){ + tmp = offset | (1 << i); + if ( ( offset != tmp ) && ( tmp+k_a < arg->ldd ) ) + return tmp+k_a; + } + return arg->ldd; + } + + if ( (start_pa - p_pa) > 1 ) + return p_pa + ( (start_pa - p_pa) >> 1 ); + else { + return arg->ldd; + } +} + +void +hqr_low_binary_init(hqr_subpiv_t *arg) { + arg->currpiv = hqr_low_binary_currpiv; + arg->nextpiv = hqr_low_binary_nextpiv; + arg->prevpiv = hqr_low_binary_prevpiv; + arg->ipiv = NULL; +} diff --git a/src/low_fibonacci.c b/src/low_fibonacci.c new file mode 100644 index 0000000000000000000000000000000000000000..da6a6b5d1db5e5a51481a1b5614bc9772fa2db27 --- /dev/null +++ b/src/low_fibonacci.c @@ -0,0 +1,91 @@ +/** + * + * @file low_fibonacci.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level fibonacci tree + * + */ +#include "libhqr_internal.h" +#include <stdlib.h> + +/* Return the pivot to use for the row m at step k */ +static inline int +hqr_low_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { + int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - m%(qrpiv->p)) / qrpiv->p / qrpiv->a; + return (qrpiv->ipiv)[ k_a * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; +} + +/* Return the last row which has used the row m as a pivot in step k before the row start */ +static inline int +hqr_low_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; + int p_pa = (p / qrpiv->p ) / qrpiv->a; + + for( i=start_pa+1; i<(qrpiv->ldd); i++ ) + if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa ) + return i; + return i; +} + +/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ +static inline int +hqr_low_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; + int p_pa = (p / qrpiv->p ) / qrpiv->a; + + for( i=start_pa-1; i>k_a; i-- ) + if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa ) + return i; + return (qrpiv->ldd); +} + +void +hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN) { + int *ipiv; + int mt; + + arg->currpiv = hqr_low_fibonacci_currpiv; + arg->nextpiv = hqr_low_fibonacci_nextpiv; + arg->prevpiv = hqr_low_fibonacci_prevpiv; + + mt = arg->ldd; + + arg->ipiv = (int*)calloc( mt * minMN, sizeof(int) ); + ipiv = arg->ipiv; + + /* + * Fibonacci of order 1: + * u_(n+1) = u_(n) + 1 + */ + { + int f1, k, m; + + /* Fill in the first column */ + f1 = 1; + for (m=1; m < mt; ) { + for (k=0; (k < f1) && (m < mt); k++, m++) { + ipiv[m] = m - f1; + } + f1++; + } + + for( k=1; k<minMN; k++) { + for(m=k+1; m < mt; m++) { + ipiv[ k * mt + m ] = ipiv[ (k-1) * mt + m - 1 ] + 1; + } + } + } +} diff --git a/src/low_flat.c b/src/low_flat.c new file mode 100644 index 0000000000000000000000000000000000000000..09b698786f7149db0c02aa16c410b4a8935736d7 --- /dev/null +++ b/src/low_flat.c @@ -0,0 +1,81 @@ +/** + * + * @file low_flat.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level flat tree + * + */ +#include "libhqr_internal.h" + +static inline int +hqr_low_flat_currpiv(const hqr_subpiv_t *arg, int k, int m) +{ + (void)m; + if ( arg->domino ) + return k / arg->a; + else + return (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a ; +} + +static inline int +hqr_low_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) +{ + int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; + int p_pa = (p / arg->p ) / arg->a; + +#ifdef FLAT_UP + if ( ( p_pa == k_a ) && (start_pa > k_a+1 ) ) + return start_pa-1; + else +#else /* FLAT_DOWN */ + if ( start_pa <= p_pa ) + return arg->ldd; + + if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) { + if ( start_pa == arg->ldd ) + return p_pa+1; + else if ( start_pa < arg->ldd ) + return start_pa+1; + } +#endif + return arg->ldd; +} + +static inline int +hqr_low_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa) +{ + int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a; + int p_pa = (p / arg->p ) / arg->a; + +#ifdef FLAT_UP + if ( p_pa == k_a && (start_pa+1 < arg->ldd) ) + return start_pa+1; + else +#else + if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) { + if ( start_pa == p_pa ) + return arg->ldd - 1; + else if ( start_pa > p_pa + 1 ) + return start_pa-1; + } +#endif + return arg->ldd; +} + +void hqr_low_flat_init(hqr_subpiv_t *arg){ + arg->currpiv = hqr_low_flat_currpiv; + arg->nextpiv = hqr_low_flat_nextpiv; + arg->prevpiv = hqr_low_flat_prevpiv; + arg->ipiv = NULL; +} diff --git a/src/low_greedy.c b/src/low_greedy.c new file mode 100644 index 0000000000000000000000000000000000000000..497410b0ff7401d08f446c7543148977cd15c530 --- /dev/null +++ b/src/low_greedy.c @@ -0,0 +1,221 @@ +/** + * + * @file low_greedy.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level greedy tree + * + */ +#include "libhqr_internal.h" +#include <stdlib.h> +#include <string.h> + +/* Return the pivot to use for the row m at step k */ +static inline int +hqr_low_greedy_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { + if (qrpiv->domino) + return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; + else + return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd) + + ( ( m / qrpiv->p ) / qrpiv->a ) ]; +} + +/* Return the last row which has used the row m as a pivot in step k before the row start */ +static inline int +hqr_low_greedy_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int p_pa = p / qrpiv->p / qrpiv->a; + int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; + + for( i=start_pa+1; i<(qrpiv->ldd); i++ ) + if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) + return i; + return i; + } + +/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ +static inline int +hqr_low_greedy_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int pa = qrpiv->p * qrpiv->a; + int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; + int p_pa = p / pa; + int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; + + for( i=start_pa-1; i> k_a; i-- ) + if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) + return i; + + return (qrpiv->ldd); +} + +void +hqr_low_greedy_init(hqr_subpiv_t *arg, int minMN) { + int *ipiv; + int mt, a, p, pa, domino; + + arg->currpiv = hqr_low_greedy_currpiv; + arg->nextpiv = hqr_low_greedy_nextpiv; + arg->prevpiv = hqr_low_greedy_prevpiv; + + mt = arg->ldd; + a = arg->a; + p = arg->p; + pa = p * a; + domino = arg->domino; + + if ( domino ) + { + int j, k, height, start, end, firstk = 0; + int *nT, *nZ; + + arg->minMN = libhqr_imin( minMN, mt*a ); + minMN = arg->minMN; + + arg->ipiv = (int*)calloc( mt * minMN, sizeof(int) ); + ipiv = arg->ipiv; + + nT = (int*)calloc(minMN, sizeof(int)); + nZ = (int*)calloc(minMN, sizeof(int)); + + nT[0] = mt; + + k = 0; + while ( (!( ( nT[minMN-1] == mt - ( (minMN - 1) / a ) ) && + ( nZ[minMN-1]+1 == nT[minMN-1] ) ) ) + && ( firstk < minMN ) ) { + height = (nT[k] - nZ[k]) / 2; + if ( height == 0 ) { + while ( (firstk < minMN) && + ( nT[firstk] == mt - ( firstk / a ) ) && + ( nZ[firstk]+1 == nT[firstk] ) ) { + if ( (( firstk % a) != a-1 ) + && ( firstk < minMN-1 ) ) + nT[firstk+1]++; + firstk++; + } + k = firstk; + continue; + } + + if (k < minMN-1) nT[k+1] += height; + start = mt - nZ[k] - 1; + end = start - height; + nZ[k] += height; + + for( j=start; j > end; j-- ) { + ipiv[ k*mt + j ] = (j - height); + } + + k++; + if (k > minMN-1) k = firstk; + } + + free(nT); + free(nZ); + } + else + { + int j, k, myrank, height, start, end, firstk = 0; + int *nT2DO = (int*)malloc(minMN*sizeof(int)); + int *nT = (int*)malloc(minMN*sizeof(int)); + int *nZ = (int*)malloc(minMN*sizeof(int)); + + arg->ipiv = (int*)calloc( mt * minMN * p, sizeof(int) ); + ipiv = arg->ipiv; + + for ( myrank=0; myrank<p; myrank++ ) { + int lminMN = minMN; + + memset( nT2DO, 0, minMN*sizeof(int)); + memset( nT, 0, minMN*sizeof(int)); + memset( nZ, 0, minMN*sizeof(int)); + + nT[0] = mt; + + for(k=0; k<lminMN; k++) { + nT2DO[k] = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); + if ( nT2DO[k] == 0 ) { + lminMN = k; + break; + } + } + + k = 0; firstk = 0; + while ( (!( ( nT[lminMN-1] == nT2DO[lminMN-1] ) && + ( nZ[lminMN-1]+1 == nT[lminMN-1] ) ) ) + && ( firstk < lminMN ) ) { + height = (nT[k] - nZ[k]) / 2; + if ( height == 0 ) { + while ( (firstk < lminMN) && + ( nT[firstk] == nT2DO[firstk] ) && + ( nZ[firstk]+1 == nT[firstk] ) ) { + if ( ( firstk < lminMN-1 ) && + (( (firstk) % pa) != ((a-1)*p+myrank) ) ) + nT[firstk+1]++; + firstk++; + } + k = firstk; + continue; + } + + if (k < lminMN-1) nT[k+1] += height; + start = mt - nZ[k] - 1; + end = start - height; + nZ[k] += height; + + for( j=start; j > end; j-- ) { + ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height); + } + + k++; + if (k > lminMN-1) k = firstk; + } + } + + free(nT2DO); + free(nT); + free(nZ); + } + +#if 0 + { + int m, k; + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + printf( "%3d ", ipiv[k*mt + m] ); + } + printf("\n"); + } + } + if (!arg->domino) { + int m, k, myrank; + for ( myrank=1; myrank<p; myrank++ ) { + ipiv += mt*minMN; + printf("-------- rank %d ---------\n", myrank ); + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + int k_a = (k + p - 1 - myrank) / p / a; + if ( m >= k_a ) + printf( "%3d ", ipiv[k*mt + m] ); + else + printf( "--- " ); + } + printf("\n"); + } + } + } +#endif +} diff --git a/src/low_greedy1p.c b/src/low_greedy1p.c new file mode 100644 index 0000000000000000000000000000000000000000..7ec8692c8a8c70a92a9e95032b2f2855a0932e3f --- /dev/null +++ b/src/low_greedy1p.c @@ -0,0 +1,177 @@ +/** + * + * @file low_greedy1p.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * Functions for low level duplicated greedy tree + * + */ +#include "libhqr_internal.h" +#include <stdlib.h> + +/* Return the pivot to use for the row m at step k */ +static inline int +hqr_low_greedy1p_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) { + if (qrpiv->domino) + return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ]; + else + return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd) + + ( ( m / qrpiv->p ) / qrpiv->a ) ]; +} + +/* Return the last row which has used the row m as a pivot in step k before the row start */ +static inline int +hqr_low_greedy1p_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int p_pa = p / qrpiv->p / qrpiv->a; + int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; + + for( i=start_pa+1; i<(qrpiv->ldd); i++ ) + if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) + return i; + return i; +} + +/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */ +static inline int +hqr_low_greedy1p_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) { + int i; + int pa = qrpiv->p * qrpiv->a; + int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a; + int p_pa = p / pa; + int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd; + + for( i=start_pa-1; i> k_a; i-- ) + if ( ipiv[i + k * (qrpiv->ldd)] == p_pa ) + return i; + + return (qrpiv->ldd); +} + +void +hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN) { + int *ipiv; + int mt, a, p, pa, domino; + int j, k, height, start, end, nT, nZ; + + arg->currpiv = hqr_low_greedy1p_currpiv; + arg->nextpiv = hqr_low_greedy1p_nextpiv; + arg->prevpiv = hqr_low_greedy1p_prevpiv; + + mt = arg->ldd; + a = arg->a; + p = arg->p; + pa = p * a; + domino = arg->domino; + + /* This section has not been coded yet, and will perform a classic greedy */ + if ( domino ) + { + arg->minMN = libhqr_imin( minMN, mt*a ); + minMN = arg->minMN; + + arg->ipiv = (int*)calloc( mt * minMN, sizeof(int) ); + ipiv = arg->ipiv; + + /** + * Compute the local greedy tree of each column, on each node + */ + for(k=0; k<minMN; k++) { + /* Number of tiles to factorized in this column on this rank */ + nT = libhqr_imax( mt - (k / a), 0 ); + /* Number of tiles already killed */ + nZ = 0; + + while( nZ < (nT-1) ) { + height = (nT - nZ) / 2; + start = mt - nZ - 1; + end = start - height; + nZ += height; + + for( j=start; j > end; j-- ) { + ipiv[ k*mt + j ] = (j - height); + } + } + assert( nZ+1 == nT ); + } + } + else + { + int myrank; + end = 0; + + arg->ipiv = (int*)calloc( mt * minMN * p, sizeof(int) ); + ipiv = arg->ipiv; + + for ( myrank=0; myrank<p; myrank++ ) { + + /** + * Compute the local greedy tree of each column, on each node + */ + for(k=0; k<minMN; k++) { + /* Number of tiles to factorized in this column on this rank */ + nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 ); + /* Number of tiles already killed */ + nZ = 0; + + /* No more computations on this node */ + if ( nT == 0 ) { + break; + } + + while( nZ < (nT-1) ) { + height = (nT - nZ) / 2; + start = mt - nZ - 1; + end = start - height; + nZ += height; + + for( j=start; j > end; j-- ) { + ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height); + } + } + assert( nZ+1 == nT ); + } + } + } + +#if 0 + { + int m, k; + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + printf( "%3d ", ipiv[k*mt + m] ); + } + printf("\n"); + } + } + if (!arg->domino) { + int m, k, myrank; + for ( myrank=1; myrank<p; myrank++ ) { + ipiv += mt*minMN; + printf("-------- rank %d ---------\n", myrank ); + for(m=0; m<mt; m++) { + printf("%3d | ", m); + for (k=0; k<minMN; k++) { + int k_a = (k + p - 1 - myrank) / p / a; + if ( m >= k_a ) + printf( "%3d ", ipiv[k*mt + m] ); + else + printf( "--- " ); + } + printf("\n"); + } + } + } +#endif +} diff --git a/src/mtxtree.c b/src/mtxtree.c new file mode 100644 index 0000000000000000000000000000000000000000..9c68b92288233ae84b315690c3f3b1805eb82ce9 --- /dev/null +++ b/src/mtxtree.c @@ -0,0 +1,193 @@ +/** + * + * @file mtxtree.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * This file contains all the function to describe the dependencies + * used in the Xgeqrf_param.jdf file. + * The QR factorization done with this file relies on three levels: + * - the first one is using a flat tree with TS kernels. The + * height of this tree is defined by the parameter 'a'. If 'a' + * is set to A->mt, the factorization is identical to the one + * perform by PLASMA_zgeqrf. + * For all subdiagonal "macro-tiles", the line reduced is always + * the first. For all diagonal "macro-tiles", the factorization + * performed is identical to the one performed by PLASMA_zgeqrf. + * + * - the third level is using a reduction tree of size 'p'. By + * default, the parameter 'p' should be equal to the number of + * processors used for the computation, but can be set + * differently. (see further example). The type of tree used at + * this level is defined by the hlvl parameter. It can be flat + * or greedy. + * CODE DETAILS: This tree and all the function related to it + * are performing a QR factorization on a band matrix with 'p' + * the size of the band. All the functions take global indices + * as input and return global indices as output. + * + * - Finally, a second 'low' level of reduction tree is applied. + * The size of this tree is induced by the parameters 'a' and 'p' + * from the first and third levels and is A->mt / ( p * a ). This + * tree is reproduced p times for each subset of tiles + * S_k = {i in [0, A->mt-1] \ i%p*a = k } with k in [0, p-1]. + * The tree used for the reduction is defined by the llvl + * parameter and can be: flat, greedy, fibonacci or binary. + * CODE DETAILS: For commodity, the size of this tree is always + * ceil(A->mt / (p * a) ) inducing some extra tests in the code. + * All the functions related to this level of tree take as input + * the local indices in the A->mt / (p*a) matrix and the global + * k. They return the local index. The reductions are so + * performed on a trapezoidal matrices where the step is defined + * by a: + * <- min( lhlvl_mt, min( mt, nt ) ) -> + * __a__ a a + * | |_____ + * | |_____ + * | |_____ + * llvl_mt = ceil(MT/ (a*p)) | |_____ + * | |_____ + * |___________________________________| + * + * + * + * At each step of the factorization, the lines of tiles are divided + * in 4 types: + * - QRPARAM_TILE_TS: They are the lines annihilated by a TS + * kernel, these lines are never used as an annihilator. They are + * the lines i, with 1 < (i/p)%a < a and i > (k+1)*p + * - QRPARAM_TILE_LOCALTT: They are the lines used as annhilitor + * in the TS kernels annihiling the QRPARAM_TILE_TS lines. They + * are themselves annihilated by the TT kernel of the low level + * reduction tree. The roots of the local trees are the lines i, + * with i/p = k. + * - QRPARAM_TILE_DOMINO: These are the lines that are + * annhilihated with a domino effect in the band defined by (i/p) + * <= k and i >= k + * - QRPARAM_TILE_DISTTT: These are the lines annihilated by the + * high level tree to reduce communications. + * These lines are defined by (i-k)/p = 0. + */ +#include "libhqr_internal.h" +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> + +/**************************************************** + * Reading functions + ************************************************** + * + * Common parameters for the following functions + * qrtree - tree used for the factorization + * k - Step k of the QR factorization + * m - line anhilated + */ + +int mtxtree_gettype(const libhqr_tree_t *qrtree, int k, int m){ + int id; + libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args); + id = (k * qrtree->mt) + m; + return arg[id].type; +} + +int mtxtree_currpiv(const libhqr_tree_t *qrtree, int k, int m){ + int id, perm_m, p, a; + libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args); + perm_m = m; + p = qrtree->p; + a = qrtree->a; + myassert( (p==1) || (perm_m / (p*a)) == (m / (p*a)) ); + myassert( (p==1) || (perm_m % p) == (m % p) ); + id = (k * qrtree->mt) + m; + return arg[id].currpiv; +} + +/* + * Extra parameter: + * p - line used as pivot + */ + +int mtxtree_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ + int id; + libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args); + int gmt = qrtree->mt; + myassert( m > p && p >= k ); + myassert( m == gmt || p == mtxtree_currpiv( qrtree, k, m ) ); + if(m == qrtree->mt){ + id = (k * qrtree->mt) + p; + return arg[id].first_nextpiv; + } + else{ + id = (k * qrtree->mt) + m; + return arg[id].nextpiv; + } +} + +int mtxtree_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){ + int id; + libhqr_tile_info_t *arg = (libhqr_tile_info_t*)(qrtree->args); + int gmt = qrtree->mt; + myassert( m >= p && p >= k && m < gmt ); + myassert( m == p || p == mtxtree_currpiv( qrtree, k, m ) ); + + if(m == p){ + id = (k * qrtree->mt) + p; + return arg[id].first_prevpiv; + } + else{ + id = (k * qrtree->mt) + m; + return arg[id].prevpiv; + } +} + +void +libhqr_fct_to_mtx( const libhqr_tree_t *in, libhqr_tree_t *out ) +{ + libhqr_tile_info_t *tileinfo; + int i, minMN, p, k; + + minMN = libhqr_imin( in->mt, in->nt ); + + /* Copy the input tree to the output one */ + memcpy( out, in, sizeof(libhqr_tree_t) ); + + /* Switch to matrix storage format and functions */ + out->init = LIBHQR_QRTREE_MTX; + out->gettype = mtxtree_gettype; + out->currpiv = mtxtree_currpiv; + out->nextpiv = mtxtree_nextpiv; + out->prevpiv = mtxtree_prevpiv; + out->args = malloc( out->mt * minMN * sizeof(libhqr_tile_info_t) ); + + tileinfo = (libhqr_tile_info_t*)(out->args); + + /* Initialize the matrix */ + for (k=0; k<minMN; k++) + { + for (i=0; i<in->mt; i++, tileinfo++) + { + tileinfo->type = in->gettype(in, k, i); + tileinfo->currpiv = p = in->currpiv(in, k, i); + tileinfo->first_nextpiv = in->nextpiv(in, k, i, in->mt); + tileinfo->first_prevpiv = in->prevpiv(in, k, i, i); + tileinfo->nextpiv = in->nextpiv(in, k, p, i); + tileinfo->prevpiv = in->prevpiv(in, k, p, i); + + assert( ((tileinfo->nextpiv <= in->mt) && (tileinfo->nextpiv >= k)) || (tileinfo->nextpiv == -1) ); + assert( ((tileinfo->prevpiv <= in->mt) && (tileinfo->prevpiv >= k)) || (tileinfo->prevpiv == -1) ); + assert( ((tileinfo->first_nextpiv <= in->mt) && (tileinfo->first_nextpiv >= k)) || (tileinfo->first_nextpiv == -1) ); + assert( ((tileinfo->first_prevpiv <= in->mt) && (tileinfo->first_prevpiv >= k)) || (tileinfo->first_prevpiv == -1) ); + } + } +} diff --git a/src/queue.c b/src/queue.c index eedec89126cf02bfadb826a5c3d9e856e36354b1..de36b9100f8a973b515835b27b013bd8067bb13a 100644 --- a/src/queue.c +++ b/src/queue.c @@ -2,7 +2,7 @@ * * @file queue.c * - * File for queue functions. + * Queue module for the treewalk algorithm. * * @copyright 2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * Univ. Bordeaux. All rights reserved. @@ -18,16 +18,14 @@ #include <stdio.h> #include <assert.h> -/**************************************************** - * Generic functions for queue - ***************************************************/ - -libhqr_queue_tile_t *libhqr_queue_tile_new (void) +libhqr_queue_tile_t * +libhqr_queue_tile_new(void) { return (NULL); } -void libhqr_queue_tile_push (libhqr_queue_tile_t ** queue_tile, int numero) +void +libhqr_queue_tile_push( libhqr_queue_tile_t **queue_tile, int numero ) { if (queue_tile != NULL) { @@ -55,7 +53,8 @@ void libhqr_queue_tile_push (libhqr_queue_tile_t ** queue_tile, int numero) return; } -int libhqr_queue_tile_head (libhqr_queue_tile_t ** queue_tile) +int +libhqr_queue_tile_head( libhqr_queue_tile_t **queue_tile ) { if (queue_tile != NULL && *queue_tile != NULL) { @@ -64,7 +63,8 @@ int libhqr_queue_tile_head (libhqr_queue_tile_t ** queue_tile) return -1; } -int libhqr_queue_tile_pop (libhqr_queue_tile_t ** queue_tile) +int +libhqr_queue_tile_pop ( libhqr_queue_tile_t **queue_tile ) { int ret = -1; if (queue_tile != NULL) @@ -82,4 +82,3 @@ int libhqr_queue_tile_pop (libhqr_queue_tile_t ** queue_tile) } return (ret); } - diff --git a/src/svd.c b/src/svd.c new file mode 100644 index 0000000000000000000000000000000000000000..dd1b1a6808207ba1b324bcd7c0b8aadcb5bdb6be --- /dev/null +++ b/src/svd.c @@ -0,0 +1,672 @@ +/** + * + * @file svd.c + * + * @copyright 2010-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + * @version 1.0.0 + * @author Raphael Boucherie + * @author Mathieu Faverge + * @date 2017-03-21 + * + * This file contains all the function to describe the dependencies + * used in the Xgeqrf_param.jdf file. + * The QR factorization done with this file relies on three levels: + * - the first one is using a flat tree with TS kernels. The + * height of this tree is defined by the parameter 'a'. If 'a' + * is set to A->mt, the factorization is identical to the one + * perform by PLASMA_zgeqrf. + * For all subdiagonal "macro-tiles", the line reduced is always + * the first. For all diagonal "macro-tiles", the factorization + * performed is identical to the one performed by PLASMA_zgeqrf. + * + * - the third level is using a reduction tree of size 'p'. By + * default, the parameter 'p' should be equal to the number of + * processors used for the computation, but can be set + * differently. (see further example). The type of tree used at + * this level is defined by the hlvl parameter. It can be flat + * or greedy. + * CODE DETAILS: This tree and all the function related to it + * are performing a QR factorization on a band matrix with 'p' + * the size of the band. All the functions take global indices + * as input and return global indices as output. + * + * - Finally, a second 'low' level of reduction tree is applied. + * The size of this tree is induced by the parameters 'a' and 'p' + * from the first and third levels and is A->mt / ( p * a ). This + * tree is reproduced p times for each subset of tiles + * S_k = {i in [0, A->mt-1] \ i%p*a = k } with k in [0, p-1]. + * The tree used for the reduction is defined by the llvl + * parameter and can be: flat, greedy, fibonacci or binary. + * CODE DETAILS: For commodity, the size of this tree is always + * ceil(A->mt / (p * a) ) inducing some extra tests in the code. + * All the functions related to this level of tree take as input + * the local indices in the A->mt / (p*a) matrix and the global + * k. They return the local index. The reductions are so + * performed on a trapezoidal matrices where the step is defined + * by a: + * <- min( lhlvl_mt, min( mt, nt ) ) -> + * __a__ a a + * | |_____ + * | |_____ + * | |_____ + * llvl_mt = ceil(MT/ (a*p)) | |_____ + * | |_____ + * |___________________________________| + * + * + * + * At each step of the factorization, the lines of tiles are divided + * in 4 types: + * - QRPARAM_TILE_TS: They are the lines annihilated by a TS + * kernel, these lines are never used as an annihilator. They are + * the lines i, with 1 < (i/p)%a < a and i > (k+1)*p + * - QRPARAM_TILE_LOCALTT: They are the lines used as annhilitor + * in the TS kernels annihiling the QRPARAM_TILE_TS lines. They + * are themselves annihilated by the TT kernel of the low level + * reduction tree. The roots of the local trees are the lines i, + * with i/p = k. + * - QRPARAM_TILE_DOMINO: These are the lines that are + * annhilihated with a domino effect in the band defined by (i/p) + * <= k and i >= k + * - QRPARAM_TILE_DISTTT: These are the lines annihilated by the + * high level tree to reduce communications. + * These lines are defined by (i-k)/p = 0. + */ +#include "libhqr_internal.h" +#include <stdlib.h> + +/* + * Common functions + */ +static int svd_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); +static int svd_getm( const libhqr_tree_t *qrtree, int k, int i ); +static int svd_geti( const libhqr_tree_t *qrtree, int k, int m ); +static int svd_gettype( const libhqr_tree_t *qrtree, int k, int m ); + +#define svd_getipiv( __qrtree, _k ) ((__qrtree)->llvl->ipiv + ((__qrtree)->llvl->ldd) * (_k) ) +#define svd_geta( __qrtree, _k ) ( (svd_getipiv( (__qrtree), (_k) ))[0] ) + +/* + * Extra parameter: + * gmt - Global number of tiles in a column of the complete distributed matrix + * Return: + * The number of geqrt to execute in the panel k + */ +static int +svd_getnbgeqrf( const libhqr_tree_t *qrtree, + int k ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int p = qrtree->p; + int gmt = qrtree->mt; + int a = svd_geta(arg, k); + int pa = p * a; + int nb_1, nb_2, nb_3; + int nb_11, nb_12; + + /* Number of tasks of type 3 */ + nb_3 = p; + + /* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */ + nb_2 = nbextra1_formula; + + /* First multiple of p*a under the diagonal of step 1 */ + nb_11 = ( (k + p + pa - 1 ) / pa ) * pa; + + /* Last multiple of p*a lower than A->mt */ + nb_12 = ( gmt / pa ) * pa; + + /* Number of tasks of type 1 between nb_11 and nb_12 */ + nb_1 = (nb_12 - nb_11) / a; + + /* Add leftover */ + nb_1 += libhqr_imin( p, gmt - nb_12 ); + + return libhqr_imin( nb_1 + nb_2 + nb_3, gmt - k); +} + +/* + * Extra parameter: + * i - indice of the geqrt in the continuous space + * Return: + * The global indice m of the i th geqrt in the panel k + */ +static int +svd_getm( const libhqr_tree_t *qrtree, + int k, int i ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int p = qrtree->p; + int a = svd_geta(arg, k); + + int pos1, j, pa = p * a; + int nbextra1 = nbextra1_formula; + int nb23 = p + nbextra1; + + /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ + if ( i < nb23 ) + return k+i; + /* Tile of type 1 */ + else { + j = i - nb23; + pa = p * a; + pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; + return pos1 + (j/p) * pa + j%p; + } +} + +/* + * Extra parameter: + * m - The global indice m of a geqrt in the panel k + * Return: + * The index i of the geqrt in the panel k + */ +static int +svd_geti( const libhqr_tree_t *qrtree, + int k, int m ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int p = qrtree->p; + int a = svd_geta(arg, k); + + int pos1, j, pa = p * a; + int nbextra1 = nbextra1_formula; + int nb23 = p + nbextra1; + int end2 = p + k + nbextra1; + + /* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */ + if ( m < end2 ) + return m-k; + /* Tile of type 1 */ + else { + pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa; + j = m - pos1; + return nb23 + (j / pa) * p + j%pa; + } +} + +/* + * Extra parameter: + * m - The global indice m of the row in the panel k + * Return + * -1 - Error + * 0 - if m is reduced thanks to a TS kernel + * 1 - if m is reduced thanks to the low level tree + * 2 - if m is reduced thanks to the bubble tree + * 3 - if m is reduced in distributed + */ +static int +svd_gettype( const libhqr_tree_t *qrtree, + int k, int m ) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int p = qrtree->p; + int a = svd_geta(arg, k); + + /* Element to be reduce in distributed */ + if (m < k + p) { + return 3; + } + /* Lower triangle of the matrix */ + else { + if( (m / p) % a == 0 ) + return 1; + else + return 0; + } +} + +/**************************************************** + * + * Generic functions currpiv,prevpiv,nextpiv + * + ***************************************************/ +static int +svd_currpiv(const libhqr_tree_t *qrtree, int k, int m) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, tmpk; + int lm, rank; + int a = svd_geta( arg, k ); + int p = qrtree->p; + int gmt = qrtree->mt; + + lm = m / p; /* Local index in the distribution over p domains */ + rank = m % p; /* Staring index in this distribution */ + + /* TS level common to every case */ + switch( svd_gettype( qrtree, k, m ) ) + { + case 0: + tmp = lm / a; + /* tmpk = (k + p - 1 - m%p) / p / a; */ + tmpk = k / (p * a); + return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank; + break; + case 1: + tmp = arg->llvl->currpiv(arg->llvl, k, m); + /* tmpk = (k + p - 1 - m%p) / p / a; */ + tmpk = k / (p * a); + return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank; + break; + case 2: + assert(0); + break; + case 3: + if ( arg->hlvl != NULL ) + return arg->hlvl->currpiv(arg->hlvl, k, m); + default: + return gmt; + } + return -1; +} + +/** + * svd_nextpiv - Computes the next row killed by the row p, after + * it has kill the row start. + * + * @param[in] k + * Factorization step + * + * @param[in] pivot + * Line used as killer + * + * @param[in] start + * Starting point to search the next line killed by p after start + * start must be equal to A.mt to find the first row killed by p. + * if start != A.mt, start must be killed by p + * + * @return: + * - -1 if start doesn't respect the previous conditions + * - m, the following row killed by p if it exists, A->mt otherwise + */ +static int svd_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, ls, lp, nextp; + int rpivot, lstart; + int *ipiv = svd_getipiv( arg, k ); + int a = ipiv[0]; + int ldd = ipiv[1]; + int p = qrtree->p; + int gmt = qrtree->mt; + + rpivot = pivot % p; /* Staring index in this distribution */ + + /* Local index in the distribution over p domains */ + lstart = ( start == gmt ) ? ldd * a : start / p; + + myassert( start > pivot && pivot >= k ); + myassert( start == gmt || pivot == svd_currpiv( qrtree, k, start ) ); + + /* TS level common to every case */ + ls = (start < gmt) ? svd_gettype( qrtree, k, start ) : -1; + lp = svd_gettype( qrtree, k, pivot ); + + switch( ls ) + { + case LIBHQR_KILLED_BY_DOMINO: + assert(0); + case -1: + + if ( lp == LIBHQR_KILLED_BY_TS ) { + myassert( start == gmt ); + return gmt; + } + + case LIBHQR_KILLED_BY_TS: + if ( start == gmt ) + nextp = pivot + p; + else + nextp = start + p; + + if ( ( nextp < gmt ) && + ( nextp < pivot + a*p ) && + ( (nextp/p)%a != 0 ) ) + return nextp; + start = gmt; + lstart = ldd * a; + + case LIBHQR_KILLED_BY_LOCALTREE: + /* Get the next pivot for the low level tree */ + tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a ); + + if ( (tmp * a * p + rpivot >= gmt) + && (tmp == ldd-1) ) + tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp); + + if ( tmp != ldd ) + return tmp * a * p + rpivot; + + /* no next of type 1, we reset start to search the next 2 */ + start = gmt; + lstart = ldd * a; + + case LIBHQR_KILLED_BY_DISTTREE: + + if ( lp < LIBHQR_KILLED_BY_DISTTREE ) { + return gmt; + } + + if( arg->hlvl != NULL ) { + tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start ); + if ( tmp != gmt ) + return tmp; + } + + default: + return gmt; + } +} + +/** + * svd_prevpiv - Computes the previous row killed by the row p, before + * to kill the row start. + * + * @param[in] k + * Factorization step + * + * @param[in] pivot + * Line used as killer + * + * @param[in] start + * Starting point to search the previous line killed by p before start + * start must be killed by p, and start must be greater or equal to p + * + * @return: + * - -1 if start doesn't respect the previous conditions + * - m, the previous row killed by p if it exists, A->mt otherwise + */ +static int +svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +{ + hqr_args_t *arg = (hqr_args_t*)(qrtree->args); + int tmp, ls, lp, nextp; + int lpivot, rpivot, lstart; + int *ipiv = svd_getipiv( arg, k ); + int a = ipiv[0]; + int ldd = ipiv[1]; + int p = qrtree->p; + int gmt = qrtree->mt; + + lpivot = pivot / p; /* Local index in the distribution over p domains */ + rpivot = pivot % p; /* Staring index in this distribution */ + lstart = start / p; /* Local index in the distribution over p domains */ + + myassert( start >= pivot && pivot >= k && start < gmt ); + myassert( start == pivot || pivot == svd_currpiv( qrtree, k, start ) ); + + /* TS level common to every case */ + ls = svd_gettype( qrtree, k, start ); + lp = svd_gettype( qrtree, k, pivot ); + + if ( lp == LIBHQR_KILLED_BY_TS ) + return gmt; + + myassert( lp >= ls ); + switch( ls ) + { + case LIBHQR_KILLED_BY_DOMINO: + assert(0); + case LIBHQR_KILLED_BY_DISTTREE: + if( arg->hlvl != NULL ) { + tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start ); + if ( tmp != gmt ) + return tmp; + } + + start = pivot; + lstart = pivot / p; + + case LIBHQR_KILLED_BY_LOCALTREE: + tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a); + + if ( (tmp * a * p + rpivot >= gmt) + && (tmp == ldd-1) ) + tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp); + + if ( tmp != ldd ) + return tmp * a * p + rpivot; + + start = pivot; + + case LIBHQR_KILLED_BY_TS: + if ( start == pivot ) { + tmp = lpivot + a - 1 - lpivot%a; + nextp = tmp * p + rpivot; + + while( pivot < nextp && nextp >= gmt ) + nextp -= p; + } else { + nextp = start - p; /*(lstart - 1) * p + rpivot;*/ + } + assert(nextp < gmt); + if ( pivot < nextp ) + return nextp; + + default: + return gmt; + } +} + +/** + ******************************************************************************* + * + * @ingroup libhqr + * + * libhqr_svd_init - Create the tree structures that will describes the + * operation performed during QR/LQ reduction step of the gebrd_ge2gb operation. + * + * Trees available parameters are described below. It is recommended to: + * - set p to the same value than the P-by-Q process grid used to distribute + * the data. (P for QR factorization, Q for LQ factorization). + * - set the low level tree to LIBHQR_GREEDY_TREE. + * - set the high level tree to: + * 1) LIBHQR_FLAT_TREE when the problem is square, because it divides + * by two the volume of communication of any other tree. + * 2) LIBHQR_FIBONACCI_TREE when the problem is tall and skinny (QR) or + * small and fat (LQ), because it reduces the critical path length. + * - Disable the domino effect when problem is square, to keep high efficiency + * kernel proportion high. + * - Enable the domino effect when problem is tall and skinny (QR) or + * small and fat (LQ) to increase parallelism and reduce critical path length. + * - Round-robin on TS domain (tsrr) option should be disabled. It is + * experimental and is not safe. + * + * These are the default when a parameter is set to -1; + * + * See http://www.netlib.org/lapack/lawnspdf/lawn257.pdf + * + ******************************************************************************* + * + * @param[in,out] qrtree + * On entry, an allocated structure uninitialized. + * On exit, the structure initialized according to the given parameters. + * + * @param[in] trans + * @arg LIBHQR_QR: Structure is initialized for the QR steps. + * @arg LIBHQR_LQ: Structure is initialized for the LQ steps. + * + * @param[in,out] A + * Descriptor of the distributed matrix A to be factorized, on which + * QR/LQ reduction steps will be performed. In case, of + * R-bidiagonalization, don't forget to provide the square submatrix + * that is concerned by those operations. + * The descriptor is untouched and only mt/nt/P parameters are used. + * + * @param[in] type_hlvl + * Defines the tree used to reduce the main tiles of each domain. This + * is a band lower diagonal matrix of width p. + * @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the tiles. + * @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the tiles. + * @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the + * tiles. + * @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the tiles. + * @arg LIBHQR_GREEDY1P_TREE: A Greedy tree is computed for the first + * column and then duplicated on all others. + * @arg -1: The default is used (LIBHQR_FIBONACCI_TREE) + * + * @param[in] p + * Defines the number of distributed domains, ie the width of the high + * level reduction tree. If p == 1, no high level reduction tree is + * used. If p == mt, this enforce the high level reduction tree to be + * performed on the full matrix. + * By default, it is recommended to set p to P if trans == + * LIBHQR_QR, and to Q otherwise, where P-by-Q is the process grid + * used to distributed the data. (p > 0) + * + * @param[in] nbthread_per_node + * Define the number of working threads per node to configure the + * adaptativ local tree to provide at least (ratio * nbthread_per_node) + * tasks per step when possible by creating the right amount of TS and + * TT kernels. + * + * @param[in] ratio + * Define the minimal number of tasks per thread that the adaptiv tree + * must provide at the lowest level of the tree. + * + ******************************************************************************* + * + * @retval -i if the ith parameters is incorrect. + * @retval 0 on success. + * + ******************************************************************************* + * + * @sa libhqr_hqr_finalize + * @sa libhqr_hqr_init + * @sa dplasma_zgeqrf_param + * @sa dplasma_cgeqrf_param + * @sa dplasma_dgeqrf_param + * @sa dplasma_sgeqrf_param + * + ******************************************************************************/ +int +libhqr_initfct_svd( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_hlvl, int p, int nbthread_per_node, int ratio ) +{ + int low_mt, minMN, a = -1; + hqr_args_t *arg; + + if (qrtree == NULL) { + fprintf(stderr,"libhqr_initfct_svd: illegal value of qrtree"); + return -1; + } + if ((trans != LIBHQR_QR) && + (trans != LIBHQR_LQ)) { + fprintf(stderr, "libhqr_initfct_svd: illegal value of trans"); + return -2; + } + if (A == NULL) { + fprintf(stderr, "libhqr_initfct_svd: illegal value of A"); + return -3; + } + + /* Compute parameters */ + p = libhqr_imax( p, 1 ); + + qrtree->getnbgeqrf = svd_getnbgeqrf; + qrtree->getm = svd_getm; + qrtree->geti = svd_geti; + qrtree->gettype = svd_gettype; + qrtree->currpiv = svd_currpiv; + qrtree->nextpiv = svd_nextpiv; + qrtree->prevpiv = svd_prevpiv; + + qrtree->init = LIBHQR_QRTREE_SVD; + + qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt; + qrtree->nt = (trans == LIBHQR_QR) ? A->nt : A->mt; + + qrtree->a = a; + qrtree->p = p; + qrtree->args = NULL; + + arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) ); + arg->domino = 0; + arg->tsrr = 0; + arg->perm = NULL; + + arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + arg->hlvl = NULL; + + minMN = libhqr_imin(A->mt, A->nt); + low_mt = (qrtree->mt + p - 1) / ( p ); + + arg->llvl->minMN = minMN; + arg->llvl->ldd = low_mt; + arg->llvl->a = a; + arg->llvl->p = p; + arg->llvl->domino = 0; + + svd_low_adaptiv_init( arg->llvl, qrtree->mt, qrtree->nt, + nbthread_per_node * (A->nodes / p), ratio ); + + if ( p > 1 ) { + arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) ); + + arg->llvl->minMN = minMN; + arg->hlvl->ldd = qrtree->mt; + arg->hlvl->a = a; + arg->hlvl->p = p; + arg->hlvl->domino = 0; + + switch( type_hlvl ) { + case LIBHQR_FLAT_TREE : + hqr_high_flat_init(arg->hlvl); + break; + case LIBHQR_GREEDY_TREE : + hqr_high_greedy_init(arg->hlvl, minMN); + break; + case LIBHQR_GREEDY1P_TREE : + hqr_high_greedy1p_init(arg->hlvl); + break; + case LIBHQR_BINARY_TREE : + hqr_high_binary_init(arg->hlvl); + break; + case LIBHQR_FIBONACCI_TREE : + hqr_high_fibonacci_init(arg->hlvl); + break; + default: + hqr_high_fibonacci_init(arg->hlvl); + } + } + + qrtree->args = (void*)arg; + + return 0; +} + +int +libhqr_initmtx_svd( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_hlvl, int p, int nbthread_per_node, int ratio ) +{ + libhqr_tree_t qrtreefct; + int rc; + + rc = libhqr_initfct_svd( &qrtreefct, trans, A, type_hlvl, p, nbthread_per_node, ratio ); + if ( rc != 0 ) { + return rc; + } + + libhqr_fct_to_mtx( &qrtreefct, qrtree ); + + /* Free the initial qrtree */ + libhqr_finalize( &qrtreefct ); + + return 0; +} + +int +libhqr_init_svd( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int type_hlvl, int p, int nbthread_per_node, int ratio ) +{ + return libhqr_initmtx_svd( qrtree, trans, A, type_hlvl, p, nbthread_per_node, ratio ); +} diff --git a/src/libhqr_systolic.c b/src/systolic.c similarity index 71% rename from src/libhqr_systolic.c rename to src/systolic.c index d66d3bb9666b06bd142ad055f2e7f84d1b665248..8e20a6994af552fe90df9b6f9256350fd0b5e57f 100644 --- a/src/libhqr_systolic.c +++ b/src/systolic.c @@ -15,34 +15,24 @@ * @date 2017-03-21 * */ -#include "libhqr.h" -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#define PRINT_PIVGEN 0 -#ifdef PRINT_PIVGEN -#define myassert( test ) {if ( ! (test) ) return -1;} -#else -#define myassert(test) {assert((test)); return -1;} -#endif - -#define nbextra1_formula ( (k % pa) > (pa - p) ) ? (-k)%pa + pa : 0 - -static int systolic_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) +#include "libhqr_internal.h" + +static int +systolic_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) { int pq = qrtree->p * qrtree->a; return libhqr_imin( pq, qrtree->mt - k); } -static int systolic_getm( const libhqr_tree_t *qrtree, int k, int i ) +static int +systolic_getm( const libhqr_tree_t *qrtree, int k, int i ) { (void)qrtree; return k+i; } -static int systolic_geti( const libhqr_tree_t *qrtree, int k, int m ) +static int +systolic_geti( const libhqr_tree_t *qrtree, int k, int m ) { (void)qrtree; return m-k; @@ -57,7 +47,8 @@ static int systolic_geti( const libhqr_tree_t *qrtree, int k, int m ) * 1 - if m is reduced thanks to the 2nd coordinate flat tree * 3 - if m is reduced thanks to the 1st coordinate flat tree */ -static int systolic_gettype( const libhqr_tree_t *qrtree, int k, int m ) { +static int +systolic_gettype( const libhqr_tree_t *qrtree, int k, int m ) { int p = qrtree->p; int q = qrtree->a; int pq = p * q; @@ -80,7 +71,8 @@ static int systolic_gettype( const libhqr_tree_t *qrtree, int k, int m ) { * Generic functions currpiv,prevpiv,nextpiv * ***************************************************/ -static int systolic_currpiv(const libhqr_tree_t *qrtree, int k, int m) +static int +systolic_currpiv(const libhqr_tree_t *qrtree, int k, int m) { int p = qrtree->p; int q = qrtree->a; @@ -100,7 +92,7 @@ static int systolic_currpiv(const libhqr_tree_t *qrtree, int k, int m) default: return qrtree->mt; } -}; +} /** * systolic_nextpiv - Computes the next row killed by the row p, after @@ -121,7 +113,8 @@ static int systolic_currpiv(const libhqr_tree_t *qrtree, int k, int m) * - -1 if start doesn't respect the previous conditions * - m, the following row killed by p if it exists, A->mt otherwise */ -static int systolic_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +static int +systolic_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { int ls, lp, nextp; int q = qrtree->a; @@ -210,7 +203,8 @@ static int systolic_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int s * - -1 if start doesn't respect the previous conditions * - m, the previous row killed by p if it exists, A->mt otherwise */ -static int systolic_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) +static int +systolic_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start) { int ls, lp, nextp; int rpivot; @@ -290,7 +284,7 @@ static int systolic_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int s default: return mt; } -}; +} /** ******************************************************************************* @@ -349,94 +343,78 @@ static int systolic_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int s * ******************************************************************************/ int -libhqr_systolic_init( libhqr_tree_t *qrtree, - libhqr_typefacto_e trans, libhqr_tiledesc_t *A, - int p, int q ) +libhqr_initfct_sys( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int p, int q ) { - libhqr_tree_t qrtree_init; - if (qrtree == NULL) { - fprintf(stderr, "libhqr_systolic_init, illegal value of qrtree"); + fprintf(stderr, "libhqr_initfct_sys: illegal value of qrtree"); return -1; } + if ((trans != LIBHQR_QR) && (trans != LIBHQR_LQ)) { - fprintf(stderr, "libhqr_systolic_init, illegal value of trans"); + fprintf(stderr, "libhqr_initfct_sys: illegal value of trans"); return -2; } if (A == NULL) { - fprintf(stderr, "libhqr_systolic_init, illegal value of A"); + fprintf(stderr, "libhqr_initfct_sys: illegal value of A"); return -3; } if ( p < 0 ) { - fprintf(stderr, "libhqr_systolic_init, illegal value of p"); + fprintf(stderr, "libhqr_initfct_sys: illegal value of p"); return -4; } if ( q < -1 ) { - fprintf(stderr, "libhqr_systolic_init, illegal value of q"); + fprintf(stderr, "libhqr_initfct_sys: illegal value of q"); return -5; } - /* Create a temporary qrtree structure based on the functions */ - { - qrtree_init.getnbgeqrf = systolic_getnbgeqrf; - qrtree_init.getm = systolic_getm; - qrtree_init.geti = systolic_geti; - qrtree_init.gettype = systolic_gettype; - qrtree_init.currpiv = systolic_currpiv; - qrtree_init.nextpiv = systolic_nextpiv; - qrtree_init.prevpiv = systolic_prevpiv; - - qrtree_init.mt = (trans == LIBHQR_QR) ? A->mt : A->nt; - qrtree_init.nt = (trans == LIBHQR_QR) ? A->nt : A->mt; - - qrtree_init.a = libhqr_imax( q, 1 ); - qrtree_init.p = libhqr_imax( p, 1 ); - qrtree_init.args = NULL; - } - - memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) ); qrtree->getnbgeqrf = systolic_getnbgeqrf; qrtree->getm = systolic_getm; qrtree->geti = systolic_geti; - qrtree->gettype = rdmtx_gettype; - qrtree->currpiv = rdmtx_currpiv; - qrtree->nextpiv = rdmtx_nextpiv; - qrtree->prevpiv = rdmtx_prevpiv; - qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tileinfo_t) ); + qrtree->gettype = systolic_gettype; + qrtree->currpiv = systolic_currpiv; + qrtree->nextpiv = systolic_nextpiv; + qrtree->prevpiv = systolic_prevpiv; - /*Initialize the matrix */ - libhqr_matrix_init(qrtree, &qrtree_init); + qrtree->init = LIBHQR_QRTREE_SYS; + qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt; + qrtree->nt = (trans == LIBHQR_QR) ? A->nt : A->mt; - /* Free the initial qrtree */ - libhqr_systolic_finalize( &qrtree_init ); + qrtree->a = libhqr_imax( q, 1 ); + qrtree->p = libhqr_imax( p, 1 ); + qrtree->args = NULL; return 0; } -/** - ******************************************************************************* - * - * @ingroup dplasma - * - * libhqr_systolic_finalize - Cleans the qrtree data structure allocated by - * call to libhqr_systolic_init(). - * - ******************************************************************************* - * - * @param[in,out] qrtree - * On entry, an allocated structure to destroy. - * On exit, the structure is destroy and cannot be used. - * - ******************************************************************************* - * - * @sa libhqr_systolic_init - * - ******************************************************************************/ -void -libhqr_systolic_finalize( libhqr_tree_t *qrtree ) + +int +libhqr_initmtx_sys( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int p, int q ) { - if ( qrtree->args != NULL) { - free( qrtree->args ); + libhqr_tree_t qrtreefct; + int rc; + + rc = libhqr_initfct_sys( &qrtreefct, trans, A, p, q ); + if ( rc != 0 ) { + return rc; } + + libhqr_fct_to_mtx( &qrtreefct, qrtree ); + + /* Free the initial qrtree */ + libhqr_finalize( &qrtreefct ); + + return 0; +} + +int +libhqr_init_sys( libhqr_tree_t *qrtree, + libhqr_facto_e trans, libhqr_matrix_t *A, + int p, int q ) +{ + return libhqr_initmtx_sys( qrtree, trans, A, p, q ); } diff --git a/src/treedraw.c b/src/treedraw.c index 4a22317d6fd35609769721190dc67005035fb4e6..35836b2052d4d848271aa0031bc694e655e952b8 100644 --- a/src/treedraw.c +++ b/src/treedraw.c @@ -13,7 +13,6 @@ * @date 2017-04-04 * */ - #include <string.h> #include <stdio.h> #include "libhqr_draw.h" diff --git a/src/treewalk.c b/src/treewalk.c deleted file mode 100644 index 81a74e732c16bb0e3f070d26fa5a6350dc4a7bb8..0000000000000000000000000000000000000000 --- a/src/treewalk.c +++ /dev/null @@ -1,192 +0,0 @@ -/** - * - * @file treewalk.c - * - * File for algorithm of treewalking. - * - * @copyright 2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - * @version 1.0.0 - * @author Raphael Boucherie - * @author Mathieu Faverge - * @date 2017-03-21 - * - */ - -#include "libhqr.h" -#include "libhqr_draw.h" -#include "libhqr_queue.h" -#include <stdio.h> -#include <assert.h> -#include <stdlib.h> -#include <string.h> - -/* - * Common prameters to the 3 following functions: - * k - Factorization step for the color - * p - annihilator - * m - tile eliminated - * file - File where the lines are drawn - * tiles - table stocking the tiles - */ - -void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *file){ - int yp, ym; - int x, xp, xm; - - /* Row of the tiles */ - ym = SIZE + SIZE * m; - yp = SIZE + SIZE * p; - - /* Starting position of the tiles */ - xm = (SIZE + (SIZE / 4)) + SIZE * step_m; - xp = (SIZE + (SIZE / 4)) + SIZE * step_p; - - /* Final position of the tiles */ - x = libhqr_imax(step_m, step_p) + 1; - x = (SIZE + (SIZE / 4)) + SIZE * x; - - libhqr_drawline( xm, ym, x, ym, k, file ); - libhqr_drawline( xp, yp, x, yp, k, file ); -} - -void draw_vertical_line(int k, int p, int m, int step_m, FILE *file){ - int absciss, ym, yp; - absciss = SIZE + SIZE * step_m; - ym = SIZE + SIZE * m; - yp = SIZE + SIZE * p; - libhqr_drawline(absciss, ym, absciss, yp, k, file); -} - -void draw_rectangle(int k, int p, int m, int step_m, FILE *file){ - int absciss, ordinate; - absciss = ((SIZE * 3) /4) + SIZE * step_m; - ordinate = ((SIZE * 3) /4) + SIZE * m; - libhqr_drawTS(absciss, ordinate, WIDTH, HEIGHT, k, file); - ordinate = ((SIZE * 3) /4) + SIZE * p; - libhqr_drawTT(absciss, ordinate, WIDTH, HEIGHT, k, file); -} - -void draw_lines(const libhqr_tree_t *qrtree, int k, int *tiles, int *step, FILE *file){ - int i, m, p, tmp; - - /* Get order for step k */ - libhqr_treewalk(qrtree, k, tiles); - - /* Draw lines */ - for(i = k; i < (qrtree->mt-1); i++){ - m = tiles[i]; - p = qrtree->currpiv(qrtree, k, m); - - draw_horizontal_line(k, p, m, step[p], step[m], file); - - tmp = libhqr_imax( step[p], step[m] ) + 1; - step[m] = tmp; - step[p] = tmp; - draw_vertical_line(k, p, m, tmp, file); - } -} - -/**************************************************** - * LIBHQR_TREEWALK - ***************************************************/ - -/** - * libhqr_treewalk - * - * @param[in] arg - * Arguments specific to the reduction tree used - * - * @param[in] k - * Factorization step - * - * @param[in] tiles - * Table stocking the tiles and their step in the order they are killed - * - */ -void libhqr_treewalk(const libhqr_tree_t *qrtree, int k, int *tiles) -{ - int tsid = -1, ttid = -1; - int p = k; - int pivot = k; - int m = k; - libhqr_queue_tile_t *tt = libhqr_queue_tile_new(); - libhqr_queue_tile_t *ts = libhqr_queue_tile_new(); - libhqr_queue_tile_push(&tt, k); - - while( tt ) - { - /* Stack all elements killed on the way down */ - p = qrtree->prevpiv(qrtree, k, pivot, p); - while( p != qrtree->mt ) { - /* If it's a TS tile, or a TT at the bottom of the tree that kills noone */ - if( (qrtree->gettype(qrtree, k, p) == 0) || - (qrtree->prevpiv(qrtree, k, p, p) != qrtree->mt) ) - { - libhqr_queue_tile_push(&tt,p); - /* printf("PUSH TT: %d\n", p); */ - } - libhqr_queue_tile_push(&ts, p); - /* printf("PUSH TS: %d\n", p); */ - p = qrtree->prevpiv(qrtree, k, pivot, p); - assert( p != -1 ); - } - - tsid = libhqr_queue_tile_head(&ts); - ttid = libhqr_queue_tile_pop(&tt); - - /* Unstack all element killed by TS, or by TT and already discovered */ - while(tsid != -1 && tsid != ttid) { - /* printf( "TS=%d, TT=%d\n", tsid, ttid ); */ - - tsid = libhqr_queue_tile_pop(&ts); - /* printf("POP TS: %d\n", tsid); */ - /* printf("Call function on (%d, %d)\n", - qrtree->currpiv(qrtree, k, tsid), tsid ); */ - assert(m < (qrtree->mt-1)); - tiles[m] = tsid; - m++; - tsid = libhqr_queue_tile_head(&ts); - } - pivot = p = ttid; - } - - assert(m == (qrtree->mt-1)); - assert(ts == NULL); - assert(tt == NULL); -} - -/**************************************************** - * LIBHQR_PRINT_TREE - ***************************************************/ - -void libhqr_print_tree(const libhqr_tree_t *qrtree, libhqr_tiledesc_t *matrix){ - int maxMN, minMN, k, i; - int *tiles; - int *step; - FILE *file; - - maxMN = libhqr_imax(matrix->mt, matrix->nt ); - minMN = libhqr_imin(matrix->mt, matrix->nt ); - - tiles = (int*)malloc(maxMN*sizeof(int)); - memset( tiles, 0, maxMN*sizeof(int) ); - - step = (int*) malloc( qrtree->mt * sizeof(int) ); - memset( step, 0, qrtree->mt * sizeof(int) ); - - file = fopen("tree.svg","w+"); - libhqr_writeheader(file); - for (k = 0; k < minMN; k++){ - /* Drawing the lines */ - draw_lines(qrtree, k, tiles, step, file); - /* Drawing the rectangles */ - for (i = k + 1; i < maxMN; i++){ - draw_rectangle(k, qrtree->currpiv(qrtree, k, i), i, step[i], file); - } - } - libhqr_writeend(file); - fclose(file); - free(tiles); -} diff --git a/testings/CMakeLists.txt b/testings/CMakeLists.txt index 1a184dbcc280da64b5503b5eb43b43f9c1bbcc03..a0bf66c7da76e5e89297b81a7428c99feaf4e111 100644 --- a/testings/CMakeLists.txt +++ b/testings/CMakeLists.txt @@ -16,7 +16,6 @@ set(TESTINGS draw_systolic.c testing_hqr.c testing_systolic.c - testing_tileinit.c ) foreach (_file ${TESTINGS}) diff --git a/testings/draw_hqr.c b/testings/draw_hqr.c index 18b1bc9b8c6fb1f84434b895bf7bc338fd2b1577..9f781b0c96f391f280f216eb10ccd314bae4a9d9 100644 --- a/testings/draw_hqr.c +++ b/testings/draw_hqr.c @@ -17,14 +17,13 @@ #include <string.h> #include <stdlib.h> #include <libhqr.h> -#include <libhqr_draw.h> #include "common.h" int main(int argc, char ** argv) { libhqr_tree_t qrtree; - libhqr_tiledesc_t matrix; + libhqr_matrix_t matrix; int iparam[IPARAM_SIZEOF]; /* Get options */ @@ -36,9 +35,9 @@ main(int argc, char ** argv) matrix.mt = MT; matrix.nt = NT; - libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); - libhqr_print_tree( &qrtree, &matrix ); - libhqr_matrix_finalize( &qrtree ); + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); + libhqr_print_svg( &qrtree, "hqr.svg" ); + libhqr_finalize( &qrtree ); return 1; } diff --git a/testings/draw_systolic.c b/testings/draw_systolic.c index 31252f86341e69c09b7b192a87294b68def835c8..72e944e1689860cabaee5d00f53ed35996356d1f 100644 --- a/testings/draw_systolic.c +++ b/testings/draw_systolic.c @@ -17,14 +17,13 @@ #include <string.h> #include <stdlib.h> #include <libhqr.h> -#include <libhqr_draw.h> #include "common.h" int main(int argc, char ** argv) { libhqr_tree_t qrtree; - libhqr_tiledesc_t matrix; + libhqr_matrix_t matrix; int iparam[IPARAM_SIZEOF]; /* Get options */ @@ -36,9 +35,9 @@ main(int argc, char ** argv) matrix.mt = MT; matrix.nt = NT; - libhqr_systolic_init( &qrtree, LIBHQR_QR, &matrix, P, Q ); - libhqr_print_tree( &qrtree, &matrix ); - libhqr_matrix_finalize( &qrtree ); + libhqr_init_sys( &qrtree, LIBHQR_QR, &matrix, P, Q ); + libhqr_print_svg( &qrtree, "systolic.svg" ); + libhqr_finalize( &qrtree ); return 1; } diff --git a/testings/testing_hqr.c b/testings/testing_hqr.c index e13f5529a5675f7e76954b932ff8bb92070c4064..15797e4b7c3a1798b81f817d83a332e687ce2db5 100644 --- a/testings/testing_hqr.c +++ b/testings/testing_hqr.c @@ -26,7 +26,7 @@ int main(int argc, char ** argv) { libhqr_tree_t qrtree; - libhqr_tiledesc_t matrix; + libhqr_matrix_t matrix; int iparam[IPARAM_SIZEOF]; int rc, ret = 0; @@ -90,11 +90,11 @@ main(int argc, char ** argv) if (tsrr==1 && a==1) continue; - libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); - rc = libhqr_tree_check( &matrix, &qrtree ); - libhqr_matrix_finalize( &qrtree ); + rc = libhqr_check( &matrix, &qrtree ); + libhqr_finalize( &qrtree ); if (rc != 0) { fprintf(stderr, "%s -M %d -N %d --treel=%d --qr_a=%d %s FAILED(%d)\n", @@ -150,11 +150,11 @@ main(int argc, char ** argv) if (tsrr==1 && a==1) continue; - libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); - rc = libhqr_tree_check( &matrix, &qrtree ); - libhqr_matrix_finalize( &qrtree ); + rc = libhqr_check( &matrix, &qrtree ); + libhqr_finalize( &qrtree ); if (rc != 0) { fprintf(stderr, "%s -M %d -N %d --treel=%d --treeh=%d --qr_a=%d --qr_p=%d -d=%d %s FAILED(%d)\n", @@ -180,21 +180,24 @@ main(int argc, char ** argv) matrix.mt = MT; matrix.nt = NT; - libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, + libhqr_init_hqr( &qrtree, LIBHQR_QR, &matrix, llvl, hlvl, qr_a, qr_p, domino, tsrr ); - rc = libhqr_tree_check( &matrix, &qrtree ); - libhqr_matrix_finalize( &qrtree ); + rc = libhqr_check( &matrix, &qrtree ); + libhqr_finalize( &qrtree ); if (rc != 0) { fprintf(stderr, "%s -M %d -N %d --treel=%d --treeh=%d --qr_a=%d --qr_p=%d -d=%d %s FAILED(%d)\n", argv[0], MT, NT, llvl, hlvl, qr_a, qr_p, domino, (tsrr ? "-r" : " "), rc); ret |= 1; } + else { + fprintf(stderr, "%s -M %d -N %d --treel=%d --treeh=%d --qr_a=%d --qr_p=%d -d=%d %s SUCCESS\n", + argv[0], MT, NT, llvl, hlvl, qr_a, qr_p, domino, (tsrr ? "-r" : " ")); + } } if ( ret == 0 ) { - printf("\n"); return EXIT_SUCCESS; } else { diff --git a/testings/testing_systolic.c b/testings/testing_systolic.c index e39173f56ea2c4b40b9946b97e971993d9bcbdba..0291f84360f8e372861af75aba63e6ce852d03d1 100644 --- a/testings/testing_systolic.c +++ b/testings/testing_systolic.c @@ -26,7 +26,7 @@ int main(int argc, char ** argv) { libhqr_tree_t qrtree; - libhqr_tiledesc_t matrix; + libhqr_matrix_t matrix; int iparam[IPARAM_SIZEOF]; int rc, ret = 0; @@ -74,9 +74,9 @@ main(int argc, char ** argv) NT = all_nt[n]; matrix.nt = NT; - libhqr_systolic_init( &qrtree, LIBHQR_QR, &matrix, P, Q ); - rc = libhqr_tree_check( &matrix, &qrtree ); - libhqr_matrix_finalize( &qrtree ); + libhqr_init_sys( &qrtree, LIBHQR_QR, &matrix, P, Q ); + rc = libhqr_check( &matrix, &qrtree ); + libhqr_finalize( &qrtree ); if (rc != 0) { fprintf(stderr, "%s -M %d -N %d --qr_p=%d --qr_a=%d FAILED(%d)\n", @@ -98,21 +98,24 @@ main(int argc, char ** argv) matrix.mt = MT; matrix.nt = NT; - libhqr_systolic_init( &qrtree, LIBHQR_QR, &matrix, - qr_p, qr_a ); + libhqr_init_sys( &qrtree, LIBHQR_QR, &matrix, + qr_p, qr_a ); - rc = libhqr_tree_check( &matrix, &qrtree ); - libhqr_matrix_finalize( &qrtree ); + rc = libhqr_check( &matrix, &qrtree ); + libhqr_finalize( &qrtree ); if (rc != 0) { fprintf(stderr, "%s -M %d -N %d --qr_p=%d --qr_a=%d FAILED(%d)\n", argv[0], MT, NT, qr_p, qr_a, rc); ret |= 1; } + else { + fprintf(stderr, "%s -M %d -N %d --qr_p=%d --qr_a=%d SUCCESS\n", + argv[0], MT, NT, qr_p, qr_a); + } } if ( ret == 0 ) { - printf("\n"); return EXIT_SUCCESS; } else { diff --git a/testings/testing_tileinit.c b/testings/testing_tileinit.c deleted file mode 100644 index f75f8900c996ce9148aabc5fbd3a7924f52fa8cd..0000000000000000000000000000000000000000 --- a/testings/testing_tileinit.c +++ /dev/null @@ -1,37 +0,0 @@ -/** - * - * @file testing_tileinit.c - * - * Testing file for the functions stocking the informations of the tiles - * - * @copyright 2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, - * Univ. Bordeaux. All rights reserved. - * - * @version 1.0.0 - * @author Raphael Boucherie - * @author Mathieu Faverge - * @date 2017-04-18 - * - */ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <assert.h> -#include "libhqr.h" - -int -main(int argc, char ** argv) -{ - libhqr_tree_t qrtree; - libhqr_tiledesc_t matrix; - - memset( &qrtree, 0xdeadbeef, sizeof(libhqr_tree_t) ); - matrix.nodes = 1; - matrix.p = 3; - matrix.mt = 1; - matrix.nt = 1; - libhqr_hqr_init( &qrtree, LIBHQR_QR, &matrix, 0, 0, 1, 3, 0, 0); - if(libhqr_tree_check( &matrix, &qrtree )) return 0; - libhqr_matrix_finalize( &qrtree ); - return 1; -}