Mentions légales du service

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • solverstack/hqr
  • faverge/hqr
  • sylvand/hqr
  • fpruvost/hqr
4 results
Show changes
Commits on Source (10)
Showing with 2786 additions and 402 deletions
......@@ -38,35 +38,53 @@ if ( NOT BUILD_SUBPROJECT )
endif()
set(hdrs
include/common.h
include/libdraw.h
include/libhqr.h
include/queue.h
include/libhqr_internal.h
include/libhqr_queue.h
)
set(srcs
src/libhqr.c
src/libhqr_dbg.c
src/libhqr_systolic.c
# Low level tree functions
src/low_flat.c
src/low_binary.c
src/low_fibonacci.c
src/low_greedy.c
src/low_greedy1p.c
src/low_adaptiv.c
# High level tree functions
src/high_flat.c
src/high_binary.c
src/high_fibonacci.c
src/high_greedy.c
# Direct access tree functions
src/systolic.c
src/svd.c
src/hqr.c
src/mtxtree.c
# Others
src/check.c
src/gendot.c
src/gensvg.c
src/queue.c
src/treedraw.c
src/treewalk.c
)
include_directories(include)
add_library(hqr ${srcs})
set_property(TARGET hqr PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
target_link_libraries( hqr m )
add_subdirectory(testings)
install(FILES
include/libhqr.h
include/libhqr_common.h
# include/libhqr_dbg.h
DESTINATION include/libhqr )
DESTINATION include )
install(TARGETS hqr DESTINATION lib)
install(TARGETS hqr
RUNTIME DESTINATION bin
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib)
#generate_hqr_pkgconfig_file()
generate_hqr_pkgconfig_file()
#-- Add a custom target to generate tags
add_custom_target (tags
......
###
#
# @copyright (c) 2009-2014 The University of Tennessee and The University
# of Tennessee Research Foundation.
# All rights reserved.
# @copyright (c) 2012-2017 Inria. All rights reserved.
# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
#
###
#
# - Find HQR include dirs and libraries
# Use this module by invoking find_package with the form:
# find_package(HQR
# [REQUIRED]) # Fail with error if hqr is not found
#
# This module finds headers and hqr library.
# Results are reported in variables:
# HQR_FOUND - True if headers and requested libraries were found
# HQR_INCLUDE_DIRS - hqr include directories
# HQR_LIBRARY_DIRS - Link directories for hqr libraries
# HQR_LIBRARIES - hqr component libraries to be linked
#
# The user can give specific paths where to find the libraries adding cmake
# options at configure (ex: cmake path/to/project -DHQR_DIR=path/to/hqr):
# HQR_DIR - Where to find the base directory of hqr
# HQR_INCDIR - Where to find the header files
# HQR_LIBDIR - Where to find the library files
# The module can also look for the following environment variables if paths
# are not given as cmake variable: HQR_DIR, HQR_INCDIR, HQR_LIBDIR
#=============================================================================
# Copyright 2012-2017 Inria
# Copyright 2012-2013 Emmanuel Agullo
# Copyright 2012-2013 Mathieu Faverge
# Copyright 2012 Cedric Castagnede
# Copyright 2013-2017 Florent Pruvost
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file MORSE-Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of Morse, substitute the full
# License text for the above reference.)
if (NOT HQR_FOUND)
set(HQR_DIR "" CACHE PATH "Installation directory of HQR library")
if (NOT HQR_FIND_QUIETLY)
message(STATUS "A cache variable, namely HQR_DIR, has been set to specify the install directory of HQR")
endif()
endif()
set(ENV_HQR_DIR "$ENV{HQR_DIR}")
set(ENV_HQR_INCDIR "$ENV{HQR_INCDIR}")
set(ENV_HQR_LIBDIR "$ENV{HQR_LIBDIR}")
set(HQR_GIVEN_BY_USER "FALSE")
if ( HQR_DIR OR ( HQR_INCDIR AND HQR_LIBDIR) OR ENV_HQR_DIR OR (ENV_HQR_INCDIR AND ENV_HQR_LIBDIR) )
set(HQR_GIVEN_BY_USER "TRUE")
endif()
# Optionally use pkg-config to detect include/library dirs (if pkg-config is available)
# -------------------------------------------------------------------------------------
include(FindPkgConfig)
find_package(PkgConfig QUIET)
if(PKG_CONFIG_EXECUTABLE AND NOT HQR_GIVEN_BY_USER)
pkg_search_module(HQR hqr)
if (NOT HQR_FIND_QUIETLY)
if (HQR_FOUND AND HQR_LIBRARIES)
message(STATUS "Looking for HQR - found using PkgConfig")
#if(NOT HQR_INCLUDE_DIRS)
# message("${Magenta}HQR_INCLUDE_DIRS is empty using PkgConfig."
# "Perhaps the path to hqr headers is already present in your"
# "C(PLUS)_INCLUDE_PATH environment variable.${ColourReset}")
#endif()
else()
message(STATUS "${Magenta}Looking for HQR - not found using PkgConfig."
"\n Perhaps you should add the directory containing hqr.pc to the"
"\n PKG_CONFIG_PATH environment variable.${ColourReset}")
endif()
endif()
endif(PKG_CONFIG_EXECUTABLE AND NOT HQR_GIVEN_BY_USER)
if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT HQR_FOUND) OR (HQR_GIVEN_BY_USER) )
if (NOT HQR_FIND_QUIETLY)
message(STATUS "Looking for HQR - PkgConfig not used")
endif()
# Looking for include
# -------------------
# Add system include paths to search include
# ------------------------------------------
unset(_inc_env)
set(ENV_HQR_DIR "$ENV{HQR_DIR}")
set(ENV_HQR_INCDIR "$ENV{HQR_INCDIR}")
if(ENV_HQR_INCDIR)
list(APPEND _inc_env "${ENV_HQR_INCDIR}")
elseif(ENV_HQR_DIR)
list(APPEND _inc_env "${ENV_HQR_DIR}")
list(APPEND _inc_env "${ENV_HQR_DIR}/include")
else()
if(WIN32)
string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}")
else()
string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}")
list(APPEND _inc_env "${_path_env}")
string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}")
list(APPEND _inc_env "${_path_env}")
string(REPLACE ":" ";" _path_env "$ENV{CPATH}")
list(APPEND _inc_env "${_path_env}")
string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}")
list(APPEND _inc_env "${_path_env}")
endif()
endif()
list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}")
list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}")
list(REMOVE_DUPLICATES _inc_env)
# Try to find the hqr header in the given paths
# -------------------------------------------------
# call cmake macro to find the header path
if(HQR_INCDIR)
set(HQR_HQR.h_DIRS "HQR_libhqr.h_DIRS-NOTFOUND")
find_path(HQR_libhqr.h_DIRS
NAMES libhqr.h
HINTS ${HQR_INCDIR})
else()
if(HQR_DIR)
set(HQR_libhqr.h_DIRS "HQR_libhqr.h_DIRS-NOTFOUND")
find_path(HQR_libhqr.h_DIRS
NAMES libhqr.h
HINTS ${HQR_DIR}
else()
set(HQR_libhqr.h_DIRS "HQR_libhqr.h_DIRS-NOTFOUND")
find_path(HQR_libhqr.h_DIRS
NAMES libhqr.h
HINTS ${_inc_env}
PATH_SUFFIXES "hqr")
endif()
endif()
mark_as_advanced(HQR_libhqr.h_DIRS)
# Add path to cmake variable
# ------------------------------------
if (HQR_libhqr.h_DIRS)
set(HQR_INCLUDE_DIRS "${HQR_libhqr.h_DIRS}")
else ()
set(HQR_INCLUDE_DIRS "HQR_INCLUDE_DIRS-NOTFOUND")
if(NOT HQR_FIND_QUIETLY)
message(STATUS "Looking for hqr -- libhqr.h not found")
endif()
endif ()
if (HQR_INCLUDE_DIRS)
list(REMOVE_DUPLICATES HQR_INCLUDE_DIRS)
endif ()
# Looking for lib
# ---------------
# Add system library paths to search lib
# --------------------------------------
unset(_lib_env)
set(ENV_HQR_LIBDIR "$ENV{HQR_LIBDIR}")
if(ENV_HQR_LIBDIR)
list(APPEND _lib_env "${ENV_HQR_LIBDIR}")
elseif(ENV_HQR_DIR)
list(APPEND _lib_env "${ENV_HQR_DIR}")
list(APPEND _lib_env "${ENV_HQR_DIR}/lib")
else()
if(WIN32)
string(REPLACE ":" ";" _lib_env "$ENV{LIB}")
else()
if(APPLE)
string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}")
else()
string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}")
endif()
list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}")
list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
endif()
endif()
list(REMOVE_DUPLICATES _lib_env)
# Try to find the hqr lib in the given paths
# ----------------------------------------------
# call cmake macro to find the lib path
if(HQR_LIBDIR)
set(HQR_hqr_LIBRARY "HQR_hqr_LIBRARY-NOTFOUND")
find_library(HQR_hqr_LIBRARY
NAMES hqr
HINTS ${HQR_LIBDIR})
else()
if(HQR_DIR)
set(HQR_hqr_LIBRARY "HQR_hqr_LIBRARY-NOTFOUND")
find_library(HQR_hqr_LIBRARY
NAMES hqr
HINTS ${HQR_DIR}
PATH_SUFFIXES lib lib32 lib64)
else()
set(HQR_hqr_LIBRARY "HQR_hqr_LIBRARY-NOTFOUND")
find_library(HQR_hqr_LIBRARY
NAMES hqr
HINTS ${_lib_env})
endif()
endif()
mark_as_advanced(HQR_hqr_LIBRARY)
# If found, add path to cmake variable
# ------------------------------------
if (HQR_hqr_LIBRARY)
get_filename_component(hqr_lib_path ${HQR_hqr_LIBRARY} PATH)
# set cmake variables (respects naming convention)
set(HQR_LIBRARIES "${HQR_hqr_LIBRARY}")
set(HQR_LIBRARY_DIRS "${hqr_lib_path}")
else ()
set(HQR_LIBRARIES "HQR_LIBRARIES-NOTFOUND")
set(HQR_LIBRARY_DIRS "HQR_LIBRARY_DIRS-NOTFOUND")
if(NOT HQR_FIND_QUIETLY)
message(STATUS "Looking for hqr -- lib hqr not found")
endif()
endif ()
if (HQR_LIBRARY_DIRS)
list(REMOVE_DUPLICATES HQR_LIBRARY_DIRS)
endif ()
# check a function to validate the find
if(HQR_LIBRARIES)
set(REQUIRED_INCDIRS)
set(REQUIRED_LIBDIRS)
set(REQUIRED_LIBS)
# HQR
if (HQR_INCLUDE_DIRS)
set(REQUIRED_INCDIRS "${HQR_INCLUDE_DIRS}")
endif()
if (HQR_LIBRARY_DIRS)
set(REQUIRED_LIBDIRS "${HQR_LIBRARY_DIRS}")
endif()
set(REQUIRED_LIBS "${HQR_LIBRARIES}")
# set required libraries for link
set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}")
set(CMAKE_REQUIRED_LIBRARIES)
foreach(lib_dir ${REQUIRED_LIBDIRS})
list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}")
endforeach()
list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}")
string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}")
# test link
unset(HQR_WORKS CACHE)
include(CheckFunctionExists)
check_function_exists(libhqr_hqr_init HQR_WORKS)
mark_as_advanced(HQR_WORKS)
if(NOT HQR_WORKS)
if(NOT HQR_FIND_QUIETLY)
message(STATUS "Looking for hqr : test of libhqr_hqr_init with hqr library fails")
message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}")
message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}")
message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails")
endif()
endif()
set(CMAKE_REQUIRED_INCLUDES)
set(CMAKE_REQUIRED_FLAGS)
set(CMAKE_REQUIRED_LIBRARIES)
endif(HQR_LIBRARIES)
endif( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT HQR_FOUND) OR (HQR_GIVEN_BY_USER) )
if (HQR_LIBRARIES)
if (HQR_LIBRARY_DIRS)
list(GET HQR_LIBRARY_DIRS 0 first_lib_path)
else()
list(GET HQR_LIBRARIES 0 first_lib)
get_filename_component(first_lib_path "${first_lib}" PATH)
endif()
if (${first_lib_path} MATCHES "/lib(32|64)?$")
string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}")
set(HQR_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of HQR library" FORCE)
else()
set(HQR_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of HQR library" FORCE)
endif()
endif()
mark_as_advanced(HQR_DIR)
mark_as_advanced(HQR_DIR_FOUND)
# check that HQR has been found
# -------------------------------
include(FindPackageHandleStandardArgs)
if (PKG_CONFIG_EXECUTABLE AND HQR_FOUND)
find_package_handle_standard_args(HQR DEFAULT_MSG
HQR_LIBRARIES)
else()
find_package_handle_standard_args(HQR DEFAULT_MSG
HQR_LIBRARIES
HQR_WORKS)
endif()
# - Try to find LibHQR
# Once done this will define
# LIBHQR_FOUND - System has LibHQR
# LIBHQR_INCLUDE_DIRS - The LibHQR include directories
# LIBHQR_LIBRARIES - The libraries needed to use LibHQR
# LIBHQR_DEFINITIONS - Compiler switches required for using LIBHQR
find_package(PkgConfig)
pkg_check_modules(PC_LIBHQR QUIET libhqr)
set(LIBHQR_DEFINITIONS ${PC_LIBHQR_CFLAGS_OTHER})
find_path(
LIBHQR_INCLUDE_DIR
libhqr.h
HINTS ${PC_LIBHQR_INCLUDEDIR}
${PC_LIBHQR_INCLUDE_DIRS}
)
find_library(
LIBHQR_LIBRARY
NAMES hqr
HINTS ${PC_LIBHQR_LIBDIR} ${PC_LIBHQR_LIBRARY_DIRS}
)
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments
# and set LIBHQR_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(
LIBHQR DEFAULT_MSG LIBHQR_LIBRARY LIBHQR_INCLUDE_DIR)
mark_as_advanced(LIBHQR_INCLUDE_DIR LIBHQR_LIBRARY )
set(LIBHQR_LIBRARIES ${LIBHQR_LIBRARY} )
set(LIBHQR_INCLUDE_DIRS ${LIBHQR_INCLUDE_DIR} )
......@@ -33,26 +33,26 @@
# used in CLEAN_LIB_LIST
#
###
macro(CONVERT_LIBSTYLE_TO_PKGCONFIG _liblist)
set(${_liblist}_CPY "${${_liblist}}")
set(${_liblist} "")
foreach(_dep ${${_liblist}_CPY})
if (${_dep} MATCHES "^/")
get_filename_component(dep_libname ${_dep} NAME)
get_filename_component(dep_libdir ${_dep} DIRECTORY)
string(REPLACE "lib" "" dep_libname "${dep_libname}")
string(REPLACE ".so" "" dep_libname "${dep_libname}")
string(REPLACE ".a" "" dep_libname "${dep_libname}")
string(REPLACE ".dylib" "" dep_libname "${dep_libname}")
string(REPLACE ".dll" "" dep_libname "${dep_libname}")
list(APPEND ${_liblist} -L${dep_libdir} -l${dep_libname})
elseif(NOT ${_dep} MATCHES "^-")
list(APPEND ${_liblist} "-l${_dep}")
else()
list(APPEND ${_liblist} ${_dep})
endif()
endforeach()
endmacro(CONVERT_LIBSTYLE_TO_PKGCONFIG)
# macro(CONVERT_LIBSTYLE_TO_PKGCONFIG _liblist)
# set(${_liblist}_CPY "${${_liblist}}")
# set(${_liblist} "")
# foreach(_dep ${${_liblist}_CPY})
# if (${_dep} MATCHES "^/")
# get_filename_component(dep_libname ${_dep} NAME)
# get_filename_component(dep_libdir ${_dep} DIRECTORY)
# string(REPLACE "lib" "" dep_libname "${dep_libname}")
# string(REPLACE ".so" "" dep_libname "${dep_libname}")
# string(REPLACE ".a" "" dep_libname "${dep_libname}")
# string(REPLACE ".dylib" "" dep_libname "${dep_libname}")
# string(REPLACE ".dll" "" dep_libname "${dep_libname}")
# list(APPEND ${_liblist} -L${dep_libdir} -l${dep_libname})
# elseif(NOT ${_dep} MATCHES "^-")
# list(APPEND ${_liblist} "-l${_dep}")
# else()
# list(APPEND ${_liblist} ${_dep})
# endif()
# endforeach()
# endmacro(CONVERT_LIBSTYLE_TO_PKGCONFIG)
###
#
......@@ -60,22 +60,22 @@ endmacro(CONVERT_LIBSTYLE_TO_PKGCONFIG)
# used in GENERATE_PKGCONFIG_FILE
#
###
macro(CLEAN_LIB_LIST _package)
list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_LIBS)
list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_LIBS_PRIVATE)
list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_REQUIRED)
list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_REQUIRED_PRIVATE)
convert_libstyle_to_pkgconfig(${_package}_PKGCONFIG_LIBS)
convert_libstyle_to_pkgconfig(${_package}_PKGCONFIG_LIBS_PRIVATE)
string(REPLACE ";" " " ${_package}_PKGCONFIG_LIBS "${${_package}_PKGCONFIG_LIBS}")
string(REPLACE ";" " " ${_package}_PKGCONFIG_LIBS_PRIVATE "${${_package}_PKGCONFIG_LIBS_PRIVATE}")
string(REPLACE ";" " " ${_package}_PKGCONFIG_REQUIRED "${${_package}_PKGCONFIG_REQUIRED}")
string(REPLACE ";" " " ${_package}_PKGCONFIG_REQUIRED_PRIVATE "${${_package}_PKGCONFIG_REQUIRED_PRIVATE}")
endmacro(CLEAN_LIB_LIST)
#macro(CLEAN_LIB_LIST _package)
# list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_LIBS)
# list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_LIBS_PRIVATE)
# list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_REQUIRED)
# list(REMOVE_DUPLICATES ${_package}_PKGCONFIG_REQUIRED_PRIVATE)
# convert_libstyle_to_pkgconfig(${_package}_PKGCONFIG_LIBS)
# convert_libstyle_to_pkgconfig(${_package}_PKGCONFIG_LIBS_PRIVATE)
# string(REPLACE ";" " " ${_package}_PKGCONFIG_LIBS "${${_package}_PKGCONFIG_LIBS}")
# string(REPLACE ";" " " ${_package}_PKGCONFIG_LIBS_PRIVATE "${${_package}_PKGCONFIG_LIBS_PRIVATE}")
# string(REPLACE ";" " " ${_package}_PKGCONFIG_REQUIRED "${${_package}_PKGCONFIG_REQUIRED}")
# string(REPLACE ";" " " ${_package}_PKGCONFIG_REQUIRED_PRIVATE "${${_package}_PKGCONFIG_REQUIRED_PRIVATE}")
#endmacro(CLEAN_LIB_LIST)
###
#
# GENERATE_PKGCONFIG_FILE: generate files libhqr.pc
# GENERATE_PKGCONFIG_FILE: generate files hqr.pc
#
###
macro(GENERATE_HQR_PKGCONFIG_FILE)
......@@ -85,9 +85,9 @@ macro(GENERATE_HQR_PKGCONFIG_FILE)
set(HQR_PKGCONFIG_REQUIRED "")
set(HQR_PKGCONFIG_REQUIRED_PRIVATE "")
clean_lib_list(LIBHQR)
#clean_lib_list(HQR)
set(_output_libhqr_file "${CMAKE_BINARY_DIR}/hqr.pc")
set(_output_hqr_file "${CMAKE_BINARY_DIR}/hqr.pc")
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/lib/pkgconfig/hqr.pc.in"
"${_output_hqr_file}"
......@@ -97,6 +97,7 @@ macro(GENERATE_HQR_PKGCONFIG_FILE)
FILES ${_output_hqr_file}
DESTINATION lib/pkgconfig
)
endmacro(GENERATE_HQR_PKGCONFIG_FILE)
##
......
......@@ -18,23 +18,24 @@
#ifndef _LIBHQR_H_
#define _LIBHQR_H_
#include <stdio.h>
#include "libhqr_common.h"
#ifdef BEGIN_C_DECLS
#undef BEGIN_C_DECLS
#endif
#ifdef END_C_DECLS
#undef END_C_DECLS
#endif
#if defined(c_plusplus) || defined(__cplusplus)
# define BEGIN_C_DECLS extern "C" {
# define END_C_DECLS }
#else
# define BEGIN_C_DECLS /* empty */
# define END_C_DECLS /* empty */
#endif
BEGIN_C_DECLS
static inline int libhqr_imin(int a, int b){
return (a > b) ? b : a;
}
static inline int libhqr_imax(int a, int b){
return (a > b) ? a : b;
}
static inline int libhqr_iceil(int a, int b){
return (a + b - 1) / b;
}
/**
* @brief Define which tree level the tile belongs to.
*
......@@ -50,8 +51,8 @@ static inline int libhqr_iceil(int a, int b){
* matrices.
*
* @remark LIBHQR_KILLED_BY_TS needs to be 0 for all variant of QR
* factorizations in order to distinguish TT kernels from TS kernels in
* compuations
* factorizations in order to easily distinguish TT kernels from TS kernels in
* computations.
*
*/
typedef enum libhqr_type_ {
......@@ -74,47 +75,42 @@ typedef enum libhqr_tree_ {
LIBHQR_FIBONACCI_TREE = 2,
LIBHQR_BINARY_TREE = 3,
LIBHQR_GREEDY1P_TREE = 4,
} libqr_tree_e;
} libhqr_tree_e;
/**
* @brief Define the type of factorization to apply: QR or LQ.
*/
typedef enum libhqr_typefacto_ {
typedef enum libhqr_facto_ {
LIBHQR_QR = 0, /**< QR factorization will be performed, and A shape is considered */
LIBHQR_LQ = 1, /**< LQ factorization will be performed, and A^t shape is considered */
} libhqr_typefacto_e;
} libhqr_facto_e;
/**
* @brief Minimal structure to define the shape of the matrix to factorize.
* @brief Minimal matrix description stucture.
*
* This structure describes the dimension of the matrix to factorize in number
* of tiles: mt-by-nt. When provided, nodes and p provides the total number of
* nodes involved in the computation, and the P parameter of 2D bloc-cyclic
* distributions of the tiles. LibHQR doesn't support any other distribtions for
* now.
*/
typedef struct libhqr_tiledesc_s{
int mt; /**< The number of rows of tiles */
int nt; /**< The number of columns of tiles */
typedef struct libhqr_matrix_s {
int mt; /**< The number of rows of tiles in the matrix */
int nt; /**< The number of columns of tiles in the matrix */
int nodes; /**< The number of nodes involved in the data distribution */
int p; /**< The number of nodes per column in the data distribution */
} libhqr_tiledesc_t;
} libhqr_matrix_t;
/**
* @brief Minimal structure to stock the information for each tile
* @brief Hierarchical tree structure for QR/LQ factorization like kernels.
*
* This structure holds the functions that allows one to traverse the tree, or
* to know form each node which ones are its neigbours in the reduction trees.
*
*/
typedef struct libhqr_tileinfo_s{
int type; /**< The type of the tile */
int currpiv; /**< Number of the time annihilating the current tile */
int nextpiv; /**< The next tile killed by the tile */
int prevpiv; /**< The previous tile killed by the tile */
int first_nextpiv; /**< The first next tile killed */
int first_prevpiv; /**< The first previous tile killed */
} libhqr_tileinfo_t;
struct libhqr_tree_s;
typedef struct libhqr_tree_s libhqr_tree_t;
typedef struct libhqr_context_s libhqr_context_t;
/**
* @brief Hierarchical tree structure for QR/LQ factorization like kernels.
*/
struct libhqr_tree_s {
/**
* @brief Return the number of geqrt/gelqt kernels in the column/row k
......@@ -186,6 +182,7 @@ struct libhqr_tree_s {
*/
int (*prevpiv)(const libhqr_tree_t *qrtree, int k, int p, int m);
int init; /**< Internal variable */
int mt; /**< Number of rows of tile A if QR factorization, and in A^t if LQ
factorization */
int nt; /**< Number of tile reflectors to compute. This is equal to
......@@ -199,53 +196,58 @@ struct libhqr_tree_s {
tree: hqr, systolic, svd, ...) */
};
int libhqr_systolic_init( libhqr_tree_t *qrtree,
libhqr_typefacto_e trans, libhqr_tiledesc_t *A,
int p, int q );
void libhqr_systolic_finalize( libhqr_tree_t *qrtree );
/**
* @name Initialization/Finalization functions
* @{
*/
int libhqr_init_sys( libhqr_tree_t *qrtree,
libhqr_facto_e trans, libhqr_matrix_t *A,
int p, int q );
int libhqr_svd_init( libhqr_tree_t *qrtree,
libhqr_typefacto_e trans, libhqr_tiledesc_t *A,
int libhqr_init_svd( libhqr_tree_t *qrtree,
libhqr_facto_e trans, libhqr_matrix_t *A,
int type_hlvl, int p, int nbcores_per_node, int ratio );
int libhqr_hqr_init( libhqr_tree_t *qrtree,
libhqr_typefacto_e trans, libhqr_tiledesc_t *A,
int libhqr_init_hqr( libhqr_tree_t *qrtree,
libhqr_facto_e trans, libhqr_matrix_t *A,
int type_llvl, int type_hlvl,
int a, int p, int domino, int tsrr );
void libhqr_hqr_finalize( libhqr_tree_t *qrtree );
void libhqr_finalize( libhqr_tree_t *qrtree );
void libhqr_matrix_init(libhqr_tree_t *qrtree, const libhqr_tree_t *qrtree_init);
int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m);
int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m);
int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m);
int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m);
void libhqr_matrix_finalize(libhqr_tree_t *qrtree);
/*
* function for drawing the tree
/**
* @}
* @name Function to walk the tree in correct order for STF programing model
* @{
*/
void libhqr_walk_stepk( const libhqr_tree_t *qrtree, int k, int *tiles );
void libhqr_treewalk(const libhqr_tree_t *qrtree, int k, int *tiles);
void draw_rectangle(int k, int p, int m, int step_m, FILE *file);
void draw_lines(const libhqr_tree_t *qrtree, int k, int *tiles, int *step, FILE *file);
void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *file);
void draw_vertical_line(int k, int p, int m, int step_m, FILE *file);
void libhqr_print_tree(const libhqr_tree_t *qrtree, libhqr_tiledesc_t *matrix);
/**
* @}
* @name Drawing functions
* @{
*/
void libhqr_print_dag( const libhqr_tree_t *qrtree,
const char *filename );
void libhqr_print_svg( const libhqr_tree_t *qrtree,
const char *filename );
/*
* Debugging functions
/**
* @}
* @name Checking/Debugging functions
* @{
*/
int libhqr_check ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree );
void libhqr_print_type ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree );
void libhqr_print_pivot ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree );
void libhqr_print_nbgeqrt( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree );
void libhqr_print_perm ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int *perm );
void libhqr_print_next_k ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k );
void libhqr_print_prev_k ( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k );
void libhqr_print_geqrt_k( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k );
int libhqr_tree_check ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree );
void libhqr_tree_print_dag ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, char *filename );
void libhqr_tree_print_type ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree );
void libhqr_tree_print_pivot ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree );
void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree );
void libhqr_tree_print_perm ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *perm );
void libhqr_tree_print_next_k ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k );
void libhqr_tree_print_prev_k ( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k );
void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k );
/**
* @}
*/
END_C_DECLS
......
/**
*
* @file libhqr_common.h
*
* Header file for common macro
*
* @copyright 2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-04-05
*
*/
#ifndef _LIBHQR_COMMON_H_
#define _LIBHQR_COMMON_H_
#undef BEGIN_C_DECLS
#undef END_C_DECLS
#if defined(c_plusplus) || defined(__cplusplus)
# define BEGIN_C_DECLS extern "C" {
# define END_C_DECLS }
#else
# define BEGIN_C_DECLS /* empty */
# define END_C_DECLS /* empty */
#endif
#endif /* _LIBHQR_COMMON_H_ */
/**
*
* @file libhqr_draw.h
*
* Header file for all the functions of drawing.
*
* @copyright 2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-04-04
*
**/
#ifndef _LIBHQR_DRAW_H_
#define _LIBHQR_DRAW_H_
#include "libhqr_common.h"
BEGIN_C_DECLS
#define WIDTH 50
#define HEIGHT 50
#define SIZE 100
/*
* Clobal array for color
*/
extern char *color[4];
/*
* function for treedraw
*/
void libhqr_writeheader(FILE *file);
void libhqr_writeend(FILE *file);
void libhqr_drawTT(int x, int y, int w, int h, int k, FILE *file);
void libhqr_drawTS(int x, int y, int w, int h, int k, FILE *file);
void libhqr_drawline(int x1, int y1, int x2, int y2, int k, FILE *file);
END_C_DECLS
#endif /* _LIBHQR_DRAW_H_ */
/**
*
* @file libhqr.h
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
*
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
*/
#ifndef _LIBHQR_INTERNAL_H_
#define _LIBHQR_INTERNAL_H_
#include "libhqr.h"
#include <stdio.h>
#include <assert.h>
#define PRINT_PIVGEN 0
#ifdef PRINT_PIVGEN
#define myassert( test ) {if ( ! (test) ) return -1;}
#else
#define myassert(test) {assert((test)); return -1;}
#endif
typedef enum qrtree_type_ {
LIBHQR_QRTREE_UNSET = 0,
LIBHQR_QRTREE_HQR,
LIBHQR_QRTREE_SVD,
LIBHQR_QRTREE_SYS,
LIBHQR_QRTREE_MTX,
} qrtree_type_e;
struct hqr_args_s;
typedef struct hqr_args_s hqr_args_t;
struct hqr_subpiv_s;
typedef struct hqr_subpiv_s hqr_subpiv_t;
/**
* @brief jhj
*/
struct hqr_args_s {
int domino; /**< Switch to enable/disable the domino tree linking high and lw level reduction trees */
int tsrr; /**< Switch to enable/disable round-robin on TS to optimise pipelining between TS and local tree */
hqr_subpiv_t *llvl;
hqr_subpiv_t *hlvl;
int *perm;
};
struct hqr_subpiv_s {
/**
* currpiv
* @param[in] arg pointer to the qr_piv structure
* @param[in] k step in the factorization
* @param[in] m line you want to eliminate
*
* @return the annihilator p used with m at step k
*/
int (*currpiv)(const hqr_subpiv_t *arg, int k, int m);
/*
* nextpiv
* @param[in] arg pointer to the qr_piv structure
* @param[in] k step in the factorization
* @param[in] p line currently used as an annihilator
* @param[in] m line actually annihilated.
* m = MT to find the first time p is used as an annihilator during step k
*
* @return the next line m' using the line p as annihilator during step k
* mt if p will never be used again as an annihilator.
*/
int (*nextpiv)(const hqr_subpiv_t *arg, int k, int p, int m);
/*
* prevpiv
* @param[in] arg pointer to the qr_piv structure
* @param[in] k step in the factorization
* @param[in] p line currently used as an annihilator
* @param[in] m line actually annihilated.
* m = p to find the last time p has been used as an annihilator during step k
*
* @return the previous line m' using the line p as annihilator during step k
* mt if p has never been used before that as an annihilator.
*/
int (*prevpiv)(const hqr_subpiv_t *arg, int k, int p, int m);
int *ipiv;
int minMN;
int ldd;
int a;
int p;
int domino;
};
/**
* @brief Minimal structure to store the information related to each tile.
*/
typedef struct libhqr_tile_info_s {
int type; /**< The type of the reduction applied to the tile (@sa libhqr_type_e) */
int currpiv; /**< The row index of the pivot for this tile */
int nextpiv; /**< The next tile for which the currpiv is a pivot */
int prevpiv; /**< The previous tile for which currpiv was a pivot */
int first_nextpiv; /**< If type != 0, the first tile for which this tile is a pivot */
int first_prevpiv; /**< If type != 0, the last tile for which this tile is a pivot */
} libhqr_tile_info_t;
static inline int libhqr_imin(int a, int b){
return (a > b) ? b : a;
}
static inline int libhqr_imax(int a, int b){
return (a > b) ? a : b;
}
static inline int libhqr_iceil(int a, int b){
return (a + b - 1) / b;
}
/* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */
#define nbextra1_formula (( (k % pa) > (pa - p) ) ? (-k)%pa + pa : 0)
/*
* Common functions
*/
/* int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ); */
/* int hqr_getm( const libhqr_tree_t *qrtree, int k, int i ); */
/* int hqr_geti( const libhqr_tree_t *qrtree, int k, int m ); */
/* int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ); */
/*
* Subtree for low-level
*/
void libhqr_matrix_init(libhqr_tree_t *qrtree, const libhqr_tree_t *qrtree_init);
int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m);
int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m);
int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m);
int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m);
void libhqr_matrix_finalize(libhqr_tree_t *qrtree);
/*
* function for drawing the tree
*/
void draw_rectangle(int k, int p, int m, int step_m, FILE *file);
void draw_lines(const libhqr_tree_t *qrtree, int k, int *tiles, int *step, FILE *file);
void draw_horizontal_line(int k, int p, int m, int step_p, int step_m, FILE *file);
void draw_vertical_line( int k, int p, int m, int step_m, FILE *file);
/**
* @name Low level trees
* @{
*/
void hqr_low_flat_init ( hqr_subpiv_t *arg );
void hqr_low_binary_init ( hqr_subpiv_t *arg );
void hqr_low_fibonacci_init( hqr_subpiv_t *arg, int minMN );
void hqr_low_greedy_init ( hqr_subpiv_t *arg, int minMN );
void hqr_low_greedy1p_init ( hqr_subpiv_t *arg, int minMN );
void svd_low_adaptiv_init ( hqr_subpiv_t *arg, int gmt, int gnt, int nbcores, int ratio );
/**
* @}
*
* @name High level trees
* @{
*/
void hqr_high_flat_init ( hqr_subpiv_t *arg );
void hqr_high_binary_init ( hqr_subpiv_t *arg );
void hqr_high_fibonacci_init( hqr_subpiv_t *arg );
void hqr_high_greedy_init ( hqr_subpiv_t *arg, int minMN );
void hqr_high_greedy1p_init ( hqr_subpiv_t *arg );
/**
* @}
*/
void libhqr_fct_to_mtx( const libhqr_tree_t *in, libhqr_tree_t *out );
#endif /* _LIBHQR_INTERNAL_H_ */
......@@ -16,10 +16,6 @@
#ifndef _LIBHQR_QUEUE_H_
#define _LIBHQR_QUEUE_H_
#include "libhqr_common.h"
BEGIN_C_DECLS
typedef struct libhqr_queue_tile_s {
struct libhqr_queue_tile_s *prev;
struct libhqr_queue_tile_s *next;
......@@ -27,11 +23,9 @@ typedef struct libhqr_queue_tile_s {
} libhqr_queue_tile_t;
libhqr_queue_tile_t *libhqr_queue_tile_new (void);
void libhqr_queue_tile_push (libhqr_queue_tile_t ** queue_tile, int numero);
int libhqr_queue_tile_head (libhqr_queue_tile_t ** queue_tile);
int libhqr_queue_tile_pop (libhqr_queue_tile_t ** queue_tile);
void libhqr_queue_tile_delete(libhqr_queue_tile_t ** queue_tile);
END_C_DECLS
void libhqr_queue_tile_push (libhqr_queue_tile_t **queue_tile, int numero);
int libhqr_queue_tile_head (libhqr_queue_tile_t **queue_tile);
int libhqr_queue_tile_pop (libhqr_queue_tile_t **queue_tile);
void libhqr_queue_tile_delete(libhqr_queue_tile_t **queue_tile);
#endif /* _LIBHQR_QUEUE_H_ */
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=${exec_prefix}/lib
includedir=${exec_prefix}/include
Name: HQR
Description: Build and Visualize Trees for Hierachical QR Factorizations
Version: @HQR_VERSION_MAJOR@.@HQR_VERSION_MINOR@.@HQR_VERSION_MICRO@
Cflags: -I${includedir}
Libs: -L${libdir} @HQR_PKGCONFIG_LIBS@
Libs.private: @HQR_PKGCONFIG_LIBS_PRIVATE@
Requires: @HQR_PKGCONFIG_REQUIRED@
Requires.private: @HQR_PKGCONFIG_REQUIRED_PRIVATE@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=${exec_prefix}/lib
includedir=${exec_prefix}/include/libhqr
Name: libhqr
Description: Build and Visualize Tree of Hierachical QR Factorization
Version: @LIBHQR_VERSION_MAJOR@.@LIBHQR_VERSION_MINOR@.@LIBHQR_VERSION_MICRO@
Cflags: -I${includedir}
Libs: -L${libdir} @LIBHQR_PKGCONFIG_LIBS@
Libs.private: @LIBHQR_PKGCONFIG_LIBS_PRIVATE@
Requires: @LIBHQR_PKGCONFIG_REQUIRED@
Requires.private: @LIBHQR_PKGCONFIG_REQUIRED_PRIVATE@
/**
*
* @file libhqr_dbg.c
* @file check.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
......@@ -15,7 +15,7 @@
* @date 2017-03-21
*
*/
#include "libhqr.h"
#include "libhqr_internal.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
......@@ -30,7 +30,8 @@
return ret; \
}
int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree)
int
libhqr_check( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree)
{
int minMN = libhqr_imin(A->mt, A->nt );
int i, m, k, nb;
......@@ -43,8 +44,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree)
* Check Formula for NB geqrt
*/
{
/* libhqr_tree_print_type( A, qrtree ); */
/* libhqr_tree_print_nbgeqrt( A, qrtree ); */
/* libhqr_print_type( A, qrtree ); */
/* libhqr_print_nbgeqrt( A, qrtree ); */
check = 1;
for (k=0; k<minMN; k++) {
nb = 0;
......@@ -73,7 +74,7 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree)
int prevm = -1;
check = 1;
for (k=0; k<minMN; k++) {
/* libhqr_tree_print_geqrt_k( A, qrtree, k ); */
/* libhqr_print_geqrt_k( A, qrtree, k ); */
nb = qrtree->getnbgeqrf( qrtree, k );
prevm = -1;
for (i=0; i < nb; i++) {
......@@ -134,8 +135,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree)
nb++;
}
if ( nb > 1 ) {
libhqr_tree_print_next_k( A, qrtree, k);
libhqr_tree_print_prev_k( A, qrtree, k);
libhqr_print_next_k( A, qrtree, k);
libhqr_print_prev_k( A, qrtree, k);
printf(" ----------------------------------------------------\n"
" - a = %d, p = %d, M = %d, N = %d\n"
......@@ -146,8 +147,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree)
return 3;
}
else if ( nb == 0 ) {
libhqr_tree_print_next_k( A, qrtree, k);
libhqr_tree_print_prev_k( A, qrtree, k);
libhqr_print_next_k( A, qrtree, k);
libhqr_print_prev_k( A, qrtree, k);
printf(" ----------------------------------------------------\n"
" - a = %d, p = %d, M = %d, N = %d\n"
......@@ -177,8 +178,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree)
nb++;
}
if ( nb > 1 ) {
libhqr_tree_print_next_k( A, qrtree, k);
libhqr_tree_print_prev_k( A, qrtree, k);
libhqr_print_next_k( A, qrtree, k);
libhqr_print_prev_k( A, qrtree, k);
printf(" ----------------------------------------------------\n"
" - a = %d, p = %d, M = %d, N = %d\n"
......@@ -189,8 +190,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree)
return 3;
}
else if ( nb == 0 ) {
libhqr_tree_print_next_k( A, qrtree, k);
libhqr_tree_print_prev_k( A, qrtree, k);
libhqr_print_next_k( A, qrtree, k);
libhqr_print_prev_k( A, qrtree, k);
printf(" ----------------------------------------------------\n"
" - a = %d, p = %d, M = %d, N = %d\n"
......@@ -224,8 +225,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree)
prev = qrtree->prevpiv(qrtree, k, m, next);
if ( start != prev ) {
libhqr_tree_print_next_k( A, qrtree, k);
libhqr_tree_print_prev_k( A, qrtree, k);
libhqr_print_next_k( A, qrtree, k);
libhqr_print_prev_k( A, qrtree, k);
printf(" ----------------------------------------------------\n"
" - a = %d, p = %d, M = %d, N = %d\n"
......@@ -246,7 +247,8 @@ int libhqr_tree_check( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree)
return 0;
}
void libhqr_tree_print_type( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree )
void
libhqr_print_type( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree )
{
int minMN = libhqr_imin(A->mt, A->nt );
int m, k;
......@@ -282,7 +284,8 @@ void libhqr_tree_print_type( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree )
}
}
void libhqr_tree_print_pivot( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree )
void
libhqr_print_pivot( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree )
{
int minMN = libhqr_imin(A->mt, A->nt );
int m, k;
......@@ -317,7 +320,8 @@ void libhqr_tree_print_pivot( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree )
}
}
void libhqr_tree_print_next_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k )
void
libhqr_print_next_k( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k )
{
int m, s;
printf("\n------------ Next (k = %d)--------------\n", k);
......@@ -336,7 +340,8 @@ void libhqr_tree_print_next_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int
}
}
void libhqr_tree_print_prev_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k )
void
libhqr_print_prev_k( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k )
{
int m, s;
printf("\n------------ Prev (k = %d)--------------\n", k);
......@@ -355,7 +360,8 @@ void libhqr_tree_print_prev_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int
}
}
void libhqr_tree_print_perm( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *perm )
void
libhqr_print_perm( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int *perm )
{
int minMN = libhqr_imin(A->mt, A->nt );
int m, k;
......@@ -380,7 +386,8 @@ void libhqr_tree_print_perm( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int *p
printf( "\n" );
}
void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree )
void
libhqr_print_nbgeqrt( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree )
{
int minMN = libhqr_imin(A->mt, A->nt );
int m, k, nb;
......@@ -408,7 +415,8 @@ void libhqr_tree_print_nbgeqrt( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree )
printf( "\n" );
}
void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int k )
void
libhqr_print_geqrt_k( const libhqr_matrix_t *A, const libhqr_tree_t *qrtree, int k )
{
int i, m, nb;
(void)A;
......@@ -426,125 +434,3 @@ void libhqr_tree_print_geqrt_k( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, int
}
printf( "\n" );
}
/* static int libhqr_tree_getinon0( const libhqr_tree_t *qrtree, */
/* const int k, int i, int mt ) */
/* { */
/* int j; */
/* for(j=k; j<mt; j++) { */
/* if ( libhqr_tree_gettype( qrtree, k, j ) != 0 ) */
/* i--; */
/* if ( i == -1 ) */
/* break; */
/* } */
/* return qrtree->perm[k*(qrtree->desc->mt+1) + j]; */
/* } */
#define DAG_HEADER "digraph G { orientation=portrait; \n"
#define DAG_FOOTER "} // close graph\n"
#define DAG_LABELNODE "%d [label=\"%d\",color=white,pos=\"-1.,-%d.!\"]\n"
#define DAG_LENGTHNODE "l%d [label=\"%d\",color=white,pos=\"%d.,0.5!\"]\n"
#define DAG_INVISEDGE "%d->%d [style=\"invis\"];\n"
#define DAG_STARTNODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n"
#define DAG_NODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n"
#define DAG_FIRSTEDGE_PIV "%d->p%d_m%d_k0\n"
#define DAG_FIRSTEDGE_TS "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n"
#define DAG_FIRSTEDGE_TT "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n"
#define DAG_EDGE_PIV "p%d_m%d_k%d->p%d_m%d_k%d [width=0.1,color=\"%s\"]\n"
#define DAG_EDGE_TS "p%d_m%d_k%d->p%d_m%d_k%d [style=dotted, width=0.1,color=\"%s\"]\n"
#define DAG_EDGE_TT "p%d_m%d_k%d->p%d_m%d_k%d [style=dashed, width=0.1,color=\"%s\"]\n"
char *color[] = {
"red",
"blue",
"green",
"orange",
"cyan",
"purple",
"yellow",
};
#define DAG_NBCOLORS 7
void libhqr_tree_print_dag( libhqr_tiledesc_t *A, libhqr_tree_t *qrtree, char *filename )
{
int *pos, *next, *done;
int k, m, n, lpos, prev, length;
int minMN = libhqr_imin( A->mt, A->nt );
FILE *f = fopen( filename, "w" );
done = (int*)malloc( A->mt * sizeof(int) );
pos = (int*)malloc( A->mt * sizeof(int) );
next = (int*)malloc( A->mt * sizeof(int) );
memset(pos, 0, A->mt * sizeof(int) );
memset(next, 0, A->mt * sizeof(int) );
/* Print header */
fprintf(f, DAG_HEADER ); /*, A->mt+2, minMN+2 );*/
for(m=0; m < A->mt; m++) {
fprintf(f, DAG_LABELNODE, m, m, m);
}
for(k=0; k<minMN; k++ ) {
int nb2reduce = A->mt - k - 1;
for(m=k; m < A->mt; m++) {
fprintf(f, DAG_STARTNODE, m, A->mt, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]);
next[m] = qrtree->nextpiv( qrtree, k, m, A->mt);
}
while( nb2reduce > 0 ) {
memset(done, 0, A->mt * sizeof(int) );
for(m=A->mt-1; m > (k-1); m--) {
n = next[m];
if ( next[n] != A->mt )
continue;
if ( n != A->mt ) {
lpos = libhqr_imax( pos[m], pos[n] );
lpos++;
pos[m] = lpos;
pos[n] = lpos;
fprintf(f, DAG_NODE, m, n, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]);
prev = qrtree->prevpiv( qrtree, k, m, n );
fprintf(f, DAG_EDGE_PIV,
m, prev, k,
m, n, k,
color[ (m%qrtree->p) % DAG_NBCOLORS ]);
prev = qrtree->prevpiv( qrtree, k, n, n );
if ( qrtree->gettype(qrtree, k, n) == 0 )
fprintf(f, DAG_EDGE_TS,
n, prev, k,
m, n, k,
color[ (m%qrtree->p) % DAG_NBCOLORS ]);
else
fprintf(f, DAG_EDGE_TT,
n, prev, k,
m, n, k,
color[ (m%qrtree->p) % DAG_NBCOLORS ]);
next[m] = qrtree->nextpiv( qrtree, k, m, n);
done[m] = done[n] = 1;
nb2reduce--;
}
}
}
}
length = 0;
for(m=0; m < A->mt; m++) {
length = libhqr_imax(length, pos[m]);
}
length++;
for(k=0; k<length; k++)
fprintf(f, DAG_LENGTHNODE, k, k, k);
fprintf(f, DAG_FOOTER);
printf("Tic Max = %d\n", length-1);
fclose( f );
free(pos);
free(next);
}
/**
*
* @file gendot.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
*
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
*/
#include "libhqr_internal.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define DAG_HEADER "digraph G { orientation=portrait; \n"
#define DAG_FOOTER "} // close graph\n"
#define DAG_LABELNODE "%d [label=\"%d\",color=white,pos=\"-1.,-%d.!\"]\n"
#define DAG_LENGTHNODE "l%d [label=\"%d\",color=white,pos=\"%d.,0.5!\"]\n"
#define DAG_INVISEDGE "%d->%d [style=\"invis\"];\n"
#define DAG_STARTNODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n"
#define DAG_NODE "p%d_m%d_k%d [shape=point,width=0.1, pos=\"%d.,-%d.!\",color=\"%s\"];\n"
#define DAG_FIRSTEDGE_PIV "%d->p%d_m%d_k0\n"
#define DAG_FIRSTEDGE_TS "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n"
#define DAG_FIRSTEDGE_TT "%d->p%d_m%d_k0 [style=dotted,width=0.1]\n"
#define DAG_EDGE_PIV "p%d_m%d_k%d->p%d_m%d_k%d [width=0.1,color=\"%s\"]\n"
#define DAG_EDGE_TS "p%d_m%d_k%d->p%d_m%d_k%d [style=dotted, width=0.1,color=\"%s\"]\n"
#define DAG_EDGE_TT "p%d_m%d_k%d->p%d_m%d_k%d [style=dashed, width=0.1,color=\"%s\"]\n"
char *color[] = {
"red",
"blue",
"green",
"orange",
"cyan",
"purple",
"yellow",
};
#define DAG_NBCOLORS 7
void
libhqr_print_dot( const libhqr_tree_t *qrtree,
const char *filename )
{
int *pos, *next, *done;
int k, m, n, lpos, prev, length;
int minMN = libhqr_imin( qrtree->mt, qrtree->nt );
FILE *f = fopen( filename, "w" );
done = (int*)malloc( qrtree->mt * sizeof(int) );
pos = (int*)malloc( qrtree->mt * sizeof(int) );
next = (int*)malloc( qrtree->mt * sizeof(int) );
memset(pos, 0, qrtree->mt * sizeof(int) );
memset(next, 0, qrtree->mt * sizeof(int) );
/* Print header */
fprintf(f, DAG_HEADER ); /*, A->mt+2, minMN+2 );*/
for(m=0; m < qrtree->mt; m++) {
fprintf(f, DAG_LABELNODE, m, m, m);
}
for(k=0; k<minMN; k++ ) {
int nb2reduce = qrtree->mt - k - 1;
for(m=k; m < qrtree->mt; m++) {
fprintf(f, DAG_STARTNODE, m, qrtree->mt, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]);
next[m] = qrtree->nextpiv( qrtree, k, m, qrtree->mt);
}
while( nb2reduce > 0 ) {
memset(done, 0, qrtree->mt * sizeof(int) );
for(m=qrtree->mt-1; m > (k-1); m--) {
n = next[m];
if ( next[n] != qrtree->mt )
continue;
if ( n != qrtree->mt ) {
lpos = libhqr_imax( pos[m], pos[n] );
lpos++;
pos[m] = lpos;
pos[n] = lpos;
fprintf(f, DAG_NODE, m, n, k, pos[m], m, color[ (m%qrtree->p) % DAG_NBCOLORS ]);
prev = qrtree->prevpiv( qrtree, k, m, n );
fprintf(f, DAG_EDGE_PIV,
m, prev, k,
m, n, k,
color[ (m%qrtree->p) % DAG_NBCOLORS ]);
prev = qrtree->prevpiv( qrtree, k, n, n );
if ( qrtree->gettype(qrtree, k, n) == 0 )
fprintf(f, DAG_EDGE_TS,
n, prev, k,
m, n, k,
color[ (m%qrtree->p) % DAG_NBCOLORS ]);
else
fprintf(f, DAG_EDGE_TT,
n, prev, k,
m, n, k,
color[ (m%qrtree->p) % DAG_NBCOLORS ]);
next[m] = qrtree->nextpiv( qrtree, k, m, n);
done[m] = done[n] = 1;
nb2reduce--;
}
}
}
}
length = 0;
for(m=0; m < qrtree->mt; m++) {
length = libhqr_imax(length, pos[m]);
}
length++;
for(k=0; k<length; k++)
fprintf(f, DAG_LENGTHNODE, k, k, k);
fprintf(f, DAG_FOOTER);
printf("Tic Max = %d\n", length-1);
fclose( f );
free(pos);
free(next);
}
/**
*
* @file gensvg.c
*
* File for algorithm of treewalking.
*
* @copyright 2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
*/
#include "libhqr_internal.h"
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#define WIDTH 50
#define HEIGHT 50
#define SIZE 100
/*
* Global array for color
*/
char *colortree[] = {"red", "blue", "green", "orange", "cyan", "purple", "yellow" };
#define NBCOLORS (sizeof( colortree ) / sizeof( char* ))
/*
* functions writing in the svg file
*/
static void
drawsvg_header( FILE *file )
{
int rc;
rc = fprintf(file,
"<?xml version=\"1.0\" standalone=\"no\"?>\n"
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\" \n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n"
"<svg width=\"2000\" height=\"2000\" version=\"1.1\" \n xmlns=\"http://www.w3.org/2000/svg\">\n");
if (rc < 0) {
fprintf(stderr, "Something wrong happened while writing the file (drawsvg_header)\n");
}
return;
}
static void
drawsvg_top_TS( FILE *file, int k, int x, int y, int w, int h )
{
int rc;
rc = fprintf(file,"<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\" fill=\"%s\" /> \n", x, y, w, h, colortree[k%NBCOLORS]);
if (rc < 0) {
fprintf(stderr, "Something wrong happened while writing the file (drawsvg_top_TS)\n");
}
return;
}
static void
drawsvg_bot_TS( FILE *file, int k, int x, int y, int w, int h )
{
int rc, x2, y2, w2, h2;
rc = fprintf(file,"<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\" fill=\"%s\" /> \n", x, y, w, h, colortree[k%NBCOLORS]);
if (rc < 0) {
fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TS)\n");
return;
}
x2 = x + (w / 4);
y2 = y + (h / 4);
w2 = (w / 2);
h2 = (h / 2);
rc = fprintf(file,"<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\" fill =\"white\"/> \n", x2, y2, w2, h2);
if (rc < 0) {
fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TS)\n");
}
return;
}
static void
drawsvg_top_TT( FILE *file, int k, int x, int y, int w, int h )
{
int rc;
rc = fprintf( file,"<circle cx=\"%d\" cy=\"%d\" r=\"%d\" fill=\"%s\" /> \n",
x + w / 2, y + h / 2, w / 2, colortree[k%NBCOLORS] );
if (rc < 0) {
fprintf(stderr, "Something wrong happened while writing the file (drawsvg_top_TT)\n");
}
return;
}
static void
drawsvg_bot_TT( FILE *file, int k, int x, int y, int w, int h )
{
int rc;
rc = fprintf( file,"<circle cx=\"%d\" cy=\"%d\" r=\"%d\" fill=\"%s\" /> \n",
x + w / 2, y + h / 2, w / 2, colortree[k%NBCOLORS] );
if (rc < 0) {
fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TT)\n");
return;
}
rc = fprintf( file,"<circle cx=\"%d\" cy=\"%d\" r=\"%d\" fill=\"white\" /> \n",
x + w / 2, y + h / 2, w / 4 );
if (rc < 0) {
fprintf(stderr, "Something wrong happened while writing the file (drawsvg_bot_TT)\n");
}
return;
}
static void
drawsvg_line( FILE *file, int k, int x1, int y1, int x2, int y2 )
{
int rc;
rc = fprintf(file,"<line x1=\"%d\" y1=\"%d\" x2=\"%d\" y2=\"%d\" style=\"fill:none;stroke:%s;stroke-width:2px;\"/> \n", x1, y1, x2, y2, colortree[k%NBCOLORS]);
if (rc < 0) {
fprintf(stderr, "Something wrong happened while writing the file (drawsvg_line)\n");
}
return;
}
static void
drawsvg_footer( FILE *file )
{
int rc;
rc = fprintf(file, "</svg>\n");
if (rc < 0) {
fprintf(stderr, "Something wrong happened while writing the file (drawsvg_footer)\n");
}
return;
}
static void
drawsvg_lines_rowm( FILE *file, int k,
int p, int m, int beg_p, int beg_m, int end )
{
int yp, ym;
int x, xp, xm;
/* Row of the tiles */
ym = SIZE + SIZE * m;
yp = SIZE + SIZE * p;
/* Starting position of the tiles */
xm = (SIZE + (SIZE / 4)) + SIZE * beg_m;
xp = (SIZE + (SIZE / 4)) + SIZE * beg_p;
/* Final position of the tiles */
x = SIZE + SIZE * end;
/* Horizontal lines */
drawsvg_line( file, k, xm, ym, x + (SIZE / 4), ym );
drawsvg_line( file, k, xp, yp, x + (SIZE / 4), yp );
/* Vertical line */
drawsvg_line( file, k, x, ym, x, yp );
}
static void
drawsvg_lines_stepk( const libhqr_tree_t *qrtree, FILE *file,
int k, int *tiles, int *step )
{
int i, m, p, end;
/* Get order for step k */
libhqr_walk_stepk( qrtree, k, tiles+(k+1) );
for(i = k+1; i < qrtree->mt; i++){
m = tiles[i];
p = qrtree->currpiv(qrtree, k, m);
end = libhqr_imax( step[p], step[m] ) + 1;
/* Draw horizontal lines for rows p and m */
drawsvg_lines_rowm( file, k, p, m, step[p], step[m], end );
/* Update last time the rows p and m have been modified for future lines */
step[m] = end;
step[p] = end;
}
}
static void
drawsvg_nodes_rowm( FILE *file, int k,
int type, int p, int m, int step_m )
{
int x, yp, ym;
x = ((SIZE * 3) / 4) + SIZE * step_m;
ym = ((SIZE * 3) / 4) + SIZE * m;
yp = ((SIZE * 3) / 4) + SIZE * p;
if ( type == LIBHQR_KILLED_BY_TS ) {
drawsvg_top_TS(file, k, x, yp, WIDTH, HEIGHT );
drawsvg_bot_TS(file, k, x, ym, WIDTH, HEIGHT );
}
else {
drawsvg_top_TT(file, k, x, yp, WIDTH, HEIGHT );
drawsvg_bot_TT(file, k, x, ym, WIDTH, HEIGHT );
}
}
void
libhqr_print_svg( const libhqr_tree_t *qrtree,
const char *filename )
{
FILE *file;
int *tiles, *steps;
int minMN, k, i;
minMN = libhqr_imin( qrtree->mt, qrtree->nt );
tiles = (int*)calloc( qrtree->mt, sizeof(int));
steps = (int*)calloc( qrtree->mt, sizeof(int));
file = fopen(filename,"w+");
drawsvg_header(file);
for (k = 0; k < minMN; k++) {
/* Drawing the lines */
drawsvg_lines_stepk( qrtree, file, k, tiles, steps );
/* Drawing the rectangles */
for (i = k+1; i < qrtree->mt; i++) {
drawsvg_nodes_rowm( file, k,
qrtree->gettype(qrtree, k, i),
qrtree->currpiv(qrtree, k, i), i, steps[i] );
}
}
drawsvg_footer(file);
fclose(file);
free(tiles);
free(steps);
}
/**
*
* @file high_binary.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* Functions for high level binary tree
*
*/
#include "libhqr_internal.h"
#include <math.h>
static int
hqr_high_binary_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
int tmp1 = m - k;
int tmp2 = 1;
(void)arg;
if ( tmp1 == 0)
return 0;
while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) {
tmp1 = tmp1 >> 1;
tmp2 = tmp2 << 1;
}
assert( m - tmp2 >= k );
return m - tmp2;
}
static int
hqr_high_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int tmpp, bit;
myassert( (start == arg->ldd) || (hqr_high_binary_currpiv( arg, k, start ) == p) );
if ( start <= p )
return arg->ldd;
int offset = p - k;
bit = 0;
if (start != arg->ldd) {
while( ( (start - k) & (1 << bit ) ) == 0 )
bit++;
bit++;
}
tmpp = offset | (1 << bit);
if ( ( tmpp != offset ) && (tmpp < arg->p) && ( tmpp+k < arg->ldd ) )
return tmpp + k;
else
return arg->ldd;
}
static int
hqr_high_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int offset = p - k;
myassert( start >= p && ( start == p || hqr_high_binary_currpiv( arg, k, start ) == p ) );
if ( (start == p) && ( offset%2 == 0 ) ) {
int i, bit, tmp;
if ( offset == 0 )
bit = (int)( log( (double)( libhqr_imin(arg->p, arg->ldd - k) ) ) / log( 2. ) );
else {
bit = 0;
while( (offset & (1 << bit )) == 0 )
bit++;
}
for( i=bit; i>-1; i--){
tmp = offset | (1 << i);
if ( ( offset != tmp ) && ( tmp < arg->p ) && (tmp+k < arg->ldd) )
return tmp+k;
}
return arg->ldd;
}
if ( (start - p) > 1 )
return p + ( (start - p) >> 1 );
else {
return arg->ldd;
}
}
void
hqr_high_binary_init(hqr_subpiv_t *arg) {
arg->currpiv = hqr_high_binary_currpiv;
arg->nextpiv = hqr_high_binary_nextpiv;
arg->prevpiv = hqr_high_binary_prevpiv;
arg->ipiv = NULL;
}
/**
*
* @file high_fibonacci.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* Functions for high level fibonacci tree, and init for duplicated greedy.
*
*/
#include "libhqr_internal.h"
#include <stdlib.h>
/****************************************************
* HQR_HIGH_FIBONACCI_TREE
***************************************************/
/* Return the pivot to use for the row m at step k */
static inline int
hqr_high_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) {
return (qrpiv->ipiv)[ m-k ] + k;
}
/* Return the last row which has used the row m as a pivot in step k before the row start */
static inline int
hqr_high_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) {
int i;
myassert( p >= k && start >= p && start-k <= qrpiv->p);
int lp = p - k;
int lstart= start - k;
int end = libhqr_imin(qrpiv->ldd-k, qrpiv->p);
for( i=lstart+1; i<end; i++ )
if ( (qrpiv->ipiv)[i] == lp )
return i+k;
return qrpiv->ldd;
}
/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
static inline int
hqr_high_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) {
int i;
myassert( p>=k && (start == qrpiv->ldd || start-k <= qrpiv->p) );
for( i=libhqr_imin(start-k-1, qrpiv->p-1); i>0; i-- )
if ( (qrpiv->ipiv)[i] == (p-k) )
return i + k;
return (qrpiv->ldd);
}
void
hqr_high_fibonacci_init(hqr_subpiv_t *arg) {
int *ipiv;
int p;
arg->currpiv = hqr_high_fibonacci_currpiv;
arg->nextpiv = hqr_high_fibonacci_nextpiv;
arg->prevpiv = hqr_high_fibonacci_prevpiv;
p = arg->p;
arg->ipiv = (int*)calloc( p, sizeof(int) );
ipiv = arg->ipiv;
/*
* Fibonacci of order 1:
* u_(n+1) = u_(n) + 1
*/
{
int f1, k, m;
/* Fill in the first column */
f1 = 1;
for (m=1; m < p; ) {
for (k=0; (k < f1) && (m < p); k++, m++) {
ipiv[m] = m - f1;
}
f1++;
}
}
}
/****************************************************
* HQR_HIGH_GREEDY_TREE (1 panel duplicated)
***************************************************/
void hqr_high_greedy1p_init(hqr_subpiv_t *arg){
int *ipiv;
int mt, p;
arg->currpiv = hqr_high_fibonacci_currpiv;
arg->nextpiv = hqr_high_fibonacci_nextpiv;
arg->prevpiv = hqr_high_fibonacci_prevpiv;
mt = arg->ldd;
p = arg->p;
arg->ipiv = (int*)calloc( p, sizeof(int) );
ipiv = arg->ipiv;
{
int minMN = 1;
int j, k, height, start, end, firstk = 0;
int *nT = (int*)calloc(minMN, sizeof(int));
int *nZ = (int*)calloc(minMN, sizeof(int));
nT[0] = mt;
nZ[0] = libhqr_imax( mt - p, 0 );
for(k=1; k<minMN; k++) {
height = libhqr_imax(mt-k-p, 0);
nT[k] = height;
nZ[k] = height;
}
k = 0;
while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) &&
( nZ[minMN-1]+1 == nT[minMN-1] ) ) )
&& ( firstk < minMN ) ) {
height = (nT[k] - nZ[k]) / 2;
if ( height == 0 ) {
while ( (firstk < minMN) &&
( nT[firstk] == mt - firstk ) &&
( nZ[firstk]+1 == nT[firstk] ) ) {
firstk++;
}
k = firstk;
continue;
}
start = mt - nZ[k] - 1;
end = start - height;
nZ[k] += height;
if (k < minMN-1) nT[k+1] = nZ[k];
for( j=start; j > end; j-- ) {
ipiv[ k*p + j-k ] = (j - height);
}
k++;
if (k > minMN-1) k = firstk;
}
free(nT);
free(nZ);
}
}
/**
*
* @file high_flat.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* Functions for high level flat tree
*
*/
#include "libhqr_internal.h"
static int
hqr_high_flat_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
(void)arg;
(void)m;
return k;
}
static int
hqr_high_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
if ( p == k && arg->ldd > 1 ) {
if ( start == arg->ldd )
return p+1;
else if ( start < arg->ldd && (start-k < arg->p-1) )
return start+1;
}
return arg->ldd;
}
static int
hqr_high_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
assert( arg->p > 1 );
if ( p == k && arg->ldd > 1 ) {
if ( start == p && p != arg->ldd-1 )
return libhqr_imin( p + arg->p - 1, arg->ldd - 1 );
else if ( start > p + 1 && (start-k < arg->p))
return start-1;
}
return arg->ldd;
}
void
hqr_high_flat_init(hqr_subpiv_t *arg) {
arg->currpiv = hqr_high_flat_currpiv;
arg->nextpiv = hqr_high_flat_nextpiv;
arg->prevpiv = hqr_high_flat_prevpiv;
arg->ipiv = NULL;
}
/**
*
* @file high_greedy.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* Functions for high level greedy tree
*
*/
#include "libhqr_internal.h"
#include <stdlib.h>
static int
hqr_high_greedy_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
myassert( m >= k && m < k+arg->p );
return (arg->ipiv)[ k * (arg->p) + (m - k) ];
}
static int
hqr_high_greedy_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int i;
myassert( (start >= k && start < k+arg->p) || start == arg->ldd );
for( i=libhqr_imin(start-1, k+arg->p-1); i > k; i-- )
if ( (arg->ipiv)[i-k + k* (arg->p)] == p )
return i;
return (arg->ldd);
}
static int
hqr_high_greedy_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int i;
myassert( (start >= k && start < k+arg->p) || start == p );
for( i=start-k+1; i<arg->p; i++ )
if ( (arg->ipiv)[i + k * (arg->p)] == p )
return k+i;
return arg->ldd;
}
void
hqr_high_greedy_init(hqr_subpiv_t *arg, int minMN) {
int *ipiv;
int mt, p;
arg->currpiv = hqr_high_greedy_currpiv;
arg->nextpiv = hqr_high_greedy_nextpiv;
arg->prevpiv = hqr_high_greedy_prevpiv;
mt = arg->ldd;
p = arg->p;
arg->ipiv = (int*)calloc( p * minMN, sizeof(int) );
ipiv = arg->ipiv;
{
int j, k, height, start, end, firstk = 0;
int *nT = (int*)calloc(minMN, sizeof(int));
int *nZ = (int*)calloc(minMN, sizeof(int));
nT[0] = mt;
nZ[0] = libhqr_imax( mt - p, 0 );
for(k=1; k<minMN; k++) {
height = libhqr_imax(mt-k-p, 0);
nT[k] = height;
nZ[k] = height;
}
k = 0;
while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) &&
( nZ[minMN-1]+1 == nT[minMN-1] ) ) )
&& ( firstk < minMN ) ) {
height = (nT[k] - nZ[k]) / 2;
if ( height == 0 ) {
while ( (firstk < minMN) &&
( nT[firstk] == mt - firstk ) &&
( nZ[firstk]+1 == nT[firstk] ) ) {
firstk++;
}
k = firstk;
continue;
}
start = mt - nZ[k] - 1;
end = start - height;
nZ[k] += height;
if (k < minMN-1) nT[k+1] = nZ[k];
for( j=start; j > end; j-- ) {
ipiv[ k*p + j-k ] = (j - height);
}
k++;
if (k > minMN-1) k = firstk;
}
free(nT);
free(nZ);
}
}
/**
*
* @file libhqr.c
* @file hqr.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
......@@ -77,166 +77,22 @@
* high level tree to reduce communications.
* These lines are defined by (i-k)/p = 0.
*/
#include "libhqr.h"
#include <assert.h>
#include "libhqr_internal.h"
#include "libhqr_queue.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#define PRINT_PIVGEN 0
#ifdef PRINT_PIVGEN
#define myassert( test ) {if ( ! (test) ) return -1;}
#else
#define myassert(test) {assert((test)); return -1;}
#endif
struct hqr_args_s;
typedef struct hqr_args_s hqr_args_t;
struct hqr_subpiv_s;
typedef struct hqr_subpiv_s hqr_subpiv_t;
/**
* @brief jhj
*/
struct hqr_args_s {
int domino; /**< Switch to enable/disable the domino tree linking high and lw level reduction trees */
int tsrr; /**< Switch to enable/disable round-robin on TS to optimise pipelining between TS and local tree */
hqr_subpiv_t *llvl;
hqr_subpiv_t *hlvl;
int *perm;
};
struct hqr_subpiv_s {
/**
* currpiv
* @param[in] arg pointer to the qr_piv structure
* @param[in] k step in the factorization
* @param[in] m line you want to eliminate
*
* @return the annihilator p used with m at step k
*/
int (*currpiv)(const hqr_subpiv_t *arg, int k, int m);
/*
* nextpiv
* @param[in] arg pointer to the qr_piv structure
* @param[in] k step in the factorization
* @param[in] p line currently used as an annihilator
* @param[in] m line actually annihilated.
* m = MT to find the first time p is used as an annihilator during step k
*
* @return the next line m' using the line p as annihilator during step k
* mt if p will never be used again as an annihilator.
*/
int (*nextpiv)(const hqr_subpiv_t *arg, int k, int p, int m);
/*
* prevpiv
* @param[in] arg pointer to the qr_piv structure
* @param[in] k step in the factorization
* @param[in] p line currently used as an annihilator
* @param[in] m line actually annihilated.
* m = p to find the last time p has been used as an annihilator during step k
*
* @return the previous line m' using the line p as annihilator during step k
* mt if p has never been used before that as an annihilator.
*/
int (*prevpiv)(const hqr_subpiv_t *arg, int k, int p, int m);
int *ipiv;
int minMN;
int ldd;
int a;
int p;
int domino;
};
/*
* Common functions
*/
static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k );
static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i );
static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m );
static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m );
static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k );
static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i );
static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m );
static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m );
/* Permutation */
static void hqr_genperm ( libhqr_tree_t *qrtree );
static int hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m );
/*
* Subtree for low-level
*/
static void hqr_low_flat_init( hqr_subpiv_t *arg);
static void hqr_low_greedy_init( hqr_subpiv_t *arg, int minMN);
static void hqr_low_binary_init( hqr_subpiv_t *arg);
static void hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN);
/****************************************************
* Reading functions
**************************************************
*
* Common parameters for the following functions
* qrtree - tree used for the factorization
* k - Step k of the QR factorization
* m - line anhilated
*/
int rdmtx_gettype(const libhqr_tree_t *qrtree, int k, int m){
int id;
libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args);
id = (k * qrtree->mt) + m;
return arg[id].type;
}
int rdmtx_currpiv(const libhqr_tree_t *qrtree, int k, int m){
int id, perm_m, p, a;
libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args);
perm_m = m;
p = qrtree->p;
a = qrtree->a;
myassert( (p==1) || (perm_m / (p*a)) == (m / (p*a)) );
myassert( (p==1) || (perm_m % p) == (m % p) );
id = (k * qrtree->mt) + m;
return arg[id].currpiv;
}
/*
* Extra parameter:
* p - line used as pivot
*/
int rdmtx_nextpiv(const libhqr_tree_t *qrtree, int k, int p, int m){
int id;
libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args);
int gmt = qrtree->mt;
myassert( m > p && p >= k );
myassert( m == gmt || p == rdmtx_currpiv( qrtree, k, m ) );
if(m == qrtree->mt){
id = (k * qrtree->mt) + p;
return arg[id].first_nextpiv;
}
else{
id = (k * qrtree->mt) + m;
return arg[id].nextpiv;
}
}
int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){
int id;
libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args);
int gmt = qrtree->mt;
myassert( m >= p && p >= k && m < gmt );
myassert( m == p || p == rdmtx_currpiv( qrtree, k, m ) );
if(m == p){
id = (k * qrtree->mt) + p;
return arg[id].first_prevpiv;
}
else{
id = (k * qrtree->mt) + m;
return arg[id].prevpiv;
}
}
/****************************************************
* Common ipiv
***************************************************
......@@ -247,7 +103,6 @@ int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){
* k - Step k of the QR factorization
*
*/
#define nbextra1_formula ( (k % pa) > (pa - p) ) ? (-k)%pa + pa : 0
/*
* Extra parameter:
......@@ -255,7 +110,9 @@ int rdmtx_prevpiv(const libhqr_tree_t *qrtree, int k, int p, int m){
* Return:
* The number of geqrt to execute in the panel k
*/
static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) {
static int
hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k )
{
int a = qrtree->a;
int p = qrtree->p;
int gmt = qrtree->mt;
......@@ -275,7 +132,7 @@ static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) {
nb_11 = ( ( (p * (k+1)) + pa - 1 ) / pa ) * pa;
}
else {
/* Number of extra tile of type 1 between the the tile of type 3 and the first of nb11 */
/* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */
nb_2 = nbextra1_formula;
/* First multiple of p*a under the diagonal of step 1 */
......@@ -300,7 +157,8 @@ static int hqr_getnbgeqrf( const libhqr_tree_t *qrtree, int k ) {
* Return:
* The global indice m of the i th geqrt in the panel k
*/
static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i )
static int
hqr_getm( const libhqr_tree_t *qrtree, int k, int i )
{
int a = qrtree->a;
int p = qrtree->p;
......@@ -331,7 +189,8 @@ static int hqr_getm( const libhqr_tree_t *qrtree, int k, int i )
* Return:
* The index i of the geqrt in the panel k
*/
static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m )
static int
hqr_geti( const libhqr_tree_t *qrtree, int k, int m )
{
int a = qrtree->a;
int p = qrtree->p;
......@@ -367,7 +226,9 @@ static int hqr_geti( const libhqr_tree_t *qrtree, int k, int m )
* 2 - if m is reduced thanks to the bubble tree
* 3 - if m is reduced in distributed
*/
static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ) {
static int
hqr_gettype( const libhqr_tree_t *qrtree, int k, int m )
{
int a = qrtree->a;
int p = qrtree->p;
int domino = qrtree->domino;
......@@ -392,2040 +253,204 @@ static int hqr_gettype( const libhqr_tree_t *qrtree, int k, int m ) {
}
/****************************************************
* HQR_LOW_FLAT_TREE
***************************************************/
static int hqr_low_flat_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
(void)m;
if ( arg->domino )
return k / arg->a;
else
return (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a ;
};
static int hqr_low_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa)
{
int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a;
int p_pa = (p / arg->p ) / arg->a;
#ifdef FLAT_UP
if ( ( p_pa == k_a ) && (start_pa > k_a+1 ) )
return start_pa-1;
else
#else /* FLAT_DOWN */
if ( start_pa <= p_pa )
return arg->ldd;
if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) {
if ( start_pa == arg->ldd )
return p_pa+1;
else if ( start_pa < arg->ldd )
return start_pa+1;
}
#endif
return arg->ldd;
}
static int hqr_low_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa)
{
int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a;
int p_pa = (p / arg->p ) / arg->a;
#ifdef FLAT_UP
if ( p_pa == k_a && (start_pa+1 < arg->ldd) )
return start_pa+1;
else
#else
if ( p_pa == k_a && ( arg->ldd - k_a ) > 1 ) {
if ( start_pa == p_pa )
return arg->ldd - 1;
else if ( start_pa > p_pa + 1 )
return start_pa-1;
}
#endif
return arg->ldd;
};
static void hqr_low_flat_init(hqr_subpiv_t *arg){
arg->currpiv = hqr_low_flat_currpiv;
arg->nextpiv = hqr_low_flat_nextpiv;
arg->prevpiv = hqr_low_flat_prevpiv;
arg->ipiv = NULL;
};
/****************************************************
* HQR_LOW_BINARY_TREE
*
* Generic functions currpiv,prevpiv,nextpiv
*
***************************************************/
static int hqr_low_binary_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - m%(arg->p)) / arg->p / arg->a;
int m_pa = (m / arg->p ) / arg->a;
int tmp1 = m_pa - k_a;
int tmp2 = 1;
(void)arg;
if ( tmp1 == 0)
return 0;
while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) {
tmp1 = tmp1 >> 1;
tmp2 = tmp2 << 1;
}
assert( m_pa - tmp2 >= k_a );
return m_pa - tmp2;
};
static int hqr_low_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa)
{
int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a;
int p_pa = (p / arg->p ) / arg->a;
int tmpp, bit;
myassert( (start_pa == arg->ldd) || (hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa || !arg->domino) );
if ( start_pa <= p_pa )
return arg->ldd;
int offset = p_pa-k_a;
bit = 0;
if (start_pa != arg->ldd) {
while( ( (start_pa-k_a) & (1 << bit ) ) == 0 )
bit++;
bit++;
}
tmpp = offset | (1 << bit);
if ( ( tmpp != offset ) && ( tmpp+k_a < arg->ldd ) )
return tmpp + k_a;
else
return arg->ldd;
};
static int hqr_low_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start_pa)
static int
hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m)
{
int k_a = arg->domino ? k / arg->a : (k + arg->p - 1 - p%(arg->p)) / arg->p / arg->a;
int p_pa = (p / arg->p ) / arg->a;
int offset = p_pa - k_a;
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, tmpk;
int lm, rank;
int a = qrtree->a;
int p = qrtree->p;
int domino = qrtree->domino;
int gmt = qrtree->mt;
myassert( start_pa >= p_pa && ( start_pa == p_pa || !arg->domino ||
hqr_low_binary_currpiv( arg, k, start_pa*arg->a*arg->p ) == p_pa ) );
lm = m / p; /* Local index in the distribution over p domains */
rank = m % p; /* Staring index in this distribution */
if ( (start_pa == p_pa) && ( offset%2 == 0 ) ) {
int i, bit, tmp;
if ((p_pa - k_a) == 0)
bit = (int)( log( (double)(arg->ldd - k_a) ) / log( 2. ) );
else {
bit = 0;
while( (offset & (1 << bit )) == 0 )
bit++;
}
for( i=bit; i>-1; i--){
tmp = offset | (1 << i);
if ( ( offset != tmp ) && ( tmp+k_a < arg->ldd ) )
return tmp+k_a;
/* TS level common to every case */
if ( domino ) {
switch( hqr_gettype( qrtree, k, m ) )
{
case 0:
tmp = lm / a;
if ( tmp == k / a )
return k * p + rank ; /* Below to the first bloc including the diagonal */
else
return tmp * a * p + rank ;
break;
case 1:
tmp = arg->llvl->currpiv(arg->llvl, k, m);
return ( tmp == k / a ) ? k * p + rank : tmp * a * p + rank;
break;
case 2:
return m - p;
break;
case 3:
if ( arg->hlvl != NULL )
return arg->hlvl->currpiv(arg->hlvl, k, m);
default:
return gmt;
}
return arg->ldd;
}
if ( (start_pa - p_pa) > 1 )
return p_pa + ( (start_pa - p_pa) >> 1 );
else {
return arg->ldd;
switch( hqr_gettype( qrtree, k, m ) )
{
case 0:
tmp = lm / a;
/* tmpk = (k + p - 1 - m%p) / p / a; */
tmpk = k / (p * a);
return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ;
break;
case 1:
tmp = arg->llvl->currpiv(arg->llvl, k, m);
/* tmpk = (k + p - 1 - m%p) / p / a; */
tmpk = k / (p * a);
return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ;
break;
case 2:
return m - p;
break;
case 3:
if ( arg->hlvl != NULL )
return arg->hlvl->currpiv(arg->hlvl, k, m);
default:
return gmt;
}
}
};
static void hqr_low_binary_init(hqr_subpiv_t *arg){
arg->currpiv = hqr_low_binary_currpiv;
arg->nextpiv = hqr_low_binary_nextpiv;
arg->prevpiv = hqr_low_binary_prevpiv;
arg->ipiv = NULL;
};
/****************************************************
* HQR_LOW_FIBONACCI_TREE
***************************************************/
/* Return the pivot to use for the row m at step k */
inline static int hqr_low_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) {
int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - m%(qrpiv->p)) / qrpiv->p / qrpiv->a;
return (qrpiv->ipiv)[ k_a * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ];
}
/* Return the last row which has used the row m as a pivot in step k before the row start */
inline static int hqr_low_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
int i;
int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a;
int p_pa = (p / qrpiv->p ) / qrpiv->a;
for( i=start_pa+1; i<(qrpiv->ldd); i++ )
if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa )
return i;
return i;
}
/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
inline static int hqr_low_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
int i;
int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a;
int p_pa = (p / qrpiv->p ) / qrpiv->a;
for( i=start_pa-1; i>k_a; i-- )
if ( (qrpiv->ipiv)[i + k_a * (qrpiv->ldd)] == p_pa )
return i;
return (qrpiv->ldd);
}
static void hqr_low_fibonacci_init(hqr_subpiv_t *arg, int minMN){
int *ipiv;
int mt;
/**
* hqr_nextpiv - Computes the next row killed by the row p, after
* it has kill the row start.
*
* @param[in] k
* Factorization step
*
* @param[in] pivot
* Line used as killer
*
* @param[in] start
* Starting point to search the next line killed by p after start
* start must be equal to A.mt to find the first row killed by p.
* if start != A.mt, start must be killed by p
*
* @return:
* - -1 if start doesn't respect the previous conditions
* - m, the following row killed by p if it exists, A->mt otherwise
*/
static int
hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start)
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, ls, lp, nextp;
int lpivot, rpivot, lstart;
int a = qrtree->a;
int p = qrtree->p;
int gmt = qrtree->mt;
arg->currpiv = hqr_low_fibonacci_currpiv;
arg->nextpiv = hqr_low_fibonacci_nextpiv;
arg->prevpiv = hqr_low_fibonacci_prevpiv;
lpivot = pivot / p; /* Local index in the distribution over p domains */
rpivot = pivot % p; /* Staring index in this distribution */
mt = arg->ldd;
/* Local index in the distribution over p domains */
lstart = ( start == gmt ) ? arg->llvl->ldd * a : start / p;
arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) );
ipiv = arg->ipiv;
memset(ipiv, 0, mt*minMN*sizeof(int));
myassert( start > pivot && pivot >= k );
myassert( start == gmt || pivot == hqr_currpiv( qrtree, k, start ) );
/*
* Fibonacci of order 1:
* u_(n+1) = u_(n) + 1
*/
{
int f1, k, m;
/* TS level common to every case */
ls = (start < gmt) ? hqr_gettype( qrtree, k, start ) : -1;
lp = hqr_gettype( qrtree, k, pivot );
/* Fill in the first column */
f1 = 1;
for (m=1; m < mt; ) {
for (k=0; (k < f1) && (m < mt); k++, m++) {
ipiv[m] = m - f1;
}
f1++;
}
switch( ls )
{
case -1:
for( k=1; k<minMN; k++) {
for(m=k+1; m < mt; m++) {
ipiv[ k * mt + m ] = ipiv[ (k-1) * mt + m - 1 ] + 1;
if ( lp == LIBHQR_KILLED_BY_TS ) {
myassert( start == gmt );
return gmt;
}
}
}
};
/****************************************************
* HQR_LOW_GREEDY_TREE
***************************************************/
/* Return the pivot to use for the row m at step k */
inline static int hqr_low_greedy_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) {
if (qrpiv->domino)
return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ];
else
return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd)
+ ( ( m / qrpiv->p ) / qrpiv->a ) ];
}
case LIBHQR_KILLED_BY_TS:
/* Return the last row which has used the row m as a pivot in step k before the row start */
inline static int hqr_low_greedy_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
int i;
int p_pa = p / qrpiv->p / qrpiv->a;
int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd;
/* If the tile is over the diagonal of step k, skip directly to type 2 */
if ( arg->domino && lpivot < k )
goto next_2;
for( i=start_pa+1; i<(qrpiv->ldd); i++ )
if ( ipiv[i + k * (qrpiv->ldd)] == p_pa )
return i;
return i;
}
if ( start == gmt )
nextp = pivot + p;
else
nextp = start + p;
/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
inline static int hqr_low_greedy_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
int i;
int pa = qrpiv->p * qrpiv->a;
int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a;
int p_pa = p / pa;
int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd;
if ( ( nextp < gmt ) &&
( nextp < pivot + a*p ) &&
( (nextp/p)%a != 0 ) )
return nextp;
start = gmt;
lstart = arg->llvl->ldd * a;
for( i=start_pa-1; i> k_a; i-- )
if ( ipiv[i + k * (qrpiv->ldd)] == p_pa )
return i;
case LIBHQR_KILLED_BY_LOCALTREE:
return (qrpiv->ldd);
}
/* If the tile is over the diagonal of step k, skip directly to type 2 */
if ( arg->domino && lpivot < k )
goto next_2;
static void hqr_low_greedy_init(hqr_subpiv_t *arg, int minMN){
int *ipiv;
int mt, a, p, pa, domino;
/* Get the next pivot for the low level tree */
tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a );
arg->currpiv = hqr_low_greedy_currpiv;
arg->nextpiv = hqr_low_greedy_nextpiv;
arg->prevpiv = hqr_low_greedy_prevpiv;
if ( (tmp * a * p + rpivot >= gmt)
&& (tmp == arg->llvl->ldd-1) )
tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp);
mt = arg->ldd;
a = arg->a;
p = arg->p;
pa = p * a;
domino = arg->domino;
if ( tmp != arg->llvl->ldd )
return tmp * a * p + rpivot;
if ( domino )
{
int j, k, height, start, end, firstk = 0;
int *nT, *nZ;
arg->minMN = libhqr_imin( minMN, mt*a );
minMN = arg->minMN;
arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) );
ipiv = arg->ipiv;
memset(ipiv, 0, mt*minMN*sizeof(int));
nT = (int*)malloc(minMN*sizeof(int));
nZ = (int*)malloc(minMN*sizeof(int));
memset( nT, 0, minMN*sizeof(int));
memset( nZ, 0, minMN*sizeof(int));
nT[0] = mt;
k = 0;
while ( (!( ( nT[minMN-1] == mt - ( (minMN - 1) / a ) ) &&
( nZ[minMN-1]+1 == nT[minMN-1] ) ) )
&& ( firstk < minMN ) ) {
height = (nT[k] - nZ[k]) / 2;
if ( height == 0 ) {
while ( (firstk < minMN) &&
( nT[firstk] == mt - ( firstk / a ) ) &&
( nZ[firstk]+1 == nT[firstk] ) ) {
if ( (( firstk % a) != a-1 )
&& ( firstk < minMN-1 ) )
nT[firstk+1]++;
firstk++;
}
k = firstk;
continue;
}
next_2:
/* no next of type 1, we reset start to search the next 2 */
start = gmt;
lstart = arg->llvl->ldd * a;
if (k < minMN-1) nT[k+1] += height;
start = mt - nZ[k] - 1;
end = start - height;
nZ[k] += height;
case LIBHQR_KILLED_BY_DOMINO:
for( j=start; j > end; j-- ) {
ipiv[ k*mt + j ] = (j - height);
if ( lp < LIBHQR_KILLED_BY_DOMINO ) {
return gmt;
}
k++;
if (k > minMN-1) k = firstk;
}
free(nT);
free(nZ);
}
else
{
int j, k, myrank, height, start, end, firstk = 0;
int *nT2DO = (int*)malloc(minMN*sizeof(int));
int *nT = (int*)malloc(minMN*sizeof(int));
int *nZ = (int*)malloc(minMN*sizeof(int));
arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) * p );
ipiv = arg->ipiv;
memset( ipiv, 0, minMN*sizeof(int)*mt*p);
for ( myrank=0; myrank<p; myrank++ ) {
int lminMN = minMN;
memset( nT2DO, 0, minMN*sizeof(int));
memset( nT, 0, minMN*sizeof(int));
memset( nZ, 0, minMN*sizeof(int));
nT[0] = mt;
for(k=0; k<lminMN; k++) {
nT2DO[k] = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 );
if ( nT2DO[k] == 0 ) {
lminMN = k;
break;
}
/* Type 2 are killed only once if they are strictly in the band */
if ( arg->domino &&
(start == gmt) &&
(lpivot < k) &&
(pivot+p < gmt) ) {
return pivot+p;
}
k = 0; firstk = 0;
while ( (!( ( nT[lminMN-1] == nT2DO[lminMN-1] ) &&
( nZ[lminMN-1]+1 == nT[lminMN-1] ) ) )
&& ( firstk < lminMN ) ) {
height = (nT[k] - nZ[k]) / 2;
if ( height == 0 ) {
while ( (firstk < lminMN) &&
( nT[firstk] == nT2DO[firstk] ) &&
( nZ[firstk]+1 == nT[firstk] ) ) {
if ( ( firstk < lminMN-1 ) &&
(( (firstk) % pa) != ((a-1)*p+myrank) ) )
nT[firstk+1]++;
firstk++;
}
k = firstk;
continue;
}
/* no next of type 2, we reset start to search the next 3 */
start = gmt;
lstart = arg->llvl->ldd * a;
if (k < lminMN-1) nT[k+1] += height;
start = mt - nZ[k] - 1;
end = start - height;
nZ[k] += height;
case LIBHQR_KILLED_BY_DISTTREE:
for( j=start; j > end; j-- ) {
ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height);
}
if ( lp < LIBHQR_KILLED_BY_DISTTREE ) {
return gmt;
}
k++;
if (k > lminMN-1) k = firstk;
if( arg->hlvl != NULL ) {
tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start );
if ( tmp != gmt )
return tmp;
}
default:
return gmt;
}
free(nT2DO);
free(nT);
free(nZ);
}
#if 0
{
int m, k;
for(m=0; m<mt; m++) {
printf("%3d | ", m);
for (k=0; k<minMN; k++) {
printf( "%3d ", ipiv[k*mt + m] );
}
printf("\n");
}
}
if (!arg->domino) {
int m, k, myrank;
for ( myrank=1; myrank<p; myrank++ ) {
ipiv += mt*minMN;
printf("-------- rank %d ---------\n", myrank );
for(m=0; m<mt; m++) {
printf("%3d | ", m);
for (k=0; k<minMN; k++) {
int k_a = (k + p - 1 - myrank) / p / a;
if ( m >= k_a )
printf( "%3d ", ipiv[k*mt + m] );
else
printf( "--- " );
}
printf("\n");
}
}
}
#endif
};
/****************************************************
* HQR_LOW_GREEDY1P_TREE
***************************************************/
/* Return the pivot to use for the row m at step k */
inline static int hqr_low_greedy1p_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) {
if (qrpiv->domino)
return (qrpiv->ipiv)[ k * (qrpiv->ldd) + ( (m / qrpiv->p) / qrpiv->a ) ];
else
return (qrpiv->ipiv)[ ( (m%qrpiv->p) * qrpiv->minMN + k ) * (qrpiv->ldd)
+ ( ( m / qrpiv->p ) / qrpiv->a ) ];
}
/* Return the last row which has used the row m as a pivot in step k before the row start */
inline static int hqr_low_greedy1p_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
int i;
int p_pa = p / qrpiv->p / qrpiv->a;
int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd;
for( i=start_pa+1; i<(qrpiv->ldd); i++ )
if ( ipiv[i + k * (qrpiv->ldd)] == p_pa )
return i;
return i;
}
/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
inline static int hqr_low_greedy1p_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start_pa ) {
int i;
int pa = qrpiv->p * qrpiv->a;
int k_a = qrpiv->domino ? k / qrpiv->a : (k + qrpiv->p - 1 - p%(qrpiv->p)) / qrpiv->p / qrpiv->a;
int p_pa = p / pa;
int *ipiv = qrpiv->domino ? qrpiv->ipiv : qrpiv->ipiv + p%qrpiv->p * qrpiv->minMN *qrpiv->ldd;
for( i=start_pa-1; i> k_a; i-- )
if ( ipiv[i + k * (qrpiv->ldd)] == p_pa )
return i;
return (qrpiv->ldd);
}
static void hqr_low_greedy1p_init(hqr_subpiv_t *arg, int minMN){
int *ipiv;
int mt, a, p, pa, domino;
int j, k, height, start, end, nT, nZ;
arg->currpiv = hqr_low_greedy1p_currpiv;
arg->nextpiv = hqr_low_greedy1p_nextpiv;
arg->prevpiv = hqr_low_greedy1p_prevpiv;
mt = arg->ldd;
a = arg->a;
p = arg->p;
pa = p * a;
domino = arg->domino;
/* This section has not been coded yet, and will perform a classic greedy */
if ( domino )
{
arg->minMN = libhqr_imin( minMN, mt*a );
minMN = arg->minMN;
arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) );
ipiv = arg->ipiv;
memset(ipiv, 0, mt*minMN*sizeof(int));
/**
* Compute the local greedy tree of each column, on each node
*/
for(k=0; k<minMN; k++) {
/* Number of tiles to factorized in this column on this rank */
nT = libhqr_imax( mt - (k / a), 0 );
/* Number of tiles already killed */
nZ = 0;
while( nZ < (nT-1) ) {
height = (nT - nZ) / 2;
start = mt - nZ - 1;
end = start - height;
nZ += height;
for( j=start; j > end; j-- ) {
ipiv[ k*mt + j ] = (j - height);
}
}
assert( nZ+1 == nT );
}
}
else
{
int myrank;
end = 0;
arg->ipiv = (int*)malloc( mt * minMN * sizeof(int) * p );
ipiv = arg->ipiv;
memset( ipiv, 0, minMN*sizeof(int)*mt*p);
for ( myrank=0; myrank<p; myrank++ ) {
/**
* Compute the local greedy tree of each column, on each node
*/
for(k=0; k<minMN; k++) {
/* Number of tiles to factorized in this column on this rank */
nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 );
/* Number of tiles already killed */
nZ = 0;
/* No more computations on this node */
if ( nT == 0 ) {
break;
}
while( nZ < (nT-1) ) {
height = (nT - nZ) / 2;
start = mt - nZ - 1;
end = start - height;
nZ += height;
for( j=start; j > end; j-- ) {
ipiv[ myrank*mt*minMN + k*mt + j ] = (j - height);
}
}
assert( nZ+1 == nT );
}
}
}
#if 0
{
int m, k;
for(m=0; m<mt; m++) {
printf("%3d | ", m);
for (k=0; k<minMN; k++) {
printf( "%3d ", ipiv[k*mt + m] );
}
printf("\n");
}
}
if (!arg->domino) {
int m, k, myrank;
for ( myrank=1; myrank<p; myrank++ ) {
ipiv += mt*minMN;
printf("-------- rank %d ---------\n", myrank );
for(m=0; m<mt; m++) {
printf("%3d | ", m);
for (k=0; k<minMN; k++) {
int k_a = (k + p - 1 - myrank) / p / a;
if ( m >= k_a )
printf( "%3d ", ipiv[k*mt + m] );
else
printf( "--- " );
}
printf("\n");
}
}
}
#endif
};
/****************************************************
* HQR_HIGH_FLAT_TREE
***************************************************/
static int hqr_high_flat_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
(void)arg;
(void)m;
return k;
};
static int hqr_high_flat_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
if ( p == k && arg->ldd > 1 ) {
if ( start == arg->ldd )
return p+1;
else if ( start < arg->ldd && (start-k < arg->p-1) )
return start+1;
}
return arg->ldd;
};
static int hqr_high_flat_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
assert( arg->p > 1 );
if ( p == k && arg->ldd > 1 ) {
if ( start == p && p != arg->ldd-1 )
return libhqr_imin( p + arg->p - 1, arg->ldd - 1 );
else if ( start > p + 1 && (start-k < arg->p))
return start-1;
}
return arg->ldd;
};
static void hqr_high_flat_init(hqr_subpiv_t *arg){
arg->currpiv = hqr_high_flat_currpiv;
arg->nextpiv = hqr_high_flat_nextpiv;
arg->prevpiv = hqr_high_flat_prevpiv;
arg->ipiv = NULL;
};
/****************************************************
* HQR_HIGH_BINARY_TREE
***************************************************/
static int hqr_high_binary_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
int tmp1 = m - k;
int tmp2 = 1;
(void)arg;
if ( tmp1 == 0)
return 0;
while( (tmp1 != 0 ) && (tmp1 % 2 == 0) ) {
tmp1 = tmp1 >> 1;
tmp2 = tmp2 << 1;
}
assert( m - tmp2 >= k );
return m - tmp2;
};
static int hqr_high_binary_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int tmpp, bit;
myassert( (start == arg->ldd) || (hqr_high_binary_currpiv( arg, k, start ) == p) );
if ( start <= p )
return arg->ldd;
int offset = p - k;
bit = 0;
if (start != arg->ldd) {
while( ( (start - k) & (1 << bit ) ) == 0 )
bit++;
bit++;
}
tmpp = offset | (1 << bit);
if ( ( tmpp != offset ) && (tmpp < arg->p) && ( tmpp+k < arg->ldd ) )
return tmpp + k;
else
return arg->ldd;
};
static int hqr_high_binary_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int offset = p - k;
myassert( start >= p && ( start == p || hqr_high_binary_currpiv( arg, k, start ) == p ) );
if ( (start == p) && ( offset%2 == 0 ) ) {
int i, bit, tmp;
if ( offset == 0 )
bit = (int)( log( (double)( libhqr_imin(arg->p, arg->ldd - k) ) ) / log( 2. ) );
else {
bit = 0;
while( (offset & (1 << bit )) == 0 )
bit++;
}
for( i=bit; i>-1; i--){
tmp = offset | (1 << i);
if ( ( offset != tmp ) && ( tmp < arg->p ) && (tmp+k < arg->ldd) )
return tmp+k;
}
return arg->ldd;
}
if ( (start - p) > 1 )
return p + ( (start - p) >> 1 );
else {
return arg->ldd;
}
};
static void hqr_high_binary_init(hqr_subpiv_t *arg){
arg->currpiv = hqr_high_binary_currpiv;
arg->nextpiv = hqr_high_binary_nextpiv;
arg->prevpiv = hqr_high_binary_prevpiv;
arg->ipiv = NULL;
};
/****************************************************
* HQR_HIGH_FIBONACCI_TREE
***************************************************/
/* Return the pivot to use for the row m at step k */
inline static int hqr_high_fibonacci_currpiv( const hqr_subpiv_t *qrpiv, int k, int m ) {
return (qrpiv->ipiv)[ m-k ] + k;
}
/* Return the last row which has used the row m as a pivot in step k before the row start */
inline static int hqr_high_fibonacci_prevpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) {
int i;
myassert( p >= k && start >= p && start-k <= qrpiv->p);
int lp = p - k;
int lstart= start - k;
int end = libhqr_imin(qrpiv->ldd-k, qrpiv->p);
for( i=lstart+1; i<end; i++ )
if ( (qrpiv->ipiv)[i] == lp )
return i+k;
return qrpiv->ldd;
}
/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
inline static int hqr_high_fibonacci_nextpiv( const hqr_subpiv_t *qrpiv, int k, int p, int start ) {
int i;
myassert( p>=k && (start == qrpiv->ldd || start-k <= qrpiv->p) );
for( i=libhqr_imin(start-k-1, qrpiv->p-1); i>0; i-- )
if ( (qrpiv->ipiv)[i] == (p-k) )
return i + k;
return (qrpiv->ldd);
}
static void hqr_high_fibonacci_init(hqr_subpiv_t *arg){
int *ipiv;
int p;
arg->currpiv = hqr_high_fibonacci_currpiv;
arg->nextpiv = hqr_high_fibonacci_nextpiv;
arg->prevpiv = hqr_high_fibonacci_prevpiv;
p = arg->p;
arg->ipiv = (int*)malloc( p * sizeof(int) );
ipiv = arg->ipiv;
memset(ipiv, 0, p*sizeof(int));
/*
* Fibonacci of order 1:
* u_(n+1) = u_(n) + 1
*/
{
int f1, k, m;
/* Fill in the first column */
f1 = 1;
for (m=1; m < p; ) {
for (k=0; (k < f1) && (m < p); k++, m++) {
ipiv[m] = m - f1;
}
f1++;
}
}
};
/****************************************************
* HQR_HIGH_GREEDY_TREE (1 panel duplicated)
***************************************************/
static void hqr_high_greedy1p_init(hqr_subpiv_t *arg){
int *ipiv;
int mt, p;
arg->currpiv = hqr_high_fibonacci_currpiv;
arg->nextpiv = hqr_high_fibonacci_nextpiv;
arg->prevpiv = hqr_high_fibonacci_prevpiv;
mt = arg->ldd;
p = arg->p;
arg->ipiv = (int*)malloc( p * sizeof(int) );
ipiv = arg->ipiv;
memset(ipiv, 0, p*sizeof(int));
{
int minMN = 1;
int j, k, height, start, end, firstk = 0;
int *nT = (int*)malloc(minMN*sizeof(int));
int *nZ = (int*)malloc(minMN*sizeof(int));
memset( nT, 0, minMN*sizeof(int));
memset( nZ, 0, minMN*sizeof(int));
nT[0] = mt;
nZ[0] = libhqr_imax( mt - p, 0 );
for(k=1; k<minMN; k++) {
height = libhqr_imax(mt-k-p, 0);
nT[k] = height;
nZ[k] = height;
}
k = 0;
while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) &&
( nZ[minMN-1]+1 == nT[minMN-1] ) ) )
&& ( firstk < minMN ) ) {
height = (nT[k] - nZ[k]) / 2;
if ( height == 0 ) {
while ( (firstk < minMN) &&
( nT[firstk] == mt - firstk ) &&
( nZ[firstk]+1 == nT[firstk] ) ) {
firstk++;
}
k = firstk;
continue;
}
start = mt - nZ[k] - 1;
end = start - height;
nZ[k] += height;
if (k < minMN-1) nT[k+1] = nZ[k];
for( j=start; j > end; j-- ) {
ipiv[ k*p + j-k ] = (j - height);
}
k++;
if (k > minMN-1) k = firstk;
}
free(nT);
free(nZ);
}
};
/****************************************************
* HQR_HIGH_GREEDY_TREE
***************************************************/
static int hqr_high_greedy_currpiv(const hqr_subpiv_t *arg, int k, int m)
{
myassert( m >= k && m < k+arg->p );
return (arg->ipiv)[ k * (arg->p) + (m - k) ];
};
static int hqr_high_greedy_nextpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int i;
myassert( (start >= k && start < k+arg->p) || start == arg->ldd );
for( i=libhqr_imin(start-1, k+arg->p-1); i > k; i-- )
if ( (arg->ipiv)[i-k + k* (arg->p)] == p )
return i;
return (arg->ldd);
};
static int hqr_high_greedy_prevpiv(const hqr_subpiv_t *arg, int k, int p, int start)
{
int i;
myassert( (start >= k && start < k+arg->p) || start == p );
for( i=start-k+1; i<arg->p; i++ )
if ( (arg->ipiv)[i + k * (arg->p)] == p )
return k+i;
return arg->ldd;
};
static void hqr_high_greedy_init(hqr_subpiv_t *arg, int minMN){
int *ipiv;
int mt, p;
arg->currpiv = hqr_high_greedy_currpiv;
arg->nextpiv = hqr_high_greedy_nextpiv;
arg->prevpiv = hqr_high_greedy_prevpiv;
mt = arg->ldd;
p = arg->p;
arg->ipiv = (int*)malloc( p * minMN * sizeof(int) );
ipiv = arg->ipiv;
memset(ipiv, 0, p*minMN*sizeof(int));
{
int j, k, height, start, end, firstk = 0;
int *nT = (int*)malloc(minMN*sizeof(int));
int *nZ = (int*)malloc(minMN*sizeof(int));
memset( nT, 0, minMN*sizeof(int));
memset( nZ, 0, minMN*sizeof(int));
nT[0] = mt;
nZ[0] = libhqr_imax( mt - p, 0 );
for(k=1; k<minMN; k++) {
height = libhqr_imax(mt-k-p, 0);
nT[k] = height;
nZ[k] = height;
}
k = 0;
while ( (!( ( nT[minMN-1] == mt - (minMN - 1) ) &&
( nZ[minMN-1]+1 == nT[minMN-1] ) ) )
&& ( firstk < minMN ) ) {
height = (nT[k] - nZ[k]) / 2;
if ( height == 0 ) {
while ( (firstk < minMN) &&
( nT[firstk] == mt - firstk ) &&
( nZ[firstk]+1 == nT[firstk] ) ) {
firstk++;
}
k = firstk;
continue;
}
start = mt - nZ[k] - 1;
end = start - height;
nZ[k] += height;
if (k < minMN-1) nT[k+1] = nZ[k];
for( j=start; j > end; j-- ) {
ipiv[ k*p + j-k ] = (j - height);
}
k++;
if (k > minMN-1) k = firstk;
}
free(nT);
free(nZ);
}
};
/****************************************************
*
* Generic functions currpiv,prevpiv,nextpiv
*
***************************************************/
static int hqr_currpiv(const libhqr_tree_t *qrtree, int k, int m)
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, tmpk;
int lm, rank;
int a = qrtree->a;
int p = qrtree->p;
int domino = qrtree->domino;
int gmt = qrtree->mt;
lm = m / p; /* Local index in the distribution over p domains */
rank = m % p; /* Staring index in this distribution */
/* TS level common to every case */
if ( domino ) {
switch( hqr_gettype( qrtree, k, m ) )
{
case 0:
tmp = lm / a;
if ( tmp == k / a )
return k * p + rank ; /* Below to the first bloc including the diagonal */
else
return tmp * a * p + rank ;
break;
case 1:
tmp = arg->llvl->currpiv(arg->llvl, k, m);
return ( tmp == k / a ) ? k * p + rank : tmp * a * p + rank;
break;
case 2:
return m - p;
break;
case 3:
if ( arg->hlvl != NULL )
return arg->hlvl->currpiv(arg->hlvl, k, m);
default:
return gmt;
}
}
else {
switch( hqr_gettype( qrtree, k, m ) )
{
case 0:
tmp = lm / a;
/* tmpk = (k + p - 1 - m%p) / p / a; */
tmpk = k / (p * a);
return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ;
break;
case 1:
tmp = arg->llvl->currpiv(arg->llvl, k, m);
/* tmpk = (k + p - 1 - m%p) / p / a; */
tmpk = k / (p * a);
return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank ;
break;
case 2:
return m - p;
break;
case 3:
if ( arg->hlvl != NULL )
return arg->hlvl->currpiv(arg->hlvl, k, m);
default:
return gmt;
}
}
};
/**
* hqr_nextpiv - Computes the next row killed by the row p, after
* it has kill the row start.
*
* @param[in] k
* Factorization step
*
* @param[in] pivot
* Line used as killer
*
* @param[in] start
* Starting point to search the next line killed by p after start
* start must be equal to A.mt to find the first row killed by p.
* if start != A.mt, start must be killed by p
*
* @return:
* - -1 if start doesn't respect the previous conditions
* - m, the following row killed by p if it exists, A->mt otherwise
*/
static int hqr_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start)
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, ls, lp, nextp;
int lpivot, rpivot, lstart;
int a = qrtree->a;
int p = qrtree->p;
int gmt = qrtree->mt;
lpivot = pivot / p; /* Local index in the distribution over p domains */
rpivot = pivot % p; /* Staring index in this distribution */
/* Local index in the distribution over p domains */
lstart = ( start == gmt ) ? arg->llvl->ldd * a : start / p;
myassert( start > pivot && pivot >= k );
myassert( start == gmt || pivot == hqr_currpiv( qrtree, k, start ) );
/* TS level common to every case */
ls = (start < gmt) ? hqr_gettype( qrtree, k, start ) : -1;
lp = hqr_gettype( qrtree, k, pivot );
switch( ls )
{
case -1:
if ( lp == LIBHQR_KILLED_BY_TS ) {
myassert( start == gmt );
return gmt;
}
case LIBHQR_KILLED_BY_TS:
/* If the tile is over the diagonal of step k, skip directly to type 2 */
if ( arg->domino && lpivot < k )
goto next_2;
if ( start == gmt )
nextp = pivot + p;
else
nextp = start + p;
if ( ( nextp < gmt ) &&
( nextp < pivot + a*p ) &&
( (nextp/p)%a != 0 ) )
return nextp;
start = gmt;
lstart = arg->llvl->ldd * a;
case LIBHQR_KILLED_BY_LOCALTREE:
/* If the tile is over the diagonal of step k, skip directly to type 2 */
if ( arg->domino && lpivot < k )
goto next_2;
/* Get the next pivot for the low level tree */
tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a );
if ( (tmp * a * p + rpivot >= gmt)
&& (tmp == arg->llvl->ldd-1) )
tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp);
if ( tmp != arg->llvl->ldd )
return tmp * a * p + rpivot;
next_2:
/* no next of type 1, we reset start to search the next 2 */
start = gmt;
lstart = arg->llvl->ldd * a;
case LIBHQR_KILLED_BY_DOMINO:
if ( lp < LIBHQR_KILLED_BY_DOMINO ) {
return gmt;
}
/* Type 2 are killed only once if they are strictly in the band */
if ( arg->domino &&
(start == gmt) &&
(lpivot < k) &&
(pivot+p < gmt) ) {
return pivot+p;
}
/* no next of type 2, we reset start to search the next 3 */
start = gmt;
lstart = arg->llvl->ldd * a;
case LIBHQR_KILLED_BY_DISTTREE:
if ( lp < LIBHQR_KILLED_BY_DISTTREE ) {
return gmt;
}
if( arg->hlvl != NULL ) {
tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start );
if ( tmp != gmt )
return tmp;
}
default:
return gmt;
}
}
/**
* hqr_prevpiv - Computes the previous row killed by the row p, before
* to kill the row start.
*
* @param[in] k
* Factorization step
*
* @param[in] pivot
* Line used as killer
*
* @param[in] start
* Starting point to search the previous line killed by p before start
* start must be killed by p, and start must be greater or equal to p
*
* @return:
* - -1 if start doesn't respect the previous conditions
* - m, the previous row killed by p if it exists, A->mt otherwise
*/
static int
hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start)
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, ls, lp, nextp;
int lpivot, rpivot, lstart;
int a = qrtree->a;
int p = qrtree->p;
int gmt = qrtree->mt;
lpivot = pivot / p; /* Local index in the distribution over p domains */
rpivot = pivot % p; /* Staring index in this distribution */
lstart = start / p; /* Local index in the distribution over p domains */
myassert( start >= pivot && pivot >= k && start < gmt );
myassert( start == pivot || pivot == hqr_currpiv( qrtree, k, start ) );
/* T Slevel common to every case */
ls = hqr_gettype( qrtree, k, start );
lp = hqr_gettype( qrtree, k, pivot );
if ( lp == LIBHQR_KILLED_BY_TS )
return gmt;
myassert( lp >= ls );
switch( ls )
{
case LIBHQR_KILLED_BY_DISTTREE:
if( arg->hlvl != NULL ) {
tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start );
if ( tmp != gmt )
return tmp;
}
start = pivot;
lstart = pivot / p;
case LIBHQR_KILLED_BY_DOMINO:
/* If the tile is over the diagonal of step k, process it as type 2 */
if ( arg->domino && lpivot < k ) {
if ( ( start == pivot ) &&
(start+p < gmt ) )
return start+p;
if ( lp > LIBHQR_KILLED_BY_LOCALTREE )
return gmt;
}
start = pivot;
lstart = pivot / p;
/* If it is the 'local' diagonal block, we go to 1 */
case LIBHQR_KILLED_BY_LOCALTREE:
/* If the tile is over the diagonal of step k and is of type 2,
it cannot annihilate type 0 or 1 */
if ( arg->domino && lpivot < k )
return gmt;
tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a);
if ( (tmp * a * p + rpivot >= gmt)
&& (tmp == arg->llvl->ldd-1) )
tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp);
if ( tmp != arg->llvl->ldd )
return tmp * a * p + rpivot;
start = pivot;
case LIBHQR_KILLED_BY_TS:
/* Search for predecessor in TS tree */
/* if ( ( start+p < gmt ) && */
/* ( (((start+p) / p) % a) != 0 ) ) */
/* return start + p; */
if ( start == pivot ) {
tmp = lpivot + a - 1 - lpivot%a;
nextp = tmp * p + rpivot;
while( pivot < nextp && nextp >= gmt )
nextp -= p;
} else {
nextp = start - p; /*(lstart - 1) * p + rpivot;*/
}
assert(nextp < gmt);
if ( pivot < nextp )
return nextp;
default:
return gmt;
}
};
/****************************************************
*
* Generate the permutation required for the round-robin on TS
*
***************************************************/
static void
hqr_genperm( libhqr_tree_t *qrtree )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int m = qrtree->mt;
int n = qrtree->nt;
int a = qrtree->a;
int p = qrtree->p;
int domino = arg->domino;
int minMN = libhqr_imin( m, n );
int pa = p * a;
int i, j, k;
int nbextra1;
int end2;
int mpa = m % pa;
int endpa = m - mpa;
int *perm;
arg->perm = (int*)malloc( (m+1) * minMN * sizeof(int) );
perm = arg->perm;
if ( arg->tsrr ) {
for(k=0; k<minMN; k++) {
for( i=0; i<m+1; i++) {
perm[i] = -1;
}
perm += m+1;
}
perm = arg->perm;
for(k=0; k<minMN; k++) {
nbextra1 = nbextra1_formula;
end2 = p + ( domino ? k*p : k + nbextra1 );
end2 = (( end2 + pa - 1 ) / pa ) * pa;
end2 = libhqr_imin( end2, m );
/*
* All tiles of type 3, 2 and:
* - 1 when domino is disabled
* - 0 before the first multiple of pa under the considered diagonal
*/
for( i=k; i<end2; i++) {
perm[i] = i;
}
/* All permutations in groups of pa tiles */
assert( i%pa == 0 || i>=endpa);
for( ; i<endpa; i+=pa ) {
for(j=0; j<pa; j++) {
perm[i+j] = i + ( j + p * (k%a) )%pa;
}
}
/* Last group of tiles */
for( ; i<m; i++) {
perm[i] = i;
}
perm[m] = m;
perm += m+1;
}
}
else {
for(k=0; k<minMN; k++) {
for( i=0; i<m+1; i++) {
perm[i] = i;
}
perm += m+1;
}
}
}
static inline int
hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m )
{
int gmt = qrtree->mt + 1;
int a = qrtree->a;
int p = qrtree->p;
int pa = p * a;
int start = m / pa * pa;
int stop = libhqr_imin( start + pa, gmt ) - start;
int i;
if (a == 1)
return m;
for ( i=m%p; i < stop; i+=p ) {
//if( perm[i] == m )
if( i == m )
return i+start;
}
/* We should never arrive here */
myassert( 0 );
}
/**
*******************************************************************************
*
* @ingroup dplasma
*
* libhqr_hqr_init - Creates the tree structure that will describes the
* operation performed during QR/LQ factorization with parameterized QR/LQ
* algorithms family.
*
* Trees available parameters are described below. It is recommended to:
* - set p to the same value than the P-by-Q process grid used to distribute
* the data. (P for QR factorization, Q for LQ factorization).
* - set the low level tree to LIBHQR_GREEDY_TREE.
* - set the high level tree to:
* 1) LIBHQR_FLAT_TREE when the problem is square, because it divides
* by two the volume of communication of any other tree.
* 2) LIBHQR_FIBONACCI_TREE when the problem is tall and skinny (QR) or
* small and fat (LQ), because it reduces the critical path length.
* - Disable the domino effect when problem is square, to keep high efficiency
* kernel proportion high.
* - Enable the domino effect when problem is tall and skinny (QR) or
* small and fat (LQ) to increase parallelism and reduce critical path length.
* - Round-robin on TS domain (tsrr) option should be disabled. It is
* experimental and is not safe.
*
* These are the default when a parameter is set to -1;
*
* See http://www.netlib.org/lapack/lawnspdf/lawn257.pdf
*
*******************************************************************************
*
* @param[inout] qrtree
* On entry, an allocated structure uninitialized.
* On exit, the structure initialized according to the parameter given.
*
* @param[in] trans
* @arg QR: Structure is initialized for QR factorization.
* @arg LQ: Structure is initialized for LQ factorization.
*
* @param[in] A
* Descriptor of the distributed matrix A to be factorized, on which
* QR/LQ factorization will be performed.
* The descriptor is untouched and only mt/nt/P parameters are used.
*
* @param[in] type_llvl
* Defines the tree used to reduce the main tiles of each local domain
* together. The matrix of those tiles has a lower triangular structure
* with a diagonal by step a.
* @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the local
* tiles.
* @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the local
* tiles.
* @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the
* local tiles.
* @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the local
* tiles.
* @arg -1: The default is used (LIBHQR_GREEDY_TREE)
*
* @param[in] type_hlvl
* Defines the tree used to reduce the main tiles of each domain. This
* is a band lower diagonal matrix of width p.
* @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the tiles.
* @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the tiles.
* @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the
* tiles.
* @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the tiles.
* @arg LIBHQR_GREEDY1P_TREE: A Greedy tree is computed for the first
* column and then duplicated on all others.
* @arg -1: The default is used (LIBHQR_FIBONACCI_TREE)
*
* @param[in] a
* Defines the size of the local domains on which a classic flat TS
* tree is performed. If a==1, then all local tiles are reduced by the
* type_lllvl tree. If a is larger than mt/p, then no local reduction
* tree is performed and type_llvl is ignored.
* If a == -1, it is set to 4 by default.
*
* @param[in] p
* Defines the number of distributed domains, ie the width of the high
* level reduction tree. If p == 1, no high level reduction tree is
* used. If p == mt, a and type_llvl are ignored since only high level
* reduction are performed.
* By default, it is recommended to set p to P if trans ==
* PlasmaNoTrans, Q otherwise, where P-by-Q is the process grid used to
* distributed the data. (p > 0)
*
* @param[in] domino
* Enable/disable the domino effect that connects the high and low
* level reduction trees. Enabling the domino increases the proportion
* of TT (Triangle on top of Triangle) kernels that are less efficient,
* but increase the pipeline effect between independent factorization
* steps, reducin the critical path length.
* If disabled, it keeps the proprotion of more efficient TS (Triangle
* on top of Square) kernels high, but deteriorates the pipeline
* effect, and the critical path length.
* If domino == -1, it is enable when ration between M and N is lower
* than 1/2, and disabled otherwise.
*
* @param[in] tsrr
* Enable/Disable a round robin selection of the killer in local
* domains reduced by TS kernels. Enabling a round-robin selection of
* the killer allows to take benefit of the good pipelining of the flat
* trees and the high efficient TS kernels, while having other trees on
* top of it to reduce critical path length.
* WARNING: This option is under development and should not be enabled
* due to problem in corner cases.
*
*******************************************************************************
*
* @retval -i if the ith parameters is incorrect.
* @retval 0 on success.
*
*******************************************************************************
*
* @sa libhqr_hqr_finalize
* @sa libhqr_systolic_init
*
******************************************************************************/
int
libhqr_hqr_init( libhqr_tree_t *qrtree,
libhqr_typefacto_e trans, libhqr_tiledesc_t *A,
int type_llvl, int type_hlvl,
int a, int p,
int domino, int tsrr )
{
libhqr_tree_t qrtree_init;
double ratio = 0.0;
int low_mt, minMN;
hqr_args_t *arg;
if (qrtree == NULL) {
fprintf(stderr, "libhqr_hqr_init, illegal value of qrtree");
return -1;
}
if ((trans != LIBHQR_QR) &&
(trans != LIBHQR_LQ)) {
fprintf(stderr, "libhqr_hqr_init, illegal value of trans");
return -2;
}
if (A == NULL) {
fprintf(stderr, "libhqr_hqr_init, illegal value of A");
return -3;
}
/* Compute parameters */
a = (a == -1) ? 4 : libhqr_imax( a, 1 );
p = libhqr_imax( p, 1 );
/* Domino */
if ( domino >= 0 ) {
domino = domino ? 1 : 0;
}
else {
if (trans == LIBHQR_QR) {
ratio = ((double)(A->nt) / (double)(A->mt));
} else {
ratio = ((double)(A->mt) / (double)(A->nt));
}
if ( ratio >= 0.5 ) {
domino = 0;
} else {
domino = 1;
}
}
minMN = libhqr_imin(A->mt, A->nt);
/* Create a temporary qrtree structure based on the functions */
{
qrtree_init.domino = domino;
qrtree_init.getnbgeqrf = hqr_getnbgeqrf;
qrtree_init.getm = hqr_getm;
qrtree_init.geti = hqr_geti;
qrtree_init.gettype = hqr_gettype;
qrtree_init.currpiv = hqr_currpiv;
qrtree_init.nextpiv = hqr_nextpiv;
qrtree_init.prevpiv = hqr_prevpiv;
qrtree_init.mt = (trans == LIBHQR_QR) ? A->mt : A->nt;
qrtree_init.nt = minMN;
a = libhqr_imin( a, qrtree_init.mt );
qrtree_init.a = a;
qrtree_init.p = p;
qrtree_init.args = NULL;
arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) );
arg->domino = domino;
arg->tsrr = tsrr;
arg->perm = NULL;
arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) );
arg->hlvl = NULL;
low_mt = (qrtree_init.mt + p * a - 1) / ( p * a );
arg->llvl->minMN = minMN;
arg->llvl->ldd = low_mt;
arg->llvl->a = a;
arg->llvl->p = p;
arg->llvl->domino = domino;
switch( type_llvl ) {
case LIBHQR_FLAT_TREE :
hqr_low_flat_init(arg->llvl);
break;
case LIBHQR_FIBONACCI_TREE :
hqr_low_fibonacci_init(arg->llvl, minMN);
break;
case LIBHQR_BINARY_TREE :
hqr_low_binary_init(arg->llvl);
break;
case LIBHQR_GREEDY1P_TREE :
hqr_low_greedy1p_init(arg->llvl, minMN);
break;
case LIBHQR_GREEDY_TREE :
default:
hqr_low_greedy_init(arg->llvl, minMN);
}
if ( p > 1 ) {
arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) );
arg->llvl->minMN = minMN;
arg->hlvl->ldd = qrtree_init.mt;
arg->hlvl->a = a;
arg->hlvl->p = p;
arg->hlvl->domino = domino;
switch( type_hlvl ) {
case LIBHQR_FLAT_TREE :
hqr_high_flat_init(arg->hlvl);
break;
case LIBHQR_GREEDY_TREE :
hqr_high_greedy_init(arg->hlvl, minMN);
break;
case LIBHQR_GREEDY1P_TREE :
hqr_high_greedy1p_init(arg->hlvl);
break;
case LIBHQR_BINARY_TREE :
hqr_high_binary_init(arg->hlvl);
break;
case LIBHQR_FIBONACCI_TREE :
hqr_high_fibonacci_init(arg->hlvl);
break;
default:
if ( ratio >= 0.5 ) {
hqr_high_flat_init(arg->hlvl);
} else {
hqr_high_fibonacci_init(arg->hlvl);
}
}
}
qrtree_init.args = (void*)arg;
hqr_genperm( &qrtree_init );
}
/* Initialize the final QR tree */
memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) );
qrtree->getnbgeqrf = hqr_getnbgeqrf;
qrtree->getm = hqr_getm;
qrtree->geti = hqr_geti;
qrtree->gettype = rdmtx_gettype;
qrtree->currpiv = rdmtx_currpiv;
qrtree->nextpiv = rdmtx_nextpiv;
qrtree->prevpiv = rdmtx_prevpiv;
qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tileinfo_t) );
/* Initialize the matrix */
libhqr_matrix_init(qrtree, &qrtree_init);
/* Free the initial qrtree */
libhqr_hqr_finalize( &qrtree_init );
return 0;
}
/**
*******************************************************************************
*
* @ingroup dplasma
*
* libhqr_hqr_finalize - Cleans the qrtree data structure allocated by call to
* libhqr_hqr_init().
*
*******************************************************************************
*
* @param[in,out] qrtree
* On entry, an allocated structure to destroy.
* On exit, the structure is destroy and cannot be used.
*
*******************************************************************************
*
* @sa libhqr_hqr_init
*
******************************************************************************/
void
libhqr_hqr_finalize( libhqr_tree_t *qrtree )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
if (arg != NULL) {
if ( arg->llvl != NULL) {
if ( arg->llvl->ipiv != NULL )
free( arg->llvl->ipiv );
free( arg->llvl );
}
if ( arg->hlvl != NULL) {
if ( arg->hlvl->ipiv != NULL )
free( arg->hlvl->ipiv );
free( arg->hlvl );
}
if ( arg->perm != NULL )
free(arg->perm);
free(arg);
}
}
/*
* Common functions
*/
static int svd_getnbgeqrf( const libhqr_tree_t *qrtree, int k );
static int svd_getm( const libhqr_tree_t *qrtree, int k, int i );
static int svd_geti( const libhqr_tree_t *qrtree, int k, int m );
static int svd_gettype( const libhqr_tree_t *qrtree, int k, int m );
#define svd_getipiv( __qrtree, _k ) ((__qrtree)->llvl->ipiv + ((__qrtree)->llvl->ldd) * (_k) )
#define svd_geta( __qrtree, _k ) ( (svd_getipiv( (__qrtree), (_k) ))[0] )
/*
* Extra parameter:
* gmt - Global number of tiles in a column of the complete distributed matrix
* Return:
* The number of geqrt to execute in the panel k
*/
static int
svd_getnbgeqrf( const libhqr_tree_t *qrtree,
int k )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int p = qrtree->p;
int gmt = qrtree->mt;
int a = svd_geta(arg, k);
int pa = p * a;
int nb_1, nb_2, nb_3;
int nb_11, nb_12;
/* Number of tasks of type 3 */
nb_3 = p;
/* Number of extra tile of type 1 between the tile of type 3 and the first of nb11 */
nb_2 = nbextra1_formula;
/* First multiple of p*a under the diagonal of step 1 */
nb_11 = ( (k + p + pa - 1 ) / pa ) * pa;
/* Last multiple of p*a lower than A->mt */
nb_12 = ( gmt / pa ) * pa;
/* Number of tasks of type 1 between nb_11 and nb_12 */
nb_1 = (nb_12 - nb_11) / a;
/* Add leftover */
nb_1 += libhqr_imin( p, gmt - nb_12 );
return libhqr_imin( nb_1 + nb_2 + nb_3, gmt - k);
}
/*
* Extra parameter:
* i - indice of the geqrt in the continuous space
* Return:
* The global indice m of the i th geqrt in the panel k
*/
static int
svd_getm( const libhqr_tree_t *qrtree,
int k, int i )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int p = qrtree->p;
int a = svd_geta(arg, k);
int pos1, j, pa = p * a;
int nbextra1 = nbextra1_formula;
int nb23 = p + nbextra1;
/* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */
if ( i < nb23 )
return k+i;
/* Tile of type 1 */
else {
j = i - nb23;
pa = p * a;
pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa;
return pos1 + (j/p) * pa + j%p;
}
}
/*
* Extra parameter:
* m - The global indice m of a geqrt in the panel k
* Return:
* The index i of the geqrt in the panel k
*/
static int
svd_geti( const libhqr_tree_t *qrtree,
int k, int m )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int p = qrtree->p;
int a = svd_geta(arg, k);
int pos1, j, pa = p * a;
int nbextra1 = nbextra1_formula;
int nb23 = p + nbextra1;
int end2 = p + k + nbextra1;
/* Tile of type 2 or 3 or the 1 between the diagonal and the multiple after the diagonal */
if ( m < end2 )
return m-k;
/* Tile of type 1 */
else {
pos1 = ( ( (p + k ) + pa - 1 ) / pa ) * pa;
j = m - pos1;
return nb23 + (j / pa) * p + j%pa;
}
}
/*
* Extra parameter:
* m - The global indice m of the row in the panel k
* Return
* -1 - Error
* 0 - if m is reduced thanks to a TS kernel
* 1 - if m is reduced thanks to the low level tree
* 2 - if m is reduced thanks to the bubble tree
* 3 - if m is reduced in distributed
*/
static int
svd_gettype( const libhqr_tree_t *qrtree,
int k, int m )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int p = qrtree->p;
int a = svd_geta(arg, k);
/* Element to be reduce in distributed */
if (m < k + p) {
return 3;
}
/* Lower triangle of the matrix */
else {
if( (m / p) % a == 0 )
return 1;
else
return 0;
}
}
/****************************************************
* SVD_LOW_ADAPTIV_TREE
***************************************************/
/* Return the pivot to use for the row m at step k */
inline static int
svd_low_adaptiv_currpiv( const hqr_subpiv_t *arg,
int k, int m )
{
int *ipiv = arg->ipiv + (m%arg->p * arg->minMN + k) * arg->ldd;
int a = ipiv[0];
ipiv+=2;
return ipiv[ ( m / arg->p ) / a ];
}
/* Return the last row which has used the row m as a pivot in step k before the row start */
inline static int
svd_low_adaptiv_prevpiv( const hqr_subpiv_t *arg,
int k, int p, int start_pa )
{
int i;
int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k) * arg->ldd;
int a = ipiv[0];
int ldd = ipiv[1];
int p_pa = p / arg->p / a;
ipiv+=2;
for( i=start_pa+1; i<ldd; i++ )
if ( ipiv[i] == p_pa )
return i;
return i;
}
/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
inline static int
svd_low_adaptiv_nextpiv( const hqr_subpiv_t *arg,
int k, int p, int start_pa )
{
int i;
int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k ) * arg->ldd;
int a = ipiv[0];
int ldd = ipiv[1];
int pa = arg->p * a;
int k_a = (k + arg->p - 1 - p%(arg->p)) / arg->p / a;
int p_pa = p / pa;
ipiv+=2;
for( i=start_pa-1; i> k_a; i-- )
if ( ipiv[i] == p_pa )
return i;
return ldd;
}
static void
svd_low_adaptiv_init(hqr_subpiv_t *arg,
int gmt, int gnt, int nbcores, int ratio)
{
int *ipiv;
int mt, a, p, pa, maxmt, myrank;
int j, k, height, start, end, nT, nZ;
int minMN = libhqr_imin(gmt, gnt);
arg->currpiv = svd_low_adaptiv_currpiv;
arg->nextpiv = svd_low_adaptiv_nextpiv;
arg->prevpiv = svd_low_adaptiv_prevpiv;
p = arg->p;
end = 0;
/**
* Compute the local greedy tree of each column, on each node
*/
maxmt = 1;
for(k=0; k<minMN; k++) {
/**
* The objective is to have at least two columns of TS to reduce per
* core, so it must answer the following inequality:
* ((gmt-k) / p / a ) * (gnt-k) >= ( ratio * nbcores );
* so,
* a <= mt * (gnt-k) / (ratio * nbcores )
*/
height = libhqr_iceil( gmt-k, p );
a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 );
/* Now let's make sure all sub-parts are equilibrate */
j = libhqr_iceil( height, a );
a = libhqr_iceil( gmt-k, j );
/* Compute max dimension of the tree */
mt = libhqr_iceil( gmt, p * a );
maxmt = libhqr_imax( mt, maxmt );
}
arg->ldd = maxmt + 2;
arg->ipiv = (int*)malloc( arg->ldd * minMN * sizeof(int) * p );
ipiv = arg->ipiv;
memset( ipiv, 0, minMN*sizeof(int)*arg->ldd*p );
for ( myrank=0; myrank<p; myrank++ ) {
/**
* Compute the local greedy tree of each column, on each node
*/
for(k=0; k<minMN; k++, ipiv += arg->ldd) {
/**
* The objective is to have at least two columns of TS to reduce per
* core, so it must answer the following inequality:
* (ldd / a ) * (gnt-k) >= ( ratio * nbcores );
* so,
* a <= mt * (gnt-k) / (ratio * nbcores )
*/
height = libhqr_iceil( gmt-k, p );
a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 );
/* Now let's make sure all sub-parts are equilibrate */
j = libhqr_iceil( height, a );
a = libhqr_iceil( gmt-k, j );
pa = p * a;
mt = libhqr_iceil( gmt, pa );
ipiv[0] = a;
ipiv[1] = mt;
assert( a > 0 );
assert( mt < arg->ldd-1 );
/* Number of tiles to factorized in this column on this rank */
nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 );
/* Number of tiles already killed */
nZ = 0;
assert( nT <= mt );
/* No more computations on this node */
if ( nT == 0 ) {
continue;
}
while( nZ < (nT-1) ) {
height = (nT - nZ) / 2;
start = mt - nZ - 1;
end = start - height;
nZ += height;
for( j=start; j > end; j-- ) {
ipiv[ j+2 ] = (j - height);
}
}
assert( nZ+1 == nT );
}
}
#if 0
{
int m, k;
for(m=0; m<mt; m++) {
printf("%3d | ", m);
for (k=0; k<minMN; k++) {
printf( "%3d ", ipiv[k*(arg->ldd) + m] );
}
printf("\n");
}
}
if (!arg->domino) {
int m, k, myrank;
for ( myrank=1; myrank<p; myrank++ ) {
ipiv += arg->ldd * minMN;
printf("-------- rank %d ---------\n", myrank );
for(m=0; m<mt; m++) {
printf("%3d | ", m);
for (k=0; k<minMN; k++) {
int k_a = (k + p - 1 - myrank) / p / a;
if ( m >= k_a )
printf( "%3d ", ipiv[k * arg->ldd + m] );
else
printf( "--- " );
}
printf("\n");
}
}
}
#endif
};
/****************************************************
*
* Generic functions currpiv,prevpiv,nextpiv
*
***************************************************/
static int svd_currpiv(const libhqr_tree_t *qrtree, int k, int m)
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, tmpk;
int lm, rank;
int a = svd_geta( arg, k );
int p = qrtree->p;
int gmt = qrtree->mt;
lm = m / p; /* Local index in the distribution over p domains */
rank = m % p; /* Staring index in this distribution */
/* TS level common to every case */
switch( svd_gettype( qrtree, k, m ) )
{
case 0:
tmp = lm / a;
/* tmpk = (k + p - 1 - m%p) / p / a; */
tmpk = k / (p * a);
return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank;
break;
case 1:
tmp = arg->llvl->currpiv(arg->llvl, k, m);
/* tmpk = (k + p - 1 - m%p) / p / a; */
tmpk = k / (p * a);
return ( tmp == tmpk ) ? k + (m-k)%p : tmp * a * p + rank;
break;
case 2:
assert(0);
break;
case 3:
if ( arg->hlvl != NULL )
return arg->hlvl->currpiv(arg->hlvl, k, m);
default:
return gmt;
}
return -1;
};
}
/**
* svd_nextpiv - Computes the next row killed by the row p, after
* it has kill the row start.
* hqr_prevpiv - Computes the previous row killed by the row p, before
* to kill the row start.
*
* @param[in] k
* Factorization step
......@@ -2434,195 +459,217 @@ static int svd_currpiv(const libhqr_tree_t *qrtree, int k, int m)
* Line used as killer
*
* @param[in] start
* Starting point to search the next line killed by p after start
* start must be equal to A.mt to find the first row killed by p.
* if start != A.mt, start must be killed by p
* Starting point to search the previous line killed by p before start
* start must be killed by p, and start must be greater or equal to p
*
* @return:
* - -1 if start doesn't respect the previous conditions
* - m, the following row killed by p if it exists, A->mt otherwise
* - m, the previous row killed by p if it exists, A->mt otherwise
*/
static int svd_nextpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start)
static int
hqr_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start)
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, ls, lp, nextp;
int rpivot, lstart;
int *ipiv = svd_getipiv( arg, k );
int a = ipiv[0];
int ldd = ipiv[1];
int lpivot, rpivot, lstart;
int a = qrtree->a;
int p = qrtree->p;
int gmt = qrtree->mt;
lpivot = pivot / p; /* Local index in the distribution over p domains */
rpivot = pivot % p; /* Staring index in this distribution */
lstart = start / p; /* Local index in the distribution over p domains */
/* Local index in the distribution over p domains */
lstart = ( start == gmt ) ? ldd * a : start / p;
myassert( start >= pivot && pivot >= k && start < gmt );
myassert( start == pivot || pivot == hqr_currpiv( qrtree, k, start ) );
myassert( start > pivot && pivot >= k );
myassert( start == gmt || pivot == svd_currpiv( qrtree, k, start ) );
/* T Slevel common to every case */
ls = hqr_gettype( qrtree, k, start );
lp = hqr_gettype( qrtree, k, pivot );
/* TS level common to every case */
ls = (start < gmt) ? svd_gettype( qrtree, k, start ) : -1;
lp = svd_gettype( qrtree, k, pivot );
if ( lp == LIBHQR_KILLED_BY_TS )
return gmt;
myassert( lp >= ls );
switch( ls )
{
case LIBHQR_KILLED_BY_DISTTREE:
if( arg->hlvl != NULL ) {
tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start );
if ( tmp != gmt )
return tmp;
}
start = pivot;
lstart = pivot / p;
case LIBHQR_KILLED_BY_DOMINO:
assert(0);
case -1:
/* If the tile is over the diagonal of step k, process it as type 2 */
if ( arg->domino && lpivot < k ) {
if ( lp == LIBHQR_KILLED_BY_TS ) {
myassert( start == gmt );
return gmt;
if ( ( start == pivot ) &&
(start+p < gmt ) )
return start+p;
if ( lp > LIBHQR_KILLED_BY_LOCALTREE )
return gmt;
}
case LIBHQR_KILLED_BY_TS:
if ( start == gmt )
nextp = pivot + p;
else
nextp = start + p;
start = pivot;
lstart = pivot / p;
if ( ( nextp < gmt ) &&
( nextp < pivot + a*p ) &&
( (nextp/p)%a != 0 ) )
return nextp;
start = gmt;
lstart = ldd * a;
/* If it is the 'local' diagonal block, we go to 1 */
case LIBHQR_KILLED_BY_LOCALTREE:
/* Get the next pivot for the low level tree */
tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, lstart / a );
/* If the tile is over the diagonal of step k and is of type 2,
it cannot annihilate type 0 or 1 */
if ( arg->domino && lpivot < k )
return gmt;
tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a);
if ( (tmp * a * p + rpivot >= gmt)
&& (tmp == ldd-1) )
tmp = arg->llvl->nextpiv(arg->llvl, k, pivot, tmp);
&& (tmp == arg->llvl->ldd-1) )
tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp);
if ( tmp != ldd )
if ( tmp != arg->llvl->ldd )
return tmp * a * p + rpivot;
/* no next of type 1, we reset start to search the next 2 */
start = gmt;
lstart = ldd * a;
start = pivot;
case LIBHQR_KILLED_BY_DISTTREE:
case LIBHQR_KILLED_BY_TS:
/* Search for predecessor in TS tree */
/* if ( ( start+p < gmt ) && */
/* ( (((start+p) / p) % a) != 0 ) ) */
/* return start + p; */
if ( lp < LIBHQR_KILLED_BY_DISTTREE ) {
return gmt;
}
if ( start == pivot ) {
tmp = lpivot + a - 1 - lpivot%a;
nextp = tmp * p + rpivot;
if( arg->hlvl != NULL ) {
tmp = arg->hlvl->nextpiv( arg->hlvl, k, pivot, start );
if ( tmp != gmt )
return tmp;
while( pivot < nextp && nextp >= gmt )
nextp -= p;
} else {
nextp = start - p; /*(lstart - 1) * p + rpivot;*/
}
assert(nextp < gmt);
if ( pivot < nextp )
return nextp;
default:
return gmt;
}
}
/**
* svd_prevpiv - Computes the previous row killed by the row p, before
* to kill the row start.
*
* @param[in] k
* Factorization step
*
* @param[in] pivot
* Line used as killer
/****************************************************
*
* @param[in] start
* Starting point to search the previous line killed by p before start
* start must be killed by p, and start must be greater or equal to p
* Generate the permutation required for the round-robin on TS
*
* @return:
* - -1 if start doesn't respect the previous conditions
* - m, the previous row killed by p if it exists, A->mt otherwise
*/
static int
svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start)
***************************************************/
static void
hqr_genperm( libhqr_tree_t *qrtree )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
int tmp, ls, lp, nextp;
int lpivot, rpivot, lstart;
int *ipiv = svd_getipiv( arg, k );
int a = ipiv[0];
int ldd = ipiv[1];
int m = qrtree->mt;
int n = qrtree->nt;
int a = qrtree->a;
int p = qrtree->p;
int gmt = qrtree->mt;
lpivot = pivot / p; /* Local index in the distribution over p domains */
rpivot = pivot % p; /* Staring index in this distribution */
lstart = start / p; /* Local index in the distribution over p domains */
myassert( start >= pivot && pivot >= k && start < gmt );
myassert( start == pivot || pivot == svd_currpiv( qrtree, k, start ) );
/* TS level common to every case */
ls = svd_gettype( qrtree, k, start );
lp = svd_gettype( qrtree, k, pivot );
int domino = arg->domino;
int minMN = libhqr_imin( m, n );
int pa = p * a;
int i, j, k;
int nbextra1;
int end2;
int mpa = m % pa;
int endpa = m - mpa;
int *perm;
if ( lp == LIBHQR_KILLED_BY_TS )
return gmt;
arg->perm = (int*)malloc( (m+1) * minMN * sizeof(int) );
perm = arg->perm;
myassert( lp >= ls );
switch( ls )
{
case LIBHQR_KILLED_BY_DOMINO:
assert(0);
case LIBHQR_KILLED_BY_DISTTREE:
if( arg->hlvl != NULL ) {
tmp = arg->hlvl->prevpiv( arg->hlvl, k, pivot, start );
if ( tmp != gmt )
return tmp;
if ( arg->tsrr ) {
for(k=0; k<minMN; k++) {
for( i=0; i<m+1; i++) {
perm[i] = -1;
}
perm += m+1;
}
perm = arg->perm;
for(k=0; k<minMN; k++) {
nbextra1 = nbextra1_formula;
start = pivot;
lstart = pivot / p;
case LIBHQR_KILLED_BY_LOCALTREE:
tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, lstart / a);
end2 = p + ( domino ? k*p : k + nbextra1 );
end2 = (( end2 + pa - 1 ) / pa ) * pa;
end2 = libhqr_imin( end2, m );
if ( (tmp * a * p + rpivot >= gmt)
&& (tmp == ldd-1) )
tmp = arg->llvl->prevpiv(arg->llvl, k, pivot, tmp);
/*
* All tiles of type 3, 2 and:
* - 1 when domino is disabled
* - 0 before the first multiple of pa under the considered diagonal
*/
for( i=k; i<end2; i++) {
perm[i] = i;
}
if ( tmp != ldd )
return tmp * a * p + rpivot;
/* All permutations in groups of pa tiles */
assert( i%pa == 0 || i>=endpa);
for( ; i<endpa; i+=pa ) {
for(j=0; j<pa; j++) {
perm[i+j] = i + ( j + p * (k%a) )%pa;
}
}
start = pivot;
/* Last group of tiles */
for( ; i<m; i++) {
perm[i] = i;
}
perm[m] = m;
perm += m+1;
}
}
else {
for(k=0; k<minMN; k++) {
for( i=0; i<m+1; i++) {
perm[i] = i;
}
perm += m+1;
}
}
}
case LIBHQR_KILLED_BY_TS:
/* Search for predecessor in TS tree */
/* if ( ( start+p < gmt ) && */
/* ( (((start+p) / p) % a) != 0 ) ) */
/* return perm[start + p]; */
static inline int
hqr_getinvperm( const libhqr_tree_t *qrtree, int k, int m )
{
int gmt = qrtree->mt + 1;
int a = qrtree->a;
int p = qrtree->p;
int pa = p * a;
int start = m / pa * pa;
int stop = libhqr_imin( start + pa, gmt ) - start;
int i;
if ( start == pivot ) {
tmp = lpivot + a - 1 - lpivot%a;
nextp = tmp * p + rpivot;
if (a == 1)
return m;
while( pivot < nextp && nextp >= gmt )
nextp -= p;
} else {
nextp = start - p; /*(lstart - 1) * p + rpivot;*/
}
assert(nextp < gmt);
if ( pivot < nextp )
return nextp;
for ( i=m%p; i < stop; i+=p ) {
//if( perm[i] == m )
if( i == m )
return i+start;
}
default:
return gmt;
}
};
/* We should never arrive here */
myassert( 0 );
(void)k;
}
/**
*******************************************************************************
*
* @ingroup libhqr
* @ingroup dplasma
*
* libhqr_svd_init - Create the tree structures that will describes the
* operation performed during QR/LQ reduction step of the gebrd_ge2gb operation.
* libhqr_hqr_init - Creates the tree structure that will describes the
* operation performed during QR/LQ factorization with parameterized QR/LQ
* algorithms family.
*
* Trees available parameters are described below. It is recommended to:
* - set p to the same value than the P-by-Q process grid used to distribute
......@@ -2646,21 +693,33 @@ svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start)
*
*******************************************************************************
*
* @param[in,out] qrtree
* @param[inout] qrtree
* On entry, an allocated structure uninitialized.
* On exit, the structure initialized according to the given parameters.
* On exit, the structure initialized according to the parameter given.
*
* @param[in] trans
* @arg LIBHQR_QR: Structure is initialized for the QR steps.
* @arg LIBHQR_LQ: Structure is initialized for the LQ steps.
* @arg QR: Structure is initialized for QR factorization.
* @arg LQ: Structure is initialized for LQ factorization.
*
* @param[in,out] A
* @param[in] A
* Descriptor of the distributed matrix A to be factorized, on which
* QR/LQ reduction steps will be performed. In case, of
* R-bidiagonalization, don't forget to provide the square submatrix
* that is concerned by those operations.
* QR/LQ factorization will be performed.
* The descriptor is untouched and only mt/nt/P parameters are used.
*
* @param[in] type_llvl
* Defines the tree used to reduce the main tiles of each local domain
* together. The matrix of those tiles has a lower triangular structure
* with a diagonal by step a.
* @arg LIBHQR_FLAT_TREE: A Flat tree is used to reduce the local
* tiles.
* @arg LIBHQR_GREEDY_TREE: A Greedy tree is used to reduce the local
* tiles.
* @arg LIBHQR_FIBONACCI_TREE: A Fibonacci tree is used to reduce the
* local tiles.
* @arg LIBHQR_BINARY_TREE: A Binary tree is used to reduce the local
* tiles.
* @arg -1: The default is used (LIBHQR_GREEDY_TREE)
*
* @param[in] type_hlvl
* Defines the tree used to reduce the main tiles of each domain. This
* is a band lower diagonal matrix of width p.
......@@ -2673,184 +732,374 @@ svd_prevpiv(const libhqr_tree_t *qrtree, int k, int pivot, int start)
* column and then duplicated on all others.
* @arg -1: The default is used (LIBHQR_FIBONACCI_TREE)
*
* @param[in] a
* Defines the size of the local domains on which a classic flat TS
* tree is performed. If a==1, then all local tiles are reduced by the
* type_lllvl tree. If a is larger than mt/p, then no local reduction
* tree is performed and type_llvl is ignored.
* If a == -1, it is set to 4 by default.
*
* @param[in] p
* Defines the number of distributed domains, ie the width of the high
* level reduction tree. If p == 1, no high level reduction tree is
* used. If p == mt, this enforce the high level reduction tree to be
* performed on the full matrix.
* used. If p == mt, a and type_llvl are ignored since only high level
* reduction are performed.
* By default, it is recommended to set p to P if trans ==
* LIBHQR_QR, and to Q otherwise, where P-by-Q is the process grid
* used to distributed the data. (p > 0)
* PlasmaNoTrans, Q otherwise, where P-by-Q is the process grid used to
* distributed the data. (p > 0)
*
* @param[in] nbthread_per_node
* Define the number of working threads per node to configure the
* adaptativ local tree to provide at least (ratio * nbthread_per_node)
* tasks per step when possible by creating the right amount of TS and
* TT kernels.
* @param[in] domino
* Enable/disable the domino effect that connects the high and low
* level reduction trees. Enabling the domino increases the proportion
* of TT (Triangle on top of Triangle) kernels that are less efficient,
* but increase the pipeline effect between independent factorization
* steps, reducin the critical path length.
* If disabled, it keeps the proprotion of more efficient TS (Triangle
* on top of Square) kernels high, but deteriorates the pipeline
* effect, and the critical path length.
* If domino == -1, it is enable when ration between M and N is lower
* than 1/2, and disabled otherwise.
*
* @param[in] ratio
* Define the minimal number of tasks per thread that the adaptiv tree
* must provide at the lowest level of the tree.
* @param[in] tsrr
* Enable/Disable a round robin selection of the killer in local
* domains reduced by TS kernels. Enabling a round-robin selection of
* the killer allows to take benefit of the good pipelining of the flat
* trees and the high efficient TS kernels, while having other trees on
* top of it to reduce critical path length.
* WARNING: This option is under development and should not be enabled
* due to problem in corner cases.
*
*******************************************************************************
*
* @return
* \retval -i if the ith parameters is incorrect.
* \retval 0 on success.
* @retval -i if the ith parameters is incorrect.
* @retval 0 on success.
*
*******************************************************************************
*
* @sa libhqr_hqr_finalize
* @sa libhqr_hqr_init
* @sa dplasma_zgeqrf_param
* @sa dplasma_cgeqrf_param
* @sa dplasma_dgeqrf_param
* @sa dplasma_sgeqrf_param
* @sa libhqr_systolic_init
*
******************************************************************************/
int
libhqr_svd_init( libhqr_tree_t *qrtree,
libhqr_typefacto_e trans, libhqr_tiledesc_t *A,
int type_hlvl, int p, int nbthread_per_node, int ratio )
libhqr_initfct_hqr( libhqr_tree_t *qrtree,
libhqr_facto_e trans, libhqr_matrix_t *A,
int type_llvl, int type_hlvl,
int a, int p,
int domino, int tsrr )
{
int low_mt, minMN, a = -1;
double ratio = 0.0;
int low_mt, minMN;
hqr_args_t *arg;
libhqr_tree_t qrtree_init;
if (qrtree == NULL) {
fprintf(stderr,"libhqr_svd_init, illegal value of qrtree");
return -1;
fprintf(stderr, "libhqr_hqr_init, illegal value of qrtree");
return -1;
}
if ((trans != LIBHQR_QR) &&
(trans != LIBHQR_LQ)) {
fprintf(stderr, "libhqr_svd_init, illegal value of trans");
return -2;
fprintf(stderr, "libhqr_hqr_init, illegal value of trans");
return -2;
}
if (A == NULL) {
fprintf(stderr, "libhqr_svd_init, illegal value of A");
return -3;
fprintf(stderr, "libhqr_hqr_init, illegal value of A");
return -3;
}
/* Compute parameters */
p = libhqr_imax( p, 1 );
if ( a == -1) {
a = 4; /* TODO: add automatic computation */
}
else {
a = libhqr_imax( a, 1 );
}
/* Create a temporary qrtree structure based on the functions */
{
qrtree_init.getnbgeqrf = svd_getnbgeqrf;
qrtree_init.getm = svd_getm;
qrtree_init.geti = svd_geti;
qrtree_init.gettype = svd_gettype;
qrtree_init.currpiv = svd_currpiv;
qrtree_init.nextpiv = svd_nextpiv;
qrtree_init.prevpiv = svd_prevpiv;
if ( p == -1 ) {
if ( trans == LIBHQR_QR ) {
p = A->p;
}
else {
p = A->nodes / A->p;
}
}
else {
p = libhqr_imax( p, 1 );
}
/* Domino */
if ( domino >= 0 ) {
domino = domino ? 1 : 0;
}
else {
if (trans == LIBHQR_QR) {
ratio = ((double)(A->nt) / (double)(A->mt));
} else {
ratio = ((double)(A->mt) / (double)(A->nt));
}
if ( ratio >= 0.5 ) {
domino = 0;
} else {
domino = 1;
}
}
minMN = libhqr_imin(A->mt, A->nt);
qrtree_init.mt = (trans == LIBHQR_QR) ? A->mt : A->nt;
qrtree_init.nt = (trans == LIBHQR_QR) ? A->nt : A->mt;
qrtree->domino = domino;
qrtree->getnbgeqrf = hqr_getnbgeqrf;
qrtree->getm = hqr_getm;
qrtree->geti = hqr_geti;
qrtree->gettype = hqr_gettype;
qrtree->currpiv = hqr_currpiv;
qrtree->nextpiv = hqr_nextpiv;
qrtree->prevpiv = hqr_prevpiv;
qrtree_init.a = a;
qrtree_init.p = p;
qrtree_init.args = NULL;
qrtree->init = LIBHQR_QRTREE_HQR;
arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) );
arg->domino = 0;
arg->tsrr = 0;
arg->perm = NULL;
qrtree->mt = (trans == LIBHQR_QR) ? A->mt : A->nt;
qrtree->nt = minMN;
arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) );
arg->hlvl = NULL;
a = libhqr_imin( a, qrtree->mt );
minMN = libhqr_imin(A->mt, A->nt);
low_mt = (qrtree_init.mt + p - 1) / ( p );
qrtree->a = a;
qrtree->p = p;
qrtree->args = NULL;
arg->llvl->minMN = minMN;
arg->llvl->ldd = low_mt;
arg->llvl->a = a;
arg->llvl->p = p;
arg->llvl->domino = 0;
arg = (hqr_args_t*) malloc( sizeof(hqr_args_t) );
arg->domino = domino;
arg->tsrr = tsrr;
arg->perm = NULL;
svd_low_adaptiv_init(arg->llvl, qrtree_init.mt, qrtree_init.nt,
nbthread_per_node * (A->nodes / p), ratio );
arg->llvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) );
arg->hlvl = NULL;
low_mt = (qrtree->mt + p * a - 1) / ( p * a );
arg->llvl->minMN = minMN;
arg->llvl->ldd = low_mt;
arg->llvl->a = a;
arg->llvl->p = p;
arg->llvl->domino = domino;
switch( type_llvl ) {
case LIBHQR_FLAT_TREE :
hqr_low_flat_init(arg->llvl);
break;
case LIBHQR_FIBONACCI_TREE :
hqr_low_fibonacci_init(arg->llvl, minMN);
break;
case LIBHQR_BINARY_TREE :
hqr_low_binary_init(arg->llvl);
break;
case LIBHQR_GREEDY1P_TREE :
hqr_low_greedy1p_init(arg->llvl, minMN);
break;
case LIBHQR_GREEDY_TREE :
default:
hqr_low_greedy_init(arg->llvl, minMN);
}
if ( p > 1 ) {
arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) );
if ( p > 1 ) {
arg->hlvl = (hqr_subpiv_t*) malloc( sizeof(hqr_subpiv_t) );
arg->llvl->minMN = minMN;
arg->hlvl->ldd = qrtree_init.mt;
arg->hlvl->a = a;
arg->hlvl->p = p;
arg->hlvl->domino = 0;
arg->llvl->minMN = minMN;
arg->hlvl->ldd = qrtree->mt;
arg->hlvl->a = a;
arg->hlvl->p = p;
arg->hlvl->domino = domino;
switch( type_hlvl ) {
case LIBHQR_FLAT_TREE :
switch( type_hlvl ) {
case LIBHQR_FLAT_TREE :
hqr_high_flat_init(arg->hlvl);
break;
case LIBHQR_GREEDY_TREE :
hqr_high_greedy_init(arg->hlvl, minMN);
break;
case LIBHQR_GREEDY1P_TREE :
hqr_high_greedy1p_init(arg->hlvl);
break;
case LIBHQR_BINARY_TREE :
hqr_high_binary_init(arg->hlvl);
break;
case LIBHQR_FIBONACCI_TREE :
hqr_high_fibonacci_init(arg->hlvl);
break;
default:
if ( ratio >= 0.5 ) {
hqr_high_flat_init(arg->hlvl);
break;
case LIBHQR_GREEDY_TREE :
hqr_high_greedy_init(arg->hlvl, minMN);
break;
case LIBHQR_GREEDY1P_TREE :
hqr_high_greedy1p_init(arg->hlvl);
break;
case LIBHQR_BINARY_TREE :
hqr_high_binary_init(arg->hlvl);
break;
case LIBHQR_FIBONACCI_TREE :
hqr_high_fibonacci_init(arg->hlvl);
break;
default:
} else {
hqr_high_fibonacci_init(arg->hlvl);
}
}
qrtree_init.args = (void*)arg;
hqr_genperm( &qrtree_init );
}
/* Initialize the final QR tree */
memcpy( qrtree, &qrtree_init, sizeof(libhqr_tree_t) );
qrtree->getnbgeqrf = svd_getnbgeqrf;
qrtree->getm = svd_getm;
qrtree->geti = svd_geti;
qrtree->gettype = rdmtx_gettype;
qrtree->currpiv = rdmtx_currpiv;
qrtree->nextpiv = rdmtx_nextpiv;
qrtree->prevpiv = rdmtx_prevpiv;
qrtree->args = malloc( qrtree->mt * qrtree->nt * sizeof(libhqr_tileinfo_t) );
qrtree->args = (void*)arg;
hqr_genperm( qrtree );
return 0;
}
int
libhqr_initmtx_hqr( libhqr_tree_t *qrtree,
libhqr_facto_e trans, libhqr_matrix_t *A,
int type_llvl, int type_hlvl,
int a, int p,
int domino, int tsrr )
{
libhqr_tree_t qrtreefct;
int rc;
rc = libhqr_initfct_hqr( &qrtreefct, trans, A,
type_llvl, type_hlvl, a, p,
domino, tsrr );
if ( rc != 0 ) {
return rc;
}
/* Initialize the matrix */
libhqr_matrix_init(qrtree, &qrtree_init);
libhqr_fct_to_mtx( &qrtreefct, qrtree );
/* Free the initial qrtree */
libhqr_hqr_finalize( &qrtree_init );
libhqr_finalize( &qrtreefct );
return 0;
}
void libhqr_matrix_init(libhqr_tree_t *qrtree, const libhqr_tree_t *qrtree_init){
int i, minMN, tile_id, p, k;
libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args);
minMN = libhqr_imin(qrtree->nt, qrtree->mt);
for (k = 0; k < minMN; k++){
for (i = 0; i < qrtree->mt; i++){
tile_id = (k * qrtree->mt) + i;
arg[tile_id].type = qrtree_init->gettype(qrtree_init, k, i);
assert( (i < k) || (arg[tile_id].type >= 0) );
arg[tile_id].currpiv = qrtree_init->currpiv(qrtree_init, k, i);
p = arg[tile_id].currpiv;
arg[tile_id].first_nextpiv = qrtree_init->nextpiv(qrtree_init, k, i, qrtree->mt);
arg[tile_id].first_prevpiv = qrtree_init->prevpiv(qrtree_init, k, i, i);
arg[tile_id].nextpiv = qrtree_init->nextpiv(qrtree_init, k, p, i);
arg[tile_id].prevpiv = qrtree_init->prevpiv(qrtree_init, k, p, i);
assert( (arg[tile_id].nextpiv <= qrtree->mt && arg[tile_id].nextpiv >= k) || arg[tile_id].nextpiv==-1 );
assert( (arg[tile_id].prevpiv <= qrtree->mt && arg[tile_id].prevpiv >= k) || arg[tile_id].prevpiv==-1 );
assert( (arg[tile_id].first_nextpiv <= qrtree->mt && arg[tile_id].first_nextpiv >= k) || arg[tile_id].first_nextpiv==-1 );
assert( (arg[tile_id].first_prevpiv <= qrtree->mt && arg[tile_id].first_prevpiv >= k) || arg[tile_id].first_prevpiv==-1 );
int
libhqr_init_hqr( libhqr_tree_t *qrtree,
libhqr_facto_e trans, libhqr_matrix_t *A,
int type_llvl, int type_hlvl,
int a, int p,
int domino, int tsrr )
{
return libhqr_initmtx_hqr( qrtree, trans, A,
type_llvl, type_hlvl, a, p,
domino, tsrr );
}
/**
* libhqr_walk_stepk
*
* @param[in] arg
* Arguments specific to the reduction tree used
*
* @param[in] k
* Factorization step
*
* @param[in] tiles
* Array of size qrtree->mt that stores in the first entries the tiles
* to kill in the right order to walk through the tree.
*
*/
void
libhqr_walk_stepk( const libhqr_tree_t *qrtree,
int k, int *tiles )
{
int tsid = -1, ttid = -1;
int p = k;
int pivot = k;
int m = 0;
libhqr_queue_tile_t *tt = libhqr_queue_tile_new();
libhqr_queue_tile_t *ts = libhqr_queue_tile_new();
libhqr_queue_tile_push(&tt, k);
while( tt )
{
/* Stack all elements killed on the way down */
p = qrtree->prevpiv(qrtree, k, pivot, p);
while( p != qrtree->mt ) {
/* If it's a TS tile, or a TT at the bottom of the tree that kills noone */
if( (qrtree->gettype(qrtree, k, p) == 0) ||
(qrtree->prevpiv(qrtree, k, p, p) != qrtree->mt) )
{
libhqr_queue_tile_push(&tt,p);
/* printf("PUSH TT: %d\n", p); */
}
libhqr_queue_tile_push(&ts, p);
/* printf("PUSH TS: %d\n", p); */
p = qrtree->prevpiv(qrtree, k, pivot, p);
assert( p != -1 );
}
tsid = libhqr_queue_tile_head(&ts);
ttid = libhqr_queue_tile_pop(&tt);
/* Unstack all element killed by TS, or by TT and already discovered */
while( (tsid != -1) &&
(tsid != ttid) )
{
/* printf( "TS=%d, TT=%d\n", tsid, ttid ); */
tsid = libhqr_queue_tile_pop(&ts);
/* printf("POP TS: %d\n", tsid); */
/* printf("Call function on (%d, %d)\n",
qrtree->currpiv(qrtree, k, tsid), tsid ); */
assert(m < (qrtree->mt-1));
tiles[m] = tsid;
m++;
tsid = libhqr_queue_tile_head(&ts);
}
pivot = p = ttid;
}
qrtree->args = (void*)arg;
//assert(m == (qrtree->mt-1));
assert(ts == NULL);
assert(tt == NULL);
}
void libhqr_matrix_finalize(libhqr_tree_t *qrtree){
libhqr_tileinfo_t *arg = (libhqr_tileinfo_t*)(qrtree->args);
free(arg);
/**
*******************************************************************************
*
* @ingroup dplasma
*
* libhqr_hqr_finalize - Cleans the qrtree data structure allocated by call to
* libhqr_hqr_init().
*
*******************************************************************************
*
* @param[in,out] qrtree
* On entry, an allocated structure to destroy.
* On exit, the structure is destroy and cannot be used.
*
*******************************************************************************
*
* @sa libhqr_hqr_init
*
******************************************************************************/
void
libhqr_finalize( libhqr_tree_t *qrtree )
{
hqr_args_t *arg = (hqr_args_t*)(qrtree->args);
if ( (qrtree->init != LIBHQR_QRTREE_HQR) &&
(qrtree->init != LIBHQR_QRTREE_SVD) &&
(qrtree->init != LIBHQR_QRTREE_SYS) &&
(qrtree->init != LIBHQR_QRTREE_MTX) )
{
fprintf(stderr, "libhqr_finalize: qrtree not initialized\n" );
return;
}
if (arg != NULL) {
if ( (qrtree->init == LIBHQR_QRTREE_HQR) ||
(qrtree->init == LIBHQR_QRTREE_HQR) )
{
if ( arg->llvl != NULL) {
if ( arg->llvl->ipiv != NULL )
free( arg->llvl->ipiv );
free( arg->llvl );
}
if ( arg->hlvl != NULL) {
if ( arg->hlvl->ipiv != NULL )
free( arg->hlvl->ipiv );
free( arg->hlvl );
}
if ( arg->perm != NULL )
free(arg->perm);
}
free(arg);
}
}
/**
*
* @file low_adaptiv.c
*
* @copyright 2010-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
* @version 1.0.0
* @author Raphael Boucherie
* @author Mathieu Faverge
* @date 2017-03-21
*
* Functions for low level adaptiv tree for SVD/EVD reduction to band.
*
*/
#include "libhqr_internal.h"
#include <stdlib.h>
/* Return the pivot to use for the row m at step k */
inline static int
svd_low_adaptiv_currpiv( const hqr_subpiv_t *arg,
int k, int m )
{
int *ipiv = arg->ipiv + (m%arg->p * arg->minMN + k) * arg->ldd;
int a = ipiv[0];
ipiv+=2;
return ipiv[ ( m / arg->p ) / a ];
}
/* Return the last row which has used the row m as a pivot in step k before the row start */
inline static int
svd_low_adaptiv_prevpiv( const hqr_subpiv_t *arg,
int k, int p, int start_pa )
{
int i;
int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k) * arg->ldd;
int a = ipiv[0];
int ldd = ipiv[1];
int p_pa = p / arg->p / a;
ipiv+=2;
for( i=start_pa+1; i<ldd; i++ )
if ( ipiv[i] == p_pa )
return i;
return i;
}
/* Return the next row which will use the row m as a pivot in step k after it has been used by row start */
inline static int
svd_low_adaptiv_nextpiv( const hqr_subpiv_t *arg,
int k, int p, int start_pa )
{
int i;
int *ipiv = arg->ipiv + (p%arg->p * arg->minMN + k ) * arg->ldd;
int a = ipiv[0];
int ldd = ipiv[1];
int pa = arg->p * a;
int k_a = (k + arg->p - 1 - p%(arg->p)) / arg->p / a;
int p_pa = p / pa;
ipiv+=2;
for( i=start_pa-1; i> k_a; i-- )
if ( ipiv[i] == p_pa )
return i;
return ldd;
}
void
svd_low_adaptiv_init( hqr_subpiv_t *arg,
int gmt, int gnt, int nbcores, int ratio )
{
int *ipiv;
int mt, a, p, pa, maxmt, myrank;
int j, k, height, start, end, nT, nZ;
int minMN = libhqr_imin(gmt, gnt);
arg->currpiv = svd_low_adaptiv_currpiv;
arg->nextpiv = svd_low_adaptiv_nextpiv;
arg->prevpiv = svd_low_adaptiv_prevpiv;
p = arg->p;
end = 0;
/**
* Compute the local greedy tree of each column, on each node
*/
maxmt = 1;
for(k=0; k<minMN; k++) {
/**
* The objective is to have at least two columns of TS to reduce per
* core, so it must answer the following inequality:
* ((gmt-k) / p / a ) * (gnt-k) >= ( ratio * nbcores );
* so,
* a <= mt * (gnt-k) / (ratio * nbcores )
*/
height = libhqr_iceil( gmt-k, p );
a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 );
/* Now let's make sure all sub-parts are equilibrate */
j = libhqr_iceil( height, a );
a = libhqr_iceil( gmt-k, j );
/* Compute max dimension of the tree */
mt = libhqr_iceil( gmt, p * a );
maxmt = libhqr_imax( mt, maxmt );
}
arg->ldd = maxmt + 2;
arg->ipiv = (int*)calloc( arg->ldd * minMN * p, sizeof(int) );
ipiv = arg->ipiv;
for ( myrank=0; myrank<p; myrank++ ) {
/**
* Compute the local greedy tree of each column, on each node
*/
for(k=0; k<minMN; k++, ipiv += arg->ldd) {
/**
* The objective is to have at least two columns of TS to reduce per
* core, so it must answer the following inequality:
* (ldd / a ) * (gnt-k) >= ( ratio * nbcores );
* so,
* a <= mt * (gnt-k) / (ratio * nbcores )
*/
height = libhqr_iceil( gmt-k, p );
a = libhqr_imax( height * (gnt-k) / (ratio * nbcores), 1 );
/* Now let's make sure all sub-parts are equilibrate */
j = libhqr_iceil( height, a );
a = libhqr_iceil( gmt-k, j );
pa = p * a;
mt = libhqr_iceil( gmt, pa );
ipiv[0] = a;
ipiv[1] = mt;
assert( a > 0 );
assert( mt < arg->ldd-1 );
/* Number of tiles to factorized in this column on this rank */
nT = libhqr_imax( mt - ((k + p - 1 - myrank) / pa), 0 );
/* Number of tiles already killed */
nZ = 0;
assert( nT <= mt );
/* No more computations on this node */
if ( nT == 0 ) {
continue;
}
while( nZ < (nT-1) ) {
height = (nT - nZ) / 2;
start = mt - nZ - 1;
end = start - height;
nZ += height;
for( j=start; j > end; j-- ) {
ipiv[ j+2 ] = (j - height);
}
}
assert( nZ+1 == nT );
}
}
#if 0
{
int m, k;
for(m=0; m<mt; m++) {
printf("%3d | ", m);
for (k=0; k<minMN; k++) {
printf( "%3d ", ipiv[k*(arg->ldd) + m] );
}
printf("\n");
}
}
if (!arg->domino) {
int m, k, myrank;
for ( myrank=1; myrank<p; myrank++ ) {
ipiv += arg->ldd * minMN;
printf("-------- rank %d ---------\n", myrank );
for(m=0; m<mt; m++) {
printf("%3d | ", m);
for (k=0; k<minMN; k++) {
int k_a = (k + p - 1 - myrank) / p / a;
if ( m >= k_a )
printf( "%3d ", ipiv[k * arg->ldd + m] );
else
printf( "--- " );
}
printf("\n");
}
}
}
#endif
}