Commit 8b5b646d authored by COULAUD Olivier's avatar COULAUD Olivier

Merge branch 'master' of git+ssh://scm.gforge.inria.fr//gitroot/scalfmm/scalfmm

# By Berenger Bramas (50) and others
# Via Berenger Bramas (5) and others
* 'master' of git+ssh://scm.gforge.inria.fr//gitroot/scalfmm/scalfmm: (83 commits)
  oops, forgot the str length in snprintf
  update the task name to use simgrid
  update the task name to use simgrid
  another fix for simgrid data alloc
  indent
  fixing avoid alloc for simgrid
  forgot to use also config for the simgrid no alloc
  update prio v2
  avoid alloc for simgrid
  hmat: fix installation of hmat addon (headers are copied in includes/ScalFmm/HMat/Src/.../*.hpp or includes/ScalFmm/HMat/CClusteringLibrary/*.h); Still need to fix the problem when including cluster.h.
  hmat: Implement interface for kmeans and kmedians; Provided test in testCCLKCluster.
  Clean up some routines that are not used anymore in blas interface and UnifKernel.
  update starpu mpi rotation test
  update cuda M2L MPI
  use new prio and update starpu mpi
  add simple prio to avoid priority transfer
  hmat: minor fixes in datafile names and update tests.
  update wrappers
  remove warning from FPoint
  add a test to get best performance on cpu and gpu
  ...
parents 7e3239d4 a5616c5d
......@@ -51,11 +51,16 @@ if(SCALFMM_ADDON_HMAT)
# Install lib
install( TARGETS scalfmmhmat ARCHIVE DESTINATION lib )
# Install header
SET(my_include_dirs "Src")
# Install headers
SET(my_include_dirs "Src/Blocks" "Src/Clustering" "Src/Containers" "Src/Utils" "Src/Viewers" "CClusteringLibrary")
file( GLOB hpp_in_dir Src/*.hpp Src/*.hpp)
INSTALL( FILES ${hpp_in_dir} DESTINATION include/ScalFmm/HMat )
FOREACH(my_dir ${my_include_dirs})
file(GLOB
hpp_in_dir
${my_dir}/*.hpp ${my_dir}/*.h
)
INSTALL( FILES ${hpp_in_dir} DESTINATION include/ScalFmm/HMat/${my_dir} )
ENDFOREACH()
# Add C Clustering Library
file( GLOB_RECURSE ccl_lib_files CClusteringLibrary/*.c )
......@@ -64,6 +69,7 @@ if(SCALFMM_ADDON_HMAT)
target_link_libraries( cclusteringlib scalfmm)
install( TARGETS cclusteringlib ARCHIVE DESTINATION lib )
# Tests
file( GLOB_RECURSE source_tests_files Tests/*.cpp )
INCLUDE_DIRECTORIES( ${SCALFMM_BINARY_DIR}/Src )
......
......@@ -28,7 +28,7 @@
//#define PARTIALLY_PIVOTED_ACA
/*! Choose \a RECOMPRESSED_ACA */
//#define RECOMPRESSED_ACA
#define RECOMPRESSED_ACA
template <class FReal, int ORDER = 14>
......@@ -48,7 +48,7 @@ protected:
public:
FACABlock()
: block(nullptr), U(nullptr), VT(nullptr), nbRows(0), nbCols(0), level(0), rank(0), accuracy(FMath::pow(10.0,static_cast<FReal>(-ORDER))) {
: block(nullptr), U(nullptr), VT(nullptr), nbRows(0), nbCols(0), level(0), rank(0), accuracy(FMath::pow(FReal(10.0),static_cast<FReal>(-ORDER))) {
}
// ctor
......
......@@ -55,6 +55,17 @@ public:
}
};
/**
 * Give the block the requested dimensions and set every coefficient to zero.
 * When the shape changes, clear() is called first (expected to release the
 * current storage) and a new array of inNbRow*inNbCol values is allocated.
 * When the shape is unchanged the existing array is reused.
 * @param inNbRow number of rows wanted
 * @param inNbCol number of columns wanted
 */
void resize(const int inNbRow, const int inNbCol){
    const bool sameShape = (inNbRow == nbRows) && (inNbCol == nbCols);
    if(!sameShape){
        clear();
        nbRows = inNbRow;
        nbCols = inNbCol;
        block  = new FReal[nbRows*nbCols];
    }
    // The content is zeroed in every case, including when the array is reused
    memset(block, 0, sizeof(FReal)*nbRows*nbCols);
}
// dtor
~FDenseBlock(){
// Free memory
......@@ -69,6 +80,26 @@ public:
block = 0;
}
int getNbRows() const{
return nbRows;
}
int getNbCols() const{
return nbCols;
}
/**
 * Read one coefficient (by copy).
 * Values are addressed column by column: (idxRow, idxCol) lives at
 * idxCol*nbRows + idxRow. No bound check is performed.
 */
FReal getValue(const int idxRow, const int idxCol) const{
    const int position = idxCol*nbRows + idxRow;
    return block[position];
}
/**
 * Access one coefficient by reference so that the caller can modify it.
 * Values are addressed column by column: (idxRow, idxCol) lives at
 * idxCol*nbRows + idxRow. No bound check is performed.
 */
FReal& getValue(const int idxRow, const int idxCol) {
    const int position = idxCol*nbRows + idxRow;
    return block[position];
}
/**
 * Overwrite the coefficient (idxRow, idxCol) with val.
 * Values are addressed column by column (idxCol*nbRows + idxRow);
 * no bound check is performed.
 */
void setValue(const int idxRow, const int idxCol, const FReal& val) {
    const int position = idxCol*nbRows + idxRow;
    block[position] = val;
}
/**
 * Matrix-vector product with this block, delegated to FBlas::gemva.
 * @param res   output vector handed to FBlas::gemva
 *              (presumably accumulated into rather than overwritten - confirm against FBlas)
 * @param vec   input vector
 * @param scale scalar factor forwarded to FBlas::gemva (1 by default)
 */
void gemv(FReal res[], const FReal vec[], const FReal scale = FReal(1.)) const {
    // FBlas::gemva is called with non-const pointers, hence the const_cast on the inputs
    FReal* const matrixValues = const_cast<FReal*>(block);
    FReal* const inputValues  = const_cast<FReal*>(vec);
    FBlas::gemva(nbRows, nbCols, scale, matrixValues, inputValues, res);
}
......
......@@ -110,7 +110,7 @@ protected:
public:
FSVDBlock()
: block(nullptr), U(nullptr), S(nullptr), VT(nullptr), nbRows(0), nbCols(0), level(0), rank(0), accuracy(FMath::pow(10.0,static_cast<FReal>(-ORDER))) {
: block(nullptr), U(nullptr), S(nullptr), VT(nullptr), nbRows(0), nbCols(0), level(0), rank(0), accuracy(FMath::pow(FReal(10.0),static_cast<FReal>(-ORDER))) {
}
// ctor
......
// @SCALFMM_PRIVATE
#ifndef FCCLKCLUSTER_HPP
#define FCCLKCLUSTER_HPP
#include "./Utils/FGlobal.hpp"
#include "./Utils/FAssert.hpp"
#include "./Utils/FMath.hpp"
#include "../Utils/FHUtils.hpp"
#include <stack>
#include <vector>
#include <functional>
#include <queue>
#include <limits>
#include <algorithm>
extern "C" {
#include <cluster.h>
}
/** Enumerations and helpers used to drive the C Clustering Library. */
namespace CCL {

/** How the center of a cluster is computed. */
enum ClusterCenterMethod {
    CCL_CCM_ARITHMETIC_MEAN,
    CCL_CCM_MEDIAN,
    CCL_CCM_DUMMY
};

/**
 * Convert a ClusterCenterMethod into its one-character code.
 * @return 'a' for the arithmetic mean, 'm' for the median, '?' for any other value
 */
inline char ClusterCenterMethodToChar(const ClusterCenterMethod method){
    if(method == CCL_CCM_ARITHMETIC_MEAN){
        return 'a';
    }
    if(method == CCL_CCM_MEDIAN){
        return 'm';
    }
    return '?';
}

/** Distance variants that can be requested. */
enum Distance {
    CCL_DIST_MEAN,
    CCL_DIST_MEDIAN,
    CCL_DIST_SHORTEST,
    CCL_DIST_LONGEST,
    CCL_DIST_AVG,
    CCL_DIST_DUMMY
};

/**
 * Convert a Distance into its one-character code.
 * @return 'a' (mean), 'm' (median), 's' (shortest), 'x' (longest),
 *         'v' (average) or '?' for any other value
 */
inline char DistanceToChar(const Distance method){
    switch (method) {
    case CCL_DIST_MEAN:     return 'a';
    case CCL_DIST_MEDIAN:   return 'm';
    case CCL_DIST_SHORTEST: return 's';
    case CCL_DIST_LONGEST:  return 'x';
    case CCL_DIST_AVG:      return 'v';
    default:                return '?';
    }
}

}
template <class FReal>
class FCCLKCluster {
protected:
const int nbPartitions;
const int nbElements;
const int nbDim;
const int nbPass; //< Number of call to EM algorithm
CCL::ClusterCenterMethod method;
CCL::Distance distance;
int* partitions;
public:
FCCLKCluster(const int inNbPartitions, const int inNbElements, const FReal inDistMat[], const int inNbPass = 0)
: nbPartitions(inNbPartitions), nbElements(inNbElements), nbDim(0), nbPass(inNbPass), method(CCL::CCL_CCM_DUMMY), distance(CCL::CCL_DIST_DUMMY), partitions(nullptr) {
double** distMatPtrs = new double*[nbElements];
// Build mask, everyone is here
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
distMatPtrs[idxRow] = new double[idxRow+1];
for(int idxCol = 0 ; idxCol <= idxRow ; ++idxCol){
distMatPtrs[idxRow][idxCol] = double(inDistMat[idxCol*nbElements + idxRow]);
}
}
// allocate partitions
partitions = new int[nbElements];
// Errors
double* error = new double[nbElements];
// Nb of times the optimal clustering was found
int* ifound = new int[nbElements];
kmedoids (nbPartitions, nbElements, distMatPtrs, nbPass, partitions, error, ifound);
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
delete[] distMatPtrs[idxRow];
}
delete[] distMatPtrs;
}
FCCLKCluster(const int inNbPartitions, const int inNbElements, const int inNbDim, const FReal inDataMat[], const CCL::ClusterCenterMethod inMethod, const CCL::Distance inDistance, const int inNbPass = 0)
: nbPartitions(inNbPartitions), nbElements(inNbElements), nbDim(inNbDim), nbPass(inNbPass), method(inMethod), distance(inDistance), partitions(nullptr) {
double** dataMatPtrs = new double*[nbElements];
int** mask = new int*[nbElements];
// Build mask, everyone is here
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
mask[idxRow] = new int[idxRow+1];
dataMatPtrs[idxRow] = new double[nbDim];
for(int idxCol = 0 ; idxCol < nbDim ; ++idxCol){
mask[idxRow][idxCol] = 1;
dataMatPtrs[idxRow][idxCol] = double(inDataMat[idxCol*nbElements + idxRow]);
}
}
// allocate partitions
partitions = new int[nbElements];
// Errors
double* error = new double[nbElements];
// Nb of times the optimal clustering was found
int* ifound = new int[nbElements];
// Weights
double* weights = new double[nbElements];
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow)
weights[idxRow]=double(1.0);
kcluster(nbPartitions, nbElements, nbDim, dataMatPtrs, mask, weights, 0, nbPass, ClusterCenterMethodToChar(method), DistanceToChar(distance), partitions, error, ifound);
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
delete[] mask[idxRow];
delete[] dataMatPtrs[idxRow];
}
delete[] mask;
delete[] dataMatPtrs;
}
~FCCLKCluster(){
delete[] partitions;
}
int getPartitions(const int inNbPartitions, int inNbIdxInPartitions[]) const{
/// Map partitions to 0.. nbPartitions
FAssertLF(inNbPartitions == nbPartitions);
// Copy partitions
int* sortedPartitions = new int[nbElements];
for(int idx = 0 ; idx < nbElements ; ++idx){
sortedPartitions[idx]=partitions[idx];
}
// sort partitions
std::sort(sortedPartitions,sortedPartitions+nbElements);
// Map partitions to 0..nbPartitions
int counterPartition=0;
int* mapPartitions = new int[inNbPartitions];
int currentPartition=sortedPartitions[0];
for(int idx = 0 ; idx < nbElements ; ++idx){
mapPartitions[counterPartition]=currentPartition;
if(sortedPartitions[idx+1]!=currentPartition){
currentPartition=sortedPartitions[idx+1];
++counterPartition;
}
}
FAssertLF(counterPartition == inNbPartitions);
/// Count particles in each partition
int totalGiven = 0;
for(int idxPartition = 0 ; idxPartition < inNbPartitions ; ++idxPartition){
inNbIdxInPartitions[idxPartition]=0;
for(int idx = 0 ; idx < nbElements ; ++idx){
if(partitions[idx]==mapPartitions[idxPartition])
inNbIdxInPartitions[idxPartition]+=1;
}
totalGiven +=inNbIdxInPartitions[idxPartition];
}
FAssertLF(totalGiven == nbElements);
return 0; // no empty partition in kclusters/kmedoids algorithms
}
};
#endif // FCCLKCLUSTER_HPP
#ifndef FCONNEXCLUSTERING_HPP
#define FCONNEXCLUSTERING_HPP
// @SCALFMM_PRIVATE
#include "./Utils/FGlobal.hpp"
#include "./Utils/FAssert.hpp"
#include "./Utils/FMath.hpp"
#include "./Containers/FBoolArray.hpp"
#include "FClusterTree.hpp"
#include <stack>
#include <vector>
#include <functional>
#include <queue>
#include <limits>
#include <memory>
/**
 * @brief Groups elements into connected components of a distance matrix.
 *
 * Two elements a and b are linked when inDistMat[a*dim + b] < thresh. Every
 * partition is grown from a seed (the first element not yet assigned) by
 * repeatedly adding the unassigned elements linked to an element already in
 * the partition. Elements of a partition receive consecutive new indexes.
 *
 * NOTE(review): the class owns several raw arrays and declares no copy
 * operation, so copying an instance would release them twice.
 */
template <class FReal>
class FConnexClustering {
protected:
    const int dim;           //< Number of elements
    int* permsNewToOrig;     //< permsNewToOrig[new index] = original index
    int* permsOrigToNew;     //< permsOrigToNew[original index] = new index
    int* partitions;         //< Size of each partition (first nbPartitions entries)
    int* partitionsOffset;   //< First new index of each partition, plus one closing entry
    int nbPartitions;        //< Number of partitions found

public:
    /**
     * Build the partitions.
     * @param inDim     number of elements (inDistMat is inDim x inDim)
     * @param inDistMat distances, read at [a*inDim + b]
     * @param thresh    two elements are linked when their distance is strictly below this value
     */
    FConnexClustering(const int inDim, const FReal inDistMat[], const FReal thresh)
        : dim(inDim),
          permsNewToOrig(new int[dim]),
          permsOrigToNew(new int[dim]),
          // Value-initialized so that no entry is left indeterminate (and dim == 0 is safe)
          partitions(new int[dim]()),
          partitionsOffset(new int[dim+1]),
          nbPartitions (0){

        // Partition of each element, -1 while it is not assigned
        std::unique_ptr<int[]> partitionsMappin(new int[dim]);
        for(int idx = 0 ; idx < dim ; ++idx){
            partitionsMappin[idx] = -1;
        }

        partitionsOffset[0] = 0;

        for(int idx = 0 ; idx < dim ; ++idx){
            if(partitionsMappin[idx] != -1){
                continue;
            }
            FAssertLF(nbPartitions < dim);

            // idx seeds a new partition and takes its first slot
            const int firstOfPartition = partitionsOffset[nbPartitions];
            int& endOfPartition = partitionsOffset[nbPartitions+1];
            endOfPartition = firstOfPartition + 1;
            partitionsMappin[idx] = nbPartitions;
            permsNewToOrig[firstOfPartition] = idx;
            // The seed needs its inverse permutation entry too
            // (it used to be left unset)
            permsOrigToNew[idx] = firstOfPartition;

            // Visit every member of the partition; members appended during the
            // visit are visited in turn because endOfPartition grows
            int idxPartitionElement = firstOfPartition;
            while(idxPartitionElement < endOfPartition){
                FAssertLF(idxPartitionElement < dim);
                const int current = permsNewToOrig[idxPartitionElement];

                for(int idxOther = 0 ; idxOther < dim ; ++idxOther){
                    if(partitionsMappin[idxOther] == -1
                            && inDistMat[current*dim + idxOther] < thresh){
                        partitionsMappin[idxOther] = nbPartitions;
                        permsNewToOrig[endOfPartition] = idxOther;
                        permsOrigToNew[idxOther] = endOfPartition;
                        endOfPartition += 1;
                        FAssertLF(endOfPartition <= dim);
                    }
                }

                idxPartitionElement += 1;
            }

            partitions[nbPartitions] = endOfPartition - firstOfPartition;
            nbPartitions += 1;
        }

        FAssertLF(partitionsOffset[nbPartitions] == dim);
    }

    ~FConnexClustering(){
        delete[] permsNewToOrig;
        delete[] permsOrigToNew;
        delete[] partitionsOffset;
        delete[] partitions;
    }

    /**
     * Copy the permutations into caller-provided arrays of dim entries.
     * @param inPermuts  receives the original-to-new permutation
     * @param invPermuts when not null, receives the new-to-original permutation
     */
    void fillPermutations(int* inPermuts, int* invPermuts = nullptr) const {
        memcpy(inPermuts, permsOrigToNew, sizeof(int)*dim);
        if(invPermuts){
            memcpy(invPermuts, permsNewToOrig, sizeof(int)*dim);
        }
    }

    /** @return the number of partitions found at construction */
    int getNbPartitions() const{
        return nbPartitions;
    }

    /**
     * Copy the size of every partition.
     * @param inNbPartitions      must be equal to getNbPartitions()
     * @param inNbIdxInPartitions output array of inNbPartitions entries
     */
    void getPartitions(const int inNbPartitions, int inNbIdxInPartitions[]) const{
        FAssertLF(nbPartitions == inNbPartitions);
        memcpy(inNbIdxInPartitions, partitions, sizeof(int)*nbPartitions);
    }
};
#endif // FCONNEXCLUSTERING_HPP
// ===================================================================================
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FBLOCKPMAPPING_HPP
#define FBLOCKPMAPPING_HPP
// @SCALFMM_PRIVATE
#include "Utils/FGlobal.hpp"
#include "Utils/FMath.hpp"
#include "Utils/FAssert.hpp"
#include "../Utils/FHUtils.hpp"
#include <functional>
#include <memory>
template <class FReal, class CellClass >
class FBlockPMapping {
protected:
struct CellCNode {
FBlockDescriptor infos;
CellClass cell;
};
struct RowUNode {
FBlockDescriptor infos;
CellClass cell;
};
struct ColVNode {
FBlockDescriptor infos;
CellClass cell;
};
const int dim;
const int nbPartitions;
const int nbCells;
CellCNode* cBlocks;
RowUNode* uRowBlocks;
ColVNode* vColBlocks;
FBlockPMapping(const FBlockPMapping&) = delete;
FBlockPMapping& operator=(const FBlockPMapping&) = delete;
public:
explicit FBlockPMapping(const int inDim, const int partitions[], const int inNbPartitions)
: dim(inDim),
nbPartitions(inNbPartitions),
nbCells(inNbPartitions*inNbPartitions),
cBlocks(nullptr){
FAssertLF(nbPartitions <= inDim);
FAssertLF(1 <= nbPartitions);
std::unique_ptr<int[]> partitionsOffset(new int[nbPartitions]);
partitionsOffset[0] = 0;
for(int idxPart = 1 ; idxPart < nbPartitions ; ++idxPart){
partitionsOffset[idxPart] = partitionsOffset[idxPart-1] + partitions[idxPart-1];
}
cBlocks = new CellCNode[nbCells];
for(int idxPartCol = 0 ; idxPartCol < nbPartitions ; ++idxPartCol){
for(int idxPartRow = 0 ; idxPartRow < nbPartitions ; ++idxPartRow){
cBlocks[idxPartCol*nbPartitions + idxPartRow].infos.row = partitionsOffset[idxPartRow];
cBlocks[idxPartCol*nbPartitions + idxPartRow].infos.col = partitionsOffset[idxPartCol];
cBlocks[idxPartCol*nbPartitions + idxPartRow].infos.nbRows = partitions[idxPartRow];
cBlocks[idxPartCol*nbPartitions + idxPartRow].infos.nbCols = partitions[idxPartCol];
cBlocks[idxPartCol*nbPartitions + idxPartRow].infos.level = 0;
}
}
uRowBlocks = new RowUNode[nbPartitions];
for(int idxPartRow = 0 ; idxPartRow < nbPartitions ; ++idxPartRow){
uRowBlocks[idxPartRow].infos.row = partitionsOffset[idxPartRow];
uRowBlocks[idxPartRow].infos.col = 0;
uRowBlocks[idxPartRow].infos.nbRows = partitions[idxPartRow];
uRowBlocks[idxPartRow].infos.nbCols = dim;
uRowBlocks[idxPartRow].infos.level = 0;
}
vColBlocks = new ColVNode[nbPartitions];
for(int idxPartCol = 0 ; idxPartCol < nbPartitions ; ++idxPartCol){
vColBlocks[idxPartCol].infos.row = 0;
vColBlocks[idxPartCol].infos.col = partitionsOffset[idxPartCol];
vColBlocks[idxPartCol].infos.nbRows = dim;
vColBlocks[idxPartCol].infos.nbCols = partitions[idxPartCol];
vColBlocks[idxPartCol].infos.level = 0;
}
}
~FBlockPMapping(){
delete[] cBlocks;
delete[] uRowBlocks;
delete[] vColBlocks;
}
int getNbBlocks() const {
return nbCells;
}
// Iterate blocks
CellClass& getCBlock(const int idxRowPart, const int idxColPart){
return cBlocks[idxColPart*nbPartitions + idxRowPart].cell;
}
const CellClass& getCBlock(const int idxRowPart, const int idxColPart) const {
return cBlocks[idxColPart*nbPartitions + idxRowPart].cell;
}
const FBlockDescriptor& getCBlockInfo(const int idxRowPart, const int idxColPart) const {
return cBlocks[idxColPart*nbPartitions + idxRowPart].infos;
}
void forAllCBlocksDescriptor(std::function<void(const FBlockDescriptor&)> callback){
for(int idxCell = 0 ; idxCell < nbCells ; ++idxCell){
callback(cBlocks[idxCell].infos);
}
}
void forAllBlocks(std::function<void(const FBlockDescriptor&,
CellClass&, CellClass&, CellClass&)> callback){
for(int idxPartCol = 0 ; idxPartCol < nbPartitions ; ++idxPartCol){
for(int idxPartRow = 0 ; idxPartRow < nbPartitions ; ++idxPartRow){
callback(cBlocks[idxPartCol*nbPartitions + idxPartRow].infos,
cBlocks[idxPartCol*nbPartitions + idxPartRow].cell,
uRowBlocks[idxPartRow].cell,
vColBlocks[idxPartCol].cell);
}
}
}
// Iterate row blocks
CellClass& getUBlock(const int idxRowPart){
return uRowBlocks[idxRowPart].cell;
}
const CellClass& getUBlock(const int idxRowPart) const {
return uRowBlocks[idxRowPart].cell;
}
const FBlockDescriptor& getUBlockInfo(const int idxRowPart) const {
return uRowBlocks[idxRowPart].infos;
}
// Iterate col blocks
CellClass& getVBlock(const int idxColPart){
return vColBlocks[idxColPart].cell;
}
const CellClass& getVBlock(const int idxColPart) const {
return vColBlocks[idxColPart].cell;
}
const FBlockDescriptor& getVBlockInfo(const int idxColPart) const {
return vColBlocks[idxColPart].infos;
}
// Operations
void gemv(FReal res[], const FReal vec[]) const {
for(int idxPartCol = 0 ; idxPartCol < nbPartitions ; ++idxPartCol){
for(int idxPartRow = 0 ; idxPartRow < nbPartitions ; ++idxPartRow){
// &res[cBlocks[idxPartCol*nbPartitions + idxPartRow].infos.row],
// &vec[cBlocks[idxPartCol*nbPartitions + idxPartRow].infos.col])
// cBlocks[idxPartCol*nbPartitions + idxPartRow].cell,
// uRowBlocks[idxPartRow].cell,
// vColBlocks[idxPartCol].cell;
}
}
}
void gemm(FReal res[], const FReal mat[], const int nbRhs) const {
for(int idxPartCol = 0 ; idxPartCol < nbPartitions ; ++idxPartCol){
for(int idxPartRow = 0 ; idxPartRow < nbPartitions ; ++idxPartRow){
// &res[cBlocks[idxPartCol*nbPartitions + idxPartRow].infos.row],
// &vec[cBlocks[idxPartCol*nbPartitions + idxPartRow].infos.col])
// cBlocks[idxPartCol*nbPartitions + idxPartRow].cell,
// uRowBlocks[idxPartRow].cell,
// vColBlocks[idxPartCol].cell;
// nbRhs, dim
}
}
}
};
#endif // FBLOCKPMAPPING_HPP
......@@ -87,6 +87,11 @@ public:
FAssertLF(colIdx + nbRows <= matDim);
return FDenseBlockWrapper<FReal>(&values[colIdx*matDim+rowIdx], nbRows, nbCols, matDim);
}
/**
 * Descriptor-based overload: forwards the position (row, col) and the size
 * (nbRows, nbCols) stored in info to getBlock(row, col, nbRows, nbCols).
 */
FDenseBlockWrapper<FReal> getBlock(const FBlockDescriptor& info) const {
    return this->getBlock(info.row, info.col,
                          info.nbRows, info.nbCols);
}
};
#endif // FMATDENSE_HPP
......
......@@ -99,6 +99,10 @@ public:
FAssertLF(colIdx + nbCols <= matDim);
return FDenseBlockPermWrapper<FReal, FMatDensePerm<FReal> >(*this, rowIdx, colIdx, nbRows, nbCols);
}
/**
 * Descriptor-based overload: forwards the position (row, col) and the size
 * (nbRows, nbCols) stored in info to getBlock(row, col, nbRows, nbCols).
 */
FDenseBlockPermWrapper<FReal, FMatDensePerm<FReal> > getBlock(const FBlockDescriptor& info) const {
    return this->getBlock(info.row, info.col,
                          info.nbRows, info.nbCols);
}
};
#endif // FMATDENSEPERM_HPP
......