Attention une mise à jour du service Gitlab va être effectuée le mardi 18 janvier (et non lundi 17 comme annoncé précédemment) entre 18h00 et 18h30. Cette mise à jour va générer une interruption du service dont nous ne maîtrisons pas complètement la durée mais qui ne devrait pas excéder quelques minutes.

Commit c75d6652 authored by BLANCHARD Pierre's avatar BLANCHARD Pierre
Browse files

hmat: Implement interface for kmeans and kmedians; Provided test in testCCLKCluster.

parent d058572a
......@@ -23,7 +23,8 @@ extern "C" {
namespace CCL {
enum ClusterCenterMethod {
CCL_CCM_ARITHMETIC_MEAN,
CCL_CCM_MEDIAN
CCL_CCM_MEDIAN,
CCL_CCM_DUMMY
};
inline char ClusterCenterMethodToChar(const ClusterCenterMethod method){
......@@ -45,7 +46,8 @@ namespace CCL {
CCL_DIST_MEDIAN,
CCL_DIST_SHORTEST,
CCL_DIST_LONGEST,
CCL_DIST_AVG
CCL_DIST_AVG,
CCL_DIST_DUMMY
};
inline char DistanceToChar(const Distance method){
......@@ -67,7 +69,7 @@ namespace CCL {
break;
default:
break;
}
}
return '?';
}
}
......@@ -77,40 +79,82 @@ template <class FReal>
class FCCLKCluster {
protected:
const int nbPartitions;
const int dim;
const int nbElements;
const int nbDim;
const int nbPass; //< Number of call to EM algorithm
CCL::ClusterCenterMethod method;
CCL::Distance distance;
int* partitions;
public:
FCCLKCluster(const int inNbPartitions, const int inDim, const FReal inDistMat[], const CCL::ClusterCenterMethod inMethod)
: nbPartitions(inNbPartitions), dim(inDim), method(inMethod), partitions(nullptr) {
FCCLKCluster(const int inNbPartitions, const int inNbElements, const FReal inDistMat[], const int inNbPass = 0)
: nbPartitions(inNbPartitions), nbElements(inNbElements), nbDim(0), nbPass(inNbPass), method(CCL::CCL_CCM_DUMMY), distance(CCL::CCL_DIST_DUMMY), partitions(nullptr) {
double** distMatPtrs = new double*[dim];
double** distMatPtrs = new double*[nbElements];
// Build mask, everyone is here
for(int idxRow = 0 ; idxRow < dim ; ++idxRow){
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
distMatPtrs[idxRow] = new double[idxRow+1];
for(int idxCol = 0 ; idxCol <= idxRow ; ++idxCol){
distMatPtrs[idxRow][idxCol] = double(inDistMat[idxCol*dim + idxRow]);
distMatPtrs[idxRow][idxCol] = double(inDistMat[idxCol*nbElements + idxRow]);
}
}
// allocate partitions
partitions = new int[dim];
partitions = new int[nbElements];
// Number of call to EM algorithm
const int nbPass = 1;
// Errors
double* error = new double[dim];
double* error = new double[nbElements];
// Nb of times the optimal clustering was found
int* ifound = new int[dim];
int* ifound = new int[nbElements];
kmedoids (nbPartitions, dim, distMatPtrs, nbPass, partitions, error, ifound);
kmedoids (nbPartitions, nbElements, distMatPtrs, nbPass, partitions, error, ifound);
for(int idxRow = 0 ; idxRow < dim ; ++idxRow){
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
delete[] distMatPtrs[idxRow];
}
delete[] distMatPtrs;
}
FCCLKCluster(const int inNbPartitions, const int inNbElements, const int inNbDim, const FReal inDataMat[], const CCL::ClusterCenterMethod inMethod, const CCL::Distance inDistance, const int inNbPass = 0)
: nbPartitions(inNbPartitions), nbElements(inNbElements), nbDim(inNbDim), nbPass(inNbPass), method(inMethod), distance(inDistance), partitions(nullptr) {
double** dataMatPtrs = new double*[nbElements];
int** mask = new int*[nbElements];
// Build mask, everyone is here
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
mask[idxRow] = new int[idxRow+1];
dataMatPtrs[idxRow] = new double[nbDim];
for(int idxCol = 0 ; idxCol < nbDim ; ++idxCol){
mask[idxRow][idxCol] = 1;
dataMatPtrs[idxRow][idxCol] = double(inDataMat[idxCol*nbElements + idxRow]);
}
}
// allocate partitions
partitions = new int[nbElements];
// Errors
double* error = new double[nbElements];
// Nb of times the optimal clustering was found
int* ifound = new int[nbElements];
// Weights
double* weights = new double[nbElements];
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow)
weights[idxRow]=double(1.0);
kcluster(nbPartitions, nbElements, nbDim, dataMatPtrs, mask, weights, 0, nbPass, ClusterCenterMethodToChar(method), DistanceToChar(distance), partitions, error, ifound);
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
delete[] mask[idxRow];
delete[] dataMatPtrs[idxRow];
}
delete[] mask;
delete[] dataMatPtrs;
}
~FCCLKCluster(){
......@@ -123,18 +167,18 @@ public:
FAssertLF(inNbPartitions == nbPartitions);
// Copy partitions
int* sortedPartitions = new int[dim];
for(int idx = 0 ; idx < dim ; ++idx){
int* sortedPartitions = new int[nbElements];
for(int idx = 0 ; idx < nbElements ; ++idx){
sortedPartitions[idx]=partitions[idx];
}
// sort partitions
std::sort(sortedPartitions,sortedPartitions+dim);
std::sort(sortedPartitions,sortedPartitions+nbElements);
// Map partitions to 0..nbPartitions
int counterPartition=0;
int* mapPartitions = new int[inNbPartitions];
int currentPartition=sortedPartitions[0];
for(int idx = 0 ; idx < dim ; ++idx){
for(int idx = 0 ; idx < nbElements ; ++idx){
mapPartitions[counterPartition]=currentPartition;
if(sortedPartitions[idx+1]!=currentPartition){
currentPartition=sortedPartitions[idx+1];
......@@ -149,7 +193,7 @@ public:
inNbIdxInPartitions[idxPartition]=0;
for(int idx = 0 ; idx < dim ; ++idx){
for(int idx = 0 ; idx < nbElements ; ++idx){
if(partitions[idx]==mapPartitions[idxPartition])
inNbIdxInPartitions[idxPartition]+=1;
......@@ -157,7 +201,7 @@ public:
}
totalGiven +=inNbIdxInPartitions[idxPartition];
}
FAssertLF(totalGiven == dim);
FAssertLF(totalGiven == nbElements);
return 0; // no empty partition in kclusters/kmedoids algorithms
}
......
// @SCALFMM_PRIVATE
// HMAT specific includes
#include "../Src/Clustering/FCCLKCluster.hpp"
#include "../Src/Utils/FMatrixIO.hpp"
......@@ -9,6 +10,10 @@
#include "../Src/Viewers/FDenseBlockWrapper.hpp"
#include "../Src/Blocks/FDenseBlock.hpp"
// ScalFMM's include
#include "Files/FFmaGenericLoader.hpp" // load grid
#include "Utils/FGlobal.hpp"
#include "Utils/FTic.hpp"
#include "Utils/FParameters.hpp"
#include "Utils/FParameterNames.hpp"
......@@ -19,84 +24,216 @@ int main(int argc, char** argv){
{"-fout", "--out", "-out"} ,
"Svg output directory."
};
static const FParameterNames ElemParam = {
{"-nE", "-nbElements", "-elem"} ,
"Nbof elements to partition."
};
static const FParameterNames DimParam = {
{"-N", "-nb", "-dim"} ,
"Dim of the matrix."
{"-nD", "-nbDim", "-dim"} ,
"Dimension of ambiant space."
};
static const FParameterNames NbPartitionsParam = {
{"-P", "-nbPartitions", "-nbClusters"} ,
{"-Part", "-nbPartitions", "-nbClusters"} ,
"Dim of the matrix."
};
static const FParameterNames NbPassesParam = {
{"-Pass", "-nbPasses", "-nbPass"} ,
"Dim of the matrix."
};
//FHelpDescribeAndExit(argc, argv,"Test the k-means, k-medians and k-medoids algorithms from CCL (using 2 methods for distance computation) on 3D grid (unitCube).",SvgOutParam,ElemParam,DimParam,NbPartitionsParam,NbPassesParam,FParameterDefinitions::InputFile);
FHelpDescribeAndExit(argc, argv,"Test the k-medoid algorithm from CCL (using 2 methods for cluster center computation).",SvgOutParam,DimParam,NbPartitionsParam,FParameterDefinitions::InputFile);
////////////////////////////////////////////////////////////////////
/// Command line examples
// Partition the unitCube with 1000 particles in 8 partitions (use 5 iterations of EM algorithm)
// ./Addons/HMat/testCCLKCluster -binin -nE 1000 -d unitCube -Part 8 -Pass 5
// > Results show that k-medoids seems to work better for unitCube1000 (use fewer pass, but still more costly)
//
//
////////////////////////////////////////////////////////////////////
/// Parameters
// size of the grid
const int nbElements = FParameters::getValue(argc,argv,"-nE", 1000);
const char* filename = FParameters::getStr(argc, argv, FParameterDefinitions::InputFile.options, "../Addons/HMat/Data/unitCube1000.bin");
// size of the grid
const int nbDim = FParameters::getValue(argc,argv,"-nD", 3);
typedef float FReal;
// Define number of partitions
const int nbPartitions = FParameters::getValue(argc, argv, NbPartitionsParam.options, 4);
// Define number of passes
const int nbPasses = FParameters::getValue(argc, argv, NbPassesParam.options, 5);
// Precision
typedef double FReal;
if(sizeof(FReal)==4)
std::cout<< "Precision: Single Float" <<std::endl;
if(sizeof(FReal)==8)
std::cout<< "Precision: Double Float" <<std::endl;
////////////////////////////////////////////////////////////////////
/// Files and Directories
// Output directory (SVG vizualisation of partitions)
const char* outputdir = FParameters::getStr(argc, argv, SvgOutParam.options, "/tmp/");
// Data path
const std::string ioPath = FParameters::getStr(argc,argv,"-path", std::string("../Addons/HMat/Data/").c_str());
int readNbRows = 0;
int readNbCols = 0;
// Read distances
FReal* distances = nullptr;
FAssertLF(FMatrixIO::read(filename, &distances, &readNbRows, &readNbCols));
FAssertLF(readNbRows == readNbCols);
const int dim = readNbRows;
// Geometry
const std::string geometryName(FParameters::getStr(argc,argv,"-d", "unitCube"));
// Define number of partitions
const int nbPartitions = FParameters::getValue(argc, argv, NbPartitionsParam.options, 4);
// Test Cluster Center Method ARITHMETIC_MEAN
// Distribution
std::ostringstream oss_nbElements; oss_nbElements << nbElements;
const std::string distributionName(geometryName + oss_nbElements.str());
// Compute partitions from distance matrix
{
FCCLKCluster<FReal> partitioner(nbPartitions, dim, distances, CCL::CCL_CCM_ARITHMETIC_MEAN);
std::unique_ptr<int[]> partitions(new int[nbPartitions]);
partitioner.getPartitions(nbPartitions,partitions.get());
const char* filename = (ioPath + distributionName + ".bin").c_str();
std::cout << "Distance matrix read from file:" << filename << std::endl;
int readNbRows = 0;
int readNbCols = 0;
// Read distances
FReal* distances = nullptr;
FAssertLF(FMatrixIO::read(filename, &distances, &readNbRows, &readNbCols));
FAssertLF(readNbRows == readNbCols);
FAssertLF(readNbRows == nbElements);
// Test Cluster Center Method K-MEDOIDS
{
typedef FDenseBlock<FReal> CellClass;
typedef FPartitionsMapping<FReal, CellClass> GridClass;
FTic timeKMedoids;
double tKMedoids;
timeKMedoids.tic();
GridClass bissection(dim, partitions.get(), nbPartitions);
FCCLKCluster<FReal> partitioner(nbPartitions, nbElements, distances, nbPasses);
char svgName[1024];
sprintf(svgName, "%s/%s-%d.svg", outputdir, "CCL_CCM_ARITHMETIC_MEAN", nbPartitions);
FSvgRect output(svgName, dim);
std::cout << "\tSave svg to " << svgName << "\n";
tKMedoids = timeKMedoids.tacAndElapsed();
std::cout << "... took @tKMedoids = "<< tKMedoids <<"\n";
std::unique_ptr<int[]> partitions(new int[nbPartitions]);
partitioner.getPartitions(nbPartitions,partitions.get());
{
typedef FDenseBlock<FReal> CellClass;
typedef FPartitionsMapping<FReal, CellClass> GridClass;
GridClass bissection(nbElements, partitions.get(), nbPartitions);
char svgName[1024];
sprintf(svgName, "%s/%s-%s-P%d.svg", outputdir, distributionName.c_str(), "CCL_KMEDOIDS_DIST_MEAN", nbPartitions);
FSvgRect output(svgName, nbElements);
std::cout << "\tSave svg to " << svgName << "\n";
bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){
output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level);
});
}
bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){
output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level);
});
}
}
// Test Cluster Center Method MEDIAN
// Compute partitions from data matrix (i.e. grid)
{
FCCLKCluster<FReal> partitioner(nbPartitions, dim, distances, CCL::CCL_CCM_MEDIAN);
std::unique_ptr<int[]> partitions(new int[nbPartitions]);
partitioner.getPartitions(nbPartitions,partitions.get());
// Read geometry
std::string distributionFileName = ioPath + distributionName;
if( FParameters::existParameter(argc, argv, FParameterDefinitions::InputBinFormat.options)){
distributionFileName += ".bfma";
}
else {
distributionFileName += ".fma";
}
std::cout << "Data matrix read from file:" << distributionFileName << std::endl;
// open particle file
FFmaGenericLoader<FReal> loader(distributionFileName.c_str());
if(!loader.isOpen()) throw std::runtime_error("Particle distribution file couldn't be opened!");
////////////////////////////////////////////////////////////////////
/// Load grid from distribution file
//FPoint<FReal>* grid = new FPoint<FReal>[nbElements];
FReal* grid = new FReal[nbElements*nbDim];
for(FSize idxPart = 0 ; idxPart < nbElements ; ++idxPart){
FPoint<FReal> position;
FReal physicalValue = 0.0;
loader.fillParticle(&position,&physicalValue);
//grid[idxPart]=position;
grid[idxPart+0*nbElements]=position.getX();
grid[idxPart+1*nbElements]=position.getY();
grid[idxPart+2*nbElements]=position.getZ();
}
// Test Cluster Center Method K-MEANS
{
typedef FDenseBlock<FReal> CellClass;
typedef FPartitionsMapping<FReal, CellClass> GridClass;
FTic timeKMeans;
double tKMeans;
timeKMeans.tic();
FCCLKCluster<FReal> partitioner(nbPartitions, nbElements, nbDim, grid, CCL::CCL_CCM_ARITHMETIC_MEAN, CCL::CCL_DIST_MEAN, nbPasses);
tKMeans = timeKMeans.tacAndElapsed();
std::cout << "... took @tKMeans = "<< tKMeans <<"\n";
std::unique_ptr<int[]> partitions(new int[nbPartitions]);
partitioner.getPartitions(nbPartitions,partitions.get());
{
typedef FDenseBlock<FReal> CellClass;
typedef FPartitionsMapping<FReal, CellClass> GridClass;
GridClass bissection(nbElements, partitions.get(), nbPartitions);
GridClass bissection(dim, partitions.get(), nbPartitions);
char svgName[1024];
sprintf(svgName, "%s/%s-%s-P%d.svg", outputdir, distributionName.c_str(), "CCL_KMEANS_DIST_MEAN", nbPartitions);
FSvgRect output(svgName, nbElements);
std::cout << "\tSave svg to " << svgName << "\n";
char svgName[1024];
sprintf(svgName, "%s/%s-%d.svg", outputdir, "CCL_CCM_MEDIAN", nbPartitions);
FSvgRect output(svgName, dim);
std::cout << "\tSave svg to " << svgName << "\n";
bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){
output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level);
});
}
bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){
output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level);
});
}
}
// Test Cluster Center Method K-MEDIANS
{
FTic timeKMedians;
double tKMedians;
timeKMedians.tic();
FCCLKCluster<FReal> partitioner(nbPartitions, nbElements, nbDim, grid, CCL::CCL_CCM_MEDIAN, CCL::CCL_DIST_MEAN, nbPasses);
tKMedians = timeKMedians.tacAndElapsed();
std::cout << "... took @tKMedians = "<< tKMedians <<"\n";
std::unique_ptr<int[]> partitions(new int[nbPartitions]);
partitioner.getPartitions(nbPartitions,partitions.get());
{
typedef FDenseBlock<FReal> CellClass;
typedef FPartitionsMapping<FReal, CellClass> GridClass;
GridClass bissection(nbElements, partitions.get(), nbPartitions);
char svgName[1024];
sprintf(svgName, "%s/%s-%s-P%d.svg", outputdir, distributionName.c_str(), "CCL_KMEDIANS_DIST_MEAN", nbPartitions);
FSvgRect output(svgName, nbElements);
std::cout << "\tSave svg to " << svgName << "\n";
bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){
output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level);
});
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment