Commit c75d6652 authored by BLANCHARD Pierre's avatar BLANCHARD Pierre
Browse files

hmat: Implement interface for kmeans and kmedians; Provided test in testCCLKCluster.

parent d058572a
......@@ -23,7 +23,8 @@ extern "C" {
namespace CCL {
enum ClusterCenterMethod {
CCL_CCM_ARITHMETIC_MEAN,
CCL_CCM_MEDIAN
CCL_CCM_MEDIAN,
CCL_CCM_DUMMY
};
inline char ClusterCenterMethodToChar(const ClusterCenterMethod method){
......@@ -45,7 +46,8 @@ namespace CCL {
CCL_DIST_MEDIAN,
CCL_DIST_SHORTEST,
CCL_DIST_LONGEST,
CCL_DIST_AVG
CCL_DIST_AVG,
CCL_DIST_DUMMY
};
inline char DistanceToChar(const Distance method){
......@@ -67,7 +69,7 @@ namespace CCL {
break;
default:
break;
}
}
return '?';
}
}
......@@ -77,40 +79,82 @@ template <class FReal>
class FCCLKCluster {
protected:
const int nbPartitions;
const int dim;
const int nbElements;
const int nbDim;
const int nbPass; //< Number of call to EM algorithm
CCL::ClusterCenterMethod method;
CCL::Distance distance;
int* partitions;
public:
FCCLKCluster(const int inNbPartitions, const int inDim, const FReal inDistMat[], const CCL::ClusterCenterMethod inMethod)
: nbPartitions(inNbPartitions), dim(inDim), method(inMethod), partitions(nullptr) {
FCCLKCluster(const int inNbPartitions, const int inNbElements, const FReal inDistMat[], const int inNbPass = 0)
: nbPartitions(inNbPartitions), nbElements(inNbElements), nbDim(0), nbPass(inNbPass), method(CCL::CCL_CCM_DUMMY), distance(CCL::CCL_DIST_DUMMY), partitions(nullptr) {
double** distMatPtrs = new double*[dim];
double** distMatPtrs = new double*[nbElements];
// Build mask, everyone is here
for(int idxRow = 0 ; idxRow < dim ; ++idxRow){
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
distMatPtrs[idxRow] = new double[idxRow+1];
for(int idxCol = 0 ; idxCol <= idxRow ; ++idxCol){
distMatPtrs[idxRow][idxCol] = double(inDistMat[idxCol*dim + idxRow]);
distMatPtrs[idxRow][idxCol] = double(inDistMat[idxCol*nbElements + idxRow]);
}
}
// allocate partitions
partitions = new int[dim];
partitions = new int[nbElements];
// Number of call to EM algorithm
const int nbPass = 1;
// Errors
double* error = new double[dim];
double* error = new double[nbElements];
// Nb of times the optimal clustering was found
int* ifound = new int[dim];
int* ifound = new int[nbElements];
kmedoids (nbPartitions, dim, distMatPtrs, nbPass, partitions, error, ifound);
kmedoids (nbPartitions, nbElements, distMatPtrs, nbPass, partitions, error, ifound);
for(int idxRow = 0 ; idxRow < dim ; ++idxRow){
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
delete[] distMatPtrs[idxRow];
}
delete[] distMatPtrs;
}
FCCLKCluster(const int inNbPartitions, const int inNbElements, const int inNbDim, const FReal inDataMat[], const CCL::ClusterCenterMethod inMethod, const CCL::Distance inDistance, const int inNbPass = 0)
: nbPartitions(inNbPartitions), nbElements(inNbElements), nbDim(inNbDim), nbPass(inNbPass), method(inMethod), distance(inDistance), partitions(nullptr) {
double** dataMatPtrs = new double*[nbElements];
int** mask = new int*[nbElements];
// Build mask, everyone is here
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
mask[idxRow] = new int[idxRow+1];
dataMatPtrs[idxRow] = new double[nbDim];
for(int idxCol = 0 ; idxCol < nbDim ; ++idxCol){
mask[idxRow][idxCol] = 1;
dataMatPtrs[idxRow][idxCol] = double(inDataMat[idxCol*nbElements + idxRow]);
}
}
// allocate partitions
partitions = new int[nbElements];
// Errors
double* error = new double[nbElements];
// Nb of times the optimal clustering was found
int* ifound = new int[nbElements];
// Weights
double* weights = new double[nbElements];
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow)
weights[idxRow]=double(1.0);
kcluster(nbPartitions, nbElements, nbDim, dataMatPtrs, mask, weights, 0, nbPass, ClusterCenterMethodToChar(method), DistanceToChar(distance), partitions, error, ifound);
for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){
delete[] mask[idxRow];
delete[] dataMatPtrs[idxRow];
}
delete[] mask;
delete[] dataMatPtrs;
}
~FCCLKCluster(){
......@@ -123,18 +167,18 @@ public:
FAssertLF(inNbPartitions == nbPartitions);
// Copy partitions
int* sortedPartitions = new int[dim];
for(int idx = 0 ; idx < dim ; ++idx){
int* sortedPartitions = new int[nbElements];
for(int idx = 0 ; idx < nbElements ; ++idx){
sortedPartitions[idx]=partitions[idx];
}
// sort partitions
std::sort(sortedPartitions,sortedPartitions+dim);
std::sort(sortedPartitions,sortedPartitions+nbElements);
// Map partitions to 0..nbPartitions
int counterPartition=0;
int* mapPartitions = new int[inNbPartitions];
int currentPartition=sortedPartitions[0];
for(int idx = 0 ; idx < dim ; ++idx){
for(int idx = 0 ; idx < nbElements ; ++idx){
mapPartitions[counterPartition]=currentPartition;
if(sortedPartitions[idx+1]!=currentPartition){
currentPartition=sortedPartitions[idx+1];
......@@ -149,7 +193,7 @@ public:
inNbIdxInPartitions[idxPartition]=0;
for(int idx = 0 ; idx < dim ; ++idx){
for(int idx = 0 ; idx < nbElements ; ++idx){
if(partitions[idx]==mapPartitions[idxPartition])
inNbIdxInPartitions[idxPartition]+=1;
......@@ -157,7 +201,7 @@ public:
}
totalGiven +=inNbIdxInPartitions[idxPartition];
}
FAssertLF(totalGiven == dim);
FAssertLF(totalGiven == nbElements);
return 0; // no empty partition in kclusters/kmedoids algorithms
}
......
// @SCALFMM_PRIVATE
// HMAT specific includes
#include "../Src/Clustering/FCCLKCluster.hpp"
#include "../Src/Utils/FMatrixIO.hpp"
......@@ -9,6 +10,10 @@
#include "../Src/Viewers/FDenseBlockWrapper.hpp"
#include "../Src/Blocks/FDenseBlock.hpp"
// ScalFMM's include
#include "Files/FFmaGenericLoader.hpp" // load grid
#include "Utils/FGlobal.hpp"
#include "Utils/FTic.hpp"
#include "Utils/FParameters.hpp"
#include "Utils/FParameterNames.hpp"
......@@ -19,84 +24,216 @@ int main(int argc, char** argv){
{"-fout", "--out", "-out"} ,
"Svg output directory."
};
static const FParameterNames ElemParam = {
{"-nE", "-nbElements", "-elem"} ,
"Nbof elements to partition."
};
static const FParameterNames DimParam = {
{"-N", "-nb", "-dim"} ,
"Dim of the matrix."
{"-nD", "-nbDim", "-dim"} ,
"Dimension of ambiant space."
};
static const FParameterNames NbPartitionsParam = {
{"-P", "-nbPartitions", "-nbClusters"} ,
{"-Part", "-nbPartitions", "-nbClusters"} ,
"Dim of the matrix."
};
static const FParameterNames NbPassesParam = {
{"-Pass", "-nbPasses", "-nbPass"} ,
"Dim of the matrix."
};
//FHelpDescribeAndExit(argc, argv,"Test the k-means, k-medians and k-medoids algorithms from CCL (using 2 methods for distance computation) on 3D grid (unitCube).",SvgOutParam,ElemParam,DimParam,NbPartitionsParam,NbPassesParam,FParameterDefinitions::InputFile);
FHelpDescribeAndExit(argc, argv,"Test the k-medoid algorithm from CCL (using 2 methods for cluster center computation).",SvgOutParam,DimParam,NbPartitionsParam,FParameterDefinitions::InputFile);
////////////////////////////////////////////////////////////////////
/// Command line examples
// Partition the unitCube with 1000 particles in 8 partitions (use 5 iterations of EM algorithm)
// ./Addons/HMat/testCCLKCluster -binin -nE 1000 -d unitCube -Part 8 -Pass 5
// > Results show that k-medoids seems to work better for unitCube1000 (use fewer pass, but still more costly)
//
//
////////////////////////////////////////////////////////////////////
/// Parameters
// size of the grid
const int nbElements = FParameters::getValue(argc,argv,"-nE", 1000);
const char* filename = FParameters::getStr(argc, argv, FParameterDefinitions::InputFile.options, "../Addons/HMat/Data/unitCube1000.bin");
// size of the grid
const int nbDim = FParameters::getValue(argc,argv,"-nD", 3);
typedef float FReal;
// Define number of partitions
const int nbPartitions = FParameters::getValue(argc, argv, NbPartitionsParam.options, 4);
// Define number of passes
const int nbPasses = FParameters::getValue(argc, argv, NbPassesParam.options, 5);
// Precision
typedef double FReal;
if(sizeof(FReal)==4)
std::cout<< "Precision: Single Float" <<std::endl;
if(sizeof(FReal)==8)
std::cout<< "Precision: Double Float" <<std::endl;
////////////////////////////////////////////////////////////////////
/// Files and Directories
// Output directory (SVG vizualisation of partitions)
const char* outputdir = FParameters::getStr(argc, argv, SvgOutParam.options, "/tmp/");
// Data path
const std::string ioPath = FParameters::getStr(argc,argv,"-path", std::string("../Addons/HMat/Data/").c_str());
int readNbRows = 0;
int readNbCols = 0;
// Read distances
FReal* distances = nullptr;
FAssertLF(FMatrixIO::read(filename, &distances, &readNbRows, &readNbCols));
FAssertLF(readNbRows == readNbCols);
const int dim = readNbRows;
// Geometry
const std::string geometryName(FParameters::getStr(argc,argv,"-d", "unitCube"));
// Define number of partitions
const int nbPartitions = FParameters::getValue(argc, argv, NbPartitionsParam.options, 4);
// Test Cluster Center Method ARITHMETIC_MEAN
// Distribution
std::ostringstream oss_nbElements; oss_nbElements << nbElements;
const std::string distributionName(geometryName + oss_nbElements.str());
// Compute partitions from distance matrix
{
FCCLKCluster<FReal> partitioner(nbPartitions, dim, distances, CCL::CCL_CCM_ARITHMETIC_MEAN);
std::unique_ptr<int[]> partitions(new int[nbPartitions]);
partitioner.getPartitions(nbPartitions,partitions.get());
const char* filename = (ioPath + distributionName + ".bin").c_str();
std::cout << "Distance matrix read from file:" << filename << std::endl;
int readNbRows = 0;
int readNbCols = 0;
// Read distances
FReal* distances = nullptr;
FAssertLF(FMatrixIO::read(filename, &distances, &readNbRows, &readNbCols));
FAssertLF(readNbRows == readNbCols);
FAssertLF(readNbRows == nbElements);
// Test Cluster Center Method K-MEDOIDS
{
typedef FDenseBlock<FReal> CellClass;
typedef FPartitionsMapping<FReal, CellClass> GridClass;
FTic timeKMedoids;
double tKMedoids;
timeKMedoids.tic();
GridClass bissection(dim, partitions.get(), nbPartitions);
FCCLKCluster<FReal> partitioner(nbPartitions, nbElements, distances, nbPasses);
char svgName[1024];
sprintf(svgName, "%s/%s-%d.svg", outputdir, "CCL_CCM_ARITHMETIC_MEAN", nbPartitions);
FSvgRect output(svgName, dim);
std::cout << "\tSave svg to " << svgName << "\n";
tKMedoids = timeKMedoids.tacAndElapsed();
std::cout << "... took @tKMedoids = "<< tKMedoids <<"\n";
std::unique_ptr<int[]> partitions(new int[nbPartitions]);
partitioner.getPartitions(nbPartitions,partitions.get());
{
typedef FDenseBlock<FReal> CellClass;
typedef FPartitionsMapping<FReal, CellClass> GridClass;
GridClass bissection(nbElements, partitions.get(), nbPartitions);
char svgName[1024];
sprintf(svgName, "%s/%s-%s-P%d.svg", outputdir, distributionName.c_str(), "CCL_KMEDOIDS_DIST_MEAN", nbPartitions);
FSvgRect output(svgName, nbElements);
std::cout << "\tSave svg to " << svgName << "\n";
bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){
output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level);
});
}
bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){
output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level);
});
}
}
// Test Cluster Center Method MEDIAN
// Compute partitions from data matrix (i.e. grid)
{
FCCLKCluster<FReal> partitioner(nbPartitions, dim, distances, CCL::CCL_CCM_MEDIAN);
std::unique_ptr<int[]> partitions(new int[nbPartitions]);
partitioner.getPartitions(nbPartitions,partitions.get());
// Read geometry
std::string distributionFileName = ioPath + distributionName;
if( FParameters::existParameter(argc, argv, FParameterDefinitions::InputBinFormat.options)){
distributionFileName += ".bfma";
}
else {
distributionFileName += ".fma";
}
std::cout << "Data matrix read from file:" << distributionFileName << std::endl;
// open particle file
FFmaGenericLoader<FReal> loader(distributionFileName.c_str());
if(!loader.isOpen()) throw std::runtime_error("Particle distribution file couldn't be opened!");
////////////////////////////////////////////////////////////////////
/// Load grid from distribution file
//FPoint<FReal>* grid = new FPoint<FReal>[nbElements];
FReal* grid = new FReal[nbElements*nbDim];
for(FSize idxPart = 0 ; idxPart < nbElements ; ++idxPart){
FPoint<FReal> position;
FReal physicalValue = 0.0;
loader.fillParticle(&position,&physicalValue);
//grid[idxPart]=position;
grid[idxPart+0*nbElements]=position.getX();
grid[idxPart+1*nbElements]=position.getY();
grid[idxPart+2*nbElements]=position.getZ();
}
// Test Cluster Center Method K-MEANS
{
typedef FDenseBlock<FReal> CellClass;
typedef FPartitionsMapping<FReal, CellClass> GridClass;
FTic timeKMeans;
double tKMeans;
timeKMeans.tic();
FCCLKCluster<FReal> partitioner(nbPartitions, nbElements, nbDim, grid, CCL::CCL_CCM_ARITHMETIC_MEAN, CCL::CCL_DIST_MEAN, nbPasses);
tKMeans = timeKMeans.tacAndElapsed();
std::cout << "... took @tKMeans = "<< tKMeans <<"\n";
std::unique_ptr<int[]> partitions(new int[nbPartitions]);
partitioner.getPartitions(nbPartitions,partitions.get());
{
typedef FDenseBlock<FReal> CellClass;
typedef FPartitionsMapping<FReal, CellClass> GridClass;
GridClass bissection(nbElements, partitions.get(), nbPartitions);
GridClass bissection(dim, partitions.get(), nbPartitions);
char svgName[1024];
sprintf(svgName, "%s/%s-%s-P%d.svg", outputdir, distributionName.c_str(), "CCL_KMEANS_DIST_MEAN", nbPartitions);
FSvgRect output(svgName, nbElements);
std::cout << "\tSave svg to " << svgName << "\n";
char svgName[1024];
sprintf(svgName, "%s/%s-%d.svg", outputdir, "CCL_CCM_MEDIAN", nbPartitions);
FSvgRect output(svgName, dim);
std::cout << "\tSave svg to " << svgName << "\n";
bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){
output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level);
});
}
bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){
output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level);
});
}
}
// Test Cluster Center Method K-MEDIANS
{
FTic timeKMedians;
double tKMedians;
timeKMedians.tic();
FCCLKCluster<FReal> partitioner(nbPartitions, nbElements, nbDim, grid, CCL::CCL_CCM_MEDIAN, CCL::CCL_DIST_MEAN, nbPasses);
tKMedians = timeKMedians.tacAndElapsed();
std::cout << "... took @tKMedians = "<< tKMedians <<"\n";
std::unique_ptr<int[]> partitions(new int[nbPartitions]);
partitioner.getPartitions(nbPartitions,partitions.get());
{
typedef FDenseBlock<FReal> CellClass;
typedef FPartitionsMapping<FReal, CellClass> GridClass;
GridClass bissection(nbElements, partitions.get(), nbPartitions);
char svgName[1024];
sprintf(svgName, "%s/%s-%s-P%d.svg", outputdir, distributionName.c_str(), "CCL_KMEDIANS_DIST_MEAN", nbPartitions);
FSvgRect output(svgName, nbElements);
std::cout << "\tSave svg to " << svgName << "\n";
bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){
output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level);
});
}
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment