diff --git a/Addons/HMat/Data/first_reads_1k-fmr-covar.bin b/Addons/HMat/Data/first_reads_1k-fmr-covar.bin new file mode 100644 index 0000000000000000000000000000000000000000..e54c603228532de21362fcd4fbc3e1cb79228976 Binary files /dev/null and b/Addons/HMat/Data/first_reads_1k-fmr-covar.bin differ diff --git a/Addons/HMat/Data/first_reads_1k-fmr-dissw.bin b/Addons/HMat/Data/first_reads_1k-fmr-dissw.bin new file mode 100644 index 0000000000000000000000000000000000000000..d3dc81aabbd1c27e04339fe21a4e32a3ae45f56f Binary files /dev/null and b/Addons/HMat/Data/first_reads_1k-fmr-dissw.bin differ diff --git a/Addons/HMat/Src/Clustering/FCCLKCluster.hpp b/Addons/HMat/Src/Clustering/FCCLKCluster.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8f91980ff02266ce7c170a4a43a24a6a1defe5a2 --- /dev/null +++ b/Addons/HMat/Src/Clustering/FCCLKCluster.hpp @@ -0,0 +1,169 @@ +// @SCALFMM_PRIVATE + +#ifndef FCCLKCLUSTER_HPP +#define FCCLKCLUSTER_HPP + +#include "./Utils/FGlobal.hpp" +#include "./Utils/FAssert.hpp" +#include "./Utils/FMath.hpp" +#include "../Utils/FHUtils.hpp" + +#include +#include +#include +#include +#include +#include + +extern "C" { +#include +} + + +namespace CCL { + enum ClusterCenterMethod { + CCL_CCM_ARITHMETIC_MEAN, + CCL_CCM_MEDIAN + }; + + inline char ClusterCenterMethodToChar(const ClusterCenterMethod method){ + switch (method) { + case CCL_CCM_ARITHMETIC_MEAN: + return 'a'; + break; + case CCL_CCM_MEDIAN: + return 'm'; + break; + default: + break; + } + return '?'; + } + + enum Distance { + CCL_DIST_MEAN, + CCL_DIST_MEDIAN, + CCL_DIST_SHORTEST, + CCL_DIST_LONGEST, + CCL_DIST_AVG + }; + + inline char DistanceToChar(const Distance method){ + switch (method) { + case CCL_DIST_MEAN: + return 'a'; + break; + case CCL_DIST_MEDIAN: + return 'm'; + break; + case CCL_DIST_SHORTEST: + return 's'; + break; + case CCL_DIST_LONGEST: + return 'x'; + break; + case CCL_DIST_AVG: + return 'v'; + break; + default: + break; + } + return '?'; + } +} + + +template +class FCCLKCluster { +protected: + const int nbPartitions; + const int dim; + CCL::ClusterCenterMethod method; + int* partitions; + +public: + FCCLKCluster(const int inNbPartitions, const int inDim, const FReal inDistMat[], const CCL::ClusterCenterMethod inMethod) + : nbPartitions(inNbPartitions), dim(inDim), method(inMethod), partitions(nullptr) { + + double** distMatPtrs = new double*[dim]; + + // Build mask, everyone is here + for(int idxRow = 0 ; idxRow < dim ; ++idxRow){ + distMatPtrs[idxRow] = new double[idxRow+1]; + for(int idxCol = 0 ; idxCol <= idxRow ; ++idxCol){ + distMatPtrs[idxRow][idxCol] = double(inDistMat[idxCol*dim + idxRow]); + } + } + + // allocate partitions + partitions = new int[dim]; + + // Number of call to EM algorithm + const int nbPass = 1; + // Errors + double* error = new double[dim]; + // Nb of times the optimal clustering was found + int* ifound = new int[dim]; + + kmedoids (nbPartitions, dim, distMatPtrs, nbPass, partitions, error, ifound); + + for(int idxRow = 0 ; idxRow < dim ; ++idxRow){ + delete[] distMatPtrs[idxRow]; + } + delete[] distMatPtrs; + } + + ~FCCLKCluster(){ + delete[] partitions; + } + + int getPartitions(const int inNbPartitions, int inNbIdxInPartitions[]) const{ + + /// Map partitions to 0.. nbPartitions + FAssertLF(inNbPartitions == nbPartitions); + + // Copy partitions + int* sortedPartitions = new int[dim]; + for(int idx = 0 ; idx < dim ; ++idx){ + sortedPartitions[idx]=partitions[idx]; + } + // sort partitions + std::sort(sortedPartitions,sortedPartitions+dim); + + // Map partitions to 0..nbPartitions + int counterPartition=0; + int* mapPartitions = new int[inNbPartitions]; + int currentPartition=sortedPartitions[0]; + for(int idx = 0 ; idx < dim ; ++idx){ + mapPartitions[counterPartition]=currentPartition; + if(sortedPartitions[idx+1]!=currentPartition){ + currentPartition=sortedPartitions[idx+1]; + ++counterPartition; + } + } + FAssertLF(counterPartition == inNbPartitions); + + /// Count particles in each partition + int totalGiven = 0; + for(int idxPartition = 0 ; idxPartition < inNbPartitions ; ++idxPartition){ + + inNbIdxInPartitions[idxPartition]=0; + + for(int idx = 0 ; idx < dim ; ++idx){ + + if(partitions[idx]==mapPartitions[idxPartition]) + inNbIdxInPartitions[idxPartition]+=1; + + } + totalGiven +=inNbIdxInPartitions[idxPartition]; + } + FAssertLF(totalGiven == dim); + + return 0; // no empty partition in kclusters/kmedoids algorithms + } + + +}; + +#endif // FCCLKCLUSTER_HPP + diff --git a/Addons/HMat/Tests/testCCLKCluster.cpp b/Addons/HMat/Tests/testCCLKCluster.cpp new file mode 100644 index 0000000000000000000000000000000000000000..15fd659a7ca4a91f9652947140470ab3cf2788a8 --- /dev/null +++ b/Addons/HMat/Tests/testCCLKCluster.cpp @@ -0,0 +1,105 @@ + +// @SCALFMM_PRIVATE + +#include "../Src/Clustering/FCCLKCluster.hpp" +#include "../Src/Utils/FMatrixIO.hpp" + +#include "../Src/Containers/FPartitionsMapping.hpp" +#include "../Src/Utils/FSvgRect.hpp" +#include "../Src/Viewers/FDenseBlockWrapper.hpp" +#include "../Src/Blocks/FDenseBlock.hpp" + +#include "Utils/FParameters.hpp" +#include "Utils/FParameterNames.hpp" + +#include + +int main(int argc, char** argv){ + static const FParameterNames SvgOutParam = { + {"-fout", "--out", "-out"} , + "Svg output directory." + }; + static const FParameterNames DimParam = { + {"-N", "-nb", "-dim"} , + "Dim of the matrix." + }; + static const FParameterNames NbPartitionsParam = { + {"-P", "-nbPartitions", "-nbClusters"} , + "Dim of the matrix." + }; + + + FHelpDescribeAndExit(argc, argv,"Test the k-medoid algorithm from CCL (using 2 methods for cluster center computation).",SvgOutParam,DimParam,NbPartitionsParam,FParameterDefinitions::InputFile); + + const char* filename = FParameters::getStr(argc, argv, FParameterDefinitions::InputFile.options, "../Addons/HMat/Data/unitCube1000.bin"); + + typedef float FReal; + + const char* outputdir = FParameters::getStr(argc, argv, SvgOutParam.options, "/tmp/"); + + + int readNbRows = 0; + int readNbCols = 0; + // Read distances + FReal* distances = nullptr; + FAssertLF(FMatrixIO::read(filename, &distances, &readNbRows, &readNbCols)); + FAssertLF(readNbRows == readNbCols); + const int dim = readNbRows; + + // Define number of partitions + const int nbPartitions = FParameters::getValue(argc, argv, NbPartitionsParam.options, 4); + + // Test Cluster Center Method ARITHMETIC_MEAN + { + FCCLKCluster partitioner(nbPartitions, dim, distances, CCL::CCL_CCM_ARITHMETIC_MEAN); + + std::unique_ptr partitions(new int[nbPartitions]); + partitioner.getPartitions(nbPartitions,partitions.get()); + { + typedef FDenseBlock CellClass; + typedef FPartitionsMapping GridClass; + + GridClass bissection(dim, partitions.get(), nbPartitions); + + char svgName[1024]; + sprintf(svgName, "%s/%s-%d.svg", outputdir, "CCL_CCM_ARITHMETIC_MEAN", nbPartitions); + FSvgRect output(svgName, dim); + std::cout << "\tSave svg to " << svgName << "\n"; + + bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){ + output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level); + }); + } + + } + + // Test Cluster Center Method MEDIAN + { + FCCLKCluster partitioner(nbPartitions, dim, distances, CCL::CCL_CCM_MEDIAN); + + std::unique_ptr partitions(new int[nbPartitions]); + partitioner.getPartitions(nbPartitions,partitions.get()); + { + typedef FDenseBlock CellClass; + typedef FPartitionsMapping GridClass; + + GridClass bissection(dim, partitions.get(), nbPartitions); + + char svgName[1024]; + sprintf(svgName, "%s/%s-%d.svg", outputdir, "CCL_CCM_MEDIAN", nbPartitions); + FSvgRect output(svgName, dim); + std::cout << "\tSave svg to " << svgName << "\n"; + + bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){ + output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level); + }); + } + } + + + + + + return 0; +} + diff --git a/Addons/HMat/Tests/testConnectClustering.cpp b/Addons/HMat/Tests/testConnectClustering.cpp index c888cf3a882534fdaf4022bf6da3ec4fd022d050..39c829ddd14d62411ebeafa1fc260491b4ec4ace 100644 --- a/Addons/HMat/Tests/testConnectClustering.cpp +++ b/Addons/HMat/Tests/testConnectClustering.cpp @@ -32,6 +32,7 @@ void TestPartitions(const PartitionerClass& partitioner, std::unique_ptr partitions(new int[nbPartitions]); partitioner.getPartitions(nbPartitions, partitions.get()); + if(coords!=nullptr) { char coordfilename[1024]; sprintf(coordfilename, "%s/%s-coord-%d.csv", outputdir, config, idFile); @@ -81,51 +82,84 @@ int main(int argc, char** argv){ "Dim of the matrix." }; - FHelpDescribeAndExit(argc, argv,"Test the bisection.",SvgOutParam,DimParam,FParameterDefinitions::OctreeHeight, + FHelpDescribeAndExit(argc, argv,"Test the bisection.",SvgOutParam,DimParam, + FParameterDefinitions::InputFileOne, + FParameterDefinitions::InputFileTwow, FParameterDefinitions::NbParticles); const int nbParticles = (FParameters::getValue(argc, argv, FParameterDefinitions::NbParticles.options, 3000) & ~1); const char* outputdir = FParameters::getStr(argc, argv, SvgOutParam.options, "/tmp/"); - typedef double FReal; + typedef float FReal; - ///////////////////////////////////////////////////////////////// - - std::cout << "Create spheres\n"; + //////////////////////////////////////////////////////////////////// - const FReal radius = 0.5; - const FReal distanceBetweenSpheres = 0.4; - const int nbPointsSphere = nbParticles/2; + const char* distanceFilename = FParameters::getStr(argc, argv, FParameterDefinitions::InputFileOne.options, "../Addons/HMat/Data/unitCube1000.bin"); - std::cout << "\tradius = " << radius << "\n"; - std::cout << "\tdistanceBetweenSpheres = " << distanceBetweenSpheres << "\n"; - std::cout << "\tnbPointsSphere = " << nbPointsSphere << "\n"; + //////////////////////////////////////////////////////////////////// - std::unique_ptr spherePoints(new FReal[nbParticles*4]); - unifRandonPointsOnSphere(nbPointsSphere, radius, spherePoints.get()) ; + std::cout << "Load distances file " << distanceFilename << "\n"; - for(int idxPoint = 0 ; idxPoint < nbPointsSphere ; ++idxPoint){ - spherePoints[(idxPoint+nbPointsSphere)*4 + 0] = spherePoints[idxPoint*4 + 0] + radius*2 + distanceBetweenSpheres; - spherePoints[(idxPoint+nbPointsSphere)*4 + 1] = spherePoints[idxPoint*4 + 1] + radius*2 + distanceBetweenSpheres; - spherePoints[(idxPoint+nbPointsSphere)*4 + 2] = spherePoints[idxPoint*4 + 2] + radius*2 + distanceBetweenSpheres; + std::unique_ptr particles(nullptr); + std::unique_ptr distances; + int dim = 0; + { + int readNbRows = 0; + int readNbCols = 0; + FReal* distanceValuesPtr = nullptr; + FAssertLF(FMatrixIO::read(distanceFilename, &distanceValuesPtr, &readNbRows, &readNbCols)); + FAssertLF(readNbRows == readNbCols); + dim = readNbRows; + distances.reset(distanceValuesPtr); } - ///////////////////////////////////////////////////////////////// - - std::cout << "Compute distance\n"; - const int dim = nbParticles; - std::unique_ptr distances(new FReal[nbParticles*nbParticles]); - for(int idxCol = 0 ; idxCol < nbParticles ; ++idxCol){ - for(int idxRow = 0 ; idxRow < nbParticles ; ++idxRow){ - const FReal diffx = spherePoints[idxCol*4 + 0] - spherePoints[idxRow*4 + 0]; - const FReal diffy = spherePoints[idxCol*4 + 1] - spherePoints[idxRow*4 + 1]; - const FReal diffz = spherePoints[idxCol*4 + 2] - spherePoints[idxRow*4 + 2]; - distances[idxCol*nbParticles+idxRow] = FMath::Sqrt((diffx*diffx) + (diffy*diffy) + (diffz*diffz)); - } + const FSize displaySize=10; + std::cout<<"\nD=["< particles(new FReal[nbParticles*4]); +// unifRandonPointsOnSphere(nbPointsSphere, radius, particles.get()) ; +// +// for(int idxPoint = 0 ; idxPoint < nbPointsSphere ; ++idxPoint){ +// particles[(idxPoint+nbPointsSphere)*4 + 0] = particles[idxPoint*4 + 0] + radius*2 + distanceBetweenSpheres; +// particles[(idxPoint+nbPointsSphere)*4 + 1] = particles[idxPoint*4 + 1] + radius*2 + distanceBetweenSpheres; +// particles[(idxPoint+nbPointsSphere)*4 + 2] = particles[idxPoint*4 + 2] + radius*2 + distanceBetweenSpheres; +// } +// +// ///////////////////////////////////////////////////////////////// +// +// std::cout << "Compute distance\n"; +// +// const int dim = nbParticles; +// std::unique_ptr distances(new FReal[nbParticles*nbParticles]); +// for(int idxCol = 0 ; idxCol < nbParticles ; ++idxCol){ +// for(int idxRow = 0 ; idxRow < nbParticles ; ++idxRow){ +// const FReal diffx = particles[idxCol*4 + 0] - particles[idxRow*4 + 0]; +// const FReal diffy = particles[idxCol*4 + 1] - particles[idxRow*4 + 1]; +// const FReal diffz = particles[idxCol*4 + 2] - particles[idxRow*4 + 2]; +// distances[idxCol*nbParticles+idxRow] = FMath::Sqrt((diffx*diffx) + (diffy*diffy) + (diffz*diffz)); +// } +// } +// +// ///////////////////////////////////////////////////////////////// FReal distMin = std::numeric_limits::max(); FReal distMax = std::numeric_limits::min(); @@ -140,14 +174,14 @@ int main(int argc, char** argv){ std::cout << "Dist max " << distMax << "\n"; ///////////////////////////////////////////////////////////////// - const FReal stepThresh = (distMax-distMin)/10; - for(FReal thresh = distMin ; thresh <= distMax ; thresh += stepThresh){ + const FReal stepThresh = (distMax-distMin)/100; + for(FReal thresh = distMin ; thresh <= distMax/10 ; thresh += stepThresh){ std::cout << "Test FConnexClustering for thresh " << thresh << "\n"; FConnexClustering partitioner(dim, distances.get(), thresh); std::cout << "\tGot " << partitioner.getNbPartitions() << " partitions\n"; TestPartitions>(partitioner,outputdir, int((thresh-distMin)/stepThresh), - dim, spherePoints.get(), "FConnexClustering"); + dim, particles.get(), "FConnexClustering"); } return 0;