diff --git a/Addons/HMat/Src/Clustering/FCCLKCluster.hpp b/Addons/HMat/Src/Clustering/FCCLKCluster.hpp index 8f91980ff02266ce7c170a4a43a24a6a1defe5a2..bb895f199d461f473a706927145b188d0d9a0170 100644 --- a/Addons/HMat/Src/Clustering/FCCLKCluster.hpp +++ b/Addons/HMat/Src/Clustering/FCCLKCluster.hpp @@ -23,7 +23,8 @@ extern "C" { namespace CCL { enum ClusterCenterMethod { CCL_CCM_ARITHMETIC_MEAN, - CCL_CCM_MEDIAN + CCL_CCM_MEDIAN, + CCL_CCM_DUMMY }; inline char ClusterCenterMethodToChar(const ClusterCenterMethod method){ @@ -45,7 +46,8 @@ namespace CCL { CCL_DIST_MEDIAN, CCL_DIST_SHORTEST, CCL_DIST_LONGEST, - CCL_DIST_AVG + CCL_DIST_AVG, + CCL_DIST_DUMMY }; inline char DistanceToChar(const Distance method){ @@ -67,7 +69,7 @@ namespace CCL { break; default: break; - } + } return '?'; } } @@ -77,40 +79,82 @@ template <class FReal> class FCCLKCluster { protected: const int nbPartitions; - const int dim; + const int nbElements; + const int nbDim; + const int nbPass; //< Number of call to EM algorithm CCL::ClusterCenterMethod method; + CCL::Distance distance; int* partitions; public: - FCCLKCluster(const int inNbPartitions, const int inDim, const FReal inDistMat[], const CCL::ClusterCenterMethod inMethod) - : nbPartitions(inNbPartitions), dim(inDim), method(inMethod), partitions(nullptr) { + FCCLKCluster(const int inNbPartitions, const int inNbElements, const FReal inDistMat[], const int inNbPass = 0) + : nbPartitions(inNbPartitions), nbElements(inNbElements), nbDim(0), nbPass(inNbPass), method(CCL::CCL_CCM_DUMMY), distance(CCL::CCL_DIST_DUMMY), partitions(nullptr) { - double** distMatPtrs = new double*[dim]; + double** distMatPtrs = new double*[nbElements]; // Build mask, everyone is here - for(int idxRow = 0 ; idxRow < dim ; ++idxRow){ + for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){ distMatPtrs[idxRow] = new double[idxRow+1]; for(int idxCol = 0 ; idxCol <= idxRow ; ++idxCol){ - distMatPtrs[idxRow][idxCol] = double(inDistMat[idxCol*dim + idxRow]); + distMatPtrs[idxRow][idxCol] = double(inDistMat[idxCol*nbElements + idxRow]); } } // allocate partitions - partitions = new int[dim]; + partitions = new int[nbElements]; - // Number of call to EM algorithm - const int nbPass = 1; // Errors - double* error = new double[dim]; + double* error = new double[nbElements]; // Nb of times the optimal clustering was found - int* ifound = new int[dim]; + int* ifound = new int[nbElements]; - kmedoids (nbPartitions, dim, distMatPtrs, nbPass, partitions, error, ifound); + kmedoids (nbPartitions, nbElements, distMatPtrs, nbPass, partitions, error, ifound); - for(int idxRow = 0 ; idxRow < dim ; ++idxRow){ + for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){ delete[] distMatPtrs[idxRow]; } delete[] distMatPtrs; + + } + + FCCLKCluster(const int inNbPartitions, const int inNbElements, const int inNbDim, const FReal inDataMat[], const CCL::ClusterCenterMethod inMethod, const CCL::Distance inDistance, const int inNbPass = 0) + : nbPartitions(inNbPartitions), nbElements(inNbElements), nbDim(inNbDim), nbPass(inNbPass), method(inMethod), distance(inDistance), partitions(nullptr) { + + double** dataMatPtrs = new double*[nbElements]; + int** mask = new int*[nbElements]; + + // Build mask, everyone is here + for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){ + mask[idxRow] = new int[idxRow+1]; + dataMatPtrs[idxRow] = new double[nbDim]; + for(int idxCol = 0 ; idxCol < nbDim ; ++idxCol){ + mask[idxRow][idxCol] = 1; + dataMatPtrs[idxRow][idxCol] = double(inDataMat[idxCol*nbElements + idxRow]); + } + } + + // allocate partitions + partitions = new int[nbElements]; + + // Errors + double* error = new double[nbElements]; + // Nb of times the optimal clustering was found + int* ifound = new int[nbElements]; + + // Weights + double* weights = new double[nbElements]; + for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow) + weights[idxRow]=double(1.0); + + kcluster(nbPartitions, nbElements, nbDim, dataMatPtrs, mask, weights, 0, nbPass, ClusterCenterMethodToChar(method), DistanceToChar(distance), partitions, error, ifound); + + for(int idxRow = 0 ; idxRow < nbElements ; ++idxRow){ + delete[] mask[idxRow]; + delete[] dataMatPtrs[idxRow]; + } + delete[] mask; + delete[] dataMatPtrs; + } ~FCCLKCluster(){ @@ -123,18 +167,18 @@ public: FAssertLF(inNbPartitions == nbPartitions); // Copy partitions - int* sortedPartitions = new int[dim]; - for(int idx = 0 ; idx < dim ; ++idx){ + int* sortedPartitions = new int[nbElements]; + for(int idx = 0 ; idx < nbElements ; ++idx){ sortedPartitions[idx]=partitions[idx]; } // sort partitions - std::sort(sortedPartitions,sortedPartitions+dim); + std::sort(sortedPartitions,sortedPartitions+nbElements); // Map partitions to 0..nbPartitions int counterPartition=0; int* mapPartitions = new int[inNbPartitions]; int currentPartition=sortedPartitions[0]; - for(int idx = 0 ; idx < dim ; ++idx){ + for(int idx = 0 ; idx < nbElements ; ++idx){ mapPartitions[counterPartition]=currentPartition; if(sortedPartitions[idx+1]!=currentPartition){ currentPartition=sortedPartitions[idx+1]; @@ -149,7 +193,7 @@ public: inNbIdxInPartitions[idxPartition]=0; - for(int idx = 0 ; idx < dim ; ++idx){ + for(int idx = 0 ; idx < nbElements ; ++idx){ if(partitions[idx]==mapPartitions[idxPartition]) inNbIdxInPartitions[idxPartition]+=1; @@ -157,7 +201,7 @@ public: } totalGiven +=inNbIdxInPartitions[idxPartition]; } - FAssertLF(totalGiven == dim); + FAssertLF(totalGiven == nbElements); return 0; // no empty partition in kclusters/kmedoids algorithms } diff --git a/Addons/HMat/Tests/testCCLKCluster.cpp b/Addons/HMat/Tests/testCCLKCluster.cpp index 15fd659a7ca4a91f9652947140470ab3cf2788a8..69ead3b07c525aca07bee2cbec792707fa0df8c1 100644 --- a/Addons/HMat/Tests/testCCLKCluster.cpp +++ b/Addons/HMat/Tests/testCCLKCluster.cpp @@ -1,6 +1,7 @@ // @SCALFMM_PRIVATE +// HMAT specific includes #include "../Src/Clustering/FCCLKCluster.hpp" #include "../Src/Utils/FMatrixIO.hpp" @@ -9,6 +10,10 @@ #include "../Src/Viewers/FDenseBlockWrapper.hpp" #include "../Src/Blocks/FDenseBlock.hpp" +// ScalFMM's include +#include "Files/FFmaGenericLoader.hpp" // load grid +#include "Utils/FGlobal.hpp" +#include "Utils/FTic.hpp" #include "Utils/FParameters.hpp" #include "Utils/FParameterNames.hpp" @@ -19,84 +24,216 @@ int main(int argc, char** argv){ {"-fout", "--out", "-out"} , "Svg output directory." }; + static const FParameterNames ElemParam = { + {"-nE", "-nbElements", "-elem"} , + "Nbof elements to partition." + }; static const FParameterNames DimParam = { - {"-N", "-nb", "-dim"} , - "Dim of the matrix." + {"-nD", "-nbDim", "-dim"} , + "Dimension of ambiant space." }; static const FParameterNames NbPartitionsParam = { - {"-P", "-nbPartitions", "-nbClusters"} , + {"-Part", "-nbPartitions", "-nbClusters"} , + "Dim of the matrix." + }; + static const FParameterNames NbPassesParam = { + {"-Pass", "-nbPasses", "-nbPass"} , "Dim of the matrix." }; + //FHelpDescribeAndExit(argc, argv,"Test the k-means, k-medians and k-medoids algorithms from CCL (using 2 methods for distance computation) on 3D grid (unitCube).",SvgOutParam,ElemParam,DimParam,NbPartitionsParam,NbPassesParam,FParameterDefinitions::InputFile); - FHelpDescribeAndExit(argc, argv,"Test the k-medoid algorithm from CCL (using 2 methods for cluster center computation).",SvgOutParam,DimParam,NbPartitionsParam,FParameterDefinitions::InputFile); + //////////////////////////////////////////////////////////////////// + /// Command line examples + // Partition the unitCube with 1000 particles in 8 partitions (use 5 iterations of EM algorithm) + // ./Addons/HMat/testCCLKCluster -binin -nE 1000 -d unitCube -Part 8 -Pass 5 + // > Results show that k-medoids seems to work better for unitCube1000 (use fewer pass, but still more costly) + // + // + //////////////////////////////////////////////////////////////////// + /// Parameters + // size of the grid + const int nbElements = FParameters::getValue(argc,argv,"-nE", 1000); - const char* filename = FParameters::getStr(argc, argv, FParameterDefinitions::InputFile.options, "../Addons/HMat/Data/unitCube1000.bin"); + // size of the grid + const int nbDim = FParameters::getValue(argc,argv,"-nD", 3); - typedef float FReal; + // Define number of partitions + const int nbPartitions = FParameters::getValue(argc, argv, NbPartitionsParam.options, 4); + // Define number of passes + const int nbPasses = FParameters::getValue(argc, argv, NbPassesParam.options, 5); + + // Precision + typedef double FReal; + if(sizeof(FReal)==4) + std::cout<< "Precision: Single Float" <<std::endl; + if(sizeof(FReal)==8) + std::cout<< "Precision: Double Float" <<std::endl; + + //////////////////////////////////////////////////////////////////// + /// Files and Directories + + // Output directory (SVG vizualisation of partitions) const char* outputdir = FParameters::getStr(argc, argv, SvgOutParam.options, "/tmp/"); + // Data path + const std::string ioPath = FParameters::getStr(argc,argv,"-path", std::string("../Addons/HMat/Data/").c_str()); - int readNbRows = 0; - int readNbCols = 0; - // Read distances - FReal* distances = nullptr; - FAssertLF(FMatrixIO::read(filename, &distances, &readNbRows, &readNbCols)); - FAssertLF(readNbRows == readNbCols); - const int dim = readNbRows; + // Geometry + const std::string geometryName(FParameters::getStr(argc,argv,"-d", "unitCube")); - // Define number of partitions - const int nbPartitions = FParameters::getValue(argc, argv, NbPartitionsParam.options, 4); - - // Test Cluster Center Method ARITHMETIC_MEAN + // Distribution + std::ostringstream oss_nbElements; oss_nbElements << nbElements; + const std::string distributionName(geometryName + oss_nbElements.str()); + + // Compute partitions from distance matrix { - FCCLKCluster<FReal> partitioner(nbPartitions, dim, distances, CCL::CCL_CCM_ARITHMETIC_MEAN); - std::unique_ptr<int[]> partitions(new int[nbPartitions]); - partitioner.getPartitions(nbPartitions,partitions.get()); + const char* filename = (ioPath + distributionName + ".bin").c_str(); + + std::cout << "Distance matrix read from file:" << filename << std::endl; + + int readNbRows = 0; + int readNbCols = 0; + // Read distances + FReal* distances = nullptr; + FAssertLF(FMatrixIO::read(filename, &distances, &readNbRows, &readNbCols)); + FAssertLF(readNbRows == readNbCols); + FAssertLF(readNbRows == nbElements); + + + // Test Cluster Center Method K-MEDOIDS { - typedef FDenseBlock<FReal> CellClass; - typedef FPartitionsMapping<FReal, CellClass> GridClass; + FTic timeKMedoids; + double tKMedoids; + timeKMedoids.tic(); - GridClass bissection(dim, partitions.get(), nbPartitions); + FCCLKCluster<FReal> partitioner(nbPartitions, nbElements, distances, nbPasses); - char svgName[1024]; - sprintf(svgName, "%s/%s-%d.svg", outputdir, "CCL_CCM_ARITHMETIC_MEAN", nbPartitions); - FSvgRect output(svgName, dim); - std::cout << "\tSave svg to " << svgName << "\n"; + tKMedoids = timeKMedoids.tacAndElapsed(); + std::cout << "... took @tKMedoids = "<< tKMedoids <<"\n"; + + std::unique_ptr<int[]> partitions(new int[nbPartitions]); + partitioner.getPartitions(nbPartitions,partitions.get()); + { + typedef FDenseBlock<FReal> CellClass; + typedef FPartitionsMapping<FReal, CellClass> GridClass; + + GridClass bissection(nbElements, partitions.get(), nbPartitions); + + char svgName[1024]; + sprintf(svgName, "%s/%s-%s-P%d.svg", outputdir, distributionName.c_str(), "CCL_KMEDOIDS_DIST_MEAN", nbPartitions); + FSvgRect output(svgName, nbElements); + std::cout << "\tSave svg to " << svgName << "\n"; + + bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){ + output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level); + }); + } - bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){ - output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level); - }); } } - // Test Cluster Center Method MEDIAN + + // Compute partitions from data matrix (i.e. grid) { - FCCLKCluster<FReal> partitioner(nbPartitions, dim, distances, CCL::CCL_CCM_MEDIAN); - std::unique_ptr<int[]> partitions(new int[nbPartitions]); - partitioner.getPartitions(nbPartitions,partitions.get()); + // Read geometry + std::string distributionFileName = ioPath + distributionName; + if( FParameters::existParameter(argc, argv, FParameterDefinitions::InputBinFormat.options)){ + distributionFileName += ".bfma"; + } + else { + distributionFileName += ".fma"; + } + + std::cout << "Data matrix read from file:" << distributionFileName << std::endl; + + // open particle file + FFmaGenericLoader<FReal> loader(distributionFileName.c_str()); + if(!loader.isOpen()) throw std::runtime_error("Particle distribution file couldn't be opened!"); + + //////////////////////////////////////////////////////////////////// + /// Load grid from distribution file + //FPoint<FReal>* grid = new FPoint<FReal>[nbElements]; + FReal* grid = new FReal[nbElements*nbDim]; + + for(FSize idxPart = 0 ; idxPart < nbElements ; ++idxPart){ + FPoint<FReal> position; + FReal physicalValue = 0.0; + loader.fillParticle(&position,&physicalValue); + //grid[idxPart]=position; + grid[idxPart+0*nbElements]=position.getX(); + grid[idxPart+1*nbElements]=position.getY(); + grid[idxPart+2*nbElements]=position.getZ(); + } + + + // Test Cluster Center Method K-MEANS { - typedef FDenseBlock<FReal> CellClass; - typedef FPartitionsMapping<FReal, CellClass> GridClass; + FTic timeKMeans; + double tKMeans; + timeKMeans.tic(); + + FCCLKCluster<FReal> partitioner(nbPartitions, nbElements, nbDim, grid, CCL::CCL_CCM_ARITHMETIC_MEAN, CCL::CCL_DIST_MEAN, nbPasses); + + tKMeans = timeKMeans.tacAndElapsed(); + std::cout << "... took @tKMeans = "<< tKMeans <<"\n"; + + + std::unique_ptr<int[]> partitions(new int[nbPartitions]); + partitioner.getPartitions(nbPartitions,partitions.get()); + { + typedef FDenseBlock<FReal> CellClass; + typedef FPartitionsMapping<FReal, CellClass> GridClass; + + GridClass bissection(nbElements, partitions.get(), nbPartitions); - GridClass bissection(dim, partitions.get(), nbPartitions); + char svgName[1024]; + sprintf(svgName, "%s/%s-%s-P%d.svg", outputdir, distributionName.c_str(), "CCL_KMEANS_DIST_MEAN", nbPartitions); + FSvgRect output(svgName, nbElements); + std::cout << "\tSave svg to " << svgName << "\n"; - char svgName[1024]; - sprintf(svgName, "%s/%s-%d.svg", outputdir, "CCL_CCM_MEDIAN", nbPartitions); - FSvgRect output(svgName, dim); - std::cout << "\tSave svg to " << svgName << "\n"; + bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){ + output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level); + }); + } - bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){ - output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level); - }); } - } + // Test Cluster Center Method K-MEDIANS + { + FTic timeKMedians; + double tKMedians; + timeKMedians.tic(); + + FCCLKCluster<FReal> partitioner(nbPartitions, nbElements, nbDim, grid, CCL::CCL_CCM_MEDIAN, CCL::CCL_DIST_MEAN, nbPasses); + + tKMedians = timeKMedians.tacAndElapsed(); + std::cout << "... took @tKMedians = "<< tKMedians <<"\n"; + + std::unique_ptr<int[]> partitions(new int[nbPartitions]); + partitioner.getPartitions(nbPartitions,partitions.get()); + { + typedef FDenseBlock<FReal> CellClass; + typedef FPartitionsMapping<FReal, CellClass> GridClass; + GridClass bissection(nbElements, partitions.get(), nbPartitions); + + char svgName[1024]; + sprintf(svgName, "%s/%s-%s-P%d.svg", outputdir, distributionName.c_str(), "CCL_KMEDIANS_DIST_MEAN", nbPartitions); + FSvgRect output(svgName, nbElements); + std::cout << "\tSave svg to " << svgName << "\n"; + + bissection.forAllBlocksDescriptor([&](const FBlockDescriptor& info){ + output.addRectWithLegend(info.col, info.row, info.nbCols, info.nbRows, info.level); + }); + } + } + + }