Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 619d5875 authored by BRAMAS Berenger's avatar BRAMAS Berenger
Browse files
parents 95094e4b 506f3a3f
No related branches found
No related tags found
No related merge requests found
// ===================================================================================
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
//
// @SCALFMM_PRIVATE
//
#ifndef FACABLOCK_HPP
#define FACABLOCK_HPP
#include "Utils/FBlas.hpp"
#include "Utils/FAca.hpp"
/*! Choose either \a FULLY_PIVOTED_ACA or \a PARTIALLY_PIVOTED_ACA */
#define FULLY_PIVOTED_ACA
//#define PARTIALLY_PIVOTED_ACA
/*! Choose \a RECOMPRESSED_ACA */
//#define RECOMPRESSED_ACA
template <class FReal, int ORDER = 14>
class FACABlock{
protected:
// members
FReal* block;
FReal* U;
FReal* VT;
int nbRows;
int nbCols;
int level;
unsigned int rank;
FReal accuracy;
FACABlock(const FACABlock&) = delete;
FACABlock& operator=(const FACABlock&) = delete;
public:
FACABlock()
: block(nullptr), U(nullptr), VT(nullptr), nbRows(0), nbCols(0), level(0), rank(0), accuracy(FMath::pow(10.0,static_cast<FReal>(-ORDER))) {
}
// ctor
template <class ViewerClass>
void fill(const ViewerClass& viewer, const int inLevel){
clear();
// Allocate memory
level = inLevel;
nbRows = viewer.getNbRows();
nbCols = viewer.getNbCols();
#if (defined FULLY_PIVOTED_ACA)
block = new FReal[nbRows*nbCols];
for(int idxRow = 0 ; idxRow < nbRows ; ++idxRow){
for(int idxCol = 0 ; idxCol < nbCols ; ++idxCol){
block[idxCol*nbRows+idxRow] = viewer.getValue(idxRow,idxCol);
}
}
#endif
// SVD specific (col major)
rank = std::min(nbRows,nbCols);
//S = new FReal[rank];
FReal* _U ;// = new FReal[nbRows*nbCols]; // Call to computeSVD() copies block into U
FReal* _V;// = new FReal[rank*nbCols];
#if (defined FULLY_PIVOTED_ACA)
// Perform fully pivoted ACA
FAca::fACA<FReal>(block, nbRows, nbCols, accuracy, _U, _V, rank);
#elif (defined PARTIAL_PIVOTED_ACA)
// TODO
// Perform partially pivoted ACA
//FAca::fACA<FReal>(viewer, nbRows, nbCols, accuracy, _U, _V, rank);
#endif
// display rank
std::cout << "rank after ACA=" << rank << std::endl;
#if (defined RECOMPRESSED_ACA)
// Recompression by QR+SVD
FAca::recompress(_U,_V,nbRows,nbCols,accuracy,U,VT,rank);
// display rank after recompression
std::cout << " rank after QR+SVD=" << rank << std::endl;
#else
// Resize U and VT
U = new FReal[nbRows*rank];
for(int idxRow = 0 ; idxRow < nbRows ; ++idxRow)
for(int idxCol = 0 ; idxCol < rank ; ++idxCol)
U[idxCol*nbRows+idxRow] = _U[idxCol*nbRows+idxRow];
VT = new FReal[rank*nbCols];
for(int idxRow = 0 ; idxRow < rank ; ++idxRow)
for(int idxCol = 0 ; idxCol < nbCols ; ++idxCol)
VT[idxCol*rank+idxRow] = _V[idxRow*nbCols+idxCol]; // transposition
#endif
// Free memory
delete [] _U;
delete [] _V;
};
// dtor
~FACABlock(){
// Free memory
clear();
};
void clear(){
nbRows = 0;
nbCols = 0;
level = 0;
rank = 0;
delete[] block;
block = 0;
delete[] U;
U = 0;
delete[] VT;
VT = 0;
}
void gemv(FReal res[], const FReal vec[], const FReal scale = FReal(1.)) const {
//// Apply (dense) block
//FReal* res_dense = new FReal[nbRows];
//FBlas::copy(nbRows,res,res_dense);
//FBlas::gemva(nbRows, nbCols, scale, const_cast<FReal*>(block), const_cast<FReal*>(vec), res_dense);
// Apply low-rank block
FReal* VTvec = new FReal[rank];
FBlas::setzero(rank,VTvec);
// Apply VT
FBlas::gemv(rank, nbCols, scale, const_cast<FReal*>(VT), const_cast<FReal*>(vec), VTvec);
// Apply U
FBlas::gemva(nbRows, rank, scale, const_cast<FReal*>(U), const_cast<FReal*>(VTvec), res);
}
void gemm(FReal res[], const FReal mat[], const int nbRhs, const FReal scale = FReal(1.)) const {
//// Apply (dense) block
//FBlas::gemma(nbRows, nbCols, nbRhs, scale, const_cast<FReal*>(block), nbRows, const_cast<FReal*>(mat), nbCols, res, nbRows);
// Apply low-rank block
FReal* VTmat = new FReal[nbCols*nbRhs];
// Apply VT
FBlas::gemm(rank, nbCols, nbRhs, scale, const_cast<FReal*>(VT), rank, const_cast<FReal*>(mat), nbCols, VTmat, rank);
// Apply U
FBlas::gemma(nbRows, rank, nbRhs, scale, const_cast<FReal*>(U), nbRows, const_cast<FReal*>(VTmat), rank, res, nbRows);
}
};
#endif // FACABLOCK_HPP
// [--END--]
......@@ -20,6 +20,8 @@
#include "../Src/Viewers/FMatDense.hpp"
#include "../Src/Blocks/FDenseBlock.hpp"
#include "../Src/Blocks/FSVDBlock.hpp"
#include "../Src/Blocks/FACABlock.hpp"
#include "Utils/FParameters.hpp"
#include "Utils/FParameterNames.hpp"
......@@ -66,11 +68,11 @@ int main(int argc, char** argv){
{
std::cout << "Test Dense:\n";
//typedef FDenseBlock<FReal> LeafClass;
typedef FDenseBlock<FReal> LeafClass;
//typedef FDenseBlock<FReal> CellClass;
typedef FSVDBlock<FReal,7> LeafClass;
typedef FSVDBlock<FReal,7> CellClass;
//typedef FSVDBlock<FReal,7> CellClass;
typedef FACABlock<FReal,7> CellClass;
typedef FStaticDiagonalBisection<FReal, LeafClass, CellClass> GridClass;
GridClass grid(dim, height);
......@@ -98,10 +100,11 @@ int main(int argc, char** argv){
std::cout << "Test Dense with partitions:\n";
//typedef FDenseBlock<FReal> LeafClass;
typedef FDenseBlock<FReal> LeafClass;
//typedef FDenseBlock<FReal> CellClass;
typedef FSVDBlock<FReal,7> LeafClass;
typedef FSVDBlock<FReal,7> CellClass;
//typedef FSVDBlock<FReal,7> CellClass;
typedef FACABlock<FReal,7> CellClass;
typedef FStaticDiagonalBisection<FReal, LeafClass, CellClass> GridClass;
const int nbPartitions = FMath::pow2(height-1);
......
......@@ -21,10 +21,14 @@
#include "Utils/FBlas.hpp"
#include "FChebTensor.hpp"
#include "../Interpolation/FInterpSymmetries.hpp"
#include "FChebM2LHandler.hpp"
#include "Utils/FAca.hpp"
/**
* @author Matthias Messner (matthias.matthias@inria.fr)
* Please read the license
......@@ -40,221 +44,6 @@
/*! The fully pivoted adaptive cross approximation (fACA) compresses a
far-field interaction as \f$K\sim UV^\top\f$. The fACA requires all entries
to be computed beforehand, then the compression follows in
\f$\mathcal{O}(2\ell^3k)\f$ operations based on the required accuracy
\f$\varepsilon\f$. The matrix K will be destroyed as a result.
@param[in] K far-field to be approximated
@param[in] nx number of rows
@param[in] ny number of cols
@param[in] eps prescribed accuracy
@param[out] U matrix containing \a k column vectors
@param[out] V matrix containing \a k row vectors
@param[out] k final low-rank depends on prescribed accuracy \a eps
*/
template <class FReal>
void fACA(FReal *const K,
const unsigned int nx, const unsigned int ny,
const double eps, FReal* &U, FReal* &V, unsigned int &k)
{
// control vectors (true if not used, false if used)
bool *const r = new bool[nx];
bool *const c = new bool[ny];
for (unsigned int i=0; i<nx; ++i) r[i] = true;
for (unsigned int j=0; j<ny; ++j) c[j] = true;
// compute Frobenius norm of original Matrix K
FReal norm2K = 0;
for (unsigned int j=0; j<ny; ++j) {
const FReal *const colK = K + j*nx;
norm2K += FBlas::scpr(nx, colK, colK);
}
// initialize rank k and UV'
k = 0;
const unsigned int maxk = (nx + ny) / 2;
U = new FReal[nx * maxk];
V = new FReal[ny * maxk];
FBlas::setzero(nx*maxk, U);
FBlas::setzero(ny*maxk, V);
FReal norm2R;
////////////////////////////////////////////////
// start fully pivoted ACA
do {
// find max(K) and argmax(K)
FReal maxK = 0.;
unsigned int pi=0, pj=0;
for (unsigned int j=0; j<ny; ++j)
if (c[j]) {
const FReal *const colK = K + j*nx;
for (unsigned int i=0; i<nx; ++i)
if (r[i] && maxK < FMath::Abs(colK[i])) {
maxK = FMath::Abs(colK[i]);
pi = i;
pj = j;
}
}
// copy pivot cross into U and V
FReal *const colU = U + k*nx;
FReal *const colV = V + k*ny;
const FReal pivot = K[pj*nx + pi];
for (unsigned int i=0; i<nx; ++i) if (r[i]) colU[i] = K[pj*nx + i];
for (unsigned int j=0; j<ny; ++j) if (c[j]) colV[j] = K[j *nx + pi] / pivot;
// don't use these cols and rows anymore
c[pj] = false;
r[pi] = false;
// subtract k-th outer product from K
for (unsigned int j=0; j<ny; ++j)
if (c[j]) {
FReal *const colK = K + j*nx;
FBlas::axpy(nx, FReal(-1. * colV[j]), colU, colK);
}
// compute Frobenius norm of updated K
norm2R = 0.0;
for (unsigned int j=0; j<ny; ++j)
if (c[j]) {
const FReal *const colK = K + j*nx;
norm2R += FBlas::scpr(nx, colK, colK);
}
// increment rank k
++k ;
} while (norm2R > eps*eps * norm2K);
////////////////////////////////////////////////
delete [] r;
delete [] c;
}
/*! The partially pivoted adaptive cross approximation (pACA) compresses a
far-field interaction as \f$K\sim UV^\top\f$. The pACA computes the matrix
entries on the fly, as they are needed. The compression follows in
\f$\mathcal{O}(2\ell^3k)\f$ operations based on the required accuracy
\f$\varepsilon\f$. The matrix K will be destroyed as a result.
@tparam ComputerType the functor type which allows to compute matrix entries
@param[in] Computer the entry-computer functor
@param[in] eps prescribed accuracy
@param[in] nx number of rows
@param[in] ny number of cols
@param[out] U matrix containing \a k column vectors
@param[out] V matrix containing \a k row vectors
@param[out] k final low-rank depends on prescribed accuracy \a eps
*/
template <class FReal, typename ComputerType>
void pACA(const ComputerType& Computer,
const unsigned int nx, const unsigned int ny,
const FReal eps, FReal* &U, FReal* &V, unsigned int &k)
{
// control vectors (true if not used, false if used)
bool *const r = new bool[nx];
bool *const c = new bool[ny];
for (unsigned int i=0; i<nx; ++i) r[i] = true;
for (unsigned int j=0; j<ny; ++j) c[j] = true;
// initialize rank k and UV'
k = 0;
const FReal eps2 = eps * eps;
const unsigned int maxk = (nx + ny) / 2;
U = new FReal[nx * maxk];
V = new FReal[ny * maxk];
// initialize norm
FReal norm2S(0.);
FReal norm2uv(0.);
////////////////////////////////////////////////
// start partially pivoted ACA
unsigned int J = 0, I = 0;
do {
FReal *const colU = U + nx*k;
FReal *const colV = V + ny*k;
////////////////////////////////////////////
// compute row I and its residual
Computer(I, I+1, 0, ny, colV);
r[I] = false;
for (unsigned int l=0; l<k; ++l) {
FReal *const u = U + nx*l;
FReal *const v = V + ny*l;
FBlas::axpy(ny, FReal(-1. * u[I]), v, colV);
}
// find max of residual and argmax
FReal maxval = 0.;
for (unsigned int j=0; j<ny; ++j) {
const FReal abs_val = FMath::Abs(colV[j]);
if (c[j] && maxval < abs_val) {
maxval = abs_val;
J = j;
}
}
// find pivot and scale column of V
const FReal pivot = FReal(1.) / colV[J];
FBlas::scal(ny, pivot, colV);
////////////////////////////////////////////
// compute col J and its residual
Computer(0, nx, J, J+1, colU);
c[J] = false;
for (unsigned int l=0; l<k; ++l) {
FReal *const u = U + nx*l;
FReal *const v = V + ny*l;
FBlas::axpy(nx, FReal(-1. * v[J]), u, colU);
}
// find max of residual and argmax
maxval = 0.0;
for (unsigned int i=0; i<nx; ++i) {
const FReal abs_val = FMath::Abs(colU[i]);
if (r[i] && maxval < abs_val) {
maxval = abs_val;
I = i;
}
}
////////////////////////////////////////////
// increment Frobenius norm: |Sk|^2 += |uk|^2 |vk|^2 + 2 sumj ukuj vjvk
FReal normuuvv(0.);
for (unsigned int l=0; l<k; ++l)
normuuvv += FBlas::scpr(nx, colU, U + nx*l) * FBlas::scpr(ny, V + ny*l, colV);
norm2uv = FBlas::scpr(nx, colU, colU) * FBlas::scpr(ny, colV, colV);
norm2S += norm2uv + 2*normuuvv;
////////////////////////////////////////////
// increment low-rank
++k;
} while (norm2uv > eps2 * norm2S);
delete [] r;
delete [] c;
}
/*! Precomputes the 16 far-field interactions (due to symmetries in their
arrangement all 316 far-field interactions can be represented by
permutations of the 16 we compute in this function). Depending on whether
......@@ -338,9 +127,9 @@ static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal
unsigned int rank;
#ifdef FULLY_PIVOTED_ACASVD
fACA(U, nnodes, nnodes, Epsilon, UU, VV, rank);
FAca::fACA(U, nnodes, nnodes, Epsilon, UU, VV, rank);
#else
pACA(Computer, nnodes, nnodes, Epsilon, UU, VV, rank);
FAca::pACA(Computer, nnodes, nnodes, Epsilon, UU, VV, rank);
#endif
// QR decomposition
......
// ===================================================================================
// Copyright ScalFmm 2011 INRIA
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FACA_HPP
#define FACA_HPP
#include "FGlobal.hpp"
#include "FSvd.hpp"
/**
* @class FAca
*
* @brief This class provides the function to perform fully or partially pivoted ACA.
*
*/
namespace FAca {
/*! The fully pivoted adaptive cross approximation (fACA) compresses a
far-field interaction as \f$K\sim UV^\top\f$. The fACA requires all entries
to be computed beforehand, then the compression follows in
\f$\mathcal{O}(2\ell^3k)\f$ operations based on the required accuracy
\f$\varepsilon\f$. The matrix K will be destroyed as a result.
@param[in] K far-field to be approximated
@param[in] nx number of rows
@param[in] ny number of cols
@param[in] eps prescribed accuracy
@param[out] U matrix containing \a k column vectors
@param[out] V matrix containing \a k row vectors
@param[out] k final low-rank depends on prescribed accuracy \a eps
*/
template <class FReal>
void fACA(FReal *const K,
const unsigned int nx, const unsigned int ny,
const double eps, FReal* &U, FReal* &V, unsigned int &k)
{
// control vectors (true if not used, false if used)
bool *const r = new bool[nx];
bool *const c = new bool[ny];
for (unsigned int i=0; i<nx; ++i) r[i] = true;
for (unsigned int j=0; j<ny; ++j) c[j] = true;
// compute Frobenius norm of original Matrix K
FReal norm2K = 0;
for (unsigned int j=0; j<ny; ++j) {
const FReal *const colK = K + j*nx;
norm2K += FBlas::scpr(nx, colK, colK);
}
// initialize rank k and UV'
k = 0;
const unsigned int maxk = (nx + ny) / 2;
U = new FReal[nx * maxk];
V = new FReal[ny * maxk];
FBlas::setzero(nx*maxk, U);
FBlas::setzero(ny*maxk, V);
FReal norm2R;
////////////////////////////////////////////////
// start fully pivoted ACA
do {
// find max(K) and argmax(K)
FReal maxK = 0.;
unsigned int pi=0, pj=0;
for (unsigned int j=0; j<ny; ++j)
if (c[j]) {
const FReal *const colK = K + j*nx;
for (unsigned int i=0; i<nx; ++i)
if (r[i] && maxK < FMath::Abs(colK[i])) {
maxK = FMath::Abs(colK[i]);
pi = i;
pj = j;
}
}
// copy pivot cross into U and V
FReal *const colU = U + k*nx;
FReal *const colV = V + k*ny;
const FReal pivot = K[pj*nx + pi];
for (unsigned int i=0; i<nx; ++i) if (r[i]) colU[i] = K[pj*nx + i];
for (unsigned int j=0; j<ny; ++j) if (c[j]) colV[j] = K[j *nx + pi] / pivot;
// don't use these cols and rows anymore
c[pj] = false;
r[pi] = false;
// subtract k-th outer product from K
for (unsigned int j=0; j<ny; ++j)
if (c[j]) {
FReal *const colK = K + j*nx;
FBlas::axpy(nx, FReal(-1. * colV[j]), colU, colK);
}
// compute Frobenius norm of updated K
norm2R = 0.0;
for (unsigned int j=0; j<ny; ++j)
if (c[j]) {
const FReal *const colK = K + j*nx;
norm2R += FBlas::scpr(nx, colK, colK);
}
// increment rank k
++k ;
} while (norm2R > eps*eps * norm2K);
////////////////////////////////////////////////
delete [] r;
delete [] c;
}
/*! The partially pivoted adaptive cross approximation (pACA) compresses a
far-field interaction as \f$K\sim UV^\top\f$. The pACA computes the matrix
entries on the fly, as they are needed. The compression follows in
\f$\mathcal{O}(2\ell^3k)\f$ operations based on the required accuracy
\f$\varepsilon\f$.
@tparam ComputerType the functor type which allows to compute matrix entries
@param[in] Computer the entry-computer functor
@param[in] eps prescribed accuracy
@param[in] nx number of rows
@param[in] ny number of cols
@param[out] U matrix containing \a k column vectors
@param[out] V matrix containing \a k row vectors
@param[out] k final low-rank depends on prescribed accuracy \a eps
*/
template <class FReal, typename ComputerType>
void pACA(const ComputerType& Computer,
const unsigned int nx, const unsigned int ny,
const FReal eps, FReal* &U, FReal* &V, unsigned int &k)
{
// control vectors (true if not used, false if used)
bool *const r = new bool[nx];
bool *const c = new bool[ny];
for (unsigned int i=0; i<nx; ++i) r[i] = true;
for (unsigned int j=0; j<ny; ++j) c[j] = true;
// initialize rank k and UV'
k = 0;
const FReal eps2 = eps * eps;
const unsigned int maxk = (nx + ny) / 2;
U = new FReal[nx * maxk];
V = new FReal[ny * maxk];
// initialize norm
FReal norm2S(0.);
FReal norm2uv(0.);
////////////////////////////////////////////////
// start partially pivoted ACA
unsigned int J = 0, I = 0;
do {
FReal *const colU = U + nx*k;
FReal *const colV = V + ny*k;
////////////////////////////////////////////
// compute row I and its residual
Computer(I, I+1, 0, ny, colV);
r[I] = false;
for (unsigned int l=0; l<k; ++l) {
FReal *const u = U + nx*l;
FReal *const v = V + ny*l;
FBlas::axpy(ny, FReal(-1. * u[I]), v, colV);
}
// find max of residual and argmax
FReal maxval = 0.;
for (unsigned int j=0; j<ny; ++j) {
const FReal abs_val = FMath::Abs(colV[j]);
if (c[j] && maxval < abs_val) {
maxval = abs_val;
J = j;
}
}
// find pivot and scale column of V
const FReal pivot = FReal(1.) / colV[J];
FBlas::scal(ny, pivot, colV);
////////////////////////////////////////////
// compute col J and its residual
Computer(0, nx, J, J+1, colU);
c[J] = false;
for (unsigned int l=0; l<k; ++l) {
FReal *const u = U + nx*l;
FReal *const v = V + ny*l;
FBlas::axpy(nx, FReal(-1. * v[J]), u, colU);
}
// find max of residual and argmax
maxval = 0.0;
for (unsigned int i=0; i<nx; ++i) {
const FReal abs_val = FMath::Abs(colU[i]);
if (r[i] && maxval < abs_val) {
maxval = abs_val;
I = i;
}
}
////////////////////////////////////////////
// increment Frobenius norm: |Sk|^2 += |uk|^2 |vk|^2 + 2 sumj ukuj vjvk
FReal normuuvv(0.);
for (unsigned int l=0; l<k; ++l)
normuuvv += FBlas::scpr(nx, colU, U + nx*l) * FBlas::scpr(ny, V + ny*l, colV);
norm2uv = FBlas::scpr(nx, colU, colU) * FBlas::scpr(ny, colV, colV);
norm2S += norm2uv + 2*normuuvv;
////////////////////////////////////////////
// increment low-rank
++k;
} while (norm2uv > eps2 * norm2S);
delete [] r;
delete [] c;
}
/*! Perform QR+SVD recompression of the U and V returned by ACA.
This allows to avoid potential redundancies in U and V,
since orthogonality is not ensured by ACA.
@param[in] UU nx \times rank array returned by ACA
@param[in] VV ny \times rank array returned by ACA
@param[in] nx number of rows
@param[in] ny number of cols
@param[in] eps prescribed accuracy
@param[out] K rank\times (nx+ny) array containing U and V after recompression
@param[out] rank final low-rank depends on prescribed accuracy \a eps
*/
template <class FReal>
void recompress(FReal *const UU, FReal *const VV,
const unsigned int nx, const unsigned int ny,
const double eps, FReal* &U, FReal* &VT, unsigned int &rank)
{
// needed for the SVD
int INFO;
// QR decomposition (UU=QuRu & VV=QvRv, thus UUVV^t=Qu(RuRv^t)Qv^t)
FReal* phi = new FReal [rank*rank];
{
// needed for QR
const unsigned int LWORK = 2*4*rank; // for square matrices
//const unsigned int LWORK = 2*std::max(3*minMN+maxMN, 5*minMN);
FReal *const WORK = new FReal [LWORK];
// QR of U and V
FReal* tauU = new FReal [rank];
INFO = FBlas::geqrf(nx, rank, UU, tauU, LWORK, WORK);
assert(INFO==0);
FReal* tauV = new FReal [rank];
INFO = FBlas::geqrf(ny, rank, VV, tauV, LWORK, WORK);
assert(INFO==0);
// phi = Ru Rv'
FReal* rU = new FReal [2 * rank*rank];
FReal* rV = rU + rank*rank;
FBlas::setzero(2 * rank*rank, rU);
for (unsigned int l=0; l<rank; ++l) {
FBlas::copy(l+1, UU + l*nx, rU + l*rank);
FBlas::copy(l+1, VV + l*ny, rV + l*rank);
}
FBlas::gemmt(rank, rank, rank, FReal(1.), rU, rank, rV, rank, phi, rank);
delete [] rU;
// get Qu and Qv
INFO = FBlas::orgqr(nx, rank, UU, tauU, LWORK, WORK); // Qu -> UU
assert(INFO==0);
INFO = FBlas::orgqr(ny, rank, VV, tauV, LWORK, WORK); // Qv -> VV
assert(INFO==0);
delete [] tauU;
delete [] tauV;
}
const unsigned int aca_rank = rank;
// SVD (of phi=RuRv^t, thus phi=UrSVr^t)
FReal *const VrT = new FReal [aca_rank*aca_rank];
FReal *const Sr = new FReal [aca_rank];
{
// needed for SVD
const unsigned int LWORK = 2*4*aca_rank; // for square matrices
//const unsigned int LWORK = 2*std::max(3*minMN+maxMN, 5*minMN);
FReal *const WORK = new FReal [LWORK];
INFO = FBlas::gesvd(aca_rank, aca_rank, phi, Sr, VrT, aca_rank, LWORK, WORK); // Ur -> phi // Sr -> Sr // Vr^t -> VrT
if (INFO!=0){
std::stringstream stream;
stream << INFO;
throw std::runtime_error("SVD did not converge with " + stream.str());
}
rank = FSvd::getRank(Sr, aca_rank, eps);
}
// store Qu(UrS) in U and Vr^t(Qv^t) in VT
{
// allocate
assert(U ==nullptr);
assert(VT==nullptr);
U = new FReal[rank*nx];
VT = new FReal[rank*ny];
// (Ur Sigma)
for (unsigned int r=0; r<rank; ++r)
FBlas::scal(aca_rank, Sr[r], phi + r*aca_rank);
delete [] Sr;
// Qu (Ur Sr) in U
FBlas::gemm(nx, aca_rank, rank, FReal(1.), UU, nx, phi, aca_rank, U, nx);
delete [] phi;
// VT=Vr^t Qv^t
FBlas::gemmt(rank, aca_rank, ny, FReal(1.), VrT, aca_rank, VV, ny, VT, rank);
delete [] VrT;
}
}
};
#endif /* FACA_HPP */
\ No newline at end of file
// ===================================================================================
// Copyright ScalFmm 2011 INRIA
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FSVD_HPP
#define FSVD_HPP
#include "FGlobal.hpp"
#include "Kernels/Chebyshev/FChebTensor.hpp"
/**
* @class FSvd
*
* @brief This class provides some functions related to SVD.
*
*/
namespace FSvd {
/**
* Computes the low-rank \f$k\f$ based on \f$\|K-U\Sigma_kV^\top\|_F \le
* \epsilon \|K\|_F\f$, ie., the truncation rank of the singular value
* decomposition. With the definition of the Frobenius norm \f$\|K\|_F =
* \left(\sum_{i=1}^N \sigma_i^2\right)^{\frac{1}{2}}\f$ the determination of
* the low-rank follows as \f$\|K-U\Sigma_kV^\top\|_F^2 = \sum_{i=k+1}^N
* \sigma_i^2 \le \epsilon^2 \sum_{i=1}^N \sigma_i^2 = \epsilon^2
* \|K\|_F^2\f$.
*
* @param[in] singular_values array of singular values ordered as \f$\sigma_1
* \ge \sigma_2 \ge \dots \ge \sigma_N\f$
* @param[in] eps accuracy \f$\epsilon\f$
*/
template <class FReal, int ORDER>
unsigned int getRank(const FReal singular_values[], const double eps)
{
enum {nnodes = TensorTraits<ORDER>::nnodes};
FReal nrm2(0.);
for (unsigned int k=0; k<nnodes; ++k)
nrm2 += singular_values[k] * singular_values[k];
FReal nrm2k(0.);
for (unsigned int k=nnodes; k>0; --k) {
nrm2k += singular_values[k-1] * singular_values[k-1];
if (nrm2k > eps*eps * nrm2) return k;
}
throw std::runtime_error("rank cannot be larger than nnodes");
return 0;
}
template <class FReal>
unsigned int getRank(const FReal singular_values[], const unsigned int size, const double eps)
{
const FReal nrm2 = FBlas::scpr(size, singular_values, singular_values);
FReal nrm2k(0.);
for (unsigned int k=size; k>0; --k) {
nrm2k += singular_values[k-1] * singular_values[k-1];
if (nrm2k > eps*eps * nrm2) return k;
}
throw std::runtime_error("rank cannot be larger than size");
return 0;
}
};
#endif /* FSVD_HPP */
\ No newline at end of file
// ===================================================================================
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
// ==== CMAKE =====
// @FUSE_MPI
// @FUSE_FFT
// @FUSE_BLAS
// ================
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include "../../Src/Kernels/Rotation/FRotationCell.hpp"
#include "../../Src/Kernels/Rotation/FRotationKernel.hpp"
#include "../../Src/Kernels/Uniform/FUnifCell.hpp"
#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp"
#include "../../Src/Kernels/Uniform/FUnifKernel.hpp"
#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp"
#include "../../Src/Utils/FParameters.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Containers/FOctree.hpp"
#include "../../Src/Containers/FVector.hpp"
#include "../../Src/Files/FRandomLoader.hpp"
#include "../../Src/Files/FMpiTreeBuilder.hpp"
#include "../../Src/Core/FFmmAlgorithm.hpp"
#include "../../Src/Core/FFmmAlgorithmThread.hpp"
#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
#include "../../Src/BalanceTree/FLeafBalance.hpp"
#include "../../Src/Utils/FParameterNames.hpp"
/**
* This program runs the FMM Algorithm Distributed with the uniform kernel
*/
// Simply create particles and try the kernels
int main(int argc, char* argv[])
{
FHelpDescribeAndExit(argc, argv,
"Test with MPI the uniform FMM and compare it to the direct computation for debugging purpose.",
FParameterDefinitions::NbParticles, FParameterDefinitions::OctreeHeight,
FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbThreads);
typedef double FReal;
const unsigned int ORDER = 5;
typedef FP2PParticleContainerIndexed<FReal> ContainerClass;
typedef FSimpleLeaf<FReal, ContainerClass > LeafClass;
// typedef FRotationCell<FReal,ORDER> CellClass;
// typedef FRotationKernel<FReal,CellClass,ContainerClass,ORDER> KernelClass;
typedef FInterpMatrixKernelR<FReal> MatrixKernelClass;
typedef FUnifCell<FReal,ORDER> CellClass;
typedef FUnifKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
typedef FOctree<FReal,CellClass,ContainerClass,LeafClass> OctreeClass;
typedef FFmmAlgorithmThreadProc<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
FMpi app(argc,argv);
const FSize nbParticles = FParameters::getValue(argc,argv, FParameterDefinitions::NbParticles.options, 10000000ULL);
const unsigned int TreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2);
const unsigned int NbThreads = FParameters::getValue(argc, argv, FParameterDefinitions::NbThreads.options, omp_get_max_threads());
FTic time;
std::cout << ">> This executable has to be used to test Proc Uniform Algorithm. \n";
omp_set_num_threads(NbThreads);
std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl;
// init particles position and physical value
struct TestParticle{
FPoint<FReal> position;
FReal physicalValue;
const FPoint<FReal>& getPosition(){
return position;
}
};
// open particle file
std::cout << "Creating : " << nbParticles << "\n" << std::endl;
FRandomLoader<FReal> loader(nbParticles, 1.0, FPoint<FReal>(0,0,0), app.global().processId());
OctreeClass tree(TreeHeight, SubTreeHeight,loader.getBoxWidth(),loader.getCenterOfBox());
time.tic();
TestParticle* particles = new TestParticle[loader.getNumberOfParticles()];
memset(particles,0,(unsigned int) (sizeof(TestParticle)* loader.getNumberOfParticles()));
for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
loader.fillParticle(&particles[idxPart].position);
particles[idxPart].physicalValue = 1.0;
}
FVector<TestParticle> finalParticles;
FLeafBalance balancer;
FMpiTreeBuilder< FReal,TestParticle >::DistributeArrayToContainer(app.global(),particles,
loader.getNumberOfParticles(),
tree.getBoxCenter(),
tree.getBoxWidth(),tree.getHeight(),
&finalParticles, &balancer);
{ // -----------------------------------------------------
std::cout << "Creating & Inserting " << loader.getNumberOfParticles() << " particles ..." << std::endl;
std::cout << "For a total of " << loader.getNumberOfParticles() * app.global().processCount() << " particles ..." << std::endl;
std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl;
time.tic();
for(FSize idxPart = 0 ; idxPart < finalParticles.getSize() ; ++idxPart){
// put in tree
tree.insert(finalParticles[idxPart].position, idxPart, finalParticles[idxPart].physicalValue);
}
time.tac();
std::cout << "Done " << "(@Creating and Inserting Particles = "
<< time.elapsed() << "s)." << std::endl;
} // -----------------------------------------------------
delete[] particles;
particles = 0;
{ // -----------------------------------------------------
std::cout << "Uniform FMM (ORDER="<< ORDER << ") ... " << std::endl;
time.tic();
// KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox());
// Create Matrix Kernel
const MatrixKernelClass MatrixKernel;
KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),&MatrixKernel);
FmmClass algorithm(app.global(),&tree, &kernels);
time.tac();
std::cout << "Done " << "(@Init = " << time.elapsed() << "s)." << std::endl;
time.tic();
algorithm.execute();
time.tac();
std::cout << "Done " << "(@Algorithm = " << time.elapsed() << "s)." << std::endl;
} // -----------------------------------------------------
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment