Mentions légales du service

Skip to content
Snippets Groups Projects
Commit cb425050 authored by Matthias Messner's avatar Matthias Messner
Browse files

some optimizations in the M2L

parent 2fa2c86c
No related branches found
No related tags found
No related merge requests found
...@@ -154,6 +154,57 @@ public: ...@@ -154,6 +154,57 @@ public:
void M2L(CellClass* const FRestrict TargetCell,
const CellClass* SourceCells[343],
const int NumSourceCells,
const int TreeLevel)
{
// permute and copy multipole expansion
memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<343; ++idx) {
if (SourceCells[idx]) {
const unsigned int pidx = SymHandler->pindices[idx];
const unsigned int count = (countExp[pidx])++;
FReal *const mul = Mul[pidx] + count*nnodes;
const unsigned int *const pvec = SymHandler->pvectors[idx];
const FReal *const MultiExp = SourceCells[idx]->getMultipole();
for (unsigned int n=0; n<nnodes; ++n)
mul[pvec[n]] = MultiExp[n];
}
}
// multiply (mat-mat-mul)
FReal Compressed [nnodes * 30];
const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(FMath::pow(2, TreeLevel)));
const FReal scale(AbstractBaseClass::MatrixKernel->getScaleFactor(CellWidth));
for (unsigned int pidx=0; pidx<343; ++pidx) {
const unsigned int count = countExp[pidx];
if (count) {
const unsigned int rank = SymHandler->LowRank[pidx];
FBlas::gemtm(nnodes, rank, count, FReal(1.),
SymHandler->K[pidx]+rank*nnodes, nnodes, Mul[pidx], nnodes, Compressed, rank);
FBlas::gemm( nnodes, rank, count, scale,
SymHandler->K[pidx], nnodes, Compressed, rank, Loc[pidx], nnodes);
}
}
// permute and add contribution to local expansions
FReal *const LocalExpansion = TargetCell->getLocal();
memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<343; ++idx) {
if (SourceCells[idx]) {
const unsigned int pidx = SymHandler->pindices[idx];
const unsigned int count = (countExp[pidx])++;
const FReal *const loc = Loc[pidx] + count*nnodes;
const unsigned int *const pvec = SymHandler->pvectors[idx];
for (unsigned int n=0; n<nnodes; ++n)
LocalExpansion[n] += loc[pvec[n]];
}
}
}
/*
void M2L(CellClass* const FRestrict TargetCell, void M2L(CellClass* const FRestrict TargetCell,
const CellClass* SourceCells[343], const CellClass* SourceCells[343],
const int NumSourceCells, const int NumSourceCells,
...@@ -198,8 +249,8 @@ public: ...@@ -198,8 +249,8 @@ public:
} }
} }
} }
*/
/* /*
void M2L(CellClass* const FRestrict TargetCell, void M2L(CellClass* const FRestrict TargetCell,
......
...@@ -99,8 +99,8 @@ class FChebSymmetries ...@@ -99,8 +99,8 @@ class FChebSymmetries
const unsigned int getPermutationArrayAndIndex(const int i, const int j, const int k, unsigned int getPermutationArrayAndIndex(const int i, const int j, const int k,
unsigned int permutation[nnodes]) const unsigned int permutation[nnodes]) const
{ {
// find right quadrant index (if < 0 then 0, else 1) // find right quadrant index (if < 0 then 0, else 1)
const unsigned int qidx = getQuadIdx(i,j,k); const unsigned int qidx = getQuadIdx(i,j,k);
......
...@@ -105,8 +105,8 @@ int main(int argc, char* argv[]) ...@@ -105,8 +105,8 @@ int main(int argc, char* argv[])
typedef FChebMatrixKernelR MatrixKernelClass; typedef FChebMatrixKernelR MatrixKernelClass;
typedef FChebCell<ORDER> CellClass; typedef FChebCell<ORDER> CellClass;
typedef FOctree<ParticleClass,CellClass,ContainerClass,LeafClass> OctreeClass; typedef FOctree<ParticleClass,CellClass,ContainerClass,LeafClass> OctreeClass;
typedef FChebKernel<ParticleClass,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; //typedef FChebKernel<ParticleClass,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
//typedef FChebSymKernel<ParticleClass,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; typedef FChebSymKernel<ParticleClass,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
//typedef FFmmAlgorithm<OctreeClass,ParticleClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; //typedef FFmmAlgorithm<OctreeClass,ParticleClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
typedef FFmmAlgorithmThread<OctreeClass,ParticleClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; typedef FFmmAlgorithmThread<OctreeClass,ParticleClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment