Commit 45b9d50e authored by berenger-bramas's avatar berenger-bramas
Browse files

Progress in the development of the new kernel.

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@293 2616d619-271b-44dc-8df4-d4a8f33a7222
parent 57e8751a
......@@ -6,6 +6,9 @@
#include "../Utils/FGlobal.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FMemUtils.hpp"
#include "../Containers/FTreeCoordinate.hpp"
#include "FHarmonic.hpp"
......@@ -18,15 +21,78 @@
template< class ParticleClass, class CellClass, class ContainerClass>
class FElecForcesKernels : public FAbstractKernels<ParticleClass,CellClass,ContainerClass> {
int devP;
int treeHeight;
FHarmonic harmonic;
FComplexe* preL2LTransitions;
FComplexe* preM2MTransitions;
FComplexe* preM2LTransitions;
int* preExpRedirJ;
int indexTransition(const int level, const int child){
return level * 8 * harmonic.getExpSize() + child * harmonic.getExpSize();
}
void init(){
preL2LTransitions = new FComplexe[treeHeight * 8 * harmonic.getExpSize()];
preM2MTransitions = new FComplexe[treeHeight * 8 * harmonic.getExpSize()];
FReal treeWidthAtLevel = this->treeWidthAtRoot/2;
for(int idxLevel = 0 ; idxLevel < this->TreeHeight - 1 ; ++idxLevel ){
const F3DPosition father(treeWidthAtLevel,treeWidthAtLevel,treeWidthAtLevel);
treeWidthAtLevel /= 2;
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild ){
FTreeCoordinate childBox;
childBox.setPositionFromMorton(idxChild,1);
const F3DPosition M2MVector (
father.getX() - (treeWidthAtLevel * FReal(1 + (childBox.getX() * 2))),
father.getY() - (treeWidthAtLevel * FReal(1 + (childBox.getY() * 2))),
father.getZ() - (treeWidthAtLevel * FReal(1 + (childBox.getZ() * 2)))
);
harmonic.computeInner(FSpherical(M2MVector));
copyall<FComplexe>(&preM2MTransitions[indexTransition(idxLevel,idxChild)], harmonic.result(), harmonic.getExpSize());
const F3DPosition L2LVector (
(treeWidthAtLevel * FReal(1 + (childBox.getX() * 2))) - father.getX(),
(treeWidthAtLevel * FReal(1 + (childBox.getY() * 2))) - father.getY(),
(treeWidthAtLevel * FReal(1 + (childBox.getZ() * 2))) - father.getZ()
);
harmonic.computeInner(FSpherical(L2LVector));
copyall<FComplexe>(&preL2LTransitions[indexTransition(idxLevel,idxChild)], harmonic.result(), harmonic.getExpSize());
}
}
preM2LTransitions = new FComplexe[treeHeight * (7 * 7 * 7) * ];
preExpRedirJ = new int[2 * devP + 1];
for( int h = 0; h <= (2 * devP) ; ++h ){
preExpRedirJ[h] = static_cast<int>( h * ( h + 1 ) * 0.5 );
}
}
public:
FElecForcesKernels(const int inDevP)
: devP(inDevP), harmonic(inDevP) {
/** Kernel constructor */
FElecForcesKernels(const int inDevP, const int inTreeHeight)
: devP(inDevP), treeHeight(inTreeHeight), harmonic(inDevP),
preL2LTransitions(0), preM2MTransitions(0), preM2LTransitions(0), preExpRedirJ(0) {
init();
}
/** Default destructor */
~FElecForcesKernels(){
delete[] preL2LTransitions;
delete[] preM2MTransitions;
delete[] preM2LTransitions;
delete[] preExpRedirJ;
}
/** P2M with a cell and all its particles */
......@@ -43,19 +109,41 @@ public:
}
}
/** Do nothing */
void M2M(CellClass* const FRestrict , const CellClass*const FRestrict *const FRestrict , const int ) {
/** M2M with a cell and all its child */
void M2M(CellClass* const FRestrict inPole, const CellClass *const FRestrict *const FRestrict inChild, const int inLevel) {
FComplexe* const multipole_exp_target = inPole->getMultipole();
// iter on each child and process M2M
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
if(inChild[idxChild]){
multipoleToMultipole(multipole_exp_target, inChild[idxChild]->getMultipole(), preM2MTransitions[indexTransition(inLevel,idxChild)]);
}
}
}
/** Do nothing */
virtual void M2L(CellClass* const FRestrict , const CellClass* [], const int , const int ) {
/** M2L with a cell and all the existing neighbors */
void M2L(CellClass* const FRestrict pole, const CellClass* distantNeighbors[189],
const int size, const int inLevel) {
const FTreeCoordinate coordCenterHalphSizeDim(pole->getCoordinate(), halphSize1Dim);
// For all neighbors compute M2L
for(int idxSize = 0 ; idxSize < size ; ++idxSize){
const FTreeCoordinate& coordNeighbors = distantNeighbors[idxSize]->getCoordinate();
const FComplexe* const transitionVector = preM2LTransitions[inLevel]
[(coordCenterHalphSizeDim.getX() - coordNeighbors.getX())]
[(coordCenterHalphSizeDim.getY() - coordNeighbors.getY())]
[(coordCenterHalphSizeDim.getZ() - coordNeighbors.getZ())];
multipoleToLocal(pole->getLocal(), distantNeighbors[idxSize]->getMultipole(), transitionVector);
}
}
/** Do nothing */
void L2L(const CellClass* const FRestrict , CellClass* FRestrict *const FRestrict , const int ) {
/** L2L with a cell and all its child */
void L2L(const CellClass* const FRestrict pole, CellClass* FRestrict *const FRestrict child, const int inLevel) {
// iter on each child and process L2L
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
if(child[idxChild]){
localToLocal(child[idxChild]->getLocal(), pole->getLocal(), preL2LTransitions[indexTransition(inLevel,idxChild)]);
}
}
}
/** L2P with a cell and all its particles */
......@@ -191,7 +279,7 @@ public:
///////////////////////////////////////////////////////////////////////////////
// Computation
///////////////////////////////////////////////////////////////////////////////
private:
/** P2M computation
*/
......@@ -213,6 +301,188 @@ public:
}
}
void multipoleToMultipole(FComplexe* const multipole_exp_target,
const FComplexe* const multipole_exp_src, const FComplexe* const M2M_transfer){
for(int n = 0 ; n <= FMB_Info_P ; ++n ){
// l<0 // (-1)^l
FReal pow_of_minus_1_for_l = static_cast<FReal>( n % 2 ? -1.0 : 1.0);
// O_n^l : here points on the source multipole expansion term of degree n and order |l|
const FComplexe* p_src_exp_term = multipole_exp_src + preExpRedirJ[n]+n;
int l = -n;
for(; l<0 ; ++l, --p_src_exp_term, pow_of_minus_1_for_l = -pow_of_minus_1_for_l){
for(int j = n ; j<= FMB_Info_P ; ++j ){
// M_j^k
FComplexe *p_target_exp_term = multipole_exp_target + preExpRedirJ[j];
// Inner_{j-n}^{k-l} : here points on the M2M transfer function/expansion term of degree n-j and order |k-l|
const FComplexe *p_Inner_term= M2M_transfer + preExpRedirJ[j-n]-l /* k==0 */;
// since n-j+l<0
for(int k=0 ; k <= (j-n+l) ; ++k, ++p_target_exp_term, ++p_Inner_term){ // l<0 && k>=0 => k-l>0
p_target_exp_term->incReal( pow_of_minus_1_for_l *
((p_src_exp_term->getReal() * p_Inner_term->getReal()) +
(p_src_exp_term->getImag() * p_Inner_term->getImag())));
p_target_exp_term->incImag( pow_of_minus_1_for_l *
((p_src_exp_term->getReal() * p_Inner_term->getImag()) -
(p_src_exp_term->getImag() * p_Inner_term->getReal())));
} // for k
} // for j
} // for l
// l>=0
for(; l <= n ; ++l, ++p_src_exp_term, pow_of_minus_1_for_l = -pow_of_minus_1_for_l){
for( int j=n ; j <= FMB_Info_P ; ++j ){
// (-1)^k
FReal pow_of_minus_1_for_k = static_cast<FReal>( FMath::Max(0,n-j+l) %2 ? -1.0 : 1.0 );
// M_j^k
FComplexe *p_target_exp_term = multipole_exp_target + preExpRedirJ[j] + FMath::Max(0,n-j+l);
// Inner_{j-n}^{k-l} : here points on the M2M transfer function/expansion term of degree n-j and order |k-l|
const FComplexe *p_Inner_term = M2M_transfer + preExpRedirJ[j-n] + l - FMath::Max(0,n-j+l);// -(k-l)
int k = FMath::Max(0,n-j+l);
for(; k <= (j-n+l) && (k-l) < 0 ; ++k, ++p_target_exp_term, --p_Inner_term, pow_of_minus_1_for_k = -pow_of_minus_1_for_k){ /* l>=0 && k-l<0 */
p_target_exp_term->incReal( pow_of_minus_1_for_k * pow_of_minus_1_for_l *
((p_src_exp_term->getReal() * p_Inner_term->getReal()) +
(p_src_exp_term->getImag() * p_Inner_term->getImag())));
p_target_exp_term->incImag(pow_of_minus_1_for_k * pow_of_minus_1_for_l *
((p_src_exp_term->getImag() * p_Inner_term->getReal()) -
(p_src_exp_term->getReal() * p_Inner_term->getImag())));
} // for k
for(; k <= (j - n + l) ; ++k, ++p_target_exp_term, ++p_Inner_term){ // l>=0 && k-l>=0
p_target_exp_term->incReal(
(p_src_exp_term->getReal() * p_Inner_term->getReal()) -
(p_src_exp_term->getImag() * p_Inner_term->getImag()));
p_target_exp_term->incImag(
(p_src_exp_term->getImag() * p_Inner_term->getReal()) +
(p_src_exp_term->getReal() * p_Inner_term->getImag()));
} // for k
} // for j
} // for l
} // for n
}
/** M2L
*/
void multipoleToLocal(FComplexe*const local_exp, const FComplexe* const multipole_exp_src,
const FComplexe* const M2L_transfer){
FComplexe* p_target_exp_term = local_exp;
// L_j^k
int start_for_j = 0;
// HPMSTART(51, "M2L computation (loops)");
for (int j = start_for_j ; j <= FMB_Info_P ; ++j){
int stop_for_n = devP;
//stop_for_n = FMB_Info_P - j;
// (-1)^k
FReal pow_of_minus_1_for_k = 1.0;
for (int k = 0 ; k <= j ; ++k, pow_of_minus_1_for_k = -pow_of_minus_1_for_k, ++p_target_exp_term){
// (-1)^n
FReal pow_of_minus_1_for_n = 1.0;
for (int n = 0 ; n <= stop_for_n ; ++n, pow_of_minus_1_for_n = -pow_of_minus_1_for_n){
// O_n^l : here points on the source multipole expansion term of degree n and order |l|
const FComplexe *p_src_exp_term = multipole_exp_src + preExpRedirJ[n] + n;
// Outer_{j+n}^{-k-l} : here points on the M2L transfer function/expansion term of degree j+n and order |-k-l|
const FComplexe *p_Outer_term = M2L_transfer + preExpRedirJ[n+j] + k+n;
FReal pow_of_minus_1_for_l = pow_of_minus_1_for_n; // (-1)^l
// We start with l=n (and not l=-n) so that we always set p_Outer_term to a correct value in the first loop.
int l=n;
for ( ; l>0 ; --l, pow_of_minus_1_for_l = -pow_of_minus_1_for_l, --p_src_exp_term, --p_Outer_term){ // we have -k-l<0 and l>0
p_target_exp_term->incReal( pow_of_minus_1_for_l * pow_of_minus_1_for_k *
((p_src_exp_term->getReal() * p_Outer_term->getReal()) +
(p_src_exp_term->getImag() * p_Outer_term->getImag())));
p_target_exp_term->incImag( pow_of_minus_1_for_l * pow_of_minus_1_for_k *
((p_src_exp_term->getImag() * p_Outer_term->getReal()) -
(p_src_exp_term->getReal() * p_Outer_term->getImag())));
}
for (; l>=-n && -k-l<0 ; --l, pow_of_minus_1_for_l = -pow_of_minus_1_for_l, ++p_src_exp_term, --p_Outer_term){ // we have -k-l<0 and l<=0
p_target_exp_term->incReal( pow_of_minus_1_for_k *
((p_src_exp_term->getReal() * p_Outer_term->getReal()) -
(p_src_exp_term->getImag() * p_Outer_term->getImag())));
p_target_exp_term->decImag( pow_of_minus_1_for_k *
((p_src_exp_term->getImag() * p_Outer_term->getReal()) +
(p_src_exp_term->getReal() * p_Outer_term->getImag())));
}
for (; l>=-n; --l, pow_of_minus_1_for_l = -pow_of_minus_1_for_l, ++p_src_exp_term, ++p_Outer_term){ // we have -k-l>=0 and l<=0
p_target_exp_term->incReal( pow_of_minus_1_for_l *
((p_src_exp_term->getReal() * p_Outer_term->getReal()) +
(p_src_exp_term->getImag() * p_Outer_term->getImag())));
p_target_exp_term->incImag( pow_of_minus_1_for_l *
((p_src_exp_term->getReal() * p_Outer_term->getImag()) -
(p_src_exp_term->getImag() * p_Outer_term->getReal())));
}
}
}
}
}
/** L2L
*/
void localToLocal(FComplexe* const FRestrict local_exp_target, const FComplexe* const FRestrict local_exp_src,
const FComplexe* const FRestrict L2L_tranfer){
// L_j^k
FComplexe* p_target_exp_term = local_exp_target;
for (int j=0 ; j<= FMB_Info_P ; ++j){
// (-1)^k
FReal pow_of_minus_1_for_k = 1.0;
for (int k=0 ; k <= j ; ++k, pow_of_minus_1_for_k = -pow_of_minus_1_for_k, ++p_target_exp_term){
for (int n=j; n<=FMB_Info_P;++n){
// O_n^l : here points on the source multipole expansion term of degree n and order |l|
const FComplexe* p_src_exp_term = local_exp_src + preExpRedirJ[n] + n-j+k;
//printf("preExpRedirJ[n] + n-j+k %d\n", preExpRedirJ[n] + n-j+k);
int l = n-j+k;
// Inner_{n-j}^{l-k} : here points on the L2L transfer function/expansion term of degree n-j and order |l-k|
const FComplexe* p_Inner_term = L2L_tranfer + preExpRedirJ[n-j] + l-k;
for ( ; l-k>0; --l, --p_src_exp_term, --p_Inner_term){ /* l>0 && l-k>0 */
p_target_exp_term->incReal( (p_src_exp_term->getReal() * p_Inner_term->getReal()) -
(p_src_exp_term->getImag() * p_Inner_term->getImag()));
p_target_exp_term->incImag( (p_src_exp_term->getImag() * p_Inner_term->getReal()) +
(p_src_exp_term->getReal() * p_Inner_term->getImag()));
}
// (-1)^l
FReal pow_of_minus_1_for_l = static_cast<FReal>((l%2) ? -1.0 : 1.0);
for (; l>0 && l>=j-n+k; --l, pow_of_minus_1_for_l = -pow_of_minus_1_for_l, --p_src_exp_term, ++p_Inner_term){ /* l>0 && l-k<=0 */
p_target_exp_term->incReal( pow_of_minus_1_for_l * pow_of_minus_1_for_k *
((p_src_exp_term->getReal() * p_Inner_term->getReal()) +
(p_src_exp_term->getImag() * p_Inner_term->getImag())));
p_target_exp_term->incImag( pow_of_minus_1_for_l * pow_of_minus_1_for_k *
((p_src_exp_term->getImag() * p_Inner_term->getReal()) -
(p_src_exp_term->getReal() * p_Inner_term->getImag())));
}
// l<=0 && l-k<=0
for (; l>=j-n+k; --l, ++p_src_exp_term, ++p_Inner_term){
p_target_exp_term->incReal( pow_of_minus_1_for_k *
((p_src_exp_term->getReal() * p_Inner_term->getReal()) -
(p_src_exp_term->getImag() * p_Inner_term->getImag())));
p_target_exp_term->decImag( pow_of_minus_1_for_k *
((p_src_exp_term->getImag() * p_Inner_term->getReal()) +
(p_src_exp_term->getReal() * p_Inner_term->getImag())));
}
}
}
}
}
/** L2P
*/
void localToParticle(ParticleClass*const particle, const F3DPosition& local_position,
const FComplexe*const local_exp){
FReal force_vector_in_local_base_x = 0;
......@@ -223,9 +493,9 @@ public:
harmonic.computeInnerTheta( spherical );
// The maximum degree used here will be P.
const FComplexe* p_Y_term = current_thread_Y+1;
const FComplexe* p_Y_theta_derivated_term = current_thread_Y_theta_derivated+1;
const FComplexe* p_local_exp_term = local_exp+1;
const FComplexe* p_Y_term = harmonic.result() + 1;
const FComplexe* p_Y_theta_derivated_term = harmonic.resultThetaDerivated() + 1;
const FComplexe* p_local_exp_term = local_exp + 1;
for (int j = 1 ; j <= devP ; ++j ){
FReal exp_term_aux_real = 0.0;
......
......@@ -48,8 +48,15 @@ class FHarmonic {
FReal* sphereHarmoInnerCoef;
FReal* sphereHarmoOuterCoef;
void init(){
harmonic = new FComplexe[expSize];
cosSin = new FComplexe[2 * devP + 1];
legendre = new FReal[expSize];
thetaDerivatedResult = new FComplexe[expSize];
sphereHarmoInnerCoef = new FReal[int(((inDevP*2)+1) * ((inDevP*2)+2) * 0.5)];
sphereHarmoOuterCoef = new FReal[devP + 1];
void sphericalHarmonicInitialize(){
FReal factOuter = 1.0;
for(int idxP = 0 ; idxP <= devP; factOuter *= FReal(++idxP) ){
sphereHarmoOuterCoef[idxP] = factOuter;
......@@ -111,21 +118,23 @@ class FHarmonic {
}
}
FHarmonic& operator=(const FHarmonic&){ return *this;}
public:
explicit FHarmonic(const int inDevP)
: devP(inDevP),expSize(int(((inDevP)+1) * ((inDevP)+2) * 0.5)),
harmonic(0), cosSin(0), legendre(0), thetaDerivatedResult(0),
sphereHarmoInnerCoef(0), sphereHarmoOuterCoef(0) {
harmonic = new FComplexe[expSize];
cosSin = new FComplexe[2 * devP + 1];
legendre = new FReal[expSize];
init();
}
thetaDerivatedResult = new FComplexe[expSize];
sphereHarmoInnerCoef = new FReal[expSize];
sphereHarmoOuterCoef = new FReal[devP + 1];
FHarmonic(const FHarmonic& other)
: devP(other.devP),expSize(int(((other.devP)+1) * ((other.devP)+2) * 0.5)),
harmonic(0), cosSin(0), legendre(0), thetaDerivatedResult(0),
sphereHarmoInnerCoef(0), sphereHarmoOuterCoef(0) {
sphericalHarmonicInitialize();
init();
}
~FHarmonic(){
......@@ -137,11 +146,15 @@ public:
delete[] sphereHarmoOuterCoef;
}
FComplexe* data(){
int getExpSize() const{
return expSize;
}
FComplexe* result(){
return harmonic;
}
const FComplexe* data() const {
const FComplexe* result() const {
return harmonic;
}
......@@ -153,6 +166,23 @@ public:
return harmonic[index];
}
FComplexe* resultThetaDerivated(){
return thetaDerivatedResult;
}
const FComplexe* resultThetaDerivated() const {
return thetaDerivatedResult;
}
FComplexe& resultThetaDerivated(const int index){
return thetaDerivatedResult[index];
}
const FComplexe& resultThetaDerivated(const int index) const{
return thetaDerivatedResult[index];
}
void computeInner(const FSpherical& inSphere){
computeCosSin(inSphere.getPhi());
......
......@@ -46,6 +46,12 @@ namespace FMemUtils {
}
}
};
template <class TypeClass>
void copyall(TypeClass*const dest, const TypeClass*const source, const int nbElements){
for(int idx = 0 ; idx < nbElements ; ++idx){
dest[idx] = source[idx];
}
}
}
#endif // FMEMUTILS_HPP
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment