diff --git a/Src/Kernels/Interpolation/FInterpCell.hpp b/Src/Kernels/Interpolation/FInterpCell.hpp index 8b49b297b2f8ba7ba06973f957a1d00c57480ae1..3f450ee9feeacd7e5e687e4744fa9c3fa6797b75 100644 --- a/Src/Kernels/Interpolation/FInterpCell.hpp +++ b/Src/Kernels/Interpolation/FInterpCell.hpp @@ -24,7 +24,7 @@ #include "../../Components/FBasicCell.hpp" #include "../../Extensions/FExtendCellType.hpp" -#include "../../Utils/FComplexe.hpp" +#include "../../Utils/FComplex.hpp" /** * @author Pierre Blanchard (pierre.blanchard@inria.fr) @@ -51,17 +51,17 @@ class FInterpCell : public FBasicCell FReal multipole_exp[NRHS * NVALS * VectorSize]; //< Multipole expansion FReal local_exp[NLHS * NVALS * VectorSize]; //< Local expansion // PB: Store multipole and local expansion in Fourier space - FComplexe transformed_multipole_exp[NRHS * NVALS * TransformedVectorSize]; - FComplexe transformed_local_exp[NLHS * NVALS * TransformedVectorSize]; + FComplex transformed_multipole_exp[NRHS * NVALS * TransformedVectorSize]; + FComplex transformed_local_exp[NLHS * NVALS * TransformedVectorSize]; public: FInterpCell(){ memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize); memset(local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize); memset(transformed_multipole_exp, 0, - sizeof(FComplexe) * NRHS * NVALS * TransformedVectorSize); + sizeof(FComplex) * NRHS * NVALS * TransformedVectorSize); memset(transformed_local_exp, 0, - sizeof(FComplexe) * NLHS * NVALS * TransformedVectorSize); + sizeof(FComplex) * NLHS * NVALS * TransformedVectorSize); } ~FInterpCell() {} @@ -90,20 +90,20 @@ public: } /** Get Transformed Multipole */ - const FComplexe* getTransformedMultipole(const int inRhs) const{ + const FComplex* getTransformedMultipole(const int inRhs) const{ return this->transformed_multipole_exp + inRhs*TransformedVectorSize; } /** Get Transformed Local */ - const FComplexe* getTransformedLocal(const int inRhs) const{ + const FComplex* getTransformedLocal(const int inRhs) const{ return this->transformed_local_exp + inRhs*TransformedVectorSize; } /** Get Transformed Multipole */ - FComplexe* getTransformedMultipole(const int inRhs){ + FComplex* getTransformedMultipole(const int inRhs){ return this->transformed_multipole_exp + inRhs*TransformedVectorSize; } /** Get Transformed Local */ - FComplexe* getTransformedLocal(const int inRhs){ + FComplex* getTransformedLocal(const int inRhs){ return this->transformed_local_exp + inRhs*TransformedVectorSize; } @@ -117,9 +117,9 @@ public: memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize); memset(local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize); memset(transformed_multipole_exp, 0, - sizeof(FComplexe) * NRHS * NVALS * TransformedVectorSize); + sizeof(FComplex) * NRHS * NVALS * TransformedVectorSize); memset(transformed_local_exp, 0, - sizeof(FComplexe) * NLHS * NVALS * TransformedVectorSize); + sizeof(FComplex) * NLHS * NVALS * TransformedVectorSize); } /////////////////////////////////////////////////////// @@ -171,7 +171,7 @@ public: } static constexpr int GetSize(){ - return (NRHS+NLHS)*NVALS*VectorSize * (int) sizeof(FReal) + (NRHS+NLHS)*NVALS*TransformedVectorSize * (int) sizeof(FComplexe); + return (NRHS+NLHS)*NVALS*VectorSize * (int) sizeof(FReal) + (NRHS+NLHS)*NVALS*TransformedVectorSize * (int) sizeof(FComplex); } diff --git a/Src/Kernels/P2P/FP2P.hpp b/Src/Kernels/P2P/FP2P.hpp index f8b0c87520599e24b249d48c444f854334327c4a..4c1d410a84dfaecb77c554dc7e1b6c372ffcf8b4 100644 --- a/Src/Kernels/P2P/FP2P.hpp +++ b/Src/Kernels/P2P/FP2P.hpp @@ -34,10 +34,10 @@ namespace FP2P { */ template inline void MutualParticles(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal sourcePhysicalValue, - FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, - const FReal targetX,const FReal targetY,const FReal targetZ, const FReal targetPhysicalValue, - FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, - const MatrixKernelClass *const MatrixKernel){ + FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, + const FReal targetX,const FReal targetY,const FReal targetZ, const FReal targetPhysicalValue, + FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, + const MatrixKernelClass *const MatrixKernel){ // Compute kernel of interaction... const FPoint sourcePoint(sourceX,sourceY,sourceZ); @@ -75,9 +75,9 @@ inline void MutualParticles(const FReal sourceX,const FReal sourceY,const FReal */ template inline void NonMutualParticles(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal sourcePhysicalValue, - const FReal targetX,const FReal targetY,const FReal targetZ, const FReal targetPhysicalValue, - FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, - const MatrixKernelClass *const MatrixKernel){ + const FReal targetX,const FReal targetY,const FReal targetZ, const FReal targetPhysicalValue, + FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, + const MatrixKernelClass *const MatrixKernel){ // Compute kernel of interaction... const FPoint sourcePoint(sourceX,sourceY,sourceZ); @@ -112,7 +112,7 @@ inline void NonMutualParticles(const FReal sourceX,const FReal sourceY,const FRe */ template inline void FullMutualRIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock @@ -122,158 +122,158 @@ inline void FullMutualRIJ(ContainerClass* const FRestrict inTargets, ContainerCl const FReal*const targetsZ = inTargets->getPositions()[2]; for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ - for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - if( inNeighbors[idxNeighbors] ){ - const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); - const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; - const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; - const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; - - for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ - FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; - FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; - FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentials(i); - FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesX(i); - FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesY(i); - FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZ(i); - - FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); - const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j); - - // potentials - FReal potentialCoef; - if(i==j) - potentialCoef = inv_distance - ri2 * inv_distance_pow3; - else - potentialCoef = - r[i] * r[j] * inv_distance_pow3; - - // forces - FReal rj2=r[j]*r[j]; - - FReal coef[3]; - for(unsigned int k = 0 ; k < 3 ; ++k) - coef[k]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); - - // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel - for(unsigned int k = 0 ; k < 3 ; ++k){ - if(i==j){ - if(j==k) //i=j=k - coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - }// k - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); - - sourcesForcesX[idxSource] -= coef[0]; - sourcesForcesY[idxSource] -= coef[1]; - sourcesForcesZ[idxSource] -= coef[2]; - sourcesPotentials[idxSource] += potentialCoef * targetsPhysicalValues[idxTarget]; - - - }// j - }// i - } - } - } + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + if( inNeighbors[idxNeighbors] ){ + const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); + const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; + const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; + const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; + + for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ + FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; + FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; + FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentials(i); + FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesX(i); + FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesY(i); + FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZ(i); + + FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); + const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j); + + // potentials + FReal potentialCoef; + if(i==j) + potentialCoef = inv_distance - ri2 * inv_distance_pow3; + else + potentialCoef = - r[i] * r[j] * inv_distance_pow3; + + // forces + FReal rj2=r[j]*r[j]; + + FReal coef[3]; + for(unsigned int k = 0 ; k < 3 ; ++k) + coef[k]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); + + // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel + for(unsigned int k = 0 ; k < 3 ; ++k){ + if(i==j){ + if(j==k) //i=j=k + coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + }// k + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); + + sourcesForcesX[idxSource] -= coef[0]; + sourcesForcesY[idxSource] -= coef[1]; + sourcesForcesZ[idxSource] -= coef[2]; + sourcesPotentials[idxSource] += potentialCoef * targetsPhysicalValues[idxTarget]; + + + }// j + }// i + } + } + } } for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){ - FReal dx = targetsX[idxSource] - targetsX[idxTarget]; - FReal dy = targetsY[idxSource] - targetsY[idxTarget]; - FReal dz = targetsZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); - - // potentials - FReal potentialCoef; - if(i==j) - potentialCoef = inv_distance - ri2 * inv_distance_pow3; - else - potentialCoef = - r[i] * r[j] * inv_distance_pow3; - - // forces - FReal rj2=r[j]*r[j]; - - FReal coef[3]; - for(unsigned int k = 0 ; k < 3 ; ++k) - coef[k]= -(targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource]); - - // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel - for(unsigned int k = 0 ; k < 3 ; ++k){ - if(i==j){ - if(j==k) //i=j=k - coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - }// k - - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * targetsPhysicalValues[idxSource] ); - - targetsForcesX[idxSource] -= coef[0]; - targetsForcesY[idxSource] -= coef[1]; - targetsForcesZ[idxSource] -= coef[2]; - targetsPotentials[idxSource] += potentialCoef * targetsPhysicalValues[idxTarget]; - }// j - }// i - - } + for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){ + FReal dx = targetsX[idxSource] - targetsX[idxTarget]; + FReal dy = targetsY[idxSource] - targetsY[idxTarget]; + FReal dz = targetsZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); + + // potentials + FReal potentialCoef; + if(i==j) + potentialCoef = inv_distance - ri2 * inv_distance_pow3; + else + potentialCoef = - r[i] * r[j] * inv_distance_pow3; + + // forces + FReal rj2=r[j]*r[j]; + + FReal coef[3]; + for(unsigned int k = 0 ; k < 3 ; ++k) + coef[k]= -(targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource]); + + // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel + for(unsigned int k = 0 ; k < 3 ; ++k){ + if(i==j){ + if(j==k) //i=j=k + coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + }// k + + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * targetsPhysicalValues[idxSource] ); + + targetsForcesX[idxSource] -= coef[0]; + targetsForcesY[idxSource] -= coef[1]; + targetsForcesZ[idxSource] -= coef[2]; + targetsPotentials[idxSource] += potentialCoef * targetsPhysicalValues[idxTarget]; + }// j + }// i + + } } } @@ -282,7 +282,7 @@ inline void FullMutualRIJ(ContainerClass* const FRestrict inTargets, ContainerCl */ template inline void FullRemoteRIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock @@ -292,79 +292,79 @@ inline void FullRemoteRIJ(ContainerClass* const FRestrict inTargets, ContainerCl const FReal*const targetsZ = inTargets->getPositions()[2]; for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ - for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - if( inNeighbors[idxNeighbors] ){ - const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); - const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; - const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; - const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; - - for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ - // potential - FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; - FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; - FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); - const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j); - - // potentials - FReal potentialCoef; - if(i==j) - potentialCoef = inv_distance - ri2 * inv_distance_pow3; - else - potentialCoef = - r[i] * r[j] * inv_distance_pow3; - - // forces - FReal rj2=r[j]*r[j]; - - FReal coef[3]; - for(unsigned int k = 0 ; k < 3 ; ++k) - coef[k]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); - - // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel - for(unsigned int k = 0 ; k < 3 ; ++k){ - if(i==j){ - if(j==k) //i=j=k - coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - }// k - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); - - }// j - }// i - - } - } - } + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + if( inNeighbors[idxNeighbors] ){ + const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); + const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; + const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; + const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; + + for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ + // potential + FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; + FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; + FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); + const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j); + + // potentials + FReal potentialCoef; + if(i==j) + potentialCoef = inv_distance - ri2 * inv_distance_pow3; + else + potentialCoef = - r[i] * r[j] * inv_distance_pow3; + + // forces + FReal rj2=r[j]*r[j]; + + FReal coef[3]; + for(unsigned int k = 0 ; k < 3 ; ++k) + coef[k]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); + + // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel + for(unsigned int k = 0 ; k < 3 ; ++k){ + if(i==j){ + if(j==k) //i=j=k + coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + }// k + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); + + }// j + }// i + + } + } + } } } @@ -390,10 +390,10 @@ inline void FullRemoteRIJ(ContainerClass* const FRestrict inTargets, ContainerCl */ template inline void MutualParticlesRIJ(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue, - FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, - const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, - FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, - const MatrixKernelClass *const MatrixKernel){ + FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, + const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, + FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, + const MatrixKernelClass *const MatrixKernel){ const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock @@ -409,52 +409,52 @@ inline void MutualParticlesRIJ(const FReal sourceX,const FReal sourceY,const FRe FReal r[3]={dx,dy,dz}; for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal ri2=r[i]*r[i]; - for(unsigned int j = 0 ; j < 3 ; ++j){ - - // potentials - FReal potentialCoef; - if(i==j) - potentialCoef = inv_distance - ri2 * inv_distance_pow3; - else - potentialCoef = - r[i] * r[j] * inv_distance_pow3; - - // forces - FReal rj2=r[j]*r[j]; - - FReal coef[3]; - for(unsigned int k = 0 ; k < 3 ; ++k) - coef[k]= -(targetPhysicalValue[j] * sourcePhysicalValue[j]); - - // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel - for(unsigned int k = 0 ; k < 3 ; ++k){ - if(i==j){ - if(j==k) //i=j=k - coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - }// k - - targetForceX[i] += coef[0]; - targetForceY[i] += coef[1]; - targetForceZ[i] += coef[2]; - targetPotential[i] += ( potentialCoef * sourcePhysicalValue[j] ); - - sourceForceX[i] -= coef[0]; - sourceForceY[i] -= coef[1]; - sourceForceZ[i] -= coef[2]; - sourcePotential[i] += ( potentialCoef * targetPhysicalValue[j] ); - - }// j + FReal ri2=r[i]*r[i]; + for(unsigned int j = 0 ; j < 3 ; ++j){ + + // potentials + FReal potentialCoef; + if(i==j) + potentialCoef = inv_distance - ri2 * inv_distance_pow3; + else + potentialCoef = - r[i] * r[j] * inv_distance_pow3; + + // forces + FReal rj2=r[j]*r[j]; + + FReal coef[3]; + for(unsigned int k = 0 ; k < 3 ; ++k) + coef[k]= -(targetPhysicalValue[j] * sourcePhysicalValue[j]); + + // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel + for(unsigned int k = 0 ; k < 3 ; ++k){ + if(i==j){ + if(j==k) //i=j=k + coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + }// k + + targetForceX[i] += coef[0]; + targetForceY[i] += coef[1]; + targetForceZ[i] += coef[2]; + targetPotential[i] += ( potentialCoef * sourcePhysicalValue[j] ); + + sourceForceX[i] -= coef[0]; + sourceForceY[i] -= coef[1]; + sourceForceZ[i] -= coef[2]; + sourcePotential[i] += ( potentialCoef * targetPhysicalValue[j] ); + + }// j }// i } @@ -475,7 +475,7 @@ inline void MutualParticlesRIJ(const FReal sourceX,const FReal sourceY,const FRe */ template inline void FullMutualRIJK(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock @@ -485,154 +485,154 @@ inline void FullMutualRIJK(ContainerClass* const FRestrict inTargets, ContainerC const FReal*const targetsZ = inTargets->getPositions()[2]; for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ - for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - if( inNeighbors[idxNeighbors] ){ - const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); - const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; - const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; - const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; - - for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ - FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; - FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; - FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentials(i); - FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesX(i); - FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesY(i); - FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZ(i); - - const FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - FReal rj2=r[j]*r[j]; - - for(unsigned int k = 0 ; k < 3 ; ++k){ - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); - const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k); - - // potentials - FReal potentialCoef; - if(i==j){ - if(j==k) //i=j=k - potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - - // forces - FReal coef[3]; - for(unsigned int l = 0 ; l < 3 ; ++l){ - coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); - - // // Grad of RIJK kernel is RIJKL kernel => TODO Implement - // coef[l] *= ; - - }// l - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); - - sourcesForcesX[idxSource] -= coef[0]; - sourcesForcesY[idxSource] -= coef[1]; - sourcesForcesZ[idxSource] -= coef[2]; - sourcesPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget]; - - }// k - }// j - }// i - } - } - } + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + if( inNeighbors[idxNeighbors] ){ + const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); + const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; + const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; + const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; + + for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ + FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; + FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; + FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentials(i); + FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesX(i); + FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesY(i); + FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZ(i); + + const FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + + for(unsigned int k = 0 ; k < 3 ; ++k){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); + const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k); + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); + + // // Grad of RIJK kernel is RIJKL kernel => TODO Implement + // coef[l] *= ; + + }// l + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); + + sourcesForcesX[idxSource] -= coef[0]; + sourcesForcesY[idxSource] -= coef[1]; + sourcesForcesZ[idxSource] -= coef[2]; + sourcesPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget]; + + }// k + }// j + }// i + } + } + } } for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){ - FReal dx = targetsX[idxSource] - targetsX[idxTarget]; - FReal dy = targetsY[idxSource] - targetsY[idxTarget]; - FReal dz = targetsZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - FReal rj2=r[j]*r[j]; - - for(unsigned int k = 0 ; k < 3 ; ++k){ - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); - - // potentials - FReal potentialCoef; - if(i==j){ - if(j==k) //i=j=k - potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - - // forces - FReal coef[3]; - for(unsigned int l = 0 ; l < 3 ; ++l){ - coef[l]= -(targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource]); - - // // Grad of RIJK kernel is RIJKL kernel => TODO Implement - // coef[l] *= ; - - }// l - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * targetsPhysicalValues[idxSource] ); - - targetsForcesX[idxSource] -= coef[0]; - targetsForcesY[idxSource] -= coef[1]; - targetsForcesZ[idxSource] -= coef[2]; - targetsPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget]; - }// k - }// j - }// i - - } + for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){ + FReal dx = targetsX[idxSource] - targetsX[idxTarget]; + FReal dy = targetsY[idxSource] - targetsY[idxTarget]; + FReal dz = targetsZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + + for(unsigned int k = 0 ; k < 3 ; ++k){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource]); + + // // Grad of RIJK kernel is RIJKL kernel => TODO Implement + // coef[l] *= ; + + }// l + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * targetsPhysicalValues[idxSource] ); + + targetsForcesX[idxSource] -= coef[0]; + targetsForcesY[idxSource] -= coef[1]; + targetsForcesZ[idxSource] -= coef[2]; + targetsPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget]; + }// k + }// j + }// i + + } } } @@ -641,7 +641,7 @@ inline void FullMutualRIJK(ContainerClass* const FRestrict inTargets, ContainerC */ template inline void FullRemoteRIJK(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock @@ -651,79 +651,79 @@ inline void FullRemoteRIJK(ContainerClass* const FRestrict inTargets, ContainerC const FReal*const targetsZ = inTargets->getPositions()[2]; for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ - for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - if( inNeighbors[idxNeighbors] ){ - const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); - const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; - const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; - const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; - - for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ - // potential - FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; - FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; - FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - FReal rj2=r[j]*r[j]; - - for(unsigned int k = 0 ; k < 3 ; ++k){ - - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); - const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k); - - // potentials - FReal potentialCoef; - if(i==j){ - if(j==k) //i=j=k - potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - - // forces - FReal coef[3]; - for(unsigned int l = 0 ; l < 3 ; ++l){ - coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); - - // // Grad of RIJK kernel is RIJKL kernel => TODO Implement - // coef[l] *= ; - - }// l - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); - - }// k - }// j - }// i - - } - } - } + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + if( inNeighbors[idxNeighbors] ){ + const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); + const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; + const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; + const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; + + for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ + // potential + FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; + FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; + FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + + for(unsigned int k = 0 ; k < 3 ; ++k){ + + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); + const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k); + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); + + // // Grad of RIJK kernel is RIJKL kernel => TODO Implement + // coef[l] *= ; + + }// l + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); + + }// k + }// j + }// i + + } + } + } } } @@ -749,10 +749,10 @@ inline void FullRemoteRIJK(ContainerClass* const FRestrict inTargets, ContainerC */ template inline void MutualParticlesRIJK(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue, - FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, - const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, - FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, - const MatrixKernelClass *const MatrixKernel){ + FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, + const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, + FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, + const MatrixKernelClass *const MatrixKernel){ const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock @@ -768,50 +768,50 @@ inline void MutualParticlesRIJK(const FReal sourceX,const FReal sourceY,const FR FReal r[3]={dx,dy,dz}; for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal ri2=r[i]*r[i]; - for(unsigned int j = 0 ; j < 3 ; ++j){ - FReal rj2=r[j]*r[j]; - for(unsigned int k = 0 ; k < 3 ; ++k){ - - // potentials - FReal potentialCoef; - if(i==j){ - if(j==k) //i=j=k - potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - - // forces - FReal coef[3]; - for(unsigned int l = 0 ; l < 3 ; ++l){ - coef[l]= -(targetPhysicalValue[j*3+k] * sourcePhysicalValue[j*3+k]); - - // // Grad of RIJK kernel is RIJKL kernel => TODO Implement - // coef[l] *= ; - - }// l - - targetForceX[i] += coef[0]; - targetForceY[i] += coef[1]; - targetForceZ[i] += coef[2]; - targetPotential[i] += ( potentialCoef * sourcePhysicalValue[j*3+k] ); - - sourceForceX[i] -= coef[0]; - sourceForceY[i] -= coef[1]; - sourceForceZ[i] -= coef[2]; - sourcePotential[i] += ( -potentialCoef * targetPhysicalValue[j*3+k] ); - - }// k - }// j + FReal ri2=r[i]*r[i]; + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + for(unsigned int k = 0 ; k < 3 ; ++k){ + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetPhysicalValue[j*3+k] * sourcePhysicalValue[j*3+k]); + + // // Grad of RIJK kernel is RIJKL kernel => TODO Implement + // coef[l] *= ; + + }// l + + targetForceX[i] += coef[0]; + targetForceY[i] += coef[1]; + targetForceZ[i] += coef[2]; + targetPotential[i] += ( potentialCoef * sourcePhysicalValue[j*3+k] ); + + sourceForceX[i] -= coef[0]; + sourceForceY[i] -= coef[1]; + sourceForceZ[i] -= coef[2]; + sourcePotential[i] += ( -potentialCoef * targetPhysicalValue[j*3+k] ); + + }// k + }// j }// i } @@ -822,7 +822,7 @@ inline void MutualParticlesRIJK(const FReal sourceX,const FReal sourceY,const FR #ifdef ScalFMM_USE_SSE #include "FP2PSse.hpp" #elif defined(ScalFMM_USE_AVX) -#include "FP2PAvx.hpp" +#include "FP2PAvx.h" #else #include "FP2PClassic.hpp" #endif //Includes diff --git a/Src/Kernels/Rotation/FRotationCell.hpp b/Src/Kernels/Rotation/FRotationCell.hpp index fbeb1c60aed47b7822999b5faf7df4cd81a57aac..464486dad1c3099cbf9fb9942f2b419f1dbcf54b 100755 --- a/Src/Kernels/Rotation/FRotationCell.hpp +++ b/Src/Kernels/Rotation/FRotationCell.hpp @@ -16,7 +16,7 @@ #ifndef FROTATIONCELL_HPP #define FROTATIONCELL_HPP -#include "../../Utils/FComplexe.hpp" +#include "../../Utils/FComplex.hpp" #include "../../Utils/FMemUtils.hpp" #include "../../Extensions/FExtendCellType.hpp" @@ -43,9 +43,9 @@ protected: static const int LocalSize = ((P+2)*(P+1))/2; // Artimethique suite (n+1)*n/2 //< Multipole vector (static memory) - FComplexe multipole_exp[MultipoleSize]; //< For multipole extenssion + FComplex multipole_exp[MultipoleSize]; //< For multipole extenssion //< Local vector (static memory) - FComplexe local_exp[LocalSize]; //< For local extenssion + FComplex local_exp[LocalSize]; //< For local extenssion public: /** Default constructor @@ -75,20 +75,20 @@ public: } /** Get Multipole array */ - const FComplexe* getMultipole() const { + const FComplex* getMultipole() const { return multipole_exp; } /** Get Local array */ - const FComplexe* getLocal() const { + const FComplex* getLocal() const { return local_exp; } /** Get Multipole array */ - FComplexe* getMultipole() { + FComplex* getMultipole() { return multipole_exp; } /** Get Local array */ - FComplexe* getLocal() { + FComplex* getLocal() { return local_exp; } @@ -145,7 +145,7 @@ public: buffer.fillArray(local_exp, LocalSize); } static constexpr int GetSize(){ - return ((int) sizeof(FComplexe)) * (MultipoleSize + LocalSize); + return ((int) sizeof(FComplex)) * (MultipoleSize + LocalSize); } }; diff --git a/Src/Kernels/Rotation/FRotationKernel.hpp b/Src/Kernels/Rotation/FRotationKernel.hpp index 0335a7234fd8b56423813acd719e3ecaeacb752c..544e628b7437c36353e10ee9d63d16ba7b0db1af 100755 --- a/Src/Kernels/Rotation/FRotationKernel.hpp +++ b/Src/Kernels/Rotation/FRotationKernel.hpp @@ -4,13 +4,13 @@ // This software is a computer program whose purpose is to compute the FMM. // // This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// +// abiding by the rules of distribution of free software. +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". +// "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== #ifndef FROTATIONKERNEL_HPP @@ -18,7 +18,7 @@ #include "Components/FAbstractKernels.hpp" #include "Utils/FSmartPointer.hpp" -#include "Utils/FComplexe.hpp" +#include "Utils/FComplex.hpp" #include "Utils/FMemUtils.hpp" #include "Utils/FSpherical.hpp" #include "Utils/FMath.hpp" @@ -71,11 +71,11 @@ class FRotationKernel : public FAbstractKernels { FSmartPointer L2LTranslationCoef; //< This contains some precalculated values for L2L translation ///////////// Rotation ///////////////////////////// - FComplexe rotationExpMinusImPhi[8][SizeArray]; //< This is the vector use for the rotation around z for the M2M (multipole) - FComplexe rotationExpImPhi[8][SizeArray]; //< This is the vector use for the rotation around z for the L2L (taylor) + FComplex rotationExpMinusImPhi[8][SizeArray]; //< This is the vector use for the rotation around z for the M2M (multipole) + FComplex rotationExpImPhi[8][SizeArray]; //< This is the vector use for the rotation around z for the L2L (taylor) - FComplexe rotationM2LExpMinusImPhi[343][SizeArray]; //< This is the vector use for the rotation around z for the M2L (multipole) - FComplexe rotationM2LExpImPhi[343][SizeArray]; //< This is the vector use for the rotation around z for the M2L (taylor) + FComplex rotationM2LExpMinusImPhi[343][SizeArray]; //< This is the vector use for the rotation around z for the M2L (multipole) + FComplex rotationM2LExpImPhi[343][SizeArray]; //< This is the vector use for the rotation around z for the M2L (taylor) ///////////// Rotation ///////////////////////////// // First we compute the size of the d{l,m,k} matrix. @@ -100,11 +100,11 @@ class FRotationKernel : public FAbstractKernels { * factorials[n] = n! with n <= 2*P */ void precomputeFactorials(){ - factorials[0] = 1; - FReal fidx = 1; - for(int idx = 1 ; idx <= P2 ; ++idx, ++fidx){ - factorials[idx] = fidx * factorials[idx-1]; - } + factorials[0] = 1; + FReal fidx = 1; + for(int idx = 1 ; idx <= P2 ; ++idx, ++fidx){ + factorials[idx] = fidx * factorials[idx-1]; + } } /** This function precompute the translation coef. @@ -118,71 +118,71 @@ class FRotationKernel : public FAbstractKernels { * the data between threads. */ void precomputeTranslationCoef(){ - {// M2M & L2L - // Allocate - M2MTranslationCoef = new FReal[treeHeight-1][P+1]; - L2LTranslationCoef = new FReal[treeHeight-1][P+1]; - // widthAtLevel represents half of the size of a box - FReal widthAtLevel = boxWidth/4; - // we go from the root to the leaf-1 - for( int idxLevel = 0 ; idxLevel < treeHeight - 1 ; ++idxLevel){ - // b is the parent-child distance = norm( vec(widthAtLevel,widthAtLevel,widthAtLevel)) - const FReal b = FMath::Sqrt(widthAtLevel*widthAtLevel*3); - // we compute b^idx iteratively - FReal bPowIdx = 1.0; - // we compute -1^idx iteratively - FReal minus_1_pow_idx = 1.0; - for(int idx = 0 ; idx <= P ; ++idx){ - // coef m2m = (-b)^j/j! - M2MTranslationCoef[idxLevel][idx] = minus_1_pow_idx * bPowIdx / factorials[idx]; - // coef l2l = b^j/j! - L2LTranslationCoef[idxLevel][idx] = bPowIdx / factorials[idx]; - // increase - bPowIdx *= b; - minus_1_pow_idx = -minus_1_pow_idx; - } - // divide by two per level - widthAtLevel /= 2; - } - } - {// M2L - // Allocate - M2LTranslationCoef = new FReal[treeHeight][343][P+1]; - // This is the width of a box at each level - FReal boxWidthAtLevel = widthAtLeafLevel; - // from leaf level to the root - for(int idxLevel = treeHeight-1 ; idxLevel > 0 ; --idxLevel){ - // we compute all possibilities - for(int idxX = -3 ; idxX <= 3 ; ++idxX ){ - for(int idxY = -3 ; idxY <= 3 ; ++idxY ){ - for(int idxZ = -3 ; idxZ <= 3 ; ++idxZ ){ - // if this is not a neighbour - if( idxX < -1 || 1 < idxX || idxY < -1 || 1 < idxY || idxZ < -1 || 1 < idxZ ){ - // compute the relative position - const FPoint relativePosition( -FReal(idxX)*boxWidthAtLevel, - -FReal(idxY)*boxWidthAtLevel, - -FReal(idxZ)*boxWidthAtLevel); - // this is the position in the index system from 0 to 343 - const int position = ((( (idxX+3) * 7) + (idxY+3))) * 7 + idxZ + 3; - // b is the distance between the two cells - const FReal b = FMath::Sqrt( (relativePosition.getX() * relativePosition.getX()) + - (relativePosition.getY() * relativePosition.getY()) + - (relativePosition.getZ() * relativePosition.getZ())); - // compute b^idx+1 iteratively - FReal bPowIdx1 = b; - for(int idx = 0 ; idx <= P ; ++idx){ - // factorials[j+l] / FMath::pow(b,j+l+1) - M2LTranslationCoef[idxLevel][position][idx] = factorials[idx] / bPowIdx1; - bPowIdx1 *= b; - } - } - } - } - } - // multiply per two at each level - boxWidthAtLevel *= FReal(2.0); - } - } + {// M2M & L2L + // Allocate + M2MTranslationCoef = new FReal[treeHeight-1][P+1]; + L2LTranslationCoef = new FReal[treeHeight-1][P+1]; + // widthAtLevel represents half of the size of a box + FReal widthAtLevel = boxWidth/4; + // we go from the root to the leaf-1 + for( int idxLevel = 0 ; idxLevel < treeHeight - 1 ; ++idxLevel){ + // b is the parent-child distance = norm( vec(widthAtLevel,widthAtLevel,widthAtLevel)) + const FReal b = FMath::Sqrt(widthAtLevel*widthAtLevel*3); + // we compute b^idx iteratively + FReal bPowIdx = 1.0; + // we compute -1^idx iteratively + FReal minus_1_pow_idx = 1.0; + for(int idx = 0 ; idx <= P ; ++idx){ + // coef m2m = (-b)^j/j! + M2MTranslationCoef[idxLevel][idx] = minus_1_pow_idx * bPowIdx / factorials[idx]; + // coef l2l = b^j/j! + L2LTranslationCoef[idxLevel][idx] = bPowIdx / factorials[idx]; + // increase + bPowIdx *= b; + minus_1_pow_idx = -minus_1_pow_idx; + } + // divide by two per level + widthAtLevel /= 2; + } + } + {// M2L + // Allocate + M2LTranslationCoef = new FReal[treeHeight][343][P+1]; + // This is the width of a box at each level + FReal boxWidthAtLevel = widthAtLeafLevel; + // from leaf level to the root + for(int idxLevel = treeHeight-1 ; idxLevel > 0 ; --idxLevel){ + // we compute all possibilities + for(int idxX = -3 ; idxX <= 3 ; ++idxX ){ + for(int idxY = -3 ; idxY <= 3 ; ++idxY ){ + for(int idxZ = -3 ; idxZ <= 3 ; ++idxZ ){ + // if this is not a neighbour + if( idxX < -1 || 1 < idxX || idxY < -1 || 1 < idxY || idxZ < -1 || 1 < idxZ ){ + // compute the relative position + const FPoint relativePosition( -FReal(idxX)*boxWidthAtLevel, + -FReal(idxY)*boxWidthAtLevel, + -FReal(idxZ)*boxWidthAtLevel); + // this is the position in the index system from 0 to 343 + const int position = ((( (idxX+3) * 7) + (idxY+3))) * 7 + idxZ + 3; + // b is the distance between the two cells + const FReal b = FMath::Sqrt( (relativePosition.getX() * relativePosition.getX()) + + (relativePosition.getY() * relativePosition.getY()) + + (relativePosition.getZ() * relativePosition.getZ())); + // compute b^idx+1 iteratively + FReal bPowIdx1 = b; + for(int idx = 0 ; idx <= P ; ++idx){ + // factorials[j+l] / FMath::pow(b,j+l+1) + M2LTranslationCoef[idxLevel][position][idx] = factorials[idx] / bPowIdx1; + bPowIdx1 *= b; + } + } + } + } + } + // multiply per two at each level + boxWidthAtLevel *= FReal(2.0); + } + } } /////////////////////////////////////////////////////// @@ -256,263 +256,263 @@ class FRotationKernel : public FAbstractKernels { * 1x1 + 3x3 + ... + (2P+1)x(2P+1) */ void precomputeRotationVectors(){ - ///////////////////////////////////////////////////////////////// - // We will need a Sqrt(factorial[x-y]*factorial[x+y]) - // so we precompute it - FReal sqrtDoubleFactorials[P+1][P+1]; - for(int l = 0 ; l <= P ; ++l ){ - for(int m = 0 ; m <= l ; ++m ){ - sqrtDoubleFactorials[l][m] = FMath::Sqrt(factorials[l-m]*factorials[l+m]); - } - } - - ///////////////////////////////////////////////////////////////// - // We compute the rotation matrix, we do not need 343 matrix - // We will compute only a part of the since we compute the inclinaison - // angle. inclinaison(+/-x,+/-y,z) = inclinaison(+/-y,+/-x,z) - // we put the negative (-theta) with a negative x - typedef FReal (*pMatrixDlmk) /*[P+1]*/[P2+1][P2+1]; - pMatrixDlmk dlmkMatrix[7][4][7]; - // Allocate matrix - for(int idxX = 0 ; idxX < 7 ; ++idxX) - for(int idxY = 0 ; idxY < 4 ; ++idxY) - for(int idxZ = 0 ; idxZ < 7 ; ++idxZ) { - dlmkMatrix[idxX][idxY][idxZ] = new FReal[P+1][P2+1][P2+1]; - } - - // First we compute special vectors: - DlmkBuild0(dlmkMatrix[0+3][0][1+3]); // theta = 0 - DlmkBuildPi(dlmkMatrix[0+3][0][-1+3]); // theta = Pi - DlmkBuild(dlmkMatrix[1+3][0][0+3],FMath::FPiDiv2); // theta = Pi/2 - DlmkInverse(dlmkMatrix[-1+3][0][0+3],dlmkMatrix[1+3][0][0+3]); // theta = -Pi/2 - // Then other angle - for(int x = 1 ; x <= 3 ; ++x){ - for(int y = 0 ; y <= x ; ++y){ - for(int z = 1 ; z <= 3 ; ++z){ - const FReal inclinaison = FSpherical(FPoint(FReal(x),FReal(y),FReal(z))).getInclination(); - DlmkBuild(dlmkMatrix[x+3][y][z+3],inclinaison); - // For inclinaison between ]pi/2;pi[ - DlmkZNegative(dlmkMatrix[x+3][y][(-z)+3],dlmkMatrix[x+3][y][z+3]); - // For inclinaison between ]pi;3pi/2[ - DlmkInverseZNegative(dlmkMatrix[(-x)+3][y][(-z)+3],dlmkMatrix[x+3][y][z+3]); - // For inclinaison between ]3pi/2;2pi[ - DlmkInverse(dlmkMatrix[(-x)+3][y][z+3],dlmkMatrix[x+3][y][z+3]); - } - } - } - - ///////////////////////////////////////////////////////////////// - // Manage angle for M2M/L2L - - const int index_P0 = atLm(P,0); - // For all possible child (morton indexing from 0 to 7) - for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ - // Retrieve relative position of child to parent - const FReal x = FReal((idxChild&4)? -boxWidth : boxWidth); - const FReal y = FReal((idxChild&2)? -boxWidth : boxWidth); - const FReal z = FReal((idxChild&1)? -boxWidth : boxWidth); - const FPoint relativePosition( x , y , z ); - // compute azimuth - const FSpherical sph(relativePosition); - - // First compute azimuth rotation - // compute the last part with l == P - { - int index_lm = index_P0; - for(int m = 0 ; m <= P ; ++m, ++index_lm ){ - const FReal mphi = (sph.getPhiZero2Pi() + FMath::FPiDiv2) * FReal(m); - // O_{l,m}( \alpha, \beta + \phi ) = e^{-i \phi m} O_{l,m}( \alpha, \beta ) - rotationExpMinusImPhi[idxChild][index_lm].setRealImag(FMath::Cos(-mphi), FMath::Sin(-mphi)); - // M_{l,m}( \alpha, \beta + \phi ) = e^{i \phi m} M_{l,m}( \alpha, \beta ) - rotationExpImPhi[idxChild][index_lm].setRealImag(FMath::Cos(mphi), FMath::Sin(mphi)); - } - } - // Then for l < P it just a copy of the previous computed vector - { - int index_lm = 0; - // for l < P - for(int l = 0 ; l < P ; ++l){ - // take the l + 1 numbers from the vector with l' = P - FMemUtils::copyall(rotationExpMinusImPhi[idxChild] + index_lm, - rotationExpMinusImPhi[idxChild] + index_P0, - l + 1); - FMemUtils::copyall(rotationExpImPhi[idxChild] + index_lm, - rotationExpImPhi[idxChild] + index_P0, - l + 1); - // index(l+1,0) = index(l,0) + l + 1 - index_lm += l + 1; - } - } - { // Then compute the inclinaison rotation - // For the child parent relation we always have a inclinaison - // for (1,1,1) or (1,1,-1) - const int dx = 1; - const int dy = 1; - const int dz = (idxChild&1)?-1:1; - - // - int index_lmk = 0; - for(int l = 0 ; l <= P ; ++l){ - for(int m = 0 ; m <= l ; ++m ){ - { // for k == 0 - const FReal d_lmk_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][0+P]; - const FReal d_lmk = dlmkMatrix[dx+3][dy][dz+3][l][m+P][0+P]; - // \sqrt{ \frac{(l-k)!(l+k)!}{(l-|m|)!(l+|m|)!} } - const FReal Ofactor = sqrtDoubleFactorials[l][0]/sqrtDoubleFactorials[l][m]; - const FReal Mfactor = sqrtDoubleFactorials[l][m]/sqrtDoubleFactorials[l][0]; - - DlmkCoefOTheta[idxChild][index_lmk] = Ofactor * d_lmk; - DlmkCoefMTheta[idxChild][index_lmk] = Mfactor * d_lmk; - DlmkCoefOMinusTheta[idxChild][index_lmk] = Ofactor * d_lmk_minusTheta; - DlmkCoefMMinusTheta[idxChild][index_lmk] = Mfactor * d_lmk_minusTheta; - - ++index_lmk; - } - // for 0 < k - FReal minus_1_pow_k = -1.0; - for(int k = 1 ; k <= l ; ++k){ - const FReal d_lm_minus_k = dlmkMatrix[dx+3][dy][dz+3][l][m+P][-k+P]; - const FReal d_lmk = dlmkMatrix[dx+3][dy][dz+3][l][m+P][k+P]; - const FReal d_lm_minus_k_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][-k+P]; - const FReal d_lmk_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][k+P]; - - const FReal Ofactor = sqrtDoubleFactorials[l][k]/sqrtDoubleFactorials[l][m]; - const FReal Mfactor = sqrtDoubleFactorials[l][m]/sqrtDoubleFactorials[l][k]; - - // for k negatif - DlmkCoefOTheta[idxChild][index_lmk] = Ofactor * (d_lmk + minus_1_pow_k * d_lm_minus_k); - DlmkCoefMTheta[idxChild][index_lmk] = Mfactor * (d_lmk + minus_1_pow_k * d_lm_minus_k); - DlmkCoefOMinusTheta[idxChild][index_lmk] = Ofactor * (d_lmk_minusTheta + minus_1_pow_k * d_lm_minus_k_minusTheta); - DlmkCoefMMinusTheta[idxChild][index_lmk] = Mfactor * (d_lmk_minusTheta + minus_1_pow_k * d_lm_minus_k_minusTheta); - ++index_lmk; - // for k positif - DlmkCoefOTheta[idxChild][index_lmk] = Ofactor * (d_lmk - minus_1_pow_k * d_lm_minus_k); - DlmkCoefMTheta[idxChild][index_lmk] = Mfactor * (d_lmk - minus_1_pow_k * d_lm_minus_k); - DlmkCoefOMinusTheta[idxChild][index_lmk] = Ofactor * (d_lmk_minusTheta - minus_1_pow_k * d_lm_minus_k_minusTheta); - DlmkCoefMMinusTheta[idxChild][index_lmk] = Mfactor * (d_lmk_minusTheta - minus_1_pow_k * d_lm_minus_k_minusTheta); - ++index_lmk; - - minus_1_pow_k = -minus_1_pow_k; - } - } - } - } - } - - ///////////////////////////////////////////////////////////////// - // Manage angle for M2L - // For all possible cases - for(int idxX = -3 ; idxX <= 3 ; ++idxX ){ - for(int idxY = -3 ; idxY <= 3 ; ++idxY ){ - for(int idxZ = -3 ; idxZ <= 3 ; ++idxZ ){ - // Test if it is not a neighbors - if( idxX < -1 || 1 < idxX || idxY < -1 || 1 < idxY || idxZ < -1 || 1 < idxZ ){ - // Build relative position between target and source - const FPoint relativePosition( -FReal(idxX)*boxWidth, - -FReal(idxY)*boxWidth, - -FReal(idxZ)*boxWidth); - const int position = ((( (idxX+3) * 7) + (idxY+3))) * 7 + idxZ + 3; - const FSpherical sph(relativePosition); - - // Compute azimuth rotation vector - // first compute the last part with l == P - { - int index_lm = index_P0; - for(int m = 0 ; m <= P ; ++m, ++index_lm ){ - const FReal mphi = (sph.getPhiZero2Pi() + FMath::FPiDiv2) * FReal(m); - // O_{l,m}( \alpha, \beta + \phi ) = e^{-i \phi m} O_{l,m}( \alpha, \beta ) - rotationM2LExpMinusImPhi[position][index_lm].setRealImag(FMath::Cos(-mphi), FMath::Sin(-mphi)); - // M_{l,m}( \alpha, \beta + \phi ) = e^{i \phi m} M_{l,m}( \alpha, \beta ) - rotationM2LExpImPhi[position][index_lm].setRealImag(FMath::Cos(mphi), FMath::Sin(mphi)); - } - } - // Then for l < P copy the subpart of the previous vector - { - int index_lm = 0; - for(int l = 0 ; l < P ; ++l){ - FMemUtils::copyall(rotationM2LExpMinusImPhi[position] + index_lm, - rotationM2LExpMinusImPhi[position] + index_P0, - l + 1); - FMemUtils::copyall(rotationM2LExpImPhi[position] + index_lm, - rotationM2LExpImPhi[position] + index_P0, - l + 1); - index_lm += l + 1; - } - } - // Compute inclination vector - { - // We have to find the right d_lmk matrix - int dx = 0 , dy = 0, dz = 0; - // if x == 0 && y == 0 it means we have an inclination of 0 or PI - if(idxX == 0 && idxY == 0){ - dx = 0; - dy = 0; - // no matter if z is big, we want [0][0][1] or [0][0][-1] - if( idxZ < 0 ) dz = 1; - else dz = -1; - } - // if z == 0 we have an inclination of Pi/2 - else if ( idxZ == 0){ - dx = 1; - dy = 0; - dz = 0; - } - // else we take the right indexes - else { - dx = FMath::Max(FMath::Abs(idxX),FMath::Abs(idxY)); - dy = FMath::Min(FMath::Abs(idxX),FMath::Abs(idxY)); - dz = -idxZ; - } - - int index_lmk = 0; - for(int l = 0 ; l <= P ; ++l){ - for(int m = 0 ; m <= l ; ++m ){ - { // k == 0 - const FReal d_lmk = dlmkMatrix[dx+3][dy][dz+3][l][m+P][0+P]; - const FReal d_lmk_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][0+P]; - - // \sqrt{ \frac{(l-k)!(l+k)!}{(l-|m|)!(l+|m|)!} } - const FReal Ofactor = sqrtDoubleFactorials[l][0]/sqrtDoubleFactorials[l][m]; - const FReal Mfactor = sqrtDoubleFactorials[l][m]/sqrtDoubleFactorials[l][0]; - - DlmkCoefM2LOTheta[position][index_lmk] = Ofactor * d_lmk; - DlmkCoefM2LMMinusTheta[position][index_lmk] = Mfactor * d_lmk_minusTheta; - ++index_lmk; - } - FReal minus_1_pow_k = -1.0; - for(int k = 1 ; k <= l ; ++k){ - const FReal d_lm_minus_k = dlmkMatrix[dx+3][dy][dz+3][l][m+P][-k+P]; - const FReal d_lmk = dlmkMatrix[dx+3][dy][dz+3][l][m+P][k+P]; - - const FReal d_lm_minus_k_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][-k+P]; - const FReal d_lmk_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][k+P]; - - const FReal Ofactor = sqrtDoubleFactorials[l][k]/sqrtDoubleFactorials[l][m]; - const FReal Mfactor = sqrtDoubleFactorials[l][m]/sqrtDoubleFactorials[l][k]; - - DlmkCoefM2LOTheta[position][index_lmk] = Ofactor * (d_lmk + minus_1_pow_k * d_lm_minus_k); - DlmkCoefM2LMMinusTheta[position][index_lmk] = Mfactor * (d_lmk_minusTheta + minus_1_pow_k * d_lm_minus_k_minusTheta); - ++index_lmk; - - DlmkCoefM2LOTheta[position][index_lmk] = Ofactor * (d_lmk - minus_1_pow_k * d_lm_minus_k); - DlmkCoefM2LMMinusTheta[position][index_lmk] = Mfactor * (d_lmk_minusTheta - minus_1_pow_k * d_lm_minus_k_minusTheta); - ++index_lmk; - - minus_1_pow_k = -minus_1_pow_k; - } - } - } - } - } - } - } - } - // Deallocate matrix - for(int idxX = 0 ; idxX < 7 ; ++idxX) - for(int idxY = 0 ; idxY < 4 ; ++idxY) - for(int idxZ = 0 ; idxZ < 7 ; ++idxZ) { - delete[] dlmkMatrix[idxX][idxY][idxZ]; - } + ///////////////////////////////////////////////////////////////// + // We will need a Sqrt(factorial[x-y]*factorial[x+y]) + // so we precompute it + FReal sqrtDoubleFactorials[P+1][P+1]; + for(int l = 0 ; l <= P ; ++l ){ + for(int m = 0 ; m <= l ; ++m ){ + sqrtDoubleFactorials[l][m] = FMath::Sqrt(factorials[l-m]*factorials[l+m]); + } + } + + ///////////////////////////////////////////////////////////////// + // We compute the rotation matrix, we do not need 343 matrix + // We will compute only a part of the since we compute the inclinaison + // angle. inclinaison(+/-x,+/-y,z) = inclinaison(+/-y,+/-x,z) + // we put the negative (-theta) with a negative x + typedef FReal (*pMatrixDlmk) /*[P+1]*/[P2+1][P2+1]; + pMatrixDlmk dlmkMatrix[7][4][7]; + // Allocate matrix + for(int idxX = 0 ; idxX < 7 ; ++idxX) + for(int idxY = 0 ; idxY < 4 ; ++idxY) + for(int idxZ = 0 ; idxZ < 7 ; ++idxZ) { + dlmkMatrix[idxX][idxY][idxZ] = new FReal[P+1][P2+1][P2+1]; + } + + // First we compute special vectors: + DlmkBuild0(dlmkMatrix[0+3][0][1+3]); // theta = 0 + DlmkBuildPi(dlmkMatrix[0+3][0][-1+3]); // theta = Pi + DlmkBuild(dlmkMatrix[1+3][0][0+3],FMath::FPiDiv2); // theta = Pi/2 + DlmkInverse(dlmkMatrix[-1+3][0][0+3],dlmkMatrix[1+3][0][0+3]); // theta = -Pi/2 + // Then other angle + for(int x = 1 ; x <= 3 ; ++x){ + for(int y = 0 ; y <= x ; ++y){ + for(int z = 1 ; z <= 3 ; ++z){ + const FReal inclinaison = FSpherical(FPoint(FReal(x),FReal(y),FReal(z))).getInclination(); + DlmkBuild(dlmkMatrix[x+3][y][z+3],inclinaison); + // For inclinaison between ]pi/2;pi[ + DlmkZNegative(dlmkMatrix[x+3][y][(-z)+3],dlmkMatrix[x+3][y][z+3]); + // For inclinaison between ]pi;3pi/2[ + DlmkInverseZNegative(dlmkMatrix[(-x)+3][y][(-z)+3],dlmkMatrix[x+3][y][z+3]); + // For inclinaison between ]3pi/2;2pi[ + DlmkInverse(dlmkMatrix[(-x)+3][y][z+3],dlmkMatrix[x+3][y][z+3]); + } + } + } + + ///////////////////////////////////////////////////////////////// + // Manage angle for M2M/L2L + + const int index_P0 = atLm(P,0); + // For all possible child (morton indexing from 0 to 7) + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + // Retrieve relative position of child to parent + const FReal x = FReal((idxChild&4)? -boxWidth : boxWidth); + const FReal y = FReal((idxChild&2)? -boxWidth : boxWidth); + const FReal z = FReal((idxChild&1)? -boxWidth : boxWidth); + const FPoint relativePosition( x , y , z ); + // compute azimuth + const FSpherical sph(relativePosition); + + // First compute azimuth rotation + // compute the last part with l == P + { + int index_lm = index_P0; + for(int m = 0 ; m <= P ; ++m, ++index_lm ){ + const FReal mphi = (sph.getPhiZero2Pi() + FMath::FPiDiv2) * FReal(m); + // O_{l,m}( \alpha, \beta + \phi ) = e^{-i \phi m} O_{l,m}( \alpha, \beta ) + rotationExpMinusImPhi[idxChild][index_lm].setRealImag(FMath::Cos(-mphi), FMath::Sin(-mphi)); + // M_{l,m}( \alpha, \beta + \phi ) = e^{i \phi m} M_{l,m}( \alpha, \beta ) + rotationExpImPhi[idxChild][index_lm].setRealImag(FMath::Cos(mphi), FMath::Sin(mphi)); + } + } + // Then for l < P it just a copy of the previous computed vector + { + int index_lm = 0; + // for l < P + for(int l = 0 ; l < P ; ++l){ + // take the l + 1 numbers from the vector with l' = P + FMemUtils::copyall(rotationExpMinusImPhi[idxChild] + index_lm, + rotationExpMinusImPhi[idxChild] + index_P0, + l + 1); + FMemUtils::copyall(rotationExpImPhi[idxChild] + index_lm, + rotationExpImPhi[idxChild] + index_P0, + l + 1); + // index(l+1,0) = index(l,0) + l + 1 + index_lm += l + 1; + } + } + { // Then compute the inclinaison rotation + // For the child parent relation we always have a inclinaison + // for (1,1,1) or (1,1,-1) + const int dx = 1; + const int dy = 1; + const int dz = (idxChild&1)?-1:1; + + // + int index_lmk = 0; + for(int l = 0 ; l <= P ; ++l){ + for(int m = 0 ; m <= l ; ++m ){ + { // for k == 0 + const FReal d_lmk_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][0+P]; + const FReal d_lmk = dlmkMatrix[dx+3][dy][dz+3][l][m+P][0+P]; + // \sqrt{ \frac{(l-k)!(l+k)!}{(l-|m|)!(l+|m|)!} } + const FReal Ofactor = sqrtDoubleFactorials[l][0]/sqrtDoubleFactorials[l][m]; + const FReal Mfactor = sqrtDoubleFactorials[l][m]/sqrtDoubleFactorials[l][0]; + + DlmkCoefOTheta[idxChild][index_lmk] = Ofactor * d_lmk; + DlmkCoefMTheta[idxChild][index_lmk] = Mfactor * d_lmk; + DlmkCoefOMinusTheta[idxChild][index_lmk] = Ofactor * d_lmk_minusTheta; + DlmkCoefMMinusTheta[idxChild][index_lmk] = Mfactor * d_lmk_minusTheta; + + ++index_lmk; + } + // for 0 < k + FReal minus_1_pow_k = -1.0; + for(int k = 1 ; k <= l ; ++k){ + const FReal d_lm_minus_k = dlmkMatrix[dx+3][dy][dz+3][l][m+P][-k+P]; + const FReal d_lmk = dlmkMatrix[dx+3][dy][dz+3][l][m+P][k+P]; + const FReal d_lm_minus_k_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][-k+P]; + const FReal d_lmk_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][k+P]; + + const FReal Ofactor = sqrtDoubleFactorials[l][k]/sqrtDoubleFactorials[l][m]; + const FReal Mfactor = sqrtDoubleFactorials[l][m]/sqrtDoubleFactorials[l][k]; + + // for k negatif + DlmkCoefOTheta[idxChild][index_lmk] = Ofactor * (d_lmk + minus_1_pow_k * d_lm_minus_k); + DlmkCoefMTheta[idxChild][index_lmk] = Mfactor * (d_lmk + minus_1_pow_k * d_lm_minus_k); + DlmkCoefOMinusTheta[idxChild][index_lmk] = Ofactor * (d_lmk_minusTheta + minus_1_pow_k * d_lm_minus_k_minusTheta); + DlmkCoefMMinusTheta[idxChild][index_lmk] = Mfactor * (d_lmk_minusTheta + minus_1_pow_k * d_lm_minus_k_minusTheta); + ++index_lmk; + // for k positif + DlmkCoefOTheta[idxChild][index_lmk] = Ofactor * (d_lmk - minus_1_pow_k * d_lm_minus_k); + DlmkCoefMTheta[idxChild][index_lmk] = Mfactor * (d_lmk - minus_1_pow_k * d_lm_minus_k); + DlmkCoefOMinusTheta[idxChild][index_lmk] = Ofactor * (d_lmk_minusTheta - minus_1_pow_k * d_lm_minus_k_minusTheta); + DlmkCoefMMinusTheta[idxChild][index_lmk] = Mfactor * (d_lmk_minusTheta - minus_1_pow_k * d_lm_minus_k_minusTheta); + ++index_lmk; + + minus_1_pow_k = -minus_1_pow_k; + } + } + } + } + } + + ///////////////////////////////////////////////////////////////// + // Manage angle for M2L + // For all possible cases + for(int idxX = -3 ; idxX <= 3 ; ++idxX ){ + for(int idxY = -3 ; idxY <= 3 ; ++idxY ){ + for(int idxZ = -3 ; idxZ <= 3 ; ++idxZ ){ + // Test if it is not a neighbors + if( idxX < -1 || 1 < idxX || idxY < -1 || 1 < idxY || idxZ < -1 || 1 < idxZ ){ + // Build relative position between target and source + const FPoint relativePosition( -FReal(idxX)*boxWidth, + -FReal(idxY)*boxWidth, + -FReal(idxZ)*boxWidth); + const int position = ((( (idxX+3) * 7) + (idxY+3))) * 7 + idxZ + 3; + const FSpherical sph(relativePosition); + + // Compute azimuth rotation vector + // first compute the last part with l == P + { + int index_lm = index_P0; + for(int m = 0 ; m <= P ; ++m, ++index_lm ){ + const FReal mphi = (sph.getPhiZero2Pi() + FMath::FPiDiv2) * FReal(m); + // O_{l,m}( \alpha, \beta + \phi ) = e^{-i \phi m} O_{l,m}( \alpha, \beta ) + rotationM2LExpMinusImPhi[position][index_lm].setRealImag(FMath::Cos(-mphi), FMath::Sin(-mphi)); + // M_{l,m}( \alpha, \beta + \phi ) = e^{i \phi m} M_{l,m}( \alpha, \beta ) + rotationM2LExpImPhi[position][index_lm].setRealImag(FMath::Cos(mphi), FMath::Sin(mphi)); + } + } + // Then for l < P copy the subpart of the previous vector + { + int index_lm = 0; + for(int l = 0 ; l < P ; ++l){ + FMemUtils::copyall(rotationM2LExpMinusImPhi[position] + index_lm, + rotationM2LExpMinusImPhi[position] + index_P0, + l + 1); + FMemUtils::copyall(rotationM2LExpImPhi[position] + index_lm, + rotationM2LExpImPhi[position] + index_P0, + l + 1); + index_lm += l + 1; + } + } + // Compute inclination vector + { + // We have to find the right d_lmk matrix + int dx = 0 , dy = 0, dz = 0; + // if x == 0 && y == 0 it means we have an inclination of 0 or PI + if(idxX == 0 && idxY == 0){ + dx = 0; + dy = 0; + // no matter if z is big, we want [0][0][1] or [0][0][-1] + if( idxZ < 0 ) dz = 1; + else dz = -1; + } + // if z == 0 we have an inclination of Pi/2 + else if ( idxZ == 0){ + dx = 1; + dy = 0; + dz = 0; + } + // else we take the right indexes + else { + dx = FMath::Max(FMath::Abs(idxX),FMath::Abs(idxY)); + dy = FMath::Min(FMath::Abs(idxX),FMath::Abs(idxY)); + dz = -idxZ; + } + + int index_lmk = 0; + for(int l = 0 ; l <= P ; ++l){ + for(int m = 0 ; m <= l ; ++m ){ + { // k == 0 + const FReal d_lmk = dlmkMatrix[dx+3][dy][dz+3][l][m+P][0+P]; + const FReal d_lmk_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][0+P]; + + // \sqrt{ \frac{(l-k)!(l+k)!}{(l-|m|)!(l+|m|)!} } + const FReal Ofactor = sqrtDoubleFactorials[l][0]/sqrtDoubleFactorials[l][m]; + const FReal Mfactor = sqrtDoubleFactorials[l][m]/sqrtDoubleFactorials[l][0]; + + DlmkCoefM2LOTheta[position][index_lmk] = Ofactor * d_lmk; + DlmkCoefM2LMMinusTheta[position][index_lmk] = Mfactor * d_lmk_minusTheta; + ++index_lmk; + } + FReal minus_1_pow_k = -1.0; + for(int k = 1 ; k <= l ; ++k){ + const FReal d_lm_minus_k = dlmkMatrix[dx+3][dy][dz+3][l][m+P][-k+P]; + const FReal d_lmk = dlmkMatrix[dx+3][dy][dz+3][l][m+P][k+P]; + + const FReal d_lm_minus_k_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][-k+P]; + const FReal d_lmk_minusTheta = dlmkMatrix[-dx+3][dy][dz+3][l][m+P][k+P]; + + const FReal Ofactor = sqrtDoubleFactorials[l][k]/sqrtDoubleFactorials[l][m]; + const FReal Mfactor = sqrtDoubleFactorials[l][m]/sqrtDoubleFactorials[l][k]; + + DlmkCoefM2LOTheta[position][index_lmk] = Ofactor * (d_lmk + minus_1_pow_k * d_lm_minus_k); + DlmkCoefM2LMMinusTheta[position][index_lmk] = Mfactor * (d_lmk_minusTheta + minus_1_pow_k * d_lm_minus_k_minusTheta); + ++index_lmk; + + DlmkCoefM2LOTheta[position][index_lmk] = Ofactor * (d_lmk - minus_1_pow_k * d_lm_minus_k); + DlmkCoefM2LMMinusTheta[position][index_lmk] = Mfactor * (d_lmk_minusTheta - minus_1_pow_k * d_lm_minus_k_minusTheta); + ++index_lmk; + + minus_1_pow_k = -minus_1_pow_k; + } + } + } + } + } + } + } + } + // Deallocate matrix + for(int idxX = 0 ; idxX < 7 ; ++idxX) + for(int idxY = 0 ; idxY < 4 ; ++idxY) + for(int idxZ = 0 ; idxZ < 7 ; ++idxZ) { + delete[] dlmkMatrix[idxX][idxY][idxZ]; + } } @@ -535,16 +535,16 @@ class FRotationKernel : public FAbstractKernels { * \f] */ void DlmkBuild0(FReal dlmk[P+1][P2+1][P2+1]) const { - for(int l = 0 ; l <= P ; ++l){ - for(int m = -l ; m <= l ; ++m){ - // first put 0 every where - for(int k = -l ; k <= l ; ++k){ - dlmk[l][P+m][P+k] = FReal(0.0); - } - // then replace per 1 for m == k - dlmk[l][P+m][P+m] = FReal(1.0); - } - } + for(int l = 0 ; l <= P ; ++l){ + for(int m = -l ; m <= l ; ++m){ + // first put 0 every where + for(int k = -l ; k <= l ; ++k){ + dlmk[l][P+m][P+k] = FReal(0.0); + } + // then replace per 1 for m == k + dlmk[l][P+m][P+m] = FReal(1.0); + } + } } /** Compute d_mlk for \theta = PI @@ -553,16 +553,16 @@ class FRotationKernel : public FAbstractKernels { * \f] */ void DlmkBuildPi(FReal dlmk[P+1][P2+1][P2+1]) const { - for(int l = 0 ; l <= P ; ++l){ - for(int m = -l ; m <= l ; ++m){ - // put 0 every where - for(int k = -l ; k <= l ; ++k){ - dlmk[l][P+m][P+k] = FReal(0.0); - } - // -1^l+k * 1 where m == k - dlmk[l][P+m][P-m] = ((l+m)&0x1 ? FReal(-1) : FReal(1)); - } - } + for(int l = 0 ; l <= P ; ++l){ + for(int m = -l ; m <= l ; ++m){ + // put 0 every where + for(int k = -l ; k <= l ; ++k){ + dlmk[l][P+m][P+k] = FReal(0.0); + } + // -1^l+k * 1 where m == k + dlmk[l][P+m][P-m] = ((l+m)&0x1 ? FReal(-1) : FReal(1)); + } + } } /** Compute d_mlk for \theta = ]PI/2;PI[ @@ -571,23 +571,23 @@ class FRotationKernel : public FAbstractKernels { * \f] */ void DlmkZNegative(FReal dlmk[P+1][P2+1][P2+1], const FReal dlmkZPositif[P+1][P2+1][P2+1]) const { - for(int l = 0 ; l <= P ; ++l){ - for(int m = -l ; m <= l ; ++m){ - // if l+m is odd - if( (l+m)&0x1 ){ - // put -1 every where - for(int k = -l ; k <= l ; ++k){ - dlmk[l][P+m][P+k] = -dlmkZPositif[l][P+m][P-k]; - } - } - else{ - // else just copy - for(int k = -l ; k <= l ; ++k){ - dlmk[l][P+m][P+k] = dlmkZPositif[l][P+m][P-k]; - } - } - } - } + for(int l = 0 ; l <= P ; ++l){ + for(int m = -l ; m <= l ; ++m){ + // if l+m is odd + if( (l+m)&0x1 ){ + // put -1 every where + for(int k = -l ; k <= l ; ++k){ + dlmk[l][P+m][P+k] = -dlmkZPositif[l][P+m][P-k]; + } + } + else{ + // else just copy + for(int k = -l ; k <= l ; ++k){ + dlmk[l][P+m][P+k] = dlmkZPositif[l][P+m][P-k]; + } + } + } + } } /** Compute d_mlk for \theta = ]PI;3PI/2[ @@ -596,20 +596,20 @@ class FRotationKernel : public FAbstractKernels { * \f] */ void DlmkInverseZNegative(FReal dlmk[P+1][P2+1][P2+1], const FReal dlmkZPositif[P+1][P2+1][P2+1]) const { - for(int l = 0 ; l <= P ; ++l){ - for(int m = -l ; m <= l ; ++m){ - if( (l+m)&0x1 ){ - for(int k = -l ; k <= l ; ++k){ - dlmk[l][P+m][P+k] = -dlmkZPositif[l][P-m][P+k]; - } - } - else{ - for(int k = -l ; k <= l ; ++k){ - dlmk[l][P+m][P+k] = dlmkZPositif[l][P-m][P+k]; - } - } - } - } + for(int l = 0 ; l <= P ; ++l){ + for(int m = -l ; m <= l ; ++m){ + if( (l+m)&0x1 ){ + for(int k = -l ; k <= l ; ++k){ + dlmk[l][P+m][P+k] = -dlmkZPositif[l][P-m][P+k]; + } + } + else{ + for(int k = -l ; k <= l ; ++k){ + dlmk[l][P+m][P+k] = dlmkZPositif[l][P-m][P+k]; + } + } + } + } } /** Compute d_mlk for \theta = ]3PI/2;2PI[ @@ -618,29 +618,29 @@ class FRotationKernel : public FAbstractKernels { * \f] */ void DlmkInverse(FReal dlmk[P+1][P2+1][P2+1], const FReal dlmkZPositif[P+1][P2+1][P2+1]) const { - for(int l = 0 ; l <= P ; ++l){ - for(int m = -l ; m <= l ; ++m){ - // we start with k == -l, so if (k+m) is odd - if( (l+m)&0x1 ){ - // then we start per (-1) - for(int k = -l ; k < l ; k+=2){ - dlmk[l][P+m][P+k] = -dlmkZPositif[l][P+m][P+k]; - dlmk[l][P+m][P+k+1] = dlmkZPositif[l][P+m][P+k+1]; - } - // l is always odd - dlmk[l][P+m][P+l] = -dlmkZPositif[l][P+m][P+l]; - } - else{ - // else we start per (+1) - for(int k = -l ; k < l ; k+=2){ - dlmk[l][P+m][P+k] = dlmkZPositif[l][P+m][P+k]; - dlmk[l][P+m][P+k+1] = -dlmkZPositif[l][P+m][P+k+1]; - } - // l is always odd - dlmk[l][P+m][P+l] = dlmkZPositif[l][P+m][P+l]; - } - } - } + for(int l = 0 ; l <= P ; ++l){ + for(int m = -l ; m <= l ; ++m){ + // we start with k == -l, so if (k+m) is odd + if( (l+m)&0x1 ){ + // then we start per (-1) + for(int k = -l ; k < l ; k+=2){ + dlmk[l][P+m][P+k] = -dlmkZPositif[l][P+m][P+k]; + dlmk[l][P+m][P+k+1] = dlmkZPositif[l][P+m][P+k+1]; + } + // l is always odd + dlmk[l][P+m][P+l] = -dlmkZPositif[l][P+m][P+l]; + } + else{ + // else we start per (+1) + for(int k = -l ; k < l ; k+=2){ + dlmk[l][P+m][P+k] = dlmkZPositif[l][P+m][P+k]; + dlmk[l][P+m][P+k+1] = -dlmkZPositif[l][P+m][P+k+1]; + } + // l is always odd + dlmk[l][P+m][P+l] = dlmkZPositif[l][P+m][P+l]; + } + } + } } @@ -653,96 +653,96 @@ class FRotationKernel : public FAbstractKernels { */ // theta should be between [0;pi] as the inclinaison angle void DlmkBuild(FReal dlmk[P+1][P2+1][P2+1], const FReal inTheta) const { - FAssertLF(0 <= inTheta && inTheta < FMath::FTwoPi); - // To have constants for very used values - const FReal F0 = FReal(0.0); - const FReal F1 = FReal(1.0); - const FReal F2 = FReal(2.0); - - const FReal cosTheta = FMath::Cos(inTheta); - const FReal sinTheta = FMath::Sin(inTheta); - - // First compute g - FReal g[SizeArray]; - {// Equ 29 - // g{0,0} = 1 - g[0] = F1; - - // g{l,0} = sqrt( (2l - 1) / 2l) g{l-1,0} for l > 0 - { - int index_l0 = 1; - FReal fl = F1; - for(int l = 1; l <= P ; ++l, ++fl ){ - g[index_l0] = FMath::Sqrt((fl*F2-F1)/(fl*F2)) * g[index_l0-l]; - index_l0 += l + 1; - } - } - // g{l,m} = sqrt( (l - m + 1) / (l+m)) g{l,m-1} for l > 0, 0 < m <= l - { - int index_lm = 2; - FReal fl = F1; - for(int l = 1; l <= P ; ++l, ++fl ){ - FReal fm = F1; - for(int m = 1; m <= l ; ++m, ++index_lm, ++fm ){ - g[index_lm] = FMath::Sqrt((fl-fm+F1)/(fl+fm)) * g[index_lm-1]; - } - ++index_lm; - } - } - } - { // initial condition - // Equ 28 - // d{l,m,l} = -1^(l+m) g{l,m} (1+cos(theta))^m sin(theta)^(l-m) , For l > 0, 0 <= m <= l - int index_lm = 0; - FReal sinTheta_pow_l = F1; - for(int l = 0 ; l <= P ; ++l){ - // build variable iteratively - FReal minus_1_pow_lm = l&0x1 ? FReal(-1) : FReal(1); - FReal cosTheta_1_pow_m = F1; - FReal sinTheta_pow_l_minus_m = sinTheta_pow_l; - for(int m = 0 ; m <= l ; ++m, ++index_lm){ - dlmk[l][P+m][P+l] = minus_1_pow_lm * g[index_lm] * cosTheta_1_pow_m * sinTheta_pow_l_minus_m; - // update - minus_1_pow_lm = -minus_1_pow_lm; - cosTheta_1_pow_m *= F1 + cosTheta; - sinTheta_pow_l_minus_m /= sinTheta; - } - // update - sinTheta_pow_l *= sinTheta; - } - } - { // build the rest of the matrix - FReal fl = F1; - for(int l = 1 ; l <= P ; ++l, ++fl){ - FReal fk = fl; - for(int k = l ; k > -l ; --k, --fk){ - // Equ 25 - // For l > 0, 0 <= m < l, -l < k <= l, cos(theta) >= 0 - // d{l,m,k-1} = sqrt( l(l+1) - m(m+1) / l(l+1) - k(k-1)) d{l,m+1,k} - // + (m+k) sin(theta) d{l,m,k} / sqrt(l(l+1) - k(k-1)) (1+cos(theta)) - FReal fm = F0; - for(int m = 0 ; m < l ; ++m, ++fm){ - dlmk[l][P+m][P+k-1] = - (FMath::Sqrt((fl*(fl+F1)-fm*(fm+F1))/(fl*(fl+F1)-fk*(fk-F1))) * dlmk[l][P+m+1][P+k]) - + ((fm+fk)*sinTheta*dlmk[l][P+m][P+k]/(FMath::Sqrt(fl*(fl+F1)-fk*(fk-F1))*(F1+cosTheta))); - } - // Equ 26 - // For l > 0, -l < k <= l, cos(theta) >= 0 - // d{l,l,k-1} = (l+k) sin(theta) d{l,l,k} - // / sqrt(l(l+1)-k(k-1)) (1+cos(theta)) - dlmk[l][P+l][P+k-1] = (fl+fk)*sinTheta*dlmk[l][P+l][P+k]/(FMath::Sqrt(fl*(fl+F1)-fk*(fk-F1))*(F1+cosTheta)); - } - // Equ 27 - // d{l,m,k} = -1^(m+k) d{l,-m,-k} , For l > 0, -l <= m < 0, -l <= k <= l - for(int m = -l ; m < 0 ; ++m){ - FReal minus_1_pow_mk = (m-l)&0x1 ? FReal(-1) : FReal(1); - for(int k = -l ; k <= l ; ++k){ - dlmk[l][P+m][P+k] = minus_1_pow_mk * dlmk[l][P-m][P-k]; - minus_1_pow_mk = -minus_1_pow_mk; - } - } - } - } + FAssertLF(0 <= inTheta && inTheta < FMath::FTwoPi); + // To have constants for very used values + const FReal F0 = FReal(0.0); + const FReal F1 = FReal(1.0); + const FReal F2 = FReal(2.0); + + const FReal cosTheta = FMath::Cos(inTheta); + const FReal sinTheta = FMath::Sin(inTheta); + + // First compute g + FReal g[SizeArray]; + {// Equ 29 + // g{0,0} = 1 + g[0] = F1; + + // g{l,0} = sqrt( (2l - 1) / 2l) g{l-1,0} for l > 0 + { + int index_l0 = 1; + FReal fl = F1; + for(int l = 1; l <= P ; ++l, ++fl ){ + g[index_l0] = FMath::Sqrt((fl*F2-F1)/(fl*F2)) * g[index_l0-l]; + index_l0 += l + 1; + } + } + // g{l,m} = sqrt( (l - m + 1) / (l+m)) g{l,m-1} for l > 0, 0 < m <= l + { + int index_lm = 2; + FReal fl = F1; + for(int l = 1; l <= P ; ++l, ++fl ){ + FReal fm = F1; + for(int m = 1; m <= l ; ++m, ++index_lm, ++fm ){ + g[index_lm] = FMath::Sqrt((fl-fm+F1)/(fl+fm)) * g[index_lm-1]; + } + ++index_lm; + } + } + } + { // initial condition + // Equ 28 + // d{l,m,l} = -1^(l+m) g{l,m} (1+cos(theta))^m sin(theta)^(l-m) , For l > 0, 0 <= m <= l + int index_lm = 0; + FReal sinTheta_pow_l = F1; + for(int l = 0 ; l <= P ; ++l){ + // build variable iteratively + FReal minus_1_pow_lm = l&0x1 ? FReal(-1) : FReal(1); + FReal cosTheta_1_pow_m = F1; + FReal sinTheta_pow_l_minus_m = sinTheta_pow_l; + for(int m = 0 ; m <= l ; ++m, ++index_lm){ + dlmk[l][P+m][P+l] = minus_1_pow_lm * g[index_lm] * cosTheta_1_pow_m * sinTheta_pow_l_minus_m; + // update + minus_1_pow_lm = -minus_1_pow_lm; + cosTheta_1_pow_m *= F1 + cosTheta; + sinTheta_pow_l_minus_m /= sinTheta; + } + // update + sinTheta_pow_l *= sinTheta; + } + } + { // build the rest of the matrix + FReal fl = F1; + for(int l = 1 ; l <= P ; ++l, ++fl){ + FReal fk = fl; + for(int k = l ; k > -l ; --k, --fk){ + // Equ 25 + // For l > 0, 0 <= m < l, -l < k <= l, cos(theta) >= 0 + // d{l,m,k-1} = sqrt( l(l+1) - m(m+1) / l(l+1) - k(k-1)) d{l,m+1,k} + // + (m+k) sin(theta) d{l,m,k} / sqrt(l(l+1) - k(k-1)) (1+cos(theta)) + FReal fm = F0; + for(int m = 0 ; m < l ; ++m, ++fm){ + dlmk[l][P+m][P+k-1] = + (FMath::Sqrt((fl*(fl+F1)-fm*(fm+F1))/(fl*(fl+F1)-fk*(fk-F1))) * dlmk[l][P+m+1][P+k]) + + ((fm+fk)*sinTheta*dlmk[l][P+m][P+k]/(FMath::Sqrt(fl*(fl+F1)-fk*(fk-F1))*(F1+cosTheta))); + } + // Equ 26 + // For l > 0, -l < k <= l, cos(theta) >= 0 + // d{l,l,k-1} = (l+k) sin(theta) d{l,l,k} + // / sqrt(l(l+1)-k(k-1)) (1+cos(theta)) + dlmk[l][P+l][P+k-1] = (fl+fk)*sinTheta*dlmk[l][P+l][P+k]/(FMath::Sqrt(fl*(fl+F1)-fk*(fk-F1))*(F1+cosTheta)); + } + // Equ 27 + // d{l,m,k} = -1^(m+k) d{l,-m,-k} , For l > 0, -l <= m < 0, -l <= k <= l + for(int m = -l ; m < 0 ; ++m){ + FReal minus_1_pow_mk = (m-l)&0x1 ? FReal(-1) : FReal(1); + for(int k = -l ; k <= l ; ++k){ + dlmk[l][P+m][P+k] = minus_1_pow_mk * dlmk[l][P-m][P-k]; + minus_1_pow_mk = -minus_1_pow_mk; + } + } + } + } } /** Compute the legendre polynomial from {0,0} to {P,P} @@ -759,37 +759,37 @@ class FRotationKernel : public FAbstractKernels { * \f] */ void computeLegendre(FReal legendre[], const FReal inCosTheta, const FReal inSinTheta) const { - const FReal invSinTheta = -inSinTheta; - - legendre[0] = 1.0; // P_0,0(1) = 1 - - legendre[1] = inCosTheta; // P_1,0 = cos(theta) - legendre[2] = invSinTheta; // P_1,1 = -sin(theta) - - // work with pointers - FReal* FRestrict legendre_l1_m1 = legendre; // P{l-2,m} starts with P_{0,0} - FReal* FRestrict legendre_l1_m = legendre + 1; // P{l-1,m} starts with P_{1,0} - FReal* FRestrict legendre_lm = legendre + 3; // P{l,m} starts with P_{2,0} - - // Compute using recurrence - FReal l2_minus_1 = 3; // 2 * l - 1 - FReal fl = FReal(2.0);// To get 'l' as a float - for(int l = 2; l <= P ; ++l, ++fl ){ - FReal lm_minus_1 = fl - FReal(1.0); // l + m - 1 - FReal l_minus_m = fl; // l - m - for( int m = 0; m < l - 1 ; ++m ){ - // P_{l,m} = \frac{(2l-1) cos( \theta ) P_{l-1,m} - (l+m-1) P_{l-2,m}x}{(l-m)} - *(legendre_lm++) = (l2_minus_1 * inCosTheta * (*legendre_l1_m++) - (lm_minus_1++) * (*legendre_l1_m1++) ) - / (l_minus_m--); - } - // P_{l,l-1} = (2l-1) cos( \theta ) P_{l-1,l-1} - *(legendre_lm++) = l2_minus_1 * inCosTheta * (*legendre_l1_m); - // P_{l,l} = (2l-1) sin( \theta ) P_{l-1,l-1} - *(legendre_lm++) = l2_minus_1 * invSinTheta * (*legendre_l1_m); - // goto P_{l-1,0} - ++legendre_l1_m; - l2_minus_1 += FReal(2.0); // 2 * l - 1 => progress by two - } + const FReal invSinTheta = -inSinTheta; + + legendre[0] = 1.0; // P_0,0(1) = 1 + + legendre[1] = inCosTheta; // P_1,0 = cos(theta) + legendre[2] = invSinTheta; // P_1,1 = -sin(theta) + + // work with pointers + FReal* FRestrict legendre_l1_m1 = legendre; // P{l-2,m} starts with P_{0,0} + FReal* FRestrict legendre_l1_m = legendre + 1; // P{l-1,m} starts with P_{1,0} + FReal* FRestrict legendre_lm = legendre + 3; // P{l,m} starts with P_{2,0} + + // Compute using recurrence + FReal l2_minus_1 = 3; // 2 * l - 1 + FReal fl = FReal(2.0);// To get 'l' as a float + for(int l = 2; l <= P ; ++l, ++fl ){ + FReal lm_minus_1 = fl - FReal(1.0); // l + m - 1 + FReal l_minus_m = fl; // l - m + for( int m = 0; m < l - 1 ; ++m ){ + // P_{l,m} = \frac{(2l-1) cos( \theta ) P_{l-1,m} - (l+m-1) P_{l-2,m}x}{(l-m)} + *(legendre_lm++) = (l2_minus_1 * inCosTheta * (*legendre_l1_m++) - (lm_minus_1++) * (*legendre_l1_m1++) ) + / (l_minus_m--); + } + // P_{l,l-1} = (2l-1) cos( \theta ) P_{l-1,l-1} + *(legendre_lm++) = l2_minus_1 * inCosTheta * (*legendre_l1_m); + // P_{l,l} = (2l-1) sin( \theta ) P_{l-1,l-1} + *(legendre_lm++) = l2_minus_1 * invSinTheta * (*legendre_l1_m); + // goto P_{l-1,0} + ++legendre_l1_m; + l2_minus_1 += FReal(2.0); // 2 * l - 1 => progress by two + } } /////////////////////////////////////////////////////// @@ -802,55 +802,55 @@ class FRotationKernel : public FAbstractKernels { * multipole or local vector. * The result is copyed in vec. * Please see the structure of dlmk to understand this function. - * Warning we cast the vec FComplexe array into a FReal array + * Warning we cast the vec FComplex array into a FReal array */ - static void RotationYWithDlmk(FComplexe vec[], const FReal* dlmkCoef){ - FReal originalVec[2*SizeArray]; - FMemUtils::copyall((FComplexe*)originalVec,vec,SizeArray); - // index_lm == atLm(l,m) but progress iteratively to write the result - int index_lm = 0; - for(int l = 0 ; l <= P ; ++l){ - const FReal*const FRestrict originalVecAtL0 = originalVec + (index_lm * 2); - for(int m = 0 ; m <= l ; ++m, ++index_lm ){ - FReal res_lkm_real = 0.0; - FReal res_lkm_imag = 0.0; - // To read all "m" value for current "l" - const FReal* FRestrict iterOrignalVec = originalVecAtL0; - { // for k == 0 - // same coef for real and imaginary - res_lkm_real += (*dlmkCoef) * (*iterOrignalVec++); - res_lkm_imag += (*dlmkCoef++) * (*iterOrignalVec++); - } - for(int k = 1 ; k <= l ; ++k){ - // coef contains first real value - res_lkm_real += (*dlmkCoef++) * (*iterOrignalVec++); - // then imaginary - res_lkm_imag += (*dlmkCoef++) * (*iterOrignalVec++); - } - // save the result - vec[index_lm].setRealImag(res_lkm_real, res_lkm_imag); - } - } + static void RotationYWithDlmk(FComplex vec[], const FReal* dlmkCoef){ + FReal originalVec[2*SizeArray]; + FMemUtils::copyall((FComplex*)originalVec,vec,SizeArray); + // index_lm == atLm(l,m) but progress iteratively to write the result + int index_lm = 0; + for(int l = 0 ; l <= P ; ++l){ + const FReal*const FRestrict originalVecAtL0 = originalVec + (index_lm * 2); + for(int m = 0 ; m <= l ; ++m, ++index_lm ){ + FReal res_lkm_real = 0.0; + FReal res_lkm_imag = 0.0; + // To read all "m" value for current "l" + const FReal* FRestrict iterOrignalVec = originalVecAtL0; + { // for k == 0 + // same coef for real and imaginary + res_lkm_real += (*dlmkCoef) * (*iterOrignalVec++); + res_lkm_imag += (*dlmkCoef++) * (*iterOrignalVec++); + } + for(int k = 1 ; k <= l ; ++k){ + // coef contains first real value + res_lkm_real += (*dlmkCoef++) * (*iterOrignalVec++); + // then imaginary + res_lkm_imag += (*dlmkCoef++) * (*iterOrignalVec++); + } + // save the result + vec[index_lm].setRealImag(res_lkm_real, res_lkm_imag); + } + } } /** This function is computing dest[:] *= src[:] - * it computes inSize FComplexe multiplication + * it computes inSize FComplex multiplication * to do so we first proceed per 4 and the the inSize%4 rest */ - static void RotationZVectorsMul(FComplexe* FRestrict dest, const FComplexe* FRestrict src, const int inSize = SizeArray){ - const FComplexe*const FRestrict lastElement = dest + inSize; - const FComplexe*const FRestrict intermediateLastElement = dest + (inSize & ~0x3); - // first the inSize - inSize%4 elements - for(; dest != intermediateLastElement ;) { - (*dest++) *= (*src++); - (*dest++) *= (*src++); - (*dest++) *= (*src++); - (*dest++) *= (*src++); - } - // then the rest - for(; dest != lastElement ;) { - (*dest++) *= (*src++); - } + static void RotationZVectorsMul(FComplex* FRestrict dest, const FComplex* FRestrict src, const int inSize = SizeArray){ + const FComplex*const FRestrict lastElement = dest + inSize; + const FComplex*const FRestrict intermediateLastElement = dest + (inSize & ~0x3); + // first the inSize - inSize%4 elements + for(; dest != intermediateLastElement ;) { + (*dest++) *= (*src++); + (*dest++) *= (*src++); + (*dest++) *= (*src++); + (*dest++) *= (*src++); + } + // then the rest + for(; dest != lastElement ;) { + (*dest++) *= (*src++); + } } /////////////////////////////////////////////////////// @@ -862,10 +862,10 @@ class FRotationKernel : public FAbstractKernels { * This is used only for the leaf */ FPoint getLeafCenter(const FTreeCoordinate coordinate) const { - return FPoint( - FReal(coordinate.getX()) * widthAtLeafLevel + widthAtLeafLevelDiv2 + boxCorner.getX(), - FReal(coordinate.getY()) * widthAtLeafLevel + widthAtLeafLevelDiv2 + boxCorner.getY(), - FReal(coordinate.getZ()) * widthAtLeafLevel + widthAtLeafLevelDiv2 + boxCorner.getZ()); + return FPoint( + FReal(coordinate.getX()) * widthAtLeafLevel + widthAtLeafLevelDiv2 + boxCorner.getX(), + FReal(coordinate.getY()) * widthAtLeafLevel + widthAtLeafLevelDiv2 + boxCorner.getY(), + FReal(coordinate.getZ()) * widthAtLeafLevel + widthAtLeafLevelDiv2 + boxCorner.getZ()); } /** Return position in the array of the l/m couple @@ -876,24 +876,24 @@ class FRotationKernel : public FAbstractKernels { * 6 7 8 9 ... */ int atLm(const int l, const int m) const { - // summation series over l + m => (l*(l+1))/2 + m - return ((l*(l+1))>>1) + m; + // summation series over l + m => (l*(l+1))/2 + m + return ((l*(l+1))>>1) + m; } public: /** Constructor, needs system information */ FRotationKernel( const int inTreeHeight, const FReal inBoxWidth, const FPoint& inBoxCenter) : - boxWidth(inBoxWidth), - treeHeight(inTreeHeight), - widthAtLeafLevel(inBoxWidth/FReal(1 << (inTreeHeight-1))), - widthAtLeafLevelDiv2(widthAtLeafLevel/2), - boxCorner(inBoxCenter.getX()-(inBoxWidth/2),inBoxCenter.getY()-(inBoxWidth/2),inBoxCenter.getZ()-(inBoxWidth/2)) - { - // simply does the precomputation - precomputeFactorials(); - precomputeTranslationCoef(); - precomputeRotationVectors(); + boxWidth(inBoxWidth), + treeHeight(inTreeHeight), + widthAtLeafLevel(inBoxWidth/FReal(1 << (inTreeHeight-1))), + widthAtLeafLevelDiv2(widthAtLeafLevel/2), + boxCorner(inBoxCenter.getX()-(inBoxWidth/2),inBoxCenter.getY()-(inBoxWidth/2),inBoxCenter.getZ()-(inBoxWidth/2)) + { + // simply does the precomputation + precomputeFactorials(); + precomputeTranslationCoef(); + precomputeRotationVectors(); } /** Default destructor */ @@ -909,55 +909,55 @@ public: * \f] */ void P2M(CellClass* const inPole, const ContainerClass* const inParticles ) { - const FReal i_pow_m[4] = {0, FMath::FPiDiv2, FMath::FPi, -FMath::FPiDiv2}; - // w is the multipole moment - FComplexe* FRestrict const w = inPole->getMultipole(); - - // Copying the position is faster than using cell position - const FPoint cellPosition = getLeafCenter(inPole->getCoordinate()); - - // We need a legendre array - FReal legendre[SizeArray]; - FReal angles[P+1][2]; - - // For all particles in the leaf box - const FReal*const physicalValues = inParticles->getPhysicalValues(); - const FReal*const positionsX = inParticles->getPositions()[0]; - const FReal*const positionsY = inParticles->getPositions()[1]; - const FReal*const positionsZ = inParticles->getPositions()[2]; - - for(int idxPart = 0 ; idxPart < inParticles->getNbParticles() ; ++ idxPart){ - // P2M - const FPoint position(positionsX[idxPart],positionsY[idxPart],positionsZ[idxPart]); - const FSpherical sph(position - cellPosition); - - // The physical value (charge, mass) - const FReal q = physicalValues[idxPart]; - // The distance between the SH and the particle - const FReal a = sph.getR(); - - // Compute the legendre polynomial - computeLegendre(legendre, sph.getCosTheta(), sph.getSinTheta()); - - // w{l,m}(q,a) = q a^l/(l+|m|)! P{l,m}(cos(alpha)) exp(-i m Beta) - FReal q_aPowL = q; // To consutrct q*a^l continously - int index_l_m = 0; // To construct the index of (l,m) continously - FReal fl = 0.0; - for(int l = 0 ; l <= P ; ++l, ++fl ){ - { // We need to compute the angles to use in the "m" loop - // So we can compute only the one needed after "l" inc - const FReal angle = fl * sph.getPhi() + i_pow_m[l & 0x3]; - angles[l][0] = FMath::Cos(angle); - angles[l][1] = FMath::Sin(angle); - } - for(int m = 0 ; m <= l ; ++m, ++index_l_m){ - const FReal magnitude = q_aPowL * legendre[index_l_m] / factorials[l+m]; - w[index_l_m].incReal(magnitude * angles[m][0]); - w[index_l_m].incImag(magnitude * angles[m][1]); - } - q_aPowL *= a; - } - } + const FReal i_pow_m[4] = {0, FMath::FPiDiv2, FMath::FPi, -FMath::FPiDiv2}; + // w is the multipole moment + FComplex* FRestrict const w = inPole->getMultipole(); + + // Copying the position is faster than using cell position + const FPoint cellPosition = getLeafCenter(inPole->getCoordinate()); + + // We need a legendre array + FReal legendre[SizeArray]; + FReal angles[P+1][2]; + + // For all particles in the leaf box + const FReal*const physicalValues = inParticles->getPhysicalValues(); + const FReal*const positionsX = inParticles->getPositions()[0]; + const FReal*const positionsY = inParticles->getPositions()[1]; + const FReal*const positionsZ = inParticles->getPositions()[2]; + + for(int idxPart = 0 ; idxPart < inParticles->getNbParticles() ; ++ idxPart){ + // P2M + const FPoint position(positionsX[idxPart],positionsY[idxPart],positionsZ[idxPart]); + const FSpherical sph(position - cellPosition); + + // The physical value (charge, mass) + const FReal q = physicalValues[idxPart]; + // The distance between the SH and the particle + const FReal a = sph.getR(); + + // Compute the legendre polynomial + computeLegendre(legendre, sph.getCosTheta(), sph.getSinTheta()); + + // w{l,m}(q,a) = q a^l/(l+|m|)! P{l,m}(cos(alpha)) exp(-i m Beta) + FReal q_aPowL = q; // To consutrct q*a^l continously + int index_l_m = 0; // To construct the index of (l,m) continously + FReal fl = 0.0; + for(int l = 0 ; l <= P ; ++l, ++fl ){ + { // We need to compute the angles to use in the "m" loop + // So we can compute only the one needed after "l" inc + const FReal angle = fl * sph.getPhi() + i_pow_m[l & 0x3]; + angles[l][0] = FMath::Cos(angle); + angles[l][1] = FMath::Sin(angle); + } + for(int m = 0 ; m <= l ; ++m, ++index_l_m){ + const FReal magnitude = q_aPowL * legendre[index_l_m] / factorials[l+m]; + w[index_l_m].incReal(magnitude * angles[m][0]); + w[index_l_m].incImag(magnitude * angles[m][1]); + } + q_aPowL *= a; + } + } } /** M2M @@ -972,48 +972,48 @@ public: * and finaly rotate back. */ void M2M(CellClass* const FRestrict inPole, const CellClass*const FRestrict *const FRestrict inChildren, const int inLevel) { - // Get the translation coef for this level (same for all child) - const FReal*const coef = M2MTranslationCoef[inLevel]; - // A buffer to copy the source w allocated once - FComplexe source_w[SizeArray]; - // For all children - for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ - // if child exists - if(inChildren[idxChild]){ - // Copy the source - FMemUtils::copyall(source_w, inChildren[idxChild]->getMultipole(), SizeArray); - - // rotate it forward - RotationZVectorsMul(source_w,rotationExpMinusImPhi[idxChild]); - RotationYWithDlmk(source_w,DlmkCoefOTheta[idxChild]); - - // Translate it - FComplexe target_w[SizeArray]; - int index_lm = 0; - for(int l = 0 ; l <= P ; ++l ){ - for(int m = 0 ; m <= l ; ++m, ++index_lm ){ - // w{l,m}(a+b) = sum(j=m:l, b^(l-j)/(l-j)! w{j,m}(a) - FReal w_lm_real = 0.0; - FReal w_lm_imag = 0.0; - int index_jm = atLm(m,m); // get atLm(l,m) - int index_l_minus_j = l-m; // get l-j continuously - for(int j = m ; j <= l ; ++j, --index_l_minus_j, index_jm += j ){ - //const coef = (b^l-j) / (l-j)!; - w_lm_real += coef[index_l_minus_j] * source_w[index_jm].getReal(); - w_lm_imag += coef[index_l_minus_j] * source_w[index_jm].getImag(); - } - target_w[index_lm].setRealImag(w_lm_real,w_lm_imag); - } - } - - // Rotate it back - RotationYWithDlmk(target_w,DlmkCoefOMinusTheta[idxChild]); - RotationZVectorsMul(target_w,rotationExpImPhi[idxChild]); - - // Sum the result - FMemUtils::addall( inPole->getMultipole(), target_w, SizeArray); - } - } + // Get the translation coef for this level (same for all child) + const FReal*const coef = M2MTranslationCoef[inLevel]; + // A buffer to copy the source w allocated once + FComplex source_w[SizeArray]; + // For all children + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + // if child exists + if(inChildren[idxChild]){ + // Copy the source + FMemUtils::copyall(source_w, inChildren[idxChild]->getMultipole(), SizeArray); + + // rotate it forward + RotationZVectorsMul(source_w,rotationExpMinusImPhi[idxChild]); + RotationYWithDlmk(source_w,DlmkCoefOTheta[idxChild]); + + // Translate it + FComplex target_w[SizeArray]; + int index_lm = 0; + for(int l = 0 ; l <= P ; ++l ){ + for(int m = 0 ; m <= l ; ++m, ++index_lm ){ + // w{l,m}(a+b) = sum(j=m:l, b^(l-j)/(l-j)! w{j,m}(a) + FReal w_lm_real = 0.0; + FReal w_lm_imag = 0.0; + int index_jm = atLm(m,m); // get atLm(l,m) + int index_l_minus_j = l-m; // get l-j continuously + for(int j = m ; j <= l ; ++j, --index_l_minus_j, index_jm += j ){ + //const coef = (b^l-j) / (l-j)!; + w_lm_real += coef[index_l_minus_j] * source_w[index_jm].getReal(); + w_lm_imag += coef[index_l_minus_j] * source_w[index_jm].getImag(); + } + target_w[index_lm].setRealImag(w_lm_real,w_lm_imag); + } + } + + // Rotate it back + RotationYWithDlmk(target_w,DlmkCoefOMinusTheta[idxChild]); + RotationZVectorsMul(target_w,rotationExpImPhi[idxChild]); + + // Sum the result + FMemUtils::addall( inPole->getMultipole(), target_w, SizeArray); + } + } } /** M2L @@ -1028,50 +1028,50 @@ public: * and finaly rotate back. */ void M2L(CellClass* const FRestrict inLocal, const CellClass* inInteractions[], const int /*inSize*/, const int inLevel) { - // To copy the multipole data allocated once - FComplexe source_w[SizeArray]; - // For all children - for(int idxNeigh = 0 ; idxNeigh < 343 ; ++idxNeigh){ - // if interaction exits - if(inInteractions[idxNeigh]){ - const FReal*const coef = M2LTranslationCoef[inLevel][idxNeigh]; - // Copy multipole data into buffer - FMemUtils::copyall(source_w, inInteractions[idxNeigh]->getMultipole(), SizeArray); - - // Rotate - RotationZVectorsMul(source_w,rotationM2LExpMinusImPhi[idxNeigh]); - RotationYWithDlmk(source_w,DlmkCoefM2LOTheta[idxNeigh]); - - // Transfer to u - FComplexe target_u[SizeArray]; - int index_lm = 0; - for(int l = 0 ; l <= P ; ++l ){ - FReal minus_1_pow_m = 1.0; - for(int m = 0 ; m <= l ; ++m, ++index_lm ){ - // u{l,m}(a-b) = sum(j=|m|:P-l, (j+l)!/b^(j+l+1) w{j,-m}(a) - FReal u_lm_real = 0.0; - FReal u_lm_imag = 0.0; - int index_jl = m + l; // get j+l - int index_jm = atLm(m,m); // get atLm(l,m) - for(int j = m ; j <= P-l ; ++j, ++index_jl, index_jm += j ){ - // coef = (j+l)!/b^(j+l+1) - // because {l,-m} => {l,m} conjugate -1^m with -i - u_lm_real += minus_1_pow_m * coef[index_jl] * source_w[index_jm].getReal(); - u_lm_imag -= minus_1_pow_m * coef[index_jl] * source_w[index_jm].getImag(); - } - target_u[index_lm].setRealImag(u_lm_real,u_lm_imag); - minus_1_pow_m = -minus_1_pow_m; - } - } - - // Rotate it back - RotationYWithDlmk(target_u,DlmkCoefM2LMMinusTheta[idxNeigh]); - RotationZVectorsMul(target_u,rotationM2LExpMinusImPhi[idxNeigh]); - - // Sum - FMemUtils::addall(inLocal->getLocal(), target_u, SizeArray); - } - } + // To copy the multipole data allocated once + FComplex source_w[SizeArray]; + // For all children + for(int idxNeigh = 0 ; idxNeigh < 343 ; ++idxNeigh){ + // if interaction exits + if(inInteractions[idxNeigh]){ + const FReal*const coef = M2LTranslationCoef[inLevel][idxNeigh]; + // Copy multipole data into buffer + FMemUtils::copyall(source_w, inInteractions[idxNeigh]->getMultipole(), SizeArray); + + // Rotate + RotationZVectorsMul(source_w,rotationM2LExpMinusImPhi[idxNeigh]); + RotationYWithDlmk(source_w,DlmkCoefM2LOTheta[idxNeigh]); + + // Transfer to u + FComplex target_u[SizeArray]; + int index_lm = 0; + for(int l = 0 ; l <= P ; ++l ){ + FReal minus_1_pow_m = 1.0; + for(int m = 0 ; m <= l ; ++m, ++index_lm ){ + // u{l,m}(a-b) = sum(j=|m|:P-l, (j+l)!/b^(j+l+1) w{j,-m}(a) + FReal u_lm_real = 0.0; + FReal u_lm_imag = 0.0; + int index_jl = m + l; // get j+l + int index_jm = atLm(m,m); // get atLm(l,m) + for(int j = m ; j <= P-l ; ++j, ++index_jl, index_jm += j ){ + // coef = (j+l)!/b^(j+l+1) + // because {l,-m} => {l,m} conjugate -1^m with -i + u_lm_real += minus_1_pow_m * coef[index_jl] * source_w[index_jm].getReal(); + u_lm_imag -= minus_1_pow_m * coef[index_jl] * source_w[index_jm].getImag(); + } + target_u[index_lm].setRealImag(u_lm_real,u_lm_imag); + minus_1_pow_m = -minus_1_pow_m; + } + } + + // Rotate it back + RotationYWithDlmk(target_u,DlmkCoefM2LMMinusTheta[idxNeigh]); + RotationZVectorsMul(target_u,rotationM2LExpMinusImPhi[idxNeigh]); + + // Sum + FMemUtils::addall(inLocal->getLocal(), target_u, SizeArray); + } + } } /** L2L @@ -1086,47 +1086,47 @@ public: * and finaly rotate back. */ void L2L(const CellClass* const FRestrict inLocal, CellClass* FRestrict *const FRestrict inChildren, const int inLevel) { - // Get the translation coef for this level (same for all chidl) - const FReal*const coef = L2LTranslationCoef[inLevel]; - // To copy the source local allocated once - FComplexe source_u[SizeArray]; - // For all children - for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ - // if child exists - if(inChildren[idxChild]){ - // Copy the local data into the buffer - FMemUtils::copyall(source_u, inLocal->getLocal(), SizeArray); - - // Rotate - RotationZVectorsMul(source_u,rotationExpImPhi[idxChild]); - RotationYWithDlmk(source_u,DlmkCoefMTheta[idxChild]); - - // Translate - FComplexe target_u[SizeArray]; - for(int l = 0 ; l <= P ; ++l ){ - for(int m = 0 ; m <= l ; ++m ){ - // u{l,m}(r-b) = sum(j=0:P, b^(j-l)/(j-l)! u{j,m}(r); - FReal u_lm_real = 0.0; - FReal u_lm_imag = 0.0; - int index_jm = atLm(l,m); // get atLm(j,m) - int index_j_minus_l = 0; // get l-j continously - for(int j = l ; j <= P ; ++j, ++index_j_minus_l, index_jm += j){ - // coef = b^j-l/j-l! - u_lm_real += coef[index_j_minus_l] * source_u[index_jm].getReal(); - u_lm_imag += coef[index_j_minus_l] * source_u[index_jm].getImag(); - } - target_u[atLm(l,m)].setRealImag(u_lm_real,u_lm_imag); - } - } - - // Rotate - RotationYWithDlmk(target_u,DlmkCoefMMinusTheta[idxChild]); - RotationZVectorsMul(target_u,rotationExpMinusImPhi[idxChild]); - - // Sum in child - FMemUtils::addall(inChildren[idxChild]->getLocal(), target_u, SizeArray); - } - } + // Get the translation coef for this level (same for all chidl) + const FReal*const coef = L2LTranslationCoef[inLevel]; + // To copy the source local allocated once + FComplex source_u[SizeArray]; + // For all children + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + // if child exists + if(inChildren[idxChild]){ + // Copy the local data into the buffer + FMemUtils::copyall(source_u, inLocal->getLocal(), SizeArray); + + // Rotate + RotationZVectorsMul(source_u,rotationExpImPhi[idxChild]); + RotationYWithDlmk(source_u,DlmkCoefMTheta[idxChild]); + + // Translate + FComplex target_u[SizeArray]; + for(int l = 0 ; l <= P ; ++l ){ + for(int m = 0 ; m <= l ; ++m ){ + // u{l,m}(r-b) = sum(j=0:P, b^(j-l)/(j-l)! u{j,m}(r); + FReal u_lm_real = 0.0; + FReal u_lm_imag = 0.0; + int index_jm = atLm(l,m); // get atLm(j,m) + int index_j_minus_l = 0; // get l-j continously + for(int j = l ; j <= P ; ++j, ++index_j_minus_l, index_jm += j){ + // coef = b^j-l/j-l! + u_lm_real += coef[index_j_minus_l] * source_u[index_jm].getReal(); + u_lm_imag += coef[index_j_minus_l] * source_u[index_jm].getImag(); + } + target_u[atLm(l,m)].setRealImag(u_lm_real,u_lm_imag); + } + } + + // Rotate + RotationYWithDlmk(target_u,DlmkCoefMMinusTheta[idxChild]); + RotationZVectorsMul(target_u,rotationExpMinusImPhi[idxChild]); + + // Sum in child + FMemUtils::addall(inChildren[idxChild]->getLocal(), target_u, SizeArray); + } + } } /** L2P @@ -1148,156 +1148,156 @@ public: * \f] */ void L2P(const CellClass* const inLocal, ContainerClass* const inParticles){ - const FReal i_pow_m[4] = {0, FMath::FPiDiv2, FMath::FPi, -FMath::FPiDiv2}; - // Take the local value from the cell - const FComplexe* FRestrict const u = inLocal->getLocal(); - - // Copying the position is faster than using cell position - const FPoint cellPosition = getLeafCenter(inLocal->getCoordinate()); - - // For all particles in the leaf box - const FReal*const physicalValues = inParticles->getPhysicalValues(); - const FReal*const positionsX = inParticles->getPositions()[0]; - const FReal*const positionsY = inParticles->getPositions()[1]; - const FReal*const positionsZ = inParticles->getPositions()[2]; - FReal*const forcesX = inParticles->getForcesX(); - FReal*const forcesY = inParticles->getForcesY(); - FReal*const forcesZ = inParticles->getForcesZ(); - FReal*const potentials = inParticles->getPotentials(); - - for(int idxPart = 0 ; idxPart < inParticles->getNbParticles() ; ++ idxPart){ - // L2P - const FPoint position(positionsX[idxPart],positionsY[idxPart],positionsZ[idxPart]); - const FSpherical sph(position - cellPosition); - - // The distance between the SH and the particle - const FReal r = sph.getR(); - - // Compute the legendre polynomial - FReal legendre[SizeArray]; - computeLegendre(legendre, sph.getCosTheta(), sph.getSinTheta()); - - // pre compute what is used more than once - FReal minus_r_pow_l_div_fact_lm[SizeArray]; - FReal minus_r_pow_l_legendre_div_fact_lm[SizeArray]; - { - int index_lm = 0; - FReal minus_r_pow_l = 1.0; // To get (-1*r)^l - for(int l = 0 ; l <= P ; ++l){ - for(int m = 0 ; m <= l ; ++m, ++index_lm){ - minus_r_pow_l_div_fact_lm[index_lm] = minus_r_pow_l / factorials[l+m]; - minus_r_pow_l_legendre_div_fact_lm[index_lm] = minus_r_pow_l_div_fact_lm[index_lm] * legendre[index_lm]; - } - minus_r_pow_l *= -r; - } - } - // pre compute what is use more than once - FReal cos_m_phi_i_pow_m[P+1]; - FReal sin_m_phi_i_pow_m[P+1]; - { - for(int m = 0 ; m <= P ; ++m){ - const FReal m_phi_i_pow_m = FReal(m) * sph.getPhi() + i_pow_m[m & 0x3]; - cos_m_phi_i_pow_m[m] = FMath::Cos(m_phi_i_pow_m); - sin_m_phi_i_pow_m[m] = FMath::Sin(m_phi_i_pow_m); - } - } - - // compute the forces - { - FReal Fr = 0; - FReal FO = 0; - FReal Fp = 0; - - int index_lm = 1; // To get atLm(l,m), warning starts with l = 1 - FReal fl = 1.0; // To get "l" as a float - - for(int l = 1 ; l <= P ; ++l, ++fl){ - // first m == 0 - { - Fr += fl * u[index_lm].getReal() * minus_r_pow_l_legendre_div_fact_lm[index_lm]; - } - { - const FReal coef = minus_r_pow_l_div_fact_lm[index_lm] * (fl * (sph.getCosTheta()*legendre[index_lm] - - legendre[index_lm-l]) / sph.getSinTheta()); - const FReal dI_real = coef; - // F(O) += 2 * Real(L dI/dO) - FO += u[index_lm].getReal() * dI_real; - } - ++index_lm; - // then 0 < m - for(int m = 1 ; m <= l ; ++m, ++index_lm){ - { - const FReal coef = minus_r_pow_l_legendre_div_fact_lm[index_lm]; - const FReal I_real = coef * cos_m_phi_i_pow_m[m]; - const FReal I_imag = coef * sin_m_phi_i_pow_m[m]; - // F(r) += 2 x l x Real(LI) - Fr += 2 * fl * (u[index_lm].getReal() * I_real - u[index_lm].getImag() * I_imag); - // F(p) += -2 x m x Imag(LI) - Fp -= 2 * FReal(m) * (u[index_lm].getReal() * I_imag + u[index_lm].getImag() * I_real); - } - { - const FReal legendre_l_minus_1 = (m == l) ? FReal(0.0) : FReal(l+m)*legendre[index_lm-l]; - const FReal coef = minus_r_pow_l_div_fact_lm[index_lm] * ((fl * sph.getCosTheta()*legendre[index_lm] - - legendre_l_minus_1) / sph.getSinTheta()); - const FReal dI_real = coef * cos_m_phi_i_pow_m[m]; - const FReal dI_imag = coef * sin_m_phi_i_pow_m[m]; - // F(O) += 2 * Real(L dI/dO) - FO += FReal(2.0) * (u[index_lm].getReal() * dI_real - u[index_lm].getImag() * dI_imag); - } - } - } - // div by r - Fr /= sph.getR(); - FO /= sph.getR(); - Fp /= sph.getR() * sph.getSinTheta(); - - // copy variable from spherical position - const FReal cosPhi = FMath::Cos(sph.getPhi()); - const FReal sinPhi = FMath::Sin(sph.getPhi()); - const FReal physicalValue = physicalValues[idxPart]; - - // compute forces - const FReal forceX = ( - cosPhi * sph.getSinTheta() * Fr + - cosPhi * sph.getCosTheta() * FO + - (-sinPhi) * Fp) * physicalValue; - - const FReal forceY = ( - sinPhi * sph.getSinTheta() * Fr + - sinPhi * sph.getCosTheta() * FO + - cosPhi * Fp) * physicalValue; - - const FReal forceZ = ( - sph.getCosTheta() * Fr + - (-sph.getSinTheta()) * FO) * physicalValue; - - // inc particles forces - forcesX[idxPart] += forceX; - forcesY[idxPart] += forceY; - forcesZ[idxPart] += forceZ; - } - // compute the potential - { - FReal magnitude = 0; - // E = sum( l = 0:P, sum(m = -l:l, u{l,m} )) - int index_lm = 0; - for(int l = 0 ; l <= P ; ++l ){ - {//for m == 0 - // (l-|m|)! * P{l,0} / r^(l+1) - magnitude += u[index_lm].getReal() * minus_r_pow_l_legendre_div_fact_lm[index_lm]; - ++index_lm; - } - for(int m = 1 ; m <= l ; ++m, ++index_lm ){ - const FReal coef = minus_r_pow_l_legendre_div_fact_lm[index_lm]; - const FReal I_real = coef * cos_m_phi_i_pow_m[m]; - const FReal I_imag = coef * sin_m_phi_i_pow_m[m]; - magnitude += FReal(2.0) * ( u[index_lm].getReal() * I_real - u[index_lm].getImag() * I_imag ); - } - } - // inc potential - potentials[idxPart] += magnitude; - } - } + const FReal i_pow_m[4] = {0, FMath::FPiDiv2, FMath::FPi, -FMath::FPiDiv2}; + // Take the local value from the cell + const FComplex* FRestrict const u = inLocal->getLocal(); + + // Copying the position is faster than using cell position + const FPoint cellPosition = getLeafCenter(inLocal->getCoordinate()); + + // For all particles in the leaf box + const FReal*const physicalValues = inParticles->getPhysicalValues(); + const FReal*const positionsX = inParticles->getPositions()[0]; + const FReal*const positionsY = inParticles->getPositions()[1]; + const FReal*const positionsZ = inParticles->getPositions()[2]; + FReal*const forcesX = inParticles->getForcesX(); + FReal*const forcesY = inParticles->getForcesY(); + FReal*const forcesZ = inParticles->getForcesZ(); + FReal*const potentials = inParticles->getPotentials(); + + for(int idxPart = 0 ; idxPart < inParticles->getNbParticles() ; ++ idxPart){ + // L2P + const FPoint position(positionsX[idxPart],positionsY[idxPart],positionsZ[idxPart]); + const FSpherical sph(position - cellPosition); + + // The distance between the SH and the particle + const FReal r = sph.getR(); + + // Compute the legendre polynomial + FReal legendre[SizeArray]; + computeLegendre(legendre, sph.getCosTheta(), sph.getSinTheta()); + + // pre compute what is used more than once + FReal minus_r_pow_l_div_fact_lm[SizeArray]; + FReal minus_r_pow_l_legendre_div_fact_lm[SizeArray]; + { + int index_lm = 0; + FReal minus_r_pow_l = 1.0; // To get (-1*r)^l + for(int l = 0 ; l <= P ; ++l){ + for(int m = 0 ; m <= l ; ++m, ++index_lm){ + minus_r_pow_l_div_fact_lm[index_lm] = minus_r_pow_l / factorials[l+m]; + minus_r_pow_l_legendre_div_fact_lm[index_lm] = minus_r_pow_l_div_fact_lm[index_lm] * legendre[index_lm]; + } + minus_r_pow_l *= -r; + } + } + // pre compute what is use more than once + FReal cos_m_phi_i_pow_m[P+1]; + FReal sin_m_phi_i_pow_m[P+1]; + { + for(int m = 0 ; m <= P ; ++m){ + const FReal m_phi_i_pow_m = FReal(m) * sph.getPhi() + i_pow_m[m & 0x3]; + cos_m_phi_i_pow_m[m] = FMath::Cos(m_phi_i_pow_m); + sin_m_phi_i_pow_m[m] = FMath::Sin(m_phi_i_pow_m); + } + } + + // compute the forces + { + FReal Fr = 0; + FReal FO = 0; + FReal Fp = 0; + + int index_lm = 1; // To get atLm(l,m), warning starts with l = 1 + FReal fl = 1.0; // To get "l" as a float + + for(int l = 1 ; l <= P ; ++l, ++fl){ + // first m == 0 + { + Fr += fl * u[index_lm].getReal() * minus_r_pow_l_legendre_div_fact_lm[index_lm]; + } + { + const FReal coef = minus_r_pow_l_div_fact_lm[index_lm] * (fl * (sph.getCosTheta()*legendre[index_lm] + - legendre[index_lm-l]) / sph.getSinTheta()); + const FReal dI_real = coef; + // F(O) += 2 * Real(L dI/dO) + FO += u[index_lm].getReal() * dI_real; + } + ++index_lm; + // then 0 < m + for(int m = 1 ; m <= l ; ++m, ++index_lm){ + { + const FReal coef = minus_r_pow_l_legendre_div_fact_lm[index_lm]; + const FReal I_real = coef * cos_m_phi_i_pow_m[m]; + const FReal I_imag = coef * sin_m_phi_i_pow_m[m]; + // F(r) += 2 x l x Real(LI) + Fr += 2 * fl * (u[index_lm].getReal() * I_real - u[index_lm].getImag() * I_imag); + // F(p) += -2 x m x Imag(LI) + Fp -= 2 * FReal(m) * (u[index_lm].getReal() * I_imag + u[index_lm].getImag() * I_real); + } + { + const FReal legendre_l_minus_1 = (m == l) ? FReal(0.0) : FReal(l+m)*legendre[index_lm-l]; + const FReal coef = minus_r_pow_l_div_fact_lm[index_lm] * ((fl * sph.getCosTheta()*legendre[index_lm] + - legendre_l_minus_1) / sph.getSinTheta()); + const FReal dI_real = coef * cos_m_phi_i_pow_m[m]; + const FReal dI_imag = coef * sin_m_phi_i_pow_m[m]; + // F(O) += 2 * Real(L dI/dO) + FO += FReal(2.0) * (u[index_lm].getReal() * dI_real - u[index_lm].getImag() * dI_imag); + } + } + } + // div by r + Fr /= sph.getR(); + FO /= sph.getR(); + Fp /= sph.getR() * sph.getSinTheta(); + + // copy variable from spherical position + const FReal cosPhi = FMath::Cos(sph.getPhi()); + const FReal sinPhi = FMath::Sin(sph.getPhi()); + const FReal physicalValue = physicalValues[idxPart]; + + // compute forces + const FReal forceX = ( + cosPhi * sph.getSinTheta() * Fr + + cosPhi * sph.getCosTheta() * FO + + (-sinPhi) * Fp) * physicalValue; + + const FReal forceY = ( + sinPhi * sph.getSinTheta() * Fr + + sinPhi * sph.getCosTheta() * FO + + cosPhi * Fp) * physicalValue; + + const FReal forceZ = ( + sph.getCosTheta() * Fr + + (-sph.getSinTheta()) * FO) * physicalValue; + + // inc particles forces + forcesX[idxPart] += forceX; + forcesY[idxPart] += forceY; + forcesZ[idxPart] += forceZ; + } + // compute the potential + { + FReal magnitude = 0; + // E = sum( l = 0:P, sum(m = -l:l, u{l,m} )) + int index_lm = 0; + for(int l = 0 ; l <= P ; ++l ){ + {//for m == 0 + // (l-|m|)! * P{l,0} / r^(l+1) + magnitude += u[index_lm].getReal() * minus_r_pow_l_legendre_div_fact_lm[index_lm]; + ++index_lm; + } + for(int m = 1 ; m <= l ; ++m, ++index_lm ){ + const FReal coef = minus_r_pow_l_legendre_div_fact_lm[index_lm]; + const FReal I_real = coef * cos_m_phi_i_pow_m[m]; + const FReal I_imag = coef * sin_m_phi_i_pow_m[m]; + magnitude += FReal(2.0) * ( u[index_lm].getReal() * I_real - u[index_lm].getImag() * I_imag ); + } + } + // inc potential + potentials[idxPart] += magnitude; + } + } } @@ -1308,17 +1308,17 @@ public: * Calling this method in multi thread should be done carrefully. */ void P2P(const FTreeCoordinate& /*inPosition*/, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, - ContainerClass* const inNeighbors[27], const int /*inSize*/){ - FP2PR::FullMutual(inTargets,inNeighbors,14); + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const inNeighbors[27], const int /*inSize*/){ + FP2PR::FullMutual(inTargets,inNeighbors,14); } /** Use mutual even if it not useful and call particlesMutualInteraction */ void P2PRemote(const FTreeCoordinate& /*inPosition*/, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, - ContainerClass* const inNeighbors[27], const int /*inSize*/){ - FP2PR::FullRemote(inTargets,inNeighbors,27); + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const inNeighbors[27], const int /*inSize*/){ + FP2PR::FullRemote(inTargets,inNeighbors,27); } }; diff --git a/Src/Kernels/Rotation/FRotationOriginalKernel.hpp b/Src/Kernels/Rotation/FRotationOriginalKernel.hpp index 27dd8e508dbc016b921281b981644001b341a1e7..15e37e861faf37c201fab808dd6f1779d1eb5060 100755 --- a/Src/Kernels/Rotation/FRotationOriginalKernel.hpp +++ b/Src/Kernels/Rotation/FRotationOriginalKernel.hpp @@ -18,7 +18,7 @@ #include "../../Components/FAbstractKernels.hpp" #include "../../Utils/FSmartPointer.hpp" -#include "../../Utils/FComplexe.hpp" +#include "../../Utils/FComplex.hpp" #include "../../Utils/FMemUtils.hpp" #include "../../Utils/FSpherical.hpp" @@ -192,12 +192,12 @@ class FRotationOriginalKernel : public FAbstractKernelsgetMultipole(); + FComplex* FRestrict const w = inPole->getMultipole(); // Copying the position is faster than using cell position const FPoint cellPosition = getLeafCenter(inPole->getCoordinate()); @@ -498,7 +498,7 @@ public: */ void M2M(CellClass* const FRestrict inPole, const CellClass*const FRestrict *const FRestrict inChildren, const int inLevel) { // A buffer to copy the source w allocated once - FComplexe source_w[SizeArray]; + FComplex source_w[SizeArray]; // For all children for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ // if child exists @@ -512,7 +512,7 @@ public: const FReal b = -sph.getR(); // Translate it - FComplexe target_w[SizeArray]; + FComplex target_w[SizeArray]; for(int l = 0 ; l <= P ; ++l ){ for(int m = 0 ; m <= l ; ++m ){ // w{l,m}(a+b) = sum(j=m:l, b^(l-j)/(l-j)! w{j,m}(a) @@ -549,7 +549,7 @@ public: */ void M2L(CellClass* const FRestrict inLocal, const CellClass* inInteractions[], const int /*inSize*/, const int inLevel) { // To copy the multipole data allocated once - FComplexe source_w[SizeArray]; + FComplex source_w[SizeArray]; // For all children for(int idxNeigh = 0 ; idxNeigh < 343 ; ++idxNeigh){ // if interaction exits @@ -563,7 +563,7 @@ public: const FReal b = sph.getR(); // Transfer to u - FComplexe target_u[SizeArray]; + FComplex target_u[SizeArray]; for(int l = 0 ; l <= P ; ++l ){ for(int m = 0 ; m <= l ; ++m ){ // u{l,m}(a-b) = sum(j=|m|:P-l, (j+l)!/b^(j+l+1) w{j,-m}(a) @@ -601,7 +601,7 @@ public: */ void L2L(const CellClass* const FRestrict inLocal, CellClass* FRestrict *const FRestrict inChildren, const int inLevel) { // To copy the source local allocated once - FComplexe source_u[SizeArray]; + FComplex source_u[SizeArray]; // For all children for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ // if child exists @@ -615,7 +615,7 @@ public: const FReal b = sph.getR(); // Translate - FComplexe target_u[SizeArray]; + FComplex target_u[SizeArray]; for(int l = 0 ; l <= P ; ++l ){ for(int m = 0 ; m <= l ; ++m ){ // u{l,m}(r-b) = sum(j=0:P, b^(j-l)/(j-l)! u{j,m}(r); @@ -660,7 +660,7 @@ public: void L2P(const CellClass* const inLocal, ContainerClass* const inParticles){ const FReal i_pow_m[4] = {0, FMath::FPiDiv2, FMath::FPi, -FMath::FPiDiv2}; // Take the local value from the cell - const FComplexe* FRestrict const u = inLocal->getLocal(); + const FComplex* FRestrict const u = inLocal->getLocal(); // Copying the position is faster than using cell position const FPoint cellPosition = getLeafCenter(inLocal->getCoordinate()); diff --git a/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp b/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp index 81a027a54f6151c8223109ea327ea0228f968645..08bd2a7769396a0662826db460f99b3baecab702 100755 --- a/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp +++ b/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp @@ -50,21 +50,21 @@ protected: FHarmonic harmonic; //< The harmonic computation class // For normal computation - FSmartPointer preL2LTransitions; //< The pre-computation for the L2L based on the level - FSmartPointer preM2MTransitions; //< The pre-computation for the M2M based on the level + FSmartPointer preL2LTransitions; //< The pre-computation for the L2L based on the level + FSmartPointer preM2MTransitions; //< The pre-computation for the M2M based on the level /** Alloc and init pre-vectors*/ void allocAndInit(){ - preL2LTransitions = new FComplexe*[treeHeight ]; - memset(preL2LTransitions.getPtr(), 0, (treeHeight) * sizeof(FComplexe*)); - preM2MTransitions = new FComplexe*[treeHeight]; - memset(preM2MTransitions.getPtr(), 0, (treeHeight) * sizeof(FComplexe*)); + preL2LTransitions = new FComplex*[treeHeight ]; + memset(preL2LTransitions.getPtr(), 0, (treeHeight) * sizeof(FComplex*)); + preM2MTransitions = new FComplex*[treeHeight]; + memset(preM2MTransitions.getPtr(), 0, (treeHeight) * sizeof(FComplex*)); FReal treeWidthAtLevel = (boxWidth)/2; for(int idxLevel = 0 ; idxLevel < treeHeight - 1 ; ++idxLevel ){ - preL2LTransitions[idxLevel] = new FComplexe[ 8 * harmonic.getExpSize()]; - preM2MTransitions[idxLevel] = new FComplexe[ 8 * harmonic.getExpSize()]; + preL2LTransitions[idxLevel] = new FComplex[ 8 * harmonic.getExpSize()]; + preM2MTransitions[idxLevel] = new FComplex[ 8 * harmonic.getExpSize()]; const FPoint father(treeWidthAtLevel,treeWidthAtLevel,treeWidthAtLevel); treeWidthAtLevel /= 2; @@ -80,7 +80,7 @@ protected: ); harmonic.computeInner(FSpherical(M2MVector)); - FMemUtils::copyall(&preM2MTransitions[idxLevel][harmonic.getExpSize() * idxChild], harmonic.result(), harmonic.getExpSize()); + FMemUtils::copyall(&preM2MTransitions[idxLevel][harmonic.getExpSize() * idxChild], harmonic.result(), harmonic.getExpSize()); const FPoint L2LVector ( (treeWidthAtLevel * FReal(1 + (childBox.getX() * 2))) - father.getX(), @@ -89,7 +89,7 @@ protected: ); harmonic.computeInner(FSpherical(L2LVector)); - FMemUtils::copyall(&preL2LTransitions[idxLevel][harmonic.getExpSize() * idxChild], harmonic.result(), harmonic.getExpSize()); + FMemUtils::copyall(&preL2LTransitions[idxLevel][harmonic.getExpSize() * idxChild], harmonic.result(), harmonic.getExpSize()); } } } @@ -145,7 +145,7 @@ public: /** P2M with a cell and all its particles */ void P2M(CellClass* const inPole, const ContainerClass* const inParticles) { - FComplexe* FRestrict const cellMultiPole = inPole->getMultipole(); + FComplex* FRestrict const cellMultiPole = inPole->getMultipole(); // Copying the position is faster than using cell position const FPoint polePosition = getLeafCenter(inPole->getCoordinate()); // For all particles in the leaf box @@ -163,9 +163,9 @@ public: /** M2M with a cell and all its child */ void M2M(CellClass* const FRestrict inPole, const CellClass *const FRestrict *const FRestrict inChild, const int inLevel) { - FComplexe* FRestrict const multipole_exp_target = inPole->getMultipole(); + FComplex* FRestrict const multipole_exp_target = inPole->getMultipole(); // iter on each child and process M2M - const FComplexe* FRestrict const preM2MTransitionsAtLevel = preM2MTransitions[inLevel]; + const FComplex* FRestrict const preM2MTransitionsAtLevel = preM2MTransitions[inLevel]; for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ if(inChild[idxChild]){ multipoleToMultipole(multipole_exp_target, inChild[idxChild]->getMultipole(), &preM2MTransitionsAtLevel[idxChild * harmonic.getExpSize()]); @@ -180,7 +180,7 @@ public: /** L2L with a cell and all its child */ void L2L(const CellClass* const FRestrict pole, CellClass* FRestrict *const FRestrict child, const int inLevel) { // iter on each child and process L2L - const FComplexe* FRestrict const preL2LTransitionsAtLevel = preL2LTransitions[inLevel]; + const FComplex* FRestrict const preL2LTransitionsAtLevel = preL2LTransitions[inLevel]; for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ if(child[idxChild]){ localToLocal(child[idxChild]->getLocal(), pole->getLocal(), &preL2LTransitionsAtLevel[idxChild * harmonic.getExpSize()]); @@ -190,7 +190,7 @@ public: /** L2P with a cell and all its particles */ void L2P(const CellClass* const local, ContainerClass* const inParticles){ - const FComplexe* const cellLocal = local->getLocal(); + const FComplex* const cellLocal = local->getLocal(); // Copying the position is faster than using cell position const FPoint localPosition = getLeafCenter(local->getCoordinate()); // For all particles in the leaf box @@ -265,7 +265,7 @@ private: * Phi(x) = sum_{n=0}^{+} sum_{m=-n}^{n} M_n^m O_n^{-m} (x - *p_center) * */ - void particleToMultiPole(FComplexe* const cellMultiPole, const FPoint& inPolePosition , + void particleToMultiPole(FComplex* const cellMultiPole, const FPoint& inPolePosition , const FPoint& particlePosition, const FReal particlePhysicalValue){ // Inner of Qi - Z0 => harmonic.result @@ -304,9 +304,9 @@ private: * * Warning: if j-n < |k-l| we do nothing. */ - void multipoleToMultipole(FComplexe* const FRestrict multipole_exp_target, - const FComplexe* const FRestrict multipole_exp_src, - const FComplexe* const FRestrict M2M_Inner_transfer){ + void multipoleToMultipole(FComplex* const FRestrict multipole_exp_target, + const FComplex* const FRestrict multipole_exp_src, + const FComplex* const FRestrict M2M_Inner_transfer){ // n from 0 to P for(int n = 0 ; n <= devP ; ++n ){ @@ -318,7 +318,7 @@ private: // l from -n to <0 for(int l = -n ; l < 0 ; ++l){ - const FComplexe M_n__n_l = multipole_exp_src[index_n -l]; + const FComplex M_n__n_l = multipole_exp_src[index_n -l]; // j from n to P for(int j = n ; j <= devP ; ++j ){ @@ -329,7 +329,7 @@ private: // since n-j+l<0 for(int k = 0 ; k <= (j-n+l) ; ++k ){ // l<0 && k>=0 => k-l>0 - const FComplexe I_j_n__k_l = M2M_Inner_transfer[index_j_n + k - l]; + const FComplex I_j_n__k_l = M2M_Inner_transfer[index_j_n + k - l]; multipole_exp_target[index_j + k].incReal( pow_of_minus_1_for_l * ((M_n__n_l.getReal() * I_j_n__k_l.getReal()) + @@ -346,7 +346,7 @@ private: // l from 0 to n for(int l = 0 ; l <= n ; ++l){ - const FComplexe M_n__n_l = multipole_exp_src[index_n + l]; + const FComplex M_n__n_l = multipole_exp_src[index_n + l]; // j from n to P for( int j = n ; j <= devP ; ++j ){ @@ -360,7 +360,7 @@ private: int k = first_k; for(; k <= (j-n+l) && k < l ; ++k){ /* l>=0 && k-l<0 */ - const FComplexe I_j_n__l_k = M2M_Inner_transfer[index_j_n + l - k]; + const FComplex I_j_n__l_k = M2M_Inner_transfer[index_j_n + l - k]; multipole_exp_target[index_j + k].incReal( pow_of_minus_1_for_k * pow_of_minus_1_for_l * ((M_n__n_l.getReal() * I_j_n__l_k.getReal()) + @@ -373,7 +373,7 @@ private: } // for k for(/* k = l */; k <= (j - n + l) ; ++k){ // l>=0 && k-l>=0 - const FComplexe I_j_n__k_l = M2M_Inner_transfer[index_j_n + k - l]; + const FComplex I_j_n__k_l = M2M_Inner_transfer[index_j_n + k - l]; multipole_exp_target[index_j + k].incReal( (M_n__n_l.getReal() * I_j_n__k_l.getReal()) - @@ -408,8 +408,8 @@ private: * *Warning: if |l-k| > n-j, we do nothing. */ - void localToLocal(FComplexe* const FRestrict local_exp_target, const FComplexe* const FRestrict local_exp_src, - const FComplexe* const FRestrict L2L_tranfer){ + void localToLocal(FComplex* const FRestrict local_exp_target, const FComplex* const FRestrict local_exp_src, + const FComplex* const FRestrict L2L_tranfer){ // L_j^k int index_j_k = 0; @@ -418,7 +418,7 @@ private: FReal pow_of_minus_1_for_k = 1.0; for (int k = 0 ; k <= j ; ++k, ++index_j_k ){ - FComplexe L_j_k = local_exp_target[index_j_k]; + FComplex L_j_k = local_exp_target[index_j_k]; for (int n=j; n <= devP;++n){ // O_n^l : here points on the source multipole expansion term of degree n and order |l| @@ -429,8 +429,8 @@ private: const int index_n_j = harmonic.getPreExpRedirJ(n-j); for(/*l = n - j + k*/ ; l-k > 0 ; --l){ /* l>0 && l-k>0 */ - const FComplexe L_j_l = local_exp_src[index_n + l]; - const FComplexe I_l_j__l_k = L2L_tranfer[index_n_j + l - k]; + const FComplex L_j_l = local_exp_src[index_n + l]; + const FComplex I_l_j__l_k = L2L_tranfer[index_n_j + l - k]; L_j_k.incReal( (L_j_l.getReal() * I_l_j__l_k.getReal()) - (L_j_l.getImag() * I_l_j__l_k.getImag())); @@ -442,8 +442,8 @@ private: // (-1)^l FReal pow_of_minus_1_for_l = ((l&1) ? FReal(-1.0) : FReal(1.0)); for(/*l = k*/; l>0 && l>=j-n+k; --l){ /* l>0 && l-k<=0 */ - const FComplexe L_j_l = local_exp_src[index_n + l]; - const FComplexe I_l_j__l_k = L2L_tranfer[index_n_j - l + k]; + const FComplex L_j_l = local_exp_src[index_n + l]; + const FComplex I_l_j__l_k = L2L_tranfer[index_n_j - l + k]; L_j_k.incReal( pow_of_minus_1_for_l * pow_of_minus_1_for_k * ((L_j_l.getReal() * I_l_j__l_k.getReal()) + @@ -457,8 +457,8 @@ private: // l<=0 && l-k<=0 for(/*l = 0 ou l = j-n+k-1*/; l>=j-n+k; --l){ - const FComplexe L_j_l = local_exp_src[index_n - l]; - const FComplexe I_l_j__l_k = L2L_tranfer[index_n_j - l + k]; + const FComplex L_j_l = local_exp_src[index_n - l]; + const FComplex I_l_j__l_k = L2L_tranfer[index_n_j - l + k]; L_j_k.incReal( pow_of_minus_1_for_k * ((L_j_l.getReal() * I_l_j__l_k.getReal()) - @@ -480,7 +480,7 @@ private: /** L2P */ - void localToParticle(const FPoint& local_position,const FComplexe*const local_exp, + void localToParticle(const FPoint& local_position,const FComplex*const local_exp, const FPoint& particlePosition, const FReal physicalValue, FReal*const potential, FReal*const forcesX,FReal*const forcesY,FReal*const forcesZ){ diff --git a/Src/Kernels/Spherical/FHarmonic.hpp b/Src/Kernels/Spherical/FHarmonic.hpp index f6ecb3086a6f9f0192835a3d537aab25d249cc9d..b59902889e1c8ae778b7761ee96f78218123c46d 100755 --- a/Src/Kernels/Spherical/FHarmonic.hpp +++ b/Src/Kernels/Spherical/FHarmonic.hpp @@ -17,7 +17,7 @@ #define FHARMONIC_HPP #include "../../Utils/FGlobal.hpp" -#include "../../Utils/FComplexe.hpp" +#include "../../Utils/FComplex.hpp" #include "../../Utils/FSpherical.hpp" #include "../../Utils/FNoCopyable.hpp" @@ -30,11 +30,11 @@ class FHarmonic : public FNoAssignement { const int expSize; //< Number of elements in the expansion expSize = (p+1)^2 const int nExpSize; //< - FComplexe* harmonic;//< Harmonic Result - FComplexe* cosSin; //< Cos/Sin precomputed values + FComplex* harmonic;//< Harmonic Result + FComplex* cosSin; //< Cos/Sin precomputed values FReal* legendre;//< Legendre results - FComplexe* thetaDerivatedResult; //< the theta derivated result + FComplex* thetaDerivatedResult; //< the theta derivated result FReal* sphereHarmoInnerCoef; //< sphere innter pre computed coefficients FReal* sphereHarmoOuterCoef; //< sphere outer pre computed coefficients //! @@ -45,10 +45,10 @@ class FHarmonic : public FNoAssignement { /** Allocate and init */ void allocAndInit(){ - harmonic = new FComplexe[expSize]; - cosSin = new FComplexe[devP + 1]; + harmonic = new FComplex[expSize]; + cosSin = new FComplex[devP + 1]; legendre = new FReal[expSize]; - thetaDerivatedResult = new FComplexe[expSize]; + thetaDerivatedResult = new FComplex[expSize]; // Pre compute coef sphereHarmoOuterCoef = new FReal[devP + 1]; @@ -176,35 +176,35 @@ public: return nExpSize; } - FComplexe* result(){ + FComplex* result(){ return harmonic; } - const FComplexe* result() const { + const FComplex* result() const { return harmonic; } - FComplexe& result(const int index){ + FComplex& result(const int index){ return harmonic[index]; } - const FComplexe& result(const int index) const{ + const FComplex& result(const int index) const{ return harmonic[index]; } - FComplexe* resultThetaDerivated(){ + FComplex* resultThetaDerivated(){ return thetaDerivatedResult; } - const FComplexe* resultThetaDerivated() const { + const FComplex* resultThetaDerivated() const { return thetaDerivatedResult; } - FComplexe& resultThetaDerivated(const int index){ + FComplex& resultThetaDerivated(const int index){ return thetaDerivatedResult[index]; } - const FComplexe& resultThetaDerivated(const int index) const{ + const FComplex& resultThetaDerivated(const int index) const{ return thetaDerivatedResult[index]; } diff --git a/Src/Kernels/Spherical/FSphericalBlasKernel.hpp b/Src/Kernels/Spherical/FSphericalBlasKernel.hpp index b5476cd6bb4668c6427aed9e08f43095862222fb..a353b95d1c5c2894e545bd91894959a96e09b30d 100755 --- a/Src/Kernels/Spherical/FSphericalBlasKernel.hpp +++ b/Src/Kernels/Spherical/FSphericalBlasKernel.hpp @@ -4,13 +4,13 @@ // This software is a computer program whose purpose is to compute the FMM. // // This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// +// abiding by the rules of distribution of free software. +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". +// "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== #ifndef FSPHERICALBLASKERNEL_HPP @@ -34,8 +34,8 @@ protected: const int FF_MATRIX_COLUMN_DIM; //< The blas matrix number of columns const int FF_MATRIX_SIZE; //< The blas matrix size - FComplexe* temporaryMultiSource; //< To perform the M2L without allocating at each call - FSmartPointer preM2LTransitions; //< The pre-computation for the M2L based on the level and the 189 possibilities + FComplex* temporaryMultiSource; //< To perform the M2L without allocating at each call + FSmartPointer preM2LTransitions; //< The pre-computation for the M2L based on the level and the 189 possibilities /** To access te precomputed M2L transfer matrixes */ int indexM2LTransition(const int idxX,const int idxY,const int idxZ) const { @@ -44,98 +44,98 @@ protected: /** Alloc and init pre-vectors*/ - void allocAndInit(){ - FHarmonic blasHarmonic(Parent::devP * 2); - - // Matrix to fill and then transposed - FComplexe*const workMatrix = new FComplexe[FF_MATRIX_SIZE]; - - // M2L transfer, there is a maximum of 3 neighbors in each direction, - // so 6 in each dimension - FReal treeWidthAtLevel = Parent::boxWidth; - preM2LTransitions = new FComplexe**[Parent::treeHeight]; - memset(preM2LTransitions.getPtr(), 0, sizeof(FComplexe**) * (Parent::treeHeight)); - - for(int idxLevel = 0 ; idxLevel < Parent::treeHeight ; ++idxLevel ){ - preM2LTransitions[idxLevel] = new FComplexe*[(7 * 7 * 7)]; - memset(preM2LTransitions[idxLevel], 0, sizeof(FComplexe*) * (7*7*7)); - - for(int idxX = -3 ; idxX <= 3 ; ++idxX ){ - for(int idxY = -3 ; idxY <= 3 ; ++idxY ){ - for(int idxZ = -3 ; idxZ <= 3 ; ++idxZ ){ - if(FMath::Abs(idxX) > 1 || FMath::Abs(idxY) > 1 || FMath::Abs(idxZ) > 1){ - // Compute harmonic - const FPoint relativePos( FReal(-idxX) * treeWidthAtLevel , FReal(-idxY) * treeWidthAtLevel , FReal(-idxZ) * treeWidthAtLevel ); - blasHarmonic.computeOuter(FSpherical(relativePos)); - - // Reset Matrix - FMemUtils::setall(workMatrix, FComplexe(), FF_MATRIX_SIZE); - FComplexe* FRestrict fillTransfer = workMatrix; - - for(int M = 0 ; M <= Parent::devP ; ++M){ - for (int m = 0 ; m <= M ; ++m){ - for (int N = 0 ; N <= Parent::devP ; ++N){ - for (int n = 0 ; n <= 2*N ; ++n, ++fillTransfer){ - const int k = N-n-m; - if (k < 0){ - const FReal pow_of_minus_1 = FReal((k&1) ? -1 : 1); - fillTransfer->setReal( pow_of_minus_1 * blasHarmonic.result()[blasHarmonic.getPreExpRedirJ(M+N)-k].getReal()); - fillTransfer->setImag((-pow_of_minus_1) * blasHarmonic.result()[blasHarmonic.getPreExpRedirJ(M+N)-k].getImag()); - } - else{ - (*fillTransfer) = blasHarmonic.result()[blasHarmonic.getPreExpRedirJ(M+N)+k]; - } - - } - } - } - } - - // Transpose and copy result - FComplexe*const matrix = new FComplexe[FF_MATRIX_SIZE]; - for(int idxRow = 0 ; idxRow < FF_MATRIX_ROW_DIM ; ++idxRow){ - for(int idxCol = 0 ; idxCol < FF_MATRIX_COLUMN_DIM ; ++idxCol){ - matrix[idxCol * FF_MATRIX_ROW_DIM + idxRow] = workMatrix[idxCol + idxRow * FF_MATRIX_COLUMN_DIM]; - } - } - // - // Single Layer - // - std::cout << std::endl ; - FComplexe Czero(0.,0.); - int idxRow = 0 ; - for(int j =0 ; j<= Parent::devP ; ++j ){ // Row - for(int k=0; k<=j ; ++k){ // Row - int idxCol = 0 ; - for(int n =0 ; n<= Parent::devP ; ++n){ // Col - for(int l=-n; l<=n ; ++l){ - // std::cout << " j " << j << " k " << k << " n " < 1 || FMath::Abs(idxY) > 1 || FMath::Abs(idxZ) > 1){ + // Compute harmonic + const FPoint relativePos( FReal(-idxX) * treeWidthAtLevel , FReal(-idxY) * treeWidthAtLevel , FReal(-idxZ) * treeWidthAtLevel ); + blasHarmonic.computeOuter(FSpherical(relativePos)); + + // Reset Matrix + FMemUtils::setall(workMatrix, FComplex(), FF_MATRIX_SIZE); + FComplex* FRestrict fillTransfer = workMatrix; + + for(int M = 0 ; M <= Parent::devP ; ++M){ + for (int m = 0 ; m <= M ; ++m){ + for (int N = 0 ; N <= Parent::devP ; ++N){ + for (int n = 0 ; n <= 2*N ; ++n, ++fillTransfer){ + const int k = N-n-m; + if (k < 0){ + const FReal pow_of_minus_1 = FReal((k&1) ? -1 : 1); + fillTransfer->setReal( pow_of_minus_1 * blasHarmonic.result()[blasHarmonic.getPreExpRedirJ(M+N)-k].getReal()); + fillTransfer->setImag((-pow_of_minus_1) * blasHarmonic.result()[blasHarmonic.getPreExpRedirJ(M+N)-k].getImag()); + } + else{ + (*fillTransfer) = blasHarmonic.result()[blasHarmonic.getPreExpRedirJ(M+N)+k]; + } + + } + } + } + } + + // Transpose and copy result + FComplex*const matrix = new FComplex[FF_MATRIX_SIZE]; + for(int idxRow = 0 ; idxRow < FF_MATRIX_ROW_DIM ; ++idxRow){ + for(int idxCol = 0 ; idxCol < FF_MATRIX_COLUMN_DIM ; ++idxCol){ + matrix[idxCol * FF_MATRIX_ROW_DIM + idxRow] = workMatrix[idxCol + idxRow * FF_MATRIX_COLUMN_DIM]; + } + } + // + // Single Layer + // + // std::cout << std::endl ; + FComplex Czero(0.,0.); + int idxRow = 0 ; + for(int j =0 ; j<= Parent::devP ; ++j ){ // Row + for(int k=0; k<=j ; ++k){ // Row + int idxCol = 0 ; + for(int n =0 ; n<= Parent::devP ; ++n){ // Col + for(int l=-n; l<=n ; ++l){ + // std::cout << " j " << j << " k " << k << " n " <getLocal(), distantNeighbors[idxNeigh]->getMultipole(), transitionVector); } } @@ -196,7 +196,7 @@ public: /** preExpNExp * @param exp an exponent vector to create an computable vector */ - void preExpNExp(FComplexe* const exp) const { + void preExpNExp(FComplex* const exp) const { for(int j = Parent::devP; j>= 0 ; --j){ // Position in 'exp': (j*(j+1)*0.5) + k // Position in 'nexp': j*(j+1) + k @@ -241,10 +241,10 @@ public: * } * } */ - void multipoleToLocal(FComplexe*const FRestrict local_exp, const FComplexe* const FRestrict multipole_exp_src, - const FComplexe* const FRestrict M2L_Outer_transfer){ + void multipoleToLocal(FComplex*const FRestrict local_exp, const FComplex* const FRestrict multipole_exp_src, + const FComplex* const FRestrict M2L_Outer_transfer){ // Copy original vector and compute exp2nexp - FMemUtils::copyall(temporaryMultiSource, multipole_exp_src, CellClass::GetPoleSize()); + FMemUtils::copyall(temporaryMultiSource, multipole_exp_src, CellClass::GetPoleSize()); // Get a computable vector preExpNExp(temporaryMultiSource); @@ -254,41 +254,41 @@ public: FF_MATRIX_ROW_DIM, FF_MATRIX_COLUMN_DIM, one, - FComplexe::ToFReal(M2L_Outer_transfer), - FComplexe::ToFReal(temporaryMultiSource), - FComplexe::ToFReal(local_exp)); + FComplex::ToFReal(M2L_Outer_transfer), + FComplex::ToFReal(temporaryMultiSource), + FComplex::ToFReal(local_exp)); -#ifdef DEBUG_SPHERICAL_M2L +// #ifdef DEBUG_SPHERICAL_M2L - std::cout << "\n ====== Multipole expansion ====== \n"< preM2LTransitions; //< The pre-computation for the M2L based on the level and the 189 possibilities + FComplex*const multipoleMatrix; //< To copy all the multipole vectors + FComplex*const localMatrix; //< To save all the local vectors result + FSmartPointer preM2LTransitions; //< The pre-computation for the M2L based on the level and the 189 possibilities FVector interactions[343]; //< All the current interaction @@ -63,17 +63,17 @@ protected: FHarmonic blasHarmonic(Parent::devP * 2); // Matrix to fill and then transposed - FComplexe*const workMatrix = new FComplexe[FF_MATRIX_SIZE]; + FComplex*const workMatrix = new FComplex[FF_MATRIX_SIZE]; // M2L transfer, there is a maximum of 3 neighbors in each direction, // so 6 in each dimension FReal treeWidthAtLevel = Parent::boxWidth; - preM2LTransitions = new FComplexe**[Parent::treeHeight]; - memset(preM2LTransitions.getPtr(), 0, sizeof(FComplexe**) * (Parent::treeHeight)); + preM2LTransitions = new FComplex**[Parent::treeHeight]; + memset(preM2LTransitions.getPtr(), 0, sizeof(FComplex**) * (Parent::treeHeight)); for(int idxLevel = 0 ; idxLevel < Parent::treeHeight ; ++idxLevel ){ - preM2LTransitions[idxLevel] = new FComplexe*[(7 * 7 * 7)]; - memset(preM2LTransitions[idxLevel], 0, sizeof(FComplexe*) * (7*7*7)); + preM2LTransitions[idxLevel] = new FComplex*[(7 * 7 * 7)]; + memset(preM2LTransitions[idxLevel], 0, sizeof(FComplex*) * (7*7*7)); for(int idxX = -3 ; idxX <= 3 ; ++idxX ){ for(int idxY = -3 ; idxY <= 3 ; ++idxY ){ @@ -84,8 +84,8 @@ protected: blasHarmonic.computeOuter(FSpherical(relativePos)); // Reset Matrix - FMemUtils::setall(workMatrix, FComplexe(), FF_MATRIX_SIZE); - FComplexe* FRestrict fillTransfer = workMatrix; + FMemUtils::setall(workMatrix, FComplex(), FF_MATRIX_SIZE); + FComplex* FRestrict fillTransfer = workMatrix; for(int M = 0 ; M <= Parent::devP ; ++M){ for (int m = 0 ; m <= M ; ++m){ @@ -107,7 +107,7 @@ protected: } // Transpose and copy result - FComplexe*const matrix = new FComplexe[FF_MATRIX_SIZE]; + FComplex*const matrix = new FComplex[FF_MATRIX_SIZE]; for(int idxRow = 0 ; idxRow < FF_MATRIX_ROW_DIM ; ++idxRow){ for(int idxCol = 0 ; idxCol < FF_MATRIX_COLUMN_DIM ; ++idxCol){ matrix[idxCol * FF_MATRIX_ROW_DIM + idxRow] = workMatrix[idxCol + idxRow * FF_MATRIX_COLUMN_DIM]; @@ -138,8 +138,8 @@ public: FF_MATRIX_ROW_DIM(Parent::harmonic.getExpSize()), FF_MATRIX_COLUMN_DIM(Parent::harmonic.getNExpSize()), FF_MATRIX_SIZE(FF_MATRIX_ROW_DIM * FF_MATRIX_COLUMN_DIM), BlockSize(inBlockSize), - multipoleMatrix(new FComplexe[inBlockSize * FF_MATRIX_COLUMN_DIM]), - localMatrix(new FComplexe[inBlockSize * FF_MATRIX_ROW_DIM]), + multipoleMatrix(new FComplex[inBlockSize * FF_MATRIX_COLUMN_DIM]), + localMatrix(new FComplex[inBlockSize * FF_MATRIX_ROW_DIM]), preM2LTransitions(nullptr){ allocAndInit(); } @@ -150,8 +150,8 @@ public: FF_MATRIX_ROW_DIM(other.FF_MATRIX_ROW_DIM), FF_MATRIX_COLUMN_DIM(other.FF_MATRIX_COLUMN_DIM), FF_MATRIX_SIZE(other.FF_MATRIX_SIZE), BlockSize(other.BlockSize), - multipoleMatrix(new FComplexe[other.BlockSize * FF_MATRIX_COLUMN_DIM]), - localMatrix(new FComplexe[other.BlockSize * FF_MATRIX_ROW_DIM]), + multipoleMatrix(new FComplex[other.BlockSize * FF_MATRIX_COLUMN_DIM]), + localMatrix(new FComplex[other.BlockSize * FF_MATRIX_ROW_DIM]), preM2LTransitions(other.preM2LTransitions) { } @@ -201,7 +201,7 @@ public: /** preExpNExp * @param exp an exponent vector to create an computable vector */ - void preExpNExp(FComplexe* const exp) const { + void preExpNExp(FComplex* const exp) const { for(int j = Parent::devP; j>= 0 ; --j){ // Position in 'exp': (j*(j+1)*0.5) + k // Position in 'nexp': j*(j+1) + k @@ -240,10 +240,10 @@ public: * *Remark: here we have always j+n >= |-k-l| * - * const FComplexe*const M2LTransfer = preM2LTransitions[inLevel][interactionIndex]; + * const FComplex*const M2LTransfer = preM2LTransitions[inLevel][interactionIndex]; * for(int idxK = 0 ; idxK < interactions[interactionIndex].getSize() ; ++idxK){ * for(int idxRow = 0 ; idxRow < FF_MATRIX_ROW_DIM ; ++idxRow){ - * FComplexe compute; + * FComplex compute; * for(int idxCol = 0 ; idxCol < FF_MATRIX_COLUMN_DIM ; ++idxCol){ * compute.addMul(M2LTransfer[idxCol * FF_MATRIX_ROW_DIM + idxRow], multipoleMatrix[idxCol + idxK * FF_MATRIX_COLUMN_DIM]); * } @@ -254,7 +254,7 @@ public: void multipoleToLocal(const int interactionIndex, const int inLevel){ for(int idxInter = 0 ; idxInter < interactions[interactionIndex].getSize() ; ++idxInter){ // Copy original vector and compute exp2nexp - FMemUtils::copyall(&multipoleMatrix[idxInter * FF_MATRIX_COLUMN_DIM], + FMemUtils::copyall(&multipoleMatrix[idxInter * FF_MATRIX_COLUMN_DIM], interactions[interactionIndex][idxInter].pole, FF_MATRIX_COLUMN_DIM); // Get a computable vector @@ -268,15 +268,15 @@ public: FF_MATRIX_COLUMN_DIM, interactions[interactionIndex].getSize(), one, - FComplexe::ToFReal(preM2LTransitions[inLevel][interactionIndex]), + FComplex::ToFReal(preM2LTransitions[inLevel][interactionIndex]), FF_MATRIX_ROW_DIM, - FComplexe::ToFReal(multipoleMatrix), + FComplex::ToFReal(multipoleMatrix), FF_MATRIX_COLUMN_DIM, - FComplexe::ToFReal(localMatrix), + FComplex::ToFReal(localMatrix), FF_MATRIX_ROW_DIM); for(int idxInter = 0 ; idxInter < interactions[interactionIndex].getSize() ; ++idxInter){ - FMemUtils::addall(interactions[interactionIndex][idxInter].local, &localMatrix[idxInter * FF_MATRIX_ROW_DIM], FF_MATRIX_ROW_DIM); + FMemUtils::addall(interactions[interactionIndex][idxInter].local, &localMatrix[idxInter * FF_MATRIX_ROW_DIM], FF_MATRIX_ROW_DIM); } interactions[interactionIndex].clear(); diff --git a/Src/Kernels/Spherical/FSphericalCell.hpp b/Src/Kernels/Spherical/FSphericalCell.hpp index 1b6de975fd9be4034788cc67f7799aa762e1fa42..999b35f53e1456b15587b2ababeeeec80df568c0 100755 --- a/Src/Kernels/Spherical/FSphericalCell.hpp +++ b/Src/Kernels/Spherical/FSphericalCell.hpp @@ -16,7 +16,7 @@ #ifndef FSPHERICALCELL_HPP #define FSPHERICALCELL_HPP -#include "../../Utils/FComplexe.hpp" +#include "../../Utils/FComplex.hpp" #include "../../Utils/FMemUtils.hpp" #include "../../Extensions/FExtendCellType.hpp" @@ -33,8 +33,8 @@ protected: static int PoleSize; static bool UseBlas; - FComplexe* multipole_exp; //< For multipole extenssion - FComplexe* local_exp; //< For local extenssion + FComplex* multipole_exp; //< For multipole extenssion + FComplex* local_exp; //< For local extenssion public: static void Init(const int inDevP, const bool inUseBlas = false){ @@ -62,15 +62,15 @@ public: /** Default constructor */ FSphericalCell() : multipole_exp(nullptr), local_exp(nullptr){ - multipole_exp = new FComplexe[PoleSize]; - local_exp = new FComplexe[LocalSize]; + multipole_exp = new FComplex[PoleSize]; + local_exp = new FComplex[LocalSize]; } /** Constructor */ FSphericalCell(const FSphericalCell& other) : multipole_exp(nullptr), local_exp(nullptr){ - multipole_exp = new FComplexe[PoleSize]; - local_exp = new FComplexe[LocalSize]; + multipole_exp = new FComplex[PoleSize]; + local_exp = new FComplex[LocalSize]; (*this) = other; } @@ -88,20 +88,20 @@ public: } /** Get Multipole */ - const FComplexe* getMultipole() const { + const FComplex* getMultipole() const { return multipole_exp; } /** Get Local */ - const FComplexe* getLocal() const { + const FComplex* getLocal() const { return local_exp; } /** Get Multipole */ - FComplexe* getMultipole() { + FComplex* getMultipole() { return multipole_exp; } /** Get Local */ - FComplexe* getLocal() { + FComplex* getLocal() { return local_exp; } @@ -153,7 +153,7 @@ public: } static int GetSize(){ - return (int) sizeof(FComplexe) * (PoleSize+LocalSize); + return (int) sizeof(FComplex) * (PoleSize+LocalSize); } }; diff --git a/Src/Kernels/Spherical/FSphericalKernel.hpp b/Src/Kernels/Spherical/FSphericalKernel.hpp index 06a60fd25c033c5a3d7bc87db9413ed4223d58fd..fd20c6ffaa2a7e3d054d866635892fade75c46d2 100755 --- a/Src/Kernels/Spherical/FSphericalKernel.hpp +++ b/Src/Kernels/Spherical/FSphericalKernel.hpp @@ -4,13 +4,13 @@ // This software is a computer program whose purpose is to compute the FMM. // // This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// +// abiding by the rules of distribution of free software. +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". +// "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== #ifndef FSPHERICALKERNEL_HPP @@ -30,39 +30,39 @@ protected: const int devM2lP; //< A secondary P - FSmartPointer preM2LTransitions; //< The pre-computation for the M2L based on the level and the 189 possibilities + FSmartPointer preM2LTransitions; //< The pre-computation for the M2L based on the level and the 189 possibilities /** To access te pre computed M2L transfer vector */ int indexM2LTransition(const int idxX,const int idxY,const int idxZ) const { - return (((((idxX+3) * 7) + (idxY+3)) * 7 ) + (idxZ+3)) * devM2lP; + return (((((idxX+3) * 7) + (idxY+3)) * 7 ) + (idxZ+3)) * devM2lP; } /** Alloc and init pre-vectors*/ void allocAndInit(){ - // M2L transfer, there is a maximum of 3 neighbors in each direction, - // so 6 in each dimension - preM2LTransitions = new FComplexe*[Parent::treeHeight]; - memset(preM2LTransitions.getPtr(), 0, sizeof(FComplexe*) * (Parent::treeHeight)); - // We start from the higher level - FReal treeWidthAtLevel = Parent::boxWidth; - for(int idxLevel = 0 ; idxLevel < Parent::treeHeight ; ++idxLevel ){ - // Allocate data for this level - preM2LTransitions[idxLevel] = new FComplexe[(7 * 7 * 7) * devM2lP]; - // Precompute transfer vector - for(int idxX = -3 ; idxX <= 3 ; ++idxX ){ - for(int idxY = -3 ; idxY <= 3 ; ++idxY ){ - for(int idxZ = -3 ; idxZ <= 3 ; ++idxZ ){ - if(FMath::Abs(idxX) > 1 || FMath::Abs(idxY) > 1 || FMath::Abs(idxZ) > 1){ - const FPoint relativePos( FReal(-idxX) * treeWidthAtLevel , FReal(-idxY) * treeWidthAtLevel , FReal(-idxZ) * treeWidthAtLevel ); - Parent::harmonic.computeOuter(FSpherical(relativePos)); - FMemUtils::copyall(&preM2LTransitions[idxLevel][indexM2LTransition(idxX,idxY,idxZ)], Parent::harmonic.result(), Parent::harmonic.getExpSize()); - } - } - } - } - // We divide the bow per 2 when we go down - treeWidthAtLevel /= 2; - } + // M2L transfer, there is a maximum of 3 neighbors in each direction, + // so 6 in each dimension + preM2LTransitions = new FComplex*[Parent::treeHeight]; + memset(preM2LTransitions.getPtr(), 0, sizeof(FComplex*) * (Parent::treeHeight)); + // We start from the higher level + FReal treeWidthAtLevel = Parent::boxWidth; + for(int idxLevel = 0 ; idxLevel < Parent::treeHeight ; ++idxLevel ){ + // Allocate data for this level + preM2LTransitions[idxLevel] = new FComplex[(7 * 7 * 7) * devM2lP]; + // Precompute transfer vector + for(int idxX = -3 ; idxX <= 3 ; ++idxX ){ + for(int idxY = -3 ; idxY <= 3 ; ++idxY ){ + for(int idxZ = -3 ; idxZ <= 3 ; ++idxZ ){ + if(FMath::Abs(idxX) > 1 || FMath::Abs(idxY) > 1 || FMath::Abs(idxZ) > 1){ + const FPoint relativePos( FReal(-idxX) * treeWidthAtLevel , FReal(-idxY) * treeWidthAtLevel , FReal(-idxZ) * treeWidthAtLevel ); + Parent::harmonic.computeOuter(FSpherical(relativePos)); + FMemUtils::copyall(&preM2LTransitions[idxLevel][indexM2LTransition(idxX,idxY,idxZ)], Parent::harmonic.result(), Parent::harmonic.getExpSize()); + } + } + } + } + // We divide the bow per 2 when we go down + treeWidthAtLevel /= 2; + } } @@ -74,36 +74,36 @@ public: * @param inPeriodicLevel the number of level upper to 0 that will be requiried */ FSphericalKernel(const int inDevP, const int inTreeHeight, const FReal inBoxWidth, const FPoint& inBoxCenter) - : Parent(inDevP, inTreeHeight, inBoxWidth, inBoxCenter), - devM2lP(int(((inDevP*2)+1) * ((inDevP*2)+2) * 0.5)), - preM2LTransitions(nullptr) { - allocAndInit(); + : Parent(inDevP, inTreeHeight, inBoxWidth, inBoxCenter), + devM2lP(int(((inDevP*2)+1) * ((inDevP*2)+2) * 0.5)), + preM2LTransitions(nullptr) { + allocAndInit(); } /** Copy constructor */ FSphericalKernel(const FSphericalKernel& other) - : Parent(other), devM2lP(other.devM2lP), - preM2LTransitions(other.preM2LTransitions) { + : Parent(other), devM2lP(other.devM2lP), + preM2LTransitions(other.preM2LTransitions) { } /** Destructor */ ~FSphericalKernel(){ - if( preM2LTransitions.isLast() ){ - FMemUtils::DeleteAll(preM2LTransitions.getPtr(), Parent::treeHeight); - } + if( preM2LTransitions.isLast() ){ + FMemUtils::DeleteAll(preM2LTransitions.getPtr(), Parent::treeHeight); + } } /** M2L with a cell and all the existing neighbors */ void M2L(CellClass* const FRestrict pole, const CellClass* distantNeighbors[343], - const int /*size*/, const int inLevel) { - // For all neighbors compute M2L - for(int idxNeigh = 0 ; idxNeigh < 343 ; ++idxNeigh){ - if( distantNeighbors[idxNeigh] ){ - const FComplexe* const transitionVector = &preM2LTransitions[inLevel][idxNeigh * devM2lP]; - multipoleToLocal(pole->getLocal(), distantNeighbors[idxNeigh]->getMultipole(), transitionVector); - } - } + const int /*size*/, const int inLevel) { + // For all neighbors compute M2L + for(int idxNeigh = 0 ; idxNeigh < 343 ; ++idxNeigh){ + if( distantNeighbors[idxNeigh] ){ + const FComplex* const transitionVector = &preM2LTransitions[inLevel][idxNeigh * devM2lP]; + multipoleToLocal(pole->getLocal(), distantNeighbors[idxNeigh]->getMultipole(), transitionVector); + } + } } @@ -125,87 +125,87 @@ public: *Remark: here we have always j+n >= |-k-l| * */ - void multipoleToLocal(FComplexe*const FRestrict local_exp, const FComplexe* const FRestrict multipole_exp_src, - const FComplexe* const FRestrict M2L_Outer_transfer){ - int index_j_k = 0; - - // L_j^k - // HPMSTART(51, "M2L computation (loops)"); - // j from 0 to P - for (int j = 0 ; j <= Parent::devP ; ++j){ - // (-1)^k - FReal pow_of_minus_1_for_k = 1.0; - //k from 0 to j - for (int k = 0 ; k <= j ; ++k, ++index_j_k){ - // (-1)^n - FReal pow_of_minus_1_for_n = 1.0; - - // work with a local variable - FComplexe L_j_k = local_exp[index_j_k]; - // n from 0 to P or do P-j - // for (int n = 0 ; n <= Parent::devP /*or*/ /*Parent::devP-j*/ ; ++n){ - for (int n = 0 ; n <= Parent::devP-j ; ++n){ // faster than double height Parent::devP - // O_n^l : here points on the source multipole expansion term of degree n and order |l| - const int index_n = Parent::harmonic.getPreExpRedirJ(n); - - // Outer_{j+n}^{-k-l} : here points on the M2L transfer function/expansion term of degree j+n and order |-k-l| - const int index_n_j = Parent::harmonic.getPreExpRedirJ(n+j); - - FReal pow_of_minus_1_for_l = pow_of_minus_1_for_n; // (-1)^l - - // We start with l=n (and not l=-n) so that we always set p_Outer_term to a correct value in the first loop. - int l = n; - for(/* l = n */ ; l > 0 ; --l){ // we have -k-l<0 and l>0 - const FComplexe M_n_l = multipole_exp_src[index_n + l]; - const FComplexe O_n_j__k_l = M2L_Outer_transfer[index_n_j + k + l]; - - L_j_k.incReal( pow_of_minus_1_for_l * pow_of_minus_1_for_k * - ((M_n_l.getReal() * O_n_j__k_l.getReal()) + - (M_n_l.getImag() * O_n_j__k_l.getImag()))); - L_j_k.incImag( pow_of_minus_1_for_l * pow_of_minus_1_for_k * - ((M_n_l.getImag() * O_n_j__k_l.getReal()) - - (M_n_l.getReal() * O_n_j__k_l.getImag()))); - - pow_of_minus_1_for_l = -pow_of_minus_1_for_l; - } - - for(/* l = 0 */; l >= -n && (-k-l) < 0 ; --l){ // we have -k-l<0 and l<=0 - const FComplexe M_n_l = multipole_exp_src[index_n - l]; - const FComplexe O_n_j__k_l = M2L_Outer_transfer[index_n_j + k + l]; - - L_j_k.incReal( pow_of_minus_1_for_k * - ((M_n_l.getReal() * O_n_j__k_l.getReal()) - - (M_n_l.getImag() * O_n_j__k_l.getImag()))); - L_j_k.decImag( pow_of_minus_1_for_k * - ((M_n_l.getImag() * O_n_j__k_l.getReal()) + - (M_n_l.getReal() * O_n_j__k_l.getImag()))); - - pow_of_minus_1_for_l = -pow_of_minus_1_for_l; - } - - for(/*l = -n-1 or l = -k-1 */; l >= -n ; --l){ // we have -k-l>=0 and l<=0 - const FComplexe M_n_l = multipole_exp_src[index_n - l]; - const FComplexe O_n_j__k_l = M2L_Outer_transfer[index_n_j - (k + l)]; - - L_j_k.incReal( pow_of_minus_1_for_l * - ((M_n_l.getReal() * O_n_j__k_l.getReal()) + - (M_n_l.getImag() * O_n_j__k_l.getImag()))); - L_j_k.incImag( pow_of_minus_1_for_l * - ((M_n_l.getReal() * O_n_j__k_l.getImag()) - - (M_n_l.getImag() * O_n_j__k_l.getReal()))); - - pow_of_minus_1_for_l = -pow_of_minus_1_for_l; - } - - pow_of_minus_1_for_n = -pow_of_minus_1_for_n; - }//n - - // put in the local vector - local_exp[index_j_k] = L_j_k; - - pow_of_minus_1_for_k = -pow_of_minus_1_for_k; - }//k - } + void multipoleToLocal(FComplex*const FRestrict local_exp, const FComplex* const FRestrict multipole_exp_src, + const FComplex* const FRestrict M2L_Outer_transfer){ + int index_j_k = 0; + + // L_j^k + // HPMSTART(51, "M2L computation (loops)"); + // j from 0 to P + for (int j = 0 ; j <= Parent::devP ; ++j){ + // (-1)^k + FReal pow_of_minus_1_for_k = 1.0; + //k from 0 to j + for (int k = 0 ; k <= j ; ++k, ++index_j_k){ + // (-1)^n + FReal pow_of_minus_1_for_n = 1.0; + + // work with a local variable + FComplex L_j_k = local_exp[index_j_k]; + // n from 0 to P or do P-j + //for (int n = 0 ; n <= Parent::devP /*or*/ /*Parent::devP-j*/ ; ++n){ + for (int n = 0 ; n <= Parent::devP-j ; ++n){ // faster than double height Parent::devP + // O_n^l : here points on the source multipole expansion term of degree n and order |l| + const int index_n = Parent::harmonic.getPreExpRedirJ(n); + + // Outer_{j+n}^{-k-l} : here points on the M2L transfer function/expansion term of degree j+n and order |-k-l| + const int index_n_j = Parent::harmonic.getPreExpRedirJ(n+j); + + FReal pow_of_minus_1_for_l = pow_of_minus_1_for_n; // (-1)^l + + // We start with l=n (and not l=-n) so that we always set p_Outer_term to a correct value in the first loop. + int l = n; + for(/* l = n */ ; l > 0 ; --l){ // we have -k-l<0 and l>0 + const FComplex M_n_l = multipole_exp_src[index_n + l]; + const FComplex O_n_j__k_l = M2L_Outer_transfer[index_n_j + k + l]; + + L_j_k.incReal( pow_of_minus_1_for_l * pow_of_minus_1_for_k * + ((M_n_l.getReal() * O_n_j__k_l.getReal()) + + (M_n_l.getImag() * O_n_j__k_l.getImag()))); + L_j_k.incImag( pow_of_minus_1_for_l * pow_of_minus_1_for_k * + ((M_n_l.getImag() * O_n_j__k_l.getReal()) - + (M_n_l.getReal() * O_n_j__k_l.getImag()))); + + pow_of_minus_1_for_l = -pow_of_minus_1_for_l; + } + + for(/* l = 0 */; l >= -n && (-k-l) < 0 ; --l){ // we have -k-l<0 and l<=0 + const FComplex M_n_l = multipole_exp_src[index_n - l]; + const FComplex O_n_j__k_l = M2L_Outer_transfer[index_n_j + k + l]; + + L_j_k.incReal( pow_of_minus_1_for_k * + ((M_n_l.getReal() * O_n_j__k_l.getReal()) - + (M_n_l.getImag() * O_n_j__k_l.getImag()))); + L_j_k.decImag( pow_of_minus_1_for_k * + ((M_n_l.getImag() * O_n_j__k_l.getReal()) + + (M_n_l.getReal() * O_n_j__k_l.getImag()))); + + pow_of_minus_1_for_l = -pow_of_minus_1_for_l; + } + + for(/*l = -n-1 or l = -k-1 */; l >= -n ; --l){ // we have -k-l>=0 and l<=0 + const FComplex M_n_l = multipole_exp_src[index_n - l]; + const FComplex O_n_j__k_l = M2L_Outer_transfer[index_n_j - (k + l)]; + + L_j_k.incReal( pow_of_minus_1_for_l * + ((M_n_l.getReal() * O_n_j__k_l.getReal()) + + (M_n_l.getImag() * O_n_j__k_l.getImag()))); + L_j_k.incImag( pow_of_minus_1_for_l * + ((M_n_l.getReal() * O_n_j__k_l.getImag()) - + (M_n_l.getImag() * O_n_j__k_l.getReal()))); + + pow_of_minus_1_for_l = -pow_of_minus_1_for_l; + } + + pow_of_minus_1_for_n = -pow_of_minus_1_for_n; + }//n + + // put in the local vector + local_exp[index_j_k] = L_j_k; + + pow_of_minus_1_for_k = -pow_of_minus_1_for_k; + }//k + } } }; diff --git a/Src/Kernels/Spherical/FSphericalRotationKernel.hpp b/Src/Kernels/Spherical/FSphericalRotationKernel.hpp index 2317ebe1f9a348a5ba2677a79f7a987955949907..c00fe73b0b6daede1b08417245c2e5f2a2cfc277 100755 --- a/Src/Kernels/Spherical/FSphericalRotationKernel.hpp +++ b/Src/Kernels/Spherical/FSphericalRotationKernel.hpp @@ -34,8 +34,8 @@ protected: FReal* rotation_a; FReal* rotation_b; - FComplexe* p_rot_multipole_exp; - FComplexe* p_rot_local_exp; + FComplex* p_rot_multipole_exp; + FComplex* p_rot_local_exp; /** Get z vector size */ static int ZAxisExpensionSize(const int inDevP){ @@ -60,8 +60,8 @@ protected: } } const int z_size = ZAxisExpensionSize(inDevP); - p_rot_multipole_exp = new FComplexe[z_size]; - p_rot_local_exp = new FComplexe[z_size]; + p_rot_multipole_exp = new FComplex[z_size]; + p_rot_local_exp = new FComplex[z_size]; } /** Destructor */ @@ -80,19 +80,19 @@ protected: struct RotationM2LTransfer { const int devP; const int expSize; - FComplexe** rcc_outer; - FComplexe** rcc_inner; + FComplex** rcc_outer; + FComplex** rcc_inner; FReal* outer_array; /** Constructor */ RotationM2LTransfer(const int inDevP, const int inDevM2lP, const int inExpSize) : devP(inDevP), expSize(inExpSize){ - rcc_outer = new FComplexe*[devP + 1]; - rcc_inner = new FComplexe*[devP + 1]; + rcc_outer = new FComplex*[devP + 1]; + rcc_inner = new FComplex*[devP + 1]; for( int idxP = 0 ; idxP <= devP ; ++idxP){ const int rotationSize = ((idxP+1)*(2*idxP+1)); - rcc_outer[idxP] = new FComplexe[rotationSize]; - rcc_inner[idxP] = new FComplexe[rotationSize]; + rcc_outer[idxP] = new FComplex[rotationSize]; + rcc_inner[idxP] = new FComplex[rotationSize]; } outer_array = new FReal[inDevM2lP + 1]; } @@ -193,13 +193,13 @@ protected: const FReal cos_gamma, const FReal sin_gamma, const FReal chi, const RotationInfo& rotation_Info){ - FComplexe** rcc_tmp_transposed = new FComplexe*[devP + 1]; + FComplex** rcc_tmp_transposed = new FComplex*[devP + 1]; for( int idxP = 0 ; idxP <= devP ; ++idxP){ const int rotationSize = ((idxP+1)*(2*idxP+1)); - rcc_tmp_transposed[idxP] = new FComplexe[rotationSize]; + rcc_tmp_transposed[idxP] = new FComplex[rotationSize]; } - FComplexe _pow_of_I_array[7]; + FComplex _pow_of_I_array[7]; _pow_of_I_array[0].setRealImag(0 , 1 ) /* I^{-3} */; _pow_of_I_array[1].setRealImag(-1, 0 ) /* I^{-2} */; _pow_of_I_array[2].setRealImag(0 , -1) /* I^{-1} */; @@ -208,13 +208,13 @@ protected: _pow_of_I_array[5].setRealImag(-1, 0 ) /* I^2 */; _pow_of_I_array[6].setRealImag(0 , -1) /* I^3 */; - const FComplexe* pow_of_I_array = _pow_of_I_array + 3; /* points on I^0 */ + const FComplex* pow_of_I_array = _pow_of_I_array + 3; /* points on I^0 */ - FComplexe* const _precomputed_exp_I_chi_array = new FComplexe[2*devP + 1]; - FComplexe* precomputed_exp_I_chi_array = _precomputed_exp_I_chi_array + devP; + FComplex* const _precomputed_exp_I_chi_array = new FComplex[2*devP + 1]; + FComplex* precomputed_exp_I_chi_array = _precomputed_exp_I_chi_array + devP; - FComplexe* const _precomputed_exp_I_omega_array = new FComplexe[2*devP + 1]; - FComplexe* precomputed_exp_I_omega_array = _precomputed_exp_I_omega_array + devP; + FComplex* const _precomputed_exp_I_omega_array = new FComplex[2*devP + 1]; + FComplex* precomputed_exp_I_omega_array = _precomputed_exp_I_omega_array + devP; // cos(x) = sin(x + Pi/2) @@ -286,7 +286,7 @@ protected: for(int m = -n; m <= n; ++m){ FReal A_terms = A_div_A(n, m, nu); /* A_n^m / A_n^nu */ int abs_m_minus_abs_nu_mod4 = (FMath::Abs(m) - FMath::Abs(nu)) % 4; /* can be negative! */ - const FComplexe p_H_tmp = ( m >= 0 ? + const FComplex p_H_tmp = ( m >= 0 ? rcc_tmp_transposed[n][getTranspRotationCoefP(n, nu, m)] : rcc_tmp_transposed[n][getTranspRotationCoefP(n, -nu, -m)]) ; @@ -358,7 +358,7 @@ protected: // M2L transfer, there is a maximum of 3 neighbors in each direction, // so 6 in each dimension preM2LTransitions = new RotationM2LTransfer*[Parent::treeHeight]; - memset(preM2LTransitions.getPtr(), 0, sizeof(FComplexe*) * (Parent::treeHeight)); + memset(preM2LTransitions.getPtr(), 0, sizeof(FComplex*) * (Parent::treeHeight)); // We start from the higher level FReal treeWidthAtLevel = Parent::boxWidth; for(int idxLevel = 0 ; idxLevel < Parent::treeHeight ; ++idxLevel ){ @@ -433,11 +433,11 @@ public: /** M2L With rotation */ - void multipoleToLocal(FComplexe*const FRestrict local_exp_target, const FComplexe* const FRestrict multipole_exp_src, + void multipoleToLocal(FComplex*const FRestrict local_exp_target, const FComplex* const FRestrict multipole_exp_src, const RotationM2LTransfer& transfer_M2L_rotation){ - memset(rotation_Info.p_rot_multipole_exp, 0, RotationInfo::ZAxisExpensionSize(Parent::devP) * sizeof(FComplexe)); - memset(rotation_Info.p_rot_local_exp, 0, RotationInfo::ZAxisExpensionSize(Parent::devP) * sizeof(FComplexe)); + memset(rotation_Info.p_rot_multipole_exp, 0, RotationInfo::ZAxisExpensionSize(Parent::devP) * sizeof(FComplex)); + memset(rotation_Info.p_rot_local_exp, 0, RotationInfo::ZAxisExpensionSize(Parent::devP) * sizeof(FComplex)); rotation_Rotate_multipole_expansion_terms(multipole_exp_src, transfer_M2L_rotation.rcc_outer, rotation_Info.p_rot_multipole_exp); @@ -447,16 +447,16 @@ public: } /** Needed when doing the M2L */ - void rotation_Rotate_multipole_expansion_terms(const FComplexe*const FRestrict multipole_exp, - const FComplexe* const FRestrict * const FRestrict rcc_outer, - FComplexe*const FRestrict rot_multipole_exp){ + void rotation_Rotate_multipole_expansion_terms(const FComplex*const FRestrict multipole_exp, + const FComplex* const FRestrict * const FRestrict rcc_outer, + FComplex*const FRestrict rot_multipole_exp){ - FComplexe* p_rot_multipole_exp = rot_multipole_exp; + FComplex* p_rot_multipole_exp = rot_multipole_exp; for(int nu = 0 ; nu <= (Parent::devP/2) ; ++nu){ for(int j = nu; j <= (Parent::devP-nu) ; ++j){ - const FComplexe* p_rcc_outer = &rcc_outer[j][RotationM2LTransfer::getRotationCoefP(j, nu, j)]; - const FComplexe* p_multipole_exp = &multipole_exp[Parent::harmonic.getPreExpRedirJ(j) + j]; + const FComplex* p_rcc_outer = &rcc_outer[j][RotationM2LTransfer::getRotationCoefP(j, nu, j)]; + const FComplex* p_multipole_exp = &multipole_exp[Parent::harmonic.getPreExpRedirJ(j) + j]; FReal minus_1_pow_k = FReal(j&1 ? -1 : 1); for(int k = -j ; k < 0 ; ++k){ /* k < 0 */ @@ -490,17 +490,17 @@ public: } /** Needed when doing the M2L */ - void M2L_z_axis(FComplexe* const FRestrict rot_local_exp, - const FComplexe* const FRestrict rot_multipole_exp, + void M2L_z_axis(FComplex* const FRestrict rot_local_exp, + const FComplex* const FRestrict rot_multipole_exp, const FReal* const outer_array){ - FComplexe* p_rot_local_exp = rot_local_exp; + FComplex* p_rot_local_exp = rot_local_exp; for(int j = 0 ; j <= Parent::devP; ++j){ const FReal* p_outer_array_j = outer_array + j; const int stop_for_n = Parent::devP-j; const int min_j = FMath::Min(j, stop_for_n); for(int k = 0 ; k <= min_j ; ++k){ - const FComplexe* p_rot_multipole_exp = rot_multipole_exp + k * (Parent::devP + 2 - k); + const FComplex* p_rot_multipole_exp = rot_multipole_exp + k * (Parent::devP + 2 - k); for(int n = k ; n <= stop_for_n ; ++n){ p_rot_local_exp->incReal(p_rot_multipole_exp->getReal() * p_outer_array_j[n]); p_rot_local_exp->incImag(p_rot_multipole_exp->getImag() * p_outer_array_j[n]); @@ -512,20 +512,20 @@ public: } /** Needed when doing the M2L */ - void rotation_Rotate_local_expansion_terms(const FComplexe*const rot_local_exp, - const FComplexe*const FRestrict *const FRestrict rcc_inner, - FComplexe*const FRestrict local_exp){ + void rotation_Rotate_local_expansion_terms(const FComplex*const rot_local_exp, + const FComplex*const FRestrict *const FRestrict rcc_inner, + FComplex*const FRestrict local_exp){ const int Q = Parent::devP/2; - FComplexe* FRestrict p_local_exp = local_exp; + FComplex* FRestrict p_local_exp = local_exp; for(int j = 0 ; j <= Q ; ++j){ const int min_j = j; - const FComplexe* const FRestrict p_rot_local_exp_j = &rot_local_exp[Parent::harmonic.getPreExpRedirJ(j) + j]; + const FComplex* const FRestrict p_rot_local_exp_j = &rot_local_exp[Parent::harmonic.getPreExpRedirJ(j) + j]; for (int nu = 0 ; nu <= j; ++nu){ - const FComplexe* FRestrict p_rcc_inner = &rcc_inner[j][RotationM2LTransfer::getRotationCoefP(j, nu, -min_j)]; - const FComplexe* FRestrict p_rot_local_exp = p_rot_local_exp_j; + const FComplex* FRestrict p_rcc_inner = &rcc_inner[j][RotationM2LTransfer::getRotationCoefP(j, nu, -min_j)]; + const FComplex* FRestrict p_rot_local_exp = p_rot_local_exp_j; FReal minus_1_pow_k = FReal(min_j&1 ? -1 : 1); for(int k = -min_j ; k < 0 ; ++k){ /* k < 0 */ @@ -558,15 +558,15 @@ public: } /* for nu */ } /* for j */ - const FComplexe* FRestrict p_rot_local_exp_j = &rot_local_exp[Parent::harmonic.getPreExpRedirJ(Q) + Q]; + const FComplex* FRestrict p_rot_local_exp_j = &rot_local_exp[Parent::harmonic.getPreExpRedirJ(Q) + Q]; for(int j = Q + 1; j <= Parent::devP ; ++j){ p_rot_local_exp_j += Parent::devP - j +1; const int min_j = Parent::devP-j; for(int nu = 0 ; nu <= j; ++nu){ - const FComplexe* FRestrict p_rcc_inner = &rcc_inner[j][RotationM2LTransfer::getRotationCoefP(j, nu, -min_j)]; - const FComplexe* FRestrict p_rot_local_exp = p_rot_local_exp_j; + const FComplex* FRestrict p_rcc_inner = &rcc_inner[j][RotationM2LTransfer::getRotationCoefP(j, nu, -min_j)]; + const FComplex* FRestrict p_rot_local_exp = p_rot_local_exp_j; FReal minus_1_pow_k = FReal(min_j&1 ? -1 : 1); for(int k = -min_j ; k < 0; ++k){ /* k < 0 */ diff --git a/Src/Kernels/Uniform/FUnifCell.hpp b/Src/Kernels/Uniform/FUnifCell.hpp index 8f5d9566842bdd15b2e120a41a9acb97e4ca88f9..a66f34667c8fc9f7cd97c7fb0b19643bc812a2a0 100644 --- a/Src/Kernels/Uniform/FUnifCell.hpp +++ b/Src/Kernels/Uniform/FUnifCell.hpp @@ -24,7 +24,7 @@ #include "../../Components/FBasicCell.hpp" #include "../../Extensions/FExtendCellType.hpp" -#include "../../Utils/FComplexe.hpp" +#include "../../Utils/FComplex.hpp" /** * @author Pierre Blanchard (pierre.blanchard@inria.fr) @@ -47,17 +47,17 @@ class FUnifCell : public FBasicCell FReal multipole_exp[NRHS * NVALS * VectorSize]; //< Multipole expansion FReal local_exp[NLHS * NVALS * VectorSize]; //< Local expansion // PB: Store multipole and local expansion in Fourier space - FComplexe transformed_multipole_exp[NRHS * NVALS * TransformedVectorSize]; - FComplexe transformed_local_exp[NLHS * NVALS * TransformedVectorSize]; + FComplex transformed_multipole_exp[NRHS * NVALS * TransformedVectorSize]; + FComplex transformed_local_exp[NLHS * NVALS * TransformedVectorSize]; public: FUnifCell(){ memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize); memset(local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize); memset(transformed_multipole_exp, 0, - sizeof(FComplexe) * NRHS * NVALS * TransformedVectorSize); + sizeof(FComplex) * NRHS * NVALS * TransformedVectorSize); memset(transformed_local_exp, 0, - sizeof(FComplexe) * NLHS * NVALS * TransformedVectorSize); + sizeof(FComplex) * NLHS * NVALS * TransformedVectorSize); } ~FUnifCell() {} @@ -86,20 +86,20 @@ public: } /** Get Transformed Multipole */ - const FComplexe* getTransformedMultipole(const int inRhs) const{ + const FComplex* getTransformedMultipole(const int inRhs) const{ return this->transformed_multipole_exp + inRhs*TransformedVectorSize; } /** Get Transformed Local */ - const FComplexe* getTransformedLocal(const int inRhs) const{ + const FComplex* getTransformedLocal(const int inRhs) const{ return this->transformed_local_exp + inRhs*TransformedVectorSize; } /** Get Transformed Multipole */ - FComplexe* getTransformedMultipole(const int inRhs){ + FComplex* getTransformedMultipole(const int inRhs){ return this->transformed_multipole_exp + inRhs*TransformedVectorSize; } /** Get Transformed Local */ - FComplexe* getTransformedLocal(const int inRhs){ + FComplex* getTransformedLocal(const int inRhs){ return this->transformed_local_exp + inRhs*TransformedVectorSize; } @@ -113,9 +113,9 @@ public: memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize); memset(local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize); memset(transformed_multipole_exp, 0, - sizeof(FComplexe) * NRHS * NVALS * TransformedVectorSize); + sizeof(FComplex) * NRHS * NVALS * TransformedVectorSize); memset(transformed_local_exp, 0, - sizeof(FComplexe) * NLHS * NVALS * TransformedVectorSize); + sizeof(FComplex) * NLHS * NVALS * TransformedVectorSize); } /////////////////////////////////////////////////////// @@ -167,7 +167,7 @@ public: } static constexpr int GetSize(){ - return (NRHS+NLHS)*NVALS*VectorSize * (int) sizeof(FReal) + (NRHS+NLHS)*NVALS*TransformedVectorSize * (int) sizeof(FComplexe); + return (NRHS+NLHS)*NVALS*VectorSize * (int) sizeof(FReal) + (NRHS+NLHS)*NVALS*TransformedVectorSize * (int) sizeof(FComplex); } template diff --git a/Src/Kernels/Uniform/FUnifDenseKernel.hpp b/Src/Kernels/Uniform/FUnifDenseKernel.hpp index 2f1fa753394d1401c5880dfc986eb599773f75e4..95a9e32695592d7ccf6580028159933a1813eac6 100644 --- a/Src/Kernels/Uniform/FUnifDenseKernel.hpp +++ b/Src/Kernels/Uniform/FUnifDenseKernel.hpp @@ -149,7 +149,7 @@ public: FUnifTensor::setRoots(AbstractBaseClass::getCellCenter(TargetCell->getCoordinate(),TreeLevel), CellWidth, X); for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ -// FComplexe *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxRhs); +// FComplex *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxRhs); for (int idx=0; idx<343; ++idx){ if (SourceCells[idx]){ diff --git a/Src/Kernels/Uniform/FUnifKernel.hpp b/Src/Kernels/Uniform/FUnifKernel.hpp index 9af07ac02b3af04b3d52608215a340518eb5a534..a4893b70bbf2f8b06313791877375fe8f49e1405 100644 --- a/Src/Kernels/Uniform/FUnifKernel.hpp +++ b/Src/Kernels/Uniform/FUnifKernel.hpp @@ -139,7 +139,7 @@ public: const FReal scale(MatrixKernel->getScaleFactor(CellWidth)); for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FComplexe *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxRhs); + FComplex *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxRhs); for (int idx=0; idx<343; ++idx){ if (SourceCells[idx]){ diff --git a/Src/Kernels/Uniform/FUnifM2LHandler.hpp b/Src/Kernels/Uniform/FUnifM2LHandler.hpp index 4b52c2f82de5aec196c15528e9fedc484220c6aa..09fb6ca5e95ab2c8ec1f1c2503d4541ac988b0bd 100644 --- a/Src/Kernels/Uniform/FUnifM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifM2LHandler.hpp @@ -27,7 +27,7 @@ #include "../../Utils/FTic.hpp" #include "../../Utils/FDft.hpp" -#include "../../Utils/FComplexe.hpp" +#include "../../Utils/FComplex.hpp" #include "./FUnifTensor.hpp" @@ -35,7 +35,7 @@ /*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. PB: Compute() does not belong to the M2LHandler like it does in the Chebyshev kernel. This allows much nicer specialization of the M2LHandler class with respect to the homogeneity of the kernel of interaction like in the ChebyshevSym kernel.*/ template -static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, FComplexe* &FC) +static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, FComplex* &FC) { // allocate memory and store compressed M2L operators if (FC) throw std::runtime_error("M2L operators are already set"); @@ -54,12 +54,12 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel // allocate memory and compute 316 m2l operators FReal *_C; - FComplexe *_FC; + FComplex *_FC; // reduce storage from nnodes^2=order^6 to (2order-1)^3 const unsigned int rc = (2*order-1)*(2*order-1)*(2*order-1); _C = new FReal [rc]; - _FC = new FComplexe [rc * ninteractions]; + _FC = new FComplex [rc * ninteractions]; // initialize root node ids pairs unsigned int node_ids_pairs[rc][2]; @@ -121,7 +121,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel // reduce storage if real valued kernel const unsigned int opt_rc = rc/2+1; // allocate M2L - FC = new FComplexe[343 * opt_rc]; + FC = new FComplex[343 * opt_rc]; for (int i=-3; i<=3; ++i) for (int j=-3; j<=3; ++j) @@ -171,7 +171,7 @@ class FUnifM2LHandler rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1)}; /// M2L Operators (stored in Fourier space) - FSmartPointer< FComplexe,FSmartArrayMemory> FC; + FSmartPointer< FComplex,FSmartArrayMemory> FC; /// Utils typedef FUnifTensor TensorType; @@ -240,12 +240,12 @@ public: if (FC) throw std::runtime_error("M2L operator already set"); // Compute matrix of interactions const FReal ReferenceCellWidth = FReal(2.); - FComplexe* pFC = NULL; + FComplex* pFC = NULL; Compute(MatrixKernel,ReferenceCellWidth,pFC); FC.assign(pFC); // Compute memory usage - unsigned long sizeM2L = 343*opt_rc*sizeof(FComplexe); + unsigned long sizeM2L = 343*opt_rc*sizeof(FComplex); // write info @@ -261,7 +261,7 @@ public: * @param[in] X transformed local expansion of size \f$r\f$ * @param[out] x local expansion of size \f$\ell^3\f$ */ - void unapplyZeroPaddingAndDFT(const FComplexe *const FX, FReal *const x) const + void unapplyZeroPaddingAndDFT(const FComplex *const FX, FReal *const x) const { FReal Px[rc]; FBlas::setzero(rc,Px); @@ -288,11 +288,11 @@ public: * @param[in] CellWidth needed for the scaling of the compressed M2L operators which are based on a homogeneous matrix kernel computed for the reference cell width \f$w=2\f$, ie in \f$[-1,1]^3\f$. */ void applyFC(const unsigned int idx, const unsigned int, const FReal scale, - const FComplexe *const FY, FComplexe *const FX) const + const FComplex *const FY, FComplex *const FX) const { // Perform entrywise product manually for (unsigned int j=0; j rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1)}; /// M2L Operators (stored in Fourier space for each level) - FSmartPointer< FComplexe*,FSmartArrayMemory> FC; + FSmartPointer< FComplex*,FSmartArrayMemory> FC; /// Homogeneity specific variables const unsigned int TreeHeight; const FReal RootCellWidth; @@ -375,7 +375,7 @@ public: TensorType::setNodeIdsDiff(node_diff); // init M2L operators - FC = new FComplexe*[TreeHeight]; + FC = new FComplex*[TreeHeight]; FC[0] = NULL; FC[1] = NULL; for (unsigned int l=2; lgetScaleFactor(AbstractBaseClass::BoxWidth, TreeLevel); - FComplexe tmpTLoc; + FComplex tmpTLoc; for (unsigned int pidx=0; pidx<343; ++pidx) { const unsigned int count = countExp[pidx]; if (count) { - FComplexe *const tmpK=const_cast(SymM2LHandler->getK(TreeLevel, pidx)); + FComplex *const tmpK=const_cast(SymM2LHandler->getK(TreeLevel, pidx)); for (unsigned int i=0; i static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, - FComplexe* FC[343]) + FComplex* FC[343]) { // std::cout << "\nComputing 16 far-field interactions (l=" << ORDER << ", eps=" << Epsilon // << ") for cells of width w = " << CellWidth << std::endl; @@ -52,12 +52,12 @@ static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal FUnifTensor::setRoots(FPoint(0.,0.,0.), CellWidth, X); // temporary matrices FReal *_C; - FComplexe *_FC; + FComplex *_FC; // reduce storage from nnodes^2=order^6 to (2order-1)^3 const unsigned int rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1); _C = new FReal [rc]; - _FC = new FComplexe [rc]; // TODO: do it in the non-sym version!!! + _FC = new FComplex [rc]; // TODO: do it in the non-sym version!!! // init Discrete Fourier Transformator const int dimfft = 1; // unidim FFT since fully circulant embedding @@ -133,7 +133,7 @@ static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal { // allocate assert(FC[idx]==NULL); - FC[idx] = new FComplexe[opt_rc]; + FC[idx] = new FComplex[opt_rc]; FBlas::c_copy(opt_rc, reinterpret_cast(_FC), reinterpret_cast(FC[idx])); } @@ -148,7 +148,7 @@ static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal std::cout << "The approximation of the " << counter << " far-field interactions (sizeM2L= " - << counter*opt_rc*sizeof(FComplexe) << " B" + << counter*opt_rc*sizeof(FComplex) << " B" << ") took " << overall_time << "s\n" << std::endl; // Free _C & _FC @@ -182,7 +182,7 @@ class FUnifSymM2LHandler static const unsigned int nnodes = ORDER*ORDER*ORDER; // M2L operators - FComplexe* K[343]; + FComplex* K[343]; public: @@ -226,7 +226,7 @@ public: /*! return the t-th approximated far-field interactions*/ - const FComplexe *const getK(const unsigned int, const unsigned int t) const + const FComplex *const getK(const unsigned int, const unsigned int t) const { return K[t]; } }; @@ -246,7 +246,7 @@ class FUnifSymM2LHandler const unsigned int TreeHeight; // M2L operators for all levels in the octree - FComplexe*** K; + FComplex*** K; public: @@ -262,10 +262,10 @@ public: : TreeHeight(inTreeHeight) { // init all 343 item to zero, because effectively only 16 exist - K = new FComplexe** [TreeHeight]; + K = new FComplex** [TreeHeight]; K[0] = NULL; K[1] = NULL; for (unsigned int l=2; lgetTransformedLocal(idxLoc); + FComplex *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxLoc); // update idxRhs const int idxRhs = idxLhs % nPV; diff --git a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp index 9e8e4bdd4678c82b21bb893b915265c5a8fd7ec2..fa59c216c9df9c9b88e7ac6af3fbfd094b2c18dd 100644 --- a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp @@ -27,7 +27,7 @@ #include "../../Utils/FTic.hpp" #include "../../Utils/FDft.hpp" -#include "../../Utils/FComplexe.hpp" +#include "../../Utils/FComplex.hpp" #include "./FUnifTensor.hpp" @@ -39,7 +39,7 @@ template static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, const FReal CellWidthExtension, - FComplexe** &FC) + FComplex** &FC) { // dimensions of operators const unsigned int order = ORDER; @@ -62,16 +62,16 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, // allocate memory and compute 316 m2l operators FReal** _C; - FComplexe** _FC; + FComplex** _FC; // reduce storage from nnodes^2=order^6 to (2order-1)^3 const unsigned int rc = (2*order-1)*(2*order-1)*(2*order-1); _C = new FReal* [ncmp]; - _FC = new FComplexe* [ncmp]; + _FC = new FComplex* [ncmp]; for (unsigned int d=0; d ncmp = MatrixKernelClass::NCMP}; /// M2L Operators (stored in Fourier space for each component of tensor) - FSmartPointer< FComplexe*,FSmartArrayMemory> FC; + FSmartPointer< FComplex*,FSmartArrayMemory> FC; const FReal CellWidthExtension; //(MatrixKernel,ReferenceCellWidth, 0., FC); // Compute memory usage - unsigned long sizeM2L = 343*ncmp*opt_rc*sizeof(FComplexe); + unsigned long sizeM2L = 343*ncmp*opt_rc*sizeof(FComplex); // write info std::cout << "Compute and Set M2L operators ("<< long(sizeM2L/**1e-6*/) <<" Bytes) in " @@ -295,7 +295,7 @@ public: * @param[in] X transformed local expansion of size \f$r\f$ * @param[out] x local expansion of size \f$\ell^3\f$ */ - void unapplyZeroPaddingAndDFT(const FComplexe *const FX, FReal *const x) const + void unapplyZeroPaddingAndDFT(const FComplex *const FX, FReal *const x) const { FReal Px[rc]; FBlas::setzero(rc,Px); @@ -323,11 +323,11 @@ public: */ void applyFC(const unsigned int idx, const unsigned int , const FReal scale, const unsigned int d, - const FComplexe *const FY, FComplexe *const FX) const + const FComplex *const FY, FComplex *const FX) const { // Perform entrywise product manually for (unsigned int j=0; j(tmpFX)); // FMkl::c_had(rc,reinterpret_cast(FC + idx*rc), // reinterpret_cast(FY), @@ -354,7 +354,7 @@ public: * @param[in] y multipole expansion of size \f$\ell^3\f$ * @param[out] Y transformed multipole expansion of size \f$r\f$ */ - void applyZeroPaddingAndDFT(FReal *const y, FComplexe *const FY) const + void applyZeroPaddingAndDFT(FReal *const y, FComplex *const FY) const { FReal Py[rc]; FBlas::setzero(rc,Py); @@ -384,7 +384,7 @@ class FUnifTensorialM2LHandler /// M2L Operators (stored in Fourier space for each component and each level) - FSmartPointer< FComplexe**,FSmartArrayMemory> FC; + FSmartPointer< FComplex**,FSmartArrayMemory> FC; /// Homogeneity specific variables const unsigned int TreeHeight; @@ -424,9 +424,9 @@ public: Dft.buildDFT(steps); // allocate FC - FC = new FComplexe**[TreeHeight]; + FC = new FComplex**[TreeHeight]; for (unsigned int l=0; l(tmpFX)); // FMkl::c_had(rc,reinterpret_cast(FC + idx*rc), // reinterpret_cast(FY), @@ -555,7 +555,7 @@ public: * @param[in] y multipole expansion of size \f$\ell^3\f$ * @param[out] Y transformed multipole expansion of size \f$r\f$ */ - void applyZeroPaddingAndDFT(FReal *const y, FComplexe *const FY) const + void applyZeroPaddingAndDFT(FReal *const y, FComplex *const FY) const { FReal Py[rc]; FBlas::setzero(rc,Py); diff --git a/Src/Utils/FComplexe.hpp b/Src/Utils/FComplex.hpp similarity index 86% rename from Src/Utils/FComplexe.hpp rename to Src/Utils/FComplex.hpp index 246f2badef61a481fe77823e245757b5a914091c..c8a89fa8cd2429b1bcca0b2a66f08f24f15bad68 100755 --- a/Src/Utils/FComplexe.hpp +++ b/Src/Utils/FComplex.hpp @@ -28,12 +28,12 @@ * It can be passed to blas fonction and has to be * 2 x complex[0] size only. */ -class FComplexe { +class FComplex { FReal complex[2]; //< Real & Imaginary public: /** Default Constructor (set complex[0]&imaginary to 0) */ - FComplexe() { + FComplex() { complex[0] = 0; complex[1] = 0; } @@ -42,32 +42,32 @@ public: * @param inImag the imaginary * @param inReal the complex[0] */ - explicit FComplexe(const FReal inReal, const FReal inImag) { + explicit FComplex(const FReal inReal, const FReal inImag) { complex[0] = inReal; complex[1] = inImag; } /** Copy constructor */ - FComplexe(const FComplexe& other){ + FComplex(const FComplex& other){ complex[0] = other.complex[0]; complex[1] = other.complex[1]; } /** Copy operator */ - FComplexe& operator=(const FComplexe& other){ + FComplex& operator=(const FComplex& other){ this->complex[0] = other.complex[0]; this->complex[1] = other.complex[1]; return *this; } /** Equality operator */ - bool operator==(const FComplexe& other){ + bool operator==(const FComplex& other){ return FMath::LookEqual(this->complex[1],other.complex[1]) && FMath::LookEqual(this->complex[0],other.complex[0]); } /** Different equal */ - bool operator!=(const FComplexe& other){ + bool operator!=(const FComplex& other){ return !(*this == other); } @@ -98,13 +98,13 @@ public: } /** Return the conjugate */ - FComplexe conjugate() const{ - return FComplexe(complex[0],-complex[1]); + FComplex conjugate() const{ + return FComplex(complex[0],-complex[1]); } /** Return the conjugate */ - FComplexe negate() const{ - return FComplexe(-complex[0],-complex[1]); + FComplex negate() const{ + return FComplex(-complex[0],-complex[1]); } /** * return the square of the modulus of the complex number @@ -123,7 +123,7 @@ public: * in complex[0] with other complex[0], same for complex[1] * @param other the complex to use data */ - FComplexe& operator+=(const FComplexe& other){ + FComplex& operator+=(const FComplex& other){ this->complex[0] += other.complex[0]; this->complex[1] += other.complex[1]; return *this; @@ -175,7 +175,7 @@ public: } /** Mul a complexe by another "c*=c2" */ - FComplexe& operator*=(const FComplexe& other){ + FComplex& operator*=(const FComplex& other){ const FReal tempReal = this->complex[0]; this->complex[0] = (tempReal * other.complex[0]) - (this->complex[1] * other.complex[1]); this->complex[1] = (tempReal * other.complex[1]) + (this->complex[1] * other.complex[0]); @@ -183,14 +183,14 @@ public: } /** Mul a complexe by another "c*=c2" */ - FComplexe& operator*=(const FReal& real){ + FComplex& operator*=(const FReal& real){ this->complex[0] *= real; this->complex[1] *= real; return *this; } /** Mul a complexe by another "c*=c2" */ - FComplexe& operator/=(const FReal& real){ + FComplex& operator/=(const FReal& real){ this->complex[0] /= real; this->complex[1] /= real; return *this; @@ -202,35 +202,35 @@ public: } /** Mul other and another and add the result to current complexe */ - void addMul(const FComplexe& other, const FComplexe& another){ + void addMul(const FComplex& other, const FComplex& another){ this->complex[0] += (other.complex[0] * another.complex[0]) - (other.complex[1] * another.complex[1]); this->complex[1] += (other.complex[0] * another.complex[1]) + (other.complex[1] * another.complex[0]); } /** Mul other and another and add the result to current complexe */ - void equalMul(const FComplexe& other, const FComplexe& another){ + void equalMul(const FComplex& other, const FComplex& another){ this->complex[0] = (other.complex[0] * another.complex[0]) - (other.complex[1] * another.complex[1]); this->complex[1] = (other.complex[0] * another.complex[1]) + (other.complex[1] * another.complex[0]); } /** To cast to FReal */ - static FReal* ToFReal(FComplexe*const array){ + static FReal* ToFReal(FComplex*const array){ return reinterpret_cast(array); } /** To cast to FReal */ - static const FReal* ToFReal(const FComplexe*const array){ + static const FReal* ToFReal(const FComplex*const array){ return reinterpret_cast(array); } /** - * Operator stream FComplexe to std::ostream + * Operator stream FComplex to std::ostream * This can be used to simpldata[1] write out a complex with format (Re,Im) * @param[in,out] output where to write the position * @param[in] inPosition the position to write out * @return the output for multiple << operators */ template - friend StreamClass& operator<<(StreamClass& output, const FComplexe& inC){ + friend StreamClass& operator<<(StreamClass& output, const FComplex& inC){ output << "(" << inC.getReal() << ", " << inC.getImag() << ")"; return output; // for multiple << operators. } @@ -238,8 +238,8 @@ public: }; /** Global operator Mul a complexe by another "c=c1*c2" */ -inline FComplexe operator*=(const FComplexe& first, const FComplexe& second){ - return FComplexe( +inline FComplex operator*=(const FComplex& first, const FComplex& second){ + return FComplex( (first.getReal() * second.getImag()) + (first.getImag() * second.getReal()), (first.getReal() * second.getReal()) - (first.getImag() * second.getImag()) ); diff --git a/Src/Utils/FDft.hpp b/Src/Utils/FDft.hpp index 38227a5ffb8c005413947b8d7039087e891250a4..887d1c0258e5d4c9e36be87f9af1f452893a6d12 100644 --- a/Src/Utils/FDft.hpp +++ b/Src/Utils/FDft.hpp @@ -27,7 +27,7 @@ #include #include "FGlobal.hpp" -#include "FComplexe.hpp" +#include "FComplex.hpp" #include "FMath.hpp" @@ -45,7 +45,7 @@ * Fourier Transform (FFT). The FFT algorithm can either be provided by the * FFTW(3) library itself or a version that is wrapped in Intel MKL. * - * The direct DFT is templatized with the input value type (FReal or FComplexe), + * The direct DFT is templatized with the input value type (FReal or FComplex), * while 2 distinct classes resp. @class FFft and @class FCFft are defined * resp. for the real and complex valued FFT. * @@ -68,7 +68,7 @@ class FDft { ValueType* data_; //< data in physical space - FComplexe* dataF_; //< data in Fourier space + FComplex* dataF_; //< data in Fourier space FReal *cosRS_, *sinRS_; @@ -79,7 +79,7 @@ private: { // allocate arrays data_ = new ValueType[nsteps_]; - dataF_ = new FComplexe[nsteps_]; + dataF_ = new FComplex[nsteps_]; // Beware this is extremely HEAVY to store!!! => Use FDft only for debug! cosRS_ = new FReal[nsteps_*nsteps_]; @@ -120,7 +120,7 @@ public: /// Forward DFT // Real valued DFT void applyDFT(const FReal* sampledData, - FComplexe* transformedData) const + FComplex* transformedData) const { // read sampled data FBlas::c_setzero(nsteps_,reinterpret_cast(dataF_)); @@ -129,7 +129,7 @@ public: // perform direct forward transformation for(unsigned int r=0; r(transformedData)); } // Complexe valued DFT - void applyDFT(const FComplexe* sampledData, - FComplexe* transformedData) const + void applyDFT(const FComplex* sampledData, + FComplex* transformedData) const { // read sampled data FBlas::c_setzero(nsteps_,reinterpret_cast(dataF_)); @@ -149,7 +149,7 @@ public: // perform direct forward transformation for(unsigned int r=0; r(data_)); @@ -195,7 +195,7 @@ public: // perform direct backward transformation for(unsigned int r=0; r(dataF_)); FBlas::copy(nsteps_, sampledData,data_); @@ -320,7 +320,7 @@ public: } - void applyIDFT(const FComplexe* transformedData, + void applyIDFT(const FComplex* transformedData, FReal* sampledData) const { // read transformed data @@ -354,8 +354,8 @@ class FCFft enum{dim = DIM}; // arrays - FComplexe* data_; //< data in physical space - FComplexe* dataF_; //< data in Fourier space + FComplex* data_; //< data in physical space + FComplex* dataF_; //< data in Fourier space // plans fftw_plan plan_b_; // backward FFT plan @@ -376,8 +376,8 @@ public: } // allocate arrays - data_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_); - dataF_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_); + data_ = (FComplex*) fftw_malloc(sizeof(FComplex) * nsteps_); + dataF_ = (FComplex*) fftw_malloc(sizeof(FComplex) * nsteps_); // multidim fftw plans plan_b_ = @@ -403,8 +403,8 @@ public: fftw_free(dataF_); } - void applyDFT(const FComplexe* sampledData, - FComplexe* transformedData) const + void applyDFT(const FComplex* sampledData, + FComplex* transformedData) const { FBlas::c_setzero(nsteps_,reinterpret_cast(dataF_)); FBlas::c_copy(nsteps_, reinterpret_cast(sampledData), @@ -419,8 +419,8 @@ public: } - void applyIDFT(const FComplexe* transformedData, - FComplexe* sampledData) const + void applyIDFT(const FComplex* transformedData, + FComplex* sampledData) const { // read transformed data FBlas::c_setzero(nsteps_,reinterpret_cast(data_)); diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp index 95b718d32d4c7b68feb9c9fdf5e87dbbb7ce566d..930c339d6a92fc5a48eb36e999757f16e43773f2 100755 --- a/Src/Utils/FMpi.hpp +++ b/Src/Utils/FMpi.hpp @@ -24,7 +24,7 @@ #include "FMath.hpp" //Need that for converting datas -#include "FComplexe.hpp" +#include "FComplex.hpp" ///////////////////////////////////////////////////////////////////////////////////////// @@ -287,7 +287,7 @@ public: return MPI_CHAR; } - static MPI_Datatype GetType(const FComplexe& a){ + static MPI_Datatype GetType(const FComplex& a){ MPI_Datatype FMpiComplexe; MPI_Type_contiguous(2, GetType(a.getReal()) , &FMpiComplexe); return FMpiComplexe; diff --git a/Tests/Utils/testFFTW.cpp b/Tests/Utils/testFFTW.cpp index bb83b02cb91cd52ce291c722696d2bb80bf4fd99..b91e020b4be283f6d1f775390389fa9015ac35e9 100644 --- a/Tests/Utils/testFFTW.cpp +++ b/Tests/Utils/testFFTW.cpp @@ -24,7 +24,7 @@ #include #include "../../Src/Utils/FGlobal.hpp" -#include "../../Src/Utils/FComplexe.hpp" +#include "../../Src/Utils/FComplex.hpp" #include "../../Src/Utils/FTic.hpp" @@ -46,10 +46,10 @@ int main() // fftw arrays FReal* fftR_; - FComplexe* fftC_; + FComplex* fftC_; fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_); - fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_); + fftC_ = (FComplex*) fftw_malloc(sizeof(FComplex) * nsteps_); // fftw plans // use routine defined in file: @@ -105,7 +105,7 @@ int main() // std::cout< #include "../../Src/Utils/FGlobal.hpp" -#include "../../Src/Utils/FComplexe.hpp" +#include "../../Src/Utils/FComplex.hpp" #include "../../Src/Utils/FTic.hpp" @@ -54,10 +54,10 @@ int main() std::cout<< "Test "<< dim <<"D FFT."<(FPLocalExp)); FBlas::setzero(rc,PLocalExp); @@ -358,8 +358,8 @@ int main(int, char **){ // std::cout<(FT)); // if first COLUMN (T) of C is used @@ -383,7 +383,7 @@ int main(int, char **){ // Set transformed MultExp to 0 - // for (unsigned int n=0; n(FPMultExp)); // Transform PaddedMultExp diff --git a/Tests/Utils/testUnifTensorialInterpolator.cpp b/Tests/Utils/testUnifTensorialInterpolator.cpp index 294085d8e3f75bc47ff20e075638e714106dfbbd..6875e605e083ec02bd606ebf72500143c604cbfc 100755 --- a/Tests/Utils/testUnifTensorialInterpolator.cpp +++ b/Tests/Utils/testUnifTensorialInterpolator.cpp @@ -41,7 +41,7 @@ // Check DFT #include "Utils/FDft.hpp" -#include "Utils/FComplexe.hpp" +#include "Utils/FComplex.hpp" #include "Kernels/P2P/FP2PParticleContainer.hpp" @@ -389,8 +389,8 @@ int main(int, char **){ // std::cout<(FT)); // if first COLUMN (T) of C is used @@ -399,11 +399,11 @@ int main(int, char **){ // // if first ROW of C is used // Dft.applyDFT(C,FT); - FComplexe FPMultExp[nrhs*rc]; - FComplexe FPLocalExp[nlhs*rc]; + FComplex FPMultExp[nrhs*rc]; + FComplex FPLocalExp[nlhs*rc]; FReal PLocalExp[nlhs*rc]; - //for (unsigned int n=0; n(FPLocalExp)); FBlas::setzero(nlhs*rc,PLocalExp); @@ -425,7 +425,7 @@ int main(int, char **){ // Set transformed MultExp to 0 - // for (unsigned int n=0; n(FPMultExp)); // Transform PaddedMultExp @@ -442,7 +442,7 @@ int main(int, char **){ // reinterpret_cast(FPMultExp), // reinterpret_cast(FPLocalExp)); // > or perform entrywise product manually - FComplexe tmpFX; + FComplex tmpFX; for (unsigned int idxLhs=0; idxLhs { MortonIndex ref = -1; int numMort = 0; if(app.global().processId()==0){ - for(int i=0 ; i<5000 ; ++i){ + for(int i=0 ; i { MPI_Exscan(&outputSize,&starter,1,MPI_LONG_LONG_INT,MPI_SUM,app.global().getComm()); - //We sort the output array relatvely to position inside leafs + //We sort the output array relatvely to line number in origin file FSize inc = 0; FMpiTreeBuilder::IndexedParticle * saveForSort = outputArray; int nbOfPartsInLeaf = 0; @@ -224,24 +224,28 @@ class TestMpiTreeBuilder : public FUTesterMpi< class TestMpiTreeBuilder> { } bool resultQsMpi = true; //passed.. + //Test + + int * ownerOfEachParts = new int[outputSize]; + for(int idP = 0) + //Test // printf("Fin tri : %d hold %d my first [%lld,%lld] last [%lld,%lld]\n",app.global().processId(),outputSize, // outputArray[0].index,outputArray[0].particle.indexInFile, // outputArray[outputSize-1].index,outputArray[outputSize-1].particle.indexInFile); - int tot = 0; - MPI_Reduce((int*)&outputSize,&tot,1,MPI_INT,MPI_SUM,0,app.global().getComm()); - if(app.global().processId() == 0){ - printf("Total Particule après tri = %d \n",tot); - } - for(int i=0 ; i { } Print("Test 2 : Output of merging leaves is tested"); uassert(resultMergeLeaves); + //Test the Equalize and Fill tree FLeafBalance balancer; FVector finalParticles; @@ -300,8 +305,9 @@ class TestMpiTreeBuilder : public FUTesterMpi< class TestMpiTreeBuilder> { } Print("Test 3 : Output of Equalize Tree is tested"); - uassert(resultEqualize); MPI_Barrier(MPI_COMM_WORLD); + uassert(resultEqualize); + delete [] originalArray; delete [] arrayOfParticles; delete [] arrayToBeSorted; diff --git a/UTests/utestRotation.cpp b/UTests/utestRotation.cpp index 488f4cd268f8d8ba52c6460cd0d4752284ab9136..019695ab5bf1fff063b83ccdb7b3076c5d879f61 100755 --- a/UTests/utestRotation.cpp +++ b/UTests/utestRotation.cpp @@ -4,17 +4,18 @@ // This software is a computer program whose purpose is to compute the FMM. // // This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// +// abiding by the rules of distribution of free software. +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". +// "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== #include "../Src/Utils/FGlobal.hpp" +#include "../Src/Utils/FTic.hpp" #include "../Src/Containers/FOctree.hpp" #include "../Src/Containers/FVector.hpp" @@ -38,7 +39,7 @@ class TestRotationDirect : public FUTester { /** The test method to factorize all the test based on different kernels */ template + class OctreeClass, class FmmClass, int ORDER> void RunTest(){ // // Load particles @@ -54,16 +55,16 @@ class TestRotationDirect : public FUTester { std::string filename(SCALFMMDataPath+parFile); // FFmaGenericLoader loader(filename); - if(!loader.isOpen()){ - Print("Cannot open particles file."); - uassert(false); - return; - } + if(!loader.isOpen()){ + Print("Cannot open particles file."); + uassert(false); + return; + } Print("Number of particles:"); - Print(loader.getNumberOfParticles()); + Print(loader.getNumberOfParticles()); - const int NbLevels = 4; - const int SizeSubLevels = 2; + const int NbLevels = 4; + const int SizeSubLevels = 2; // FSize nbParticles = loader.getNumberOfParticles() ; FmaRWParticle<8,8>* const particles = new FmaRWParticle<8,8>[nbParticles]; @@ -79,13 +80,15 @@ class TestRotationDirect : public FUTester { } - // Run FMM - Print("Fmm..."); - //KernelClass kernels(NbLevels,loader.getBoxWidth()); - KernelClass kernels(NbLevels,loader.getBoxWidth(), loader.getCenterOfBox()); - FmmClass algo(&tree,&kernels); - algo.execute(); - + // Run FMM + Print("Fmm..."); + //KernelClass kernels(NbLevels,loader.getBoxWidth()); + KernelClass* kernels = new KernelClass(NbLevels,loader.getBoxWidth(), loader.getCenterOfBox()); + FmmClass algo(&tree,kernels); + FTic timer; + algo.execute(); + timer.tac(); + std::cout << "Computation Time : " << ORDER <<" ; "<< timer.elapsed() << std::endl; // FReal energy= 0.0 , energyD = 0.0 ; ///////////////////////////////////////////////////////////////////////////////////////////////// @@ -122,7 +125,7 @@ class TestRotationDirect : public FUTester { } }); } - + delete kernels; delete[] particles; // Print for information @@ -179,32 +182,33 @@ class TestRotationDirect : public FUTester { /** If memstas is running print the memory used */ void PostTest() { - if( FMemStats::controler.isUsed() ){ - std::cout << "Memory used at the end " << FMemStats::controler.getCurrentAllocated() << " Bytes (" << FMemStats::controler.getCurrentAllocatedMB() << "MB)\n"; - std::cout << "Max memory used " << FMemStats::controler.getMaxAllocated() << " Bytes (" << FMemStats::controler.getMaxAllocatedMB() << "MB)\n"; - std::cout << "Total memory used " << FMemStats::controler.getTotalAllocated() << " Bytes (" << FMemStats::controler.getTotalAllocatedMB() << "MB)\n"; - } + if( FMemStats::controler.isUsed() ){ + std::cout << "Memory used at the end " << FMemStats::controler.getCurrentAllocated() << " Bytes (" << FMemStats::controler.getCurrentAllocatedMB() << "MB)\n"; + std::cout << "Max memory used " << FMemStats::controler.getMaxAllocated() << " Bytes (" << FMemStats::controler.getMaxAllocatedMB() << "MB)\n"; + std::cout << "Total memory used " << FMemStats::controler.getTotalAllocated() << " Bytes (" << FMemStats::controler.getTotalAllocatedMB() << "MB)\n"; + } } /////////////////////////////////////////////////////////// // The tests! /////////////////////////////////////////////////////////// - static const int P = 9; + // static const int P = 9; /** Rotation */ + template void TestRotation(){ - typedef FRotationCell

CellClass; - typedef FP2PParticleContainerIndexed<> ContainerClass; + typedef FRotationCell

CellClass; + typedef FP2PParticleContainerIndexed<> ContainerClass; - typedef FRotationKernel KernelClass; + typedef FRotationKernel KernelClass; - typedef FSimpleLeaf LeafClass; - typedef FOctree< CellClass, ContainerClass , LeafClass > OctreeClass; + typedef FSimpleLeaf LeafClass; + typedef FOctree< CellClass, ContainerClass , LeafClass > OctreeClass; - typedef FFmmAlgorithm FmmClass; + typedef FFmmAlgorithm FmmClass; - RunTest(); + RunTest(); } /////////////////////////////////////////////////////////// @@ -213,13 +217,32 @@ class TestRotationDirect : public FUTester { /** set test */ void SetTests(){ - AddTest(&TestRotationDirect::TestRotation,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<4>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<6>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<8>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<10>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<12>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<14>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<16>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<18>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<20>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<22>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<24>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<26>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<28>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<32>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<35>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<40>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<45>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<50>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<55>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<60>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<65>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<70>,"Test Rotation Kernel"); + AddTest(&TestRotationDirect::TestRotation<75>,"Test Rotation Kernel"); } }; // You must do this TestClass(TestRotationDirect) - - - diff --git a/UTests/utestSphericalDirect.cpp b/UTests/utestSphericalDirect.cpp index 33c9be9a5bbb40d41a73d0e7e5bd857dacfd4d88..de539c1de09244bce6e2276c15a9e32002b54e04 100755 --- a/UTests/utestSphericalDirect.cpp +++ b/UTests/utestSphericalDirect.cpp @@ -4,17 +4,19 @@ // This software is a computer program whose purpose is to compute the FMM. // // This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// +// abiding by the rules of distribution of free software. +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". +// "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== + #include "Utils/FGlobal.hpp" +#include "Utils/FTic.hpp" #include "Containers/FOctree.hpp" #include "Containers/FVector.hpp" @@ -44,10 +46,11 @@ class TestSphericalDirect : public FUTester { /** The test method to factorize all the test based on different kernels */ template < class CellClass, class ContainerClass, class KernelClass, class LeafClass, - class OctreeClass, class FmmClass> + class OctreeClass, class FmmClass, int ORDER> void RunTest( const bool isBlasKernel){ // - const int DevP = 9; + const int DevP = ORDER; + printf("---------------------------------------- \n ------------------ %d ------------------ \n ---------------------------------------- ",ORDER); // // Load particles // @@ -95,7 +98,10 @@ class TestSphericalDirect : public FUTester { //KernelClass kernels(NbLevels,loader.getBoxWidth()); KernelClass kernels(DevP,NbLevels,loader.getBoxWidth(), loader.getCenterOfBox()); FmmClass algo(&tree,&kernels); + FTic timer; algo.execute(); + timer.tac(); + std::cout << "Computation Time : " << ORDER <<" ; "<< timer.elapsed() << std::endl; // FReal energy= 0.0 , energyD = 0.0 ; ///////////////////////////////////////////////////////////////////////////////////////////////// @@ -211,8 +217,37 @@ class TestSphericalDirect : public FUTester { typedef FFmmAlgorithm FmmClass; + printf("Spherical\n \n"); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 4>(false); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 6>(false); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 8>(false); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 10>(false); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 12>(false); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 14>(false); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 16>(false); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 18>(false); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 20>(true); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 22>(true); + // RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + // OctreeClass, FmmClass, 24>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 50>(true); RunTest< CellClass, ContainerClass, KernelClass, LeafClass, - OctreeClass, FmmClass>(false); + OctreeClass, FmmClass, 55>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 60>(true); + + } @@ -230,8 +265,29 @@ class TestSphericalDirect : public FUTester { typedef FFmmAlgorithm FmmClass; + printf("Spherical BLAS\n \n"); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 4>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 6>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 8>(true); RunTest< CellClass, ContainerClass, KernelClass, LeafClass, - OctreeClass, FmmClass>(true); + OctreeClass, FmmClass, 10>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 12>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 14>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 16>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 18>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 20>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 22>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 24>(true); } /** Block blas */ @@ -246,21 +302,53 @@ class TestSphericalDirect : public FUTester { typedef FFmmAlgorithm FmmClass; + + printf("Spherical BLOCK BLAS\n \n"); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 4>(true); RunTest< CellClass, ContainerClass, KernelClass, LeafClass, - OctreeClass, FmmClass>(true); + OctreeClass, FmmClass, 6>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 8>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 10>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 12>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 14>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 16>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 18>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 20>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 22>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 24>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 26>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 28>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 30>(true); + RunTest< CellClass, ContainerClass, KernelClass, LeafClass, + OctreeClass, FmmClass, 32>(true); + } #endif + /////////////////////////////////////////////////////////// // Set the tests! /////////////////////////////////////////////////////////// /** set test */ void SetTests(){ - AddTest(&TestSphericalDirect::TestSpherical,"Test Spherical Kernel"); + //AddTest(&TestSphericalDirect::TestSpherical,"Test Spherical Kernel"); #ifdef ScalFMM_USE_BLAS - AddTest(&TestSphericalDirect::TestSphericalBlas,"Test Spherical Blas Kernel"); - AddTest(&TestSphericalDirect::TestSphericalBlockBlas,"Test Spherical Block Blas Kernel"); + //AddTest(&TestSphericalDirect::TestSphericalBlas,"Test Spherical Blas Kernel"); + AddTest(&TestSphericalDirect::TestSphericalBlockBlas,"Test Spherical Block Blas Kernel"); #endif } }; @@ -268,6 +356,3 @@ class TestSphericalDirect : public FUTester { // You must do this TestClass(TestSphericalDirect) - - -