diff --git a/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp b/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp index 3996c0de101b814c79dbea2a99ecf1f7da9cb42a..c651b5096b297b71592cc96c3f261ce923b74219 100644 --- a/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp +++ b/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp @@ -6,9 +6,9 @@ #include "FGroupAttachedLeaf.hpp" -template<int NRHS = 1, int NLHS = 1> -class FP2PGroupParticleContainer : public FGroupAttachedLeaf<NRHS+4*NLHS, FReal> { - typedef FGroupAttachedLeaf<NRHS+4*NLHS, FReal> Parent; +template<int NRHS = 1, int NLHS = 1, int NVALS = 1> +class FP2PGroupParticleContainer : public FGroupAttachedLeaf<NVALS*(NRHS+4*NLHS), FReal> { + typedef FGroupAttachedLeaf<NVALS*(NRHS+4*NLHS), FReal> Parent; public: FP2PGroupParticleContainer(){} @@ -18,45 +18,101 @@ public: } - FReal* getPhysicalValues(const int idxRhs = 0){ - return Parent::getAttribute(0+idxRhs); + + FReal* getPhysicalValues(const int idxVals = 0, const int idxRhs = 0){ + return Parent::getAttribute((0+idxRhs)*NVALS+idxVals); + } + + const FReal* getPhysicalValues(const int idxVals = 0, const int idxRhs = 0) const { + return Parent::getAttribute((0+idxRhs)*NVALS+idxVals); + } + + FReal* getPhysicalValuesArray(const int idxVals = 0, const int idxRhs = 0){ + return Parent::getRawData() + ((0+idxRhs)*NVALS+idxVals)*Parent::getLeadingRawData(); + } + + const FReal* getPhysicalValuesArray(const int idxVals = 0, const int idxRhs = 0) const { + return Parent::getRawData() + ((0+idxRhs)*NVALS+idxVals)*Parent::getLeadingRawData(); + } + + int getLeadingDimension(){ + return Parent::getLeadingRawData(); + } + + FReal* getPotentials(const int idxVals = 0, const int idxLhs = 0){ + return Parent::getAttribute((NRHS+idxLhs)*NVALS+idxVals); + } + + const FReal* getPotentials(const int idxVals = 0, const int idxLhs = 0) const { + return Parent::getAttribute((NRHS+idxLhs)*NVALS+idxVals); + } + + FReal* getPotentialsArray(const int idxVals = 0, const int idxLhs = 0){ + return Parent::getRawData() + ((NRHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData(); + } + + const FReal* getPotentialsArray(const int idxVals = 0, const int idxLhs = 0) const { + return Parent::getRawData() + ((NRHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData(); + } + + FReal* getForcesX(const int idxVals = 0, const int idxLhs = 0){ + return Parent::getAttribute((NRHS+NLHS+idxLhs)*NVALS+idxVals); + } + + const FReal* getForcesX(const int idxVals = 0, const int idxLhs = 0) const { + return Parent::getAttribute((NRHS+NLHS+idxLhs)*NVALS+idxVals); } - const FReal* getPhysicalValues(const int idxRhs = 0) const { - return Parent::getAttribute(0+idxRhs); + FReal* getForcesXArray(const int idxVals = 0, const int idxLhs = 0){ + return Parent::getRawData() + ((NRHS+NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData(); } - FReal* getPotentials(const int idxLhs = 0){ - return Parent::getAttribute(NRHS+idxLhs); + const FReal* getForcesXArray(const int idxVals = 0, const int idxLhs = 0) const { + return Parent::getRawData() + ((NRHS+NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData(); } - const FReal* getPotentials(const int idxLhs = 0) const { - return Parent::getAttribute(NRHS+idxLhs); + FReal* getForcesY(const int idxVals = 0, const int idxLhs = 0){ + return Parent::getAttribute((NRHS+2*NLHS+idxLhs)*NVALS+idxVals); } - FReal* getForcesX(const int idxLhs = 0){ - return Parent::getAttribute(NRHS+NLHS+idxLhs); + const FReal* getForcesY(const int idxVals = 0, const int idxLhs = 0) const { + return Parent::getAttribute((NRHS+2*NLHS+idxLhs)*NVALS+idxVals); } - const FReal* getForcesX(const int idxLhs = 0) const { - return Parent::getAttribute(NRHS+NLHS+idxLhs); + FReal* getForcesYArray(const int idxVals = 0, const int idxLhs = 0){ + return Parent::getRawData() + ((NRHS+2*NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData(); } - FReal* getForcesY(const int idxLhs = 0){ - return Parent::getAttribute(NRHS+2*NLHS+idxLhs); + const FReal* getForcesYArray(const int idxVals = 0, const int idxLhs = 0) const { + return Parent::getRawData() + ((NRHS+2*NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData(); } - const FReal* getForcesY(const int idxLhs = 0) const { - return Parent::getAttribute(NRHS+2*NLHS+idxLhs); + FReal* getForcesZ(const int idxVals = 0, const int idxLhs = 0){ + return Parent::getAttribute((NRHS+3*NLHS+idxLhs)*NVALS+idxVals); } - FReal* getForcesZ(const int idxLhs = 0){ - return Parent::getAttribute(NRHS+3*NLHS+idxLhs); + const FReal* getForcesZ(const int idxVals = 0, const int idxLhs = 0) const { + return Parent::getAttribute((NRHS+3*NLHS+idxLhs)*NVALS+idxVals); } - const FReal* getForcesZ(const int idxLhs = 0) const { - return Parent::getAttribute(NRHS+3*NLHS+idxLhs); + FReal* getForcesZArray(const int idxVals = 0, const int idxLhs = 0){ + return Parent::getRawData() + ((NRHS+3*NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData(); } + + const FReal* getForcesZArray(const int idxVals = 0, const int idxLhs = 0) const { + return Parent::getRawData() + ((NRHS+3*NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData(); + } + + void resetForcesAndPotential(){ + for(int idx = 0 ; idx < 4*NLHS*NVALS ; ++idx){ + Parent::resetToInitialState(idx + NRHS*NVALS); + } + } + + int getNVALS() const { + return NVALS; + } + }; #endif // FP2PGROUPPARTICLECONTAINER_HPP diff --git a/Src/Kernels/Chebyshev/FChebInterpolator.hpp b/Src/Kernels/Chebyshev/FChebInterpolator.hpp index ea42d90f07de62c9f434372cf7f93be809144062..02889934bdf867d385c23ade0b1f000c9f79f4ce 100755 --- a/Src/Kernels/Chebyshev/FChebInterpolator.hpp +++ b/Src/Kernels/Chebyshev/FChebInterpolator.hpp @@ -648,12 +648,15 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyP2M(const FPo const map_glob_loc map(center, width); FPoint localPosition; + // number of multipole expansions + const int nMul = nVals*nRhs; + // Init W - FReal W1[nVals*nRhs]; - FReal W2[nVals*nRhs][3][ ORDER-1]; - FReal W4[nVals*nRhs][3][(ORDER-1)*(ORDER-1)]; - FReal W8[nVals*nRhs][ (ORDER-1)*(ORDER-1)*(ORDER-1)]; - for(int idxMul = 0 ; idxMul < nVals*nRhs ; ++idxMul){ + FReal W1[nMul]; + FReal W2[nMul][3][ ORDER-1]; + FReal W4[nMul][3][(ORDER-1)*(ORDER-1)]; + FReal W8[nMul][ (ORDER-1)*(ORDER-1)*(ORDER-1)]; + for(int idxMul = 0 ; idxMul < nMul ; ++idxMul){ W1[idxMul] = FReal(0.); for(unsigned int i=0; i<(ORDER-1); ++i) W2[idxMul][0][i] = W2[idxMul][1][i] = W2[idxMul][2][i] = FReal(0.); for(unsigned int i=0; i<(ORDER-1)*(ORDER-1); ++i) W4[idxMul][0][i] = W4[idxMul][1][i] = W4[idxMul][2][i] = FReal(0.); @@ -687,7 +690,7 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyP2M(const FPo for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){ - const int idxMul = idxVals*nRhs+idxRhs; + const int idxMul = idxRhs*nVals+idxVals; const FReal*const physicalValues = inParticles->getPhysicalValues(idxVals,idxRhs); @@ -720,7 +723,7 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyP2M(const FPo //////////////////////////////////////////////////////////////////// - for(int idxMul = 0 ; idxMul < nVals*nRhs ; ++idxMul){ + for(int idxMul = 0 ; idxMul < nMul ; ++idxMul){ // loop over interpolation points FReal F2[3][ORDER]; @@ -851,17 +854,17 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2P(const FPo ContainerClass *const inParticles) const { - FReal f1[nVals*nLhs]; - FReal W2[nVals*nLhs][3][ ORDER-1]; - FReal W4[nVals*nLhs][3][(ORDER-1)*(ORDER-1)]; - FReal W8[nVals*nLhs][ (ORDER-1)*(ORDER-1)*(ORDER-1)]; - { + // number of local expansions + const int nLoc = nVals*nLhs; - for(int idxVals = 0 ; idxVals < nVals ; ++idxVals){ + FReal f1[nLoc]; + FReal W2[nLoc][3][ ORDER-1]; + FReal W4[nLoc][3][(ORDER-1)*(ORDER-1)]; + FReal W8[nLoc][ (ORDER-1)*(ORDER-1)*(ORDER-1)]; + { - for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ - const int idxLoc = idxVals*nLhs+idxLhs; + for(int idxLoc = 0 ; idxLoc < nLoc ; ++idxLoc){ // sum over interpolation points f1[idxLoc] = FReal(0.); @@ -897,8 +900,7 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2P(const FPo } // (ORDER-1) * (6 + (ORDER-1)*2) flops } // (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1)*2)) flops } // ORDER*ORDER*ORDER * (1 + (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1)*2))) flops - } // NLHS - } // NVALS + } // NLOC } @@ -936,7 +938,7 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2P(const FPo for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ - const int idxLoc = idxVals*nLhs+idxLhs; + const int idxLoc = idxLhs*nVals+idxVals; // distribution over potential components: // We sum the multidim contribution of PhysValue @@ -1089,6 +1091,9 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PGradient(c // TENSOR-PRODUCT INTERPOLUTION NOT IMPLEMENTED YET HERE!!! //////// //////////////////////////////////////////////////////////////////// + // number of local expansions + const int nLoc = nVals*nLhs; + // setup local to global mapping const map_glob_loc map(center, width); FPoint Jacobian; @@ -1136,8 +1141,8 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PGradient(c } // apply P and increment forces - FReal forces[nVals*nLhs][3]; - for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc) + FReal forces[nLoc][3]; + for(int idxLoc = 0 ; idxLoc < nLoc ; ++idxLoc) for (unsigned int i=0; i<3; ++i) forces[idxLoc][i] = FReal(0.); @@ -1158,7 +1163,7 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PGradient(c P[0] *= FReal(2.); P[1] *= FReal(2.); P[1] += FReal(1.); P[2] *= FReal(2.); P[2] += FReal(1.); - for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc) + for(int idxLoc = 0 ; idxLoc < nLoc ; ++idxLoc) forces[idxLoc][0] += P[0] * P[1] * P[2] * localExpansion[2*idxLoc*nnodes + n]; // f1 component ////////////////////////////////////// @@ -1173,7 +1178,7 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PGradient(c P[0] *= FReal(2.); P[0] += FReal(1.); P[1] *= FReal(2.); P[2] *= FReal(2.); P[2] += FReal(1.); - for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc) + for(int idxLoc = 0 ; idxLoc < nLoc ; ++idxLoc) forces[idxLoc][1] += P[0] * P[1] * P[2] * localExpansion[2*idxLoc*nnodes + n]; // f2 component ////////////////////////////////////// @@ -1188,7 +1193,7 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PGradient(c P[0] *= FReal(2.); P[0] += FReal(1.); P[1] *= FReal(2.); P[1] += FReal(1.); P[2] *= FReal(2.); - for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc) + for(int idxLoc = 0 ; idxLoc < nLoc ; ++idxLoc) forces[idxLoc][2] += P[0] * P[1] * P[2] * localExpansion[2*idxLoc*nnodes + n]; } @@ -1198,7 +1203,7 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PGradient(c for(int idxVals = 0 ; idxVals < nVals ; ++idxVals){ - const int idxLoc = idxVals*nLhs+idxLhs; + const int idxLoc = idxLhs*nVals+idxVals; // scale forces forces[idxLoc][0] *= jacobian[0] / nnodes; @@ -1233,10 +1238,14 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PTotal(cons const FReal *const localExpansion, ContainerClass *const inParticles) const { - FReal f1[nVals*nLhs]; - FReal W2[nVals*nLhs][3][ ORDER-1]; - FReal W4[nVals*nLhs][3][(ORDER-1)*(ORDER-1)]; - FReal W8[nVals*nLhs][ (ORDER-1)*(ORDER-1)*(ORDER-1)]; + + // number of local expansions + const int nLoc = nVals*nLhs; + + FReal f1[nLoc]; + FReal W2[nLoc][3][ ORDER-1]; + FReal W4[nLoc][3][(ORDER-1)*(ORDER-1)]; + FReal W8[nLoc][ (ORDER-1)*(ORDER-1)*(ORDER-1)]; { // for W2 @@ -1249,7 +1258,7 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PTotal(cons FReal G8[ORDER * (ORDER-1)*(ORDER-1)]; // sum local expansions - for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc){ + for(int idxLoc = 0 ; idxLoc < nLoc ; ++idxLoc){ f1[idxLoc] = FReal(0.); for (unsigned int idx=0; idx<nnodes; ++idx) f1[idxLoc] += localExpansion[2*idxLoc*nnodes + idx]; // 1 flop @@ -1346,9 +1355,9 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PTotal(cons } // 3 + (ORDER-2)*15 // apply P and increment forces - FReal potential[nVals*nLhs]; - FReal forces[nVals*nLhs][3]; - for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc){ + FReal potential[nLoc]; + FReal forces[nLoc][3]; + for(int idxLoc = 0 ; idxLoc < nLoc ; ++idxLoc){ potential[idxLoc]= FReal(0.); for (unsigned int i=0; i<3; ++i) forces[idxLoc][i] = FReal(0.); @@ -1358,7 +1367,7 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PTotal(cons for( int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ - const int idxLoc = idxVals*nLhs+idxLhs; + const int idxLoc = idxLhs*nVals+idxVals; { FReal f2[4], f4[4], f8[4];