diff --git a/Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp b/Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp index b275001d59e4869011d47a661a106441db81bfad..f989acc716babde6bdb190e923b4ff4c4166043f 100644 --- a/Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp +++ b/Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp @@ -51,6 +51,7 @@ protected: int MaxThreads; //< The number of threads OctreeClass*const tree; //< The Tree + KernelClass*const originalCpuKernel; std::vector<starpu_data_handle_t>* handles_up; std::vector<starpu_data_handle_t>* handles_down; @@ -74,7 +75,7 @@ protected: public: FGroupTaskStarPUAlgorithm(OctreeClass*const inTree, KernelClass* inKernels, const int inMaxThreads = -1) - : MaxThreads(inMaxThreads), tree(inTree), + : MaxThreads(inMaxThreads), tree(inTree), originalCpuKernel(inKernels), handles_up(nullptr), handles_down(nullptr), cpuWrapper(tree->getHeight()), wrapperptr(&wrappers){ FAssertLF(tree, "tree cannot be null"); @@ -148,8 +149,12 @@ public: protected: void initCodelet(){ memset(&p2m_cl, 0, sizeof(p2m_cl)); - p2m_cl.where = STARPU_CPU; - p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportP2M()){ + p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback; + p2m_cl.where |= STARPU_CPU; + } +#endif p2m_cl.nbuffers = 2; p2m_cl.modes[0] = STARPU_RW; p2m_cl.modes[1] = STARPU_R; @@ -158,15 +163,23 @@ protected: memset(m2m_cl, 0, sizeof(m2m_cl[0])*9); memset(l2l_cl, 0, sizeof(l2l_cl[0])*9); for(int idx = 0 ; idx < 9 ; ++idx){ - m2m_cl[idx].where = STARPU_CPU; - m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportM2M()){ + m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback; + m2m_cl[idx].where |= STARPU_CPU; + } +#endif m2m_cl[idx].nbuffers = idx+2; m2m_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode)); m2m_cl[idx].dyn_modes[0] = STARPU_RW; 
m2m_cl[idx].name = "m2m_cl"; - l2l_cl[idx].where = STARPU_CPU; - l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportL2L()){ + l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback; + l2l_cl[idx].where |= STARPU_CPU; + } +#endif l2l_cl[idx].nbuffers = idx+2; l2l_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode)); l2l_cl[idx].dyn_modes[0] = STARPU_R; @@ -179,37 +192,57 @@ protected: } memset(&l2p_cl, 0, sizeof(l2p_cl)); - l2p_cl.where = STARPU_CPU; - l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportL2P()){ + l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback; + l2p_cl.where |= STARPU_CPU; + } +#endif l2p_cl.nbuffers = 2; l2p_cl.modes[0] = STARPU_R; l2p_cl.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); l2p_cl.name = "l2p_cl"; memset(&p2p_cl_in, 0, sizeof(p2p_cl_in)); - p2p_cl_in.where = STARPU_CPU; - p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportP2P()){ + p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback; + p2p_cl_in.where |= STARPU_CPU; + } +#endif p2p_cl_in.nbuffers = 1; p2p_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); p2p_cl_in.name = "p2p_cl_in"; memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout)); - p2p_cl_inout.where = STARPU_CPU; - p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportP2P()){ + p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback; + p2p_cl_inout.where |= STARPU_CPU; + } +#endif p2p_cl_inout.nbuffers = 2; p2p_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); p2p_cl_inout.name = "p2p_cl_inout"; memset(&m2l_cl_in, 0, 
sizeof(m2l_cl_in)); - m2l_cl_in.where = STARPU_CPU; - m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportM2L()){ + m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback; + m2l_cl_in.where |= STARPU_CPU; + } +#endif m2l_cl_in.nbuffers = 2; m2l_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); m2l_cl_in.modes[1] = STARPU_R; m2l_cl_in.name = "m2l_cl_in"; memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout)); - m2l_cl_inout.where = STARPU_CPU; - m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportM2L()){ + m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback; + m2l_cl_inout.where |= STARPU_CPU; + } +#endif m2l_cl_inout.nbuffers = 4; m2l_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); diff --git a/Src/GroupTree/FGroupTaskStarpuMpiAlgorithm.hpp b/Src/GroupTree/FGroupTaskStarpuMpiAlgorithm.hpp index 9a2eeb4d749510ba76207597b0001bf6eee91b26..6289ea48e44728f64814666e63a46af28ab4e06b 100644 --- a/Src/GroupTree/FGroupTaskStarpuMpiAlgorithm.hpp +++ b/Src/GroupTree/FGroupTaskStarpuMpiAlgorithm.hpp @@ -64,6 +64,7 @@ protected: int MaxThreads; //< The number of threads OctreeClass*const tree; //< The Tree + KernelClass*const originalCpuKernel; std::vector<starpu_data_handle_t>* handles_up; std::vector<starpu_data_handle_t>* handles_down; @@ -89,7 +90,7 @@ protected: public: FGroupTaskStarPUMpiAlgorithm(const FMpi::FComm& inComm, OctreeClass*const inTree, KernelClass* inKernels, const int inMaxThreads = -1) - : comm(inComm), MaxThreads(inMaxThreads), tree(inTree), + : comm(inComm), MaxThreads(inMaxThreads), tree(inTree), originalCpuKernel(inKernels), handles_up(nullptr), handles_down(nullptr), cpuWrapper(tree->getHeight()), wrapperptr(&wrappers){ FAssertLF(tree, "tree cannot be 
null"); @@ -176,8 +177,12 @@ public: protected: void initCodelet(){ memset(&p2m_cl, 0, sizeof(p2m_cl)); - p2m_cl.where = STARPU_CPU; - p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportP2M()){ + p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback; + p2m_cl.where |= STARPU_CPU; + } +#endif p2m_cl.nbuffers = 2; p2m_cl.modes[0] = STARPU_RW; p2m_cl.modes[1] = STARPU_R; @@ -186,15 +191,23 @@ protected: memset(m2m_cl, 0, sizeof(m2m_cl[0])*9); memset(l2l_cl, 0, sizeof(l2l_cl[0])*9); for(int idx = 0 ; idx < 9 ; ++idx){ - m2m_cl[idx].where = STARPU_CPU; - m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportM2M()){ + m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback; + m2m_cl[idx].where |= STARPU_CPU; + } +#endif m2m_cl[idx].nbuffers = idx+2; m2m_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode)); m2m_cl[idx].dyn_modes[0] = STARPU_RW; m2m_cl[idx].name = "m2m_cl"; - l2l_cl[idx].where = STARPU_CPU; - l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportL2L()){ + l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback; + l2l_cl[idx].where |= STARPU_CPU; + } +#endif l2l_cl[idx].nbuffers = idx+2; l2l_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode)); l2l_cl[idx].dyn_modes[0] = STARPU_R; @@ -207,37 +220,57 @@ protected: } memset(&l2p_cl, 0, sizeof(l2p_cl)); - l2p_cl.where = STARPU_CPU; - l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportL2P()){ + l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback; + l2p_cl.where |= STARPU_CPU; + } +#endif l2p_cl.nbuffers = 2; l2p_cl.modes[0] = STARPU_R; l2p_cl.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); 
l2p_cl.name = "l2p_cl"; memset(&p2p_cl_in, 0, sizeof(p2p_cl_in)); - p2p_cl_in.where = STARPU_CPU; - p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportP2P()){ + p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback; + p2p_cl_in.where |= STARPU_CPU; + } +#endif p2p_cl_in.nbuffers = 1; p2p_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); p2p_cl_in.name = "p2p_cl_in"; memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout)); - p2p_cl_inout.where = STARPU_CPU; - p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportP2P()){ + p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback; + p2p_cl_inout.where |= STARPU_CPU; + } +#endif p2p_cl_inout.nbuffers = 2; p2p_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); p2p_cl_inout.name = "p2p_cl_inout"; memset(&m2l_cl_in, 0, sizeof(m2l_cl_in)); - m2l_cl_in.where = STARPU_CPU; - m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportM2L()){ + m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback; + m2l_cl_in.where |= STARPU_CPU; + } +#endif m2l_cl_in.nbuffers = 2; m2l_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); m2l_cl_in.modes[1] = STARPU_R; m2l_cl_in.name = "m2l_cl_in"; memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout)); - m2l_cl_inout.where = STARPU_CPU; - m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback; +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportM2L()){ + m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback; + m2l_cl_inout.where |= STARPU_CPU; + } +#endif m2l_cl_inout.nbuffers = 4; m2l_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); m2l_cl_inout.modes[1] 
= starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); @@ -274,17 +307,25 @@ protected: //////////////////////////////////////////////////////////////////////////// void initCodeletMpi(){ - memset(&p2p_cl_inout_mpi, 0, sizeof(p2p_cl_inout_mpi)); - p2p_cl_inout_mpi.where = STARPU_CPU; - p2p_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallbackMpi; + memset(&p2p_cl_inout_mpi, 0, sizeof(p2p_cl_inout_mpi)); +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportP2P()){ + p2p_cl_inout_mpi.where |= STARPU_CPU; + p2p_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallbackMpi; + } +#endif p2p_cl_inout_mpi.nbuffers = 2; p2p_cl_inout_mpi.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); p2p_cl_inout_mpi.modes[1] = STARPU_R; p2p_cl_inout_mpi.name = "p2p_cl_inout_mpi"; - memset(&m2l_cl_inout_mpi, 0, sizeof(m2l_cl_inout_mpi)); - m2l_cl_inout_mpi.where = STARPU_CPU; - m2l_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallbackMpi; + memset(&m2l_cl_inout_mpi, 0, sizeof(m2l_cl_inout_mpi)); +#ifdef STARPU_USE_CPU + if(originalCpuKernel->supportM2L()){ + m2l_cl_inout_mpi.where |= STARPU_CPU; + m2l_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallbackMpi; + } +#endif m2l_cl_inout_mpi.nbuffers = 2; m2l_cl_inout_mpi.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE); m2l_cl_inout_mpi.modes[1] = STARPU_R; diff --git a/Src/GroupTree/FStarPUKernelCapacities.hpp b/Src/GroupTree/FStarPUKernelCapacities.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6e3351107351c793cc44059f6f300bb6fcf2f330 --- /dev/null +++ b/Src/GroupTree/FStarPUKernelCapacities.hpp @@ -0,0 +1,50 @@ +#ifndef FSTARPUKERNELCAPACITIES_HPP +#define FSTARPUKERNELCAPACITIES_HPP + +/** A kernel class used with the StarPU algorithms should + * implement this interface in order to inform the algorithm about which + * operators (P2M, M2M, M2L, L2L, L2P, P2P) the kernel supports. 
+ */ +class FStarPUKernelCapacities { +public: + /** Virtual so that a kernel can safely be destroyed through this interface. */ + virtual ~FStarPUKernelCapacities() = default; + + virtual bool supportP2M() const = 0; + virtual bool supportM2M() const = 0; + virtual bool supportM2L() const = 0; + virtual bool supportL2L() const = 0; + virtual bool supportL2P() const = 0; + virtual bool supportP2P() const = 0; +}; + +/** + * This is for the kernels that implement all the methods. + */ +template <class BaseClass> +class FStarPUAllYesCapacities : public BaseClass, public FStarPUKernelCapacities { +public: + using BaseClass::BaseClass; + + bool supportP2M() const override { + return true; + } + bool supportM2M() const override { + return true; + } + bool supportM2L() const override { + return true; + } + bool supportL2L() const override { + return true; + } + bool supportL2P() const override { + return true; + } + bool supportP2P() const override { + return true; + } +}; + +#endif // FSTARPUKERNELCAPACITIES_HPP + diff --git a/Tests/noDist/testBlockedAlgorithm.cpp b/Tests/noDist/testBlockedAlgorithm.cpp index 346f5fb63a82f62456670742c11d471545e6f5d4..5dd9cc2c4287a837919b4b0e915bff2113d7cab9 100644 --- a/Tests/noDist/testBlockedAlgorithm.cpp +++ b/Tests/noDist/testBlockedAlgorithm.cpp @@ -33,11 +33,13 @@ #include "../../Src/Components/FTestParticleContainer.hpp" #include "../../Src/Components/FTestCell.hpp" #include "../../Src/Components/FTestKernels.hpp" -#include "../Src/GroupTree/FGroupTestParticleContainer.hpp" +#include "../../Src/GroupTree/FGroupTestParticleContainer.hpp" #include "../../Src/Files/FFmaGenericLoader.hpp" #include "../../Src/Core/FFmmAlgorithm.hpp" +#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp" + int main(int argc, char* argv[]){ const FParameterNames LocalOptionBlocSize { {"-bs"}, @@ -50,7 +52,7 @@ int main(int argc, char* argv[]){ typedef FTestCell GroupCellClass; typedef FGroupTestParticleContainer GroupContainerClass; typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int> GroupOctreeClass; - typedef FTestKernels< GroupCellClass, 
GroupContainerClass > GroupKernelClass; + typedef FStarPUAllYesCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass; #ifdef ScalFMM_USE_STARPU typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; #elif defined(ScalFMM_USE_OMP4) diff --git a/Tests/noDist/testBlockedChebyshev.cpp b/Tests/noDist/testBlockedChebyshev.cpp index d2804d943d044f68058ffb731ceb39d423fb5a4f..c814e72966ea6cf46b50de3fe0b8317610a98e84 100644 --- a/Tests/noDist/testBlockedChebyshev.cpp +++ b/Tests/noDist/testBlockedChebyshev.cpp @@ -46,6 +46,7 @@ #include <memory> +#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp" @@ -63,7 +64,7 @@ int main(int argc, char* argv[]){ typedef FChebCell<ORDER> GroupCellClass; typedef FP2PGroupParticleContainer<> GroupContainerClass; typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal> GroupOctreeClass; - typedef FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass; + typedef FStarPUAllYesCapacities<FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass; #ifdef ScalFMM_USE_STARPU typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; #elif defined(ScalFMM_USE_OMP4) diff --git a/Tests/noDist/testBlockedMpiAlgorithm.cpp b/Tests/noDist/testBlockedMpiAlgorithm.cpp index 72027c44ee072f3fc3354d3387549d084a60021d..45c12199b0b3c9cca9da83e50f0addbb16eab30c 100644 --- a/Tests/noDist/testBlockedMpiAlgorithm.cpp +++ b/Tests/noDist/testBlockedMpiAlgorithm.cpp @@ -37,6 +37,9 @@ #include "../../Src/Core/FFmmAlgorithm.hpp" +#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp" + + int getTreeCoordinate(const FReal inRelativePosition, const 
FReal boxWidth, const FReal boxWidthAtLeafLevel, const int treeHeight) { FAssertLF( (inRelativePosition >= 0 && inRelativePosition <= boxWidth), "inRelativePosition : ",inRelativePosition ); @@ -75,7 +78,7 @@ int main(int argc, char* argv[]){ typedef FTestCell GroupCellClass; typedef FGroupTestParticleContainer GroupContainerClass; typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int> GroupOctreeClass; - typedef FTestKernels< GroupCellClass, GroupContainerClass > GroupKernelClass; + typedef FStarPUAllYesCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass; typedef FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; FMpi mpiComm(argc, argv); diff --git a/Tests/noDist/testBlockedMpiChebyshev.cpp b/Tests/noDist/testBlockedMpiChebyshev.cpp index c1e12fa6ab0b30b293840e43b7124c45ba42040c..3baaea5020fcb9cea9363355b1ff58c1bc0afa37 100644 --- a/Tests/noDist/testBlockedMpiChebyshev.cpp +++ b/Tests/noDist/testBlockedMpiChebyshev.cpp @@ -45,6 +45,8 @@ #include "../../Src/Files/FMpiFmaGenericLoader.hpp" +#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp" + #include <memory> @@ -87,7 +89,7 @@ int main(int argc, char* argv[]){ typedef FChebCell<ORDER> GroupCellClass; typedef FP2PGroupParticleContainer<> GroupContainerClass; typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal> GroupOctreeClass; - typedef FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass; + typedef FStarPUAllYesCapacities<FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass; typedef FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; // Get params diff 
--git a/Tests/noDist/testBlockedRotation.cpp b/Tests/noDist/testBlockedRotation.cpp index cfe773c9bce5d7e767165c134443ad36af89ebb6..3ea4b308ad55ab86c067df2548b2970a1351fff3 100644 --- a/Tests/noDist/testBlockedRotation.cpp +++ b/Tests/noDist/testBlockedRotation.cpp @@ -40,6 +40,8 @@ #include "../../Src/Core/FFmmAlgorithm.hpp" +#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp" + #include <memory> @@ -58,7 +60,7 @@ int main(int argc, char* argv[]){ typedef FRotationCell<P> GroupCellClass; typedef FP2PGroupParticleContainer<> GroupContainerClass; typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal> GroupOctreeClass; - typedef FRotationKernel< GroupCellClass, GroupContainerClass , P> GroupKernelClass; + typedef FStarPUAllYesCapacities<FRotationKernel< GroupCellClass, GroupContainerClass , P>> GroupKernelClass; #ifdef ScalFMM_USE_STARPU typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; #elif defined(ScalFMM_USE_OMP4) diff --git a/Tests/noDist/testBlockedTree.cpp b/Tests/noDist/testBlockedTree.cpp index f5fba698d8e20ba485cf5296465688c229d4ddff..3d543ecc0f7567f7ee3b1c8afef962ba553f22d4 100644 --- a/Tests/noDist/testBlockedTree.cpp +++ b/Tests/noDist/testBlockedTree.cpp @@ -40,6 +40,8 @@ #include "../../Src/Utils/FParameterNames.hpp" +#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp" + int main(int argc, char* argv[]){ const FParameterNames LocalOptionBlocSize { {"-bs"}, @@ -95,7 +97,7 @@ int main(int argc, char* argv[]){ - typedef FRotationKernel< CellClass, FP2PGroupParticleContainer<> , P> KernelClass; + typedef FStarPUAllYesCapacities<FRotationKernel< CellClass, FP2PGroupParticleContainer<> , P>> KernelClass; #ifdef ScalFMM_USE_STARPU typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename 
GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm; #elif defined(ScalFMM_USE_OMP4)