Commit 0679c0f9 authored by BRAMAS Berenger

Add an interface to query which operators a kernel supports in StarPU

parent 2979d418
......@@ -51,6 +51,7 @@ protected:
int MaxThreads; //< The number of threads
OctreeClass*const tree; //< The Tree
KernelClass*const originalCpuKernel;
std::vector<starpu_data_handle_t>* handles_up;
std::vector<starpu_data_handle_t>* handles_down;
......@@ -74,7 +75,7 @@ protected:
public:
FGroupTaskStarPUAlgorithm(OctreeClass*const inTree, KernelClass* inKernels, const int inMaxThreads = -1)
: MaxThreads(inMaxThreads), tree(inTree),
: MaxThreads(inMaxThreads), tree(inTree), originalCpuKernel(inKernels),
handles_up(nullptr), handles_down(nullptr),
cpuWrapper(tree->getHeight()), wrapperptr(&wrappers){
FAssertLF(tree, "tree cannot be null");
......@@ -148,8 +149,12 @@ public:
protected:
void initCodelet(){
memset(&p2m_cl, 0, sizeof(p2m_cl));
p2m_cl.where = STARPU_CPU;
p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportP2M()){
p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_CPU;
}
#endif
p2m_cl.nbuffers = 2;
p2m_cl.modes[0] = STARPU_RW;
p2m_cl.modes[1] = STARPU_R;
......@@ -158,15 +163,23 @@ protected:
memset(m2m_cl, 0, sizeof(m2m_cl[0])*9);
memset(l2l_cl, 0, sizeof(l2l_cl[0])*9);
for(int idx = 0 ; idx < 9 ; ++idx){
m2m_cl[idx].where = STARPU_CPU;
m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2M()){
m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_CPU;
}
#endif
m2m_cl[idx].nbuffers = idx+2;
m2m_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode));
m2m_cl[idx].dyn_modes[0] = STARPU_RW;
m2m_cl[idx].name = "m2m_cl";
l2l_cl[idx].where = STARPU_CPU;
l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportL2L()){
l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_CPU;
}
#endif
l2l_cl[idx].nbuffers = idx+2;
l2l_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode));
l2l_cl[idx].dyn_modes[0] = STARPU_R;
......@@ -179,37 +192,57 @@ protected:
}
memset(&l2p_cl, 0, sizeof(l2p_cl));
l2p_cl.where = STARPU_CPU;
l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportL2P()){
l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_CPU;
}
#endif
l2p_cl.nbuffers = 2;
l2p_cl.modes[0] = STARPU_R;
l2p_cl.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
l2p_cl.name = "l2p_cl";
memset(&p2p_cl_in, 0, sizeof(p2p_cl_in));
p2p_cl_in.where = STARPU_CPU;
p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportP2P()){
p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_CPU;
}
#endif
p2p_cl_in.nbuffers = 1;
p2p_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
p2p_cl_in.name = "p2p_cl_in";
memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout));
p2p_cl_inout.where = STARPU_CPU;
p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportP2P()){
p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_CPU;
}
#endif
p2p_cl_inout.nbuffers = 2;
p2p_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
p2p_cl_inout.name = "p2p_cl_inout";
memset(&m2l_cl_in, 0, sizeof(m2l_cl_in));
m2l_cl_in.where = STARPU_CPU;
m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2L()){
m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_CPU;
}
#endif
m2l_cl_in.nbuffers = 2;
m2l_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
m2l_cl_in.modes[1] = STARPU_R;
m2l_cl_in.name = "m2l_cl_in";
memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout));
m2l_cl_inout.where = STARPU_CPU;
m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2L()){
m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_CPU;
}
#endif
m2l_cl_inout.nbuffers = 4;
m2l_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......
......@@ -64,6 +64,7 @@ protected:
int MaxThreads; //< The number of threads
OctreeClass*const tree; //< The Tree
KernelClass*const originalCpuKernel;
std::vector<starpu_data_handle_t>* handles_up;
std::vector<starpu_data_handle_t>* handles_down;
......@@ -89,7 +90,7 @@ protected:
public:
FGroupTaskStarPUMpiAlgorithm(const FMpi::FComm& inComm, OctreeClass*const inTree, KernelClass* inKernels, const int inMaxThreads = -1)
: comm(inComm), MaxThreads(inMaxThreads), tree(inTree),
: comm(inComm), MaxThreads(inMaxThreads), tree(inTree), originalCpuKernel(inKernels),
handles_up(nullptr), handles_down(nullptr),
cpuWrapper(tree->getHeight()), wrapperptr(&wrappers){
FAssertLF(tree, "tree cannot be null");
......@@ -176,8 +177,12 @@ public:
protected:
void initCodelet(){
memset(&p2m_cl, 0, sizeof(p2m_cl));
p2m_cl.where = STARPU_CPU;
p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportP2M()){
p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_CPU;
}
#endif
p2m_cl.nbuffers = 2;
p2m_cl.modes[0] = STARPU_RW;
p2m_cl.modes[1] = STARPU_R;
......@@ -186,15 +191,23 @@ protected:
memset(m2m_cl, 0, sizeof(m2m_cl[0])*9);
memset(l2l_cl, 0, sizeof(l2l_cl[0])*9);
for(int idx = 0 ; idx < 9 ; ++idx){
m2m_cl[idx].where = STARPU_CPU;
m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2M()){
m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_CPU;
}
#endif
m2m_cl[idx].nbuffers = idx+2;
m2m_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode));
m2m_cl[idx].dyn_modes[0] = STARPU_RW;
m2m_cl[idx].name = "m2m_cl";
l2l_cl[idx].where = STARPU_CPU;
l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportL2L()){
l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_CPU;
}
#endif
l2l_cl[idx].nbuffers = idx+2;
l2l_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode));
l2l_cl[idx].dyn_modes[0] = STARPU_R;
......@@ -207,37 +220,57 @@ protected:
}
memset(&l2p_cl, 0, sizeof(l2p_cl));
l2p_cl.where = STARPU_CPU;
l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportL2P()){
l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_CPU;
}
#endif
l2p_cl.nbuffers = 2;
l2p_cl.modes[0] = STARPU_R;
l2p_cl.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
l2p_cl.name = "l2p_cl";
memset(&p2p_cl_in, 0, sizeof(p2p_cl_in));
p2p_cl_in.where = STARPU_CPU;
p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportP2P()){
p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_CPU;
}
#endif
p2p_cl_in.nbuffers = 1;
p2p_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
p2p_cl_in.name = "p2p_cl_in";
memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout));
p2p_cl_inout.where = STARPU_CPU;
p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportP2P()){
p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_CPU;
}
#endif
p2p_cl_inout.nbuffers = 2;
p2p_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
p2p_cl_inout.name = "p2p_cl_inout";
memset(&m2l_cl_in, 0, sizeof(m2l_cl_in));
m2l_cl_in.where = STARPU_CPU;
m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2L()){
m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_CPU;
}
#endif
m2l_cl_in.nbuffers = 2;
m2l_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
m2l_cl_in.modes[1] = STARPU_R;
m2l_cl_in.name = "m2l_cl_in";
memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout));
m2l_cl_inout.where = STARPU_CPU;
m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2L()){
m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_CPU;
}
#endif
m2l_cl_inout.nbuffers = 4;
m2l_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......@@ -274,17 +307,25 @@ protected:
////////////////////////////////////////////////////////////////////////////
void initCodeletMpi(){
memset(&p2p_cl_inout_mpi, 0, sizeof(p2p_cl_inout_mpi));
p2p_cl_inout_mpi.where = STARPU_CPU;
p2p_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallbackMpi;
memset(&p2p_cl_inout_mpi, 0, sizeof(p2p_cl_inout_mpi));
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2L()){
p2p_cl_inout_mpi.where |= STARPU_CPU;
p2p_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallbackMpi;
}
#endif
p2p_cl_inout_mpi.nbuffers = 2;
p2p_cl_inout_mpi.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
p2p_cl_inout_mpi.modes[1] = STARPU_R;
p2p_cl_inout_mpi.name = "p2p_cl_inout_mpi";
memset(&m2l_cl_inout_mpi, 0, sizeof(m2l_cl_inout_mpi));
m2l_cl_inout_mpi.where = STARPU_CPU;
m2l_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallbackMpi;
memset(&m2l_cl_inout_mpi, 0, sizeof(m2l_cl_inout_mpi));
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2L()){
m2l_cl_inout_mpi.where |= STARPU_CPU;
m2l_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallbackMpi;
}
#endif
m2l_cl_inout_mpi.nbuffers = 2;
m2l_cl_inout_mpi.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
m2l_cl_inout_mpi.modes[1] = STARPU_R;
......
#ifndef FSTARPUKERNELCAPACITIES_HPP
#define FSTARPUKERNELCAPACITIES_HPP
/**
 * Interface that a kernel class used with the StarPU algorithms should
 * implement, so that the algorithm can ask which operators the kernel
 * supports.
 *
 * Each supportXXX() query returns true when the kernel is able to execute
 * the XXX operator. The StarPU group algorithms register the CPU callback
 * of a codelet only when the matching query returns true.
 */
class FStarPUKernelCapacities {
public:
    /**
     * Virtual destructor: this is a polymorphic interface, so destroying an
     * implementation through a pointer to the interface must be well defined.
     */
    virtual ~FStarPUKernelCapacities() = default;

    /** @return true if the kernel supports the P2M operator. */
    virtual bool supportP2M() const = 0;
    /** @return true if the kernel supports the M2M operator. */
    virtual bool supportM2M() const = 0;
    /** @return true if the kernel supports the M2L operator. */
    virtual bool supportM2L() const = 0;
    /** @return true if the kernel supports the L2L operator. */
    virtual bool supportL2L() const = 0;
    /** @return true if the kernel supports the L2P operator. */
    virtual bool supportL2P() const = 0;
    /** @return true if the kernel supports the P2P operator. */
    virtual bool supportP2P() const = 0;
};
/**
 * Adapter for kernels that implement every operator.
 *
 * It derives from the wrapped kernel class (re-using all of its
 * constructors) and from FStarPUKernelCapacities, and answers true to each
 * capacity query.
 */
template <class WrappedKernel>
class FStarPUAllYesCapacities : public WrappedKernel, public FStarPUKernelCapacities {
public:
    // Inherit the wrapped kernel's constructors so the adapter is built
    // with exactly the same arguments as the kernel itself.
    using WrappedKernel::WrappedKernel;

    // Every operator is reported as supported.
    bool supportP2M() const override { return true; }
    bool supportM2M() const override { return true; }
    bool supportM2L() const override { return true; }
    bool supportL2L() const override { return true; }
    bool supportL2P() const override { return true; }
    bool supportP2P() const override { return true; }
};
#endif // FSTARPUKERNELCAPACITIES_HPP
......@@ -33,11 +33,13 @@
#include "../../Src/Components/FTestParticleContainer.hpp"
#include "../../Src/Components/FTestCell.hpp"
#include "../../Src/Components/FTestKernels.hpp"
#include "../Src/GroupTree/FGroupTestParticleContainer.hpp"
#include "../../Src/GroupTree/FGroupTestParticleContainer.hpp"
#include "../../Src/Files/FFmaGenericLoader.hpp"
#include "../../Src/Core/FFmmAlgorithm.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
int main(int argc, char* argv[]){
const FParameterNames LocalOptionBlocSize {
{"-bs"},
......@@ -50,7 +52,7 @@ int main(int argc, char* argv[]){
typedef FTestCell GroupCellClass;
typedef FGroupTestParticleContainer GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int> GroupOctreeClass;
typedef FTestKernels< GroupCellClass, GroupContainerClass > GroupKernelClass;
typedef FStarPUAllYesCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
#ifdef ScalFMM_USE_STARPU
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#elif defined(ScalFMM_USE_OMP4)
......
......@@ -46,6 +46,7 @@
#include <memory>
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
......@@ -63,7 +64,7 @@ int main(int argc, char* argv[]){
typedef FChebCell<ORDER> GroupCellClass;
typedef FP2PGroupParticleContainer<> GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal> GroupOctreeClass;
typedef FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass;
typedef FStarPUAllYesCapacities<FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass;
#ifdef ScalFMM_USE_STARPU
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#elif defined(ScalFMM_USE_OMP4)
......
......@@ -37,6 +37,9 @@
#include "../../Src/Core/FFmmAlgorithm.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
int getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidth,
const FReal boxWidthAtLeafLevel, const int treeHeight) {
FAssertLF( (inRelativePosition >= 0 && inRelativePosition <= boxWidth), "inRelativePosition : ",inRelativePosition );
......@@ -75,7 +78,7 @@ int main(int argc, char* argv[]){
typedef FTestCell GroupCellClass;
typedef FGroupTestParticleContainer GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int> GroupOctreeClass;
typedef FTestKernels< GroupCellClass, GroupContainerClass > GroupKernelClass;
typedef FStarPUAllYesCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
typedef FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
FMpi mpiComm(argc, argv);
......
......@@ -45,6 +45,8 @@
#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
#include <memory>
......@@ -87,7 +89,7 @@ int main(int argc, char* argv[]){
typedef FChebCell<ORDER> GroupCellClass;
typedef FP2PGroupParticleContainer<> GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal> GroupOctreeClass;
typedef FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass;
typedef FStarPUAllYesCapacities<FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass;
typedef FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
// Get params
......
......@@ -40,6 +40,8 @@
#include "../../Src/Core/FFmmAlgorithm.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
#include <memory>
......@@ -58,7 +60,7 @@ int main(int argc, char* argv[]){
typedef FRotationCell<P> GroupCellClass;
typedef FP2PGroupParticleContainer<> GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal> GroupOctreeClass;
typedef FRotationKernel< GroupCellClass, GroupContainerClass , P> GroupKernelClass;
typedef FStarPUAllYesCapacities<FRotationKernel< GroupCellClass, GroupContainerClass , P>> GroupKernelClass;
#ifdef ScalFMM_USE_STARPU
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#elif defined(ScalFMM_USE_OMP4)
......
......@@ -40,6 +40,8 @@
#include "../../Src/Utils/FParameterNames.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
int main(int argc, char* argv[]){
const FParameterNames LocalOptionBlocSize {
{"-bs"},
......@@ -95,7 +97,7 @@ int main(int argc, char* argv[]){
typedef FRotationKernel< CellClass, FP2PGroupParticleContainer<> , P> KernelClass;
typedef FStarPUAllYesCapacities<FRotationKernel< CellClass, FP2PGroupParticleContainer<> , P>> KernelClass;
#ifdef ScalFMM_USE_STARPU
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
#elif defined(ScalFMM_USE_OMP4)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment