Commit 9625189c authored by PIACIBELLO Cyrille
Browse files
parents b778bf93 dca129bb
......@@ -43,7 +43,7 @@ template <class OctreeClass, class CellContainerClass, class CellClass, class Ke
class CudaKernelClass = FCudaEmptyKernel<>
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
, class OpenCLDeviceWrapperClass = FOpenCLDeviceWrapper<KernelClass, nullptr>
, class OpenCLDeviceWrapperClass = FOpenCLDeviceWrapper<KernelClass>
#endif
>
class FGroupTaskStarPUAlgorithm {
......
......@@ -48,7 +48,7 @@ template <class OctreeClass, class CellContainerClass, class CellClass, class Ke
class CudaKernelClass = FCudaEmptyKernel<>
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
, class OpenCLDeviceWrapperClass = FOpenCLDeviceWrapper<KernelClass, nullptr>
, class OpenCLDeviceWrapperClass = FOpenCLDeviceWrapper<KernelClass>
#endif
>
class FGroupTaskStarPUMpiAlgorithm {
......
......@@ -18,58 +18,91 @@ public:
virtual bool supportP2P(const FStarPUTypes inPu) const = 0;
};
/**
* This is for the kernels that implement all the methods.
*/
template <class BaseClass>
class FStarPUAllYesCapacities : public BaseClass, public FStarPUKernelCapacities {
class FStarPUAbstractCapacities : public FStarPUKernelCapacities {
protected:
virtual bool check(const FStarPUTypes inPu) const = 0;
public:
using BaseClass::BaseClass;
bool supportP2M(const FStarPUTypes /*inPu*/) const override {
return true;
bool supportP2M(const FStarPUTypes inPu) const override {
return check(inPu);
}
bool supportM2M(const FStarPUTypes /*inPu*/) const override {
return true;
bool supportM2M(const FStarPUTypes inPu) const override {
return check(inPu);
}
bool supportM2L(const FStarPUTypes /*inPu*/) const override {
return true;
bool supportM2L(const FStarPUTypes inPu) const override {
return check(inPu);
}
bool supportL2L(const FStarPUTypes /*inPu*/) const override {
return true;
bool supportL2L(const FStarPUTypes inPu) const override {
return check(inPu);
}
bool supportL2P(const FStarPUTypes /*inPu*/) const override {
return true;
bool supportL2P(const FStarPUTypes inPu) const override {
return check(inPu);
}
bool supportP2P(const FStarPUTypes /*inPu*/) const override {
return true;
bool supportP2P(const FStarPUTypes inPu) const override {
return check(inPu);
}
};
/**
* This is for the kernels that implement all the methods.
*/
template <class BaseClass>
class FStarPUAllCpuCapacities : public BaseClass, public FStarPUKernelCapacities {
class FStarPUAllYesCapacities : public BaseClass, public FStarPUAbstractCapacities {
bool check(const FStarPUTypes inPu) const override{
return true;
}
public:
using BaseClass::BaseClass;
};
bool supportP2M(const FStarPUTypes inPu) const override {
return inPu == FSTARPU_CPU_IDX;
}
bool supportM2M(const FStarPUTypes inPu) const override {
template <class BaseClass>
class FStarPUAllCpuCapacities : public BaseClass, public FStarPUAbstractCapacities {
bool check(const FStarPUTypes inPu) const override{
return inPu == FSTARPU_CPU_IDX;
}
bool supportM2L(const FStarPUTypes inPu) const override {
return inPu == FSTARPU_CPU_IDX;
public:
using BaseClass::BaseClass;
};
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
template <class BaseClass>
class FStarPUAllCudaCapacities : public BaseClass, public FStarPUAbstractCapacities {
bool check(const FStarPUTypes inPu) const override{
return inPu == FSTARPU_CUDA_IDX;
}
bool supportL2L(const FStarPUTypes inPu) const override {
return inPu == FSTARPU_CPU_IDX;
public:
using BaseClass::BaseClass;
};
template <class BaseClass>
class FStarPUAllCpuCudaCapacities : public BaseClass, public FStarPUAbstractCapacities {
bool check(const FStarPUTypes inPu) const override{
return inPu == FSTARPU_CPU_IDX || inPu == FSTARPU_CUDA_IDX;
}
bool supportL2P(const FStarPUTypes inPu) const override {
return inPu == FSTARPU_CPU_IDX;
public:
using BaseClass::BaseClass;
};
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
template <class BaseClass>
class FStarPUAllOpenCLCapacities : public BaseClass, public FStarPUAbstractCapacities {
bool check(const FStarPUTypes inPu) const override{
return inPu == FSTARPU_OPENCL_IDX;
}
bool supportP2P(const FStarPUTypes inPu) const override {
return inPu == FSTARPU_CPU_IDX;
public:
using BaseClass::BaseClass;
};
/**
 * Capacities wrapper for kernels whose operators are available
 * on both the CPU workers and the OpenCL workers.
 * Every support* query answered through FStarPUAbstractCapacities
 * ends up in check(), so a single predicate covers all operators.
 */
template <class BaseClass>
class FStarPUAllCpuOpenCLCapacities : public BaseClass, public FStarPUAbstractCapacities {
public:
    // Re-expose the constructors of the wrapped kernel unchanged
    using BaseClass::BaseClass;

private:
    /** @return true if inPu is the CPU index or the OpenCL index */
    bool check(const FStarPUTypes inPu) const override {
        const bool runsOnCpu    = (inPu == FSTARPU_CPU_IDX);
        const bool runsOnOpenCL = (inPu == FSTARPU_OPENCL_IDX);
        return runsOnCpu || runsOnOpenCL;
    }
};
#endif
#endif // FSTARPUKERNELCAPACITIES_HPP
......@@ -16,7 +16,13 @@
#include <starpu.h>
template <class OriginalKernelClass, const char* KernelFilename>
/**
 * Filename provider that designates "no OpenCL source file".
 * An instance converts to a null const char*; code that tests the
 * converted pointer before loading a file will therefore skip the load.
 */
struct FEmptyOpenCLFilename{
    // const-qualified so the conversion is also usable on const
    // instances (the operator does not touch any state)
    operator const char*() const {
        return nullptr;
    }
};
template <class OriginalKernelClass, class KernelFilenameClass = FEmptyOpenCLFilename>
class FOpenCLDeviceWrapper {
protected:
static void SetKernelArgs(cl_kernel& kernel, const int pos){
......@@ -69,10 +75,12 @@ public:
workerId = starpu_worker_get_id();
workerDevid = starpu_worker_get_devid(workerId);
if(KernelFilename){
KernelFilenameClass kernelFilename;
const char* filename = kernelFilename;
if(filename){
starpu_opencl_get_context (workerDevid, &context);
const int err = starpu_opencl_load_opencl_from_file(KernelFilename, &opencl_code, NULL);
const int err = starpu_opencl_load_opencl_from_file(filename, &opencl_code, NULL);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
FAssertLF( starpu_opencl_load_kernel(&kernel_bottomPassPerform, &queue_bottomPassPerform, &opencl_code, "bottomPassPerform", workerDevid) == CL_SUCCESS);
......@@ -97,7 +105,9 @@ public:
virtual ~FOpenCLDeviceWrapper(){
// Release
releaseKernel();
if(KernelFilename){
KernelFilenameClass kernelFilename;
const char* filename = kernelFilename;
if(filename){
const int err = starpu_opencl_unload_opencl(&opencl_code);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
......
......@@ -24,6 +24,7 @@
#endif
#ifdef ScalFMM_USE_STARPU
#include "../../Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
#endif
#include "../../Src/GroupTree/FP2PGroupParticleContainer.hpp"
#include "../../Src/GroupTree/FGroupTaskAlgorithm.hpp"
......@@ -38,8 +39,6 @@
#include "../../Src/Files/FFmaGenericLoader.hpp"
#include "../../Src/Core/FFmmAlgorithm.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
int main(int argc, char* argv[]){
const FParameterNames LocalOptionBlocSize {
{"-bs"},
......@@ -52,14 +51,16 @@ int main(int argc, char* argv[]){
typedef FTestCell GroupCellClass;
typedef FGroupTestParticleContainer GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int> GroupOctreeClass;
typedef FStarPUAllYesCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
#ifdef ScalFMM_USE_STARPU
typedef FStarPUAllYesCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#elif defined(ScalFMM_USE_OMP4)
typedef FTestKernels< GroupCellClass, GroupContainerClass > GroupKernelClass;
// Set the number of threads
omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#else
typedef FTestKernels< GroupCellClass, GroupContainerClass > GroupKernelClass;
//typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#endif
......
......@@ -32,6 +32,7 @@
#endif
#ifdef ScalFMM_USE_STARPU
#include "../../Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
#endif
#include "../../Src/GroupTree/FP2PGroupParticleContainer.hpp"
......@@ -46,8 +47,6 @@
#include <memory>
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
int main(int argc, char* argv[]){
......@@ -64,14 +63,16 @@ int main(int argc, char* argv[]){
typedef FChebCell<ORDER> GroupCellClass;
typedef FP2PGroupParticleContainer<> GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal> GroupOctreeClass;
typedef FStarPUAllYesCapacities<FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass;
#ifdef ScalFMM_USE_STARPU
typedef FStarPUAllYesCapacities<FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#elif defined(ScalFMM_USE_OMP4)
typedef FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass;
// Set the number of threads
omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#else
typedef FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass;
//typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#endif
......
......@@ -28,6 +28,7 @@
#endif
#ifdef ScalFMM_USE_STARPU
#include "../../Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
#endif
#include "../../Src/GroupTree/FP2PGroupParticleContainer.hpp"
......@@ -40,8 +41,6 @@
#include "../../Src/Core/FFmmAlgorithm.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
#include <memory>
......@@ -60,14 +59,16 @@ int main(int argc, char* argv[]){
typedef FRotationCell<P> GroupCellClass;
typedef FP2PGroupParticleContainer<> GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal> GroupOctreeClass;
typedef FStarPUAllYesCapacities<FRotationKernel< GroupCellClass, GroupContainerClass , P>> GroupKernelClass;
#ifdef ScalFMM_USE_STARPU
typedef FStarPUAllYesCapacities<FRotationKernel< GroupCellClass, GroupContainerClass , P>> GroupKernelClass;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#elif defined(ScalFMM_USE_OMP4)
typedef FRotationKernel< GroupCellClass, GroupContainerClass , P> GroupKernelClass;
// Set the number of threads
omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#else
typedef FRotationKernel< GroupCellClass, GroupContainerClass , P> GroupKernelClass;
//typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#endif
......
......@@ -35,12 +35,12 @@
#endif
#ifdef ScalFMM_USE_STARPU
#include "../../Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
#endif
#include "../../Src/GroupTree/FP2PGroupParticleContainer.hpp"
#include "../../Src/Utils/FParameterNames.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
int main(int argc, char* argv[]){
const FParameterNames LocalOptionBlocSize {
......@@ -97,12 +97,14 @@ int main(int argc, char* argv[]){
typedef FStarPUAllYesCapacities<FRotationKernel< CellClass, FP2PGroupParticleContainer<> , P>> KernelClass;
#ifdef ScalFMM_USE_STARPU
typedef FStarPUAllYesCapacities<FRotationKernel< CellClass, FP2PGroupParticleContainer<> , P>> KernelClass;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
#elif defined(ScalFMM_USE_OMP4)
typedef FRotationKernel< CellClass, FP2PGroupParticleContainer<> , P> KernelClass;
typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
#else
typedef FRotationKernel< CellClass, FP2PGroupParticleContainer<> , P> KernelClass;
//typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
#endif
......
......@@ -68,7 +68,7 @@ int main(int argc, char* argv[]){
typedef FTestCell GroupCellClass;
typedef FGroupTestParticleContainer GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int> GroupOctreeClass;
typedef FStarPUAllYesCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
typedef FStarPUAllCpuCudaCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass
, FCudaGroupOfCells<sizeof(FTestCell)>, FCudaGroupOfParticles<2, long long int>, FCudaGroupAttachedLeaf<2, long long int>, FTestCudaKernels< FCudaGroupAttachedLeaf<2, long long int> > > GroupAlgorithm;
......
......@@ -47,15 +47,22 @@ int main(int argc, char* argv[]){
FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbThreads,
FParameterDefinitions::NbParticles, LocalOptionBlocSize);
// Initialize the types
/**
 * Filename provider passed as a type: converting an instance to
 * const char* gives the path of the OpenCL source file to use.
 */
struct OpenCLSource{
    // const-qualified so the conversion is also usable on const
    // instances (the operator only returns a string literal)
    operator const char*() const {
        return "../../Src/GroupTree/OpenCl/FEmptyKernel.cl";
    }
};
typedef FTestCell GroupCellClass;
typedef FGroupTestParticleContainer GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int> GroupOctreeClass;
typedef FStarPUAllYesCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
typedef FStarPUAllCpuOpenCLCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
, FCudaGroupOfCells<0>, FCudaGroupOfParticles<0, int>, FCudaGroupAttachedLeaf<0, int>, FCudaEmptyKernel<>
#endif
, FOpenCLDeviceWrapper<GroupKernelClass, nullptr>
, FOpenCLDeviceWrapper<GroupKernelClass, OpenCLSource>
> GroupAlgorithm;
typedef FTestCell CellClass;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment