Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
solverstack
ScalFMM
Commits
090158ca
Commit
090158ca
authored
Feb 25, 2015
by
BRAMAS Berenger
Browse files
Make the opencl starpu compile
parent
dd6edd46
Changes
5
Hide whitespace changes
Inline
Side-by-side
Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp
View file @
090158ca
...
...
@@ -93,7 +93,7 @@ protected:
typedef
FStarPUCudaWrapper
<
KernelClass
,
CudaCellContainerClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
StarPUCudaWrapperClass
;
StarPUCudaWrapperClass
cudaWrapper
;
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
typedef
FStarPUOpenClWrapper
<
KernelClass
,
OpenCLDeviceWrapperClass
>
StarPUOpenClWrapperClass
;
StarPUOpenClWrapperClass
openclWrapper
;
#endif
...
...
@@ -111,7 +111,7 @@ public:
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
cudaWrapper
(
tree
->
getHeight
()),
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
openclWrapper
(
tree
->
getHeight
()),
#endif
wrapperptr
(
&
wrappers
){
...
...
@@ -142,7 +142,7 @@ public:
});
wrappers
.
set
(
FSTARPU_CUDA_IDX
,
&
cudaWrapper
);
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
FStarPUUtils
::
ExecOnWorkers
(
STARPU_OPENCL
,
[
&
](){
starpu_pthread_mutex_lock
(
&
initMutex
);
openclWrapper
.
initKernel
(
starpu_worker_get_id
(),
inKernels
);
...
...
@@ -215,7 +215,7 @@ protected:
p2m_cl
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportP2M
(
FSTARPU_OPENCL_IDX
)){
p2m_cl
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
bottomPassCallback
;
p2m_cl
.
where
|=
STARPU_OPENCL
;
...
...
@@ -241,7 +241,7 @@ protected:
m2m_cl
[
idx
].
where
|=
STARPU_CUDA
;
}
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportM2M
(
FSTARPU_OPENCL_IDX
)){
m2m_cl
[
idx
].
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
upwardPassCallback
;
m2m_cl
[
idx
].
where
|=
STARPU_OPENCL
;
...
...
@@ -264,7 +264,7 @@ protected:
l2l_cl
[
idx
].
where
|=
STARPU_CUDA
;
}
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportL2L
(
FSTARPU_OPENCL_IDX
)){
l2l_cl
[
idx
].
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
downardPassCallback
;
l2l_cl
[
idx
].
where
|=
STARPU_OPENCL
;
...
...
@@ -294,7 +294,7 @@ protected:
l2p_cl
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportL2P
(
FSTARPU_OPENCL_IDX
)){
l2p_cl
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
mergePassCallback
;
l2p_cl
.
where
|=
STARPU_OPENCL
;
...
...
@@ -318,7 +318,7 @@ protected:
p2p_cl_in
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportP2P
(
FSTARPU_OPENCL_IDX
)){
p2p_cl_in
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
directInPassCallback
;
p2p_cl_in
.
where
|=
STARPU_OPENCL
;
...
...
@@ -340,7 +340,7 @@ protected:
p2p_cl_inout
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportP2P
(
FSTARPU_OPENCL_IDX
)){
p2p_cl_inout
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
directInoutPassCallback
;
p2p_cl_inout
.
where
|=
STARPU_OPENCL
;
...
...
@@ -364,7 +364,7 @@ protected:
m2l_cl_in
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportM2L
(
FSTARPU_OPENCL_IDX
)){
m2l_cl_in
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
transferInPassCallback
;
m2l_cl_in
.
where
|=
STARPU_OPENCL
;
...
...
@@ -387,7 +387,7 @@ protected:
m2l_cl_inout
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef ScalFMM_ENABLE_
CUDA
_KERNEL
#ifdef ScalFMM_ENABLE_
OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportM2L
(
FSTARPU_OPENCL_IDX
)){
m2l_cl_inout
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
transferInoutPassCallback
;
m2l_cl_inout
.
where
|=
STARPU_OPENCL
;
...
...
Src/GroupTree/FGroupTaskStarpuMpiAlgorithm.hpp
View file @
090158ca
...
...
@@ -22,10 +22,9 @@
#include
<omp.h>
//extern "C"{
#include
<starpu.h>
#include
<starpu_mpi.h>
//}
#include
"FStarPUUtils.hpp"
#ifdef STARPU_USE_CPU
#include
"FStarPUCpuWrapper.hpp"
...
...
@@ -37,15 +36,19 @@
#include
"Cuda/FCudaGroupOfParticles.hpp"
#include
"Cuda/FCudaGroupOfCells.hpp"
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
#include
"FStarPUOpenClWrapper.hpp"
#include
"OpenCl/FOpenCLDeviceWrapper.hpp"
#endif
#include
"FStarPUUtils.hpp"
template
<
class
OctreeClass
,
class
CellContainerClass
,
class
CellClass
,
class
KernelClass
,
class
ParticleGroupClass
,
class
ParticleContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
,
class
CudaCellContainerClass
=
FCudaGroupOfCells
<
0
>,
class
CudaParticleGroupClass
=
FCudaGroupOfParticles
<
0
,
int
>
,
class
CudaParticleContainerClass
=
FCudaGroupAttachedLeaf
<
0
,
int
>
,
class
CudaKernelClass
=
FCudaEmptyKernel
<>
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
,
class
OpenCLDeviceWrapperClass
=
FOpenCLDeviceWrapper
<
KernelClass
,
nullptr
>
#endif
>
class
FGroupTaskStarPUMpiAlgorithm
{
...
...
@@ -53,6 +56,9 @@ protected:
typedef
FGroupTaskStarPUMpiAlgorithm
<
OctreeClass
,
CellContainerClass
,
CellClass
,
KernelClass
,
ParticleGroupClass
,
ParticleContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
,
CudaCellContainerClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
,
OpenCLDeviceWrapperClass
#endif
>
ThisClass
;
...
...
@@ -103,8 +109,8 @@ protected:
typedef
FStarPUCudaWrapper
<
KernelClass
,
CudaCellContainerClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
StarPUCudaWrapperClass
;
StarPUCudaWrapperClass
cudaWrapper
;
#endif
#ifdef S
TARPU_US
E_OPENCL
typedef
FStarPUOpenClWrapper
<
CellContainerClass
,
CellClass
,
KernelClass
,
ParticleGroupClass
,
ParticleContain
erClass
>
StarPUOpenClWrapperClass
;
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
typedef
FStarPUOpenClWrapper
<
KernelClass
,
OpenCLDeviceWrapp
erClass
>
StarPUOpenClWrapperClass
;
StarPUOpenClWrapperClass
openclWrapper
;
#endif
...
...
@@ -121,7 +127,7 @@ public:
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
cudaWrapper
(
tree
->
getHeight
()),
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
openclWrapper
(
tree
->
getHeight
()),
#endif
wrapperptr
(
&
wrappers
){
...
...
@@ -153,7 +159,7 @@ public:
});
wrappers
.
set
(
FSTARPU_CUDA_IDX
,
&
cudaWrapper
);
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
FStarPUUtils
::
ExecOnWorkers
(
STARPU_OPENCL
,
[
&
](){
starpu_pthread_mutex_lock
(
&
initMutex
);
openclWrapper
.
initKernel
(
starpu_worker_get_id
(),
inKernels
);
...
...
@@ -238,7 +244,7 @@ protected:
p2m_cl
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportP2M
(
FSTARPU_OPENCL_IDX
)){
p2m_cl
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
bottomPassCallback
;
p2m_cl
.
where
|=
STARPU_OPENCL
;
...
...
@@ -264,7 +270,7 @@ protected:
m2m_cl
[
idx
].
where
|=
STARPU_CUDA
;
}
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportM2M
(
FSTARPU_OPENCL_IDX
)){
m2m_cl
[
idx
].
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
upwardPassCallback
;
m2m_cl
[
idx
].
where
|=
STARPU_OPENCL
;
...
...
@@ -287,7 +293,7 @@ protected:
l2l_cl
[
idx
].
where
|=
STARPU_CUDA
;
}
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportL2L
(
FSTARPU_OPENCL_IDX
)){
l2l_cl
[
idx
].
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
downardPassCallback
;
l2l_cl
[
idx
].
where
|=
STARPU_OPENCL
;
...
...
@@ -317,7 +323,7 @@ protected:
l2p_cl
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportL2P
(
FSTARPU_OPENCL_IDX
)){
l2p_cl
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
mergePassCallback
;
l2p_cl
.
where
|=
STARPU_OPENCL
;
...
...
@@ -341,7 +347,7 @@ protected:
p2p_cl_in
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportP2P
(
FSTARPU_OPENCL_IDX
)){
p2p_cl_in
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
directInPassCallback
;
p2p_cl_in
.
where
|=
STARPU_OPENCL
;
...
...
@@ -363,7 +369,7 @@ protected:
p2p_cl_inout
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportP2P
(
FSTARPU_OPENCL_IDX
)){
p2p_cl_inout
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
directInoutPassCallback
;
p2p_cl_inout
.
where
|=
STARPU_OPENCL
;
...
...
@@ -387,7 +393,7 @@ protected:
m2l_cl_in
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportM2L
(
FSTARPU_OPENCL_IDX
)){
m2l_cl_in
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
transferInPassCallback
;
m2l_cl_in
.
where
|=
STARPU_OPENCL
;
...
...
@@ -410,7 +416,7 @@ protected:
m2l_cl_inout
.
where
|=
STARPU_CUDA
;
}
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportM2L
(
FSTARPU_OPENCL_IDX
)){
m2l_cl_inout
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
transferInoutPassCallback
;
m2l_cl_inout
.
where
|=
STARPU_OPENCL
;
...
...
@@ -465,7 +471,7 @@ protected:
p2p_cl_inout_mpi
.
cuda_funcs
[
0
]
=
StarPUCudaWrapperClass
::
directInoutPassCallbackMpi
;
}
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportM2L
(
FSTARPU_OPENCL_IDX
)){
p2p_cl_inout_mpi
.
where
|=
STARPU_OPENCL
;
p2p_cl_inout_mpi
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
directInoutPassCallbackMpi
;
...
...
@@ -489,7 +495,7 @@ protected:
m2l_cl_inout_mpi
.
cuda_funcs
[
0
]
=
StarPUCudaWrapperClass
::
transferInoutPassCallbackMpi
;
}
#endif
#ifdef S
TARPU_US
E_OPENCL
#ifdef S
calFMM_ENABL
E_OPENCL
_KERNEL
if
(
originalCpuKernel
->
supportM2L
(
FSTARPU_OPENCL_IDX
)){
m2l_cl_inout_mpi
.
where
|=
STARPU_OPENCL
;
m2l_cl_inout_mpi
.
opencl_funcs
[
0
]
=
StarPUOpenClWrapperClass
::
transferInoutPassCallbackMpi
;
...
...
Src/GroupTree/FStarPUOpenClWrapper.hpp
View file @
090158ca
...
...
@@ -89,7 +89,7 @@ public:
int
idxLevel
=
0
;
starpu_codelet_unpack_args
(
cl_arg
,
&
worker
,
&
nbSubCellGroups
,
&
idxLevel
);
cl_mem
*
subCellGroupsPtr
[
9
];
cl_mem
subCellGroupsPtr
[
9
];
memset
(
subCellGroupsPtr
,
0
,
9
*
sizeof
(
cl_mem
));
size_t
subCellGroupsSize
[
9
];
memset
(
subCellGroupsSize
,
0
,
9
*
sizeof
(
size_t
));
...
...
@@ -123,7 +123,7 @@ public:
cl_mem
outsideInteractionsCl
=
clCreateBuffer
(
kernel
->
getOpenCLContext
(),
CL_MEM_READ_ONLY
|
CL_MEM_COPY_HOST_PTR
|
CL_MEM_HOST_NO_ACCESS
,
outsideInteractions
->
size
()
*
sizeof
(
OutOfBlockInteraction
),
outsideInteractions
->
data
(),
&
errcode_ret
);
(
void
*
)
outsideInteractions
->
data
(),
&
errcode_ret
);
FAssertLF
(
outsideInteractionsCl
&&
errcode_ret
==
CL_SUCCESS
);
kernel
->
transferInoutPassPerformMpi
(
currentCellsPtr
,
...
...
@@ -171,7 +171,7 @@ public:
cl_mem
outsideInteractionsCl
=
clCreateBuffer
(
kernel
->
getOpenCLContext
(),
CL_MEM_READ_ONLY
|
CL_MEM_COPY_HOST_PTR
|
CL_MEM_HOST_NO_ACCESS
,
outsideInteractions
->
size
()
*
sizeof
(
OutOfBlockInteraction
),
outsideInteractions
->
data
(),
&
errcode_ret
);
(
void
*
)
outsideInteractions
->
data
(),
&
errcode_ret
);
FAssertLF
(
outsideInteractionsCl
&&
errcode_ret
==
CL_SUCCESS
);
kernel
->
transferInoutPassPerform
(
currentCellsPtr
,
...
...
@@ -194,7 +194,7 @@ public:
int
idxLevel
=
0
;
starpu_codelet_unpack_args
(
cl_arg
,
&
worker
,
&
nbSubCellGroups
,
&
idxLevel
);
cl_mem
*
subCellGroupsPtr
[
9
];
cl_mem
subCellGroupsPtr
[
9
];
memset
(
subCellGroupsPtr
,
0
,
9
*
sizeof
(
cl_mem
));
size_t
subCellGroupsSize
[
9
];
memset
(
subCellGroupsSize
,
0
,
9
*
sizeof
(
size_t
));
...
...
@@ -228,7 +228,7 @@ public:
cl_mem
outsideInteractionsCl
=
clCreateBuffer
(
kernel
->
getOpenCLContext
(),
CL_MEM_READ_ONLY
|
CL_MEM_COPY_HOST_PTR
|
CL_MEM_HOST_NO_ACCESS
,
outsideInteractions
->
size
()
*
sizeof
(
OutOfBlockInteraction
),
outsideInteractions
->
data
(),
&
errcode_ret
);
(
void
*
)
outsideInteractions
->
data
(),
&
errcode_ret
);
FAssertLF
(
outsideInteractionsCl
&&
errcode_ret
==
CL_SUCCESS
);
kernel
->
directInoutPassPerformMpi
(
containersPtr
,
...
...
@@ -267,7 +267,7 @@ public:
cl_mem
outsideInteractionsCl
=
clCreateBuffer
(
kernel
->
getOpenCLContext
(),
CL_MEM_READ_ONLY
|
CL_MEM_COPY_HOST_PTR
|
CL_MEM_HOST_NO_ACCESS
,
outsideInteractions
->
size
()
*
sizeof
(
OutOfBlockInteraction
),
outsideInteractions
->
data
(),
&
errcode_ret
);
(
void
*
)
outsideInteractions
->
data
(),
&
errcode_ret
);
FAssertLF
(
outsideInteractionsCl
&&
errcode_ret
==
CL_SUCCESS
);
kernel
->
directInoutPassPerform
(
containersPtr
,
...
...
Src/GroupTree/OpenCl/FOpenCLDeviceWrapper.hpp
View file @
090158ca
...
...
@@ -22,7 +22,7 @@ protected:
static
void
SetKernelArgs
(
cl_kernel
&
kernel
,
const
int
pos
){
}
template
<
class
ParamClass
,
class
...
Args
>
static
void
SetKernelArgs
(
cl_kernel
*
kernel
,
const
int
pos
,
ParamClass
*
param
,
Args
...
args
){
static
void
SetKernelArgs
(
cl_kernel
&
kernel
,
const
int
pos
,
ParamClass
*
param
,
Args
...
args
){
FAssertLF
(
clSetKernelArg
(
kernel
,
pos
,
sizeof
(
*
param
),
param
)
==
0
);
SetKernelArgs
(
kernel
,
pos
+
1
,
args
...);
}
...
...
@@ -32,6 +32,8 @@ protected:
struct
starpu_opencl_program
opencl_code
;
cl_context
context
;
cl_kernel
kernel_bottomPassPerform
;
cl_command_queue
queue_bottomPassPerform
;
...
...
@@ -68,6 +70,8 @@ public:
workerDevid
=
starpu_worker_get_devid
(
workerId
);
if
(
KernelFilename
){
starpu_opencl_get_context
(
workerDevid
,
&
context
);
const
int
err
=
starpu_opencl_load_opencl_from_file
(
KernelFilename
,
&
opencl_code
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
...
...
@@ -90,7 +94,7 @@ public:
virtual
void
releaseKernel
(){
}
~
FOpenCLDeviceWrapper
(){
virtual
~
FOpenCLDeviceWrapper
(){
// Release
releaseKernel
();
if
(
KernelFilename
){
...
...
@@ -99,86 +103,90 @@ public:
}
}
void
bottomPassPerform
(
unsigned
char
*
leafCellsPtr
,
size_t
leafCellsSize
,
unsigned
char
*
containersPtr
,
size_t
containersSize
){
SetKernelArgs
(
&
kernel_bottomPassPerform
,
0
,
&
leafCellsPtr
,
&
leafCellsSize
,
&
containersPtr
,
&
containersSize
);
cl_context
&
getOpenCLContext
(){
return
context
;
}
void
bottomPassPerform
(
cl_mem
leafCellsPtr
,
size_t
leafCellsSize
,
cl_mem
containersPtr
,
size_t
containersSize
){
SetKernelArgs
(
kernel_bottomPassPerform
,
0
,
&
leafCellsPtr
,
&
leafCellsSize
,
&
containersPtr
,
&
containersSize
);
size_t
dim
=
1
;
const
int
err
=
clEnqueueNDRangeKernel
(
queue_bottomPassPerform
,
kernel_bottomPassPerform
,
1
,
NULL
,
&
dim
,
NULL
,
0
,
NULL
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
}
void
upwardPassPerform
(
unsigned
char
*
currentCellsPtr
,
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
,
size_t
subCellGroupsSize
,
int
nbSubCellGroups
,
int
idxLevel
){
SetKernelArgs
(
&
kernel_upwardPassPerform
,
0
,
&
currentCellsPtr
,
&
currentCellsSize
,
&
subCellGroupsPtr
,
&
subCellGroupsSize
,
&
nbSubCellGroups
,
&
idxLevel
);
void
upwardPassPerform
(
cl_mem
currentCellsPtr
,
size_t
currentCellsSize
,
cl_mem
subCellGroupsPtr
[
9
]
,
size_t
subCellGroupsSize
[
9
]
,
int
nbSubCellGroups
,
int
idxLevel
){
SetKernelArgs
(
kernel_upwardPassPerform
,
0
,
&
currentCellsPtr
,
&
currentCellsSize
,
&
subCellGroupsPtr
,
&
subCellGroupsSize
,
&
nbSubCellGroups
,
&
idxLevel
);
size_t
dim
=
1
;
const
int
err
=
clEnqueueNDRangeKernel
(
queue_upwardPassPerform
,
kernel_upwardPassPerform
,
1
,
NULL
,
&
dim
,
NULL
,
0
,
NULL
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
}
void
transferInoutPassPerformMpi
(
unsigned
char
*
currentCellsPtr
,
size_t
currentCellsSize
,
unsigned
char
*
externalCellsPtr
,
size_t
externalCellsSize
,
int
idxLevel
,
cl_mem
outsideInteractionsCl
,
void
transferInoutPassPerformMpi
(
cl_mem
currentCellsPtr
,
size_t
currentCellsSize
,
cl_mem
externalCellsPtr
,
size_t
externalCellsSize
,
int
idxLevel
,
cl_mem
outsideInteractionsCl
,
size_t
outsideInteractionsSize
){
SetKernelArgs
(
&
kernel_transferInoutPassPerformMpi
,
0
,
&
currentCellsPtr
,
&
currentCellsSize
,
&
externalCellsPtr
,
&
externalCellsSize
,
&
idxLevel
,
&
outsideInteractionsCl
,
SetKernelArgs
(
kernel_transferInoutPassPerformMpi
,
0
,
&
currentCellsPtr
,
&
currentCellsSize
,
&
externalCellsPtr
,
&
externalCellsSize
,
&
idxLevel
,
&
outsideInteractionsCl
,
&
outsideInteractionsSize
);
size_t
dim
=
1
;
const
int
err
=
clEnqueueNDRangeKernel
(
queue_transferInoutPassPerformMpi
,
kernel_transferInoutPassPerformMpi
,
1
,
NULL
,
&
dim
,
NULL
,
0
,
NULL
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
}
void
transferInPassPerform
(
unsigned
char
*
currentCellsPtr
,
size_t
currentCellsSize
,
int
idxLevel
){
SetKernelArgs
(
&
kernel_transferInPassPerform
,
0
,
&
currentCellsPtr
,
&
currentCellsSize
,
&
idxLevel
);
void
transferInPassPerform
(
cl_mem
currentCellsPtr
,
size_t
currentCellsSize
,
int
idxLevel
){
SetKernelArgs
(
kernel_transferInPassPerform
,
0
,
&
currentCellsPtr
,
&
currentCellsSize
,
&
idxLevel
);
size_t
dim
=
1
;
const
int
err
=
clEnqueueNDRangeKernel
(
queue_transferInPassPerform
,
kernel_transferInPassPerform
,
1
,
NULL
,
&
dim
,
NULL
,
0
,
NULL
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
}
void
transferInoutPassPerform
(
unsigned
char
*
currentCellsPtr
,
size_t
currentCellsSize
,
unsigned
char
*
externalCellsPtr
,
size_t
externalCellsSize
,
int
idxLevel
,
cl_mem
outsideInteractionsCl
,
void
transferInoutPassPerform
(
cl_mem
currentCellsPtr
,
size_t
currentCellsSize
,
cl_mem
externalCellsPtr
,
size_t
externalCellsSize
,
int
idxLevel
,
cl_mem
outsideInteractionsCl
,
size_t
outsideInteractionsSize
){
SetKernelArgs
(
&
kernel_transferInoutPassPerform
,
0
,
&
currentCellsPtr
,
&
currentCellsSize
,
&
externalCellsPtr
,
&
externalCellsSize
,
&
idxLevel
,
&
outsideInteractionsCl
,
&
outsideInteractionsSize
);
SetKernelArgs
(
kernel_transferInoutPassPerform
,
0
,
&
currentCellsPtr
,
&
currentCellsSize
,
&
externalCellsPtr
,
&
externalCellsSize
,
&
idxLevel
,
&
outsideInteractionsCl
,
&
outsideInteractionsSize
);
size_t
dim
=
1
;
const
int
err
=
clEnqueueNDRangeKernel
(
queue_transferInoutPassPerform
,
kernel_transferInoutPassPerform
,
1
,
NULL
,
&
dim
,
NULL
,
0
,
NULL
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
}
void
downardPassPerform
(
unsigned
char
*
currentCellsPtr
,
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
,
size_t
subCellGroupsSize
,
int
nbSubCellGroups
,
int
idxLevel
){
SetKernelArgs
(
&
kernel_downardPassPerform
,
0
,
&
currentCellsPtr
,
void
downardPassPerform
(
cl_mem
currentCellsPtr
,
size_t
currentCellsSize
,
cl_mem
subCellGroupsPtr
[
9
]
,
size_t
subCellGroupsSize
[
9
]
,
int
nbSubCellGroups
,
int
idxLevel
){
SetKernelArgs
(
kernel_downardPassPerform
,
0
,
&
currentCellsPtr
,
&
currentCellsSize
,
&
subCellGroupsPtr
,
&
subCellGroupsSize
,
&
nbSubCellGroups
,
&
idxLevel
);
size_t
dim
=
1
;
const
int
err
=
clEnqueueNDRangeKernel
(
queue_downardPassPerform
,
kernel_downardPassPerform
,
1
,
NULL
,
&
dim
,
NULL
,
0
,
NULL
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
}
void
directInoutPassPerformMpi
(
unsigned
char
*
containersPtr
,
size_t
containersSize
,
unsigned
char
*
externalContainersPtr
,
size_t
externalContainersSize
,
cl_mem
outsideInteractionsCl
,
void
directInoutPassPerformMpi
(
cl_mem
containersPtr
,
size_t
containersSize
,
cl_mem
externalContainersPtr
,
size_t
externalContainersSize
,
cl_mem
outsideInteractionsCl
,
size_t
outsideInteractionsSize
){
SetKernelArgs
(
&
kernel_directInoutPassPerformMpi
,
0
,
&
containersPtr
,
SetKernelArgs
(
kernel_directInoutPassPerformMpi
,
0
,
&
containersPtr
,
&
containersSize
,
&
externalContainersPtr
,
&
externalContainersSize
,
&
outsideInteractionsCl
,
&
outsideInteractionsSize
);
size_t
dim
=
1
;
const
int
err
=
clEnqueueNDRangeKernel
(
queue_directInoutPassPerformMpi
,
kernel_directInoutPassPerformMpi
,
1
,
NULL
,
&
dim
,
NULL
,
0
,
NULL
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
}
void
directInPassPerform
(
unsigned
char
*
containersPtr
,
size_t
containerSize
){
SetKernelArgs
(
&
kernel_directInPassPerform
,
0
,
&
containersPtr
,
&
containerSize
);
void
directInPassPerform
(
cl_mem
containersPtr
,
size_t
containerSize
){
SetKernelArgs
(
kernel_directInPassPerform
,
0
,
&
containersPtr
,
&
containerSize
);
size_t
dim
=
1
;
const
int
err
=
clEnqueueNDRangeKernel
(
queue_directInPassPerform
,
kernel_directInPassPerform
,
1
,
NULL
,
&
dim
,
NULL
,
0
,
NULL
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
}
void
directInoutPassPerform
(
unsigned
char
*
containersPtr
,
size_t
containerSize
,
unsigned
char
*
externalContainersPtr
,
size_t
externalContainersSize
,
cl_mem
outsideInteractionsCl
,
void
directInoutPassPerform
(
cl_mem
containersPtr
,
size_t
containerSize
,
cl_mem
externalContainersPtr
,
size_t
externalContainersSize
,
cl_mem
outsideInteractionsCl
,
size_t
outsideInteractionsSize
){
SetKernelArgs
(
&
kernel_directInoutPassPerform
,
0
,
&
containersPtr
,
SetKernelArgs
(
kernel_directInoutPassPerform
,
0
,
&
containersPtr
,
&
containerSize
,
&
externalContainersPtr
,
&
externalContainersSize
,
&
outsideInteractionsCl
,
&
outsideInteractionsSize
);
size_t
dim
=
1
;
const
int
err
=
clEnqueueNDRangeKernel
(
queue_directInoutPassPerform
,
kernel_directInoutPassPerform
,
1
,
NULL
,
&
dim
,
NULL
,
0
,
NULL
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
}
void
mergePassPerform
(
unsigned
char
*
leafCellsPtr
,
size_t
leafCellsSize
,
unsigned
char
*
containersPtr
,
size_t
containersSize
){
SetKernelArgs
(
&
kernel_mergePassPerform
,
0
,
&
leafCellsPtr
,
&
leafCellsSize
,
&
containersPtr
,
&
containersSize
);
void
mergePassPerform
(
cl_mem
leafCellsPtr
,
size_t
leafCellsSize
,
cl_mem
containersPtr
,
size_t
containersSize
){
SetKernelArgs
(
kernel_mergePassPerform
,
0
,
&
leafCellsPtr
,
&
leafCellsSize
,
&
containersPtr
,
&
containersSize
);
size_t
dim
=
1
;
const
int
err
=
clEnqueueNDRangeKernel
(
queue_mergePassPerform
,
kernel_mergePassPerform
,
1
,
NULL
,
&
dim
,
NULL
,
0
,
NULL
,
NULL
);
if
(
err
!=
CL_SUCCESS
)
STARPU_OPENCL_REPORT_ERROR
(
err
);
...
...
Tests/noDist/testBlockedWithOpenCLAlgorithm.cpp
0 → 100644
View file @
090158ca
// Keep in private GIT
// @SCALFMM_PRIVATE
// @FUSE_STARPU
// @FUSE_OPENCL
#include
"../../Src/Utils/FGlobal.hpp"
#include
"../../Src/GroupTree/FGroupTree.hpp"
#include
"../../Src/Components/FSimpleLeaf.hpp"
#include
"../../Src/Containers/FVector.hpp"
#include
"../../Src/Kernels/P2P/FP2PParticleContainer.hpp"
#include
"../../Src/Utils/FMath.hpp"
#include
"../../Src/Utils/FMemUtils.hpp"
#include
"../../Src/Utils/FParameters.hpp"
#include
"../../Src/Files/FRandomLoader.hpp"
#include
"../../Src/GroupTree/FGroupSeqAlgorithm.hpp"
#include
"../../Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp"
#include
"../../Src/GroupTree/FP2PGroupParticleContainer.hpp"
#include
"../../Src/GroupTree/FGroupTaskAlgorithm.hpp"
#include
"../../Src/Utils/FParameterNames.hpp"
#include
"../../Src/Components/FTestParticleContainer.hpp"
#include
"../../Src/Components/FTestCell.hpp"
#include
"../../Src/Components/FTestKernels.hpp"
#include
"../../Src/GroupTree/FGroupTestParticleContainer.hpp"
#include
"../../Src/Files/FFmaGenericLoader.hpp"
#include
"../../Src/Core/FFmmAlgorithm.hpp"
#include
"../../Src/GroupTree/FStarPUKernelCapacities.hpp"
int
main
(
int
argc
,
char
*
argv
[]){
const
FParameterNames
LocalOptionBlocSize
{
{
"-bs"
},
"The size of the block of the blocked tree"
};
FHelpDescribeAndExit
(
argc
,
argv
,
"Test the blocked tree by counting the particles."
,
FParameterDefinitions
::
OctreeHeight
,
FParameterDefinitions
::
NbThreads
,
FParameterDefinitions
::
NbParticles
,
LocalOptionBlocSize
);
// Initialize the types
typedef
FTestCell
GroupCellClass
;
typedef
FGroupTestParticleContainer
GroupContainerClass
;
typedef
FGroupTree
<
GroupCellClass
,
GroupContainerClass
,
2
,
long
long
int
>
GroupOctreeClass
;
typedef
FStarPUAllYesCapacities
<
FTestKernels
<
GroupCellClass
,
GroupContainerClass
>>
GroupKernelClass
;
typedef
FGroupTaskStarPUAlgorithm
<
GroupOctreeClass
,
typename
GroupOctreeClass
::
CellGroupClass
,
GroupCellClass
,
GroupKernelClass
,
typename
GroupOctreeClass
::
ParticleGroupClass
,
GroupContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
,
FCudaGroupOfCells
<
0
>
,
FCudaGroupOfParticles
<
0
,
int
>
,
FCudaGroupAttachedLeaf
<
0
,
int
>
,
FCudaEmptyKernel
<>
#endif
,
FOpenCLDeviceWrapper
<
GroupKernelClass
,
nullptr
>
>
GroupAlgorithm
;
typedef
FTestCell
CellClass
;
typedef
FTestParticleContainer
ContainerClass
;
typedef
FSimpleLeaf
<
ContainerClass
>
LeafClass
;
typedef
FOctree
<
CellClass
,
ContainerClass
,
LeafClass
>
OctreeClass
;
typedef
FTestKernels
<
CellClass
,
ContainerClass
>
KernelClass
;
// FFmmAlgorithmTask FFmmAlgorithmThread
typedef
FFmmAlgorithm
<
OctreeClass
,
CellClass
,
ContainerClass
,
KernelClass
,
LeafClass
>
FmmClass
;
// Get params
const
int
NbLevels
=
FParameters
::
getValue
(
argc
,
argv
,
FParameterDefinitions
::
OctreeHeight
.
options
,
5
);
const
int
groupSize
=
FParameters
::
getValue
(
argc
,
argv
,
LocalOptionBlocSize
.
options
,
250
);
//#define LOAD_FILE
#ifndef LOAD_FILE
const
int
NbParticles
=
FParameters
::
getValue
(
argc
,
argv
,
FParameterDefinitions
::
NbParticles
.
options
,
20
);
FRandomLoader
loader
(
NbParticles
,
1.0
,
FPoint
(
0
,
0
,
0
),
0
);
#else
// Load the particles
const
char
*
const
filename
=
FParameters
::
getStr
(
argc
,
argv
,
FParameterDefinitions
::
InputFile
.
options
,
"../Data/test20k.fma"
);
FFmaGenericLoader
loader
(
filename
);
#endif
FAssertLF
(
loader
.
isOpen
());
// Usual octree
OctreeClass
tree
(
NbLevels
,
2
,
loader
.
getBoxWidth
(),
loader
.
getCenterOfBox
());
FP2PParticleContainer
<>
allParticles
;
for
(
int
idxPart
=
0
;
idxPart
<
loader
.
getNumberOfParticles
()
;
++
idxPart
){
FPoint
particlePosition
;
#ifndef LOAD_FILE
loader
.
fillParticle
(
&
particlePosition
);
#else
FReal
ph
;
loader
.
fillParticle
(
&
particlePosition
,
&
ph
);
#endif
allParticles
.
push
(
particlePosition
);
tree
.
insert
(
particlePosition
);
}
// Put the data into the tree
//GroupOctreeClass groupedTree(NbLevels, groupSize, &tree);
GroupOctreeClass
groupedTree
(
NbLevels
,
loader
.
getBoxWidth
(),
loader
.
getCenterOfBox
(),
groupSize
,
&
allParticles
);
groupedTree
.
printInfoBlocks
();
// Check tree structure at leaf level
groupedTree
.
forEachCellLeaf
<
FGroupTestParticleContainer
>
([
&
](
GroupCellClass
*
gcell
,
FGroupTestParticleContainer
*
gleaf
){
const
ContainerClass
*
src
=
tree
.
getLeafSrc
(
gcell
->
getMortonIndex
());
if
(
src
==
nullptr
){
std
::
cout
<<
"[PartEmpty] Error cell should not exist "
<<
gcell
->
getMortonIndex
()
<<
"
\n
"
;
}
else
{
if
(
src
->
getNbParticles
()
!=
gleaf
->
getNbParticles
()){
std
::
cout
<<
"[Part] Nb particles is different at index "
<<
gcell
->
getMortonIndex
()
<<
" is "
<<
gleaf
->
getNbParticles
()
<<
" should be "
<<
src
->
getNbParticles
()
<<
"
\n
"
;
}
}
});
// Run the algorithm
GroupKernelClass
groupkernel
;
GroupAlgorithm
groupalgo
(
&
groupedTree
,
&
groupkernel
);
groupalgo
.
execute
();
// Usual algorithm
KernelClass
kernels
;
// FTestKernels FBasicKernels
FmmClass
algo
(
&
tree
,
&
kernels
);
//FFmmAlgorithm FFmmAlgorithmThread
algo
.
execute
();
// Validate the result
groupedTree
.
forEachCellLeaf
<
FGroupTestParticleContainer
>
([
&
](
GroupCellClass
*
cell
,
FGroupTestParticleContainer
*
leaf
){
const
int
nbPartsInLeaf
=
leaf
->
getNbParticles
();
if
(
cell
->
getDataUp
()
!=
nbPartsInLeaf
){
std
::
cout
<<
"[P2M] Error a Cell has "
<<
cell
->
getDataUp
()
<<
" (it should be "
<<
nbPartsInLeaf
<<
")
\n
"
;
}
});
groupedTree
.
forEachCellLeaf
<
FGroupTestParticleContainer
>
([
&
](
GroupCellClass
*
cell
,
FGroupTestParticleC