Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
solverstack
ScalFMM
Commits
a51a0b2a
Commit
a51a0b2a
authored
Mar 17, 2015
by
BRAMAS Berenger
Browse files
update starpu cuda
parent
cef64334
Changes
4
Hide whitespace changes
Inline
Side-by-side
Src/GroupTree/Cuda/FCudaDeviceWrapper.cu
View file @
a51a0b2a
...
...
@@ -2,7 +2,7 @@
#include
"FCudaDeviceWrapper.hpp"
#include
"FCudaTreeCoordinate.hpp"
#include
"FCudaStructParams.hpp"
static
const
int
nbCudaThreads
=
32
;
static
const
int
nbCudaBlocks
=
1
;
...
...
@@ -67,12 +67,12 @@ __host__ void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__global__
void
FCuda__upwardPassPerform
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
]
,
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
,
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
,
CudaKernelClass
*
kernel
,
int
nbSubCellGroups
,
int
idxLevel
){
CellContainerClass
currentCells
(
currentCellsPtr
,
currentCellsSize
);
CellContainerClass
subCellGroups
[
9
];
for
(
int
idx
=
0
;
idx
<
nbSubCellGroups
;
++
idx
){
subCellGroups
[
idx
].
reset
(
subCellGroupsPtr
[
idx
],
subCellGroupsSize
[
idx
]);
subCellGroups
[
idx
].
reset
(
subCellGroupsPtr
.
values
[
idx
],
subCellGroupsSize
.
values
[
idx
]);
}
FCudaAssertLF
(
nbSubCellGroups
!=
0
);
...
...
@@ -107,7 +107,7 @@ __global__ void FCuda__upwardPassPerform(unsigned char* currentCellsPtr, std::si
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__host__
void
FCuda__upwardPassCallback
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
]
,
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
,
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
,
int
nbSubCellGroups
,
int
idxLevel
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
){
FCuda__upwardPassPerform
...
...
@@ -290,13 +290,13 @@ __host__ void FCuda__transferInoutPassCallback(unsigned char* currentCellsPtr, s
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__global__
void
FCuda__downardPassPerform
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
]
,
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
,
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
,
CudaKernelClass
*
kernel
,
int
nbSubCellGroups
,
int
idxLevel
){
FCudaAssertLF
(
nbSubCellGroups
!=
0
);
CellContainerClass
currentCells
(
currentCellsPtr
,
currentCellsSize
);
CellContainerClass
subCellGroups
[
9
];
for
(
int
idx
=
0
;
idx
<
nbSubCellGroups
;
++
idx
){
subCellGroups
[
idx
].
reset
(
subCellGroupsPtr
[
idx
],
subCellGroupsSize
[
idx
]);
subCellGroups
[
idx
].
reset
(
subCellGroupsPtr
.
values
[
idx
],
subCellGroupsSize
.
values
[
idx
]);
}
const
MortonIndex
blockStartIdx
=
FCudaMax
(
currentCells
.
getStartingIndex
(),
...
...
@@ -330,7 +330,7 @@ __global__ void FCuda__downardPassPerform(unsigned char* currentCellsPtr, std::s
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__host__
void
FCuda__downardPassCallback
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
]
,
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
,
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
,
int
nbSubCellGroups
,
int
idxLevel
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
){
FCuda__downardPassPerform
...
...
@@ -566,7 +566,7 @@ template void FCuda__bottomPassCallback<FCudaEmptyCell, FCudaGroupOfCells<FCudaE
template
void
FCuda__upwardPassCallback
<
FCudaEmptyCell
,
FCudaGroupOfCells
<
FCudaEmptyCell
>,
FCudaGroupOfParticles
<
0
,
int
>
,
FCudaGroupAttachedLeaf
<
0
,
int
>
,
FCudaEmptyKernel
<
FCudaEmptyCell
,
FCudaGroupAttachedLeaf
<
0
,
int
>>
>
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
]
,
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
,
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
,
int
nbSubCellGroups
,
int
idxLevel
,
FCudaEmptyKernel
<
FCudaEmptyCell
,
FCudaGroupAttachedLeaf
<
0
,
int
>>*
kernel
,
cudaStream_t
currentStream
);
template
void
FCuda__transferInoutPassCallbackMpi
<
FCudaEmptyCell
,
FCudaGroupOfCells
<
FCudaEmptyCell
>,
FCudaGroupOfParticles
<
0
,
int
>
,
FCudaGroupAttachedLeaf
<
0
,
int
>
,
FCudaEmptyKernel
<
FCudaEmptyCell
,
FCudaGroupAttachedLeaf
<
0
,
int
>>
>
...
...
@@ -587,7 +587,7 @@ template void FCuda__transferInoutPassCallback<FCudaEmptyCell, FCudaGroupOfCells
template
void
FCuda__downardPassCallback
<
FCudaEmptyCell
,
FCudaGroupOfCells
<
FCudaEmptyCell
>,
FCudaGroupOfParticles
<
0
,
int
>
,
FCudaGroupAttachedLeaf
<
0
,
int
>
,
FCudaEmptyKernel
<
FCudaEmptyCell
,
FCudaGroupAttachedLeaf
<
0
,
int
>>
>
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
]
,
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
,
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
,
int
nbSubCellGroups
,
int
idxLevel
,
FCudaEmptyKernel
<
FCudaEmptyCell
,
FCudaGroupAttachedLeaf
<
0
,
int
>>*
kernel
,
cudaStream_t
currentStream
);
template
void
FCuda__directInoutPassCallbackMpi
<
FCudaEmptyCell
,
FCudaGroupOfCells
<
FCudaEmptyCell
>,
FCudaGroupOfParticles
<
0
,
int
>
,
FCudaGroupAttachedLeaf
<
0
,
int
>
,
FCudaEmptyKernel
<
FCudaEmptyCell
,
FCudaGroupAttachedLeaf
<
0
,
int
>>
>
...
...
@@ -628,7 +628,7 @@ template void FCuda__bottomPassCallback<FTestCellPODCore, FCudaGroupOfCells<FTes
template
void
FCuda__upwardPassCallback
<
FTestCellPODCore
,
FCudaGroupOfCells
<
FTestCellPODCore
>,
FCudaGroupOfParticles
<
2
,
long
long
int
>
,
FCudaGroupAttachedLeaf
<
2
,
long
long
int
>
,
FTestCudaKernels
<
FTestCellPODCore
,
FCudaGroupAttachedLeaf
<
2
,
long
long
int
>>
>
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
]
,
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
,
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
,
int
nbSubCellGroups
,
int
idxLevel
,
FTestCudaKernels
<
FTestCellPODCore
,
FCudaGroupAttachedLeaf
<
2
,
long
long
int
>>*
kernel
,
cudaStream_t
currentStream
);
template
void
FCuda__transferInoutPassCallbackMpi
<
FTestCellPODCore
,
FCudaGroupOfCells
<
FTestCellPODCore
>,
FCudaGroupOfParticles
<
2
,
long
long
int
>
,
FCudaGroupAttachedLeaf
<
2
,
long
long
int
>
,
FTestCudaKernels
<
FTestCellPODCore
,
FCudaGroupAttachedLeaf
<
2
,
long
long
int
>>
>
...
...
@@ -649,7 +649,7 @@ template void FCuda__transferInoutPassCallback<FTestCellPODCore, FCudaGroupOfCel
template
void
FCuda__downardPassCallback
<
FTestCellPODCore
,
FCudaGroupOfCells
<
FTestCellPODCore
>,
FCudaGroupOfParticles
<
2
,
long
long
int
>
,
FCudaGroupAttachedLeaf
<
2
,
long
long
int
>
,
FTestCudaKernels
<
FTestCellPODCore
,
FCudaGroupAttachedLeaf
<
2
,
long
long
int
>>
>
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
]
,
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
,
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
,
int
nbSubCellGroups
,
int
idxLevel
,
FTestCudaKernels
<
FTestCellPODCore
,
FCudaGroupAttachedLeaf
<
2
,
long
long
int
>>*
kernel
,
cudaStream_t
currentStream
);
template
void
FCuda__directInoutPassCallbackMpi
<
FTestCellPODCore
,
FCudaGroupOfCells
<
FTestCellPODCore
>,
FCudaGroupOfParticles
<
2
,
long
long
int
>
,
FCudaGroupAttachedLeaf
<
2
,
long
long
int
>
,
FTestCudaKernels
<
FTestCellPODCore
,
FCudaGroupAttachedLeaf
<
2
,
long
long
int
>>
>
...
...
Src/GroupTree/Cuda/FCudaDeviceWrapper.hpp
View file @
a51a0b2a
...
...
@@ -5,6 +5,7 @@
#include
"../../Utils/FGlobal.hpp"
#include
"../FOutOfBlockInteraction.hpp"
#include
"FCudaStructParams.hpp"
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__bottomPassCallback
(
unsigned
char
*
leafCellsPtr
,
std
::
size_t
leafCellsSize
,
...
...
@@ -13,7 +14,7 @@ void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCell
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__upwardPassCallback
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
]
,
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
,
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
,
int
nbSubCellGroups
,
int
idxLevel
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
...
...
@@ -34,7 +35,7 @@ void FCuda__transferInoutPassCallback(unsigned char* currentCellsPtr, std::size_
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__downardPassCallback
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
]
,
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
,
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
,
int
nbSubCellGroups
,
int
idxLevel
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
...
...
Src/GroupTree/Cuda/FCudaStructParams.hpp
0 → 100644
View file @
a51a0b2a
#ifndef FCUDASTRUCTPARAMS_HPP
#define FCUDASTRUCTPARAMS_HPP
#include
"../FStarPUDefaultAlign.hpp"
#include
"FCudaGlobal.hpp"
/**
 * Fixed-size array wrapped in a struct.
 *
 * Holds exactly `Size` elements of type `ArrayType` in the public member
 * `values`; elements are read and written as `params.values[i]`.
 * The struct is aligned to FStarPUDefaultAlign::StructAlign.
 *
 * The FCuda__*PassCallback / FCuda__*PassPerform functions take this type
 * by value (e.g. FCudaParams<unsigned char*, 9>) for their sub-cell-group
 * pointer and size arguments.
 *
 * @tparam ArrayType type of each stored element
 * @tparam Size      number of elements in `values`
 */
template <typename ArrayType, int Size>
struct alignas(FStarPUDefaultAlign::StructAlign) FCudaParams {
    ArrayType values[Size];
};
#endif // FCUDASTRUCTPARAMS_HPP
Src/GroupTree/FStarPUCudaWrapper.hpp
View file @
a51a0b2a
...
...
@@ -101,13 +101,13 @@ public:
int
idxLevel
=
0
;
starpu_codelet_unpack_args
(
cl_arg
,
&
worker
,
&
nbSubCellGroups
,
&
idxLevel
);
unsigned
char
*
subCellGroupsPtr
[
9
]
;
memset
(
subCellGroupsPtr
,
0
,
9
*
sizeof
(
unsigned
char
*
));
size_t
subCellGroupsSize
[
9
]
;
memset
(
subCellGroupsPtr
,
0
,
9
*
sizeof
(
unsigned
char
*
));
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
;
memset
(
&
subCellGroupsPtr
,
0
,
sizeof
(
subCellGroupsPtr
));
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
;
memset
(
&
subCellGroupsPtr
,
0
,
sizeof
(
subCellGroupsSize
));
for
(
int
idxSubGroup
=
0
;
idxSubGroup
<
nbSubCellGroups
;
++
idxSubGroup
){
subCellGroupsPtr
[
idxSubGroup
]
=
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
idxSubGroup
+
1
]));
subCellGroupsSize
[
idxSubGroup
]
=
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
idxSubGroup
+
1
]);
subCellGroupsPtr
.
values
[
idxSubGroup
]
=
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
idxSubGroup
+
1
]));
subCellGroupsSize
.
values
[
idxSubGroup
]
=
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
idxSubGroup
+
1
]);
}
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
...
...
@@ -199,13 +199,13 @@ public:
int
idxLevel
=
0
;
starpu_codelet_unpack_args
(
cl_arg
,
&
worker
,
&
nbSubCellGroups
,
&
idxLevel
);
unsigned
char
*
subCellGroupsPtr
[
9
]
;
memset
(
subCellGroupsPtr
,
0
,
9
*
sizeof
(
unsigned
char
*
));
size_t
subCellGroupsSize
[
9
]
;
memset
(
subCellGroupsPtr
,
0
,
9
*
sizeof
(
size_t
));
FCudaParams
<
unsigned
char
*
,
9
>
subCellGroupsPtr
;
memset
(
&
subCellGroupsPtr
,
0
,
sizeof
(
subCellGroupsPtr
));
FCudaParams
<
std
::
size_t
,
9
>
subCellGroupsSize
;
memset
(
&
subCellGroupsPtr
,
0
,
sizeof
(
subCellGroupsSize
));
for
(
int
idxSubGroup
=
0
;
idxSubGroup
<
nbSubCellGroups
;
++
idxSubGroup
){
subCellGroupsPtr
[
idxSubGroup
]
=
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
idxSubGroup
+
1
]));
subCellGroupsSize
[
idxSubGroup
]
=
(
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
idxSubGroup
+
1
]));
subCellGroupsPtr
.
values
[
idxSubGroup
]
=
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
idxSubGroup
+
1
]));
subCellGroupsSize
.
values
[
idxSubGroup
]
=
(
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
idxSubGroup
+
1
]));
}
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment