Commit 570c1f42 authored by BRAMAS Berenger

Update starpu cuda

parent 6da8419e
This diff is collapsed.
......@@ -9,54 +9,54 @@
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel);
CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__upwardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9],
CudaKernelClass* kernel, int nbSubCellGroups, int idxLevel);
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallbackMpi(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
CudaKernelClass* kernel, int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions);
int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
CudaKernelClass* kernel, int idxLevel);
int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
CudaKernelClass* kernel, int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions);
int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__downardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9],
CudaKernelClass* kernel, int nbSubCellGroups, int idxLevel);
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallbackMpi(unsigned char* containersPtr, std::size_t containersSize,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
CudaKernelClass* kernel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight);
const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInPassCallback(unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel, const int treeHeight);
const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallback(unsigned char* containersPtr, std::size_t containersSize,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
CudaKernelClass* kernel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight);
const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__mergePassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel);
CudaKernelClass* kernel, cudaStream_t currentStream);
// Forward declaration only; the definition is not part of this view.
// Takes a single untyped pointer and returns a pointer to a CudaKernelClass.
// NOTE(review): presumably the argument is the host-side kernel object the
// CUDA kernel is built from, and the caller owns the result -- confirm
// against the definition.
template <class CudaKernelClass>
CudaKernelClass* FCuda__BuildCudaKernel(void*);
......
......@@ -85,7 +85,7 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
kernel);
kernel, starpu_cuda_get_local_stream());
}
/////////////////////////////////////////////////////////////////////////////////////
......@@ -115,7 +115,7 @@ public:
FCuda__upwardPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
subCellGroupsPtr,subCellGroupsSize,
kernel, nbSubCellGroups, idxLevel);
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream());
}
/////////////////////////////////////////////////////////////////////////////////////
......@@ -139,7 +139,8 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
kernel, idxLevel, outsideInteractions->data(), outsideInteractions->size());
idxLevel, outsideInteractions->data(), outsideInteractions->size(), kernel,
starpu_cuda_get_local_stream());
}
#endif
/////////////////////////////////////////////////////////////////////////////////////
......@@ -159,7 +160,7 @@ public:
FCuda__transferInPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
kernel, idxLevel);
idxLevel, kernel, starpu_cuda_get_local_stream());
}
static void transferInoutPassCallback(void *buffers[], void *cl_arg){
......@@ -182,7 +183,8 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
kernel, idxLevel, outsideInteractions->data(), outsideInteractions->size());
idxLevel, outsideInteractions->data(), outsideInteractions->size(), kernel,
starpu_cuda_get_local_stream());
}
/////////////////////////////////////////////////////////////////////////////////////
......@@ -211,7 +213,7 @@ public:
FCuda__downardPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
subCellGroupsPtr,subCellGroupsSize,
kernel, nbSubCellGroups, idxLevel);
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream());
}
/////////////////////////////////////////////////////////////////////////////////////
/// Direct Pass MPI
......@@ -234,7 +236,8 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
kernel, outsideInteractions->data(), outsideInteractions->size(), worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight);
outsideInteractions->data(), outsideInteractions->size(),
worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight ,kernel, starpu_cuda_get_local_stream());
}
#endif
/////////////////////////////////////////////////////////////////////////////////////
......@@ -251,7 +254,7 @@ public:
FCuda__directInPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
kernel, worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight, kernel, starpu_cuda_get_local_stream());
}
static void directInoutPassCallback(void *buffers[], void *cl_arg){
......@@ -270,7 +273,8 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
kernel, outsideInteractions->data(), outsideInteractions->size(), worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight);
outsideInteractions->data(), outsideInteractions->size(), worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight,
kernel, starpu_cuda_get_local_stream());
}
......@@ -293,7 +297,7 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
kernel);
kernel, starpu_cuda_get_local_stream());
}
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment