Une nouvelle version du portail de gestion des comptes externes sera mise en production lundi 09 août. Elle permettra d'allonger la validité d'un compte externe jusqu'à 3 ans. Pour plus de détails sur cette version consulter : https://doc-si.inria.fr/x/FCeS

Commit a3dc4cf7 authored by BRAMAS Berenger's avatar BRAMAS Berenger
Browse files

update cuda interface

parent 3b1f85c9
......@@ -147,11 +147,7 @@ __global__ void FCuda__transferInoutPassPerformMpi(unsigned char* currentCellsP
typename CellContainerClass::CompleteCellClass cell = currentCells.getDownCell(outsideInteractions[outInterIdx].insideIdxInBlock);
FCudaAssertLF(cell.symb->mortonIndex == outsideInteractions[outInterIdx].insideIndex);
typename CellContainerClass::CompleteCellClass interactions[343];
memset(interactions, 0, 343*sizeof(interactions[0]));
interactions[outsideInteractions[outInterIdx].relativeOutPosition] = interCell;
const int counter = 1;
kernel->M2L( cell , interactions, counter, idxLevel);
kernel->M2L( cell , &interCell, &outsideInteractions[outInterIdx].relativeOutPosition, 1, idxLevel);
}
}
}
......@@ -208,8 +204,7 @@ __global__ void FCuda__transferInPassPerform(unsigned char* currentCellsPtr, st
const int3 coord = (FCudaTreeCoordinate::ConvertCoordinate(cell.symb->coordinates));
int counter = FCudaTreeCoordinate::GetInteractionNeighbors(coord, idxLevel,interactionsIndexes,interactionsPosition);
typename CellContainerClass::CompleteCellClass interactions[343];
memset(interactions, 0, 343*sizeof(interactions[0]));
typename CellContainerClass::CompleteCellClass interactions[189];
int counterExistingCell = 0;
for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
......@@ -217,15 +212,14 @@ __global__ void FCuda__transferInPassPerform(unsigned char* currentCellsPtr, st
const int cellPos = currentCells.getCellIndex(interactionsIndexes[idxInter]);
if(cellPos != -1){
typename CellContainerClass::CompleteCellClass interCell = currentCells.getUpCell(cellPos);
FCudaAssertLF(interCell.symb->mortonIndex == interactionsIndexes[idxInter]);
FCudaAssertLF(interactions[interactionsPosition[idxInter]].symb == nullptr);
interactions[interactionsPosition[idxInter]] = interCell;
interactions[interactionsPosition[counterExistingCell]] = interCell;
interactionsPosition[counterExistingCell] = interactionsPosition[idxInter];
counterExistingCell += 1;
}
}
}
kernel->M2L( cell , interactions, counterExistingCell, idxLevel);
kernel->M2L( cell , interactions, interactionsPosition, counterExistingCell, idxLevel);
}
}
......@@ -262,24 +256,25 @@ __global__ void FCuda__transferInoutPassPerform(unsigned char* currentCellsPtr,
CellContainerClass currentCells(currentCellsPtr, currentCellsSize, nullptr, currentCellsDownPtr);
CellContainerClass cellsOther(externalCellsPtr, externalCellsSize, externalCellsUpPtr, nullptr);
for(int outInterIdx = 0 ; outInterIdx < nbOutsideInteractions ; ++outInterIdx){
const int cellPos = cellsOther.getCellIndex(outsideInteractions[outInterIdx].outIndex);
if(cellPos != -1){
typename CellContainerClass::CompleteCellClass interCell = cellsOther.getCompleteCell(outsideInteractions[outInterIdx].outIndex);
if(mode == 1){
for(int outInterIdx = 0 ; outInterIdx < nbOutsideInteractions ; ++outInterIdx){
typename CellContainerClass::CompleteCellClass interCell = cellsOther.getUpCell(outsideInteractions[outInterIdx].outsideIdxInBlock);
FCudaAssertLF(interCell.symb->mortonIndex == outsideInteractions[outInterIdx].outIndex);
typename CellContainerClass::CompleteCellClass cell = currentCells.getCompleteCell(outsideInteractions[outInterIdx].insideIdxInBlock);
FCudaAssertLF(cell.symb);
typename CellContainerClass::CompleteCellClass cell = currentCells.getDownCell(outsideInteractions[outInterIdx].insideIdxInBlock);
FCudaAssertLF(cell.symb->mortonIndex == outsideInteractions[outInterIdx].insideIndex);
typename CellContainerClass::CompleteCellClass interactions[343];
memset(interactions, 0, 343*sizeof(interactions[0]));
interactions[outsideInteractions[outInterIdx].relativeOutPosition] = interCell;
const int counter = 1;
kernel->M2L( cell , interactions, counter, idxLevel);
kernel->M2L( cell , &interCell, &outsideInteractions[outInterIdx].relativeOutPosition, 1, idxLevel);
}
}
else{
for(int outInterIdx = 0 ; outInterIdx < nbOutsideInteractions ; ++outInterIdx){
typename CellContainerClass::CompleteCellClass cell = cellsOther.getUpCell(outsideInteractions[outInterIdx].insideIdxInBlock);
FCudaAssertLF(cell.symb->mortonIndex == outsideInteractions[outInterIdx].insideIndex);
typename CellContainerClass::CompleteCellClass interCell = currentCells.getDownCell(outsideInteractions[outInterIdx].outsideIdxInBlock);
FCudaAssertLF(interCell.symb->mortonIndex == outsideInteractions[outInterIdx].outIndex);
interactions[outsideInteractions[outInterIdx].relativeOutPosition].symb = nullptr;
interactions[FMGetOppositeInterIndex(outsideInteractions[outInterIdx].relativeOutPosition)] = cell;
kernel->M2L( interCell , interactions, counter, idxLevel);
const int otherPosition = FMGetOppositeInterIndex(outsideInteractions[outInterIdx].relativeOutPosition);
kernel->M2L( interCell , &cell, &otherPosition, 1, idxLevel);
}
}
}
......@@ -399,11 +394,9 @@ __global__ void FCuda__directInoutPassPerformMpi(unsigned char* containersPtr, s
if(leafPos != -1){
ParticleGroupClass interParticles = containersOther.template getLeaf<ParticleGroupClass>(leafPos);
ParticleGroupClass particles = containers.template getLeaf<ParticleGroupClass>(outsideInteractions[outInterIdx].insideIdxInBlock);
ParticleGroupClass* interactions[27];
memset(interactions, 0, 27*sizeof(ParticleGroupClass*));
interactions[outsideInteractions[outInterIdx].relativeOutPosition] = &interParticles;
const int counter = 1;
kernel->P2PRemote( FCudaTreeCoordinate::GetPositionFromMorton(outsideInteractions[outInterIdx].insideIndex, treeHeight-1), &particles, &particles , interactions, counter);
kernel->P2PRemote( FCudaTreeCoordinate::GetPositionFromMorton(outsideInteractions[outInterIdx].insideIndex, treeHeight-1),
&particles, &particles , &interParticles, &outsideInteractions[outInterIdx].relativeOutPosition, 1);
}
}
}
......@@ -459,9 +452,7 @@ __global__ void FCuda__directInPassPerform(unsigned char* containersPtr, std::si
const int3 coord = FCudaTreeCoordinate::GetPositionFromMorton(mindex, treeHeight-1);
int counter = FCudaTreeCoordinate::GetNeighborsIndexes(coord, treeHeight,interactionsIndexes,interactionsPosition);
ParticleGroupClass interactionsObjects[27];
ParticleGroupClass* interactions[27];
memset(interactions, 0, 27*sizeof(ParticleGroupClass*));
ParticleGroupClass interactionsObjects[26];
int counterExistingCell = 0;
for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
......@@ -469,14 +460,13 @@ __global__ void FCuda__directInPassPerform(unsigned char* containersPtr, std::si
const int leafPos = containers.getLeafIndex(interactionsIndexes[idxInter]);
if(leafPos != -1){
interactionsObjects[counterExistingCell] = containers.template getLeaf<ParticleGroupClass>(leafPos);
FCudaAssertLF(interactions[interactionsPosition[idxInter]] == nullptr);
interactions[interactionsPosition[idxInter]] = &interactionsObjects[counterExistingCell];
interactionsPosition[counterExistingCell] = interactionsPosition[idxInter];
counterExistingCell += 1;
}
}
}
kernel->P2P( coord, &particles, &particles , interactions, counterExistingCell);
kernel->P2P( coord, &particles, &particles , interactionsObjects, interactionsPosition, counterExistingCell);
}
}
......@@ -516,15 +506,13 @@ __global__ void FCuda__directInoutPassPerform(unsigned char* containersPtr, std:
FCudaAssertLF(containersOther.getLeafMortonIndex(leafPos) == outsideInteractions[outInterIdx].outIndex);
FCudaAssertLF(containers.getLeafMortonIndex(outsideInteractions[outInterIdx].insideIdxInBlock) == outsideInteractions[outInterIdx].insideIndex);
ParticleGroupClass* interactions[27];
memset(interactions, 0, 27*sizeof(ParticleGroupClass*));
interactions[outsideInteractions[outInterIdx].relativeOutPosition] = &interParticles;
const int counter = 1;
kernel->P2PRemote( FCudaTreeCoordinate::GetPositionFromMorton(outsideInteractions[outInterIdx].insideIndex, treeHeight-1), &particles, &particles , interactions, counter);
interactions[outsideInteractions[outInterIdx].relativeOutPosition] = nullptr;
interactions[FMGetOppositeNeighIndex(outsideInteractions[outInterIdx].relativeOutPosition)] = &particles;
kernel->P2PRemote( FCudaTreeCoordinate::GetPositionFromMorton(outsideInteractions[outInterIdx].outIndex, treeHeight-1), &interParticles, &interParticles , interactions, counter);
kernel->P2POuter( FCudaTreeCoordinate::GetPositionFromMorton(outsideInteractions[outInterIdx].insideIndex, treeHeight-1),
&particles , &interParticles, &outsideInteractions[outInterIdx].relativeOutPosition, 1);
const int otherPosition = FMGetOppositeNeighIndex(outsideInteractions[outInterIdx].relativeOutPosition);
kernel->P2POuter( FCudaTreeCoordinate::GetPositionFromMorton(outsideInteractions[outInterIdx].outIndex, treeHeight-1),
&interParticles , &particles, &otherPosition, 1);
}
}
}
......
......@@ -22,7 +22,8 @@ public:
/** M2M operator of this kernel: the body is empty, so calling it does nothing. */
__device__ void M2M(CellClass /*pole*/,
                    const CellClass /*child*/[8],
                    const int /*level*/)
{
}
/**
 * M2L operator of this kernel: the body is empty, so calling it does nothing.
 *
 * The signature takes a pointer to the neighbor cells, a pointer to their
 * positions and the number of entries (the former fixed `[343]` array
 * parameter has been dropped).
 * NOTE(review): presumably neighPositions[i] is the relative position of
 * distantNeighbors[i] — confirm against the callers.
 */
__device__ void M2L(CellClass /*pole*/, const CellClass* /*distantNeighbors*/,
                    const int* /*neighPositions*/,
                    const int /*size*/, const int /*level*/) {
}
......@@ -34,12 +35,20 @@ public:
/**
 * P2P operator of this kernel: the body is empty, so calling it does nothing.
 *
 * Neighbors are received as a pointer to containers plus a pointer to their
 * positions and a count (the former `[27]` array-of-pointers parameter has
 * been dropped).
 */
__device__ void P2P(const int3& ,
                    ContainerClass* const /*targets*/, const ContainerClass* const /*sources*/,
                    ContainerClass* const /*directNeighborsParticles*/,
                    const int* /*neighborPositions*/, const int ){
}
/**
 * P2POuter operator of this kernel: the body is empty, so calling it does nothing.
 * Takes the target container, a pointer to neighbor containers, a pointer to
 * their positions and an (unnamed) integer count.
 */
__device__ void P2POuter(const int3& /*unused*/,
                         ContainerClass* const /*targets*/,
                         ContainerClass* const /*directNeighborsParticles*/,
                         const int* /*neighborPositions*/,
                         const int /*unused*/)
{
}
/**
 * P2PRemote operator of this kernel: the body is empty, so calling it does nothing.
 *
 * Neighbors are received as a pointer to containers plus a pointer to their
 * positions and a count (the former `[27]` array-of-pointers parameter has
 * been dropped).
 */
__device__ void P2PRemote(const int3& ,
                          ContainerClass* const /*targets*/, const ContainerClass* const /*sources*/,
                          ContainerClass* const /*directNeighborsParticles*/,
                          const int* /*neighborPositions*/, const int ){
}
__host__ static FCudaEmptyKernel* InitKernelKernel(void*){
......
......@@ -59,7 +59,8 @@ public:
/** M2M operator: the body is empty, so calling it does nothing. */
__device__ void M2M(CellClass /*pole*/,
                    const CellClass /*child*/[8],
                    const int /*level*/)
{
}
/**
 * M2L operator: the body is empty, so calling it does nothing.
 *
 * The signature takes a pointer to the neighbor cells, a pointer to their
 * positions and the number of entries (the former fixed `[343]` array
 * parameter has been dropped).
 */
__device__ void M2L(CellClass /*pole*/, const CellClass* /*distantNeighbors*/,
                    const int* /*neighPositions*/,
                    const int /*size*/, const int /*level*/) {
}
......@@ -71,9 +72,10 @@ public:
__device__ void P2P(const int3& pos,
ContainerClass* const targets, const ContainerClass* const sources,
ContainerClass* const directNeighborsParticles[27], const int counter){
ContainerClass* const directNeighborsParticles,
const int* neighborPositions, const int counter){
// Compute with other
P2PRemote(pos, targets, sources, directNeighborsParticles, counter);
P2PRemote(pos, targets, sources, directNeighborsParticles, neighborPositions, counter);
// Compute inside
const int nbLoops = (targets->getNbParticles()+blockDim.x-1)/blockDim.x;
......@@ -168,75 +170,148 @@ public:
/**
 * Adds to every target particle the interaction computed by DirectMacro with
 * every particle of the `counter` neighbor containers.
 *
 * @param targets                  container whose attributes 1..4 receive the
 *                                 accumulated forceX/forceY/forceZ/potential;
 *                                 positions and attribute 0 are read.
 * @param directNeighborsParticles array of `counter` neighbor containers
 *                                 (every entry is used, there are no null slots).
 * @param counter                  number of entries in directNeighborsParticles.
 *
 * The unnamed int3, `sources` and the neighbor positions are not used.
 *
 * Work layout: each thread handles target particles idxLoop*blockDim.x + threadIdx.x.
 * Source particles are staged through shared memory in tiles of SHARE_SIZE.
 * Every thread of the block must call this function, because __syncthreads()
 * is executed inside loops whose trip counts do not depend on the thread index.
 */
__device__ void P2PRemote(const int3& ,
                          ContainerClass* const targets, const ContainerClass* const /*sources*/,
                          ContainerClass* const directNeighborsParticles,
                          const int* /*neighborsPositions*/, const int counter){
    for(int idxNeigh = 0 ; idxNeigh < counter ; ++idxNeigh){
        const int nbLoops = (targets->getNbParticles()+blockDim.x-1)/blockDim.x;
        for(int idxLoop = 0 ; idxLoop < nbLoops; ++idxLoop){
            const int idxPart = (idxLoop*blockDim.x+threadIdx.x);
            // Threads past the last target still take part in the tile loads and barriers.
            const bool threadCompute = (idxPart < targets->getNbParticles());

            FReal targetX, targetY, targetZ, targetPhys;
            FReal forceX = 0, forceY = 0, forceZ = 0, potential = 0;

            if(threadCompute){
                targetX = targets->getPositions()[0][idxPart];
                targetY = targets->getPositions()[1][idxPart];
                targetZ = targets->getPositions()[2][idxPart];
                targetPhys = targets->getAttribute(0)[idxPart];
            }

            for(int idxCopy = 0 ; idxCopy < directNeighborsParticles[idxNeigh].getNbParticles() ; idxCopy += SHARE_SIZE){
                __shared__ FReal sourcesX[SHARE_SIZE];
                __shared__ FReal sourcesY[SHARE_SIZE];
                __shared__ FReal sourcesZ[SHARE_SIZE];
                __shared__ FReal sourcesPhys[SHARE_SIZE];

                const int nbCopies = Min(SHARE_SIZE, directNeighborsParticles[idxNeigh].getNbParticles()-idxCopy);
                // Load the current tile of SOURCE particles: slot idxSrc of the tile is
                // source particle idxCopy + idxSrc (not the target index idxPart).
                // The blockDim.x stride fills the whole tile even when the block has
                // fewer than SHARE_SIZE threads.
                for(int idxSrc = threadIdx.x ; idxSrc < nbCopies ; idxSrc += blockDim.x){
                    sourcesX[idxSrc] = directNeighborsParticles[idxNeigh].getPositions()[0][idxCopy + idxSrc];
                    sourcesY[idxSrc] = directNeighborsParticles[idxNeigh].getPositions()[1][idxCopy + idxSrc];
                    sourcesZ[idxSrc] = directNeighborsParticles[idxNeigh].getPositions()[2][idxCopy + idxSrc];
                    sourcesPhys[idxSrc] = directNeighborsParticles[idxNeigh].getAttribute(0)[idxCopy + idxSrc];
                }

                // Tile must be fully written before any thread reads it.
                __syncthreads();

                if(threadCompute){
                    for(int otherIndex = 0; otherIndex < nbCopies - 3; otherIndex += 4) { // unrolling x4
                        DirectMacro(targetX, targetY, targetZ, targetPhys,
                                    forceX, forceY, forceZ, potential,
                                    sourcesX[otherIndex], sourcesY[otherIndex], sourcesZ[otherIndex], sourcesPhys[otherIndex]);
                        DirectMacro(targetX, targetY, targetZ, targetPhys,
                                    forceX, forceY, forceZ, potential,
                                    sourcesX[otherIndex+1], sourcesY[otherIndex+1], sourcesZ[otherIndex+1], sourcesPhys[otherIndex+1]);
                        DirectMacro(targetX, targetY, targetZ, targetPhys,
                                    forceX, forceY, forceZ, potential,
                                    sourcesX[otherIndex+2], sourcesY[otherIndex+2], sourcesZ[otherIndex+2], sourcesPhys[otherIndex+2]);
                        DirectMacro(targetX, targetY, targetZ, targetPhys,
                                    forceX, forceY, forceZ, potential,
                                    sourcesX[otherIndex+3], sourcesY[otherIndex+3], sourcesZ[otherIndex+3], sourcesPhys[otherIndex+3]);
                    }

                    for(int otherIndex = (nbCopies/4) * 4; otherIndex < nbCopies; ++otherIndex) { // if nk%4 is not zero
                        DirectMacro(targetX, targetY, targetZ, targetPhys,
                                    forceX, forceY, forceZ, potential,
                                    sourcesX[otherIndex], sourcesY[otherIndex], sourcesZ[otherIndex], sourcesPhys[otherIndex]);
                    }
                }

                // Nobody may overwrite the tile while other threads still read it.
                __syncthreads();
            }

            if( threadCompute ){
                targets->getAttribute(1)[idxPart] += forceX;
                targets->getAttribute(2)[idxPart] += forceY;
                targets->getAttribute(3)[idxPart] += forceZ;
                targets->getAttribute(4)[idxPart] += potential;
            }
        }
    }
}
/**
 * Adds to every target particle the interaction computed by DirectMacro with
 * every particle of the `counter` neighbor containers. Only `targets` is
 * written; the neighbor containers are read.
 *
 * @param targets                  container whose attributes 1..4 receive the
 *                                 accumulated forceX/forceY/forceZ/potential;
 *                                 positions and attribute 0 are read.
 * @param directNeighborsParticles array of `counter` neighbor containers
 *                                 (every entry is used, there are no null slots).
 * @param counter                  number of entries in directNeighborsParticles.
 *
 * The unnamed int3 and the neighbor positions are not used.
 *
 * Work layout: each thread handles target particles idxLoop*blockDim.x + threadIdx.x.
 * Source particles are staged through shared memory in tiles of SHARE_SIZE.
 * Every thread of the block must call this function, because __syncthreads()
 * is executed inside loops whose trip counts do not depend on the thread index.
 */
__device__ void P2POuter(const int3& ,
                         ContainerClass* const targets,
                         ContainerClass* const directNeighborsParticles,
                         const int* /*neighborsPositions*/, const int counter){
    for(int idxNeigh = 0 ; idxNeigh < counter ; ++idxNeigh){
        const int nbLoops = (targets->getNbParticles()+blockDim.x-1)/blockDim.x;
        for(int idxLoop = 0 ; idxLoop < nbLoops; ++idxLoop){
            const int idxPart = (idxLoop*blockDim.x+threadIdx.x);
            // Threads past the last target still take part in the tile loads and barriers.
            const bool threadCompute = (idxPart < targets->getNbParticles());

            FReal targetX, targetY, targetZ, targetPhys;
            FReal forceX = 0, forceY = 0, forceZ = 0, potential = 0;

            if(threadCompute){
                targetX = targets->getPositions()[0][idxPart];
                targetY = targets->getPositions()[1][idxPart];
                targetZ = targets->getPositions()[2][idxPart];
                targetPhys = targets->getAttribute(0)[idxPart];
            }

            for(int idxCopy = 0 ; idxCopy < directNeighborsParticles[idxNeigh].getNbParticles() ; idxCopy += SHARE_SIZE){
                __shared__ FReal sourcesX[SHARE_SIZE];
                __shared__ FReal sourcesY[SHARE_SIZE];
                __shared__ FReal sourcesZ[SHARE_SIZE];
                __shared__ FReal sourcesPhys[SHARE_SIZE];

                const int nbCopies = Min(SHARE_SIZE, directNeighborsParticles[idxNeigh].getNbParticles()-idxCopy);
                // Load the current tile of SOURCE particles: slot idxSrc of the tile is
                // source particle idxCopy + idxSrc (not the target index idxPart).
                // The blockDim.x stride fills the whole tile even when the block has
                // fewer than SHARE_SIZE threads.
                for(int idxSrc = threadIdx.x ; idxSrc < nbCopies ; idxSrc += blockDim.x){
                    sourcesX[idxSrc] = directNeighborsParticles[idxNeigh].getPositions()[0][idxCopy + idxSrc];
                    sourcesY[idxSrc] = directNeighborsParticles[idxNeigh].getPositions()[1][idxCopy + idxSrc];
                    sourcesZ[idxSrc] = directNeighborsParticles[idxNeigh].getPositions()[2][idxCopy + idxSrc];
                    sourcesPhys[idxSrc] = directNeighborsParticles[idxNeigh].getAttribute(0)[idxCopy + idxSrc];
                }

                // Tile must be fully written before any thread reads it.
                __syncthreads();

                if(threadCompute){
                    for(int otherIndex = 0; otherIndex < nbCopies - 3; otherIndex += 4) { // unrolling x4
                        DirectMacro(targetX, targetY, targetZ, targetPhys,
                                    forceX, forceY, forceZ, potential,
                                    sourcesX[otherIndex], sourcesY[otherIndex], sourcesZ[otherIndex], sourcesPhys[otherIndex]);
                        DirectMacro(targetX, targetY, targetZ, targetPhys,
                                    forceX, forceY, forceZ, potential,
                                    sourcesX[otherIndex+1], sourcesY[otherIndex+1], sourcesZ[otherIndex+1], sourcesPhys[otherIndex+1]);
                        DirectMacro(targetX, targetY, targetZ, targetPhys,
                                    forceX, forceY, forceZ, potential,
                                    sourcesX[otherIndex+2], sourcesY[otherIndex+2], sourcesZ[otherIndex+2], sourcesPhys[otherIndex+2]);
                        DirectMacro(targetX, targetY, targetZ, targetPhys,
                                    forceX, forceY, forceZ, potential,
                                    sourcesX[otherIndex+3], sourcesY[otherIndex+3], sourcesZ[otherIndex+3], sourcesPhys[otherIndex+3]);
                    }

                    for(int otherIndex = (nbCopies/4) * 4; otherIndex < nbCopies; ++otherIndex) { // if nk%4 is not zero
                        DirectMacro(targetX, targetY, targetZ, targetPhys,
                                    forceX, forceY, forceZ, potential,
                                    sourcesX[otherIndex], sourcesY[otherIndex], sourcesZ[otherIndex], sourcesPhys[otherIndex]);
                    }
                }

                // Nobody may overwrite the tile while other threads still read it.
                __syncthreads();
            }

            if( threadCompute ){
                targets->getAttribute(1)[idxPart] += forceX;
                targets->getAttribute(2)[idxPart] += forceY;
                targets->getAttribute(3)[idxPart] += forceZ;
                targets->getAttribute(4)[idxPart] += potential;
            }
        }
    }
}
......
......@@ -34,13 +34,12 @@ public:
}
/** Before Downward */
// Adds the `up` value of each of the `size` neighbor cells to the `down` value
// of `local`. Only thread 0 of the block performs the accumulation.
// distantNeighbors holds exactly `size` valid cells, so there is no per-slot
// null test and no fixed 343-entry scan. neighPositions and level are unused.
__device__ void M2L(CellClass local, const CellClass* distantNeighbors,
                    const int* /*neighPositions*/, const int size, const int /*level*/) {
    if(threadIdx.x == 0) {
        // The pole is impacted by what the other poles represent
        for(int idx = 0 ; idx < size ; ++idx){
            *local.down += *distantNeighbors[idx].up;
        }
    }
}
......@@ -71,18 +70,18 @@ public:
/** After Downward */
__device__ void P2P(const int3& ,
ContainerClass* const targets, const ContainerClass* const sources,
ContainerClass* const directNeighborsParticles[27], const int ){
ContainerClass* const targets, const ContainerClass* const sources,
ContainerClass* const directNeighborsParticles,
const int* /*neighborPositions*/,
const int counter){
if(threadIdx.x == 0) {
// Each particles targeted is impacted by the particles sources
long long int inc = sources->getNbParticles();
if(targets == sources){
inc -= 1;
}
for(int idx = 0 ; idx < 27 ; ++idx){
if( directNeighborsParticles[idx] ){
inc += directNeighborsParticles[idx]->getNbParticles();
}
for(int idx = 0 ; idx < counter ; ++idx){
inc += directNeighborsParticles[idx].getNbParticles();
}
long long int*const particlesAttributes = targets->template getAttribute<0>();
......@@ -94,15 +93,35 @@ public:
/** After Downward */
// Adds to attribute 0 of every target particle the total number of particles
// found in the `counter` neighbor containers. Only thread 0 of the block does
// the work. directNeighborsParticles holds exactly `counter` valid containers,
// so there is no per-slot null test and no fixed 27-entry scan.
// The int3, `sources` and the neighbor positions are unused.
__device__ void P2PRemote(const int3& ,
                          ContainerClass* const targets,
                          const ContainerClass* const /*sources*/,
                          ContainerClass* const directNeighborsParticles,
                          const int* /*neighborPositions*/,
                          const int counter){
    if(threadIdx.x == 0) {
        // Each targeted particle is impacted by every neighbor particle
        long long int inc = 0;
        for(int idx = 0 ; idx < counter ; ++idx){
            inc += directNeighborsParticles[idx].getNbParticles();
        }

        long long int*const particlesAttributes = targets->template getAttribute<0>();
        for(FSize idxPart = 0 ; idxPart < targets->getNbParticles() ; ++idxPart){
            particlesAttributes[idxPart] += inc;
        }
    }
}
__device__ void P2POuter(const int3& ,
ContainerClass* const targets,
ContainerClass* const directNeighborsParticles,
const int* /*neighborPositions*/,
const int counter){
if(threadIdx.x == 0) {
// Each particles targeted is impacted by the particles sources
long long int inc = 0;
for(int idx = 0 ; idx < counter ; ++idx){
inc += directNeighborsParticles[idx].getNbParticles();
}
long long int*const particlesAttributes = targets->template getAttribute<0>();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment