Commit 52004cf5 authored by PIACIBELLO Cyrille's avatar PIACIBELLO Cyrille
Browse files

FP2PAvx should work. Note that you still need to unset ScalFMM_USE_SSE in...

FP2PAvx should work. Note that you still need to unset ScalFMM_USE_SSE in order to have AVX: if both are set to ON, SSE will do the work.
parent 476eb362
......@@ -24,9 +24,9 @@ namespace FP2P{
FReal*const targetsForcesZ = inTargets->getForcesZ();
FReal*const targetsPotentials = inTargets->getPotentials();
// std::cout << " OK AVX " << std::endl;
const __m256d mOne = _mm256_set1_pd(1.0);
const __m256d mOne = _mm256_set1_pd(1.0);
//P2P between target cells and others ones
for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){
if( inNeighbors[idxNeighbors] ){
const int nbParticlesSources = (inNeighbors[idxNeighbors]->getNbParticles()+3)/4;
......@@ -91,6 +91,7 @@ namespace FP2P{
}
}
}
//P2P mutual (between targets cells and its own parts 4 by 4)
{
const int nbParticlesSources = (nbParticlesTargets+3)/4;
......@@ -113,7 +114,7 @@ namespace FP2P{
__m256d tfz = _mm256_setzero_pd();
__m256d tpo = _mm256_setzero_pd();
for(int idxSource = (idxTarget+2)/2 ; idxSource < nbParticlesSources ; ++idxSource){
for(int idxSource = (idxTarget+4)/4 ; idxSource < nbParticlesSources ; ++idxSource){
__m256d dx = sourcesX[idxSource] - tx;
__m256d dy = sourcesY[idxSource] - ty;
__m256d dz = sourcesZ[idxSource] - tz;
......@@ -156,8 +157,11 @@ namespace FP2P{
}
}
for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; idxTarget += 2){
const int idxSource = idxTarget + 1;
//P2P with the last ones
for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; /*idxTarget += 4*/idxTarget++){
for(int idxS = 1 ; idxS < 4-(idxTarget%4) ; ++idxS){
int idxSource = idxTarget + idxS;
FReal dx = targetsX[idxSource] - targetsX[idxTarget];
FReal dy = targetsY[idxSource] - targetsY[idxTarget];
FReal dz = targetsZ[idxSource] - targetsZ[idxTarget];
......@@ -183,6 +187,7 @@ namespace FP2P{
targetsPotentials[idxSource] += inv_distance * targetsPhysicalValues[idxTarget];
}
}
}
template <class ContainerClass>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment