Attention une mise à jour du serveur va être effectuée le vendredi 16 avril entre 12h et 12h30. Cette mise à jour va générer une interruption du service de quelques minutes.

Commit 17149fd9 authored by BRAMAS Berenger's avatar BRAMAS Berenger

Make a generic P2PR

parent d0beb94a
......@@ -77,14 +77,316 @@ inline void NonMutualParticles(const FReal sourceX,const FReal sourceY,const FRe
*targetPotential += ( inv_distance * sourcePhysicalValue );
}
template <class ContainerClass, class ComputeClass, int NbFRealInComputeClass>
static void GenericFullMutual(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
const int nbParticlesTargets = inTargets->getNbParticles();
const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues();
const FReal*const targetsX = inTargets->getPositions()[0];
const FReal*const targetsY = inTargets->getPositions()[1];
const FReal*const targetsZ = inTargets->getPositions()[2];
FReal*const targetsForcesX = inTargets->getForcesX();
FReal*const targetsForcesY = inTargets->getForcesY();
FReal*const targetsForcesZ = inTargets->getForcesZ();
FReal*const targetsPotentials = inTargets->getPotentials();
const ComputeClass mOne = FMath::One<ComputeClass>();
for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){
if( inNeighbors[idxNeighbors] ){
const int nbParticlesSources = (inNeighbors[idxNeighbors]->getNbParticles()+NbFRealInComputeClass-1)/NbFRealInComputeClass;
const ComputeClass*const sourcesPhysicalValues = (const ComputeClass*)inNeighbors[idxNeighbors]->getPhysicalValues();
const ComputeClass*const sourcesX = (const ComputeClass*)inNeighbors[idxNeighbors]->getPositions()[0];
const ComputeClass*const sourcesY = (const ComputeClass*)inNeighbors[idxNeighbors]->getPositions()[1];
const ComputeClass*const sourcesZ = (const ComputeClass*)inNeighbors[idxNeighbors]->getPositions()[2];
ComputeClass*const sourcesForcesX = (ComputeClass*)inNeighbors[idxNeighbors]->getForcesX();
ComputeClass*const sourcesForcesY = (ComputeClass*)inNeighbors[idxNeighbors]->getForcesY();
ComputeClass*const sourcesForcesZ = (ComputeClass*)inNeighbors[idxNeighbors]->getForcesZ();
ComputeClass*const sourcesPotentials = (ComputeClass*)inNeighbors[idxNeighbors]->getPotentials();
for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
const ComputeClass tx = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsX[idxTarget]);
const ComputeClass ty = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsY[idxTarget]);
const ComputeClass tz = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsZ[idxTarget]);
const ComputeClass tv = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsPhysicalValues[idxTarget]);
ComputeClass tfx = FMath::Zero<ComputeClass>();
ComputeClass tfy = FMath::Zero<ComputeClass>();
ComputeClass tfz = FMath::Zero<ComputeClass>();
ComputeClass tpo = FMath::Zero<ComputeClass>();
for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){
ComputeClass dx = sourcesX[idxSource] - tx;
ComputeClass dy = sourcesY[idxSource] - ty;
ComputeClass dz = sourcesZ[idxSource] - tz;
ComputeClass inv_square_distance = mOne / (dx*dx + dy*dy + dz*dz);
const ComputeClass inv_distance = FMath::Sqrt(inv_square_distance);
inv_square_distance *= inv_distance;
inv_square_distance *= tv * sourcesPhysicalValues[idxSource];
dx *= inv_square_distance;
dy *= inv_square_distance;
dz *= inv_square_distance;
tfx += dx;
tfy += dy;
tfz += dz;
tpo += inv_distance * sourcesPhysicalValues[idxSource];
sourcesForcesX[idxSource] -= dx;
sourcesForcesY[idxSource] -= dy;
sourcesForcesZ[idxSource] -= dz;
sourcesPotentials[idxSource] += inv_distance * tv;
}
targetsForcesX[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tfx);
targetsForcesY[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tfy);
targetsForcesZ[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tfz);
targetsPotentials[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tpo);
}
}
}
{//In this part, we compute (vectorially) the interaction
//within the target leaf.
const int nbParticlesSources = (nbParticlesTargets+NbFRealInComputeClass-1)/NbFRealInComputeClass;
const ComputeClass*const sourcesPhysicalValues = (const ComputeClass*)targetsPhysicalValues;
const ComputeClass*const sourcesX = (const ComputeClass*)targetsX;
const ComputeClass*const sourcesY = (const ComputeClass*)targetsY;
const ComputeClass*const sourcesZ = (const ComputeClass*)targetsZ;
ComputeClass*const sourcesForcesX = (ComputeClass*)targetsForcesX;
ComputeClass*const sourcesForcesY = (ComputeClass*)targetsForcesY;
ComputeClass*const sourcesForcesZ = (ComputeClass*)targetsForcesZ;
ComputeClass*const sourcesPotentials = (ComputeClass*)targetsPotentials;
for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
const ComputeClass tx = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsX[idxTarget]);
const ComputeClass ty = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsY[idxTarget]);
const ComputeClass tz = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsZ[idxTarget]);
const ComputeClass tv = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsPhysicalValues[idxTarget]);
ComputeClass tfx = FMath::Zero<ComputeClass>();
ComputeClass tfy = FMath::Zero<ComputeClass>();
ComputeClass tfz = FMath::Zero<ComputeClass>();
ComputeClass tpo = FMath::Zero<ComputeClass>();
for(int idxSource = (idxTarget+NbFRealInComputeClass)/NbFRealInComputeClass ; idxSource < nbParticlesSources ; ++idxSource){
ComputeClass dx = sourcesX[idxSource] - tx;
ComputeClass dy = sourcesY[idxSource] - ty;
ComputeClass dz = sourcesZ[idxSource] - tz;
ComputeClass inv_square_distance = mOne / (dx*dx + dy*dy + dz*dz);
const ComputeClass inv_distance = FMath::Sqrt(inv_square_distance);
inv_square_distance *= inv_distance;
inv_square_distance *= tv * sourcesPhysicalValues[idxSource];
dx *= inv_square_distance;
dy *= inv_square_distance;
dz *= inv_square_distance;
tfx += dx;
tfy += dy;
tfz += dz;
tpo += inv_distance * sourcesPhysicalValues[idxSource];
sourcesForcesX[idxSource] -= dx;
sourcesForcesY[idxSource] -= dy;
sourcesForcesZ[idxSource] -= dz;
sourcesPotentials[idxSource] += inv_distance * tv;
}
targetsForcesX[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tfx);
targetsForcesY[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tfy);
targetsForcesZ[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tfz);
targetsPotentials[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tpo);
}
}
for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
const int limitForTarget = NbFRealInComputeClass-(idxTarget%NbFRealInComputeClass);
for(int idxS = 1 ; idxS < limitForTarget ; ++idxS){
const int idxSource = idxTarget + idxS;
FReal dx = targetsX[idxSource] - targetsX[idxTarget];
FReal dy = targetsY[idxSource] - targetsY[idxTarget];
FReal dz = targetsZ[idxSource] - targetsZ[idxTarget];
FReal inv_square_distance = FReal(1.0) / (dx*dx + dy*dy + dz*dz);
const FReal inv_distance = FMath::Sqrt(inv_square_distance);
inv_square_distance *= inv_distance;
inv_square_distance *= targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource];
dx *= inv_square_distance;
dy *= inv_square_distance;
dz *= inv_square_distance;
targetsForcesX[idxTarget] += dx;
targetsForcesY[idxTarget] += dy;
targetsForcesZ[idxTarget] += dz;
targetsPotentials[idxTarget] += inv_distance * targetsPhysicalValues[idxSource];
targetsForcesX[idxSource] -= dx;
targetsForcesY[idxSource] -= dy;
targetsForcesZ[idxSource] -= dz;
targetsPotentials[idxSource] += inv_distance * targetsPhysicalValues[idxTarget];
}
}
}
template <class ContainerClass, class ComputeClass, int NbFRealInComputeClass>
static void GenericFullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
const int nbParticlesTargets = inTargets->getNbParticles();
const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues();
const FReal*const targetsX = inTargets->getPositions()[0];
const FReal*const targetsY = inTargets->getPositions()[1];
const FReal*const targetsZ = inTargets->getPositions()[2];
FReal*const targetsForcesX = inTargets->getForcesX();
FReal*const targetsForcesY = inTargets->getForcesY();
FReal*const targetsForcesZ = inTargets->getForcesZ();
FReal*const targetsPotentials = inTargets->getPotentials();
const ComputeClass mOne = FMath::One<ComputeClass>();
for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){
if( inNeighbors[idxNeighbors] ){
const int nbParticlesSources = (inNeighbors[idxNeighbors]->getNbParticles()+NbFRealInComputeClass-1)/NbFRealInComputeClass;
const ComputeClass*const sourcesPhysicalValues = (const ComputeClass*)inNeighbors[idxNeighbors]->getPhysicalValues();
const ComputeClass*const sourcesX = (const ComputeClass*)inNeighbors[idxNeighbors]->getPositions()[0];
const ComputeClass*const sourcesY = (const ComputeClass*)inNeighbors[idxNeighbors]->getPositions()[1];
const ComputeClass*const sourcesZ = (const ComputeClass*)inNeighbors[idxNeighbors]->getPositions()[2];
for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
const ComputeClass tx = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsX[idxTarget]);
const ComputeClass ty = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsY[idxTarget]);
const ComputeClass tz = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsZ[idxTarget]);
const ComputeClass tv = FMath::ConvertTo<ComputeClass, const FReal*>(&targetsPhysicalValues[idxTarget]);
ComputeClass tfx = FMath::Zero<ComputeClass>();
ComputeClass tfy = FMath::Zero<ComputeClass>();
ComputeClass tfz = FMath::Zero<ComputeClass>();
ComputeClass tpo = FMath::Zero<ComputeClass>();
for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){
ComputeClass dx = sourcesX[idxSource] - tx;
ComputeClass dy = sourcesY[idxSource] - ty;
ComputeClass dz = sourcesZ[idxSource] - tz;
ComputeClass inv_square_distance = mOne / (dx*dx + dy*dy + dz*dz);
const ComputeClass inv_distance = FMath::Sqrt(inv_square_distance);
inv_square_distance *= inv_distance;
inv_square_distance *= tv * sourcesPhysicalValues[idxSource];
dx *= inv_square_distance;
dy *= inv_square_distance;
dz *= inv_square_distance;
tfx += dx;
tfy += dy;
tfz += dz;
tpo += inv_distance * sourcesPhysicalValues[idxSource];
}
targetsForcesX[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tfx);
targetsForcesY[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tfy);
targetsForcesZ[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tfz);
targetsPotentials[idxTarget] += FMath::ConvertTo<FReal, ComputeClass>(tpo);
}
}
}
}
#ifdef ScalFMM_USE_DOUBLE_PRECISION
#if defined(ScalFMM_USE_AVX)
// AVX DOUBLE
template <class ContainerClass>
static void FullMutual(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullMutual<ContainerClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullRemote<ContainerClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors);
}
#elif defined(ScalFMM_USE_SSE)
// SSE DOUBLE
template <class ContainerClass>
static void FullMutual(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullMutual<ContainerClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullRemote<ContainerClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors);
}
#else
// SSE DOUBLE
template <class ContainerClass>
static void FullMutual(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullMutual<ContainerClass, FReal, 1>(inTargets, inNeighbors, limiteNeighbors);
}
#ifdef ScalFMM_USE_SSE
#include "FP2PRSse.hpp"
#elif defined(ScalFMM_USE_AVX)
#include "FP2PRAvx.hpp"
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullRemote<ContainerClass, FReal, 1>(inTargets, inNeighbors, limiteNeighbors);
}
#endif
#else
#if defined(ScalFMM_USE_AVX)
// AVX DOUBLE
template <class ContainerClass>
static void FullMutual(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullMutual<ContainerClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullRemote<ContainerClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors);
}
#elif defined(ScalFMM_USE_SSE)
// SSE DOUBLE
template <class ContainerClass>
static void FullMutual(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullMutual<ContainerClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullRemote<ContainerClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors);
}
#else
#include "FP2PRClassic.hpp"
#endif //Includes
// SSE DOUBLE
template <class ContainerClass>
static void FullMutual(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullMutual<ContainerClass, FReal, 1>(inTargets, inNeighbors, limiteNeighbors);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
GenericFullRemote<ContainerClass, FReal, 1>(inTargets, inNeighbors, limiteNeighbors);
}
#endif
#endif
}
#endif // FP2PR_HPP
......@@ -47,302 +47,302 @@ struct FMath{
/** To get absolute value */
template <class NumType>
static NumType Abs(const NumType inV){
return (inV < 0 ? -inV : inV);
return (inV < 0 ? -inV : inV);
}
#ifdef ScalFMM_USE_SSE
static __m128 Abs(const __m128 inV){
return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), inV), inV);
return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), inV), inV);
}
static __m128d Abs(const __m128d inV){
return _mm_max_pd(_mm_sub_pd(_mm_setzero_pd(), inV), inV);
return _mm_max_pd(_mm_sub_pd(_mm_setzero_pd(), inV), inV);
}
#endif
#ifdef ScalFMM_USE_AVX
static __m256 Abs(const __m256 inV){
return _mm256_max_ps(_mm256_sub_ps(_mm256_setzero_ps(), inV), inV);
return _mm256_max_ps(_mm256_sub_ps(_mm256_setzero_ps(), inV), inV);
}
static __m256d Abs(const __m256d inV){
return _mm256_max_pd(_mm256_sub_pd(_mm256_setzero_pd(), inV), inV);
return _mm256_max_pd(_mm256_sub_pd(_mm256_setzero_pd(), inV), inV);
}
#endif
/** To get max between 2 values */
template <class NumType>
static NumType Max(const NumType inV1, const NumType inV2){
return (inV1 > inV2 ? inV1 : inV2);
return (inV1 > inV2 ? inV1 : inV2);
}
/** To get min between 2 values */
template <class NumType>
static NumType Min(const NumType inV1, const NumType inV2){
return (inV1 < inV2 ? inV1 : inV2);
return (inV1 < inV2 ? inV1 : inV2);
}
#ifdef ScalFMM_USE_SSE
static __m128 Max(const __m128 inV1, const __m128 inV2){
return _mm_max_ps(inV1, inV2);
return _mm_max_ps(inV1, inV2);
}
static __m128 Min(const __m128 inV1, const __m128 inV2){
return _mm_min_ps(inV1, inV2);
return _mm_min_ps(inV1, inV2);
}
static __m128d Max(const __m128d inV1, const __m128d inV2){
return _mm_max_pd(inV1, inV2);
return _mm_max_pd(inV1, inV2);
}
static __m128d Min(const __m128d inV1, const __m128d inV2){
return _mm_min_pd(inV1, inV2);
return _mm_min_pd(inV1, inV2);
}
#endif
#ifdef ScalFMM_USE_AVX
static __m256 Max(const __m256 inV1, const __m256 inV2){
return _mm256_max_ps(inV1, inV2);
return _mm256_max_ps(inV1, inV2);
}
static __m256 Min(const __m256 inV1, const __m256 inV2){
return _mm256_min_ps(inV1, inV2);
return _mm256_min_ps(inV1, inV2);
}
static __m256d Max(const __m256d inV1, const __m256d inV2){
return _mm256_max_pd(inV1, inV2);
return _mm256_max_pd(inV1, inV2);
}
static __m256d Min(const __m256d inV1, const __m256d inV2){
return _mm256_min_pd(inV1, inV2);
return _mm256_min_pd(inV1, inV2);
}
#endif
/** To know if 2 values seems to be equal */
template <class NumType>
static bool LookEqual(const NumType inV1, const NumType inV2){
return (Abs(inV1-inV2) < std::numeric_limits<NumType>::epsilon());
//const FReal relTol = FReal(0.00001);
//const FReal absTol = FReal(0.00001);
//return (Abs(inV1 - inV2) <= Max(absTol, relTol * Max(Abs(inV1), Abs(inV2))));
return (Abs(inV1-inV2) < std::numeric_limits<NumType>::epsilon());
//const FReal relTol = FReal(0.00001);
//const FReal absTol = FReal(0.00001);
//return (Abs(inV1 - inV2) <= Max(absTol, relTol * Max(Abs(inV1), Abs(inV2))));
}
/** To know if 2 values seems to be equal */
template <class NumType>
static FReal RelatifDiff(const NumType inV1, const NumType inV2){
return Abs(inV1 - inV2)*Abs(inV1 - inV2)/Max(Abs(inV1*inV1), Abs(inV2*inV2));
return Abs(inV1 - inV2)*Abs(inV1 - inV2)/Max(Abs(inV1*inV1), Abs(inV2*inV2));
}
/** To get floor of a FReal */
static float dfloor(const float inValue){
return floorf(inValue);
return floorf(inValue);
}
static double dfloor(const double inValue){
return floor(inValue);
return floor(inValue);
}
/** To get ceil of a FReal */
static float Ceil(const float inValue){
return ceilf(inValue);
return ceilf(inValue);
}
static double Ceil(const double inValue){
return ceil(inValue);
return ceil(inValue);
}
#if defined(ScalFMM_USE_SSE ) && defined(__SSSE4_1__)
static __m128 dfloor(const __m128 inV){
return _mm_floor_ps(inV);
return _mm_floor_ps(inV);
}
static __m128d dfloor(const __m128d inV){
return _mm_floor_pd(inV);
return _mm_floor_pd(inV);
}
static __m128 Ceil(const __m128 inV){
return _mm_ceil_ps(inV);
return _mm_ceil_ps(inV);
}
static __m128d Ceil(const __m128d inV){
return _mm_ceil_pd(inV);
return _mm_ceil_pd(inV);
}
#endif
#ifdef ScalFMM_USE_AVX
static __m256 dfloor(const __m256 inV){
return _mm256_floor_ps(inV);
return _mm256_floor_ps(inV);
}
static __m256d dfloor(const __m256d inV){
return _mm256_floor_pd(inV);
return _mm256_floor_pd(inV);
}
static __m256 Ceil(const __m256 inV){
return _mm256_ceil_ps(inV);
return _mm256_ceil_ps(inV);
}
static __m256d Ceil(const __m256d inV){
return _mm256_ceil_pd(inV);
return _mm256_ceil_pd(inV);
}
#endif
/** To get pow */
static double pow(double x, double y){
return ::pow(x,y);
return ::pow(x,y);
}
static double pow(float x, float y){
return ::powf(x,y);
return ::powf(x,y);
}
template <class NumType>
static NumType pow(const NumType inValue, int power){
NumType result = 1;
while(power-- > 0) result *= inValue;
return result;
NumType result = 1;
while(power-- > 0) result *= inValue;
return result;
}
/** To get pow of 2 */
static int pow2(const int power){
return (1 << power);
return (1 << power);
}
/** To get factorial */
template <class NumType>
static double factorial(int inValue){
if(inValue==0) return NumType(1.);
else {
NumType result = NumType(inValue);
while(--inValue > 1) result *= NumType(inValue);
return result;
}
if(inValue==0) return NumType(1.);
else {
NumType result = NumType(inValue);
while(--inValue > 1) result *= NumType(inValue);
return result;
}
}
/** To know if a value is between two others */
template <class NumType>
static bool Between(const NumType inValue, const NumType inMin, const NumType inMax){
return ( inMin <= inValue && inValue < inMax );
return ( inMin <= inValue && inValue < inMax );
}
/** To get sqrt of a FReal */
static float Sqrt(const float inValue){
return sqrtf(inValue);
return sqrtf(inValue);
}
static double Sqrt(const double inValue){
return sqrt(inValue);
return sqrt(inValue);
}
static float Rsqrt(const float inValue){
return float(1.0)/sqrtf(inValue);
return float(1.0)/sqrtf(inValue);
}
static double Rsqrt(const double inValue){
return 1.0/sqrt(inValue);
return 1.0/sqrt(inValue);
}
#ifdef ScalFMM_USE_SSE
static __m128 Sqrt(const __m128 inV){
return _mm_sqrt_ps(inV);
return _mm_sqrt_ps(inV);
}
static __m128d Sqrt(const __m128d inV){
return _mm_sqrt_pd(inV);
return _mm_sqrt_pd(inV);
}
static __m128 Rsqrt(const __m128 inV){
return _mm_rsqrt_ps(inV);
return _mm_rsqrt_ps(inV);
}
static __m128d Rsqrt(const __m128d inV){
return _mm_set_pd1(1.0) / _mm_sqrt_pd(inV);
return _mm_set_pd1(1.0) / _mm_sqrt_pd(inV);
}
#endif
#ifdef ScalFMM_USE_AVX
static __m256 Sqrt(const __m256 inV){
return _mm256_sqrt_ps(inV);
return _mm256_sqrt_ps(inV);
}
static __m256d Sqrt(const __m256d inV){
return _mm256_sqrt_pd(inV);
return _mm256_sqrt_pd(inV);
}
static __m256 Rsqrt(const __m256 inV){
return _mm256_rsqrt_ps(inV);
return _mm256_rsqrt_ps(inV);
}
static __m256d Rsqrt(const __m256d inV){
return _mm256_set1_pd(1.0) / _mm256_sqrt_pd(inV);
return _mm256_set1_pd(1.0) / _mm256_sqrt_pd(inV);
}
#endif