Commit f0b0460d authored by Berenger Bramas's avatar Berenger Bramas

Use inastemp

parent 2c5546a4
[submodule "CMakeModules/morse_cmake"] [submodule "CMakeModules/morse_cmake"]
path = CMakeModules/morse_cmake path = CMakeModules/morse_cmake
url = https://gitlab.inria.fr/solverstack/morse_cmake.git url = https://gitlab.inria.fr/solverstack/morse_cmake.git
[submodule "inastemp"]
path = inastemp
url = https://gitlab.mpcdf.mpg.de/bbramas/inastemp.git
This diff is collapsed.
###########################################################################################
# Berenger Bramas Inria
# This goes with the getCpuInfos.cpp
# This will create one CMAKE value per output option from the cpp file.
# For example the output of the CPP file can be:
# SSE3=TRUE;AVX=FALSE
# Then it will create:
# CPUOPTION_SSE3 = TRUE
# CPUOPTION_AVX = FALSE
#
# The binary should return 0 on success.
###########################################################################################
# GetCpuInfos()
#
# Compiles and runs CMakeModules/getCpuInfos.cpp and turns its output, a
# ";"-separated list of NAME=VALUE pairs, into one variable per pair:
#   CPUOPTION_<NAME> = <VALUE>
# The whole (whitespace-stripped) output is also stored in CPU_OPTIONS.
#
# This is a macro, so every variable set here (CPU_OPTIONS, CPUOPTION_*, but
# also the helpers GetCpuInfosFile, comp, run, optionNode, ...) is created in
# the caller's scope. try_run() additionally stores RUN_RESULT_VAR and
# COMPILE_RESULT_VAR in the CMake cache.
#
# A missing source file is a fatal error; a failed compilation, a failed run
# or a malformed entry only produces a warning.
macro(GetCpuInfos)
    # The original CPP file
    set(GetCpuInfosFile "${PROJECT_SOURCE_DIR}/CMakeModules/getCpuInfos.cpp")
    # Fatal error if the file does not exist
    # (quoted so that a path containing spaces is tested as one argument)
    if(NOT EXISTS "${GetCpuInfosFile}")
        message(FATAL_ERROR "The GetCpuInfosFile does not exist (${GetCpuInfosFile})")
    endif()
    # Compile and execute the file
    try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR
            "${CMAKE_BINARY_DIR}" "${GetCpuInfosFile}"
            COMPILE_OUTPUT_VARIABLE comp
            RUN_OUTPUT_VARIABLE run)
    # If it has successfully compiled and run
    if(COMPILE_RESULT_VAR AND (RUN_RESULT_VAR EQUAL 0))
        # Drop leading/trailing whitespace: a trailing newline printed by the
        # binary would otherwise end up inside the last value ("FALSE\n"),
        # which CMake does not recognise as a false constant.
        string(STRIP "${run}" run)
        set(CPU_OPTIONS ${run})
        # For each NAME=VALUE entry
        foreach(optionNode ${run})
            # Split name and value
            string(REPLACE "=" ";" optionNameAndValue "${optionNode}")
            list(LENGTH optionNameAndValue optionLength)
            # Only accept entries made of exactly one name and one value
            if(optionLength EQUAL 2)
                list(GET optionNameAndValue 0 optionName)
                list(GET optionNameAndValue 1 optionValue)
                # Create the cmake variable
                set(CPUOPTION_${optionName} ${optionValue})
            else()
                message(WARNING "GetCpuInfosFile wrong format for ${optionNode}.")
            endif()
        endforeach()
        # Output the sentence from the binary
        message(STATUS "CPUOPTION : ${CPU_OPTIONS}")
    else()
        message(WARNING "GetCpuInfosFile did not return correctly.")
    endif()
endmacro()
// Compile probe for 512-bit vectors: when __MIC__ is defined, this file only
// compiles if the compiler accepts `+=` applied directly to __m512 operands.
// When __MIC__ is not defined the probing statements are preprocessed away
// and main() is empty, so the file builds regardless.
// NOTE(review): presumably fed to a CMake try_compile()/try_run() check;
// confirm against the calling CMake code.
#include "immintrin.h"
int main() {
#ifdef __MIC__
// Operands are intentionally left uninitialised: only compilability matters.
__m512 tx, ty ;
tx += ty ;
#endif
return 0;
}
// Compile probe for 256-bit vectors: this file only compiles if the compiler
// accepts `+=` applied directly to __m256d operands.
// NOTE(review): presumably fed to a CMake try_compile()/try_run() check;
// confirm against the calling CMake code.
#include "immintrin.h"
int main() {
// Operands are intentionally left uninitialised: only compilability matters.
__m256d tx, ty ;
tx += ty ;
return 0;
}
// Compile probe for 128-bit vectors: this file only compiles if the SSE..SSE3
// headers are usable with the current flags and the compiler accepts `+=`
// applied directly to __m128d operands.
// NOTE(review): presumably fed to a CMake try_compile()/try_run() check;
// confirm against the calling CMake code.
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <pmmintrin.h> // SSE3
#ifdef __SSSE3__
#include <tmmintrin.h> // SSSE3
#endif
// The macro predefined by compilers when SSE4.1 is enabled is __SSE4_1__
// (the previous spelling __SSSE4_1__ is never defined, so the header was
// never included).
#ifdef __SSE4_1__
#include <smmintrin.h> // SSE4.1
#endif
int main() {
    // Operands are intentionally left uninitialised: only compilability matters.
    __m128d tx, ty ;
    tx += ty ;
    return 0;
}
// Compile probe for horizontal-add intrinsics: this file only compiles if
// the included headers declare _mm256_hadd_pd / _mm256_hadd_ps (256-bit) and
// _mm_hadd_pd / _mm_hadd_ps (128-bit) and they are usable with the current
// compiler flags.
// NOTE(review): presumably fed to a CMake try_compile()/try_run() check;
// confirm against the calling CMake code.
#include <x86intrin.h>
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h> // SSSE3
#include <smmintrin.h> // SSE4
#include <immintrin.h> // AVX
int main(){
// 256-bit variants (double then float). Operands are intentionally left
// uninitialised: only compilability matters.
{
__m256d res0d, res1d;
res0d = _mm256_hadd_pd(res0d, res1d);
__m256 res0, res1;
res0 = _mm256_hadd_ps(res0, res1);
}
// 128-bit variants (double then float).
{
__m128d res0d, res1d;
res0d = _mm_hadd_pd(res0d, res1d);
__m128 res0, res1;
res0 = _mm_hadd_ps(res0, res1);
}
return 0;
}
// Compile probe for horizontal-add intrinsics at three vector widths: the
// 256-bit and 128-bit calls are always compiled, the 512-bit calls
// (_mm512_hadd_pd / _mm512_hadd_ps) only when __MIC__ is defined.
// NOTE(review): presumably fed to a CMake try_compile()/try_run() check;
// confirm against the calling CMake code.
#include <x86intrin.h>
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h> // SSSE3
#include <smmintrin.h> // SSE4
#include <immintrin.h> // AVX
int main(){
// 512-bit variants, skipped entirely unless __MIC__ is defined.
{
#ifdef __MIC__
__m512d res0d, res1d;
res0d = _mm512_hadd_pd(res0d, res1d);
__m512 res0, res1;
res0 = _mm512_hadd_ps(res0, res1);
#endif
}
// 256-bit variants (double then float). Operands are intentionally left
// uninitialised: only compilability matters.
{
__m256d res0d, res1d;
res0d = _mm256_hadd_pd(res0d, res1d);
__m256 res0, res1;
res0 = _mm256_hadd_ps(res0, res1);
}
// 128-bit variants (double then float).
{
__m128d res0d, res1d;
res0d = _mm_hadd_pd(res0d, res1d);
__m128 res0, res1;
res0 = _mm_hadd_ps(res0, res1);
}
return 0;
}
int main(){
int i ;
#ifdef __INTEL_COMPILER
i = 0;
#else
#error 'Not Intel Compiler "
#endif
}
// Compile probe for the 128-bit horizontal-add intrinsics: this file only
// compiles if _mm_hadd_pd and _mm_hadd_ps are declared by the included
// headers and usable with the current compiler flags.
// NOTE(review): presumably fed to a CMake try_compile()/try_run() check;
// confirm against the calling CMake code.
#include <x86intrin.h>
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <pmmintrin.h> // SSE3
#ifdef __SSSE3__
#include <tmmintrin.h> // SSSE3
#endif
// The macro predefined by compilers when SSE4.1 is enabled is __SSE4_1__
// (the previous spelling __SSSE4_1__ is never defined, so the header was
// never included).
#ifdef __SSE4_1__
#include <smmintrin.h> // SSE4.1
#endif
int main(){
    // Operands are intentionally left uninitialised: only compilability matters.
    __m128d res0d, res1d;
    res0d = _mm_hadd_pd(res0d, res1d);
    __m128 res0, res1;
    res0 = _mm_hadd_ps(res0, res1);
    return 0;
}
This diff is collapsed.
...@@ -30,7 +30,7 @@ The following are optional: ...@@ -30,7 +30,7 @@ The following are optional:
### Get and Build ScalFMM ### Get and Build ScalFMM
To use last development states of ScalFMM, please clone the develop To use last development states of ScalFMM, please clone the develop
branch. Note that ScalFMM contains a git submodule `morse_cmake`. branch. Note that ScalFMM contains two git submodules `morse_cmake` and `inastemp`.
To get sources please use these commands: To get sources please use these commands:
``` bash ``` bash
git clone --recursive git@gitlab.inria.fr:solverstack/ScalFMM.git -b develop git clone --recursive git@gitlab.inria.fr:solverstack/ScalFMM.git -b develop
......
...@@ -95,13 +95,13 @@ public: ...@@ -95,13 +95,13 @@ public:
// apply P2L // apply P2L
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
for (unsigned int m = 0; m<FBase::nnodes; ++m) { for (unsigned int m = 0; m<FBase::nnodes; ++m) {
ComputeClass XX = FMath::ConvertTo<ComputeClass>(X[m].getX()); ComputeClass XX = ComputeClass(X[m].getX());
ComputeClass XY = FMath::ConvertTo<ComputeClass>(X[m].getY()); ComputeClass XY = ComputeClass(X[m].getY());
ComputeClass XZ = FMath::ConvertTo<ComputeClass>(X[m].getZ()); ComputeClass XZ = ComputeClass(X[m].getZ());
std::size_t idxPart = 0; std::size_t idxPart = 0;
// Compute using vectorization for all but the last array elements // Compute using vectorization for all but the last array elements
ComputeClass tmpLocalExp = FMath::Zero<ComputeClass>(); ComputeClass tmpLocalExp = ComputeClass(0);
for (; for (;
idxPart < ((particles->getNbParticles()) idxPart < ((particles->getNbParticles())
/ FRealCount); / FRealCount);
...@@ -114,7 +114,7 @@ public: ...@@ -114,7 +114,7 @@ public:
* physicalValues[idxPart]; * physicalValues[idxPart];
} }
local->get(idxRhs)[m] += FMath::ConvertTo<FReal>(tmpLocalExp); local->get(idxRhs)[m] += ComputeClass(tmpLocalExp);
// Compute the last array elements one by one if they exist // Compute the last array elements one by one if they exist
if(idxPart < ((particles->getNbParticles() + FRealCount - 1) / FRealCount)) { if(idxPart < ((particles->getNbParticles() + FRealCount - 1) / FRealCount)) {
...@@ -172,11 +172,11 @@ public: ...@@ -172,11 +172,11 @@ public:
for (unsigned int n=0; n<FBase::nnodes; ++n){ for (unsigned int n=0; n<FBase::nnodes; ++n){
ComputeClass MultipoleExpansion = ComputeClass MultipoleExpansion =
FMath::ConvertTo<ComputeClass, FReal>(pole->get(idxRhs)[n]); ComputeClass(pole->get(idxRhs)[n]);
ComputeClass YX = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getX()); ComputeClass YX = ComputeClass(Y[n].getX());
ComputeClass YY = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getY()); ComputeClass YY = ComputeClass(Y[n].getY());
ComputeClass YZ = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getZ()); ComputeClass YZ = ComputeClass(Y[n].getZ());
for(std::size_t idxPart = 0; for(std::size_t idxPart = 0;
idxPart < ( (particles->getNbParticles() + FRealCount - 1) idxPart < ( (particles->getNbParticles() + FRealCount - 1)
......
...@@ -140,11 +140,11 @@ public: ...@@ -140,11 +140,11 @@ public:
// apply P2L // apply P2L
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
for (unsigned int m = 0; m < FBase::nnodes; ++m) { for (unsigned int m = 0; m < FBase::nnodes; ++m) {
ComputeClass XX = FMath::ConvertTo<ComputeClass>(X[m].getX()); ComputeClass XX = ComputeClass(X[m].getX());
ComputeClass XY = FMath::ConvertTo<ComputeClass>(X[m].getY()); ComputeClass XY = ComputeClass(X[m].getY());
ComputeClass XZ = FMath::ConvertTo<ComputeClass>(X[m].getZ()); ComputeClass XZ = ComputeClass(X[m].getZ());
ComputeClass tmpLocalExp = FMath::Zero<ComputeClass>(); ComputeClass tmpLocalExp = ComputeClass(0);
// Compute using vectorization for all but the last array elements // Compute using vectorization for all but the last array elements
std::size_t idxPart = 0; std::size_t idxPart = 0;
for (; idxPart < (particles->getNbParticles() / FRealCount); for (; idxPart < (particles->getNbParticles() / FRealCount);
...@@ -157,7 +157,7 @@ public: ...@@ -157,7 +157,7 @@ public:
* physicalValues[idxPart]; * physicalValues[idxPart];
} }
local->get(idxRhs)[m] += FMath::ConvertTo<FReal>(tmpLocalExp); local->get(idxRhs)[m] += FReal(tmpLocalExp);
// Compute the last array elements one by one if they exist // Compute the last array elements one by one if they exist
if(idxPart < ((particles->getNbParticles() + FRealCount - 1) / FRealCount)) { if(idxPart < ((particles->getNbParticles() + FRealCount - 1) / FRealCount)) {
...@@ -221,11 +221,11 @@ public: ...@@ -221,11 +221,11 @@ public:
for (unsigned int n=0; n<FBase::nnodes; ++n){ for (unsigned int n=0; n<FBase::nnodes; ++n){
ComputeClass MultipoleExpansion = ComputeClass MultipoleExpansion =
FMath::ConvertTo<ComputeClass, FReal>(pole->get(idxRhs)[n]); ComputeClass(pole->get(idxRhs)[n]);
ComputeClass YX = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getX()); ComputeClass YX = ComputeClass(Y[n].getX());
ComputeClass YY = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getY()); ComputeClass YY = ComputeClass(Y[n].getY());
ComputeClass YZ = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getZ()); ComputeClass YZ = ComputeClass(Y[n].getZ());
for(std::size_t idxPart = 0; for(std::size_t idxPart = 0;
idxPart < ( (particles->getNbParticles() + FRealCount - 1) idxPart < ( (particles->getNbParticles() + FRealCount - 1)
......
...@@ -89,7 +89,7 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel<FReal> ...@@ -89,7 +89,7 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffx = (xt-xs); const ValueClass diffx = (xt-xs);
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
return FMath::One<ValueClass>() / FMath::Sqrt(diffx*diffx + diffy*diffy + diffz*diffz); return ValueClass(1) / FMath::Sqrt(diffx*diffx + diffy*diffy + diffz*diffz);
} }
// evaluate interaction (blockwise) // evaluate interaction (blockwise)
...@@ -110,7 +110,7 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel<FReal> ...@@ -110,7 +110,7 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffx = (xt-xs); const ValueClass diffx = (xt-xs);
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
const ValueClass one_over_r = FMath::One<ValueClass>() / FMath::Sqrt(diffx*diffx + diffy*diffy + diffz*diffz); const ValueClass one_over_r = ValueClass(1) / FMath::Sqrt(diffx*diffx + diffy*diffy + diffz*diffz);
const ValueClass one_over_r3 = one_over_r*one_over_r*one_over_r; const ValueClass one_over_r3 = one_over_r*one_over_r*one_over_r;
...@@ -176,9 +176,9 @@ struct FInterpMatrixKernelRH :FInterpMatrixKernelR<FReal>{ ...@@ -176,9 +176,9 @@ struct FInterpMatrixKernelRH :FInterpMatrixKernelR<FReal>{
const ValueClass diffx = (xt-xs); const ValueClass diffx = (xt-xs);
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
return FMath::One<ValueClass>() / FMath::Sqrt(FMath::ConvertTo<ValueClass,FReal>(LX)*diffx*diffx + return ValueClass(1) / FMath::Sqrt(ValueClass(LX)*diffx*diffx +
FMath::ConvertTo<ValueClass,FReal>(LY)*diffy*diffy + ValueClass(LY)*diffy*diffy +
FMath::ConvertTo<ValueClass,FReal>(LZ)*diffz*diffz); ValueClass(LZ)*diffz*diffz);
} }
void setCoeff(const FReal& a, const FReal& b, const FReal& c) void setCoeff(const FReal& a, const FReal& b, const FReal& c)
{LX= a*a ; LY = b*b ; LZ = c *c;} {LX= a*a ; LY = b*b ; LZ = c *c;}
...@@ -208,16 +208,16 @@ struct FInterpMatrixKernelRH :FInterpMatrixKernelR<FReal>{ ...@@ -208,16 +208,16 @@ struct FInterpMatrixKernelRH :FInterpMatrixKernelR<FReal>{
const ValueClass diffx = (xt-xs); const ValueClass diffx = (xt-xs);
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
const ValueClass one_over_rL = FMath::One<ValueClass>() / FMath::Sqrt(FMath::ConvertTo<ValueClass,FReal>(LX)*diffx*diffx + const ValueClass one_over_rL = ValueClass(1) / (ValueClass(LX)*diffx*diffx +
FMath::ConvertTo<ValueClass,FReal>(LY)*diffy*diffy + ValueClass(LY)*diffy*diffy +
FMath::ConvertTo<ValueClass,FReal>(LZ)*diffz*diffz); ValueClass(LZ)*diffz*diffz);
const ValueClass one_over_rL3 = one_over_rL*one_over_rL*one_over_rL; const ValueClass one_over_rL3 = one_over_rL*one_over_rL*one_over_rL;
block[0] = one_over_rL; block[0] = one_over_rL;
blockDerivative[0] = FMath::ConvertTo<ValueClass,FReal>(LX) * one_over_rL3 * diffx; blockDerivative[0] = ValueClass(LX) * one_over_rL3 * diffx;
blockDerivative[1] = FMath::ConvertTo<ValueClass,FReal>(LY)* one_over_rL3 * diffy; blockDerivative[1] = ValueClass(LY)* one_over_rL3 * diffy;
blockDerivative[2] = FMath::ConvertTo<ValueClass,FReal>(LZ)* one_over_rL3 * diffz; blockDerivative[2] = ValueClass(LZ)* one_over_rL3 * diffz;
} }
...@@ -283,7 +283,7 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel<FReal> ...@@ -283,7 +283,7 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffx = (xt-xs); const ValueClass diffx = (xt-xs);
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
return FMath::One<ValueClass>() / FReal(diffx*diffx+diffy*diffy+diffz*diffz); return ValueClass(1) / FReal(diffx*diffx+diffy*diffy+diffz*diffz);
} }
// evaluate interaction (blockwise) // evaluate interaction (blockwise)
...@@ -305,12 +305,12 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel<FReal> ...@@ -305,12 +305,12 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
const ValueClass r2 = (diffx*diffx+diffy*diffy+diffz*diffz); const ValueClass r2 = (diffx*diffx+diffy*diffy+diffz*diffz);
const ValueClass one_over_r2 = FMath::One<ValueClass>() / (r2); const ValueClass one_over_r2 = ValueClass(1) / (r2);
const ValueClass one_over_r4 = one_over_r2*one_over_r2; const ValueClass one_over_r4 = one_over_r2*one_over_r2;
block[0] = one_over_r2; block[0] = one_over_r2;
const ValueClass coef = FMath::ConvertTo<ValueClass,FReal>(-2.) * one_over_r4; const ValueClass coef = ValueClass(-2.) * one_over_r4;
blockDerivative[0] = coef * diffx; blockDerivative[0] = coef * diffx;
blockDerivative[1] = coef * diffy; blockDerivative[1] = coef * diffy;
blockDerivative[2] = coef * diffz; blockDerivative[2] = coef * diffz;
...@@ -382,7 +382,7 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel<FReal> ...@@ -382,7 +382,7 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
const ValueClass r = FMath::Sqrt(diffx*diffx+diffy*diffy+diffz*diffz); const ValueClass r = FMath::Sqrt(diffx*diffx+diffy*diffy+diffz*diffz);
const ValueClass r3 = r*r*r; const ValueClass r3 = r*r*r;
const ValueClass one_over_r6 = FMath::One<ValueClass>() / (r3*r3); const ValueClass one_over_r6 = ValueClass(1) / (r3*r3);
//return one_over_r6 * one_over_r6; //return one_over_r6 * one_over_r6;
//return one_over_r6; //return one_over_r6;
return one_over_r6 * one_over_r6 - one_over_r6; return one_over_r6 * one_over_r6 - one_over_r6;
...@@ -409,12 +409,12 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel<FReal> ...@@ -409,12 +409,12 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel<FReal>
const ValueClass r = FMath::Sqrt(diffx*diffx+diffy*diffy+diffz*diffz); const ValueClass r = FMath::Sqrt(diffx*diffx+diffy*diffy+diffz*diffz);
const ValueClass r2 = r*r; const ValueClass r2 = r*r;
const ValueClass r3 = r2*r; const ValueClass r3 = r2*r;
const ValueClass one_over_r6 = FMath::One<ValueClass>() / (r3*r3); const ValueClass one_over_r6 = ValueClass(1) / (r3*r3);
const ValueClass one_over_r8 = one_over_r6 / (r2); const ValueClass one_over_r8 = one_over_r6 / (r2);
block[0] = one_over_r6 * one_over_r6 - one_over_r6; block[0] = one_over_r6 * one_over_r6 - one_over_r6;
const FReal coef = FMath::ConvertTo<ValueClass,FReal>(12.0)*one_over_r6*one_over_r8 - FMath::ConvertTo<ValueClass,FReal>(6.0)*one_over_r8; const FReal coef = ValueClass(12.0)*one_over_r6*one_over_r8 - ValueClass(6.0)*one_over_r8;
blockDerivative[0]= coef * diffx; blockDerivative[0]= coef * diffx;
blockDerivative[1]= coef * diffy; blockDerivative[1]= coef * diffy;
blockDerivative[2]= coef * diffz; blockDerivative[2]= coef * diffz;
...@@ -493,7 +493,7 @@ struct FInterpMatrixKernelAPLUSRR : FInterpAbstractMatrixKernel<FReal> ...@@ -493,7 +493,7 @@ struct FInterpMatrixKernelAPLUSRR : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
const ValueClass r2 = (diffx*diffx+diffy*diffy+diffz*diffz); const ValueClass r2 = (diffx*diffx+diffy*diffy+diffz*diffz);
return FMath::One<ValueClass>() / (r2 + FMath::ConvertTo<ValueClass,FReal>(CoreWidth)); return ValueClass(1) / (r2 + ValueClass(CoreWidth));
} }
// evaluate interaction (blockwise) // evaluate interaction (blockwise)
...@@ -515,13 +515,13 @@ struct FInterpMatrixKernelAPLUSRR : FInterpAbstractMatrixKernel<FReal> ...@@ -515,13 +515,13 @@ struct FInterpMatrixKernelAPLUSRR : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
const ValueClass r2 = (diffx*diffx+diffy*diffy+diffz*diffz); const ValueClass r2 = (diffx*diffx+diffy*diffy+diffz*diffz);
const ValueClass one_over_a_plus_r2 = FMath::One<ValueClass>() / (r2 + FMath::ConvertTo<ValueClass,FReal>(CoreWidth)); const ValueClass one_over_a_plus_r2 = ValueClass(1) / (r2 + ValueClass(CoreWidth));
const ValueClass one_over_a_plus_r2_squared = one_over_a_plus_r2*one_over_a_plus_r2; const ValueClass one_over_a_plus_r2_squared = one_over_a_plus_r2*one_over_a_plus_r2;
block[0] = one_over_a_plus_r2; block[0] = one_over_a_plus_r2;
// TODO Fix derivative // TODO Fix derivative
const ValueClass coef = FMath::ConvertTo<ValueClass,FReal>(-2.) * one_over_a_plus_r2_squared; const ValueClass coef = ValueClass(-2.) * one_over_a_plus_r2_squared;
blockDerivative[0] = coef * diffx; blockDerivative[0] = coef * diffx;
blockDerivative[1] = coef * diffy; blockDerivative[1] = coef * diffy;
blockDerivative[2] = coef * diffz; blockDerivative[2] = coef * diffz;
......
...@@ -120,13 +120,13 @@ struct FInterpMatrixKernelGauss : FAbstractCorrelationKernel<FReal> ...@@ -120,13 +120,13 @@ struct FInterpMatrixKernelGauss : FAbstractCorrelationKernel<FReal>
{ {
const ValueClass diff[3] = {(x1-x2),(y1-y2),(z1-z2)}; const ValueClass diff[3] = {(x1-x2),(y1-y2),(z1-z2)};
ValueClass dist2 = FMath::Zero<ValueClass>(); ValueClass dist2 = ValueClass(0.);
for(int d=0; d<3; ++d){ for(int d=0; d<3; ++d){
const ValueClass distX = diff[d] / FMath::ConvertTo<ValueClass,FReal>(lengthScale_); const ValueClass distX = diff[d] / ValueClass(lengthScale_);
dist2 += distX*distX; dist2 += distX*distX;
} }
return FMath::Exp(FMath::ConvertTo<ValueClass,FReal>(-0.5)*dist2); return FMath::Exp(ValueClass(-0.5)*dist2);
} }
...@@ -145,7 +145,7 @@ struct FInterpMatrixKernelGauss : FAbstractCorrelationKernel<FReal> ...@@ -145,7 +145,7 @@ struct FInterpMatrixKernelGauss : FAbstractCorrelationKernel<FReal>
ValueClass block[1], ValueClass blockDerivative[3]) const ValueClass block[1], ValueClass blockDerivative[3]) const
{ {
block[0]=this->evaluate(x1,y1,z1,x2,y2,z2); block[0]=this->evaluate(x1,y1,z1,x2,y2,z2);
const ValueClass lengthScaleOpt = FMath::ConvertTo<ValueClass,FReal>(-1/(lengthScale_*lengthScale_)); const ValueClass lengthScaleOpt = ValueClass(-1/(lengthScale_*lengthScale_));
blockDerivative[0] = block[0]*(x1-x2) * lengthScaleOpt; blockDerivative[0] = block[0]*(x1-x2) * lengthScaleOpt;
blockDerivative[1] = block[0]*(y1-y2) * lengthScaleOpt; blockDerivative[1] = block[0]*(y1-y2) * lengthScaleOpt;
blockDerivative[2] = block[0]*(z1-z2) * lengthScaleOpt; blockDerivative[2] = block[0]*(z1-z2) * lengthScaleOpt;
......
...@@ -161,7 +161,7 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel<FReal> ...@@ -161,7 +161,7 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
const ValueClass r2 = diffx*diffx+diffy*diffy+diffz*diffz; const ValueClass r2 = diffx*diffx+diffy*diffy+diffz*diffz;
const ValueClass one_over_r = FMath::One<ValueClass>()/FMath::Sqrt(r2 + FMath::ConvertTo<ValueClass,FReal>(_CoreWidth2)); const ValueClass one_over_r = ValueClass(1)/FMath::Sqrt(r2 + ValueClass(_CoreWidth2));
const ValueClass one_over_r3 = one_over_r*one_over_r*one_over_r; const ValueClass one_over_r3 = one_over_r*one_over_r*one_over_r;
ValueClass ri,rj; ValueClass ri,rj;
...@@ -192,7 +192,7 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel<FReal> ...@@ -192,7 +192,7 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
const ValueClass r2 = diffx*diffx+diffy*diffy+diffz*diffz; const ValueClass r2 = diffx*diffx+diffy*diffy+diffz*diffz;
const ValueClass one_over_r = FMath::One<ValueClass>()/FMath::Sqrt(r2 + FMath::ConvertTo<ValueClass,FReal>(_CoreWidth2)); const ValueClass one_over_r = ValueClass(1)/FMath::Sqrt(r2 + ValueClass(_CoreWidth2));
const ValueClass one_over_r3 = one_over_r*one_over_r*one_over_r; const ValueClass one_over_r3 = one_over_r*one_over_r*one_over_r;
const ValueClass r[3] = {diffx,diffy,diffz}; const ValueClass r[3] = {diffx,diffy,diffz};
...@@ -219,14 +219,14 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel<FReal> ...@@ -219,14 +219,14 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffy = (yt-ys); const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs); const ValueClass diffz = (zt-zs);
const ValueClass r2[3] = {diffx*diffx,diffy*diffy,diffz*diffz}; const ValueClass r2[3] = {diffx*diffx,diffy*diffy,diffz*diffz};
const ValueClass one_over_r2 = FMath::One<ValueClass>() / (r2[0] + r2[1] + r2[2] + FMath::ConvertTo<ValueClass,FReal>(_CoreWidth2)); const ValueClass one_over_r2 = ValueClass(1) / (r2[0] + r2[1] + r2[2] + ValueClass(_CoreWidth2));
const ValueClass one_over_r = FMath::Sqrt(one_over_r2); const ValueClass one_over_r = FMath::Sqrt(one_over_r2);
const ValueClass one_over_r3 = one_over_r2*one_over_r; const ValueClass one_over_r3 = one_over_r2*one_over_r;
const ValueClass r[3] = {diffx,diffy,diffz}; const ValueClass r[3] = {diffx,diffy,diffz};
const ValueClass Three = FMath::ConvertTo<ValueClass,FReal>(3.); const ValueClass Three = ValueClass(3.);
const ValueClass MinusOne = - FMath::One<ValueClass>(); const ValueClass MinusOne = - ValueClass(1);
for(unsigned int d=0;d<NCMP;++d){ for(unsigned int d=0;d<NCMP;++d){
unsigned int i = indexTab[d]; unsigned int i = indexTab[d];
......
This diff is collapsed.
This diff is collapsed.
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#cmakedefine SCALFMM_BLAS_ADD_ #cmakedefine SCALFMM_BLAS_ADD_
#cmakedefine SCALFMM_BLAS_UPCASE #cmakedefine SCALFMM_BLAS_UPCASE
#cmakedefine SCALFMM_BLAS_NOCHANGE #cmakedefine SCALFMM_BLAS_NOCHANGE
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// FFT // FFT
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
...@@ -68,20 +67,6 @@ ...@@ -68,20 +67,6 @@
#cmakedefine SCALFMM_USE_STARPU #cmakedefine SCALFMM_USE_STARPU
#cmakedefine SCALFMM_DISABLE_NATIVE_OMP4 #cmakedefine SCALFMM_DISABLE_NATIVE_OMP4
///////////////////////////////////////////////////////
// SSE
///////////////////////////////////////////////////////
#cmakedefine SCALFMM_USE_SSE
#cmakedefine __AVXPE_INTEL_COMPILER
///////////////////////////////////////////////////////
// AVX
///////////////////////////////////////////////////////
#cmakedefine SCALFMM_USE_AVX
#cmakedefine __SSEPE_INTEL_COMPILER
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// EZTRACE // EZTRACE
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
......
// See LICENCE file at project root
#ifndef FAVX_HPP
#define FAVX_HPP
#include "FGlobal.hpp"
#ifndef SCALFMM_USE_AVX
#error The AVX header is included while SCALFMM_USE_AVX is turned OFF
#else
#include <immintrin.h>
#ifdef __AVXPE_INTEL_COMPILER
//Side effect operators DOUBLE
// In-place addition of packed doubles: stores _mm256_add_pd(a, b) into a and
// returns a reference to a so the expression can be chained.
inline __m256d& operator+=(__m256d & a, const __m256d & b){
    a = _mm256_add_pd(a, b);
    return a;
}
// In-place subtraction of packed doubles: stores _mm256_sub_pd(a, b) into a
// and returns a reference to a so the expression can be chained.
inline __m256d& operator-=(__m256d& a, const __m256d& b){
    a = _mm256_sub_pd(a, b);
    return a;
}
// In-place multiplication of packed doubles: stores _mm256_mul_pd(a, b) into
// a and returns a reference to a so the expression can be chained.
inline __m256d& operator*=(__m256d& a, const __m256d& b){
    a = _mm256_mul_pd(a, b);
    return a;
}