Commit b126bba0 authored by Berenger Bramas's avatar Berenger Bramas

Merge branch 'use-inastemp-submodule' into 'develop'

Use inastemp

See merge request solverstack/ScalFMM!15
parents 2c5546a4 d9cc3c50
[submodule "CMakeModules/morse_cmake"]
path = CMakeModules/morse_cmake
url = https://gitlab.inria.fr/solverstack/morse_cmake.git
[submodule "inastemp"]
path = inastemp
url = https://gitlab.inria.fr/coulaud/inastemp
This diff is collapsed.
###########################################################################################
# Berenger Bramas Inria
# This goes with the getCpuInfos.cpp
# This will create one CMAKE value per output option from the cpp file.
# For example the output of the CPP file can be:
# SSE3=TRUE;AVX=FALSE
# Then it will create:
# CPUOPTION_SSE3 = TRUE
# CPUOPTION_AVX = FALSE
#
# The binary should return 0 on success.
###########################################################################################
macro(GetCpuInfos)
# The original CPP file
set(GetCpuInfosFile "${PROJECT_SOURCE_DIR}/CMakeModules/getCpuInfos.cpp")
# Fatal error if the file does not exist
if(NOT EXISTS ${GetCpuInfosFile})
message(FATAL_ERROR "The GetCpuInfosFile does not exist (${GetCpuInfosFile})")
endif()
# Compile and execute the file
try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR
${CMAKE_BINARY_DIR} ${GetCpuInfosFile} # [CMAKE_FLAGS <Flags>] [COMPILE_DEFINITIONS <flags>]
COMPILE_OUTPUT_VARIABLE comp
RUN_OUTPUT_VARIABLE run)
# If it has successfuly compiled an run
if(COMPILE_RESULT_VAR AND (RUN_RESULT_VAR EQUAL 0) )
set( CPU_OPTIONS ${run} )
# For each value
foreach(optionNode ${run})
# Get name and value
string(REPLACE "=" ";" optionNameAndValue ${optionNode})
list(LENGTH optionNameAndValue optionLength)
# If we get both
if(optionLength EQUAL 2)
list(GET optionNameAndValue 0 optionName)
list(GET optionNameAndValue 1 optionValue)
# create cmake variable
set(CPUOPTION_${optionName} ${optionValue})
else()
message(WARNING "GetCpuInfosFile wrong format for ${optionNode}.")
endif()
endforeach()
# output the sentence from the binrary
message(STATUS "CPUOPTION : ${CPU_OPTIONS}")
else()
message(WARNING "GetCpuInfosFile did not return correctly.")
endif()
endmacro(GetCpuInfos)
#include "immintrin.h"
int main() {
#ifdef __MIC__
__m512 tx, ty ;
tx += ty ;
#endif
return 0;
}
#include "immintrin.h"
int main() {
__m256d tx, ty ;
tx += ty ;
return 0;
}
#include <xmmintrin.h> // SSE
#include <emmintrin.h> //SSE2
#include <pmmintrin.h> //SSE3
#ifdef __SSSE3__
#include <tmmintrin.h> //SSSE3
#endif
#ifdef __SSSE4_1__
#include <smmintrin.h> // SSE4
#endif
int main() {
__m128d tx, ty ;
tx += ty ;
return 0;
}
#include <x86intrin.h>
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h> // SSSE3
#include <smmintrin.h> // SSE4
#include <immintrin.h> // AVX
int main(){
{
__m256d res0d, res1d;
res0d = _mm256_hadd_pd(res0d, res1d);
__m256 res0, res1;
res0 = _mm256_hadd_ps(res0, res1);
}
{
__m128d res0d, res1d;
res0d = _mm_hadd_pd(res0d, res1d);
__m128 res0, res1;
res0 = _mm_hadd_ps(res0, res1);
}
return 0;
}
#include <x86intrin.h>
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h> // SSSE3
#include <smmintrin.h> // SSE4
#include <immintrin.h> // AVX
int main(){
{
#ifdef __MIC__
__m512d res0d, res1d;
res0d = _mm512_hadd_pd(res0d, res1d);
__m512 res0, res1;
res0 = _mm512_hadd_ps(res0, res1);
#endif
}
{
__m256d res0d, res1d;
res0d = _mm256_hadd_pd(res0d, res1d);
__m256 res0, res1;
res0 = _mm256_hadd_ps(res0, res1);
}
{
__m128d res0d, res1d;
res0d = _mm_hadd_pd(res0d, res1d);
__m128 res0, res1;
res0 = _mm_hadd_ps(res0, res1);
}
return 0;
}
#include <x86intrin.h>
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <pmmintrin.h> // SSE3
#ifdef __SSSE3__
#include <tmmintrin.h> //SSSE3
#endif
#ifdef __SSSE4_1__
#include <smmintrin.h> // SSE4
#endif
int main(){
__m128d res0d, res1d;
res0d = _mm_hadd_pd(res0d, res1d);
__m128 res0, res1;
res0 = _mm_hadd_ps(res0, res1);
return 0;
}
This diff is collapsed.
......@@ -30,7 +30,7 @@ The following are optional:
### Get and Build ScalFMM
To use last development states of ScalFMM, please clone the develop
branch. Note that ScalFMM contains a git submodule `morse_cmake`.
branch. Note that ScalFMM contains two git submodules `morse_cmake` and `inastemp`.
To get sources please use these commands:
``` bash
git clone --recursive git@gitlab.inria.fr:solverstack/ScalFMM.git -b develop
......
......@@ -2,8 +2,7 @@
#define FADAPTCHEBKERNEL_HPP
#include "Kernels/Chebyshev/FChebSymKernel.hpp"
#include "FComputeClassDescriptor.hpp"
#include "InastempCompileConfig.h"
#ifdef _OPENMP
#include <omp.h>
......@@ -69,8 +68,8 @@ public:
const ContainerClass* const particles,
const SymbolicData * const /*source_symb*/)
{
using ComputeClass = typename ComputeClassDescriptor<FReal>::type;
constexpr std::size_t FRealCount = ComputeClassDescriptor<FReal>::count;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Target cell: local
const FReal localCellWidth(FBase::BoxWidth / FReal(1 << symb->getLevel()));
......@@ -82,57 +81,35 @@ public:
FChebTensor<FReal,ORDER>::setRoots(localCellCenter, localCellWidth, X);
// Particles attributes
const ComputeClass * const posX = (const ComputeClass * const) particles->getPositions()[0];
const ComputeClass * const posY = (const ComputeClass * const) particles->getPositions()[1];
const ComputeClass * const posZ = (const ComputeClass * const) particles->getPositions()[2];
const ComputeClass * const physicalValues = (const ComputeClass * const) particles->getPhysicalValues();
const FReal * const posX = particles->getPositions()[0];
const FReal * const posY = particles->getPositions()[1];
const FReal * const posZ = particles->getPositions()[2];
const FReal * const physicalValues = particles->getPhysicalValues();
const FReal* pX = particles->getPositions()[0];
const FReal* pY = particles->getPositions()[1];
const FReal* pZ = particles->getPositions()[2];
const FReal* pV = particles->getPhysicalValues();
// const FReal* pX = particles->getPositions()[0];
// const FReal* pY = particles->getPositions()[1];
// const FReal* pZ = particles->getPositions()[2];
// const FReal* pV = particles->getPhysicalValues();
// apply P2L
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
for (unsigned int m = 0; m<FBase::nnodes; ++m) {
ComputeClass XX = FMath::ConvertTo<ComputeClass>(X[m].getX());
ComputeClass XY = FMath::ConvertTo<ComputeClass>(X[m].getY());
ComputeClass XZ = FMath::ConvertTo<ComputeClass>(X[m].getZ());
ComputeClass XX = ComputeClass(X[m].getX());
ComputeClass XY = ComputeClass(X[m].getY());
ComputeClass XZ = ComputeClass(X[m].getZ());
std::size_t idxPart = 0;
// Compute using vectorization for all but the last array elements
ComputeClass tmpLocalExp = FMath::Zero<ComputeClass>();
for (;
idxPart < ((particles->getNbParticles())
/ FRealCount);
++idxPart)
ComputeClass tmpLocalExp = ComputeClass::GetZero();
for (std::size_t idxPart = 0 ; idxPart < particles->getNbParticles() ; idxPart += FRealCount)
{
tmpLocalExp +=
FBase::MatrixKernel->evaluate(
XX, XY, XZ,
posX[idxPart], posY[idxPart], posZ[idxPart])
ComputeClass(&posX[idxPart]), ComputeClass(&posY[idxPart]), ComputeClass(&posZ[idxPart]))
* physicalValues[idxPart];
}
local->get(idxRhs)[m] += FMath::ConvertTo<FReal>(tmpLocalExp);
// Compute the last array elements one by one if they exist
if(idxPart < ((particles->getNbParticles() + FRealCount - 1) / FRealCount)) {
auto Xx = X[m].getX();
auto Xy = X[m].getY();
auto Xz = X[m].getZ();
for(idxPart = FRealCount * (particles->getNbParticles() / FRealCount);
idxPart < static_cast<std::size_t>(particles->getNbParticles());
++idxPart)
{
local->get(idxRhs)[m] +=
FBase::MatrixKernel->evaluate(
Xx, Xy, Xz,
pX[idxPart], pY[idxPart], pZ[idxPart])
* pV[idxPart];
}
}
local->get(idxRhs)[m] += (tmpLocalExp.horizontalSum());
}
}// NVALS
}
......@@ -143,8 +120,8 @@ public:
ContainerClass* const particles,
const SymbolicData * const /*source_symb*/)
{
using ComputeClass = typename ComputeClassDescriptor<FReal>::type;
constexpr std::size_t FRealCount = ComputeClassDescriptor<FReal>::count;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Source cell: pole
const FReal poleCellWidth(FBase::BoxWidth / FReal(1 << symb->getLevel()));
......@@ -156,44 +133,45 @@ public:
FChebTensor<FReal,ORDER>::setRoots(poleCellCenter, poleCellWidth, Y);
// read positions
const ComputeClass* const posX = (const ComputeClass* const)(particles->getPositions()[0]);
const ComputeClass* const posY = (const ComputeClass* const)(particles->getPositions()[1]);
const ComputeClass* const posZ = (const ComputeClass* const)(particles->getPositions()[2]);
const FReal* const posX = (particles->getPositions()[0]);
const FReal* const posY = (particles->getPositions()[1]);
const FReal* const posZ = (particles->getPositions()[2]);
// get potential
ComputeClass* const physVal = (ComputeClass* const)(particles->getPhysicalValues());
ComputeClass* const potentials = (ComputeClass* const)(particles->getPotentials());
ComputeClass* const fx = (ComputeClass* const)(particles->getForcesX());
ComputeClass* const fy = (ComputeClass* const)(particles->getForcesY());
ComputeClass* const fz = (ComputeClass* const)(particles->getForcesZ());
FReal* const physVal = (particles->getPhysicalValues());
FReal* const potentials = (particles->getPotentials());
FReal* const fx = (particles->getForcesX());
FReal* const fy = (particles->getForcesY());
FReal* const fz = (particles->getForcesZ());
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// apply M2P
for (unsigned int n=0; n<FBase::nnodes; ++n){
ComputeClass MultipoleExpansion =
FMath::ConvertTo<ComputeClass, FReal>(pole->get(idxRhs)[n]);
ComputeClass(pole->get(idxRhs)[n]);
ComputeClass YX = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getX());
ComputeClass YY = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getY());
ComputeClass YZ = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getZ());
ComputeClass YX = ComputeClass(Y[n].getX());
ComputeClass YY = ComputeClass(Y[n].getY());
ComputeClass YZ = ComputeClass(Y[n].getZ());
for(std::size_t idxPart = 0;
idxPart < ( (particles->getNbParticles() + FRealCount - 1)
/ FRealCount);
++idxPart)
idxPart < particles->getNbParticles();
idxPart += FRealCount)
{
ComputeClass Kxy[1];
ComputeClass dKxy[3];
FBase::MatrixKernel->evaluateBlockAndDerivative(
posX[idxPart], posY[idxPart], posZ[idxPart],
ComputeClass(&posX[idxPart]),
ComputeClass(&posY[idxPart]),
ComputeClass(&posZ[idxPart]),
YX, YY, YZ,
Kxy,dKxy);
potentials[idxPart] += Kxy[0] * MultipoleExpansion;
fx[idxPart] += dKxy[0] * physVal[idxPart] * MultipoleExpansion;
fy[idxPart] += dKxy[1] * physVal[idxPart] * MultipoleExpansion;
fz[idxPart] += dKxy[2] * physVal[idxPart] * MultipoleExpansion;
(ComputeClass(&potentials[idxPart]) + Kxy[0] * MultipoleExpansion).storeInArray(&potentials[idxPart]);
(ComputeClass(&fx[idxPart]) + dKxy[0] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fx[idxPart]);
(ComputeClass(&fy[idxPart]) + dKxy[1] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fy[idxPart]);
(ComputeClass(&fz[idxPart]) + dKxy[2] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fz[idxPart]);
}
}// Particles
}// NVALS
......
......@@ -4,11 +4,10 @@
#include <cassert>
#include "Kernels/Uniform/FUnifKernel.hpp"
#include "InastempCompileConfig.h"
#include "Utils/FMath.hpp"
#include "FComputeClassDescriptor.hpp"
#include <fstream>
......@@ -114,8 +113,8 @@ public:
const ContainerClass* const particles,
const SymbolicData * const /*source_symb*/)
{
using ComputeClass = typename ComputeClassDescriptor<FReal>::type;
constexpr std::size_t FRealCount = ComputeClassDescriptor<FReal>::count;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Target cell: local
const FReal localCellWidth(FBase::BoxWidth / FReal(1 << symb->getLevel()));
......@@ -127,55 +126,36 @@ public:
FUnifTensor<FReal,ORDER>::setRoots(localCellCenter, localCellWidth, X);
// Particles attributes
const ComputeClass * const posX = (const ComputeClass * const) particles->getPositions()[0];
const ComputeClass * const posY = (const ComputeClass * const) particles->getPositions()[1];
const ComputeClass * const posZ = (const ComputeClass * const) particles->getPositions()[2];
const ComputeClass * const physicalValues = (const ComputeClass * const) particles->getPhysicalValues();
const FReal * const posX = particles->getPositions()[0];
const FReal * const posY = particles->getPositions()[1];
const FReal * const posZ = particles->getPositions()[2];
const FReal * const physicalValues = particles->getPhysicalValues();
const FReal* pX = particles->getPositions()[0];
const FReal* pY = particles->getPositions()[1];
const FReal* pZ = particles->getPositions()[2];
const FReal* pV = particles->getPhysicalValues();
// const FReal* pX = particles->getPositions()[0];
// const FReal* pY = particles->getPositions()[1];
// const FReal* pZ = particles->getPositions()[2];
// const FReal* pV = particles->getPhysicalValues();
// apply P2L
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
for (unsigned int m = 0; m < FBase::nnodes; ++m) {
ComputeClass XX = FMath::ConvertTo<ComputeClass>(X[m].getX());
ComputeClass XY = FMath::ConvertTo<ComputeClass>(X[m].getY());
ComputeClass XZ = FMath::ConvertTo<ComputeClass>(X[m].getZ());
ComputeClass XX = ComputeClass(X[m].getX());
ComputeClass XY = ComputeClass(X[m].getY());
ComputeClass XZ = ComputeClass(X[m].getZ());
ComputeClass tmpLocalExp = FMath::Zero<ComputeClass>();
ComputeClass tmpLocalExp = ComputeClass::GetZero();
// Compute using vectorization for all but the last array elements
std::size_t idxPart = 0;
for (; idxPart < (particles->getNbParticles() / FRealCount);
++idxPart)
for (std::size_t idxPart = 0 ; idxPart < particles->getNbParticles() ; idxPart += FRealCount)
{
tmpLocalExp +=
FBase::MatrixKernel->evaluate(
XX, XY, XZ,
posX[idxPart], posY[idxPart], posZ[idxPart])
ComputeClass(&posX[idxPart]), ComputeClass(&posY[idxPart]), ComputeClass(&posZ[idxPart]))
* physicalValues[idxPart];
}
local->get(idxRhs)[m] += FMath::ConvertTo<FReal>(tmpLocalExp);
// Compute the last array elements one by one if they exist
if(idxPart < ((particles->getNbParticles() + FRealCount - 1) / FRealCount)) {
auto Xx = X[m].getX();
auto Xy = X[m].getY();
auto Xz = X[m].getZ();
for(idxPart = FRealCount * (particles->getNbParticles() / FRealCount);
idxPart < static_cast<std::size_t>(particles->getNbParticles());
++idxPart)
{
local->get(idxRhs)[m] +=
FBase::MatrixKernel->evaluate(
Xx, Xy, Xz,
pX[idxPart], pY[idxPart], pZ[idxPart])
* pV[idxPart];
}
}
local->get(idxRhs)[m] += (tmpLocalExp.horizontalSum());
}
}// NVALS
}
......@@ -191,8 +171,8 @@ public:
ContainerClass* const particles,
const SymbolicData * const /*target_symb*/)
{
using ComputeClass = typename ComputeClassDescriptor<FReal>::type;
constexpr std::size_t FRealCount = ComputeClassDescriptor<FReal>::count;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Source cell: pole
const FReal poleCellWidth(FBase::BoxWidth / FReal(1 << symb->getLevel()));
......@@ -204,16 +184,16 @@ public:
FUnifTensor<FReal,ORDER>::setRoots(poleCellCenter, poleCellWidth, Y);
// read positions
const ComputeClass* const posX = (const ComputeClass* const)(particles->getPositions()[0]);
const ComputeClass* const posY = (const ComputeClass* const)(particles->getPositions()[1]);
const ComputeClass* const posZ = (const ComputeClass* const)(particles->getPositions()[2]);
const FReal* const posX = (particles->getPositions()[0]);
const FReal* const posY = (particles->getPositions()[1]);
const FReal* const posZ = (particles->getPositions()[2]);
// get potential
ComputeClass* const physVal = (ComputeClass* const)(particles->getPhysicalValues());
ComputeClass* const potentials = (ComputeClass* const)(particles->getPotentials());
ComputeClass* const fx = (ComputeClass* const)(particles->getForcesX());
ComputeClass* const fy = (ComputeClass* const)(particles->getForcesY());
ComputeClass* const fz = (ComputeClass* const)(particles->getForcesZ());
FReal* const physVal = (particles->getPhysicalValues());
FReal* const potentials = (particles->getPotentials());
FReal* const fx = (particles->getForcesX());
FReal* const fy = (particles->getForcesY());
FReal* const fz = (particles->getForcesZ());
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
......@@ -221,29 +201,30 @@ public:
for (unsigned int n=0; n<FBase::nnodes; ++n){
ComputeClass MultipoleExpansion =
FMath::ConvertTo<ComputeClass, FReal>(pole->get(idxRhs)[n]);
ComputeClass(pole->get(idxRhs)[n]);
ComputeClass YX = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getX());
ComputeClass YY = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getY());
ComputeClass YZ = FMath::ConvertTo<ComputeClass, FReal>(Y[n].getZ());
ComputeClass YX = ComputeClass(Y[n].getX());
ComputeClass YY = ComputeClass(Y[n].getY());
ComputeClass YZ = ComputeClass(Y[n].getZ());
for(std::size_t idxPart = 0;
idxPart < ( (particles->getNbParticles() + FRealCount - 1)
/ FRealCount);
++idxPart)
idxPart < particles->getNbParticles();
idxPart += FRealCount)
{
ComputeClass Kxy[1];
ComputeClass dKxy[3];
FBase::MatrixKernel->evaluateBlockAndDerivative(
posX[idxPart], posY[idxPart], posZ[idxPart],
ComputeClass(&posX[idxPart]),
ComputeClass(&posY[idxPart]),
ComputeClass(&posZ[idxPart]),
YX, YY, YZ,
Kxy,dKxy);
potentials[idxPart] += Kxy[0] * MultipoleExpansion;
fx[idxPart] += dKxy[0] * physVal[idxPart] * MultipoleExpansion;
fy[idxPart] += dKxy[1] * physVal[idxPart] * MultipoleExpansion;
fz[idxPart] += dKxy[2] * physVal[idxPart] * MultipoleExpansion;
(ComputeClass(&potentials[idxPart]) + Kxy[0] * MultipoleExpansion).storeInArray(&potentials[idxPart]);
(ComputeClass(&fx[idxPart]) + dKxy[0] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fx[idxPart]);
(ComputeClass(&fy[idxPart]) + dKxy[1] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fy[idxPart]);
(ComputeClass(&fz[idxPart]) + dKxy[2] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fz[idxPart]);
}
......
#ifndef FCOMPUTECLASSDESCRIPTOR_HPP
#define FCOMPUTECLASSDESCRIPTOR_HPP
template<typename FReal>
struct ComputeClassDescriptor {};
template<>
struct ComputeClassDescriptor<double> {
#if 0 // for easy macro reordering
#elif defined SCALFMM_USE_SSE
using type = __m128d;
enum {count = 2};
#elif defined SCALFMM_USE_AVX
using type = __m256d;
enum {count = 4};
#elif defined SCALFMM_USE_AVX2
using type = __m512d;
enum {count = 8};
#else
using type = double;
enum {count = 1};
#endif
};
template<>
struct ComputeClassDescriptor<float> {
#if 0
#elif defined SCALFMM_USE_SSE
using type = __m128;
enum {count = 4};
#elif defined SCALFMM_USE_AVX
using type = __m256;
enum {count = 8};
#elif defined SCALFMM_USE_AVX2
using type = __m512;
enum {count = 16};
#else
using type = float;
enum {count = 1};
#endif
};
#endif /* FCOMPUTECLASSDESCRIPTOR_HPP */
......@@ -89,7 +89,7 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffx = (xt-xs);
const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs);
return FMath::One<ValueClass>() / FMath::Sqrt(diffx*diffx + diffy*diffy + diffz*diffz);
return ValueClass(1) / FMath::Sqrt(diffx*diffx + diffy*diffy + diffz*diffz);
}
// evaluate interaction (blockwise)
......@@ -110,7 +110,7 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffx = (xt-xs);
const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs);
const ValueClass one_over_r = FMath::One<ValueClass>() / FMath::Sqrt(diffx*diffx + diffy*diffy + diffz*diffz);
const ValueClass one_over_r = ValueClass(1) / FMath::Sqrt(diffx*diffx + diffy*diffy + diffz*diffz);
const ValueClass one_over_r3 = one_over_r*one_over_r*one_over_r;
......@@ -176,9 +176,9 @@ struct FInterpMatrixKernelRH :FInterpMatrixKernelR<FReal>{
const ValueClass diffx = (xt-xs);
const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs);
return FMath::One<ValueClass>() / FMath::Sqrt(FMath::ConvertTo<ValueClass,FReal>(LX)*diffx*diffx +
FMath::ConvertTo<ValueClass,FReal>(LY)*diffy*diffy +
FMath::ConvertTo<ValueClass,FReal>(LZ)*diffz*diffz);
return ValueClass(1) / FMath::Sqrt(ValueClass(LX)*diffx*diffx +
ValueClass(LY)*diffy*diffy +
ValueClass(LZ)*diffz*diffz);
}
void setCoeff(const FReal& a, const FReal& b, const FReal& c)
{LX= a*a ; LY = b*b ; LZ = c *c;}
......@@ -208,16 +208,16 @@ struct FInterpMatrixKernelRH :FInterpMatrixKernelR<FReal>{
const ValueClass diffx = (xt-xs);
const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs);
const ValueClass one_over_rL = FMath::One<ValueClass>() / FMath::Sqrt(FMath::ConvertTo<ValueClass,FReal>(LX)*diffx*diffx +
FMath::ConvertTo<ValueClass,FReal>(LY)*diffy*diffy +
FMath::ConvertTo<ValueClass,FReal>(LZ)*diffz*diffz);
const ValueClass one_over_rL = ValueClass(1) / (ValueClass(LX)*diffx*diffx +
ValueClass(LY)*diffy*diffy +
ValueClass(LZ)*diffz*diffz);
const ValueClass one_over_rL3 = one_over_rL*one_over_rL*one_over_rL;
block[0] = one_over_rL;
blockDerivative[0] = FMath::ConvertTo<ValueClass,FReal>(LX) * one_over_rL3 * diffx;
blockDerivative[1] = FMath::ConvertTo<ValueClass,FReal>(LY)* one_over_rL3 * diffy;
blockDerivative[2] = FMath::ConvertTo<ValueClass,FReal>(LZ)* one_over_rL3 * diffz;
blockDerivative[0] = ValueClass(LX) * one_over_rL3 * diffx;
blockDerivative[1] = ValueClass(LY)* one_over_rL3 * diffy;
blockDerivative[2] = ValueClass(LZ)* one_over_rL3 * diffz;
}
......@@ -283,7 +283,7 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffx = (xt-xs);
const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs);
return FMath::One<ValueClass>() / FReal(diffx*diffx+diffy*diffy+diffz*diffz);
return ValueClass(1) / FReal(diffx*diffx+diffy*diffy+diffz*diffz);
}
// evaluate interaction (blockwise)
......@@ -305,12 +305,12 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel<FReal>
const ValueClass diffy = (yt-ys);
const ValueClass diffz = (zt-zs);
const ValueClass r2 = (diffx*diffx+diffy*diffy+diffz*diffz);
const ValueClass one_over_r2 = FMath::One<ValueClass>() / (r2);
const ValueClass one_over_r2 = ValueClass(1) / (r2);
const ValueClass one_over_r4 = one_over_r2*one_over_r2;
block[0] = one_over_r2;
const ValueClass coef = FMath::ConvertTo<ValueClass,FReal>(-2.) * one_over_r4;
const ValueClass coef = ValueClass(-2.) * one_over_r4;