Commit 077ce97b authored by PIACIBELLO Cyrille's avatar PIACIBELLO Cyrille

Changing in AVX part, now with AVX and Blas, all the UTEsts and Tests compile finely

parent c4b4aad2
......@@ -129,12 +129,6 @@ public:
const FComplexe* const FRestrict M2L_Outer_transfer){
int index_j_k = 0;
std::cout << "$$$$$$$$$$$$$$$$$ multipole To Local $$$$$$$$$$$$$$$" << std::endl
<< " devM2lP " << this->devM2lP << std::endl ;
for (int i = 0 ; i < this->devM2lP ; ++i ) {
std::cout << " " << M2L_Outer_transfer[i] ;
}
std::cout << std::endl;
// L_j^k
// HPMSTART(51, "M2L computation (loops)");
// j from 0 to P
......@@ -145,11 +139,9 @@ public:
for (int k = 0 ; k <= j ; ++k, ++index_j_k){
// (-1)^n
FReal pow_of_minus_1_for_n = 1.0;
std::cout << "Begin j: " << j << " k: " << k << std::endl;
// work with a local variable
FComplexe L_j_k = local_exp[index_j_k];
L_j_k.setRealImag(0.0, 0.0);
// n from 0 to P or do P-j
// for (int n = 0 ; n <= Parent::devP /*or*/ /*Parent::devP-j*/ ; ++n){
for (int n = 0 ; n <= Parent::devP-j ; ++n){ // faster than double height Parent::devP
......@@ -159,13 +151,11 @@ public:
// Outer_{j+n}^{-k-l} : here points on the M2L transfer function/expansion term of degree j+n and order |-k-l|
const int index_n_j = Parent::harmonic.getPreExpRedirJ(n+j);
// due to the symmetry
FReal pow_of_minus_1_for_l = pow_of_minus_1_for_n; // (-1)^l
// We start with l=n (and not l=-n) so that we always set p_Outer_term to a correct value in the first loop.
int l = n;
for(/* l = n */ ; l >= 0 ; --l){ // we have -k-l<0 and l>0
std::cout << " Loop 1 k "<< k << " n+j " << n+j <<" n " << n << " l " << l << " -k-l " << -k-l << " (-1)^(k+l) " << pow_of_minus_1_for_l * pow_of_minus_1_for_k << " index O " << index_n_j + (k + l)
<< " index M " << index_n + l << std::endl;
for(/* l = n */ ; l > 0 ; --l){ // we have -k-l<0 and l>0
const FComplexe M_n_l = multipole_exp_src[index_n + l];
const FComplexe O_n_j__k_l = M2L_Outer_transfer[index_n_j + k + l];
......@@ -175,21 +165,12 @@ public:
L_j_k.incImag( pow_of_minus_1_for_l * pow_of_minus_1_for_k *
((M_n_l.getImag() * O_n_j__k_l.getReal()) -
(M_n_l.getReal() * O_n_j__k_l.getImag())));
std::cout << " M_n_l "<< M_n_l << " O_n+j_(-k-l) " << O_n_j__k_l << std::endl;
pow_of_minus_1_for_l = -pow_of_minus_1_for_l;
}
// l= 0
// Now l=-1, we have to conjugate M and O because we store only positive indexes
//
// for(l = 0 ; l <= 1-k; --l){ // we have -k-l<0 and l<=0
// for(/* l = 0 */; l >= -n && (-k< l; --l){ // we have -k-l<0 and l<=0
for(/* l = 0 */; l >= -n && (-k-l) < 0 ; --l){ // we have -k-l<0 and l<=0
std::cout << " Loop 2 k "<< k << " n+j " << n+j <<" n " << n << " l " << l << " -k-l " << -k-l << " (-1)^(k) " << pow_of_minus_1_for_k << " index O " << index_n_j + (k + l)
<< " index M " << index_n - l << std::endl;
const FComplexe M_n_l = multipole_exp_src[index_n - l];
for(/* l = 0 */; l >= -n && (-k-l) < 0 ; --l){ // we have -k-l<0 and l<=0
const FComplexe M_n_l = multipole_exp_src[index_n - l];
const FComplexe O_n_j__k_l = M2L_Outer_transfer[index_n_j + k + l];
L_j_k.incReal( pow_of_minus_1_for_k *
......@@ -198,14 +179,11 @@ public:
L_j_k.decImag( pow_of_minus_1_for_k *
((M_n_l.getImag() * O_n_j__k_l.getReal()) +
(M_n_l.getReal() * O_n_j__k_l.getImag())));
std::cout << " M_n_l "<< M_n_l << " O_n+j_(-k-) " << O_n_j__k_l << std::endl;
pow_of_minus_1_for_l = -pow_of_minus_1_for_l;
}
// pow_of_minus_1_for_l = pow_of_minus_1_for_n ;
// for(l=-k; l <= -n ; --l){ // we have -k-l>=0 and l<=0
for(/*l = -n-1 or l = -k */; l >= -n ; --l){ // we have -k-l>=0 and l<=0
std::cout << " Loop 3 k "<< k <<" n+j " << n+j <<" n " << n << " l " << l << " -k-l " << -k-l << " (-1)^(l) " << pow_of_minus_1_for_l << " index O " << index_n_j - (k + l)
<< " index M " << index_n - l << std::endl;
for(/*l = -n-1 or l = -k-1 */; l >= -n ; --l){ // we have -k-l>=0 and l<=0
const FComplexe M_n_l = multipole_exp_src[index_n - l];
const FComplexe O_n_j__k_l = M2L_Outer_transfer[index_n_j - (k + l)];
......@@ -215,7 +193,6 @@ public:
L_j_k.incImag( pow_of_minus_1_for_l *
((M_n_l.getReal() * O_n_j__k_l.getImag()) -
(M_n_l.getImag() * O_n_j__k_l.getReal())));
std::cout << " M_n_l "<< M_n_l << " O_n+j_(-k-l) " << O_n_j__k_l << std::endl;
pow_of_minus_1_for_l = -pow_of_minus_1_for_l;
}
......@@ -225,7 +202,6 @@ public:
// put in the local vector
local_exp[index_j_k] = L_j_k;
std::cout << " index_j_k "<<index_j_k<< " L_j_k " << L_j_k<< std::endl;
pow_of_minus_1_for_k = -pow_of_minus_1_for_k;
}//k
......
......@@ -4,13 +4,13 @@
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FMATH_HPP
......@@ -47,302 +47,302 @@ struct FMath{
/** To get absolute value */
template <class NumType>
static NumType Abs(const NumType inV){
return (inV < 0 ? -inV : inV);
return (inV < 0 ? -inV : inV);
}
#ifdef ScalFMM_USE_SSE
static __m128 Abs(const __m128 inV){
return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), inV), inV);
return _mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), inV), inV);
}
static __m128d Abs(const __m128d inV){
return _mm_max_pd(_mm_sub_pd(_mm_setzero_pd(), inV), inV);
return _mm_max_pd(_mm_sub_pd(_mm_setzero_pd(), inV), inV);
}
#endif
#ifdef ScalFMM_USE_AVX
static __m256 Abs(const __m256 inV){
return _mm256_max_ps(_mm256_sub_ps(_mm256_setzero_ps(), inV), inV);
return _mm256_max_ps(_mm256_sub_ps(_mm256_setzero_ps(), inV), inV);
}
static __m256d Abs(const __m256d inV){
return _mm256_max_pd(_mm256_sub_pd(_mm256_setzero_pd(), inV), inV);
return _mm256_max_pd(_mm256_sub_pd(_mm256_setzero_pd(), inV), inV);
}
#endif
/** To get max between 2 values */
template <class NumType>
static NumType Max(const NumType inV1, const NumType inV2){
return (inV1 > inV2 ? inV1 : inV2);
return (inV1 > inV2 ? inV1 : inV2);
}
/** To get min between 2 values */
template <class NumType>
static NumType Min(const NumType inV1, const NumType inV2){
return (inV1 < inV2 ? inV1 : inV2);
return (inV1 < inV2 ? inV1 : inV2);
}
#ifdef ScalFMM_USE_SSE
static __m128 Max(const __m128 inV1, const __m128 inV2){
return _mm_max_ps(inV1, inV2);
return _mm_max_ps(inV1, inV2);
}
static __m128 Min(const __m128 inV1, const __m128 inV2){
return _mm_min_ps(inV1, inV2);
return _mm_min_ps(inV1, inV2);
}
static __m128d Max(const __m128d inV1, const __m128d inV2){
return _mm_max_pd(inV1, inV2);
return _mm_max_pd(inV1, inV2);
}
static __m128d Min(const __m128d inV1, const __m128d inV2){
return _mm_min_pd(inV1, inV2);
return _mm_min_pd(inV1, inV2);
}
#endif
#ifdef ScalFMM_USE_AVX
static __m256 Max(const __m256 inV1, const __m256 inV2){
return _mm256_max_ps(inV1, inV2);
return _mm256_max_ps(inV1, inV2);
}
static __m256 Min(const __m256 inV1, const __m256 inV2){
return _mm256_min_ps(inV1, inV2);
return _mm256_min_ps(inV1, inV2);
}
static __m256d Max(const __m256d inV1, const __m256d inV2){
return _mm256_max_pd(inV1, inV2);
return _mm256_max_pd(inV1, inV2);
}
static __m256d Min(const __m256d inV1, const __m256d inV2){
return _mm256_min_pd(inV1, inV2);
return _mm256_min_pd(inV1, inV2);
}
#endif
/** To know if 2 values seems to be equal */
template <class NumType>
static bool LookEqual(const NumType inV1, const NumType inV2){
return (Abs(inV1-inV2) < std::numeric_limits<NumType>::epsilon());
//const FReal relTol = FReal(0.00001);
//const FReal absTol = FReal(0.00001);
//return (Abs(inV1 - inV2) <= Max(absTol, relTol * Max(Abs(inV1), Abs(inV2))));
return (Abs(inV1-inV2) < std::numeric_limits<NumType>::epsilon());
//const FReal relTol = FReal(0.00001);
//const FReal absTol = FReal(0.00001);
//return (Abs(inV1 - inV2) <= Max(absTol, relTol * Max(Abs(inV1), Abs(inV2))));
}
/** To know if 2 values seems to be equal */
template <class NumType>
static FReal RelatifDiff(const NumType inV1, const NumType inV2){
return Abs(inV1 - inV2)*Abs(inV1 - inV2)/Max(Abs(inV1*inV1), Abs(inV2*inV2));
return Abs(inV1 - inV2)*Abs(inV1 - inV2)/Max(Abs(inV1*inV1), Abs(inV2*inV2));
}
/** To get floor of a FReal */
static float dfloor(const float inValue){
return floorf(inValue);
return floorf(inValue);
}
static double dfloor(const double inValue){
return floor(inValue);
return floor(inValue);
}
/** To get ceil of a FReal */
static float Ceil(const float inValue){
return ceilf(inValue);
return ceilf(inValue);
}
static double Ceil(const double inValue){
return ceil(inValue);
return ceil(inValue);
}
#if defined(ScalFMM_USE_SSE ) && defined(__SSSE4_1__)
static __m128 dfloor(const __m128 inV){
return _mm_floor_ps(inV);
return _mm_floor_ps(inV);
}
static __m128d dfloor(const __m128d inV){
return _mm_floor_pd(inV);
return _mm_floor_pd(inV);
}
static __m128 Ceil(const __m128 inV){
return _mm_ceil_ps(inV);
return _mm_ceil_ps(inV);
}
static __m128d Ceil(const __m128d inV){
return _mm_ceil_pd(inV);
return _mm_ceil_pd(inV);
}
#endif
#ifdef ScalFMM_USE_AVX
static __m256 dfloor(const __m256 inV){
return _mm256_floor_ps(inV);
return _mm256_floor_ps(inV);
}
static __m256d dfloor(const __m256d inV){
return _mm256_floor_pd(inV);
return _mm256_floor_pd(inV);
}
static __m256 Ceil(const __m256 inV){
return _mm256_ceil_ps(inV);
return _mm256_ceil_ps(inV);
}
static __m256d Ceil(const __m256d inV){
return _mm256_ceil_pd(inV);
return _mm256_ceil_pd(inV);
}
#endif
/** To get pow */
static double pow(double x, double y){
return ::pow(x,y);
return ::pow(x,y);
}
static double pow(float x, float y){
return ::powf(x,y);
return ::powf(x,y);
}
template <class NumType>
static NumType pow(const NumType inValue, int power){
NumType result = 1;
while(power-- > 0) result *= inValue;
return result;
NumType result = 1;
while(power-- > 0) result *= inValue;
return result;
}
/** To get pow of 2 */
static int pow2(const int power){
return (1 << power);
return (1 << power);
}
/** To get factorial */
template <class NumType>
static double factorial(int inValue){
if(inValue==0) return NumType(1.);
else {
NumType result = NumType(inValue);
while(--inValue > 1) result *= NumType(inValue);
return result;
}
if(inValue==0) return NumType(1.);
else {
NumType result = NumType(inValue);
while(--inValue > 1) result *= NumType(inValue);
return result;
}
}
/** To know if a value is between two others */
template <class NumType>
static bool Between(const NumType inValue, const NumType inMin, const NumType inMax){
return ( inMin <= inValue && inValue < inMax );
return ( inMin <= inValue && inValue < inMax );
}
/** To get sqrt of a FReal */
static float Sqrt(const float inValue){
return sqrtf(inValue);
return sqrtf(inValue);
}
static double Sqrt(const double inValue){
return sqrt(inValue);
return sqrt(inValue);
}
static float Rsqrt(const float inValue){
return float(1.0)/sqrtf(inValue);
return float(1.0)/sqrtf(inValue);
}
static double Rsqrt(const double inValue){
return 1.0/sqrt(inValue);
return 1.0/sqrt(inValue);
}
#ifdef ScalFMM_USE_SSE
static __m128 Sqrt(const __m128 inV){
return _mm_sqrt_ps(inV);
return _mm_sqrt_ps(inV);
}
static __m128d Sqrt(const __m128d inV){
return _mm_sqrt_pd(inV);
return _mm_sqrt_pd(inV);
}
static __m128 Rsqrt(const __m128 inV){
return _mm_rsqrt_ps(inV);
return _mm_rsqrt_ps(inV);
}
static __m128d Rsqrt(const __m128d inV){
return _mm_set_pd1(1.0) / _mm_sqrt_pd(inV);
return _mm_set_pd1(1.0) / _mm_sqrt_pd(inV);
}
#endif
#ifdef ScalFMM_USE_AVX
static __m256 Sqrt(const __m256 inV){
return _mm256_sqrt_ps(inV);
return _mm256_sqrt_ps(inV);
}
static __m256d Sqrt(const __m256d inV){
return _mm256_sqrt_pd(inV);
return _mm256_sqrt_pd(inV);
}
static __m256 Rsqrt(const __m256 inV){
return _mm256_rsqrt_ps(inV);
return _mm256_rsqrt_ps(inV);
}
static __m256d Rsqrt(const __m256d inV){
return _mm256_set_pd1(1.0) / _mm256_sqrt_pd(inV);
return _mm256_set1_pd(1.0) / _mm256_sqrt_pd(inV);
}
#endif
/** To get Log of a FReal */
static float Log(const float inValue){
return logf(inValue);
return logf(inValue);
}
static double Log(const double inValue){
return log(inValue);
return log(inValue);
}
/** To get Log2 of a FReal */
static float Log2(const float inValue){
return log2f(inValue);
return log2f(inValue);
}
static double Log2(const double inValue){
return log2(inValue);
return log2(inValue);
}
/** To get atan2 of a 2 FReal, The return value is given in radians and is in the
range -pi to pi, inclusive. */
static float Atan2(const float inValue1,const float inValue2){
return atan2f(inValue1,inValue2);
return atan2f(inValue1,inValue2);
}
static double Atan2(const double inValue1,const double inValue2){
return atan2(inValue1,inValue2);
return atan2(inValue1,inValue2);
}
/** To get sin of a FReal */
static float Sin(const float inValue){
return sinf(inValue);
return sinf(inValue);
}
static double Sin(const double inValue){
return sin(inValue);
return sin(inValue);
}
/** To get asinf of a float. The result is in the range [0, pi]*/
static float ASin(const float inValue){
return asinf(inValue);
return asinf(inValue);
}
/** To get asinf of a double. The result is in the range [0, pi]*/
static double ASin(const double inValue){
return asin(inValue);
return asin(inValue);
}
/** To get cos of a FReal */
static float Cos(const float inValue){
return cosf(inValue);
return cosf(inValue);
}
static double Cos(const double inValue){
return cos(inValue);
return cos(inValue);
}
/** To get arccos of a float. The result is in the range [0, pi]*/
static float ACos(const float inValue){
return acosf(inValue);
return acosf(inValue);
}
/** To get arccos of a double. The result is in the range [0, pi]*/
static double ACos(const double inValue){
return acos(inValue);
return acos(inValue);
}
/** To get atan2 of a 2 FReal */
static float Fmod(const float inValue1,const float inValue2){
return fmodf(inValue1,inValue2);
return fmodf(inValue1,inValue2);
}
/** return the floating-point remainder of inValue1 / inValue2 */
static double Fmod(const double inValue1,const double inValue2){
return fmod(inValue1,inValue2);
return fmod(inValue1,inValue2);
}
/** To know if a variable is nan, based on the C++0x */
template <class TestClass>
static bool IsNan(const TestClass& value){
//volatile const TestClass* const pvalue = &value;
//return (*pvalue) != value;
return std::isnan(value);
//volatile const TestClass* const pvalue = &value;
//return (*pvalue) != value;
return std::isnan(value);
}
/** To know if a variable is not inf, based on the C++0x */
template <class TestClass>
static bool IsFinite(const TestClass& value){
// return !(value <= std::numeric_limits<T>::min()) && !(std::numeric_limits<T>::max() <= value);
return std::isfinite(value);
// return !(value <= std::numeric_limits<T>::min()) && !(std::numeric_limits<T>::max() <= value);
return std::isfinite(value);
}
template <class NumType>
......@@ -357,98 +357,98 @@ struct FMath{
/** A class to compute accuracy */
class FAccurater {
int nbElements;
FReal l2Dot;
FReal l2Diff;
FReal max;
FReal maxDiff;
int nbElements;
FReal l2Dot;
FReal l2Diff;
FReal max;
FReal maxDiff;
public:
FAccurater() : nbElements(0),l2Dot(0.0), l2Diff(0.0), max(0.0), maxDiff(0.0) {
}
/** with inital values */
FAccurater(const FReal inGood[], const FReal inBad[], const int nbValues)
: nbElements(0),l2Dot(0.0), l2Diff(0.0), max(0.0), maxDiff(0.0) {
add(inGood, inBad, nbValues);
}
/** Add value to the current list */
void add(const FReal inGood, const FReal inBad){
l2Diff += (inBad - inGood) * (inBad - inGood);
l2Dot += inGood * inGood;
max = Max(max , Abs(inGood));
maxDiff = Max(maxDiff, Abs(inGood-inBad));
nbElements += 1 ;
}
/** Add array of values */
void add(const FReal inGood[], const FReal inBad[], const int nbValues){
for(int idx = 0 ; idx < nbValues ; ++idx){
add(inGood[idx],inBad[idx]);
}
nbElements += nbValues ;
}
/** Add an accurater*/
void add(const FAccurater& inAcc){
l2Diff += inAcc.getl2Diff();
l2Dot += inAcc.getl2Dot();
max = Max(max,inAcc.getmax());
maxDiff = Max(maxDiff,inAcc.getInfNorm());
nbElements += inAcc.getNbElements();
}
FReal getl2Diff() const{
return l2Diff;
}
FReal getl2Dot() const{
return l2Dot;
}
FReal getmax() const{
return max;
}
int getNbElements() const{
return nbElements;
}
void setNbElements(const int & n) {
nbElements = n;
}
/** Get the root mean squared error*/
FReal getL2Norm() const{
return Sqrt(l2Diff );
}
/** Get the L2 norm */
FReal getRMSError() const{
return Sqrt(l2Diff /static_cast<FReal>(nbElements));
}
/** Get the inf norm */
FReal getInfNorm() const{
return maxDiff;
}
/** Get the L2 norm */
FReal getRelativeL2Norm() const{
return Sqrt(l2Diff / l2Dot);
}
/** Get the inf norm */
FReal getRelativeInfNorm() const{
return maxDiff / max;
}
/** Print */
template <class StreamClass>
friend StreamClass& operator<<(StreamClass& output, const FAccurater& inAccurater){
output << "[Error] Relative L2Norm = " << inAccurater.getRelativeL2Norm() << " \t RMS Norm = " << inAccurater.getRMSError() << " \t Relative Inf = " << inAccurater.getRelativeInfNorm();
return output;
}
void reset()
{
l2Dot = FReal(0);
l2Diff = FReal(0);;
max = FReal(0);
maxDiff = FReal(0);
nbElements = 0 ;
}
FAccurater() : nbElements(0),l2Dot(0.0), l2Diff(0.0), max(0.0), maxDiff(0.0) {
}
/** with inital values */
FAccurater(const FReal inGood[], const FReal inBad[], const int nbValues)
: nbElements(0),l2Dot(0.0), l2Diff(0.0), max(0.0), maxDiff(0.0) {
add(inGood, inBad, nbValues);
}
/** Add value to the current list */
void add(const FReal inGood, const FReal inBad){
l2Diff += (inBad - inGood) * (inBad - inGood);
l2Dot += inGood * inGood;
max = Max(max , Abs(inGood));
maxDiff = Max(maxDiff, Abs(inGood-inBad));
nbElements += 1 ;
}
/** Add array of values */
void add(const FReal inGood[], const FReal inBad[], const int nbValues){
for(int idx = 0 ; idx < nbValues ; ++idx){
add(inGood[idx],inBad[idx]);
}
nbElements += nbValues ;
}
/** Add an accurater*/
void add(const FAccurater& inAcc){
l2Diff += inAcc.getl2Diff();
l2Dot += inAcc.getl2Dot();
max = Max(max,inAcc.getmax());
maxDiff = Max(maxDiff,inAcc.getInfNorm());
nbElements += inAcc.getNbElements();
}
FReal getl2Diff() const{
return l2Diff;
}
FReal getl2Dot() const{
return l2Dot;
}
FReal getmax() const{
return max;
}
int getNbElements() const{
return nbElements;
}
void setNbElements(const int & n) {
nbElements = n;
}
/** Get the root mean squared error*/
FReal getL2Norm() const{
return Sqrt(l2Diff );
}
/** Get the L2 norm */
FReal getRMSError() const{
return Sqrt(l2Diff /static_cast<FReal>(nbElements));
}
/** Get the inf norm */
FReal getInfNorm() const{
return maxDiff;
}
/** Get the L2 norm */
FReal getRelativeL2Norm() const{
return Sqrt(l2Diff / l2Dot);
}
/** Get the inf norm */
FReal getRelativeInfNorm() const{
return maxDiff / max;
}
/** Print */
template <class StreamClass>
friend StreamClass& operator<<(StreamClass& output, const FAccurater& inAccurater){
output << "[Error] Relative L2Norm = " << inAccurater.getRelativeL2Norm() << " \t RMS Norm = " << inAccurater.getRMSError() << " \t Relative Inf = " << inAccurater.getRelativeInfNorm();
return output;
}
void reset()
{
l2Dot = FReal(0);
l2Diff = FReal(0);;
max = FReal(0);
maxDiff = FReal(0);
nbElements = 0 ;
}
};
};
......@@ -517,12 +517,12 @@ inline __m128d FMath::ConvertTo<__m128d,double>(const double val){
#ifdef ScalFMM_USE_AVX
template <>
inline __m256 FMath::One<__m256>(){
return _mm256_set_ps1(1.0);
return _mm256_set1_ps(1.0);
}
template <>
inline __m256d FMath::One<__m256d>(){
return _mm256_set_pd1(1.0);
return _mm256_set1_pd(1.0);
}
template <>
......@@ -537,12 +537,12 @@ inline __m256d FMath::Zero<__m256d>(){
template <>
inline __m256 FMath::ConvertTo<__m256,float>(const float val){
return _mm256_set_ps1(val);
return _mm256_set1_ps(val);
}
template <>
inline __m256d FMath::ConvertTo<__m256d,double>(const double val){
return _mm256_set_pd1(val);
return _mm256_set1_pd(val);
}
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment