Commit aabb91bc authored by Berenger Bramas's avatar Berenger Bramas

Remove for all vector types the operators on SIMD vector types (and explicitly...

Remove, for all vector types, the operators defined on the SIMD vector types (and explicitly call the intrinsic functions in the Inastemp classes instead)
parent 96ce4479
#include "immintrin.h"
// Compile-only check: applies the compound `+=` operator directly to two
// __m256d values. Both operands are left uninitialized, so the only useful
// outcome is whether this translation unit compiles, not the value computed.
// NOTE(review): presumably one of the check<TYPE>pe.cpp operator probes used
// by the CMake try_compile step - confirm against the build scripts.
int main() {
__m256d tx, ty ;
tx += ty ;
return 0;
}
#include "immintrin.h"
// Compile-only check for `+=` on __m512 values.
// The operator expression is only compiled when __MIC__ is defined; without
// that macro main() reduces to `return 0;` and therefore always compiles.
// The operands are left uninitialized, so only compilation is meaningful.
int main() {
#ifdef __MIC__
__m512 tx, ty ;
tx += ty ;
#endif
return 0;
}
#include "immintrin.h"
// Compile-only check: applies the compound `+=` operator directly to two
// __m512d values. Both operands are left uninitialized, so the only useful
// outcome is whether this translation unit compiles, not the value computed.
int main() {
__m512d tx, ty ;
tx += ty ;
return 0;
}
#include "immintrin.h"
// Compile-only check: applies the compound `+=` operator directly to two
// __m512d values. Both operands are left uninitialized, so the only useful
// outcome is whether this translation unit compiles, not the value computed.
int main() {
__m512d tx, ty ;
tx += ty ;
return 0;
}
#include "immintrin.h"
// Compile-only check: applies the compound `+=` operator directly to two
// __m512d values. Both operands are left uninitialized, so the only useful
// outcome is whether this translation unit compiles, not the value computed.
int main() {
__m512d tx, ty ;
tx += ty ;
return 0;
}
......@@ -13,15 +13,11 @@ string(TOUPPER ${TYPE} UTYPE)
# Per-type test sources, both located in CMakeModules/${UTYPE}:
#   checkTypeFile - compileTest${UTYPE}.cpp
#   checkPeFile   - check${UTYPE}pe.cpp (source of the operator probe)
set(checkTypeFile "${PROJECT_SOURCE_DIR}/CMakeModules/${UTYPE}/compileTest${UTYPE}.cpp")
set(checkPeFile "${PROJECT_SOURCE_DIR}/CMakeModules/${UTYPE}/check${UTYPE}pe.cpp")

# Abort the configuration if either source is missing.
# The paths are quoted so that a path containing ';' stays a single argument
# of if() instead of being split into several.
if(NOT EXISTS "${checkTypeFile}")
    message(FATAL_ERROR "The GetCompilerInfosFile does not exist (${checkTypeFile})")
endif()
if(NOT EXISTS "${checkPeFile}")
    message(FATAL_ERROR "The GetCompilerInfosFile does not exist (${checkPeFile})")
endif()
try_compile(COMPILE_RESULT ${CMAKE_CURRENT_BINARY_DIR}
......@@ -35,25 +31,6 @@ if(${COMPILE_RESULT})
if($ENV{VERBOSE})
message(STATUS "GetCompilerInfos -- The compiler can compile ${TYPE}")
endif()
# Operator probe: try to build ${checkPeFile} with the flags of this type.
# COMPILE_RESULT_PE receives the outcome and COMPILE_OUTPUT_PE the compiler
# output. The paths are quoted so they always stay single arguments.
try_compile(COMPILE_RESULT_PE "${CMAKE_CURRENT_BINARY_DIR}"
    "${checkPeFile}"
    COMPILE_DEFINITIONS "-Wno-error ${${UTYPE}_FLAGS}"
    OUTPUT_VARIABLE COMPILE_OUTPUT_PE)

# COMPILER_INFO_${UTYPE}_NOOP is OFF when the probe compiled and ON when it
# did not. The variable name is given to if() directly: if() dereferences it
# once, whereas if(${COMPILE_RESULT_PE}) would expand the value first and
# then let if() interpret the expanded text a second time.
if(COMPILE_RESULT_PE)
    set(COMPILER_INFO_${UTYPE}_NOOP OFF)
    # Status output is only printed when the VERBOSE environment variable is true.
    if($ENV{VERBOSE})
        message(STATUS "GetCompilerInfos -- The compiler has operators for ${TYPE}")
    endif()
else()
    set(COMPILER_INFO_${UTYPE}_NOOP ON)
    if($ENV{VERBOSE})
        message(STATUS "GetCompilerInfos -- The compiler needs operators for ${TYPE} : ${COMPILE_OUTPUT_PE}")
    endif()
endif()
else()
set(COMPILER_INFO_${UTYPE} OFF)
......
#include <xmmintrin.h> // SSE
#include <emmintrin.h> //SSE2
#include <pmmintrin.h> //SSE3
// Compile-only check: applies the compound `+=` operator directly to two
// __m128d values. Both operands are left uninitialized, so the only useful
// outcome is whether this translation unit compiles, not the value computed.
int main() {
__m128d tx, ty ;
tx += ty ;
return 0;
}
#include <xmmintrin.h> // SSE
#include <emmintrin.h> //SSE2
#include <pmmintrin.h> //SSE3
#include <tmmintrin.h> //SSSE3
#include <smmintrin.h> //SSE41
// Compile-only check: applies the compound `+=` operator directly to two
// __m128d values. Both operands are left uninitialized, so the only useful
// outcome is whether this translation unit compiles, not the value computed.
int main() {
__m128d tx, ty ;
tx += ty ;
return 0;
}
#include <xmmintrin.h> // SSE
#include <emmintrin.h> //SSE2
#include <pmmintrin.h> //SSE3
#include <tmmintrin.h> //SSSE3
#include <smmintrin.h> //SSE41
#include <nmmintrin.h> //SSE42
// Compile-only check: applies the compound `+=` operator directly to two
// __m128d values. Both operands are left uninitialized, so the only useful
// outcome is whether this translation unit compiles, not the value computed.
int main() {
__m128d tx, ty ;
tx += ty ;
return 0;
}
#include <xmmintrin.h> // SSE
#include <emmintrin.h> //SSE2
#include <pmmintrin.h> //SSE3
#include <tmmintrin.h> //SSSE3
// Compile-only check: applies the compound `+=` operator directly to two
// __m128d values. Both operands are left uninitialized, so the only useful
// outcome is whether this translation unit compiles, not the value computed.
int main() {
__m128d tx, ty ;
tx += ty ;
return 0;
}
///////////////////////////////////////////////////////////////////////////
// Inastemp - Berenger Bramas MPCDF - 2016
// Under MIT Licence, please you must read the LICENCE file.
///////////////////////////////////////////////////////////////////////////
#ifndef INAAVXOPERATORS_HPP
#define INAAVXOPERATORS_HPP
#include "InastempConfig.h"
#ifndef INASTEMP_USE_AVX
#error InaAVXOperators is included but AVX is not enable in the configuration
#endif
#include <immintrin.h>
#include <cmath>
#ifdef INASTEMP_USE_AVX_OPERATORS
// Compound-assignment operators for __m256d (double precision).
// Each one writes the result of the matching _mm256_*_pd intrinsic back
// into `a` and returns a reference to `a`.

// a = a + b
inline __m256d& operator+=(__m256d& a, const __m256d& b) {
    a = _mm256_add_pd(a, b);
    return a;
}

// a = a - b
inline __m256d& operator-=(__m256d& a, const __m256d& b) {
    a = _mm256_sub_pd(a, b);
    return a;
}

// a = a * b
inline __m256d& operator*=(__m256d& a, const __m256d& b) {
    a = _mm256_mul_pd(a, b);
    return a;
}

// a = a / b
inline __m256d& operator/=(__m256d& a, const __m256d& b) {
    a = _mm256_div_pd(a, b);
    return a;
}
// Binary arithmetic operators for __m256d (double precision).
// Neither operand is modified; the result of the matching _mm256_*_pd
// intrinsic is returned by value.

// a + b
inline __m256d operator+(const __m256d& a, const __m256d& b) {
    const __m256d sum = _mm256_add_pd(a, b);
    return sum;
}

// a - b
inline __m256d operator-(const __m256d& a, const __m256d& b) {
    const __m256d difference = _mm256_sub_pd(a, b);
    return difference;
}

// v1 * v2
inline __m256d operator*(const __m256d& v1, const __m256d& v2) {
    const __m256d product = _mm256_mul_pd(v1, v2);
    return product;
}

// v1 / v2
inline __m256d operator/(const __m256d& v1, const __m256d& v2) {
    const __m256d quotient = _mm256_div_pd(v1, v2);
    return quotient;
}
// Compound-assignment operators for __m256 (single precision).
// Each one writes the result of the matching _mm256_*_ps intrinsic back
// into `a` and returns a reference to `a`.

// a = a + b
inline __m256& operator+=(__m256& a, const __m256& b) {
    a = _mm256_add_ps(a, b);
    return a;
}

// a = a - b
inline __m256& operator-=(__m256& a, const __m256& b) {
    a = _mm256_sub_ps(a, b);
    return a;
}

// a = a * b
inline __m256& operator*=(__m256& a, const __m256& b) {
    a = _mm256_mul_ps(a, b);
    return a;
}

// a = a / b
inline __m256& operator/=(__m256& a, const __m256& b) {
    a = _mm256_div_ps(a, b);
    return a;
}
// Binary arithmetic operators for __m256 (single precision).
// Neither operand is modified; the result of the matching _mm256_*_ps
// intrinsic is returned by value.

// a + b
inline __m256 operator+(const __m256& a, const __m256& b) {
    const __m256 sum = _mm256_add_ps(a, b);
    return sum;
}

// a - b
inline __m256 operator-(const __m256& a, const __m256& b) {
    const __m256 difference = _mm256_sub_ps(a, b);
    return difference;
}

// v1 * v2
inline __m256 operator*(const __m256& v1, const __m256& v2) {
    const __m256 product = _mm256_mul_ps(v1, v2);
    return product;
}

// v1 / v2
inline __m256 operator/(const __m256& v1, const __m256& v2) {
    const __m256 quotient = _mm256_div_ps(v1, v2);
    return quotient;
}
#endif
#endif
......@@ -6,7 +6,6 @@
#define INAVECAVXDOUBLE_HPP
#include "InastempConfig.h"
#include "InaAVXOperators.hpp"
#include "Common/InaIfElse.hpp"
#include "Common/InaUtils.hpp"
......@@ -281,19 +280,23 @@ public:
const __m256d COEFF_P5_E = _mm256_set1_pd(double(InaFastExp::GetCoefficient9_1()));
const __m256d COEFF_P5_F = _mm256_set1_pd(double(InaFastExp::GetCoefficient9_0()));
__m256d x = vec * COEFF_LOG2E;
__m256d x = _mm256_mul_pd(vec, COEFF_LOG2E);
const __m256d fractional_part = x - InaVecAVX(x).floor().vec;
const __m256d fractional_part = _mm256_sub_pd(x, InaVecAVX(x).floor().vec);
__m256d factor = ((((((((COEFF_P5_X * fractional_part + COEFF_P5_Y)
* fractional_part + COEFF_P5_Z) * fractional_part + COEFF_P5_A)
* fractional_part + COEFF_P5_B) * fractional_part + COEFF_P5_C)
* fractional_part + COEFF_P5_D) * fractional_part + COEFF_P5_E)
* fractional_part + COEFF_P5_F);
__m256d factor = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(
_mm256_mul_pd(_mm256_add_pd( _mm256_mul_pd(_mm256_add_pd(
_mm256_mul_pd(_mm256_add_pd( _mm256_mul_pd(_mm256_add_pd(
_mm256_mul_pd(_mm256_add_pd( _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(
COEFF_P5_X, fractional_part), COEFF_P5_Y), fractional_part),
COEFF_P5_Z),fractional_part), COEFF_P5_A), fractional_part),
COEFF_P5_B), fractional_part), COEFF_P5_C),fractional_part),
COEFF_P5_D), fractional_part), COEFF_P5_E),fractional_part),
COEFF_P5_F);
x -= factor;
x = _mm256_sub_pd(x,factor);
x = (COEFF_A * x + COEFF_B);
x = _mm256_add_pd(_mm256_mul_pd(COEFF_A, x), COEFF_B);
__m128d valupper = _mm256_extractf128_pd(x, 1);
__m128d vallower = _mm256_castpd256_pd128(x);
......@@ -317,17 +320,20 @@ public:
const __m256d COEFF_P5_E = _mm256_set1_pd(double(InaFastExp::GetCoefficient4_1()));
const __m256d COEFF_P5_F = _mm256_set1_pd(double(InaFastExp::GetCoefficient4_0()));
__m256d x = vec * COEFF_LOG2E;
__m256d x = _mm256_mul_pd(vec, COEFF_LOG2E);
const __m256d fractional_part = x - InaVecAVX(x).floor().vec;
const __m256d fractional_part = _mm256_sub_pd(x, InaVecAVX(x).floor().vec);
__m256d factor = (((COEFF_P5_C * fractional_part + COEFF_P5_D)
* fractional_part + COEFF_P5_E)
* fractional_part + COEFF_P5_F);
__m256d factor = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(
_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(
COEFF_P5_C, fractional_part),
COEFF_P5_D), fractional_part),
COEFF_P5_E), fractional_part),
COEFF_P5_F);
x -= factor;
x = _mm256_sub_pd(x,factor);
x = (COEFF_A * x + COEFF_B);
x = _mm256_add_pd(_mm256_mul_pd(COEFF_A, x), COEFF_B);
__m128d valupper = _mm256_extractf128_pd(x, 1);
__m128d vallower = _mm256_castpd256_pd128(x);
......@@ -562,7 +568,7 @@ public:
}
inline InaVecAVX<double> pow(size_t power) const{
return InaUtils::FastPow<InaVecAVX<double>>(vec, power);
return InaUtils::FastPow<InaVecAVX<double>>(*this, power);
}
};
......
......@@ -6,7 +6,6 @@
#define INAVECAVXFLOAT_HPP
#include "InastempConfig.h"
#include "InaAVXOperators.hpp"
#include "Common/InaIfElse.hpp"
#include "Common/InaUtils.hpp"
......@@ -290,19 +289,18 @@ public:
const __m256 COEFF_P5_E = _mm256_set1_ps(float(InaFastExp::GetCoefficient6_1()));
const __m256 COEFF_P5_F = _mm256_set1_ps(float(InaFastExp::GetCoefficient6_0()));
__m256 x = vec * COEFF_LOG2E;
__m256 x = _mm256_mul_ps(vec, COEFF_LOG2E);
const __m256 fractional_part = x - InaVecAVX(x).floor().vec;
const __m256 fractional_part = _mm256_sub_ps(x, InaVecAVX(x).floor().vec);
__m256 factor = (((((COEFF_P5_A * fractional_part + COEFF_P5_B)
* fractional_part + COEFF_P5_C)
* fractional_part + COEFF_P5_D)
* fractional_part + COEFF_P5_E)
* fractional_part + COEFF_P5_F);
__m256 factor = _mm256_add_ps(_mm256_mul_ps(_mm256_add_ps( _mm256_mul_ps(_mm256_add_ps(
_mm256_mul_ps(_mm256_add_ps( _mm256_mul_ps(_mm256_add_ps(_mm256_mul_ps(
COEFF_P5_A, fractional_part), COEFF_P5_B), fractional_part), COEFF_P5_C),fractional_part),
COEFF_P5_D), fractional_part), COEFF_P5_E),fractional_part), COEFF_P5_F);
x -= factor;
x = _mm256_sub_ps(x,factor);
__m256i castedInteger = _mm256_cvtps_epi32(COEFF_A * x + COEFF_B);
__m256i castedInteger = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(COEFF_A, x), COEFF_B));
return _mm256_castsi256_ps(castedInteger);
#endif
......@@ -316,15 +314,19 @@ public:
const __m256 COEFF_P5_E = _mm256_set1_ps(float(InaFastExp::GetCoefficient3_1()));
const __m256 COEFF_P5_F = _mm256_set1_ps(float(InaFastExp::GetCoefficient3_0()));
__m256 x = vec * COEFF_LOG2E;
__m256 x = _mm256_mul_ps(vec, COEFF_LOG2E);
const __m256 fractional_part = x - InaVecAVX(x).floor().vec;
const __m256 fractional_part = _mm256_sub_ps(x, InaVecAVX(x).floor().vec);
__m256 factor = ((COEFF_P5_D * fractional_part + COEFF_P5_E) * fractional_part + COEFF_P5_F);
__m256 factor = _mm256_add_ps(_mm256_mul_ps(
_mm256_add_ps(_mm256_mul_ps(
COEFF_P5_D, fractional_part),
COEFF_P5_E), fractional_part),
COEFF_P5_F);
x -= factor;
x = _mm256_sub_ps(x,factor);
__m256i castedInteger = _mm256_cvtps_epi32(COEFF_A * x + COEFF_B);
__m256i castedInteger = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(COEFF_A, x), COEFF_B));
return _mm256_castsi256_ps(castedInteger);
}
......@@ -550,7 +552,7 @@ public:
}
inline InaVecAVX<float> pow(size_t power) const{
return InaUtils::FastPow<InaVecAVX<float>>(vec, power);
return InaUtils::FastPow<InaVecAVX<float>>(*this, power);
}
};
......
///////////////////////////////////////////////////////////////////////////
// Inastemp - Berenger Bramas MPCDF - 2016
// Under MIT Licence, please you must read the LICENCE file.
///////////////////////////////////////////////////////////////////////////
#ifndef INAAVX2OPERATORS_HPP
#define INAAVX2OPERATORS_HPP
#include "InastempConfig.h"
#ifndef INASTEMP_USE_AVX2
#error InaAVX2Operators is included but AVX2 is not enable in the configuration
#endif
#ifdef INASTEMP_USE_AVX2_OPERATORS
#include "AVX/InaAVXOperators.hpp"
#endif
#endif
......@@ -7,7 +7,6 @@
#include "InastempConfig.h"
#include "AVX/InaVecAVXDouble.hpp"
#include "InaAVX2Operators.hpp"
#ifndef INASTEMP_USE_AVX2
#error InaVecAVX2<double> is included but AVX2 is not enable in the configuration
......
......@@ -7,7 +7,6 @@
#include "InastempConfig.h"
#include "AVX/InaVecAVXFloat.hpp"
#include "InaAVX2Operators.hpp"
#ifndef INASTEMP_USE_AVX2
#error InaVecAVX2<float> is included but AVX2 is not enable in the configuration
......
///////////////////////////////////////////////////////////////////////////
// Inastemp - Berenger Bramas MPCDF - 2016
// Under MIT Licence, please you must read the LICENCE file.
///////////////////////////////////////////////////////////////////////////
#ifndef INAAVX512COMMONOPERATORS_HPP
#define INAAVX512COMMONOPERATORS_HPP
#include "InastempConfig.h"
#ifndef INASTEMP_USE_AVX512COMMON
#error InaAVX512COMMONOperators is included but AVX512COMMON is not enable in the configuration
#endif
#include <immintrin.h>
#include <cmath>
#ifdef INASTEMP_USE_AVX512COMMON_OPERATORS
// Compound-assignment operators for __m512d (double precision).
// Each one writes the result of the matching _mm512_*_pd intrinsic back
// into `a` and returns a reference to `a`.

// a = a + b
inline __m512d& operator+=(__m512d& a, const __m512d& b) {
    a = _mm512_add_pd(a, b);
    return a;
}

// a = a - b
inline __m512d& operator-=(__m512d& a, const __m512d& b) {
    a = _mm512_sub_pd(a, b);
    return a;
}

// a = a * b
inline __m512d& operator*=(__m512d& a, const __m512d& b) {
    a = _mm512_mul_pd(a, b);
    return a;
}

// a = a / b
inline __m512d& operator/=(__m512d& a, const __m512d& b) {
    a = _mm512_div_pd(a, b);
    return a;
}
// Binary arithmetic operators for __m512d (double precision).
// Neither operand is modified; the result of the matching _mm512_*_pd
// intrinsic is returned by value.

// a + b
inline __m512d operator+(const __m512d& a, const __m512d& b) {
    const __m512d sum = _mm512_add_pd(a, b);
    return sum;
}

// a - b
inline __m512d operator-(const __m512d& a, const __m512d& b) {
    const __m512d difference = _mm512_sub_pd(a, b);
    return difference;
}

// v1 * v2
inline __m512d operator*(const __m512d& v1, const __m512d& v2) {
    const __m512d product = _mm512_mul_pd(v1, v2);
    return product;
}

// v1 / v2
inline __m512d operator/(const __m512d& v1, const __m512d& v2) {
    const __m512d quotient = _mm512_div_pd(v1, v2);
    return quotient;
}
// Compound-assignment operators for __m512 (single precision).
// Each one writes the result of the matching _mm512_*_ps intrinsic back
// into `a` and returns a reference to `a`.

// a = a + b
inline __m512& operator+=(__m512& a, const __m512& b) {
    a = _mm512_add_ps(a, b);
    return a;
}

// a = a - b
inline __m512& operator-=(__m512& a, const __m512& b) {
    a = _mm512_sub_ps(a, b);
    return a;
}

// a = a * b
inline __m512& operator*=(__m512& a, const __m512& b) {
    a = _mm512_mul_ps(a, b);
    return a;
}

// a = a / b
inline __m512& operator/=(__m512& a, const __m512& b) {
    a = _mm512_div_ps(a, b);
    return a;
}
// Binary arithmetic operators for __m512 (single precision).
// Neither operand is modified; the result of the matching _mm512_*_ps
// intrinsic is returned by value.

// a + b
inline __m512 operator+(const __m512& a, const __m512& b) {
    const __m512 sum = _mm512_add_ps(a, b);
    return sum;
}

// a - b
inline __m512 operator-(const __m512& a, const __m512& b) {
    const __m512 difference = _mm512_sub_ps(a, b);
    return difference;
}

// v1 * v2
inline __m512 operator*(const __m512& v1, const __m512& v2) {
    const __m512 product = _mm512_mul_ps(v1, v2);
    return product;
}

// v1 / v2
inline __m512 operator/(const __m512& v1, const __m512& v2) {
    const __m512 quotient = _mm512_div_ps(v1, v2);
    return quotient;
}
#endif
#endif
......@@ -6,7 +6,6 @@
#define INAVECAVX512COMMONDOUBLE_HPP
#include "InastempConfig.h"
#include "InaAVX512COMMONOperators.hpp"
#include "Common/InaIfElse.hpp"
#include "Common/InaUtils.hpp"
......@@ -295,19 +294,23 @@ public:
const __m512d COEFF_P5_E = _mm512_set1_pd(double(InaFastExp::GetCoefficient9_1()));
const __m512d COEFF_P5_F = _mm512_set1_pd(double(InaFastExp::GetCoefficient9_0()));
__m512d x = vec * COEFF_LOG2E;
__m512d x = _mm512_mul_pd(vec, COEFF_LOG2E);
const __m512d fractional_part = x - InaVecAVX512COMMON(x).floor().vec;
const __m512d fractional_part = _mm512_sub_pd(x, InaVecAVX512COMMON(x).floor().vec);
__m512d factor = ((((((((COEFF_P5_X * fractional_part + COEFF_P5_Y)
* fractional_part + COEFF_P5_Z) * fractional_part + COEFF_P5_A)
* fractional_part + COEFF_P5_B) * fractional_part + COEFF_P5_C)
* fractional_part + COEFF_P5_D) * fractional_part + COEFF_P5_E)
* fractional_part + COEFF_P5_F);
__m512d factor = _mm512_add_pd(_mm512_mul_pd(_mm512_add_pd(
_mm512_mul_pd(_mm512_add_pd( _mm512_mul_pd(_mm512_add_pd(
_mm512_mul_pd(_mm512_add_pd( _mm512_mul_pd(_mm512_add_pd(
_mm512_mul_pd(_mm512_add_pd( _mm512_mul_pd(_mm512_add_pd(_mm512_mul_pd(
COEFF_P5_X, fractional_part), COEFF_P5_Y), fractional_part),
COEFF_P5_Z),fractional_part), COEFF_P5_A), fractional_part),
COEFF_P5_B), fractional_part), COEFF_P5_C),fractional_part),
COEFF_P5_D), fractional_part), COEFF_P5_E),fractional_part),
COEFF_P5_F);
x -= factor;
x = _mm512_sub_pd(x,factor);
x = (COEFF_A * x + COEFF_B);
x = _mm512_add_pd(_mm512_mul_pd(COEFF_A, x), COEFF_B);
alignas(64) double allvalreal[VecLength];
_mm512_store_pd(allvalreal, x);
......@@ -329,17 +332,20 @@ public:
const __m512d COEFF_P5_E = _mm512_set1_pd(double(InaFastExp::GetCoefficient4_1()));
const __m512d COEFF_P5_F = _mm512_set1_pd(double(InaFastExp::GetCoefficient4_0()));
__m512d x = vec * COEFF_LOG2E;
__m512d x = _mm512_mul_pd(vec, COEFF_LOG2E);
const __m512d fractional_part = x - InaVecAVX512COMMON(x).floor().vec;
const __m512d fractional_part = _mm512_sub_pd(x, InaVecAVX512COMMON(x).floor().vec);
__m512d factor = (((COEFF_P5_C * fractional_part + COEFF_P5_D)
* fractional_part + COEFF_P5_E)
* fractional_part + COEFF_P5_F);
__m512d factor = _mm512_add_pd(_mm512_mul_pd(_mm512_add_pd(
_mm512_mul_pd(_mm512_add_pd(_mm512_mul_pd(
COEFF_P5_C, fractional_part),
COEFF_P5_D), fractional_part),
COEFF_P5_E), fractional_part),
COEFF_P5_F);
x -= factor;
x = _mm512_sub_pd(x,factor);
x = (COEFF_A * x + COEFF_B);
x = _mm512_add_pd(_mm512_mul_pd(COEFF_A, x), COEFF_B);
alignas(64) double allvalreal[VecLength];
_mm512_store_pd(allvalreal, x);
......@@ -354,7 +360,7 @@ public:
inline InaVecAVX512COMMON rsqrt() const {
// _mm512_rsqrt28_pd(vec) => 1E-10 error
return _mm512_set1_pd(1) / _mm512_sqrt_pd(vec);
return _mm512_div_pd(_mm512_set1_pd(1), _mm512_sqrt_pd(vec));
}
inline InaVecAVX512COMMON abs() const {
......@@ -591,7 +597,7 @@ public:
}
inline InaVecAVX512COMMON<double> pow(size_t power) const{
return InaUtils::FastPow<InaVecAVX512COMMON<double>>(vec, power);
return InaUtils::FastPow<InaVecAVX512COMMON<double>>(*this, power);
}
};
......
......@@ -6,7 +6,6 @@
#define INAVECAVX512COMMONFLOAT_HPP
#include "InastempConfig.h"
#include "InaAVX512COMMONOperators.hpp"
#include "Common/InaIfElse.hpp"
#include "Common/InaUtils.hpp"
......@@ -317,19 +316,18 @@ public:
const __m512 COEFF_P5_E = _mm512_set1_ps(float(InaFastExp::GetCoefficient6_1()));
const __m512 COEFF_P5_F = _mm512_set1_ps(float(InaFastExp::GetCoefficient6_0()));
__m512 x = vec * COEFF_LOG2E;
__m512 x = _mm512_mul_ps(vec, COEFF_LOG2E);
const __m512 fractional_part =