Commit 1bf235b2 authored by Berenger Bramas's avatar Berenger Bramas

Add skylake support -- create dependencies between types - add AVX 512 COMMON...

Add skylake support -- create dependencies between types - add AVX 512 COMMON for knl and skl - add skl files
parent c3b60fef
......@@ -81,20 +81,35 @@ include(GetCompilerInfos)
GetCompilerInfos()
# All types from worse to best (ADD-NEW-HERE)
set(ALL_TYPES "SSE3;SSSE3;SSE41;SSE42;AVX;AVX2;AVX512KNL")
set(ALL_TYPES "SSE3;SSSE3;SSE41;SSE42;AVX;AVX2;AVX512COMMON;AVX512KNL;AVX512SKL")
set(ALL_USED_TYPES "SCALAR")
set(INASTEMP_USE_SCALAR ON)
set(INASTEMP_CXX_FLAGS "-std=c++11")
# Set custom cpu <=> vec rules (maybe ADD-NEW-HERE if needed)
set(AVX512KNL_CPU_RULES "AVX512ER;AVX512PF;AVX512ER")
foreach(TYPE ${ALL_TYPES})
set(AVX512COMMON_CPU_RULES "AVX512F;AVX512ER")
set(AVX512KNL_CPU_RULES "AVX512F;AVX512ER;AVX512PF")
set(AVX512SKL_CPU_RULES "AVX512F;AVX512ER;AVX512VL;AVX512BW;AVX512DQ")
# Dependencies between types (maybe ADD-NEW-HERE if needed)
set(SSSE3_DEP "SSE3")
set(SSE41_DEP "SSSE3")
set(SSE42_DEP "SSE41")
set(AVX2_DEP "AVX")
set(AVX512KNL_DEP "AVX512COMMON")
set(AVX512SKL_DEP "AVX512COMMON")
# Enforce rules
set(ALL_TYPES_REVERSE ${ALL_TYPES})
list(REVERSE ALL_TYPES_REVERSE)
foreach(TYPE ${ALL_TYPES_REVERSE})
if( COMPILER_INFO_${TYPE} )
if(NOT DEFINED ${TYPE}_CPU_RULES)
set( ${TYPE}_CPU_RULES "${TYPE}")
endif()
# Value by default is based on CPU capacities
set(${TYPE}_SUPPORTED_BY_CPU TRUE)
foreach(RULE ${${TYPE}_CPU_RULES})
if(NOT DEFINED CPU_INFO_${RULE})
......@@ -107,7 +122,7 @@ foreach(TYPE ${ALL_TYPES})
endif()
endforeach()
OPTION( INASTEMP_USE_${TYPE} "Set to ON to compile with ${TYPE} support (default based on CPU capacities)" ${${TYPE}_SUPPORTED_BY_CPU} )
OPTION( INASTEMP_USE_${TYPE} "Set to ON to compile with ${TYPE} support (default based on CPU capacities)" ${${TYPE}_SUPPORTED_BY_CPU} )
if(${INASTEMP_USE_${TYPE}})
set(INASTEMP_CXX_FLAGS "${INASTEMP_CXX_FLAGS} ${${TYPE}_FLAGS}")
......@@ -118,6 +133,19 @@ foreach(TYPE ${ALL_TYPES})
else()
set(INASTEMP_USE_${TYPE}_OPERATORS OFF)
endif()
# We turn it on if it is needed by another type
foreach(DEPTYPE ${${TYPE}_DEP})
if( NOT COMPILER_INFO_${TYPE} )
MESSAGE(ERROR "Main -- Type ${TYPE} depends on type ${DEPTYPE} which is not supported by the compiler.")
endif()
if( NOT INASTEMP_USE_${DEPTYPE} )
if($ENV{VERBOSE})
MESSAGE(STATUS "Main -- ${DEPTYPE} is turn on because it is needed by ${TYPE}")
endif()
set(INASTEMP_USE_${DEPTYPE} ON)
endif()
endforeach()
endif()
else()
if($ENV{VERBOSE})
......
#include "immintrin.h"
int main() {
__m512d tx, ty ;
tx += ty ;
return 0;
}
#include <x86intrin.h>
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h> // SSSE3
#include <smmintrin.h> // SSE4
#include <immintrin.h> // AVX
int main(){
__m512d res0d, res1d;
res0d = _mm512_add_pd(res0d, res1d);
__m512 res0, res1;
res0 = _mm512_add_ps(res0, res1);
// er
{
__m512d src;
__mmask8 k;
__m512d a;
_mm512_mask_rcp28_pd ( src, k, a);
}
// cd
{
__mmask8 k;
__m512i tmp = _mm512_broadcastmb_epi64 (k);
}
return 0;
}
......@@ -15,6 +15,25 @@ int main(){
__m512 res0, res1;
res0 = _mm512_add_ps(res0, res1);
// For knl only
// pf
{
int base_addr;
__m512i vindex;
_mm512_prefetch_i64scatter_ps (&base_addr, vindex, 1, _MM_HINT_T0);
}
// er
{
__m512d src;
__mmask8 k;
__m512d a;
_mm512_mask_rcp28_pd ( src, k, a);
}
// cd
{
__mmask8 k;
__m512i tmp = _mm512_broadcastmb_epi64 (k);
}
return 0;
}
#include "immintrin.h"
int main() {
__m512d tx, ty ;
tx += ty ;
return 0;
}
#include <x86intrin.h>
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h> // SSSE3
#include <smmintrin.h> // SSE4
#include <immintrin.h> // AVX
int main(){
__m512d res0d, res1d;
res0d = _mm512_add_pd(res0d, res1d);
__m512 res0, res1;
res0 = _mm512_add_ps(res0, res1);
// For skl only
// er
{
__m512d src;
__mmask8 k;
__m512d a;
_mm512_mask_rcp28_pd ( src, k, a);
}
// cd
{
__mmask8 k;
__m512i tmp = _mm512_broadcastmb_epi64 (k);
}
// vl
{
__m128i a;
__m128i tmp = _mm_abs_epi64 (a);
}
// bw
{
__m512i a;
__m512i b;
__m512i tmp = _mm512_add_epi8 ( a, b);
}
// vq
{
__m128d a;
__m512d tmp = _mm512_broadcast_f64x2 (a);
}
return 0;
}
......@@ -79,7 +79,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-mAVX ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-march=core-avx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-xCOMMON-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-xCOMMON-AVX512 -xMIC-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-xCOMMON-AVX512 -xCORE-AVX512 ${ARCH_NATIVE_FLAG}")
else() # INTEL LINUX
set(SSE3_FLAGS "-msse3 ${ARCH_NATIVE_FLAG}")
set(SSSE3_FLAGS "-mssse3 ${ARCH_NATIVE_FLAG}")
......@@ -87,7 +89,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-march=core-avx-i ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-march=core-avx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-xCOMMON-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-xCOMMON-AVX512 -xMIC-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-xCOMMON-AVX512 -xCORE-AVX512 ${ARCH_NATIVE_FLAG}")
endif()
else()
if(APPLE) # GCC APPLE
......@@ -97,7 +101,9 @@ else()
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-mavx ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-mavx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-mavx512f -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-mavx512f -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq ${ARCH_NATIVE_FLAG}")
else() # GCC LINUX
set(SSE3_FLAGS "-msse3 ${ARCH_NATIVE_FLAG}")
set(SSSE3_FLAGS "-mssse3 ${ARCH_NATIVE_FLAG}")
......@@ -105,12 +111,14 @@ else()
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-mavx ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-mavx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-mavx512f -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-mavx512f -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq ${ARCH_NATIVE_FLAG}")
endif(APPLE)
endif()
# (ADD-NEW-HERE)
set(ALL_TYPES "SSE3;SSSE3;SSE41;SSE42;AVX;AVX2;AVX512KNL")
set(ALL_TYPES "SSE3;SSSE3;SSE41;SSE42;AVX;AVX2;AVX512COMMON;AVX512KNL;AVX512SKL")
if($ENV{VERBOSE})
foreach(TYPE ${ALL_TYPES})
......
......@@ -49,7 +49,7 @@ Template C++ source-code Compiled Template C++ code
## Features of Inastemp
- The following x86 SIMD types are currently supported:
- SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512-KNL
- SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512-KNL, AVX512-SKL
- arithmetic operators `*/+-` are provided
- CPU capacities are detected automatically during the CMake stage
- The compiler capacities are detected automatically during the CMake stage
......@@ -96,13 +96,14 @@ For example, here is a part of the output of the `ccmake ..` command on a AVX2 C
INASTEMP_USE_AVX ON
INASTEMP_USE_AVX2 ON
INASTEMP_USE_AVX512KNL OFF
INASTEMP_USE_AVX512SKL OFF
INASTEMP_USE_SSE3 ON
INASTEMP_USE_SSE41 ON
INASTEMP_USE_SSE42 ON
INASTEMP_USE_SSSE3 ON
```
`AVX512KNL` is supported by the compiler but not by the hardware, so it is turned `OFF` but could be turn to `ON` if needed.
`AVX512KNL` and `AVX512SKL` are supported by the compiler but not by the hardware, so they are turned `OFF` but could be turn to `ON` if needed.
By turning the cmake variable `INASTEMP_ISDE_CPU` to `ON` the hardware detection is done over intel SDE.
In this case, one can ask Inastemp to check any hardware (passing the appropriate options to isde).
......@@ -138,7 +139,7 @@ Such that, if one wants to compile only some files with these specific flags, it
### Compilers support
Inastemp was developed and tested using the following compilers on the x86_64 architecture.
- Gcc 6.1 (earlier versions if AVX512/KNL is not used, like 4.9)
- Gcc 6.1 (earlier versions if AVX512/KNL/SKL are not used, like 4.9)
- Clang 3.5
- Intel 16.0
Earlier versions may work as well.
......
///////////////////////////////////////////////////////////////////////////
// Inastemp - Berenger Bramas MPCDF - 2016
// Under MIT Licence, please you must read the LICENCE file.
///////////////////////////////////////////////////////////////////////////
#ifndef INAAVX512COMMONOPERATORS_HPP
#define INAAVX512COMMONOPERATORS_HPP
#include "InastempConfig.h"
#ifndef INASTEMP_USE_AVX512COMMON
#error InaAVX512COMMONOperators is included but AVX512COMMON is not enable in the configuration
#endif
#include <immintrin.h>
#include <cmath>
#ifdef INASTEMP_USE_AVX512COMMON_OPERATORS
//Side effect operators DOUBLE
inline __m512d& operator+=(__m512d& a, const __m512d& b) {
return (a = _mm512_add_pd(a, b));
}
inline __m512d& operator-=(__m512d& a, const __m512d& b) {
return (a = _mm512_sub_pd(a, b));
}
inline __m512d& operator*=(__m512d& a, const __m512d& b) {
return (a = _mm512_mul_pd(a, b));
}
inline __m512d& operator/=(__m512d& a, const __m512d& b) {
return (a = _mm512_div_pd(a, b));
}
//No side effect operators DOUBLE
inline __m512d operator+(const __m512d& a, const __m512d& b) {
return _mm512_add_pd(a, b);
}
inline __m512d operator-(const __m512d& a, const __m512d& b) {
return _mm512_sub_pd(a, b);
}
inline __m512d operator*(const __m512d& v1, const __m512d& v2) {
return _mm512_mul_pd(v1, v2);
}
inline __m512d operator/(const __m512d& v1, const __m512d& v2) {
return _mm512_div_pd(v1, v2);
}
//Side effect operators SINGLE
inline __m512& operator+=(__m512& a, const __m512& b) {
return (a = _mm512_add_ps(a, b));
}
inline __m512& operator-=(__m512& a, const __m512& b) {
return (a = _mm512_sub_ps(a, b));
}
inline __m512& operator*=(__m512& a, const __m512& b) {
return (a = _mm512_mul_ps(a, b));
}
inline __m512& operator/=(__m512& a, const __m512& b) {
return (a = _mm512_div_ps(a, b));
}
//No side effect operators SINGLE
inline __m512 operator+(const __m512& a, const __m512& b) {
return _mm512_add_ps(a, b);
}
inline __m512 operator-(const __m512& a, const __m512& b) {
return _mm512_sub_ps(a, b);
}
inline __m512 operator*(const __m512& v1, const __m512& v2) {
return _mm512_mul_ps(v1, v2);
}
inline __m512 operator/(const __m512& v1, const __m512& v2) {
return _mm512_div_ps(v1, v2);
}
#endif
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -16,73 +16,7 @@
#ifdef INASTEMP_USE_AVX512KNL_OPERATORS
//Side effect operators DOUBLE
inline __m512d& operator+=(__m512d& a, const __m512d& b) {
return (a = _mm512_add_pd(a, b));
}
inline __m512d& operator-=(__m512d& a, const __m512d& b) {
return (a = _mm512_sub_pd(a, b));
}
inline __m512d& operator*=(__m512d& a, const __m512d& b) {
return (a = _mm512_mul_pd(a, b));
}
inline __m512d& operator/=(__m512d& a, const __m512d& b) {
return (a = _mm512_div_pd(a, b));
}
//No side effect operators DOUBLE
inline __m512d operator+(const __m512d& a, const __m512d& b) {
return _mm512_add_pd(a, b);
}
inline __m512d operator-(const __m512d& a, const __m512d& b) {
return _mm512_sub_pd(a, b);
}
inline __m512d operator*(const __m512d& v1, const __m512d& v2) {
return _mm512_mul_pd(v1, v2);
}
inline __m512d operator/(const __m512d& v1, const __m512d& v2) {
return _mm512_div_pd(v1, v2);
}
//Side effect operators SINGLE
inline __m512& operator+=(__m512& a, const __m512& b) {
return (a = _mm512_add_ps(a, b));
}
inline __m512& operator-=(__m512& a, const __m512& b) {
return (a = _mm512_sub_ps(a, b));
}
inline __m512& operator*=(__m512& a, const __m512& b) {
return (a = _mm512_mul_ps(a, b));
}
inline __m512& operator/=(__m512& a, const __m512& b) {
return (a = _mm512_div_ps(a, b));
}
//No side effect operators SINGLE
inline __m512 operator+(const __m512& a, const __m512& b) {
return _mm512_add_ps(a, b);
}
inline __m512 operator-(const __m512& a, const __m512& b) {
return _mm512_sub_ps(a, b);
}
inline __m512 operator*(const __m512& v1, const __m512& v2) {
return _mm512_mul_ps(v1, v2);
}
inline __m512 operator/(const __m512& v1, const __m512& v2) {
return _mm512_div_ps(v1, v2);
}
#include "AVX512COMMON/InaVecAVX512COMMONOperators.hpp"
#endif
......
This diff is collapsed.
This diff is collapsed.
///////////////////////////////////////////////////////////////////////////
// Inastemp - Berenger Bramas MPCDF - 2016
// Under MIT Licence, please you must read the LICENCE file.
///////////////////////////////////////////////////////////////////////////
#ifndef INAAVX512SKLOPERATORS_HPP
#define INAAVX512SKLOPERATORS_HPP
#include "InastempConfig.h"
#ifndef INASTEMP_USE_AVX512SKL
#error InaAVX512SKLOperators is included but AVX512SKL is not enable in the configuration
#endif
#include <immintrin.h>
#include <cmath>
#ifdef INASTEMP_USE_AVX512SKL_OPERATORS
#include "AVX512COMMON/InaVecAVX512COMMONOperators.hpp"
#endif
#endif
///////////////////////////////////////////////////////////////////////////
// Inastemp - Berenger Bramas MPCDF - 2016
// Under MIT Licence, please you must read the LICENCE file.
///////////////////////////////////////////////////////////////////////////
#ifndef INAVECAVX512SKLDOUBLE_HPP
#define INAVECAVX512SKLDOUBLE_HPP
#include "InastempConfig.h"
#include "InaAVX512SKLOperators.hpp"
#include "Common/InaIfElse.hpp"
#include "Common/InaUtils.hpp"
#include "AVX512COMMON/InaVecAVX512COMMONDouble.hpp"
#ifndef INASTEMP_USE_AVX512SKL
#error InaVecAVX512SKL<double> is included but AVX512SKL is not enable in the configuration
#endif
#include "Common/InaFastExp.hpp"
#include <immintrin.h>
#include <cmath>
// Forward declarations
template <class RealType>
using InaVecMaskAVX512SKL = InaVecMaskAVX512COMMON<RealType>;
template <class RealType>
using InaVecAVX512SKL = InaVecAVX512COMMON<RealType>;
#endif
///////////////////////////////////////////////////////////////////////////
// Inastemp - Berenger Bramas MPCDF - 2016
// Under MIT Licence, please you must read the LICENCE file.
///////////////////////////////////////////////////////////////////////////
#ifndef INAVECAVX512SKLFLOAT_HPP
#define INAVECAVX512SKLFLOAT_HPP
#include "InastempConfig.h"
#include "InaAVX512SKLOperators.hpp"
#include "Common/InaIfElse.hpp"
#include "Common/InaUtils.hpp"
#include "AVX512COMMON/InaVecAVX512COMMONFloat.hpp"
#ifndef INASTEMP_USE_AVX512SKL
#error InaVecAVX512SKL512SKL<float> is included but AVX512SKL is not enable in the configuration
#endif
#include "Common/InaFastExp.hpp"
#include <immintrin.h>
#include <cmath>
// Forward declarations
template <class RealType>
using InaVecMaskAVX512SKL = InaVecMaskAVX512COMMON<RealType>;
template <class RealType>
using InaVecAVX512SKL = InaVecAVX512COMMON<RealType>;
#endif
......@@ -26,9 +26,14 @@
#cmakedefine INASTEMP_USE_AVX2
#cmakedefine INASTEMP_USE_AVX2_OPERATORS
#cmakedefine INASTEMP_USE_AVX512COMMON
#cmakedefine INASTEMP_USE_AVX512COMMON_OPERATORS
#cmakedefine INASTEMP_USE_AVX512KNL
#cmakedefine INASTEMP_USE_AVX512KNL_OPERATORS
#cmakedefine INASTEMP_USE_AVX512SKL
#cmakedefine INASTEMP_USE_AVX512SKL_OPERATORS
// Inform about best one
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment