Commit d31beadc authored by Berenger Bramas's avatar Berenger Bramas

Add AltiVec support -- first version that compiles with GCC (but has not been tested yet)

parent 917a20d5
......@@ -72,32 +72,53 @@ set(INASTEMP_VERSION "${INASTEMP_MAJOR_VERSION}.${INASTEMP_MINOR_VERSION}.${INA
#===========================================================================
# Options
#===========================================================================
# NOTE(review): this run repeats, statement for statement, the x86 settings
# that also appear inside the else() branch of the architecture test further
# down -- presumably the pre-change side of the diff; confirm before treating
# both copies as live code.
# Query the CPU capabilities
include(GetCpuInfos)
GetCpuInfos()
# Query the compiler capabilities
include(GetCompilerInfos)
GetCompilerInfos()
# All vector types, ordered from worst to best (ADD-NEW-HERE)
set(ALL_TYPES "SSE3;SSSE3;SSE41;SSE42;AVX;AVX2;AVX512COMMON;AVX512KNL;AVX512SKL")
# Scalar support is switched on unconditionally
set(INASTEMP_USE_SCALAR ON)
# Base C++ flags: C++11
set(INASTEMP_CXX_FLAGS "-std=c++11")
# Custom cpu <=> vec rules: CPU features listed for each vector type (maybe ADD-NEW-HERE if needed)
set(AVX512COMMON_CPU_RULES "AVX512F;AVX512ER")
set(AVX512KNL_CPU_RULES "AVX512F;AVX512ER;AVX512PF")
set(AVX512SKL_CPU_RULES "AVX512F;AVX512ER;AVX512VL;AVX512BW;AVX512DQ")
# Dependencies between types: <TYPE>_DEP names the type that <TYPE> depends on (maybe ADD-NEW-HERE if needed)
set(SSSE3_DEP "SSE3")
set(SSE41_DEP "SSSE3")
set(SSE42_DEP "SSE41")
set(AVX2_DEP "AVX")
set(AVX512KNL_DEP "AVX512COMMON")
set(AVX512SKL_DEP "AVX512COMMON")
# Select the candidate vector types for the target architecture.
# The processor name is expanded inside quotes so that an empty
# CMAKE_SYSTEM_PROCESSOR cannot collapse the condition into the malformed
# `if( STREQUAL "ppc64le")`, which would abort the configuration.
if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le")
    # Diagnostics are printed only when the VERBOSE environment variable
    # evaluates to true at configure time.
    if($ENV{VERBOSE})
        message(STATUS "Main -- compile for ppc64le architecture")
    endif()
    # Ask compiler capacities
    include(GetCompilerInfos)
    GetCompilerInfos()
    # All types from worse to best (ADD-NEW-HERE)
    set(ALL_TYPES "ALTIVEC")
    # No CPU feature rule is listed for ALTIVEC
    set(ALTIVEC_CPU_RULES "")
    # NOTE(review): only this branch seeds ALL_USED_TYPES; confirm whether the
    # x86 path is expected to initialise it elsewhere.
    set(ALL_USED_TYPES "SCALAR")
    set(INASTEMP_USE_SCALAR ON)
    set(INASTEMP_CXX_FLAGS "-std=c++11")
else()
    if($ENV{VERBOSE})
        message(STATUS "Main -- compile for x86 architecture")
    endif()
    # Ask CPU capacities
    include(GetCpuInfos)
    GetCpuInfos()
    # Ask compiler capacities
    include(GetCompilerInfos)
    GetCompilerInfos()
    # All types from worse to best (ADD-NEW-HERE)
    set(ALL_TYPES "SSE3;SSSE3;SSE41;SSE42;AVX;AVX2;AVX512COMMON;AVX512KNL;AVX512SKL")
    set(INASTEMP_USE_SCALAR ON)
    set(INASTEMP_CXX_FLAGS "-std=c++11")
    # Set custom cpu <=> vec rules (maybe ADD-NEW-HERE if needed)
    set(AVX512COMMON_CPU_RULES "AVX512F;AVX512ER")
    set(AVX512KNL_CPU_RULES "AVX512F;AVX512ER;AVX512PF")
    set(AVX512SKL_CPU_RULES "AVX512F;AVX512ER;AVX512VL;AVX512BW;AVX512DQ")
    # Dependencies between types (maybe ADD-NEW-HERE if needed)
    set(SSSE3_DEP "SSE3")
    set(SSE41_DEP "SSSE3")
    set(SSE42_DEP "SSE41")
    set(AVX2_DEP "AVX")
    set(AVX512KNL_DEP "AVX512COMMON")
    set(AVX512SKL_DEP "AVX512COMMON")
endif()
# Enforce rules
set(ALL_TYPES_REVERSE ${ALL_TYPES})
......
#include <altivec.h>
// Applies the built-in arithmetic operators to __vector double operands:
// the four binary forms first, then the four compound assignments.
// The operands are never initialised and no result is inspected.
// NOTE(review): presumably used as a compile-only capability check -- confirm.
int main() {
    __vector double acc;
    __vector double rhs;
    // Binary operators: + * - /
    acc = rhs + acc;
    acc = rhs * acc;
    acc = rhs - acc;
    acc = rhs / acc;
    // Compound assignments: += -= *= /=
    acc += rhs;
    acc -= rhs;
    acc *= rhs;
    acc /= rhs;
    return 0;
}
#include <altivec.h>
// Calls vec_add, vec_abs and vec_rsqrt once for __vector double and once for
// __vector float. The inputs are never initialised and no result is inspected.
// NOTE(review): presumably used as a compile-only capability check -- confirm.
int main(){
    {
        // Double-precision operands
        __vector double inAd;
        __vector double inBd;
        __vector double outd = vec_add(inAd, inBd);
        outd = vec_abs (inAd);
        outd = vec_rsqrt (inAd);
    }
    {
        // Single-precision operands
        __vector float inA;
        __vector float inB;
        __vector float out = vec_add(inA, inB);
        out = vec_abs (inA);
        out = vec_rsqrt (inA);
    }
    return 0;
}
......@@ -44,58 +44,70 @@ endmacro(GetCompilerInfosCore)
###########################################################################################
macro(GetCompilerInfos)
SET( ARCH_NATIVE_FLAG "-march=native" CACHE STRING "Additional flag for the compiler capacities detection" )
# (ADD-NEW-HERE for each compilers)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(APPLE) # INTEL APPLE
set(SSE3_FLAGS "-msse3 ${ARCH_NATIVE_FLAG}")
set(SSSE3_FLAGS "-mssse3 ${ARCH_NATIVE_FLAG}")
set(SSE41_FLAGS "-msse4 -msse4.1 ${ARCH_NATIVE_FLAG}")
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-mAVX ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-march=core-avx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-xCOMMON-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-xCOMMON-AVX512 -xMIC-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-xCOMMON-AVX512 -xCORE-AVX512 ${ARCH_NATIVE_FLAG}")
else() # INTEL LINUX
set(SSE3_FLAGS "-msse3 ${ARCH_NATIVE_FLAG}")
set(SSSE3_FLAGS "-mssse3 ${ARCH_NATIVE_FLAG}")
set(SSE41_FLAGS "-msse4 -msse4.1 ${ARCH_NATIVE_FLAG}")
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-march=core-avx-i ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-march=core-avx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-xCOMMON-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-xCOMMON-AVX512 -xMIC-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-xCOMMON-AVX512 -xCORE-AVX512 ${ARCH_NATIVE_FLAG}")
if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc64le")
# POWERPC
SET( ARCH_NATIVE_FLAG "-mcpu=native" CACHE STRING "Additional flag for the compiler capacities detection such as -mcpu=power8 for example" )
if(CMAKE_CXX_COMPILER_ID STREQUAL "XL" OR CMAKE_CXX_COMPILER_ID STREQUAL "VisualAge" OR CMAKE_CXX_COMPILER_ID STREQUAL "zOS")
set(ALTIVEC_FLAGS "-mcpu=pwr8 -qaltivec ${ARCH_NATIVE_FLAG}")
else()
set(ALTIVEC_FLAGS "-maltivec -mabi=altivec -mvsx ${ARCH_NATIVE_FLAG}")
endif()
set(ALL_TYPES "ALTIVEC")
else()
if(APPLE) # GCC APPLE
set(SSE3_FLAGS "-msse3 ${ARCH_NATIVE_FLAG}")
set(SSSE3_FLAGS "-mssse3 ${ARCH_NATIVE_FLAG}")
set(SSE41_FLAGS "-msse4 -msse4.1 ${ARCH_NATIVE_FLAG}")
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-mavx ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-mavx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-mavx512f -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-mavx512f -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq ${ARCH_NATIVE_FLAG}")
else() # GCC LINUX
set(SSE3_FLAGS "-msse3 ${ARCH_NATIVE_FLAG}")
set(SSSE3_FLAGS "-mssse3 ${ARCH_NATIVE_FLAG}")
set(SSE41_FLAGS "-msse4 -msse4.1 ${ARCH_NATIVE_FLAG}")
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-mavx ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-mavx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-mavx512f -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-mavx512f -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq ${ARCH_NATIVE_FLAG}")
endif(APPLE)
endif()
# X86
SET( ARCH_NATIVE_FLAG "-march=native" CACHE STRING "Additional flag for the compiler capacities detection" )
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(APPLE) # INTEL APPLE
set(SSE3_FLAGS "-msse3 ${ARCH_NATIVE_FLAG}")
set(SSSE3_FLAGS "-mssse3 ${ARCH_NATIVE_FLAG}")
set(SSE41_FLAGS "-msse4 -msse4.1 ${ARCH_NATIVE_FLAG}")
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-mAVX ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-march=core-avx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-xCOMMON-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-xCOMMON-AVX512 -xMIC-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-xCOMMON-AVX512 -xCORE-AVX512 ${ARCH_NATIVE_FLAG}")
else() # INTEL LINUX
set(SSE3_FLAGS "-msse3 ${ARCH_NATIVE_FLAG}")
set(SSSE3_FLAGS "-mssse3 ${ARCH_NATIVE_FLAG}")
set(SSE41_FLAGS "-msse4 -msse4.1 ${ARCH_NATIVE_FLAG}")
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-march=core-avx-i ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-march=core-avx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-xCOMMON-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-xCOMMON-AVX512 -xMIC-AVX512 ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-xCOMMON-AVX512 -xCORE-AVX512 ${ARCH_NATIVE_FLAG}")
endif()
else()
if(APPLE) # GCC APPLE
set(SSE3_FLAGS "-msse3 ${ARCH_NATIVE_FLAG}")
set(SSSE3_FLAGS "-mssse3 ${ARCH_NATIVE_FLAG}")
set(SSE41_FLAGS "-msse4 -msse4.1 ${ARCH_NATIVE_FLAG}")
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-mavx ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-mavx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-mavx512f -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-mavx512f -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq ${ARCH_NATIVE_FLAG}")
else() # GCC LINUX
set(SSE3_FLAGS "-msse3 ${ARCH_NATIVE_FLAG}")
set(SSSE3_FLAGS "-mssse3 ${ARCH_NATIVE_FLAG}")
set(SSE41_FLAGS "-msse4 -msse4.1 ${ARCH_NATIVE_FLAG}")
set(SSE42_FLAGS "-msse4 -msse4.2 ${ARCH_NATIVE_FLAG}")
set(AVX_FLAGS "-mavx ${ARCH_NATIVE_FLAG}")
set(AVX2_FLAGS "-mavx2 ${ARCH_NATIVE_FLAG}")
set(AVX512COMMON_FLAGS "-mavx512f -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512KNL_FLAGS "-mavx512f -mavx512pf -mavx512er -mavx512cd ${ARCH_NATIVE_FLAG}")
set(AVX512SKL_FLAGS "-mavx512f -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq ${ARCH_NATIVE_FLAG}")
endif(APPLE)
endif()
# (ADD-NEW-HERE)
set(ALL_TYPES "SSE3;SSSE3;SSE41;SSE42;AVX;AVX2;AVX512COMMON;AVX512KNL;AVX512SKL")
# (ADD-NEW-HERE)
set(ALL_TYPES "SSE3;SSSE3;SSE41;SSE42;AVX;AVX2;AVX512COMMON;AVX512KNL;AVX512SKL")
endif()
if($ENV{VERBOSE})
foreach(TYPE ${ALL_TYPES})
......
///////////////////////////////////////////////////////////////////////////
// Inastemp - Berenger Bramas MPCDF - 2016
// Under MIT Licence, please you must read the LICENCE file.
///////////////////////////////////////////////////////////////////////////
#ifndef INAALTIVECOPERATORS_HPP
#define INAALTIVECOPERATORS_HPP

#include "InastempConfig.h"

// This header is only valid when ALTIVEC has been enabled; the macro is
// expected to be provided through InastempConfig.h.
#if !defined(INASTEMP_USE_ALTIVEC)
#error InaALTIVECOperators is included but ALTIVEC is not enable in the configuration
#endif // !defined(INASTEMP_USE_ALTIVEC)

#include <altivec.h>
// altivec.h can expose these names as macros; drop them right after the
// include so they do not interfere with the code that follows.
#undef bool
#undef vector
#undef pixel

// When INASTEMP_USE_ALTIVEC_OPERATORS is defined the build is rejected: as the
// message states, operators cannot be overloaded for this vector type, so no
// operator definitions are provided here.
#if defined(INASTEMP_USE_ALTIVEC_OPERATORS)
#error Operators for ALTIVEC must exist, we cannot overload this type operators.
#endif // defined(INASTEMP_USE_ALTIVEC_OPERATORS)

#endif // INAALTIVECOPERATORS_HPP
This diff is collapsed.
This diff is collapsed.
......@@ -26,6 +26,8 @@
#cmakedefine INASTEMP_USE_AVX512SKL
#cmakedefine INASTEMP_USE_ALTIVEC
#cmakedefine INASTEMP_USE_ALTIVEC
// Inform about best one
#define INASTEMP_@INASTEMP_BESTTYPE@_IS_BEST
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment