Commit 19fae031 authored by Bramas, Berenger (bbramas)'s avatar Bramas, Berenger (bbramas)

Merge branch 'examples-perfmodel' into 'master'

Examples perfmodel



See merge request !5
parents 81884818 deddc33c
...@@ -150,6 +150,7 @@ foreach(TYPE ${ALL_TYPES_REVERSE}) ...@@ -150,6 +150,7 @@ foreach(TYPE ${ALL_TYPES_REVERSE})
if($ENV{VERBOSE}) if($ENV{VERBOSE})
MESSAGE(STATUS "Main -- The compiler cannot compile ${TYPE} intrinsics") MESSAGE(STATUS "Main -- The compiler cannot compile ${TYPE} intrinsics")
endif() endif()
set(INASTEMP_USE_${TYPE} FALSE)
endif() endif()
endforeach() endforeach()
# The original order should be used as slow-to-fast types # The original order should be used as slow-to-fast types
......
...@@ -314,6 +314,9 @@ void compareExpTime(const size_t NbOverLoop, const size_t NbExp){ ...@@ -314,6 +314,9 @@ void compareExpTime(const size_t NbOverLoop, const size_t NbExp){
timer.stop(); timer.stop();
std::cout << "Scalar for " << NbExp * NbOverLoop std::cout << "Scalar for " << NbExp * NbOverLoop
<< " exp took " << timer.getElapsed() << "s (" << timer.getElapsed()/double(NbExp * NbOverLoop) << "s per exp)\n"; << " exp took " << timer.getElapsed() << "s (" << timer.getElapsed()/double(NbExp * NbOverLoop) << "s per exp)\n";
// Read the result through a volatile so the optimizer cannot discard the computation
volatile RealType tmp;
tmp = resScalar[0];
} }
std::cout << "\n"; std::cout << "\n";
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////
...@@ -408,8 +411,8 @@ void compareExpTime(const size_t NbOverLoop, const size_t NbExp){ ...@@ -408,8 +411,8 @@ void compareExpTime(const size_t NbOverLoop, const size_t NbExp){
int main(int /*argc*/, char* /*argv*/ []) { int main(int /*argc*/, char* /*argv*/ []) {
std::cout << "[INFO] This program runs the computation of exp() using scalar, intrinsic vectors or inastemp vectors. \n"; std::cout << "[INFO] This program runs the computation of exp() using scalar, intrinsic vectors or inastemp vectors. \n";
const size_t NbOverLoop = 5; const size_t NbOverLoop = 7;
const size_t NbExp = 1024000; const size_t NbExp = 10240000;
std::cout << "[INFO] It will compute " << NbExp << " consecutive exp, and store them in an array. \n"; std::cout << "[INFO] It will compute " << NbExp << " consecutive exp, and store them in an array. \n";
std::cout << "[INFO] This process will be done " << NbOverLoop << " times. \n"; std::cout << "[INFO] This process will be done " << NbOverLoop << " times. \n";
......
...@@ -620,7 +620,7 @@ void ScalarGemmIna(const RealType* __restrict__ A, const RealType* __restrict__ ...@@ -620,7 +620,7 @@ void ScalarGemmIna(const RealType* __restrict__ A, const RealType* __restrict__
for(size_t idxRow = 0 ; idxRow < BlockSize ; ++idxRow){ for(size_t idxRow = 0 ; idxRow < BlockSize ; ++idxRow){
for(size_t idxCol = 0 ; idxCol < BlockSize ; ++idxCol){ for(size_t idxCol = 0 ; idxCol < BlockSize ; ++idxCol){
VecType sum = 0; VecType sum = 0.;
for(size_t idxK = 0 ; idxK < PanelSizeK ; idxK += BlockSize){ for(size_t idxK = 0 ; idxK < PanelSizeK ; idxK += BlockSize){
sum += VecType(&panelA[(idxRow+ib)*PanelSizeK+ idxK]) sum += VecType(&panelA[(idxRow+ib)*PanelSizeK+ idxK])
* VecType(&panelB[idxCol*PanelSizeK+ idxK]); * VecType(&panelB[idxCol*PanelSizeK+ idxK]);
......
###########################################################################
# Inastemp - Berenger Bramas MPCDF - 2016
# Under MIT Licence, please you must read the LICENCE file.
###########################################################################
# Build script of the performance-model examples:
# each .cpp file found below this directory is built as its own executable.
project(EXAMPLES_PERFMODEL_INASTEMP C CXX)

# INASTEMP_CXX_FLAGS and the other INASTEMP_* variables are set outside this file
add_definitions(${INASTEMP_CXX_FLAGS})

option(INASTEMP_BUILD_PATTERNS "Set to ON to build pattern examples" ON)

if($ENV{VERBOSE})
    message(STATUS "Examples -- PERFMODEL")
endif()

# Collect every source file of this directory tree
file(GLOB_RECURSE source_tests_files ./*.cpp)

# Headers come from the configured build tree, the library sources and this directory
include_directories(
    ${INASTEMP_BINARY_DIR}/Src
    ${INASTEMP_SOURCE_DIR}/Src
    ./
)

# One executable per source file, named "perfmodel-<file name without extension>"
foreach(source_file ${source_tests_files})
    get_filename_component(execname ${source_file} NAME_WE)
    set(execname "perfmodel-${execname}")

    if($ENV{VERBOSE})
        message(STATUS "Examples -- PERFMODEL ${execname}")
    endif()

    add_executable(${execname} ${source_file})
    target_link_libraries(${execname} ${INASTEMP_LIBRARIES})
endforeach()
///////////////////////////////////////////////////////////////////////////
// Inastemp - Berenger Bramas MPCDF - 2016
// Under MIT Licence, please you must read the LICENCE file.
///////////////////////////////////////////////////////////////////////////
#include "InastempConfig.h"
#include "SCALAR/InaVecSCALARDouble.hpp"
#include "SCALAR/InaVecSCALARFloat.hpp"
#include "Common/InaTimer.hpp"
#include <cassert>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#ifdef INASTEMP_USE_SSE3
#include "SSE3/InaVecSSE3Double.hpp"
#include "SSE3/InaVecSSE3Float.hpp"
#endif
#ifdef INASTEMP_USE_AVX
#include "AVX/InaVecAVXDouble.hpp"
#include "AVX/InaVecAVXFloat.hpp"
#include <immintrin.h>
#endif
#ifdef INASTEMP_USE_AVX512KNL
#include "AVX512KNL/InaVecAVX512KNLDouble.hpp"
#include "AVX512KNL/InaVecAVX512KNLFloat.hpp"
#endif
// Global count of the multiplications performed by DummyPower
size_t cpt = 0;

/**
 * Computes inVal raised to the power inPow by repeated multiplication.
 * Every multiplication also increments the global counter cpt.
 *
 * @param inVal the value to multiply
 * @param inPow the number of multiplications; zero or a negative value
 *              performs none and yields 1
 * @return 1 multiplied inPow times by inVal
 */
template < class VecType >
VecType DummyPower(VecType inVal, const int inPow){
    VecType product = 1;
    for( int remaining = inPow ; remaining > 0 ; --remaining ) {
        ++cpt;
        product *= inVal;
    }
    return product;
}
/**
 * Times the variant that always computes both branches and selects the
 * result per lane with VecType::IfElse.
 *
 * Each iteration adds inVal^pow1 to an accumulator, then adds the IfElse
 * selection between inVal^pow2 and inVal^pow3 driven by msk.
 *
 * @param inVal   scalar broadcast into the vectors used by the kernel
 * @param msk     mask passed to VecType::IfElse
 * @param pow1    exponent of the unconditional term
 * @param pow2    exponent of the first IfElse value
 * @param pow3    exponent of the second IfElse value
 * @param nbLoops number of iterations
 * @param res     receives the accumulator (added to its current value)
 * @return the elapsed time reported by InaTimer (the caller treats it as seconds)
 */
template < class VecType, class MaskType, class RealType >
double KernelAll(const RealType inVal, const MaskType msk, const int pow1, const int pow2,
                 const int pow3, const size_t nbLoops, VecType& res){
    InaTimer timer;

    // Broadcast the scalar first: calling DummyPower(inVal, ...) would deduce
    // RealType and compute the powers in scalar code, whereas the flop counts
    // derived from this kernel assume VecLength lanes per multiplication.
    const VecType vecVal = inVal;
    VecType vec = inVal;

    for(size_t idxLoop = 0 ; idxLoop < nbLoops ; ++idxLoop){
        vec += DummyPower(vecVal, pow1);
        vec += VecType::IfElse(msk, DummyPower(vecVal, pow2), DummyPower(vecVal, pow3));
    }

    res += vec;

    timer.stop();
    return timer.getElapsed();
}

/**
 * Times the variant that skips the pow3 branch when every lane of the mask
 * is true, and falls back to VecType::IfElse otherwise.
 *
 * Each iteration adds inVal^pow1 to an accumulator, then adds either
 * inVal^pow2 alone (msk.isAllTrue()) or the IfElse selection between
 * inVal^pow2 and inVal^pow3.
 *
 * @param inVal   scalar broadcast into the vectors used by the kernel
 * @param msk     mask tested with isAllTrue() and passed to VecType::IfElse
 * @param pow1    exponent of the unconditional term
 * @param pow2    exponent of the first IfElse value
 * @param pow3    exponent of the second IfElse value
 * @param nbLoops number of iterations
 * @param res     receives the accumulator (added to its current value)
 * @return the elapsed time reported by InaTimer (the caller treats it as seconds)
 */
template < class VecType, class MaskType, class RealType >
double KernelIfTrue(const RealType inVal, const MaskType msk, const int pow1, const int pow2,
                    const int pow3, const size_t nbLoops, VecType& res){
    InaTimer timer;

    // Same broadcast as in KernelAll so that both kernels run their powers on
    // VecType and remain comparable.
    const VecType vecVal = inVal;
    VecType vec = inVal;

    for(size_t idxLoop = 0 ; idxLoop < nbLoops ; ++idxLoop){
        vec += DummyPower(vecVal, pow1);

        if( msk.isAllTrue() ){
            vec += DummyPower(vecVal, pow2);
        }
        else{
            vec += VecType::IfElse(msk, DummyPower(vecVal, pow2), DummyPower(vecVal, pow3));
        }
    }

    res += vec;

    timer.stop();
    return timer.getElapsed();
}
/**
 * Benchmarks KernelAll and KernelIfTrue for one vector type.
 *
 * For every number of true mask lanes (0 to VecType::VecLength), every
 * power2 in {1, 2, 4, ..., 128} and every power1/power3 taken as 0%, 25%,
 * 50%, 75% and 100% of power2, both kernels are timed. Each measurement is
 * printed to std::cout and appended to myfile as one CSV row.
 *
 * NOTE(review): the CSV rows contain neither the vector type nor the number
 * of true lanes, so rows of different configurations cannot be told apart
 * in the file.
 *
 * @param nbLoops number of iterations each kernel performs per measurement
 * @param myfile  stream receiving the CSV rows
 * @return the horizontal sum of the accumulated kernel results converted to
 *         int when it fits, 0 otherwise; the caller only folds it into the
 *         process return value
 */
template < class VecType, class RealType >
int test(const size_t nbLoops, std::ofstream& myfile){
    std::cout << "Test " << VecType::GetName() << " in double" << std::endl;

    // Initialise from a RealType value instead of the literal 0: the literal
    // is also a null pointer constant, and VecType can be built from both a
    // scalar and a pointer, which may make the conversion ambiguous.
    VecType res = RealType(0);

    for(int idxSizeTrue = 0 ; idxSizeTrue <= VecType::VecLength ; ++idxSizeTrue){
        // The first idxSizeTrue lanes hold 0 and are therefore true in the zero mask
        RealType mskValues[VecType::VecLength];
        for(int idx = 0 ; idx < VecType::VecLength ; ++idx){
            mskValues[idx] = (idx < idxSizeTrue ? 0. : 1.);
        }
        typename VecType::MaskType msk = VecType(mskValues).isZeroMask();

        for(int power2 = 1 ; power2 < (1 << 8 ) ; power2 *= 2){
            for(int power1p = 0 ; power1p <= 100 ; power1p += 25){
                const int power1 = (power2*power1p)/100;

                for(int power3p = 0 ; power3p <= 100 ; power3p += 25){
                    const int power3 = (power2*power3p)/100;

                    {
                        const double duration = KernelAll<VecType, typename VecType::MaskType, RealType>(
                                    RealType(1), msk, power1, power2, power3, nbLoops, res);

                        // Flops: every lane executes the two additions and the three powers
                        const size_t Flops = nbLoops * size_t(VecType::VecLength * (2 + power1 + power2 + power3));
                        // Effective flops: per lane, only the power selected by the mask counts
                        const size_t EffFlops = nbLoops * size_t(VecType::VecLength * (2 + power1) + power2*idxSizeTrue + power3*(VecType::VecLength-idxSizeTrue));
                        const double gflops = (double(Flops)/duration)/1E9;
                        const double effgflops = (double(EffFlops)/duration)/1E9;

                        std::cout << "[ALL] power 1 " << power1 << " power 2 " << power2 << " power 3 " << power3
                                  << " duration " << duration
                                  << " GFlops " << gflops
                                  << " Effective-GFlops " << effgflops << std::endl;
                        myfile << "\"all\","<< power1<<","<<power2<<","<<power3<<","<<duration<<","<<gflops<<","<<effgflops<<"\n";
                    }
                    {
                        const double duration = KernelIfTrue<VecType, typename VecType::MaskType, RealType>(
                                    RealType(1), msk, power1, power2, power3, nbLoops, res);

                        // When the mask is all true the kernel never computes the third power
                        const size_t Flops = (msk.isAllTrue()?
                                    nbLoops * size_t(VecType::VecLength * (2 + power1 + power2)) :
                                    nbLoops * size_t(VecType::VecLength * (2 + power1 + power2 + power3)));
                        const size_t EffFlops = nbLoops * size_t(VecType::VecLength * (2 + power1) + power2*idxSizeTrue + power3*(VecType::VecLength-idxSizeTrue));
                        const double gflops = (double(Flops)/duration)/1E9;
                        const double effgflops = (double(EffFlops)/duration)/1E9;

                        std::cout << "[IFT] power 1 " << power1 << " power 2 " << power2 << " power 3 " << power3
                                  << " duration " << duration
                                  << " GFlops " << gflops
                                  << " Effective-GFlops " << effgflops << std::endl;
                        myfile << "\"iftrue\","<< power1<<","<<power2<<","<<power3<<","<<duration<<","<<gflops<<","<<effgflops<<"\n";
                    }
                }
            }
        }
    }

    // Converting a floating point value that does not fit in int is undefined
    // behaviour, and the accumulated sum can be far larger than the int range,
    // so the conversion is only performed when it is well defined.
    const double total = static_cast<double>(res.horizontalSum());
    const bool fitsInInt = (total >= static_cast<double>(std::numeric_limits<int>::min())
                            && total <= static_cast<double>(std::numeric_limits<int>::max()));
    return fitsInInt ? static_cast<int>(total) : 0;
}
int main() {
std::ofstream myfile;
myfile.open ("res.csv");
myfile << "mode,power1,power2,power3,duration,gflops,effgflops\n";
volatile size_t nbLoops = 100000000;
int res = 0;
res += test<InaVecSCALAR<double>, double>(nbLoops, myfile);
#ifdef INASTEMP_USE_SSE3
res += test<InaVecSSE3<double>, double>(nbLoops, myfile);
#endif
#ifdef INASTEMP_USE_AVX
res += test<InaVecAVX<double>, double>(nbLoops, myfile);
#endif
#ifdef INASTEMP_USE_AVX512KNL
res += test<InaVecAVX512KNL<double>, double>(nbLoops, myfile);
#endif
return res + int(cpt);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment