Mentions légales du service

Skip to content
Snippets Groups Projects
Commit ca6615d1 authored by ANDRADE-BARROSO Guillermo's avatar ANDRADE-BARROSO Guillermo
Browse files

Merge branch 'master' of gitlab.inria.fr:sed-rennes/formations/tutorials_parallel_computing

parents 2029ee7c 0c56c143
No related branches found
No related tags found
No related merge requests found
import numpy
import ctypes
from time import time
import inline
# Problem size: number of float32 elements in each of the two vectors.
sizeX = 1000000
# Number of times the compiled compute() repeats the saxpy pass over the data.
numberIterations = 1000
# Input vector, single precision to match the `float *` the C++ code expects.
X = numpy.random.rand(sizeX).astype(numpy.float32)
# Output vector. Allocate it directly as float32 instead of creating a
# float64 buffer and converting it; its contents are uninitialised either way.
Y = numpy.empty(sizeX, dtype=numpy.float32)
def BenchmarkCode(name, code, X, Y):
    """Compile *code*, run its ``compute`` entry point once and time the call.

    Args:
        name: Label used in the printed timing line.
        code: C++ source exporting
            ``void compute(int, int, float *, float *)`` with C linkage.
        X: float32 NumPy array; reset to 1.0 before the run.
        Y: float32 NumPy array with at least ``X.size`` elements; reset to
            0.5 before the run and updated in place by the compiled code.

    Returns:
        Elapsed time of the ``compute`` call in seconds (compilation is not
        included in the measurement).

    Raises:
        ValueError: if ``Y`` holds fewer elements than ``X``.
    """
    # Function-scope import keeps the file's import block untouched.
    # perf_counter() is monotonic and high resolution, whereas time() follows
    # the wall clock and can jump if the system clock is adjusted.
    from time import perf_counter

    # Take the element count from the array actually passed in rather than
    # from the module-level sizeX, so the C loop cannot run past the buffers.
    element_count = X.size
    if Y.size < element_count:
        raise ValueError("Y must hold at least as many elements as X")

    # init
    Y[:] = 0.5
    X[:] = 1.0

    # compile the code
    lib = inline.cxx(
        code,
        compiler_extra_args=['-march=native', '-fopenmp'],
        link_extra_args=['-march=native', '-fopenmp'],
    )
    # The C code indexes the buffers linearly, so reject strided views
    # instead of silently reading the wrong elements.
    p_float = numpy.ctypeslib.ndpointer(dtype=numpy.float32, flags='C_CONTIGUOUS')
    lib.compute.argtypes = [ctypes.c_int, ctypes.c_int, p_float, p_float]
    # compute() returns void; without this ctypes assumes an int return value.
    lib.compute.restype = None

    # start chronometer
    start_time = perf_counter()
    # run the code
    lib.compute(numberIterations, element_count, X, Y)
    # stop chronometer
    execution_time = perf_counter() - start_time

    print(f"execution time for {name} code = {execution_time}")
    return execution_time
# C++ reference code
# Scalar SAXPY baseline: saxpy() performs Y[i] += alpha * X[i] over n elements
# and compute() calls it numberIterations times with alpha = 0.001f.
# Both functions sit inside extern "C", so their symbol names are not
# C++-mangled and `compute` can be looked up by that name.
# NOTE(review): the "#line 33" directive presumably mirrors this string's
# position in saxpy.py so compiler diagnostics point back here -- confirm the
# number is still accurate.
referenceCode="""
#line 33 "saxpy.py" // helpful for debug
extern "C" {
void saxpy(int n, float alpha, float *X, float *Y)
{
int i;
for (i=0; i<n; i++)
Y[i] += alpha * X[i];
}
void compute(int numberIterations, int sizeX, float *X, float *Y )
{
for(int j=0; j< numberIterations;j++)
saxpy(sizeX, 0.001f, X, Y);
return ;
}
}
"""
# Time the baseline; this value is the numerator of the speed-up printed below.
referenceTime=BenchmarkCode('Reference', referenceCode,X,Y)
# ARM Advanced SIMD (NEON) variant of the benchmark.
# As written, saxpy() here is the same scalar loop as in referenceCode:
# <arm_neon.h> is included but no NEON intrinsics are used yet.
# NOTE(review): presumably a placeholder to be replaced by an intrinsics-based
# loop -- confirm. The header is also included inside the extern "C" block.
ASIMDCode="""
#line 58 "saxpy.py" // helpful for debug
extern "C" {
#include <arm_neon.h>
void saxpy(int n, float alpha, float *X, float *Y)
{
int i;
for (i=0; i<n; i++)
Y[i] += alpha * X[i];
}
void compute(int numberIterations, int sizeX, float *X, float *Y )
{
for(int j=0; j< numberIterations;j++)
saxpy(sizeX, 0.001f, X, Y);
return ;
}
}
"""
# Time the ASIMD variant on the same X and Y arrays as the reference run.
ASIMDTime=BenchmarkCode('ASIMD', ASIMDCode,X,Y)
# Speed-up is reference time divided by ASIMD time (a value > 1 means faster).
print("speed up for ASIMD = " + str(referenceTime/ASIMDTime))
...@@ -24,7 +24,7 @@ size_data=100 ...@@ -24,7 +24,7 @@ size_data=100
# number of threads in a block # number of threads in a block
block_size=32 block_size=32
# number of blocks of thread # number of blocks of thread
num_blocks=(size_data+block_size-1)/block_size num_blocks=(size_data+block_size-1)//block_size
source = numpy.arange(0,size_data,1).astype(numpy.float32) source = numpy.arange(0,size_data,1).astype(numpy.float32)
destination = numpy.zeros_like(source) destination = numpy.zeros_like(source)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment