Merge branch 'master' of gitlab.inria.fr:sed-rennes/formations/tutorials_parallel_computing

ca6615d1 · ANDRADE-BARROSO Guillermo · 2029ee7c · 0c56c143 · ca6615d1 · ca6615d1
Commit ca6615d1 authored 3 years ago by ANDRADE-BARROSO Guillermo
--- a/ASIMD/saxpy_asimd_origin.py
+++ b/ASIMD/saxpy_asimd_origin.py
+import numpy
+import ctypes
+from time import time
+import inline
+sizeX = 1000000  # number of float32 elements in each vector
+numberIterations =1000  # how many times compute() repeats the saxpy pass
+X = numpy.random.rand(sizeX).astype(numpy.float32)  # random start values; BenchmarkCode overwrites them with 1.0
+Y = numpy.empty(sizeX).astype(numpy.float32)  # left uninitialized here; BenchmarkCode fills it with 0.5
+def BenchmarkCode(name, code,X,Y):
+    # init
+    Y[:]=0.5
+    X[:]=1.0
+    # compile the code
+    lib=inline.cxx(code, compiler_extra_args=['-march=native','-fopenmp'], link_extra_args= ['-march=native','-fopenmp']) 
+    p_float= numpy.ctypeslib.ndpointer(dtype=numpy.float32)     
+    lib.compute.argtypes = [ctypes.c_int, ctypes.c_int, p_float, p_float] 
+    # start chronometer
+    start_time = time()
+    # run the code
+    lib.compute(numberIterations, sizeX, X, Y)
+    # stop chronometer             
+    stop_time = time()
+    execution_time= stop_time - start_time
+    print("execution time for "+name+" code = "+ str(execution_time))
+    return execution_time
+# C++ reference code: scalar saxpy (Y[i] += alpha * X[i]) that compute() repeats numberIterations times with alpha = 0.001f
+referenceCode="""
+#line 33 "saxpy.py" // helpful for debug
+extern "C" {
+void saxpy(int n, float alpha, float *X, float *Y)
+{
+    int i;
+    for (i=0; i<n; i++)
+        Y[i] += alpha * X[i];
+}
+void compute(int numberIterations, int sizeX, float *X, float *Y )
+{
+    for(int j=0; j< numberIterations;j++)
+        saxpy(sizeX, 0.001f, X, Y);
+    return ;
+}
+}
+"""
+referenceTime=BenchmarkCode('Reference', referenceCode,X,Y)  # baseline time used as numerator of the speed-up ratio
+ASIMDCode="""
+#line 58 "saxpy.py" // helpful for debug
+extern "C" {
+#include <arm_neon.h>
+void saxpy(int n, float alpha, float *X, float *Y)
+{
+    int i;
+    for (i=0; i<n; i++)
+        Y[i] += alpha * X[i];
+}
+void compute(int numberIterations, int sizeX, float *X, float *Y )
+{
+    for(int j=0; j< numberIterations;j++)
+        saxpy(sizeX, 0.001f, X, Y);
+    return ;
+}
+}
+"""
+ASIMDTime=BenchmarkCode('ASIMD', ASIMDCode,X,Y)  # NOTE(review): saxpy() in ASIMDCode is still the same scalar loop as in referenceCode (only <arm_neon.h> is added) -- presumably meant to be rewritten with NEON intrinsics
+print("speed up for ASIMD = " + str(referenceTime/ASIMDTime))  # ratio above 1 means the ASIMD run was faster than the reference run
--- a/CUDA/tp_reverse.py
+++ b/CUDA/tp_reverse.py
@@ -24,7 +24,7 @@ size_data=100
 # number of threads in a block
 block_size=32
 # number of blocks of thread
-num_blocks=(size_data+block_size-1)/block_size
+num_blocks=(size_data+block_size-1)//block_size
 source = numpy.arange(0,size_data,1).astype(numpy.float32)
 destination = numpy.zeros_like(source)