Commit f036181d authored by Philippe SWARTVAGHER
Browse files

Add weak version of cursor for OpenMP and factorize code

parent 7d5644fb
......@@ -36,9 +36,11 @@ openmp_sources = \
openmp_stream.c \
openmp_prime.c \
openmp_cursor.c \
openmp_cursor_weak.c \
openmp_scalar.c \
openmp_stream_weak.c \
openmp_memset.c \
helper_openmp_weak.c \
malloc.c \
timing.c \
$(common_sources)
......
......@@ -19,11 +19,12 @@
#elif defined(_OPENMP)
#include "scalar.h"
#include "stream_weak.h"
#include "cursor_weak.h"
#ifdef HAVE_SIMD_AVX
#include "scalar_avx.h"
#define NB_KERNELS 7
#define NB_KERNELS 8
#else
#define NB_KERNELS 6
#define NB_KERNELS 7
#endif
#endif
......@@ -55,8 +56,9 @@ void init_params()
computing_kernels[3] = stream_weak_get_functions();
computing_kernels[4] = scalar_get_functions();
computing_kernels[5] = memset_get_functions();
computing_kernels[6] = cursor_weak_get_functions();
#ifdef HAVE_SIMD_AVX
computing_kernels[6] = scalar_avx_get_functions();
computing_kernels[7] = scalar_avx_get_functions();
#endif
#endif
}
......
/* Public declarations for the "cursor_weak" computing kernel. */
#ifndef __CURSOR_WEAK_H
#define __CURSOR_WEAK_H
#include "helper.h"
/*
 * Run the cursor_weak benchmark nb_runs times for the given bench_type.
 * NOTE(review): the definition visible in openmp_cursor_weak.c is declared
 * `static`, and that file does not include this header; confirm whether this
 * prototype is still needed or whether the definition should lose `static`.
 */
int cursor_weak_run(int nb_runs, enum bench_type bench_type);
/* Return the callback table (struct computing_functions) describing the
 * cursor_weak kernel. */
struct computing_functions cursor_weak_get_functions();
#endif // __CURSOR_WEAK_H
......@@ -25,6 +25,7 @@
#define COMP_TIME_FORMAT "%6.1f"
#define COMP_BW_FORMAT "%8.1f"
#define ARRAY_TYPE double
enum bench_type {
WARMUP = 0,
......
......@@ -3,8 +3,6 @@
#include "helper.h"
#define ARRAY_TYPE double
extern char cursor_bench_done[3];
extern double cursor_perfs_warmup[3];
......
#include "helper_openmp_weak.h"
/* Tables of per-thread arrays: weak_init() allocates nb_threads entries in
 * each table, and each entry points to weak_array_size elements. */
ARRAY_TYPE **weak_a, **weak_b, **weak_c;
/* Number of ARRAY_TYPE elements in each per-thread array (set by weak_init()). */
int weak_array_size;
/* Number of per-thread arrays in each table (set by weak_init()). */
int nb_threads;
/* Machine description defined elsewhere; only its l3_size field is read in
 * this file. */
extern struct machine_s machine;
/*
 * Allocate and initialise the per-thread arrays used by the "weak" kernels.
 *
 * nb_threads is taken from get_worker_count(); every thread gets three
 * arrays (one in each of weak_a, weak_b, weak_c) holding
 * 2 * machine.l3_size / sizeof(ARRAY_TYPE) elements each.  Elements are set
 * to 1.0 (a), 2.7 (b) and 0.0 (c).
 *
 * Returns 0 on success and -1 if one of the pointer tables could not be
 * allocated (in which case all three table pointers are reset to NULL).
 * The results of comp_malloc() are not checked here: its failure behaviour
 * is not visible from this file.
 */
int weak_init()
{
    nb_threads = get_worker_count();
    weak_array_size = 2 * machine.l3_size / sizeof(ARRAY_TYPE);

    /* Size in bytes of one per-thread array; the expression has type size_t,
     * hence the %zu conversion below. */
    const size_t array_bytes = weak_array_size * sizeof(ARRAY_TYPE);

    printf("# Each thread will work on an array of %d items (%zu KB)\n", weak_array_size, array_bytes / 1024);

    weak_a = malloc(nb_threads * sizeof(ARRAY_TYPE*));
    weak_b = malloc(nb_threads * sizeof(ARRAY_TYPE*));
    weak_c = malloc(nb_threads * sizeof(ARRAY_TYPE*));

    /* malloc(0) may legitimately return NULL, so only treat NULL as an
     * error when at least one entry was requested. */
    if (nb_threads > 0 && (weak_a == NULL || weak_b == NULL || weak_c == NULL))
    {
        fprintf(stderr, "# Failed to allocate the per-thread array tables\n");
        free(weak_a);
        free(weak_b);
        free(weak_c);
        weak_a = NULL;
        weak_b = NULL;
        weak_c = NULL;
        return -1;
    }

    /* Row i is allocated and filled inside the same parallel-loop iteration
     * (presumably so that each thread first touches its own data; confirm
     * against the thread binding and the comp_malloc() implementation). */
    #pragma omp parallel for
    for (int i = 0; i < nb_threads; i++)
    {
        weak_a[i] = comp_malloc(array_bytes);
        weak_b[i] = comp_malloc(array_bytes);
        weak_c[i] = comp_malloc(array_bytes);

        for (int j = 0; j < weak_array_size; j++)
        {
            weak_a[i][j] = 1.0;
            weak_b[i][j] = 2.7;
            weak_c[i][j] = 0.0;
        }
    }

    return 0;
}
void weak_release()
{
for (int i = 0; i < nb_threads; i++)
{
comp_free(weak_a[i], weak_array_size*sizeof(ARRAY_TYPE));
comp_free(weak_b[i], weak_array_size*sizeof(ARRAY_TYPE));
comp_free(weak_c[i], weak_array_size*sizeof(ARRAY_TYPE));
}
free(weak_a);
free(weak_b);
free(weak_c);
}
/* Shared state and setup/teardown routines for the "weak" OpenMP kernels. */
#ifndef __HELPER_OPENMP_WEAK_H
#define __HELPER_OPENMP_WEAK_H
#include "helper.h"
/* Tables of per-thread arrays, defined in helper_openmp_weak.c. */
extern ARRAY_TYPE **weak_a, **weak_b, **weak_c;
/* Number of elements in each per-thread array. */
extern int weak_array_size;
/* Number of per-thread arrays in each table. */
extern int nb_threads;
/* Allocate and initialise the tables above. */
int weak_init();
/* Free everything allocated by weak_init(). */
void weak_release();
#endif /* __HELPER_OPENMP_WEAK_H */
......@@ -3,8 +3,6 @@
#include "helper.h"
#define STREAM_TYPE double
// arrays indexed first by stream_bench_type enum, and then {max, avg, min}:
extern double stream_perfs_warmup[4][3];
extern double stream_perfs_no_comm[4][3];
......
#include <float.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <omp.h>
#include "timing.h"
#include "helper_openmp_weak.h"
#include "cursor.h"
#include "helper_cursor.h"
extern ARRAY_TYPE **weak_a, **weak_b, **weak_c;
extern int weak_array_size;
extern int nb_threads;
/*
 * Execute one timed pass of the cursor kernel over the per-thread arrays.
 *
 * Returns the value of PUK_TIMING_DELAY() between the tick taken just before
 * the parallel loop and the tick taken just after it (time unit is whatever
 * PUK_TIMING_DELAY yields; not visible here).
 *
 * Note that weak_b is accumulated into (+=) and never reset by this
 * function, so its contents keep growing from one call to the next.
 */
static double cursor_weak_run_kernel()
{
puk_tick_t start_time, end_time;
PUK_GET_TICK(start_time);
/* The loop over i is distributed among OpenMP threads; iteration i only
 * touches weak_a[i], weak_b[i] and weak_c[i]. */
#pragma omp parallel for
for (int i = 0; i < nb_threads; i++)
{
for (int j = 0; j < weak_array_size; j++)
{
/* Each element is updated cursor + 1 times; `cursor` is defined outside
 * this block (presumably the knob tuning the amount of computation per
 * element; confirm in the cursor helper). */
for (int k = 0; k < cursor + 1; k++)
{
weak_b[i][j] += (weak_a[i][j] + k * weak_c[i][j]);
}
}
}
PUK_GET_TICK(end_time);
return PUK_TIMING_DELAY(start_time, end_time);
}
/* Fold one kernel duration into the running minimum, sum and maximum. */
static void cursor_weak_accumulate(double duration, double *mintime, double *sumtime, double *maxtime)
{
    *mintime = MIN(*mintime, duration);
    *sumtime += duration;
    *maxtime = MAX(*maxtime, duration);
}

/*
 * Run the cursor_weak kernel and record {min, avg, max} durations.
 *
 * nb_runs     number of kernel executions to perform unconditionally.
 * bench_type  WARMUP, WITH_COMM or WITHOUT_COMM; any other value aborts
 *             the program before any array is indexed with it.
 *
 * For WITH_COMM, when cursor_comm_bench_ended is set, the kernel keeps being
 * executed after the nb_runs mandatory runs until *cursor_comm_bench_ended
 * becomes non-zero; these extra runs are included in the statistics.
 *
 * Results are stored in cursor_perfs_warmup / cursor_perfs_comm /
 * cursor_perfs_no_comm as [0]=min, [1]=avg, [2]=max, but only the first time
 * a given bench_type is run (cursor_bench_done[bench_type] is then set).
 *
 * Always returns 0.
 */
static int cursor_weak_run(int nb_runs, enum bench_type bench_type)
{
    /* Reject unknown values first: bench_type is used below as an index
     * into cursor_bench_done[]. */
    if (bench_type != WARMUP && bench_type != WITH_COMM && bench_type != WITHOUT_COMM)
    {
        abort();
    }

    if (bench_type != WARMUP && cursor_bench_done[bench_type])
    {
        printf("Warning: this bench was already done.\n");
    }

    double avgtime = 0, maxtime = 0, mintime = FLT_MAX;
    int real_nb_runs = nb_runs;

    for (int k = 0; k < nb_runs; k++)
    {
        cursor_weak_accumulate(cursor_weak_run_kernel(), &mintime, &avgtime, &maxtime);
    }

    if (bench_type == WITH_COMM && cursor_comm_bench_ended != NULL)
    {
        /* Keep computing while we need more pingpongs: */
        while (!*cursor_comm_bench_ended)
        {
            cursor_weak_accumulate(cursor_weak_run_kernel(), &mintime, &avgtime, &maxtime);
            real_nb_runs++;
        }
    }

    /* Results of a bench that was already recorded are not overwritten. */
    if (cursor_bench_done[bench_type])
    {
        return 0;
    }

    /* avgtime holds the sum of all durations up to this point. */
    const double mean = avgtime / (double) (real_nb_runs);

    switch (bench_type)
    {
        case WARMUP:
            cursor_perfs_warmup[0] = mintime;
            cursor_perfs_warmup[1] = mean;
            cursor_perfs_warmup[2] = maxtime;
            break;
        case WITH_COMM:
            cursor_perfs_comm[0] = mintime;
            cursor_perfs_comm[1] = mean;
            cursor_perfs_comm[2] = maxtime;
            break;
        case WITHOUT_COMM:
            cursor_perfs_no_comm[0] = mintime;
            cursor_perfs_no_comm[1] = mean;
            cursor_perfs_no_comm[2] = maxtime;
            break;
        default:
            /* Unreachable thanks to the validation above; kept as a guard. */
            abort();
    }

    cursor_bench_done[bench_type] = 1;

    return 0;
}
struct computing_functions cursor_weak_get_functions()
{
struct computing_functions s = {
.init = &weak_init,
.run = &cursor_weak_run,
.print_results = &cursor_print_results,
.print_params = &cursor_print_params,
.release = &weak_release,
.parse_arg = &cursor_parse_arg,
.man = &cursor_man,
.get_nb_runs = &cursor_get_nb_runs,
.set_comm_bench_ended_ptr = &cursor_set_comm_bench_ended_ptr,
.name = "cursor_weak"
};
return s;
}
......@@ -12,9 +12,9 @@
#ifdef ALLOC_STATIC
static STREAM_TYPE a[STREAM_ARRAY_SIZE], b[STREAM_ARRAY_SIZE], c[STREAM_ARRAY_SIZE];
static ARRAY_TYPE a[STREAM_ARRAY_SIZE], b[STREAM_ARRAY_SIZE], c[STREAM_ARRAY_SIZE];
#else
static STREAM_TYPE *a, *b, *c;
static ARRAY_TYPE *a, *b, *c;
#endif
int stream_init()
......@@ -22,9 +22,9 @@ int stream_init()
ssize_t j;
#ifndef ALLOC_STATIC
a = comp_malloc(STREAM_ARRAY_SIZE * sizeof(STREAM_TYPE));
b = comp_malloc(STREAM_ARRAY_SIZE * sizeof(STREAM_TYPE));
c = comp_malloc(STREAM_ARRAY_SIZE * sizeof(STREAM_TYPE));
a = comp_malloc(STREAM_ARRAY_SIZE * sizeof(ARRAY_TYPE));
b = comp_malloc(STREAM_ARRAY_SIZE * sizeof(ARRAY_TYPE));
c = comp_malloc(STREAM_ARRAY_SIZE * sizeof(ARRAY_TYPE));
#endif
#pragma omp parallel for
......@@ -41,9 +41,9 @@ int stream_init()
void stream_release()
{
#ifndef ALLOC_STATIC
comp_free(a, STREAM_ARRAY_SIZE * sizeof(STREAM_TYPE));
comp_free(b, STREAM_ARRAY_SIZE * sizeof(STREAM_TYPE));
comp_free(c, STREAM_ARRAY_SIZE * sizeof(STREAM_TYPE));
comp_free(a, STREAM_ARRAY_SIZE * sizeof(ARRAY_TYPE));
comp_free(b, STREAM_ARRAY_SIZE * sizeof(ARRAY_TYPE));
comp_free(c, STREAM_ARRAY_SIZE * sizeof(ARRAY_TYPE));
#endif
}
......@@ -51,7 +51,7 @@ static void stream_run_kernel(double avgtime[4], double maxtime[4], double minti
{
puk_tick_t start_time, end_time;
double durations[4];
STREAM_TYPE scalar = 3.0;
ARRAY_TYPE scalar = 3.0;
/* COPY */
if (bench_to_run[COPY])
......@@ -119,10 +119,10 @@ int stream_run(int nb_runs, enum bench_type bench_type)
int real_nb_runs = nb_runs;
double avgtime[4] = {0}, maxtime[4] = {0}, mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
double bytes[4] = {
2 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
2 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE,
3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE
2 * sizeof(ARRAY_TYPE) * STREAM_ARRAY_SIZE,
2 * sizeof(ARRAY_TYPE) * STREAM_ARRAY_SIZE,
3 * sizeof(ARRAY_TYPE) * STREAM_ARRAY_SIZE,
3 * sizeof(ARRAY_TYPE) * STREAM_ARRAY_SIZE
};
for (int k = 0; k < nb_runs; k++)
......
......@@ -4,51 +4,21 @@
#include <string.h>
#include <omp.h>
#include "stream.h"
#include "timing.h"
#include "stream.h"
#include "helper_openmp_weak.h"
#include "helper_stream.h"
#include "hwloc.h"
static STREAM_TYPE **a, **b, **c;
static int array_size;
static int nb_threads;
extern struct machine_s machine;
static int stream_weak_init()
{
nb_threads = get_nb_openmp_threads();
array_size = 2 * machine.l3_size / sizeof(STREAM_TYPE);
printf("# Each thread will work on an array of %d items (%ld KB)\n", array_size, array_size*sizeof(STREAM_TYPE)/1024);
a = malloc(nb_threads*sizeof(STREAM_TYPE*));
b = malloc(nb_threads*sizeof(STREAM_TYPE*));
c = malloc(nb_threads*sizeof(STREAM_TYPE*));
#pragma omp parallel for
for (int i = 0; i < nb_threads; i++)
{
a[i] = comp_malloc(array_size*sizeof(STREAM_TYPE));
b[i] = comp_malloc(array_size*sizeof(STREAM_TYPE));
c[i] = comp_malloc(array_size*sizeof(STREAM_TYPE));
for (int j = 0; j < array_size; j++)
{
a[i][j] = 1.0;
b[i][j] = 2.0;
c[i][j] = 0.0;
}
}
extern ARRAY_TYPE **weak_a, **weak_b, **weak_c;
extern int weak_array_size;
extern int nb_threads;
return 0;
}
static void stream_weak_run_kernel(double avgtime[4], double maxtime[4], double mintime[4])
{
puk_tick_t start_time, end_time;
double durations[4];
STREAM_TYPE scalar = 3.0;
ARRAY_TYPE scalar = 3.0;
/* COPY */
if (bench_to_run[COPY])
......@@ -56,8 +26,8 @@ static void stream_weak_run_kernel(double avgtime[4], double maxtime[4], double
PUK_GET_TICK(start_time);
#pragma omp parallel for
for (int i = 0; i < nb_threads; i++)
for (int j = 0; j < array_size; j++)
c[i][j] = a[i][j];
for (int j = 0; j < weak_array_size; j++)
weak_c[i][j] = weak_a[i][j];
PUK_GET_TICK(end_time);
durations[COPY] = PUK_TIMING_DELAY(start_time, end_time) / 1000000.0f;
}
......@@ -68,8 +38,8 @@ static void stream_weak_run_kernel(double avgtime[4], double maxtime[4], double
PUK_GET_TICK(start_time);
#pragma omp parallel for
for (int i = 0; i < nb_threads; i++)
for (int j = 0; j < array_size; j++)
b[i][j] = scalar*c[i][j];
for (int j = 0; j < weak_array_size; j++)
weak_b[i][j] = scalar*weak_c[i][j];
PUK_GET_TICK(end_time);
durations[SCALE] = PUK_TIMING_DELAY(start_time, end_time) / 1000000.0f;
}
......@@ -80,8 +50,8 @@ static void stream_weak_run_kernel(double avgtime[4], double maxtime[4], double
PUK_GET_TICK(start_time);
#pragma omp parallel for
for (int i = 0; i < nb_threads; i++)
for (int j = 0; j < array_size; j++)
c[i][j] = a[i][j]+b[i][j];
for (int j = 0; j < weak_array_size; j++)
weak_c[i][j] = weak_a[i][j]+weak_b[i][j];
PUK_GET_TICK(end_time);
durations[ADD] = PUK_TIMING_DELAY(start_time, end_time) / 1000000.0f;
}
......@@ -92,8 +62,8 @@ static void stream_weak_run_kernel(double avgtime[4], double maxtime[4], double
PUK_GET_TICK(start_time);
#pragma omp parallel for
for (int i = 0; i < nb_threads; i++)
for (int j = 0; j < array_size; j++)
a[i][j] = b[i][j]+scalar*c[i][j];
for (int j = 0; j < weak_array_size; j++)
weak_a[i][j] = weak_b[i][j]+scalar*weak_c[i][j];
PUK_GET_TICK(end_time);
durations[TRIAD] = PUK_TIMING_DELAY(start_time, end_time) / 1000000.0f;
}
......@@ -119,10 +89,10 @@ static int stream_weak_run(int nb_runs, enum bench_type bench_type)
int real_nb_runs = nb_runs;
double avgtime[4] = {0}, maxtime[4] = {0}, mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
double bytes[4] = {
2 * sizeof(STREAM_TYPE) * array_size * nb_threads,
2 * sizeof(STREAM_TYPE) * array_size * nb_threads,
3 * sizeof(STREAM_TYPE) * array_size * nb_threads,
3 * sizeof(STREAM_TYPE) * array_size * nb_threads
2 * sizeof(ARRAY_TYPE) * weak_array_size * nb_threads,
2 * sizeof(ARRAY_TYPE) * weak_array_size * nb_threads,
3 * sizeof(ARRAY_TYPE) * weak_array_size * nb_threads,
3 * sizeof(ARRAY_TYPE) * weak_array_size * nb_threads
};
for (int k = 0; k < nb_runs; k++)
......@@ -193,28 +163,15 @@ static int stream_weak_run(int nb_runs, enum bench_type bench_type)
return 0;
}
static void stream_weak_release()
{
for (int i = 0; i < nb_threads; i++)
{
comp_free(a[i], array_size*sizeof(STREAM_TYPE));
comp_free(b[i], array_size*sizeof(STREAM_TYPE));
comp_free(c[i], array_size*sizeof(STREAM_TYPE));
}
free(a);
free(b);
free(c);
}
struct computing_functions stream_weak_get_functions()
{
struct computing_functions s = {
.init = &stream_weak_init,
.init = &weak_init,
.run = &stream_weak_run,
.print_results = &stream_print_results,
.print_params = &stream_print_params,
.release = &stream_weak_release,
.release = &weak_release,
.parse_arg = &stream_parse_arg,
.man = &stream_man,
.get_nb_runs = &stream_get_nb_runs,
......
......@@ -11,7 +11,7 @@
extern struct machine_s machine;
static unsigned nb_workers;
static STREAM_TYPE *a, *b, *c;
static ARRAY_TYPE *a, *b, *c;
static starpu_data_handle_t handle_a, handle_b, handle_c;
/* We provide a default value, but the value will be set according to the
......@@ -22,9 +22,9 @@ static unsigned stream_array_size = 235929600; // * 8 Bytes (double) =~ 1800 MB
static void init_task(void *buffers[], void *cl_arg)
{
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
STREAM_TYPE* a = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[0]);
STREAM_TYPE* b = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[1]);
STREAM_TYPE* c = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[2]);
ARRAY_TYPE* a = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[0]);
ARRAY_TYPE* b = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[1]);
ARRAY_TYPE* c = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[2]);
for (unsigned i = 0; i < n; i++)
{
......@@ -37,8 +37,8 @@ static void init_task(void *buffers[], void *cl_arg)
static void copy_task(void *buffers[], void *cl_arg)
{
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
STREAM_TYPE* a = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[0]);
STREAM_TYPE* c = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[1]);
ARRAY_TYPE* a = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[0]);
ARRAY_TYPE* c = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[1]);
for (unsigned i = 0; i < n; i++)
{
......@@ -48,10 +48,10 @@ static void copy_task(void *buffers[], void *cl_arg)
static void scale_task(void *buffers[], void *cl_arg)
{
STREAM_TYPE scalar = 3.0;
ARRAY_TYPE scalar = 3.0;
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
STREAM_TYPE* b = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[0]);
STREAM_TYPE* c = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[1]);
ARRAY_TYPE* b = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[0]);
ARRAY_TYPE* c = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[1]);
for (unsigned i = 0; i < n; i++)
{
......@@ -62,9 +62,9 @@ static void scale_task(void *buffers[], void *cl_arg)
static void add_task(void *buffers[], void *cl_arg)
{
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
STREAM_TYPE* a = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[0]);
STREAM_TYPE* b = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[1]);
STREAM_TYPE* c = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[2]);
ARRAY_TYPE* a = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[0]);
ARRAY_TYPE* b = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[1]);
ARRAY_TYPE* c = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[2]);
for (unsigned i = 0; i < n; i++)
{
......@@ -74,11 +74,11 @@ static void add_task(void *buffers[], void *cl_arg)
static void triad_task(void *buffers[], void *cl_arg)
{
STREAM_TYPE scalar = 3.0;
ARRAY_TYPE scalar = 3.0;
unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
STREAM_TYPE* a = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[0]);
STREAM_TYPE* b = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[1]);
STREAM_TYPE* c = (STREAM_TYPE*) STARPU_VECTOR_GET_PTR(buffers[2]);
ARRAY_TYPE* a = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[0]);
ARRAY_TYPE* b = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[1]);
ARRAY_TYPE* c = (ARRAY_TYPE*) STARPU_VECTOR_GET_PTR(buffers[2]);
for (unsigned i = 0; i < n; i++)
{
......@@ -143,13 +143,13 @@ int stream_init()
stream_array_size = machine.l3_size * 2 * nb_workers;
starpu_malloc((void**) &a, stream_array_size * sizeof(STREAM_TYPE));
starpu_malloc((void**) &b, stream_array_size * sizeof(STREAM_TYPE));
starpu_malloc((void**) &c, stream_array_size * sizeof(STREAM_TYPE));
starpu_malloc((void**) &a, stream_array_size * sizeof(ARRAY_TYPE));
starpu_malloc((void**) &b, stream_array_size * sizeof(ARRAY_TYPE));
starpu_malloc((void**) &c, stream_array_size * sizeof(ARRAY_TYPE));
starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) a, stream_array_size, sizeof(STREAM_TYPE));
starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) b, stream_array_size, sizeof(STREAM_TYPE));
starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) c, stream_array_size, sizeof(STREAM_TYPE));
starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) a, stream_array_size, sizeof(ARRAY_TYPE));
starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) b, stream_array_size, sizeof(ARRAY_TYPE));
starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) c, stream_array_size, sizeof(ARRAY_TYPE));
struct starpu_data_filter f = {
.filter_func = starpu_vector_filter_block,
......@@ -343,10 +343,10 @@ int stream_run(int nb_runs, enum bench_type bench_type)
int real_nb_runs = nb_runs;
double avgtime[4] = {0}, maxtime[4] = {0}, mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
double bytes[4] = {
2 * sizeof(STREAM_TYPE) * stream_array_size,
2 * sizeof(STREAM_TYPE) * stream_array_size,
3 * sizeof(STREAM_TYPE) * stream_array_size,
3 * sizeof(STREAM_TYPE) * stream_array_size
2 * sizeof(ARRAY_TYPE) * stream_array_size,
2 * sizeof(ARRAY_TYPE) * stream_array_size,
3 * sizeof(ARRAY_TYPE) * stream_array_size,
3 * sizeof(ARRAY_TYPE) * stream_array_size
};
for (int k = 0; k < nb_runs; k++)
......@@ -414,9 +414,9 @@ void stream_release()
starpu_data_unregister(handle_b);
starpu_data_unregister(handle_c);
starpu_free_noflag(a, stream_array_size * sizeof(STREAM_TYPE));
starpu_free_noflag(b, stream_array_size * sizeof(STREAM_TYPE));
starpu_free_noflag(c, stream_array_size * sizeof(STREAM_TYPE));
starpu_free_noflag(a, stream_array_size * sizeof(ARRAY_TYPE));
starpu_free_noflag(b, stream_array_size * sizeof(ARRAY_TYPE));
starpu_free_noflag(c, stream_array_size * sizeof(ARRAY_TYPE));
}
struct computing_functions stream_get_functions()
......
......@@ -3,10 +3,7 @@
#include "helper.h"
int stream_weak_init();
int stream_weak_run(int nb_runs, enum bench_type bench_type);
void stream_weak_release();
void stream_weak_print_results();
struct computing_functions stream_weak_get_functions();
#endif // __STREAM_WEAK_H
Supports Markdown
0% or .