Commit 26185121 authored by Philippe SWARTVAGHER
Browse files

memset: display also reached memory bandwidth

parent d0e4106e
......@@ -22,6 +22,7 @@
// Print a printf-style message and flush stdout, but only when `rank == 0`.
// A variable named `rank` must be in scope wherever the macro is expanded.
#define PRINTF_RANK0(msg, ...) do { if (rank == 0) { printf(msg, ## __VA_ARGS__); fflush(stdout); } } while (0)
// printf conversion used for the time columns of the result lines.
#define COMP_TIME_FORMAT "%6.1f"
// printf conversion used for the bandwidth columns of the result lines.
#define COMP_BW_FORMAT "%8.1f"
enum bench_type {
......
......@@ -32,7 +32,10 @@ static void print_stream_result_lines(char bench_to_run[4], double results[4][3]
{
if (bench_to_run[i])
{
printf("# %s\t%.3f\t%.3f\t%.3f\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n", stream_labels[i], results[i][0], results[i][1], results[i][2], times[i][0]*1000, times[i][1]*1000, times[i][2]*1000);
printf("# %s\t"COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n",
stream_labels[i],
results[i][0], results[i][1], results[i][2],
times[i][0]*1000, times[i][1]*1000, times[i][2]*1000);
}
}
}
......
......@@ -8,6 +8,7 @@
#include "timing.h"
// Element type of the buffers filled by the memset kernel.
#define MEMSET_TYPE int
// Number of consecutive memset passes made over each buffer in one kernel run.
#define NB_ITER 3
// Table of buffers: the kernel fills a[i] for each index i below nb_threads.
static MEMSET_TYPE **a;
// Number of MEMSET_TYPE elements in each buffer.
static int array_size;
......@@ -17,6 +18,7 @@ extern struct params_s params;
// One flag per bench type (indexed with WARMUP, WITH_COMM, WITHOUT_COMM):
// memset_print_results() only prints a section when its flag is non-zero.
char memset_bench_done[] = {0, 0, 0};
// All in microseconds:
// Each array holds three durations, printed as (min, avg, max) by
// memset_print_results(). Slot 0 starts at FLT_MAX, presumably so that the
// first measured duration becomes the minimum -- confirm against the code
// that updates these arrays.
double memset_perfs_warmup[] = {FLT_MAX, 0, 0};
double memset_perfs_no_comm[] = {FLT_MAX, 0, 0};
double memset_perfs_comm[] = {FLT_MAX, 0, 0};
......@@ -34,20 +36,31 @@ static int memset_get_nb_runs(enum comm_bench_type comm_bench_type)
return 20;
}
/*
 * Convert a measured duration into a memory bandwidth.
 *
 * t: duration in microseconds; it is divided by NB_ITER to get the time
 *    spent on a single pass.
 *
 * Returns the number of bytes covered by one pass
 * (sizeof(MEMSET_TYPE) * array_size * nb_threads) divided by the time of
 * one pass. Bytes per microsecond is the same thing as MB/s.
 */
static inline double time_to_bw(double t)
{
    const double usec_per_pass = t / NB_ITER;

    return (sizeof(MEMSET_TYPE) * array_size * nb_threads) / usec_per_pass;
}
/*
 * Print the memset benchmark results on stdout.
 *
 * A section is printed for each bench type whose flag is set in
 * memset_bench_done (WARMUP, WITH_COMM, WITHOUT_COMM). The stored
 * durations are in microseconds; they are divided by 1000 to be shown in
 * milliseconds, and passed to time_to_bw() to be shown as bandwidths.
 * Because slot 0 is the minimum duration, it maps to the maximum
 * bandwidth, hence the "(max, avg, min)" order announced in the header.
 *
 * NOTE(review): the header and every section are emitted twice, first in
 * a time-only form and then in a bandwidth + time form. The time-only
 * lines look like the previous output format kept next to the new one --
 * confirm whether both are meant to be printed.
 */
static void memset_print_results()
{
// Header lines: time-only form, then bandwidth + time form.
printf("# memset results: Time ms (min, avg, max)\n");
printf("# memset results: Bandwidth MB/s (max, avg, min) Time ms (min, avg, max)\n");
if (memset_bench_done[WARMUP])
{
// Warmup runs: durations only, then bandwidths followed by durations.
printf("# warmup "COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n", memset_perfs_warmup[0] / 1000.0f, memset_perfs_warmup[1] / 1000.0f, memset_perfs_warmup[2] / 1000.0f);
printf("# warmup "COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n",
time_to_bw(memset_perfs_warmup[0]), time_to_bw(memset_perfs_warmup[1]), time_to_bw(memset_perfs_warmup[2]),
memset_perfs_warmup[0] / 1000.0f, memset_perfs_warmup[1] / 1000.0f, memset_perfs_warmup[2] / 1000.0f);
}
if (memset_bench_done[WITH_COMM])
{
// Runs labelled "with communications": same two line forms.
printf("# with communications "COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n", memset_perfs_comm[0] / 1000.0f, memset_perfs_comm[1] / 1000.0f, memset_perfs_comm[2] / 1000.0f);
printf("# with communications "COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n",
time_to_bw(memset_perfs_comm[0]), time_to_bw(memset_perfs_comm[1]), time_to_bw(memset_perfs_comm[2]),
memset_perfs_comm[0] / 1000.0f, memset_perfs_comm[1] / 1000.0f, memset_perfs_comm[2] / 1000.0f);
}
if (memset_bench_done[WITHOUT_COMM])
{
// Runs labelled "without communications": same two line forms.
printf("# without communications "COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n", memset_perfs_no_comm[0] / 1000.0f, memset_perfs_no_comm[1] / 1000.0f, memset_perfs_no_comm[2] / 1000.0f);
printf("# without communications "COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n",
time_to_bw(memset_perfs_no_comm[0]), time_to_bw(memset_perfs_no_comm[1]), time_to_bw(memset_perfs_no_comm[2]),
memset_perfs_no_comm[0] / 1000.0f, memset_perfs_no_comm[1] / 1000.0f, memset_perfs_no_comm[2] / 1000.0f);
}
}
......@@ -83,7 +96,7 @@ static double memset_run_kernel()
#pragma omp parallel for
for (int i = 0; i < nb_threads; i++)
{
for (int j = 1; j <= 3; j++)
for (int j = 1; j <= NB_ITER; j++)
{
memset(a[i], scalar*j, array_size*sizeof(MEMSET_TYPE));
}
......@@ -91,7 +104,6 @@ static double memset_run_kernel()
PUK_GET_TICK(end_time);
return PUK_TIMING_DELAY(start_time, end_time);
}
static int memset_run(int nb_runs, enum bench_type bench_type)
......
......@@ -13,7 +13,7 @@
#endif
// Store the current value of the clock CLOCK_TYPE into the tick `t`
// (via clock_gettime; the return code is not checked by the macro).
#define PUK_GET_TICK(t) clock_gettime(CLOCK_TYPE, &(t))
// Delay between two ticks, as computed by puk_ticks2delay().
#define PUK_TIMING_DELAY(start, end) puk_ticks2delay(&(start), &(end))
#define PUK_TIMING_DELAY(start, end) puk_ticks2delay(&(start), &(end)) // microseconds
// A tick is a plain struct timespec.
typedef struct timespec puk_tick_t;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment