Commit d9fb7b54 authored by Philippe SWARTVAGHER
Browse files

Add non-temporal stores to the memset bench

parent 5f0ec60f
......@@ -87,6 +87,9 @@ if test "$available_avx" != "none"; then
AC_DEFINE(HAVE_SIMD_AVX, [1], [Whether some AVX set is available])
fi
# for non-temporal memset:
AC_CHECK_DECLS([_mm_stream_si32], [], [], [[#include <x86intrin.h>]])
# Compiler
AC_PROG_CC
......
......@@ -4,6 +4,12 @@
#include <string.h>
#include <omp.h>
#include "config.h"
#if defined(HAVE_DECL__MM_STREAM_SI32)
#include <x86intrin.h>
#endif
#include "cli.h"
#include "timing.h"
......@@ -13,6 +19,7 @@
static MEMSET_TYPE **a;
static int array_size;
static int nb_threads;
static int use_non_temporal = 0;
extern struct machine_s machine;
extern struct params_s params;
......@@ -144,12 +151,30 @@ static double memset_run_kernel(enum bench_type bench_type)
{
puk_tick_t thread_start_time, thread_end_time;
PUK_GET_TICK(thread_start_time);
for (int j = 1; j <= NB_ITER; j++)
#if defined(HAVE_DECL__MM_STREAM_SI32)
if (use_non_temporal)
{
memset(a[i], scalar*j, array_size*sizeof(MEMSET_TYPE));
PUK_GET_TICK(thread_start_time);
for (int j = 1; j <= NB_ITER; j++)
{
int value = scalar * j;
for (int k = 0; k < array_size; k++)
{
_mm_stream_si32(a[i]+k, value);
}
}
PUK_GET_TICK(thread_end_time);
}
else
#endif
{
PUK_GET_TICK(thread_start_time);
for (int j = 1; j <= NB_ITER; j++)
{
memset(a[i], scalar*j, array_size*sizeof(MEMSET_TYPE));
}
PUK_GET_TICK(thread_end_time);
}
PUK_GET_TICK(thread_end_time);
if (params.per_thread_perf)
{
......@@ -263,16 +288,42 @@ static void memset_release()
free(memset_perfs_per_thread_comm);
}
/*
 * Print the help text for the memset-specific command-line options.
 *
 * The --nt option is only documented when the _mm_stream_si32 intrinsic was
 * found at configure time; otherwise nothing is printed.
 */
static void memset_man()
{
    /* AC_CHECK_DECLS always defines HAVE_DECL_<symbol>, to 1 when the symbol
     * is declared and to 0 when it is not, so the value must be tested:
     * checking only defined() would be true in both cases. */
#if defined(HAVE_DECL__MM_STREAM_SI32) && HAVE_DECL__MM_STREAM_SI32
    printf("Memset-related options:\n");
    printf("\t--nt\tuse non-temporal stores to bypass the LLC\n");
#endif
}
/*
 * Print a '#'-prefixed line telling whether the non-temporal memset
 * is going to be used, according to the use_non_temporal flag.
 */
static void memset_print_params()
{
    const char *negation = "not ";

    if (use_non_temporal)
    {
        negation = "";
    }

    printf("# Will %suse non-temporal memset\n", negation);
}
/*
 * Try to consume one command-line argument for the memset bench.
 *
 * arg: the argument string to examine (a NULL pointer is treated as
 *      "not recognized").
 *
 * Returns 1 when the argument was recognized, 0 otherwise. The only
 * recognized argument is "--nt", which sets use_non_temporal to 1; it is
 * accepted only when the _mm_stream_si32 intrinsic was found at configure
 * time.
 */
static int memset_parse_arg(char* arg)
{
    /* AC_CHECK_DECLS always defines HAVE_DECL_<symbol>, to 1 when the symbol
     * is declared and to 0 when it is not, so the value must be tested:
     * checking only defined() would be true in both cases. */
#if defined(HAVE_DECL__MM_STREAM_SI32) && HAVE_DECL__MM_STREAM_SI32
    if (arg != NULL && strcmp(arg, "--nt") == 0)
    {
        use_non_temporal = 1;
        return 1;
    }
#endif
    return 0;
}
struct computing_functions memset_get_functions()
{
struct computing_functions s = {
.init = &memset_init,
.run = &memset_run,
.print_results = &memset_print_results,
.print_params = NULL,
.print_params = &memset_print_params,
.release = &memset_release,
.parse_arg = NULL,
.man = NULL,
.parse_arg = &memset_parse_arg,
.man = &memset_man,
.get_nb_runs = &memset_get_nb_runs,
.set_comm_bench_ended_ptr = &memset_set_comm_bench_ended_ptr,
.name = "memset"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment