Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Philippe SWARTVAGHER
memory-contention
Commits
316d5061
Commit
316d5061
authored
Jan 08, 2022
by
Philippe SWARTVAGHER
Browse files
Use non-temporal instructions also on ARM processors
parent
48401f44
Changes
2
Hide whitespace changes
Inline
Side-by-side
configure.ac
View file @
316d5061
...
...
@@ -101,6 +101,18 @@ fi
AC_CHECK_DECLS([_mm_stream_si32], [have_mm_stream_si32=yes], [have_mm_stream_si32=no], [[#include <x86intrin.h>]])
AM_CONDITIONAL(HAVE_MM_STREAM_SI32, test x$have_mm_stream_si32 = xyes)
AC_CHECK_DECLS([vst1q_lane_s32, vdupq_n_s32], [have_arm_nt_store=yes], [have_arm_nt_store=no], [[#include <arm_neon.h>]])
AM_CONDITIONAL(HAVE_ARM_NT_STORE, test x$have_arm_nt_store = xyes)
if test x$have_arm_nt_store = xyes; then
AC_DEFINE([HAVE_ARM_NT_STORE], [1], [Define to 1 if you have non-temporal store instructions on ARM architecture])
fi
have_non_temporal_store=no
if test x$have_mm_stream_si32 = xyes -o x$have_arm_nt_store = xyes; then
AC_DEFINE([HAVE_NON_TEMPORAL_STORE], [1], [Define to 1 if you have non-temporal store instructions])
have_non_temporal_store=yes
fi
# Compiler
AC_PROG_CC
...
...
@@ -165,11 +177,12 @@ AC_OUTPUT
AC_MSG_NOTICE([
Configuration summary:
CPU: $host_cpu
MPI: $have_mpi ($mpicc_path)
likwid: $have_likwid
StarPU: $have_starpu
MKL: $have_mkl
AVX: $available_avx
Python: $install_python_package
CPU: $host_cpu
MPI: $have_mpi ($mpicc_path)
likwid: $have_likwid
StarPU: $have_starpu
MKL: $have_mkl
AVX: $available_avx
Python: $install_python_package
nt store: $have_non_temporal_store
])
src/openmp_memset.c
View file @
316d5061
...
...
@@ -6,8 +6,12 @@
#include "config.h"
#if defined(HAVE_DECL__MM_STREAM_SI32)
#if HAVE_NON_TEMPORAL_STORE
#if HAVE_DECL__MM_STREAM_SI32
#include <x86intrin.h>
#elif HAVE_ARM_NT_STORE
#include <arm_neon.h>
#endif
#endif
#include "cli.h"
...
...
@@ -37,6 +41,18 @@ static double* memset_perfs_per_thread_comm;
volatile
int
*
memset_comm_bench_ended
=
NULL
;
#if HAVE_NON_TEMPORAL_STORE
static
inline
__attribute__
((
always_inline
))
void
non_temporal_store
(
int
*
p
,
int
a
)
{
#if HAVE_DECL__MM_STREAM_SI32
_mm_stream_si32
(
p
,
a
);
#elif HAVE_ARM_NT_STORE
vst1q_lane_s32
((
int32_t
*
)
p
,
vdupq_n_s32
(
a
),
0
);
#endif
}
#endif
static
void
memset_set_comm_bench_ended_ptr
(
volatile
int
*
_comm_bench_ended
)
{
memset_comm_bench_ended
=
_comm_bench_ended
;
...
...
@@ -201,14 +217,14 @@ static double memset_run_kernel(enum bench_type bench_type)
double
last_iter_duration
=
0
;
do
{
#if
defined(HAVE_DECL__MM
_STRE
AM_SI32)
#if
HAVE_NON_TEMPORAL
_ST
O
RE
if
(
use_non_temporal
)
{
PUK_GET_TICK
(
start_iter_time
);
int
value
=
scalar
*
(
nb_iter_per_thread
[
i
]
+
1
);
for
(
int
k
=
0
;
k
<
array_size
;
k
++
)
{
_mm
_stre
am_si32
(
a
[
i
]
+
k
,
value
);
non_temporal
_st
o
re
(
a
[
i
]
+
k
,
value
);
}
PUK_GET_TICK
(
end_iter_time
);
}
...
...
@@ -236,7 +252,7 @@ static double memset_run_kernel(enum bench_type bench_type)
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
nb_threads
;
i
++
)
{
#if
defined(HAVE_DECL__MM
_STRE
AM_SI32)
#if
HAVE_NON_TEMPORAL
_ST
O
RE
if
(
use_non_temporal
)
{
PUK_GET_TICK
(
thread_start_times
[
i
]);
...
...
@@ -245,7 +261,7 @@ static double memset_run_kernel(enum bench_type bench_type)
int
value
=
scalar
*
j
;
for
(
int
k
=
0
;
k
<
array_size
;
k
++
)
{
_mm
_stre
am_si32
(
a
[
i
]
+
k
,
value
);
non_temporal
_st
o
re
(
a
[
i
]
+
k
,
value
);
}
}
PUK_GET_TICK
(
thread_end_times
[
i
]);
...
...
@@ -403,7 +419,7 @@ static void memset_release()
static
void
memset_man
()
{
printf
(
"Memset-related options:
\n
"
);
#if
defined(HAVE_DECL__MM
_STRE
AM_SI32)
#if
HAVE_NON_TEMPORAL
_ST
O
RE
printf
(
"
\t
--nt
\t
use non-temporal stores to bypass the LLC
\n
"
);
#endif
printf
(
"
\t
--throughput
\t
measure memory throughput
\n
"
);
...
...
@@ -417,7 +433,7 @@ static void memset_print_params()
static
int
memset_parse_arg
(
char
*
arg
)
{
#if
defined(HAVE_DECL__MM
_STRE
AM_SI32)
#if
HAVE_NON_TEMPORAL
_ST
O
RE
if
(
strcmp
(
arg
,
"--nt"
)
==
0
)
{
use_non_temporal
=
1
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment