Commit 9ca2c04c authored by Philippe SWARTVAGHER
Browse files

Also display bandwidth and binding in per-thread performances

parent de290cd1
......@@ -84,6 +84,7 @@ void ping_pong_init(int _rank, enum pingpong_type_e _pingpong_type, int _buffer_
void ping_pong_free();
void get_worker_binding(hwloc_topology_t topo, char* binding_str[64]);
/* Fills thread_binding[0 .. nb_threads-1]: entry i receives the hwloc logical
 * index of the PU on which iteration i of an OpenMP parallel loop was running
 * (the OS index is obtained with sched_getcpu() and translated through topo). */
void get_worker_binding_ids(hwloc_topology_t topo, int nb_threads, int* thread_binding);
void hwloc_display_binding();
void bind_thread(struct machine_s machine, int thread_pu, char* name);
void fill_machine(struct machine_s* machine);
......
......@@ -35,6 +35,18 @@ int get_nb_openmp_threads()
}
/**
 * Record the hwloc logical PU index on which each worker is currently running.
 *
 * Each iteration i of the OpenMP loop asks the kernel which CPU it is running
 * on (sched_getcpu()), translates that OS index into the hwloc logical index
 * and stores it in thread_binding[i].
 *
 * @param topo            hwloc topology used for the OS index -> logical index
 *                        translation.
 * @param nb_threads      number of entries to fill.
 * @param thread_binding  output array with at least nb_threads entries.
 *                        An entry is set to -1 when the current PU cannot be
 *                        determined (sched_getcpu() failed, or the PU is not
 *                        part of the topology).
 */
void get_worker_binding_ids(hwloc_topology_t topo, int nb_threads, int* thread_binding)
{
    /* schedule(static, 1) hands out iterations round-robin, so iteration i is
     * executed by thread i as long as the team has at least nb_threads threads
     * (presumably nb_threads is the OpenMP team size -- confirm at call site). */
    #pragma omp parallel for schedule(static, 1)
    for (int i = 0; i < nb_threads; i++)
    {
        int core_os = sched_getcpu(); /* -1 on error */

        /* The lookup returns NULL when no PU with this OS index exists in the
         * topology (e.g. restricted topology): never dereference blindly. */
        hwloc_obj_t pu = (core_os >= 0)
            ? hwloc_get_pu_obj_by_os_index(topo, (unsigned) core_os)
            : NULL;

        thread_binding[i] = (pu != NULL) ? (int) pu->logical_index : -1;
    }
}
void get_worker_binding(hwloc_topology_t topo, char* binding_str[64])
{
#pragma omp parallel
......
......@@ -50,30 +50,43 @@ static inline double time_to_bw(double t) // microsec
return (sizeof(MEMSET_TYPE) * array_size * nb_threads) / (t / NB_ITER); // MB/s
}
/*
 * Convert a duration into the memory bandwidth achieved by a single thread.
 *
 * t is a time in microseconds; it is divided by NB_ITER to get the time of one
 * iteration. One iteration touches array_size elements of MEMSET_TYPE, so the
 * result is in bytes per microsecond, i.e. MB/s.
 */
static inline double time_to_bw_per_thread(double t)
{
    const double bytes_per_iter = sizeof(MEMSET_TYPE) * array_size;
    const double usec_per_iter = t / NB_ITER;

    return bytes_per_iter / usec_per_iter;
}
static void memset_print_results()
{
if (params.per_thread_perf)
{
int* thread_binding = malloc(nb_threads * sizeof(int));
memset(thread_binding, 0, nb_threads * sizeof(int));
get_worker_binding_ids(machine.topology, nb_threads, thread_binding);
if (memset_bench_done[WITH_COMM])
{
printf("# memset results per thread: with comm (time in ms (min, avg, max))\n");
printf("# memset results per thread (with comm): Bandwidth MB/s (max, avg, min) Time ms (min, avg, max)\n");
for (int i = 0; i < nb_threads; i++)
{
printf("# Thread #%d\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n", i,
printf("Thread #%d\t%d\t"COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n",
i, thread_binding[i],
time_to_bw_per_thread(memset_perfs_per_thread_comm[i*3]), time_to_bw_per_thread(memset_perfs_per_thread_comm[i*3+1]), time_to_bw_per_thread(memset_perfs_per_thread_comm[i*3+2]),
memset_perfs_per_thread_comm[i*3] / 1000.0f, memset_perfs_per_thread_comm[i*3+1] / 1000.0f, memset_perfs_per_thread_comm[i*3+2] / 1000.0f);
}
}
if (memset_bench_done[WITHOUT_COMM])
{
printf("# memset results per thread: without comm (time in ms (min, avg, max))\n");
printf("# memset results per thread (without comm): Bandwidth MB/s (max, avg, min) Time ms (min, avg, max)\n");
for (int i = 0; i < nb_threads; i++)
{
printf("# Thread #%d\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n", i,
printf("Thread #%d\t%d\t"COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_BW_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\t"COMP_TIME_FORMAT"\n",
i, thread_binding[i],
time_to_bw_per_thread(memset_perfs_per_thread_no_comm[i*3]), time_to_bw_per_thread(memset_perfs_per_thread_no_comm[i*3+1]), time_to_bw_per_thread(memset_perfs_per_thread_no_comm[i*3+2]),
memset_perfs_per_thread_no_comm[i*3] / 1000.0f, memset_perfs_per_thread_no_comm[i*3+1] / 1000.0f, memset_perfs_per_thread_no_comm[i*3+2] / 1000.0f);
}
}
free(thread_binding);
}
printf("# memset results: Bandwidth MB/s (max, avg, min) Time ms (min, avg, max)\n");
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment