Commit 55ceaf9c authored by Philippe SWARTVAGHER
Browse files

Manually allocate memory in a round-robin fashion

instead of relying on the interleave flag of hwloc.
parent e1d233b0
......@@ -224,7 +224,7 @@ int main(int argc, char* argv[])
#endif
fill_machine(&machine);
init_malloc(machine.topology, params.memory_comp_numa_node_str, params.memory_comm_numa_node);
init_malloc(&machine, params.memory_comp_numa_node_str, params.memory_comm_numa_node);
if (params.display_help)
{
......
......@@ -5,39 +5,50 @@
#endif
static hwloc_topology_t topology;
static hwloc_bitmap_t memory_comp_numa_node_bitmap;
static int memory_comm_numa_node;
void init_malloc(hwloc_topology_t _topology, char* _memory_comp_numa_node_str, int _memory_comm_numa_node)
/* Array describing on which NUMA nodes memory for computations can be allocated: */
static int* numa_nodes_comp = NULL;
/* Number of NUMA nodes on which memory for computations can be allocated: */
static int nb_numa_nodes_comp = 0;
/*
 * Initialise the file-level allocator state.
 *
 * machine:                    machine description; its `topology` and `nb_numa`
 *                             fields are read here.
 * _memory_comp_numa_node_str: list of NUMA node indexes for computation memory.
 *                             It is tokenised in place with strtok(), so the
 *                             caller's buffer is modified.
 * _memory_comm_numa_node:     stored as-is in memory_comm_numa_node.
 *
 * Aborts the program when more indexes are provided than machine->nb_numa.
 *
 * NOTE(review): this body appears to contain both the removed and the added
 * lines of a diff (assignment from `_topology`, which is not a parameter of
 * this signature; `delim` declared twice; two consecutive `if` conditions;
 * two error messages). Confirm which set of lines is the current one.
 */
void init_malloc(struct machine_s* machine, char* _memory_comp_numa_node_str, int _memory_comm_numa_node)
{
topology = _topology;
topology = machine->topology;
memory_comm_numa_node = _memory_comm_numa_node;
memory_comp_numa_node_bitmap = hwloc_bitmap_alloc(); // empty when allocated
/* One slot per NUMA node of the machine.
 * NOTE(review): the result of malloc() is not checked before use. */
numa_nodes_comp = malloc(machine->nb_numa * sizeof(int));
const char delim[] = ",";
/* With '-' also a delimiter, an input such as "0-3" yields the two tokens
 * "0" and "3", not the range 0..3. */
const char delim[] = "-,";
char* sub = strtok(_memory_comp_numa_node_str, delim);
for (; sub != NULL; sub = strtok(NULL, delim))
{
/* atoi() reports no error: a non-numeric token is converted to 0. */
int logical_index = atoi(sub);
hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, logical_index);
if (obj == NULL)
/* Guard against writing past the end of numa_nodes_comp. */
if (nb_numa_nodes_comp >= machine->nb_numa)
{
fprintf(stderr, "Cannot find NUMA node %d\n", logical_index);
fprintf(stderr, "There is no such number of NUMA nodes (%d) !\n", machine->nb_numa);
abort();
}
hwloc_bitmap_or(memory_comp_numa_node_bitmap, memory_comp_numa_node_bitmap, obj->nodeset);
/* Append the parsed index; the value itself is not range-checked here. */
numa_nodes_comp[nb_numa_nodes_comp++] = atoi(sub);
}
}
/*
 * Release the resources held by this module: the array of NUMA nodes usable
 * for computation memory and the hwloc bitmap of computation NUMA nodes.
 */
void release_malloc()
{
    free(numa_nodes_comp);
    hwloc_bitmap_free(memory_comp_numa_node_bitmap);
}
/*
 * Return the NUMA node stored at position `id` in the array of NUMA nodes
 * on which memory for computations can be allocated.
 *
 * id: index into numa_nodes_comp; valid values are 0 .. nb_numa_nodes_comp - 1.
 *
 * Returns the NUMA node index recorded at that position.
 * Aborts the program, after printing a message on stderr, when `id` is out
 * of range.
 */
int get_numa_node_comp_for_id(int id)
{
    /* A negative index would read before the start of the array. */
    if (id < 0)
    {
        fprintf(stderr, "This id (%d) is negative !\n", id);
        abort();
    }

    if (id >= nb_numa_nodes_comp)
    {
        fprintf(stderr, "This id (%d) is superior to the number of provided NUMA nodes in the options (%d) !\n", id, nb_numa_nodes_comp);
        abort();
    }

    return numa_nodes_comp[id];
}
static inline void* data_malloc(size_t size, int numa_node)
void* data_malloc(size_t size, int numa_node)
{
if (numa_node == -1)
{
......@@ -70,24 +81,10 @@ void* comm_malloc(size_t size)
/*
 * Allocate `size` bytes of memory for computation data.
 *
 * As written: when the computation NUMA-node bitmap is empty, plain malloc()
 * is used; otherwise the memory is obtained from hwloc, bound to the nodes of
 * the bitmap (BIND policy for a single node, INTERLEAVE for several).
 *
 * NOTE(review): both branches of the if/else return, so the final
 * `return data_malloc(...)` is unreachable as written. This looks like the
 * removed and the added body of a diff concatenated -- confirm which one is
 * the current implementation.
 */
void* comp_malloc(size_t size)
{
if (hwloc_bitmap_iszero(memory_comp_numa_node_bitmap))
{
return malloc(size);
}
else
{
const hwloc_membind_policy_t policy = (hwloc_bitmap_weight(memory_comp_numa_node_bitmap) == 1 ) ? HWLOC_MEMBIND_BIND : HWLOC_MEMBIND_INTERLEAVE;
/* HWLOC_MEMBIND_BYNODESET: Consider the bitmap argument as a nodeset.
* HWLOC_MEMBIND_NOCPUBIND: Avoid any effect on CPU binding. */
#if HWLOC_API_VERSION >= 0x00020000
return hwloc_alloc_membind(topology, size, memory_comp_numa_node_bitmap, policy, HWLOC_MEMBIND_BYNODESET | HWLOC_MEMBIND_NOCPUBIND);
#else
return hwloc_alloc_membind_nodeset(topology, size, memory_comp_numa_node_bitmap, policy, HWLOC_MEMBIND_NOCPUBIND);
#endif
}
/* Uses the first configured computation NUMA node, or -1 when none was given. */
return data_malloc(size, (nb_numa_nodes_comp == 0) ? -1 : numa_nodes_comp[0]);
}
static inline void data_free(void* ptr, size_t size, int numa_node)
void data_free(void* ptr, size_t size, int numa_node)
{
if (numa_node == -1)
{
......@@ -106,12 +103,5 @@ void comm_free(void* ptr, size_t size)
/*
 * Release memory of `size` bytes at `ptr`, the counterpart of comp_malloc().
 *
 * NOTE(review): as written, `ptr` is first released by free() or hwloc_free()
 * and then passed to data_free() as well. This looks like the removed and the
 * added body of a diff concatenated -- confirm which one is the current
 * implementation.
 */
void comp_free(void* ptr, size_t size)
{
if (hwloc_bitmap_iszero(memory_comp_numa_node_bitmap))
{
free(ptr);
}
else
{
hwloc_free(topology, ptr, size);
}
/* Uses the first configured computation NUMA node, or -1 when none was given.
 * NOTE(review): `return` with an expression in a function returning void is
 * a constraint violation in ISO C; a plain call statement would be portable. */
return data_free(ptr, size, (nb_numa_nodes_comp == 0) ? -1 : numa_nodes_comp[0]);
}
......@@ -2,12 +2,17 @@
#define __MALLOC_H
#include <hwloc.h>
#include "helper.h"
void init_malloc(hwloc_topology_t _topology, char* _memory_comp_numa_node_str, int _memory_comm_numa_node);
struct machine_s;
void init_malloc(struct machine_s* machine, char* _memory_comp_numa_node_str, int _memory_comm_numa_node);
void release_malloc();
void* comm_malloc(size_t size);
void* comp_malloc(size_t size);
void* data_malloc(size_t size, int numa_node);
int get_numa_node_comp_for_id(int id);
void comm_free(void* ptr, size_t size);
void comp_free(void* ptr, size_t size);
void data_free(void* ptr, size_t size, int numa_node);
#endif /* __MALLOC_H */
......@@ -125,7 +125,7 @@ static int memset_init()
#pragma omp parallel for
for (int i = 0; i < nb_threads; i++)
{
a[i] = comp_malloc(array_size*sizeof(MEMSET_TYPE));
a[i] = data_malloc(array_size*sizeof(MEMSET_TYPE), get_numa_node_comp_for_id(i % machine.nb_numa));
for (int j = 0; j < array_size; j++)
{
......@@ -293,7 +293,7 @@ static void memset_release()
{
for (int i = 0; i < nb_threads; i++)
{
comp_free(a[i], array_size*sizeof(MEMSET_TYPE));
data_free(a[i], array_size*sizeof(MEMSET_TYPE), get_numa_node_comp_for_id(i % machine.nb_numa));
}
free(a);
......
......@@ -130,7 +130,7 @@ int main(int argc, char* argv[])
#endif
fill_machine(&machine);
init_malloc(machine.topology, params.memory_comp_numa_node_str, params.memory_comm_numa_node);
init_malloc(&machine, params.memory_comp_numa_node_str, params.memory_comm_numa_node);
if (params.display_help)
{
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment