Commit 1d657a0c authored by Millian Poquet's avatar Millian Poquet

Good advance in the C++ version. JSON files are now parsed. Machines' states...

Good advance in the C++ version. JSON files are now parsed. Machines' states are now taken into account in a better way to log what happens in the Pajé file.
parent 1347e5ab
......@@ -2,6 +2,8 @@
* @file context.hpp The Batsim context
*/
#pragma once
#include "network.hpp"
#include "machines.hpp"
#include "jobs.hpp"
......@@ -14,6 +16,7 @@ struct BatsimContext
Machines machines;
Jobs jobs;
Profiles profiles;
PajeTracer tracer;
long long microseconds_used_by_scheduler;
};
......@@ -67,12 +67,12 @@ void WriteBuffer::flushBuffer()
PajeTracer::PajeTracer(const std::string & filename,
bool logLaunchings) :
logLaunchings(logLaunchings)
_logLaunchings(logLaunchings)
{
generateColors(64);
shuffleColors();
wbuf = new WriteBuffer(filename);
_wbuf = new WriteBuffer(filename);
}
PajeTracer::~PajeTracer()
......@@ -80,10 +80,10 @@ PajeTracer::~PajeTracer()
if (state != FINALIZED)
fprintf(stderr, "Destruction of a PajeTracer object which has not been finalized. The corresponding trace file may be invalid.");
if (wbuf != nullptr)
if (_wbuf != nullptr)
{
delete wbuf;
wbuf = nullptr;
delete _wbuf;
_wbuf = nullptr;
}
}
......@@ -169,7 +169,7 @@ void PajeTracer::initialize(const vector<Machine> & machines, double time)
DEFINE_STATE_TYPE, DEFINE_ENTITY_VALUE, SET_STATE,
DEFINE_EVENT_TYPE, NEW_EVENT, DEFINE_VARIABLE_TYPE,
SET_VARIABLE);
wbuf->appendText(buf);
_wbuf->appendText(buf);
// Let's create our container types
snprintf(buf, bufSize,
......@@ -183,7 +183,7 @@ void PajeTracer::initialize(const vector<Machine> & machines, double time)
DEFINE_CONTAINER_TYPE, rootType, machineType,
DEFINE_CONTAINER_TYPE, schedulerType,
DEFINE_CONTAINER_TYPE, schedulerType, killerType);
wbuf->appendText(buf);
_wbuf->appendText(buf);
// Let's create our event types
snprintf(buf, bufSize,
......@@ -193,7 +193,7 @@ void PajeTracer::initialize(const vector<Machine> & machines, double time)
"\n",
DEFINE_EVENT_TYPE, killerType, killEventKiller,
DEFINE_EVENT_TYPE, machineType, killEventMachine);
wbuf->appendText(buf);
_wbuf->appendText(buf);
// Let's create our variable types
snprintf(buf, bufSize,
......@@ -201,13 +201,13 @@ void PajeTracer::initialize(const vector<Machine> & machines, double time)
"%d %s %s \"Utilization\" %s\n"
"\n",
DEFINE_VARIABLE_TYPE, schedulerType, utilizationVarType, utilizationColor);
wbuf->appendText(buf);
_wbuf->appendText(buf);
snprintf(buf, bufSize,
"# Containers creation\n"
"%d %lf %s %s \"Machines\" 0\n",
CREATE_CONTAINER, time, rootType, root);
wbuf->appendText(buf);
_wbuf->appendText(buf);
for (const Machine & m : machines)
{
......@@ -217,7 +217,7 @@ void PajeTracer::initialize(const vector<Machine> & machines, double time)
CREATE_CONTAINER, time, machineType,
machinePrefix, m.id,
m.name.c_str(), root);
wbuf->appendText(buf);
_wbuf->appendText(buf);
}
snprintf(buf, bufSize,
......@@ -226,7 +226,7 @@ void PajeTracer::initialize(const vector<Machine> & machines, double time)
"\n",
CREATE_CONTAINER, time, schedulerType, scheduler,
CREATE_CONTAINER, time, killerType, killer, scheduler);
wbuf->appendText(buf);
_wbuf->appendText(buf);
// Let's declare that machines have a state
snprintf(buf, bufSize,
......@@ -234,7 +234,7 @@ void PajeTracer::initialize(const vector<Machine> & machines, double time)
"%d %s %s \"Machine state\"\n"
"\n",
DEFINE_STATE_TYPE, machineState, machineType);
wbuf->appendText(buf);
_wbuf->appendText(buf);
// Let's declare some machine states
snprintf(buf, bufSize,
......@@ -245,7 +245,7 @@ void PajeTracer::initialize(const vector<Machine> & machines, double time)
"# Begin of events\n",
DEFINE_ENTITY_VALUE, mstateWaiting, machineState, waitingColor,
DEFINE_ENTITY_VALUE, mstateLaunching, machineState, launchingColor);
wbuf->appendText(buf);
_wbuf->appendText(buf);
// Let's set all the machines in waiting state
for (const Machine & m : machines)
......@@ -253,7 +253,7 @@ void PajeTracer::initialize(const vector<Machine> & machines, double time)
snprintf(buf, bufSize,
"%d %lf %s %s%d %s\n",
SET_STATE, time, machineState, machinePrefix, m.id, mstateWaiting);
wbuf->appendText(buf);
_wbuf->appendText(buf);
}
state = INITIALIZED;
......@@ -269,20 +269,20 @@ void PajeTracer::finalize(const vector<Machine> & machines, double time)
snprintf(buf, bufSize,
"\n"
"# End of events, containers destruction\n");
wbuf->appendText(buf);
_wbuf->appendText(buf);
for (const Machine & m : machines)
{
snprintf(buf, bufSize,
"%d %lf %s%d %s\n",
DESTROY_CONTAINER, time, machinePrefix, m.id, machineType);
wbuf->appendText(buf);
_wbuf->appendText(buf);
}
snprintf(buf, bufSize,
"%d %lf %s %s\n",
DESTROY_CONTAINER, time, root, rootType);
wbuf->appendText(buf);
_wbuf->appendText(buf);
state = FINALIZED;
}
......@@ -294,13 +294,7 @@ void PajeTracer::addJobLaunching(int jobID, const std::vector<int> & usedMachine
const int bufSize = 64;
char buf[bufSize];
// Let's create a state value corresponding to this job
snprintf(buf, bufSize,
"%d %s%d %s \"%d\" %s\n",
DEFINE_ENTITY_VALUE, jobPrefix, jobID, machineState, jobID, colors[jobID % (int)colors.size()].c_str());
wbuf->appendText(buf);
if (logLaunchings)
if (_logLaunchings)
{
// Let's change the state of all the machines which launch the job
for (const int & machineID : usedMachineIDs)
......@@ -308,10 +302,51 @@ void PajeTracer::addJobLaunching(int jobID, const std::vector<int> & usedMachine
snprintf(buf, bufSize,
"%d %lf %s %s%d %s\n",
SET_STATE, time, machineState, machinePrefix, machineID, mstateLaunching);
wbuf->appendText(buf);
_wbuf->appendText(buf);
}
}
}
void PajeTracer::register_new_job(int jobID)
{
xbt_assert(_jobs.find(jobID) == _jobs.end(), "Cannot register new job %d: it already exists", jobID);
const int bufSize = 64;
char buf[bufSize];
// Let's create a state value corresponding to this job
snprintf(buf, bufSize,
"%d %s%d %s \"%d\" %s\n",
DEFINE_ENTITY_VALUE, jobPrefix, jobID, machineState, jobID, _colors[jobID % (int)_colors.size()].c_str());
_wbuf->appendText(buf);
_jobs[jobID] = jobPrefix + to_string(jobID);
}
void PajeTracer::set_machine_idle(int machineID, double time)
{
const int bufSize = 64;
char buf[bufSize];
snprintf(buf, bufSize,
"%d %lf %s %s%d %s\n",
SET_STATE, time, machineState, machinePrefix, machineID, mstateWaiting);
_wbuf->appendText(buf);
}
void PajeTracer::set_machine_as_computing_job(int machineID, int jobID, double time)
{
auto mit = _jobs.find(jobID);
if (mit == _jobs.end())
{
register_new_job(jobID);
mit = _jobs.find(jobID);
}
const int bufSize = 64;
char buf[bufSize];
snprintf(buf, bufSize,
"%d %lf %s %s%d %s\n",
SET_STATE, time, machineState, machinePrefix, machineID, mit->second.c_str());
_wbuf->appendText(buf);
}
void PajeTracer::addJobRunning(int jobID, const vector<int> & usedMachineIDs, double time)
......@@ -327,7 +362,7 @@ void PajeTracer::addJobRunning(int jobID, const vector<int> & usedMachineIDs, do
snprintf(buf, bufSize,
"%d %lf %s %s%d %s%d\n",
SET_STATE, time, machineState, machinePrefix, machineID, jobPrefix, jobID);
wbuf->appendText(buf);
_wbuf->appendText(buf);
}
}
......@@ -345,11 +380,11 @@ void PajeTracer::addJobEnding(int jobID, const vector<int> & usedMachineIDs, dou
snprintf(buf, bufSize,
"%d %lf %s %s%d %s\n",
SET_STATE, time, machineState, machinePrefix, machineID, mstateWaiting);
wbuf->appendText(buf);
_wbuf->appendText(buf);
}
}
void PajeTracer::addJobKill(int jobID, const vector<int> & usedMachineIDs, double time)
void PajeTracer::addJobKill(int jobID, const vector<int> & usedMachineIDs, double time, bool associateKillToMachines)
{
xbt_assert(state == INITIALIZED, "Bad addJobKill call: the PajeTracer object is not initialized or had been finalized");
......@@ -360,15 +395,18 @@ void PajeTracer::addJobKill(int jobID, const vector<int> & usedMachineIDs, doubl
snprintf(buf, bufSize,
"%d %lf %s %s \"%d\"\n",
NEW_EVENT, time, killEventKiller, killer, jobID);
wbuf->appendText(buf);
_wbuf->appendText(buf);
// Let's add a kill event associated with each machine
for (const int & machineID : usedMachineIDs)
if (associateKillToMachines)
{
snprintf(buf, bufSize,
"%d %lf %s %s%d \"%d\"\n",
NEW_EVENT, time, killEventMachine, machinePrefix, machineID, jobID);
wbuf->appendText(buf);
// Let's add a kill event associated with each machine
for (const int & machineID : usedMachineIDs)
{
snprintf(buf, bufSize,
"%d %lf %s %s%d \"%d\"\n",
NEW_EVENT, time, killEventMachine, machinePrefix, machineID, jobID);
_wbuf->appendText(buf);
}
}
}
......@@ -383,7 +421,7 @@ void PajeTracer::addGlobalUtilization(double utilization, double time)
snprintf(buf, bufSize,
"%d %lf %s %s %lf\n",
SET_VARIABLE, time, utilizationVarType, scheduler, utilization);
wbuf->appendText(buf);
_wbuf->appendText(buf);
}
void PajeTracer::generateColors(int colorCount)
......@@ -401,13 +439,13 @@ void PajeTracer::generateColors(int colorCount)
hsvToRgb(h,s,v, r,g,b);
snprintf(buf, bufSize, "\"%lf %lf %lf\"", r, g, b);
colors.push_back(buf);
_colors.push_back(buf);
}
}
void PajeTracer::shuffleColors()
{
random_shuffle(colors.begin(), colors.end());
random_shuffle(_colors.begin(), _colors.end());
}
void PajeTracer::hsvToRgb(double h, double s, double v, double & r, double & g, double & b)
......
......@@ -5,6 +5,7 @@
#include <vector>
#include <string>
#include <fstream>
#include <map>
#include <simgrid/msg.h>
......@@ -69,7 +70,7 @@ public:
* @param filename
* @param logLaunchings If set to true, job launching time will be written in the trace. This option leads to larger trace files.
*/
PajeTracer(const std::string & filename, bool logLaunchings = false);
PajeTracer(const std::string & filename, bool _logLaunchings = false);
/**
* @brief PajeTracer destructor.
......@@ -100,6 +101,10 @@ public:
*/
void addJobLaunching(int jobID, const std::vector<int> & usedMachineIDs, double time);
void register_new_job(int jobID);
void set_machine_idle(int machineID, double time);
void set_machine_as_computing_job(int machineID, int jobID, double time);
/**
* @brief Adds a job run in the file trace.
* @details Please note that this method can only be called when the PajeTracer object has been initialized and has not been finalized yet.
......@@ -125,7 +130,7 @@ public:
* TODO UPDATE
* @param time The simulation time at which the kill is done
*/
void addJobKill(int jobID, const std::vector<int> & usedMachineIDs, double time);
void addJobKill(int jobID, const std::vector<int> & usedMachineIDs, double time, bool associateKillToMachines = false);
/**
* @brief Adds a global utilization value of the system.
......@@ -185,11 +190,12 @@ private:
const char * launchingColor = "\"0.3 0.3 0.3\"";
const char * utilizationColor = "\"0.0 0.5 0.0\"";
const bool logLaunchings;
const bool _logLaunchings;
WriteBuffer * wbuf = nullptr;
WriteBuffer * _wbuf = nullptr;
std::vector<std::string> colors;
std::map<int, std::string> _jobs;
std::vector<std::string> _colors;
enum
{
......
......@@ -36,3 +36,9 @@ std::string ipMessageTypeToString(IPMessageType type)
return type_to_string[type];
}
void send_message(const char *destination_mailbox, IPMessageType type, void *data)
{
const string str = destination_mailbox;
send_message(str, type, data);
}
......@@ -7,6 +7,8 @@
#include <vector>
#include <string>
#include <simgrid/msg.h>
struct BatsimContext;
enum class IPMessageType
......@@ -34,6 +36,7 @@ struct SchedulingAllocation
{
int job_id;
std::vector<int> machine_ids; //! The IDs of the machines on which the job should be allocated
std::vector<msg_host_t> hosts; //! The corresponding SimGrid hosts
};
struct SchedulingAllocationMessage
......@@ -59,6 +62,18 @@ struct ServerProcessArguments
BatsimContext * context;
};
/**
 * @brief Bundles a Batsim context pointer with a scheduling allocation.
 * @details NOTE(review): presumably the arguments given to the process which
 *          executes a job — confirm against the code which spawns it.
 */
struct ExecuteJobProcessArguments
{
BatsimContext * context; //!< The Batsim context
SchedulingAllocation allocation; //!< The allocation (job and machines) stored by value
};
/**
 * @brief Arguments read by a killer process: the task to watch and how long
 *        to wait before cancelling it.
 */
struct KillerProcessArguments
{
msg_task_t task; //!< The task that will be cancelled if the walltime is reached
double walltime; //!< The number of seconds to wait before cancelling the task
} ;
/**
* @brief Sends a message from the given process to the given mailbox
* @param[in] dst The destination mailbox
......@@ -67,5 +82,6 @@ struct ServerProcessArguments
* @param[in] data The data associated to the message
*/
void send_message(const std::string & destination_mailbox, IPMessageType type, void * data = nullptr);
void send_message(const char * destination_mailbox, IPMessageType type, void * data = nullptr);
std::string ipMessageTypeToString(IPMessageType type);
......@@ -4,7 +4,18 @@
#include "jobs.hpp"
#include <string>
#include <fstream>
#include <streambuf>
#include <simgrid/msg.h>
#include <rapidjson/document.h>
using namespace std;
using namespace rapidjson;
XBT_LOG_NEW_DEFAULT_CATEGORY(jobs, "jobs");
Jobs::Jobs()
{
......@@ -21,18 +32,74 @@ Jobs::~Jobs()
void Jobs::load_from_json(const std::string &filename)
{
// TODO
// Let the file content be placed in a string
ifstream ifile(filename);
string content;
ifile.seekg(0, ios::end);
content.reserve(ifile.tellg());
ifile.seekg(0, ios::beg);
content.assign((std::istreambuf_iterator<char>(ifile)),
std::istreambuf_iterator<char>());
// JSON document creation
Document doc;
doc.Parse(content.c_str());
xbt_assert(doc.IsObject());
xbt_assert(doc.HasMember("jobs"), "Invalid JSON file '%s': the 'jobs' array is missing", filename.c_str());
const Value & jobs = doc["jobs"];
xbt_assert(jobs.IsArray(), "Invalid JSON file '%s': the 'jobs' member is not an array", filename.c_str());
Job j;
j.starting_time = -1;
j.runtime = -1;
j.state = JobState::JOB_STATE_NOT_SUBMITTED;
for (SizeType i = 0; i < jobs.Size(); i++) // Uses SizeType instead of size_t
{
const Value & job = jobs[i];
xbt_assert(job.IsObject(), "Invalid JSON file '%s': one job is not an object", filename.c_str());
xbt_assert(job.HasMember("id"), "Invalid JSON file '%s': one job has no 'id' field", filename.c_str());
xbt_assert(job["id"].IsInt(), "Invalid JSON file '%s': one job has a non-integral 'id' field ('%s')", filename.c_str(), job["id"].GetString());
j.id = job["id"].GetInt();
xbt_assert(job.HasMember("subtime"), "Invalid JSON file '%s': job %d has no 'subtime' field", filename.c_str(), j.id);
xbt_assert(job["subtime"].IsNumber(), "Invalid JSON file '%s': job %d has a non-number 'subtime' field", filename.c_str(), j.id);
j.submission_time = job["subtime"].GetDouble();
xbt_assert(job.HasMember("walltime"), "Invalid JSON file '%s': job %d has no 'walltime' field", filename.c_str(), j.id);
xbt_assert(job["walltime"].IsNumber(), "Invalid JSON file '%s': job %d has a non-number 'walltime' field", filename.c_str(), j.id);
j.walltime = job["walltime"].GetDouble();
xbt_assert(job.HasMember("res"), "Invalid JSON file '%s': job %d has no 'res' field", filename.c_str(), j.id);
xbt_assert(job["res"].IsInt(), "Invalid JSON file '%s': job %d has a non-number 'res' field", filename.c_str(), j.id);
j.required_nb_res = job["res"].GetInt();
xbt_assert(job.HasMember("profile"), "Invalid JSON file '%s': job %d has no 'profile' field", filename.c_str(), j.id);
xbt_assert(job["profile"].IsString(), "Invalid JSON file '%s': job %d has a non-string 'profile' field", filename.c_str(), j.id);
j.profile = job["profile"].GetString();
xbt_assert(!exists(j.id), "Invalid JSON file '%s': duplication of job id %d", filename.c_str(), j.id);
Job * nj = new Job;
*nj = j;
_jobs[j.id] = nj;
}
}
/**
 * @brief Accesses one job thanks to its unique number.
 * @details An assertion fails if the job does not exist.
 * @param job_id The job unique number
 * @return A pointer to the job associated with the given number
 */
Job *Jobs::operator[](int job_id)
{
    const auto found = _jobs.find(job_id);
    const bool job_exists = (found != _jobs.end());

    xbt_assert(job_exists, "Cannot get job %d: it does not exist", job_id);
    return found->second;
}
/**
 * @brief Accesses one job thanks to its unique number (const version).
 * @details An assertion fails if the job does not exist.
 * @param job_id The job unique number
 * @return A const pointer to the job associated with the given number
 */
const Job *Jobs::operator[](int job_id) const
{
    const auto found = _jobs.find(job_id);
    const bool job_exists = (found != _jobs.end());

    xbt_assert(job_exists, "Cannot get job %d: it does not exist", job_id);
    return found->second;
}
......
#include "jobs_execution.hpp"
#include "jobs.hpp"
#include <simgrid/msg.h>
#include <smpi/smpi.h>
XBT_LOG_NEW_DEFAULT_CATEGORY(jobs_execution, "jobs_execution");
using namespace std;
int killer_process(int argc, char *argv[])
{
(void) argc;
(void) argv;
KillerProcessArguments * args = (KillerProcessArguments *) MSG_process_get_data(MSG_process_self());
/* The sleep can either stop normally (res=MSG_OK) or be cancelled when the task execution
* completed (res=MSG_TASK_CANCELED) */
msg_error_t res = MSG_process_sleep(args->walltime);
if (res == MSG_OK)
{
// If we had time to sleep until walltime (res=MSG_OK), the task execution is not over and must be cancelled
XBT_INFO("Cancelling task '%s'", MSG_task_get_name(args->task));
MSG_task_cancel(args->task);
}
delete args;
return 0;
}
/**
 * @brief Process which forwards its command-line arguments to smpi_replay_run.
 * @param argc The number of arguments
 * @param argv The arguments
 * @return 0
 */
int smpi_replay_process(int argc, char *argv[])
{
    // Debugging hint: printing every argv[i] here allows checking the given arguments
    smpi_replay_run(&argc, &argv);

    return 0;
}
int execute_profile(BatsimContext *context,
const string & profile_name,
SchedulingAllocation *allocation,
double *remaining_time)
{
Job * job = context->jobs[allocation->job_id];
Profile * profile = context->profiles[profile_name];
int nb_res = job->required_nb_res;
if (profile->type == ProfileType::MSG_PARALLEL_HOMOGENEOUS)
{
MsgParallelHomogeneousProfileData * data = (MsgParallelHomogeneousProfileData *)profile->data;
// These amounts are deallocated by SG
double * computation_amount = new double[nb_res];
double * communication_amount = new double[nb_res*nb_res];
double cpu = data->cpu;
double com = data->com;
// Let us fill the local computation and communication matrices
int k = 0;
for (int y = 0; y < nb_res; ++y)
{
computation_amount[y] = cpu;
for (int x = 0; x < nb_res; ++x)
{
if (x == y)
communication_amount[k++] = 0;
else
communication_amount[k++] = com;
}
}
string task_name = "phg " + to_string(job->id) + "'" + job->profile + "'";
XBT_INFO("Creating task '%s'", task_name.c_str());
msg_task_t ptask = MSG_parallel_task_create(task_name.c_str(),
nb_res, allocation->hosts.data(),
computation_amount,
communication_amount, NULL);
// Let's spawn a process which will wait until walltime and cancel the task if needed
KillerProcessArguments * killer_args = new KillerProcessArguments;
killer_args->task = ptask;
killer_args->walltime = *remaining_time;
msg_process_t kill_process = MSG_process_create("killer", killer_process, killer_args, MSG_host_self());
double timeBeforeExecute = MSG_get_clock();
XBT_INFO("Executing task '%s'", MSG_task_get_name(ptask));
msg_error_t err = MSG_parallel_task_execute(ptask);
*remaining_time = *remaining_time - (MSG_get_clock() - timeBeforeExecute);
int ret = 1;
if (err == MSG_OK)
SIMIX_process_throw(kill_process, cancel_error, 0, "wake up");
else if (err == MSG_TASK_CANCELED)
ret = 0;
else
xbt_die("A task execution had been stopped by an unhandled way (err = %d)", err);
XBT_INFO("Task '%s' finished", MSG_task_get_name(ptask));
MSG_task_destroy(ptask);
return ret;
}
else if (profile->type == ProfileType::MSG_PARALLEL)
{
MsgParallelProfileData * data = (MsgParallelProfileData *)profile->data;
// These amounts are deallocated by SG
double * computation_amount = new double[nb_res];
double * communication_amount = new double[nb_res*nb_res];
// Let us retrieve the matrices from the profile
memcpy(computation_amount, data->cpu, sizeof(double) * nb_res);
memcpy(communication_amount, data->com, sizeof(double) * nb_res * nb_res);
string task_name = "p " + to_string(job->id) + "'" + job->profile + "'";
XBT_INFO("Creating task '%s'", task_name.c_str());
msg_task_t ptask = MSG_parallel_task_create(task_name.c_str(),
nb_res, allocation->hosts.data(),
computation_amount,
communication_amount, NULL);
// Let's spawn a process which will wait until walltime and cancel the task if needed
KillerProcessArguments * killer_args = new KillerProcessArguments;
killer_args->task = ptask;
killer_args->walltime = *remaining_time;
msg_process_t kill_process = MSG_process_create("killer", killer_process, killer_args, MSG_host_self());
double timeBeforeExecute = MSG_get_clock();
XBT_INFO("Executing task '%s'", MSG_task_get_name(ptask));
msg_error_t err = MSG_parallel_task_execute(ptask);
*remaining_time = *remaining_time - (MSG_get_clock() - timeBeforeExecute);
int ret = 1;
if (err == MSG_OK)
SIMIX_process_throw(kill_process, cancel_error, 0, "wake up");
else if (err == MSG_TASK_CANCELED)
ret = 0;
else
xbt_die("A task execution had been stopped by an unhandled way (err = %d)", err);
XBT_INFO("Task '%s' finished", MSG_task_get_name(ptask));
MSG_task_destroy(ptask);
return ret;
}
else if (profile->type == ProfileType::SEQUENCE)
{
xbt_die("Unhandled sequence profile type");
SequenceProfileData * data = (SequenceProfileData *) profile->data;
for (int i = 0; i < data->repeat; i++)
{
for (int j = 0; j < data->sequence.size(); j++)
{
if (execute_profile(context, data->sequence[j], allocation, remaining_time) == 0)
return 0;
}
}
return 1;
}
else if (profile->type == ProfileType::DELAY)
{
DelayProfileData * data = (DelayProfileData *) profile->data;
if (data->delay < *remaining_time)
{
XBT_INFO("Sleeping the whole task length");
MSG_process_sleep(data->delay);
XBT_INFO("Sleeping done");
*remaining_time = *remaining_time - data->delay;
return 1;
}
else
{
XBT_INFO("Sleeping until walltime");
MSG_process_sleep(*remaining_time);
XBT_INFO("Walltime reached");
*remaining_time = 0;
return 0;
}
}
else if (profile->type == ProfileType::SMPI)
{
SmpiProfileData * data = (SmpiProfileData *) profile->data;
for (int i = 0; i < nb_res; ++i)
{
char *str_instance_id = NULL;
int ret = asprintf(&str_instance_id, "%d", job->id);
xbt_assert(ret != -1, "asprintf failed (not enough memory?)");
char *str_rank_id = NULL;
ret = asprintf(&str_rank_id, "%d", i);
xbt_assert(ret != -1, "asprintf failed (not enough memory?)");
char *str_pname = NULL;