/**
 * @file batsim.cpp
 * @brief Batsim's entry point
 */

#include <string>

#include <stdio.h>
#include <argp.h>
#include <unistd.h>

#include <simgrid/msg.h>
#include <smpi/smpi.h>
#include <simgrid/plugins/energy.h>

#include <boost/algorithm/string/case_conv.hpp>

#include "context.hpp"
#include "export.hpp"
#include "ipp.hpp"
#include "job_submitter.hpp"
#include "jobs.hpp"
#include "jobs_execution.hpp"
#include "machines.hpp"
#include "network.hpp"
#include "profiles.hpp"
#include "server.hpp"
#include "workload.hpp"

using namespace std;

// Log category used by the XBT_* logging macros of this file
// (its threshold is tuned in main via "batsim.thresh:...").
XBT_LOG_NEW_DEFAULT_CATEGORY(batsim, "batsim"); //!< Logging

/**
 * @brief Batsim verbosity level
 * @details The level is selected on the command line (--verbosity / --quiet)
 *          and translated by main into thresholds of the Batsim log categories.
 */
enum class VerbosityLevel
{
    QUIET           //!< Almost nothing should be displayed
    ,NETWORK_ONLY   //!< Only network messages should be displayed
    ,INFORMATION    //!< Information should be displayed (default)
    ,DEBUG          //!< Debug information should be displayed too
};

/**
 * @brief The main function arguments (a.k.a. program arguments)
 * @details Filled by parse_opt while argp walks the command line, then read by main.
 *          Every member that has a default value can be overridden by a command-line option.
 */
struct MainArguments
{
    std::string platformFilename;                           //!< The SimGrid platform filename
    std::string workloadFilename;                           //!< The JSON workload filename

    std::string socketFilename = "/tmp/bat_socket";         //!< The Unix Domain Socket filename

    std::string masterHostName = "master_host";             //!< The name of the SimGrid host which runs scheduler processes and not user tasks
    std::string exportPrefix = "out";                       //!< The filename prefix used to export simulation information

    bool energy_used = false;                               //!< True if and only if the SimGrid energy plugin should be used.
    VerbosityLevel verbosity = VerbosityLevel::INFORMATION; //!< Sets the Batsim verbosity
    bool allow_space_sharing = false;                       //!< Allows/forbids space sharing. Two jobs can run on the same machine if and only if space sharing is allowed.
    bool enable_simgrid_process_tracing = false;            //!< If set to true, this option enables the tracing of SimGrid processes
    bool enable_schedule_tracing = true;                    //!< If set to true, the schedule is exported to a Pajé trace file

    bool abort = false;                                     //!< If set to true, the launching should be aborted for the reasons stored in abortReason
    std::string abortReason;                                //!< Human-readable reasons (one per line, accumulated) which explain why the launch should be aborted
};

/**
 * @brief Used to parse the main function parameters
 * @param[in] key The current key
 * @param[in] arg The current argument
 * @param[in, out] state The current argp_state
 * @return 0
 */
75
int parse_opt (int key, char *arg, struct argp_state *state)
76 77 78 79 80
{
    MainArguments * mainArgs = (MainArguments *) state->input;

    switch (key)
    {
81 82 83
    case 'h':
        mainArgs->allow_space_sharing = true;
        break;
84 85 86 87 88 89
    case 'e':
        mainArgs->exportPrefix = arg;
        break;
    case 'm':
        mainArgs->masterHostName = arg;
        break;
90 91 92
    case 'p':
        mainArgs->energy_used = true;
        break;
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
    case 'v':
    {
        string sArg = arg;
        boost::to_lower(sArg);
        if (sArg == "quiet")
            mainArgs->verbosity = VerbosityLevel::QUIET;
        else if (sArg == "network-only")
            mainArgs->verbosity = VerbosityLevel::NETWORK_ONLY;
        else if (sArg == "information")
            mainArgs->verbosity = VerbosityLevel::INFORMATION;
        else if (sArg == "debug")
            mainArgs->verbosity = VerbosityLevel::DEBUG;
        else
        {
            mainArgs->abort = true;
108
            mainArgs->abortReason += "\n  invalid VERBOSITY_LEVEL argument: '" + string(sArg) + "' is not in {quiet, network-only, information, debug}.";
109 110 111
        }
        break;
    }
112
    case 'q':
113
        mainArgs->verbosity = VerbosityLevel::QUIET;
114
        break;
115 116 117
    case 's':
        mainArgs->socketFilename = arg;
        break;
118 119 120
    case 't':
        mainArgs->enable_simgrid_process_tracing = true;
        break;
121 122 123
    case 'T':
        mainArgs->enable_schedule_tracing = false;
        break;
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
    case ARGP_KEY_ARG:
        switch(state->arg_num)
        {
        case 0:
            mainArgs->platformFilename = arg;
            if (access(mainArgs->platformFilename.c_str(), R_OK) == -1)
            {
                mainArgs->abort = true;
                mainArgs->abortReason += "\n  invalid PLATFORM_FILE argument: file '" + string(mainArgs->platformFilename) + "' cannot be read";
            }
            break;
        case 1:
            mainArgs->workloadFilename = arg;
            if (access(mainArgs->workloadFilename.c_str(), R_OK) == -1)
            {
                mainArgs->abort = true;
140
                mainArgs->abortReason += "\n  invalid WORKLOAD_FILE argument: file '" + string(mainArgs->workloadFilename) + "' cannot be read";
141 142 143 144 145 146 147 148
            }
            break;
        }
        break;
    case ARGP_KEY_END:
        if (state->arg_num < 2)
        {
            mainArgs->abort = 1;
149
            mainArgs->abortReason += "\n  Too few arguments. Try the --help option to display usage information.";
150 151 152 153 154 155 156
        }
        break;
    }

    return 0;
}

Millian Poquet's avatar
Millian Poquet committed
157 158 159 160 161 162
/**
 * @brief Main function
 * @param[in] argc The number of arguments
 * @param[in] argv The arguments' values
 * @return 0 on success, something else otherwise
 */
163 164 165 166 167 168 169
int main(int argc, char * argv[])
{
    MainArguments mainArgs;

    struct argp_option options[] =
    {
        {"export", 'e', "FILENAME_PREFIX", 0, "The export filename prefix used to generate simulation output", 0},
170 171
        {"allow-space-sharing", 'h', 0, 0, "Allows space sharing: the same resource can compute several jobs at the same time", 0},
        {"master-host", 'm', "NAME", 0, "The name of the host in PLATFORM_FILE which will run SimGrid scheduling processes and won't be used to compute tasks", 0},
172
        {"energy-plugin", 'p', 0, 0, "Enables energy-aware experiments", 0},
173
        {"quiet", 'q', 0, 0, "Shortcut for --verbosity=quiet", 0},
174
        {"socket", 's', "FILENAME", 0, "Unix Domain Socket filename", 0},
175
        {"process-tracing", 't', 0, 0, "Enables SimGrid process tracing (shortcut for SimGrid options ----cfg=tracing:1 --cfg=tracing/msg/process:1)", 0},
176
        {"disable-schedule-tracing", 'T', 0, 0, "If set, disables the tracing of the schedule.", 0},
177
        {"verbosity", 'v', "VERBOSITY_LEVEL", 0, "Sets the Batsim verbosity level. Available values are : quiet, network-only, information (default), debug.", 0},
178 179 180 181 182 183 184 185 186 187 188
        {0, '\0', 0, 0, 0, 0} // The options array must be NULL-terminated
    };
    struct argp argp = {options, parse_opt, "PLATFORM_FILE WORKLOAD_FILE", "A tool to simulate (via SimGrid) the behaviour of scheduling algorithms.", 0, 0, 0};
    argp_parse(&argp, argc, argv, 0, 0, &mainArgs);

    if (mainArgs.abort)
    {
        fprintf(stderr, "Impossible to run batsim:%s\n", mainArgs.abortReason.c_str());
        return 1;
    }

189 190 191
    if (mainArgs.energy_used)
        sg_energy_plugin_init();

192
    if (mainArgs.verbosity == VerbosityLevel::QUIET || mainArgs.verbosity == VerbosityLevel::NETWORK_ONLY)
193 194 195 196 197 198 199 200 201 202
    {
        xbt_log_control_set("workload.thresh:error");
        xbt_log_control_set("jobs.thresh:error");
        xbt_log_control_set("batsim.thresh:error");
        xbt_log_control_set("machines.thresh:error");
        xbt_log_control_set("pstate.thresh:error");
        xbt_log_control_set("jobs_execution.thresh:error");
        xbt_log_control_set("export.thresh:error");
        xbt_log_control_set("profiles.thresh:error");
        xbt_log_control_set("network.thresh:error");
203
        xbt_log_control_set("server.thresh:error");
204 205 206
        xbt_log_control_set("ipp.thresh:error");
    }

207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
    if (mainArgs.verbosity == VerbosityLevel::NETWORK_ONLY)
    {
        xbt_log_control_set("network.thresh:info");
    }
    else if (mainArgs.verbosity == VerbosityLevel::DEBUG)
    {
        xbt_log_control_set("workload.thresh:debug");
        xbt_log_control_set("jobs.thresh:debug");
        xbt_log_control_set("batsim.thresh:debug");
        xbt_log_control_set("machines.thresh:debug");
        xbt_log_control_set("pstate.thresh:debug");
        xbt_log_control_set("jobs_execution.thresh:debug");
        xbt_log_control_set("export.thresh:debug");
        xbt_log_control_set("profiles.thresh:debug");
        xbt_log_control_set("network.thresh:debug");
        xbt_log_control_set("server.thresh:debug");
        xbt_log_control_set("ipp.thresh:debug");
    }

226 227 228
    // Initialization
    MSG_init(&argc, argv);

229 230 231 232 233 234 235 236 237 238
    // Setting SimGrid configuration if the SimGrid process tracing is enabled
    if (mainArgs.enable_simgrid_process_tracing)
    {
        string sg_trace_filename = mainArgs.exportPrefix + "_sg_processes.trace";

        MSG_config("tracing", "1");
        MSG_config("tracing/msg/process", "1");
        MSG_config("tracing/filename", sg_trace_filename.c_str());
    }

239
    BatsimContext context;
240 241
    context.platform_filename = mainArgs.platformFilename;
    context.workload_filename = mainArgs.workloadFilename;
242
    context.export_prefix = mainArgs.exportPrefix;
243
    context.energy_used = mainArgs.energy_used;
244
    context.allow_space_sharing = mainArgs.allow_space_sharing;
245
    context.trace_schedule = mainArgs.enable_schedule_tracing;
246 247

    load_json_workload(&context, mainArgs.workloadFilename);
248
    context.jobs.setProfiles(&context.profiles);
249

250
    XBT_INFO("Checking whether SMPI is used or not...");
251 252
    context.smpi_used = context.jobs.containsSMPIJob();
    if (!context.smpi_used)
253 254
    {
        XBT_INFO("SMPI will NOT be used.");
255
        MSG_config("host/model", "ptask_L07");
256 257
    }
    else
258
    {
259
        XBT_INFO("SMPI will be used.");
260 261 262 263 264 265
        register_smpi_applications(&context);
        SMPI_init();
    }

    if (context.trace_schedule)
        context.paje_tracer.setFilename(mainArgs.exportPrefix + "_schedule.trace");
266

267
    XBT_INFO("Creating the machines...");
268 269 270
    MSG_create_environment(mainArgs.platformFilename.c_str());

    xbt_dynar_t hosts = MSG_hosts_as_dynar();
271
    context.machines.createMachines(hosts, &context, mainArgs.masterHostName);
272 273
    xbt_dynar_free(&hosts);
    const Machine * masterMachine = context.machines.masterMachine();
274 275 276 277 278
    if (context.trace_schedule)
    {
        context.machines.setTracer(&context.paje_tracer);
        context.paje_tracer.initialize(&context, MSG_get_clock());
    }
279
    XBT_INFO("Machines created successfully. There are %lu computing machines.", context.machines.machines().size());
280

281 282 283 284 285 286 287
    if (context.energy_used)
    {
        context.pstate_tracer.setFilename(mainArgs.exportPrefix + "_pstate_changes.csv");
        for (const Machine * machine : context.machines.machines())
            context.pstate_tracer.add_pstate_change(MSG_get_clock(), machine->id, MSG_host_get_pstate(machine->host));
    }

288 289 290 291 292
    // Socket
    context.socket.create_socket(mainArgs.socketFilename);
    context.socket.accept_pending_connection();

    // Main processes running
293
    XBT_INFO("Creating jobs_submitter process...");
294 295 296
    JobSubmitterProcessArguments * submitterArgs = new JobSubmitterProcessArguments;
    submitterArgs->context = &context;
    MSG_process_create("jobs_submitter", job_submitter_process, (void*)submitterArgs, masterMachine->host);
297
    XBT_INFO("The jobs_submitter process has been created.");
298

299
    XBT_INFO("Creating the uds_server process...");
300 301 302
    ServerProcessArguments * serverArgs = new ServerProcessArguments;
    serverArgs->context = &context;
    MSG_process_create("server", uds_server_process, (void*)serverArgs, masterMachine->host);
303
    XBT_INFO("The uds_server process has been created.");
304 305 306

    msg_error_t res = MSG_main();

307 308 309
    if (context.smpi_used)
        SMPI_finalize();

310
    // Finalization
311 312
    if (context.trace_schedule)
        context.paje_tracer.finalize(&context, MSG_get_clock());
313
    exportScheduleToCSV(mainArgs.exportPrefix + "_schedule.csv", MSG_get_clock(), &context);
314
    exportJobsToCSV(mainArgs.exportPrefix + "_jobs.csv", &context);
315 316 317 318 319 320

    if (res == MSG_OK)
        return 0;
    else
        return 1;
}