Commit d7329c06 authored by Martin Khannouz's avatar Martin Khannouz Committed by Berenger Bramas

Add a script to generate plots from a database, and a script to create
the database from stdout and trace.rec.
parent 68b80efb
......@@ -114,6 +114,14 @@ get_ltypes_scheme <- function()
'tb-omp4#task#dep-CP' = "dashed"))
}
# Name of the algorithm whose single-node runs serve as the baseline
# for speedup / efficiency computations.
get_one_node_reference_algorithm <- function()
{
  "starpu"
}
# Directory into which every generated plot file is written.
get_output_directory <- function()
{
  "output"
}
# Timings
get_breaks_timings <- function()
{
......
......@@ -78,7 +78,7 @@ compute_efficiency <- function(data, n)
gen_efficiency <- function(data_init, algo_wanted, model_wanted)
{
data <- subset(data_init, algo == algo_wanted && model == model_wanted)
data <- subset(data_init, algo == algo_wanted & model == model_wanted)
if(nrow(data))
{
sdata <- NULL
......@@ -94,15 +94,15 @@ gen_efficiency <- function(data_init, algo_wanted, model_wanted)
sdata <- rbind(sdata, compute_efficiency(data, all_nparts[i]))
}
}
output <- paste(algo_wanted, "-", model_wanted, "-efficiencies.pdf", sep="")
output <- paste(get_output_directory(), "/", model_wanted, "-", algo_wanted, "-efficiencies.pdf", sep="")
gen_efficiencies_plot(output, sdata)
}
}
gen_efficiencies <- function(dbfile)
{
data_init <- get_data_subset(dbfile, 0L, 0L, "False")
data_init <- subset(data_init, algo != get_one_node_reference_algorithm())
data <- melt(data_init, id=c("model", "algo", "nnode", "nthreads", "npart","height","bsize"))
data$nthreads <- NULL
data <- rename(data, c("variable"="event", "value"="duration"))
all_algorithm <- unique(data$algo)
......
library(plyr)
library(ggplot2)
# Fill in `data$efficiency` as normalized time against the one-node
# reference algorithm: T_ref(1 node) / (T(n nodes) * n), matched per
# (npart, height) configuration.
#
# data:     data frame with columns algo, npart, height, nnode, global_time.
# ref_name: character vector of algorithm names whose rows are normalized.
# Returns `data` with the efficiency column filled for matching rows.
calc_normalized_time <- function(data, ref_name)
{
  # Reference rows come from the dedicated one-node algorithm.
  dataref <- subset(data, algo == get_one_node_reference_algorithm())
  # XXX: Most likely suboptimal but it works as expected!
  # seq_along/seq_len avoid the 1:0 trap when ref_name or data is empty.
  for (i in seq_along(ref_name)) {
    for (j in seq_len(nrow(data))) {
      if (data$algo[j] == ref_name[i]) {
        # Reference run with the same problem size and tree height.
        tmp_ref <- subset(dataref, npart == data$npart[j] &
                          height == data$height[j])
        seq_time <- subset(tmp_ref, nnode == 1)
        tid <- as.integer(as.vector(data$nnode[j]))
        data$efficiency[j] <- seq_time$global_time / (data$global_time[j] * tid)
      }
    }
  }
  return (data)
}
# Draw one normalized-time plot (time relative to the one-node reference)
# for a single model, faceted by particle count and tree height, and save
# it as <output>/<model>-normalized-time.pdf.
#
# db:           full timing data set.
# d_breaks:     algorithm names to normalize.
# model_wanted: model to keep for this plot.
gen_normalized_time_plot <- function(db, d_breaks, model_wanted)
{
  mdata <- subset(db, model == model_wanted)
  # Compute normalized time with the one-node reference run.
  mdata <- calc_normalized_time(mdata, d_breaks)
  # The reference algorithm only exists on one node; drop it from the plot.
  mdata <- subset(mdata, algo != get_one_node_reference_algorithm())
  p <- ggplot(data=mdata, aes_string(x="nnode", y="efficiency", color="algo")) +
    geom_line() +
    facet_wrap(npart ~ height, scales="free",
               labeller = labeller(npart = as_labeller(npart_labeller),
                                   height = as_labeller(height_labeller),
                                   .default=label_both,
                                   .multi_line=FALSE)) +
    # Project-wide colors and labels for each runtime.
    scale_color_manual(name="Algorithm",
                       breaks=get_breaks_runtime(),
                       labels=get_labels_runtime(),
                       values=get_colors_runtime()) +
    xlab("Number of nodes") +
    ylab("Normalized time") +
    scale_x_continuous(breaks=c(1, 2, 3, 4, 5, 6, 9, 12, 16, 20, 24))
  # Save the generated plot.
  output <- paste(get_output_directory(), "/", model_wanted, "-normalized-time.pdf", sep="")
  ggsave(output, p, width=29.7, height=21, units=c("cm"), device=cairo_pdf)
}
# Generate one normalized-time plot per model found in the database.
#
# dbfile: path to the database file consumed by get_data_subset.
gen_normalized_time <- function(dbfile)
{
  data <- get_data_subset(dbfile, 0L, 0L, "False")
  all_model <- unique(data$model)
  # All algorithms except the one-node reference (it is the baseline,
  # not a curve to draw).
  all_algo <- unique(subset(data, algo != get_one_node_reference_algorithm())$algo)
  # seq_along avoids iterating over 1:0 when no model is present.
  for (i in seq_along(all_model))
  {
    gen_normalized_time_plot(data, all_algo, all_model[i])
  }
}
library(plyr)
library(ggplot2)
# Fill in `data$efficiency` as parallel efficiency, using each algorithm's
# OWN one-node run as reference: T(1 node) / (T(n nodes) * n), matched per
# (npart, height) configuration.
#
# data:     data frame with columns algo, npart, height, nnode, global_time.
# ref_name: character vector of algorithm names to process.
# Returns `data` with the efficiency column filled for matching rows.
#
# XXX: Most likely suboptimal but it works as expected!
# NOTE does it really make sense to compare with an mpi version on only one node ?
calc_parallel_efficiency <- function(data, ref_name)
{
  # seq_along/seq_len avoid the 1:0 trap when ref_name or data is empty.
  for (i in seq_along(ref_name)) {
    data_ref <- subset(data, algo == ref_name[i])
    for (j in seq_len(nrow(data))) {
      if (data$algo[j] == ref_name[i]) {
        # Same problem size and tree height, single merged subset call.
        tmp_ref <- subset(data_ref, npart == data$npart[j] &
                          height == data$height[j])
        seq_time <- subset(tmp_ref, nnode == 1)
        tid <- as.integer(as.vector(data$nnode[j]))
        data$efficiency[j] <- seq_time$global_time / (data$global_time[j] * tid)
      }
    }
  }
  return (data)
}
# Draw one parallel-efficiency plot for a single model, faceted by particle
# count and tree height, and save it as
# <output>/<model>-parallel-efficiency.pdf.
#
# db:           full timing data set.
# d_breaks:     algorithm names used as their own one-node reference.
# model_wanted: model to keep for this plot.
gen_pareff_plot <- function(db, d_breaks, model_wanted)
{
  mdata <- subset(db, model == model_wanted)
  mdata <- calc_parallel_efficiency(mdata, d_breaks)
  p <- ggplot(data=mdata, aes_string(x="nnode", y="efficiency", color="algo")) +
    geom_line() +
    facet_wrap(npart ~ height, scales="free",
               labeller = labeller(npart = as_labeller(npart_labeller),
                                   height = as_labeller(height_labeller),
                                   .default=label_both,
                                   .multi_line=FALSE)) +
    # Project-wide colors and labels for each runtime.
    scale_color_manual(name="Algorithm",
                       breaks=get_breaks_runtime(),
                       labels=get_labels_runtime(),
                       values=get_colors_runtime()) +
    xlab("Number of nodes") +
    ylab("Parallel efficiency") +
    scale_x_continuous(breaks=c(1, 2, 3, 4, 5, 6, 9, 12, 16, 20, 24))
  # Save the generated plot.
  output <- paste(get_output_directory(), "/", model_wanted, "-parallel-efficiency.pdf", sep="")
  ggsave(output, p, width=29.7, height=21, units=c("cm"), device=cairo_pdf)
}
# Generate one parallel-efficiency plot per model found in the database.
#
# dbfile: path to the database file consumed by get_data_subset.
gen_pareff <- function(dbfile)
{
  # (removed unused local `file <- paste(dbfile, sep="")`)
  data <- get_data_subset(dbfile, 0L, 0L, "False")
  # Drop the one-node reference algorithm: it has no multi-node curve.
  data <- subset(data, algo != get_one_node_reference_algorithm())
  all_model <- unique(data$model)
  # seq_along avoids iterating over 1:0 when no model is present.
  for (i in seq_along(all_model))
  {
    gen_pareff_plot(data, unique(data$algo), all_model[i])
  }
}
......@@ -8,7 +8,6 @@ calc_speedup <- function(data, ref_algo)
data_ref <- subset(data, algo == ref_algo)
for (i in 1:nrow(data)) {
tmp_ref <- subset(data_ref, npart == data$npart[i] & height == data$height[i] & nnode == data$nnode[i])
#tmp_ref <- subset(tmp_ref, nthreads == data$nthreads[i])
data$speedup[i] <- tmp_ref$global_time / data$global_time[i]
}
return (data)
......@@ -40,14 +39,14 @@ gen_speedup_taskdep_plot <- function(d, model_wanted)
#g <- g + ylim(ylimits)
# Save generated plot.
output <- paste(model_wanted, "-speedup.pdf", sep="")
output <- paste(get_output_directory(), "/", model_wanted, "-speedup.pdf", sep="")
ggsave(output, g, width=29.7, height=21, units=c("cm"), device=cairo_pdf)
}
gen_speedup <- function(dbfile)
{
data <- get_data_subset(dbfile, 0L, 0L, "False")
#output <- paste(output_dir, node, "-1M-7-cube-speedup.pdf", sep="")
data <- subset(data, algo != get_one_node_reference_algorithm())
all_model <- unique(data$model)
for (i in 1:length(all_model))
......
......@@ -6,12 +6,12 @@ library(ggplot2)
gen_times_taskdep_plot <- function(data, algo_wanted, model_wanted)
{
# Sort data to have task, runtime and idle.
subdata <- subset(data, model == model_wanted && algo == algo_wanted)
subdata <- subset(data, model == model_wanted & algo == algo_wanted)
subdata$rmem <- NULL
subdata$global_time <- NULL
subdata <- melt(subdata, id=c("model", "algo", "nnode", "nthreads", "npart","height","bsize"))
subdata <- rename(subdata, c("variable"="event", "value"="duration"))
#subdata <- subdata[order(subdata$event, decreasing = TRUE),]
g <- ggplot(data=subdata, aes(x=nnode, y=duration, fill=event))
g <- g + geom_bar(stat="identity", position="fill")
......@@ -38,10 +38,9 @@ gen_times_taskdep_plot <- function(data, algo_wanted, model_wanted)
g <- g + xlab("Number of nodes")
g <- g + ylab("% of time")
output <- paste(algo_wanted, "-", model_wanted, "-times.pdf", sep="")
output <- paste(get_output_directory(), "/", model_wanted, "-", algo_wanted, "-times.pdf", sep="")
# Save generated plot.
ggsave(output, g, width=29.7, height=21, units=c("cm"), device=cairo_pdf)
print(output)
}
#Use this function to normalize
......@@ -83,6 +82,7 @@ gen_times_taskdep <- function(dbfile)
{
# Cube (volume)
data <- get_data_subset(dbfile, 0L, 0L, "False")
data <- subset(data, algo != get_one_node_reference_algorithm())
all_algorithm <- unique(data$algo)
all_model <- unique(data$model)
......@@ -93,11 +93,4 @@ gen_times_taskdep <- function(dbfile)
gen_times_taskdep_plot(data, all_algorithm[i], all_model[j])
}
}
# Ellipsoid (Surface)
#data <- get_data_subset(dbfile, 0L, 0L, "True")
#data <- subset(data, name == compiler)
#output <- paste("times_taskdep/", machine, "_", compiler, "_ellipsoid.pdf", sep="")
#gen_times_taskdep_plot(output, data)
}
......@@ -2,9 +2,14 @@ source("common.R")
# Driver script: load every plotting module, then regenerate all plots
# from the loutre.db database file.
source("gen_times_taskdep.R")
source("gen_efficiencies_taskdep.R")
source("gen_speedup_plots.R")
source("gen_parallel_efficiency_plots.R")
source("gen_normalized_time_plots.R")
###
# Generate display of bars with the time spent in Task, Runtime and Idle.
###
gen_times_taskdep("loutre.db")
# Efficiency plots.
gen_efficiencies("loutre.db")
# Speedup plots.
gen_speedup("loutre.db")
# Parallel-efficiency plots.
gen_pareff("loutre.db")
# Time normalized against the one-node reference run.
gen_normalized_time("loutre.db")
#!/usr/bin/python
import getopt
import sys
import math
import copy
import os
import socket
import subprocess
import re
import types
class ScalFMMConfig(object):
    """One ScalFMM benchmark configuration plus CSV formatting helpers.

    Class-level defaults are overwritten per run while parsing the
    simulation's stdout dump (see main()).
    """
    num_threads = 1
    num_nodes = 1
    algorithm = "implicit"
    model = "cube"
    num_particules = 10000
    height = 4
    bloc_size = 100
    order = 5

    def show(self):
        """Print the simulation parameters to stdout."""
        print ("=== Simulation parameters ===")
        print ("Number of nodes: " + str(self.num_nodes))
        print ("Number of threads: " + str(self.num_threads))
        print ("Model: " + str(self.model))
        print ("Number of particules: " + str(self.num_particules))
        print ("Height: " + str(self.height))
        print ("Bloc size: " + str(self.bloc_size))
        print ("Order: " + str(self.order))

    def gen_header(self):
        """Return the CSV header line (all column names double-quoted)."""
        columns = [
            "model",
            "algo",
            "nnode",
            "nthreads",
            "npart",
            "height",
            "bsize",
            "global_time",
            "runtime_time",
            "task_time",
            "idle_time",
            "scheduling_time",
            "communication_time",
            "rmem",
        ]
        return ",".join("\"" + c + "\"" for c in columns) + "\n"

    def gen_record(self, global_time, runtime_time, task_time, idle_time, scheduling_time, rmem):
        """Return one CSV data line matching gen_header()'s column order.

        String and boolean values are double-quoted; numbers are written
        bare. communication_time is not measured and is hard-coded to 0.0.
        """
        columns = [
            self.model,
            self.algorithm,
            self.num_nodes,
            self.num_threads,
            self.num_particules,
            self.height,
            self.bloc_size,
            global_time,
            runtime_time,
            task_time,
            idle_time,
            scheduling_time,
            0.0,  # communication_time placeholder
            rmem,
        ]
        cells = []
        for value in columns:
            # Original mixed `type(x) is bool` and `type(x) == bool`;
            # isinstance covers both consistently.
            if isinstance(value, (bool, str)):
                cells.append("\"" + str(value) + "\"")
            else:
                cells.append(str(value))
        return ",".join(cells) + "\n"
def get_times_from_trace_file(filename):
    """Aggregate per-state times from a StarPU trace file.

    Runs the external `starpu_trace_state_stats.py` tool on `filename` and
    parses its CSV-ish stdout.

    Returns a tuple (runtime_time, task_time, idle_time, scheduling_time).
    Exits the whole program if the tool fails.
    """
    cmd = "starpu_trace_state_stats.py " + filename
    proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        # BUGFIX: the original had an unreachable `return proc.returncode`
        # after this sys.exit(); it has been removed.
        sys.exit("FATAL: Failed to parse trace.rec!")
    task_time = 0.0
    idle_time = 0.0
    runtime_time = 0.0
    scheduling_time = 0.0
    for line in stdout.decode().splitlines():
        arr = line.replace("\"", "").split(",")
        if arr[0] == "Name":
            continue  # skip the header row
        if len(arr) >= 4:
            if arr[2] == "Runtime":
                if arr[0] == "Scheduling":
                    scheduling_time = float(arr[3])
                else:
                    # All non-scheduling runtime states share one counter;
                    # the last one seen wins (kept from original).
                    runtime_time = float(arr[3])
            elif arr[2] == "Task":
                task_time += float(arr[3])
            elif arr[2] == "Other":
                # NOTE(review): "Other" overwrites rather than accumulates,
                # unlike Task — kept as-is, confirm intent with author.
                idle_time = float(arr[3])
    return runtime_time, task_time, idle_time, scheduling_time
def main():
    """Build a CSV database record from a simulation stdout dump and trace.

    Command-line options:
      -t / --trace-file=         StarPU trace file (default: trace.rec)
      -i / --output-trace-file=  simulation stdout dump to parse
      -o / --output-file=        database file to append to (default: loutre.db)
    """
    output_trace_file = ""
    trace_filename = "trace.rec"
    output_filename = "loutre.db"

    long_opts = ["help",
                 "trace-file=",
                 "output-trace-file=",
                 "output-file="]
    opts, args = getopt.getopt(sys.argv[1:], "ht:i:o:", long_opts)
    for o, a in opts:
        if o in ("-h", "--help"):
            # usage()
            print("No help")
            sys.exit()
        elif o in ("-t", "--trace-file"):
            trace_filename = str(a)
        elif o in ("-i", "--output-trace-file"):
            output_trace_file = str(a)
        elif o in ("-o", "--output-file"):
            output_filename = str(a)
        else:
            assert False, "unhandled option"

    config = ScalFMMConfig()
    rmem = 0
    global_time = 0.0
    runtime_time = 0.0
    task_time = 0.0
    idle_time = 0.0
    scheduling_time = 0.0

    # Matches an integer or decimal literal (raw string; the original used
    # non-raw "\d" which relies on Python not treating it as an escape).
    number_re = r"[-+]?\d*\.\d+|\d+"

    # Append to an existing database, or create it with a header line.
    if os.path.isfile(output_filename):  # Time in milli
        output_file = open(output_filename, "a")
    else:
        output_file = open(output_filename, "w")
        output_file.write(config.gen_header())

    try:
        # Scrape the run parameters and the global time from the stdout dump.
        with open(output_trace_file, "r") as ins:
            for line in ins:
                if re.search("Average", line):
                    a = re.findall(number_re, line)
                    if len(a) == 1:
                        global_time = a[0]
                elif re.search("Total Particles", line):
                    # BUGFIX: the original repeated this branch verbatim;
                    # the duplicate elif was unreachable and was removed.
                    a = re.findall(number_re, line)
                    if len(a) == 1:
                        config.num_particules = int(a[0])
                elif re.search("Group size", line):
                    a = re.findall(number_re, line)
                    if len(a) == 1:
                        config.bloc_size = int(a[0])
                elif re.search("Nb node", line):
                    a = re.findall(number_re, line)
                    if len(a) == 1:
                        config.num_nodes = int(a[0])
                elif re.search("Tree height", line):
                    a = re.findall(number_re, line)
                    if len(a) == 1:
                        config.height = int(a[0])
                elif re.search("Nb thread", line):
                    a = re.findall(number_re, line)
                    if len(a) == 1:
                        config.num_threads = int(a[0])
                elif re.search("Model", line):
                    config.model = line[line.index(":")+1:].strip()
                elif re.search("Algorithm", line):
                    config.algorithm = line[line.index(":")+1:].strip()

        if os.path.isfile(trace_filename):  # Time in milli
            runtime_time, task_time, idle_time, scheduling_time = get_times_from_trace_file(trace_filename)
        else:
            print("File doesn't exist " + trace_filename)

        # Write a record to the output file.
        output_file.write(config.gen_record(float(global_time),
                                            float(runtime_time),
                                            float(task_time),
                                            float(idle_time),
                                            float(scheduling_time),
                                            int(rmem)))
    finally:
        # BUGFIX: the original never closed the database file.
        output_file.close()

main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment