Commit 5b4d6981 authored by Martin Khannouz, committed by Berenger Bramas

Add R script

parent 37209d54
# Load the benchmark database (CSV) and optionally restrict it to one
# particle count and/or one tree height.
#
# f: path of the CSV file; lines starting with '#' are treated as comments.
# n: particle count to keep; a zero value keeps every particle count.
# h: tree height to keep; a zero value keeps every height.
# p: accepted for interface compatibility, not used by this function.
#
# Returns the data frame with npart, height and bsize turned into ordered
# factors.
get_data_subset <- function(f, n, h, p) {
  # Column types, in file order:
  #   model, algo                                        -> factor
  #   nnode, nthreads, npart, height, bsize              -> integer
  #   global_time, runtime_time, task_time, idle_time,
  #   scheduling_time, communication_time, rmem          -> numeric
  column_types <- c(rep("factor", 2), rep("integer", 5), rep("numeric", 7))

  records <- read.csv(
    file = f,
    header = TRUE,
    sep = ",",
    quote = "\"",
    dec = ".",
    comment.char = "#",
    colClasses = column_types
  )

  for (column in c("npart", "height", "bsize")) {
    records[[column]] <- ordered(records[[column]])
  }

  if (n) {
    records <- subset(records, npart == n)
  }
  if (h) {
    records <- subset(records, height == h)
  }
  records
}
# Load the timing database (CSV) and keep only the rows matching the
# requested schemes, particle count, tree height and model.
#
# f: path of the CSV file; lines starting with '#' are treated as comments.
# s: scheme value(s) to keep.
# n: particle count to keep; a zero value keeps every particle count.
# h: tree height to keep; a zero value keeps every height.
# m: model to keep.
#
# Returns the filtered data frame with an extra "compiler_runtime" column
# ("<compiler>-<runtime>"), restricted to a fixed list of known names.
get_db_with_time <- function(f, s, n, h, m) {
  # Column types, in file order:
  #   nthreads                                   -> integer
  #   name, scheme, compiler, runtime, model     -> factor
  #   npart, height, bsize, niter                -> integer
  #   time                                       -> numeric
  #   rmem                                       -> integer
  column_types <- c(
    "integer",
    rep("factor", 5),
    rep("integer", 4),
    "numeric",
    "integer"
  )

  timings <- read.csv(
    file = f,
    header = TRUE,
    sep = ",",
    quote = "\"",
    dec = ".",
    comment.char = "#",
    colClasses = column_types
  )

  for (column in c("npart", "height", "bsize")) {
    timings[[column]] <- ordered(timings[[column]])
  }

  timings <- subset(timings, scheme %in% s)
  if (n) {
    timings <- subset(timings, npart == n)
  }
  if (h) {
    timings <- subset(timings, height == h)
  }
  timings <- subset(timings, model == m)

  timings$compiler_runtime <- paste(timings$compiler, timings$runtime,
                                    sep = "-")

  # Only these benchmark names are reported.
  kept_names <- c(
    "GCC-OMP4",
    "Klang", "Klang-C", "Klang-CP",
    "StarPU", "StarPU-C", "StarPU-CP"
  )
  subset(timings, name %in% kept_names)
}
# OMP Compiler/runtime breaks, colors...
# Legend breaks for the runtime/algorithm colour scale.
get_breaks_runtime <- function() {
  c("implicit", "explicit")
}
# Legend labels for the runtime/algorithm colour scale, in break order.
get_labels_runtime <- function() {
  c("Implicit", "Explicit")
}
# Colour assigned to each runtime/algorithm value (named character vector).
get_colors_runtime <- function() {
  setNames(
    c("#266d83", "#e20025"),
    c("implicit", "explicit")
  )
}
# Scheme breaks, colors ...
# Legend breaks for the scheme scales: the base task-dependency scheme
# followed by its -P, -C and -CP variants.
get_breaks_scheme <- function() {
  paste0("tb-omp4#task#dep", c("", "-P", "-C", "-CP"))
}
# Point shape code assigned to each scheme (named numeric vector).
get_shapes_scheme <- function() {
  schemes <- paste0("tb-omp4#task#dep", c("", "-P", "-C", "-CP"))
  setNames(c(0, 2, 10, 8), schemes)
}
# Line type assigned to each scheme (named character vector).
get_ltypes_scheme <- function() {
  schemes <- paste0("tb-omp4#task#dep", c("", "-P", "-C", "-CP"))
  setNames(c("solid", "dotdash", "solid", "dashed"), schemes)
}
# Timings
# Legend breaks for the timing/efficiency colour scale.
get_breaks_timings <- function() {
  c(
    "runtime_time",
    "communication_time",
    "scheduling_time",
    "pipeline_time",
    "task_time",
    "parallel_time"
  )
}
# Legend labels for the timing/efficiency colour scale, in break order.
get_labels_timings <- function() {
  c(
    "Runtime",
    "Communication",
    "Scheduling",
    "Pipeline",
    "Task",
    "Parallel"
  )
}
# Colour assigned to each timing/efficiency event (named character vector).
get_colors_timings <- function() {
  events <- c("task_time", "runtime_time", "pipeline_time",
              "scheduling_time", "communication_time", "parallel_time")
  colours <- c("#619dff", "#01ba38", "#f9766e",
               "#fdbc43", "#9D2FEA", "#000000")
  setNames(colours, events)
}
# Facet label for a particle count: "N = <count in scientific notation>".
npart_labeller <- function(value) {
  count <- format(as.numeric(value), scientific = TRUE)
  paste("N =", count)
}
# Facet label for a tree height: "h = <height>".
height_labeller <- function(value) {
  paste("h =", value)
}
library(plyr)
library(reshape)
library(ggplot2)
# Draw the efficiency curves (one line per event) against the number of
# nodes, faceted by particle count and tree height, and save them as an
# A4-landscape PDF.
#
# output: path of the PDF file to write.
# data:   data frame with columns nnode, efficiency, event, npart, height.
gen_efficiencies_plot <- function(output, data) {
  facet_labels <- labeller(
    npart = as_labeller(npart_labeller),
    height = as_labeller(height_labeller),
    .default = label_both,
    .multi_line = FALSE
  )

  figure <- ggplot(
    data = data,
    aes_string(x = "nnode", y = "efficiency", color = "event", group = "event")
  ) +
    geom_line() +
    geom_point(aes_string(color = "event"), size = 2) +
    facet_wrap(npart ~ height, scales = "free", labeller = facet_labels) +
    # One colour per efficiency kind.
    scale_color_manual(
      name = "Efficiencies",
      breaks = get_breaks_timings(),
      labels = get_labels_timings(),
      values = get_colors_timings()
    ) +
    # Axis titles and tick positions.
    xlab("Number of node") +
    ylab("Efficiency") +
    scale_x_continuous(breaks = c(1, 2, 3, 4, 6, 9, 12, 16, 20, 24)) +
    # Efficiencies are displayed between 0 and 1.10.
    ylim(c(0.0, 1.10))

  ggsave(output, figure, width = 29.7, height = 21, units = c("cm"),
         device = cairo_pdf)
}
# Compute the task, scheduling, runtime, communication, pipeline and
# parallel efficiencies for one particle count.
#
# data: long-format data frame with (at least) the columns npart, nnode,
#       event and duration.
# n:    particle count to select (compared against the npart column).
#
# Returns the rows of the selected events with an added "efficiency" column;
# the idle_time rows are relabelled "pipeline_time" and a copy of the
# task_time rows is added as "parallel_time".
#
# NOTE(review): all the arithmetic below is element-wise between separately
# subsetted data frames, so it assumes every event subset has the same number
# of rows in the same order; tt_1 is recycled by R when it has fewer rows
# than et -- confirm against the layout of the input data.
compute_efficiency <- function(data, n)
{
# Select data
#sdata <- subset(data, npart == n & height == h & model == m)
sdata <- subset(data, npart == n)
# Compute task efficiency: single-node task time over task time
#tt_1 <- subset(sdata, event == "task_time" & nthreads == 1)
tt_1 <- subset(sdata, event == "task_time" & nnode == 1)
et <- subset(sdata, event == "task_time")
et$efficiency <- tt_1$duration / et$duration
# Compute scheduling efficiency: task / (task + scheduling)
es <- subset(sdata, event == "scheduling_time")
es$efficiency <- et$duration / (et$duration + es$duration)
# Compute runtime efficiency: (task + scheduling) / (task + runtime + scheduling)
er <- subset(sdata, event == "runtime_time")
er$efficiency <- (et$duration + es$duration) / (et$duration + er$duration + es$duration)
# Compute communication efficiency: previous total over previous total plus communication
ec <- subset(sdata, event == "communication_time")
ec$efficiency <- (et$duration + es$duration + er$duration) / (et$duration + er$duration + es$duration + ec$duration)
# Compute pipeline efficiency from the idle time rows
ep <- subset(sdata, event == "idle_time")
# Replaces the whole event column so these rows are reported as "pipeline_time".
ep$event <- "pipeline_time" # idle is weird.
ep$efficiency <- (et$duration + er$duration + es$duration + ec$duration) / (et$duration + er$duration + ep$duration + es$duration + ec$duration)
# Add new rows for the parallel efficiency (copies of the task_time rows)
ndata <- subset(sdata, event == "task_time")
ndata$event <- "parallel_time"
sdata <- rbind(sdata, ndata)
# Compute parallel efficiency: product of the five efficiencies above
e <- subset(sdata, event == "parallel_time")
e$efficiency <- et$efficiency * er$efficiency * ep$efficiency * es$efficiency * ec$efficiency
# Merge all efficiencies
sdata <- rbind(et, er, ep, es, ec, e)
return (sdata)
}
# Generate the efficiency plot of one (algorithm, model) pair.
#
# data_init:    long-format data frame with the columns algo, model, npart,
#               nnode, event and duration.
# algo_wanted:  algorithm to keep (compared against the algo column).
# model_wanted: model to keep (compared against the model column).
#
# Writes "<algo_wanted>-<model_wanted>-efficiencies.pdf" when at least one
# row matches; does nothing otherwise.
gen_efficiency <- function(data_init, algo_wanted, model_wanted) {
  # The row filter must use the element-wise `&`: the scalar `&&` only looks
  # at the first row (and is an error on vectors since R 4.3), which kept
  # either every row or none.
  data <- subset(data_init, algo == algo_wanted & model == model_wanted)
  if (nrow(data) == 0L) {
    return(invisible(NULL))
  }

  # One efficiency table per particle count, stacked in a single rbind.
  all_nparts <- unique(data$npart)
  per_npart <- lapply(seq_along(all_nparts), function(i) {
    compute_efficiency(data, all_nparts[i])
  })
  sdata <- do.call(rbind, per_npart)

  output <- paste0(algo_wanted, "-", model_wanted, "-efficiencies.pdf")
  gen_efficiencies_plot(output, sdata)
}
# Generate one efficiency plot per (algorithm, model) pair found in the
# benchmark database.
#
# dbfile: path of the CSV database read through get_data_subset().
gen_efficiencies <- function(dbfile) {
  data_init <- get_data_subset(dbfile, 0L, 0L, "False")

  # Long format: one row per (configuration, event) with its duration.
  data <- melt(
    data_init,
    id = c("model", "algo", "nnode", "nthreads", "npart", "height", "bsize")
  )
  data$nthreads <- NULL
  data <- rename(data, c("variable" = "event", "value" = "duration"))

  all_algorithm <- unique(data$algo)
  all_model <- unique(data$model)
  # seq_along() yields an empty sequence for an empty database, whereas
  # 1:length(x) would iterate over c(1, 0).
  for (i in seq_along(all_algorithm)) {
    for (j in seq_along(all_model)) {
      gen_efficiency(data, all_algorithm[i], all_model[j])
    }
  }
}
library(plyr)
library(ggplot2)
library(scales)
# Add a "speedup" column: for every row, the global time of the reference
# algorithm measured on the same (npart, height, nnode) configuration divided
# by the row's own global time.
#
# data:     data frame with the columns algo, npart, height, nnode and
#           global_time.
# ref_algo: value of the algo column used as the reference.
#
# Returns data with the speedup column filled in. Stops with an explicit
# message when a configuration has no reference row; when several reference
# rows match, the first one is used and a warning is raised.
calc_speedup <- function(data, ref_algo) {
  data_ref <- data[which(data$algo == ref_algo), , drop = FALSE]

  # Create the column up front so that empty input is handled and the loop
  # never relies on recycling the first computed value.
  data$speedup <- rep(NA_real_, nrow(data))

  for (i in seq_len(nrow(data))) {
    same_config <- data_ref$npart == data$npart[i] &
      data_ref$height == data$height[i] &
      data_ref$nnode == data$nnode[i]
    ref_time <- data_ref$global_time[which(same_config)]

    if (length(ref_time) == 0L) {
      stop("calc_speedup: no '", ref_algo, "' reference row for npart = ",
           data$npart[i], ", height = ", data$height[i], ", nnode = ",
           data$nnode[i], call. = FALSE)
    }
    if (length(ref_time) > 1L) {
      warning("calc_speedup: several '", ref_algo,
              "' reference rows for npart = ", data$npart[i], ", height = ",
              data$height[i], ", nnode = ", data$nnode[i],
              "; using the first one", call. = FALSE)
    }
    data$speedup[i] <- ref_time[1L] / data$global_time[i]
  }
  data
}
# Plot, for one model, the speedup of every algorithm relative to the
# "explicit" one against the number of nodes, faceted by particle count and
# tree height, and save it as "<model_wanted>-speedup.pdf".
#
# d:            benchmark data frame (columns model, algo, npart, height,
#               nnode, global_time).
# model_wanted: model to keep (compared against the model column).
gen_speedup_taskdep_plot <- function(d, model_wanted) {
  model_rows <- subset(d, model == model_wanted)
  speedups <- calc_speedup(model_rows, "explicit")

  facet_labels <- labeller(
    npart = as_labeller(npart_labeller),
    height = as_labeller(height_labeller),
    .default = label_both,
    .multi_line = FALSE
  )

  figure <- ggplot(
    data = speedups,
    aes_string(x = "nnode", y = "speedup", color = "algo")
  ) +
    geom_line() +
    facet_wrap(npart ~ height, scales = "free", labeller = facet_labels) +
    # One colour per algorithm.
    scale_color_manual(
      name = "Algorithm",
      breaks = get_breaks_runtime(),
      labels = get_labels_runtime(),
      values = get_colors_runtime()
    ) +
    xlab("Number of nodes") +
    ylab("Speedup")

  # The y-axis range is left to ggplot2.
  ggsave(paste0(model_wanted, "-speedup.pdf"), figure,
         width = 29.7, height = 21, units = c("cm"), device = cairo_pdf)
}
# Generate one speedup plot per model found in the benchmark database.
#
# dbfile: path of the CSV database read through get_data_subset().
gen_speedup <- function(dbfile) {
  data <- get_data_subset(dbfile, 0L, 0L, "False")
  all_model <- unique(data$model)
  # seq_along() yields an empty sequence for an empty database, whereas
  # 1:length(x) would iterate over c(1, 0).
  for (i in seq_along(all_model)) {
    gen_speedup_taskdep_plot(data, all_model[i])
  }
}
library(plyr)
library(reshape)
library(ggplot2)
# Plot, for one (algorithm, model) pair, how the time splits between task,
# runtime, scheduling, communication and idle as a 100% stacked bar per
# number of nodes, faceted by particle count and tree height. The plot is
# saved as "<algo_wanted>-<model_wanted>-times.pdf" and the file name is
# printed.
#
# data:         wide benchmark data frame as returned by get_data_subset().
# algo_wanted:  algorithm to keep (compared against the algo column).
# model_wanted: model to keep (compared against the model column).
gen_times_taskdep_plot <- function(data, algo_wanted, model_wanted) {
  # The row filter must use the element-wise `&`: the scalar `&&` only looks
  # at the first row (and is an error on vectors since R 4.3), which kept
  # either every row or none.
  subdata <- subset(data, model == model_wanted & algo == algo_wanted)

  # rmem and global_time are not part of the time breakdown.
  subdata$rmem <- NULL
  subdata$global_time <- NULL

  # Long format: one row per (configuration, event) with its duration.
  subdata <- melt(
    subdata,
    id = c("model", "algo", "nnode", "nthreads", "npart", "height", "bsize")
  )
  subdata <- rename(subdata, c("variable" = "event", "value" = "duration"))

  g <- ggplot(data = subdata, aes(x = nnode, y = duration, fill = event))
  # position = "fill" rescales every bar to the same total height.
  g <- g + geom_bar(stat = "identity", position = "fill")

  # Facet titles.
  g <- g + facet_wrap(
    npart ~ height,
    scales = "free",
    labeller = labeller(
      npart = as_labeller(npart_labeller),
      height = as_labeller(height_labeller),
      .default = label_both,
      .multi_line = FALSE
    )
  )

  # Set colors.
  breaks <- c("idle_time", "communication_time", "runtime_time",
              "scheduling_time", "task_time")
  labels <- c("Idle", "Communication", "Runtime", "Scheduling", "Task")
  colors <- c(
    "task_time" = "#619dff",
    "runtime_time" = "#01ba38",
    "idle_time" = "#f9766e",
    # Same scheduling colour as the efficiency plots; the previous value
    # (#02bb37) could not be told apart from the runtime green.
    "scheduling_time" = "#fdbc43",
    "communication_time" = "#9D2FEA"
  )
  g <- g + scale_fill_manual(name = "Time", breaks = breaks,
                             labels = labels, values = colors)

  # Set X/Y labels.
  g <- g + xlab("Number of nodes")
  g <- g + ylab("% of time")

  # Save generated plot.
  output <- paste0(algo_wanted, "-", model_wanted, "-times.pdf")
  ggsave(output, g, width = 29.7, height = 21, units = c("cm"),
         device = cairo_pdf)
  print(output)
}
#Use this function to normalize
# Normalise the task, scheduling, runtime and idle durations of one
# (npart, height, model) configuration by the single-thread task time.
#
# data: long-format data frame with the columns npart, height, model,
#       nthreads, event and duration.
# n, h, m: particle count, tree height and model to select.
#
# Returns the selected rows, relabelled "task", "scheduling", "runtime" and
# "idle", with the normalised value stored in an "efficiency" column.
compute_timings <- function(data, n, h, m) {
  selected <- subset(data, npart == n & height == h & model == m)

  # Tt(1): task time measured with a single thread.
  reference <- subset(selected, event == "task_time" & nthreads == 1)

  # Rows of one event, relabelled and divided by the reference task time.
  normalise <- function(event_name, label) {
    rows <- subset(selected, event == event_name)
    rows$event <- label
    rows$efficiency <- rows$duration / reference$duration
    rows
  }

  rbind(
    normalise("task_time", "task"),
    normalise("scheduling_time", "scheduling"),
    normalise("runtime_time", "runtime"),
    normalise("idle_time", "idle")
  )
}
# Generate one time-breakdown plot per (algorithm, model) pair found in the
# benchmark database.
#
# dbfile: path of the CSV database read through get_data_subset().
gen_times_taskdep <- function(dbfile) {
  # Cube (volume)
  data <- get_data_subset(dbfile, 0L, 0L, "False")
  all_algorithm <- unique(data$algo)
  all_model <- unique(data$model)
  # seq_along() yields an empty sequence for an empty database, whereas
  # 1:length(x) would iterate over c(1, 0).
  for (i in seq_along(all_algorithm)) {
    for (j in seq_along(all_model)) {
      gen_times_taskdep_plot(data, all_algorithm[i], all_model[j])
    }
  }
  # Ellipsoid (Surface) -- currently disabled.
  #data <- get_data_subset(dbfile, 0L, 0L, "True")
  #data <- subset(data, name == compiler)
  #output <- paste("times_taskdep/", machine, "_", compiler, "_ellipsoid.pdf", sep="")
  #gen_times_taskdep_plot(output, data)
}
# Load the shared helpers, then the three plot generators.
for (script in c("common.R",
                 "gen_times_taskdep.R",
                 "gen_efficiencies_taskdep.R",
                 "gen_speedup_plots.R")) {
  source(script)
}
rm(script)

###
# Generate, from the "loutre.db" database and in this order: the bars with
# the time spent in Task, Runtime and Idle, the efficiency curves and the
# speedup curves.
###
invisible(lapply(
  list(gen_times_taskdep, gen_efficiencies, gen_speedup),
  function(generate) generate("loutre.db")
))
#!/usr/bin/python
import getopt
import sys
import math
import copy
import os
import socket
import subprocess
import re
import types
class ScalFMMConfig(object):
    """Parameters of one ScalFMM run and CSV serialisation of its results.

    The class attributes are the default values; main() overwrites them on
    the instance with what it parses from the program output.
    """

    num_threads = 1
    num_nodes = 1
    algorithm = "implicit"
    model = "cube"
    num_particules = 10000
    height = 4
    bloc_size = 100
    order = 5

    def show(self):
        """Print the simulation parameters on standard output."""
        print ("=== Simulation parameters ===")
        print ("Number of nodes: " + str(self.num_nodes))
        print ("Number of threads: " + str(self.num_threads))
        print ("Model: " + str(self.model))
        print ("Number of particules: " + str(self.num_particules))
        print ("Height: " + str(self.height))
        print ("Bloc size: " + str(self.bloc_size))
        print ("Order: " + str(self.order))

    def gen_header(self):
        """Return the CSV header line: quoted column names and a newline."""
        columns = [
            "model",
            "algo",
            "nnode",
            "nthreads",
            "npart",
            "height",
            "bsize",
            "global_time",
            "runtime_time",
            "task_time",
            "idle_time",
            "scheduling_time",
            "communication_time",
            "rmem",
        ]
        return ",".join("\"" + name + "\"" for name in columns) + "\n"

    def gen_record(self, global_time, runtime_time, task_time, idle_time,
                   scheduling_time, rmem, communication_time=0.0):
        """Return one CSV line, in gen_header() column order.

        String and boolean values are wrapped in double quotes; every other
        value is written with str(). communication_time defaults to 0.0,
        the value that was previously hard-coded for that column.
        """
        columns = [
            self.model,
            self.algorithm,
            self.num_nodes,
            self.num_threads,
            self.num_particules,
            self.height,
            self.bloc_size,
            global_time,
            runtime_time,
            task_time,
            idle_time,
            scheduling_time,
            communication_time,
            rmem,
        ]
        fields = []
        for value in columns:
            text = str(value)
            if isinstance(value, (bool, str)):
                text = "\"" + text + "\""
            fields.append(text)
        return ",".join(fields) + "\n"
def get_times_from_trace_file(filename):
    """Run starpu_trace_state_stats.py on a trace and extract the timings.

    The tool's standard output is read as comma-separated lines; column 0 is
    the state name, column 2 its category and column 3 its duration.

    Returns the tuple (runtime_time, task_time, idle_time, scheduling_time).
    Exits the program when the tool returns a non-zero status.
    """
    # Pass the arguments as a list so that a trace path containing
    # whitespace stays a single argument.
    cmd = ["starpu_trace_state_stats.py", filename]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        sys.exit("FATAL: Failed to parse " + filename + "!")

    task_time = 0.0
    idle_time = 0.0
    runtime_time = 0.0
    scheduling_time = 0.0
    for line in stdout.decode().splitlines():
        arr = line.replace("\"", "").split(",")
        if arr[0] == "Name":
            # Header line.
            continue
        if len(arr) < 4:
            continue
        category = arr[2]
        if category == "Runtime":
            # NOTE(review): "Task" durations are summed but "Runtime" and
            # "Other" durations are overwritten, so only the last such line
            # is kept -- confirm this is intended.
            if arr[0] == "Scheduling":
                scheduling_time = float(arr[3])
            else:
                runtime_time = float(arr[3])
        elif category == "Task":
            task_time += float(arr[3])
        elif category == "Other":
            idle_time = float(arr[3])
    return runtime_time, task_time, idle_time, scheduling_time
def _find_single_number(line):
    """Return the only number found in line (as a string), else None."""
    numbers = re.findall(r"[-+]?\d*\.\d+|\d+", line)
    if len(numbers) == 1:
        return numbers[0]
    return None


def main():
    """Parse one run's output and append a record to the CSV database.

    Options:
      -t / --trace-file         trace given to starpu_trace_state_stats.py
                                (default "trace.rec")
      -i / --output-trace-file  text output of the run to parse
      -o / --output-file        CSV database to append to
                                (default "loutre.db")
    """
    output_trace_file = ""
    trace_filename = "trace.rec"
    output_filename = "loutre.db"
    long_opts = ["help",
                 "trace-file=",
                 "output-trace-file=",
                 "output-file="]
    opts, args = getopt.getopt(sys.argv[1:], "ht:i:o:", long_opts)
    for o, a in opts:
        if o in ("-h", "--help"):
            # usage()
            print("No help")
            sys.exit()
        elif o in ("-t", "--trace-file"):
            trace_filename = str(a)
        elif o in ("-i", "--output-trace-file"):
            output_trace_file = str(a)
        elif o in ("-o", "--output-file"):
            output_filename = str(a)
        else:
            assert False, "unhandled option"

    config = ScalFMMConfig()
    rmem = 0
    global_time = 0.0
    runtime_time = 0.0
    task_time = 0.0
    idle_time = 0.0
    scheduling_time = 0.0

    # Lines carrying a single integer, and the config attribute it fills.
    # The order matters: the first matching marker wins.
    int_fields = (
        ("Total Particles", "num_particules"),
        ("Group size", "bloc_size"),
        ("Nb node", "num_nodes"),
        ("Tree height", "height"),
        ("Nb thread", "num_threads"),
    )

    # The header is only written when the database does not exist yet.
    is_new_file = not os.path.isfile(output_filename)
    # "with" guarantees the database is flushed and closed, even on error.
    with open(output_filename, "a") as output_file:
        if is_new_file:
            output_file.write(config.gen_header())

        with open(output_trace_file, "r") as ins:
            for line in ins:
                if "Average" in line:
                    number = _find_single_number(line)
                    if number is not None:
                        global_time = number
                    continue
                for marker, attribute in int_fields:
                    if marker in line:
                        number = _find_single_number(line)
                        if number is not None:
                            setattr(config, attribute, int(number))
                        break
                else:
                    if "Model" in line:
                        config.model = line[line.index(":")+1:].strip()
                    elif "Algorithm" in line:
                        config.algorithm = line[line.index(":")+1:].strip()

        if os.path.isfile(trace_filename):  # Time in milli
            (runtime_time, task_time,
             idle_time, scheduling_time) = get_times_from_trace_file(trace_filename)
        else:
            print("File doesn't exist " + trace_filename)

        # Write a record to the output file.
        output_file.write(config.gen_record(float(global_time),
                                            float(runtime_time),
                                            float(task_time),
                                            float(idle_time),
                                            float(scheduling_time),
                                            int(rmem)))


if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment