Commit 8c8f5256 authored by SINDT--BARET Yanis's avatar SINDT--BARET Yanis
Browse files

Add graphic script

parent 4728e1ee
setwd("/home/yanissb/CLionProjects/sswm_prototype/examples/sswm_project5_2")
library(ggplot2)
library(dplyr)
library(ggnewscale)
library(glue)
library(scales)
library(matrixStats)
library(cowplot)
# Build the name of the CSV file produced for one run and one probability.
#
# index:   run number, inserted twice in the file name.
# id_prob: two-element vector; element 1 is the tag placed after "result_",
#          element 2 is the value placed after ".json_".
# Returns the interpolated file name (a glue string).
select_path <- function(index, id_prob) {
  csv_name_template <- "sswm_evol_result_5000000result_{id_prob[1]}_{index}.json_{id_prob[2]}00000_y_{index}_.csv"
  # glue() resolves {index} and {id_prob[...]} from this function's arguments.
  glue(csv_name_template)
}
# Read one result CSV and prepare the three tables used by the plots.
#
# path: path of the CSV file (comma separated, with a header row).
# Returns an unnamed list of three data frames:
#   [[1]] rows kept by adaptive_distinct(), plus the last row of the file;
#   [[2]] the same rows reduced to the first occurrence of each `non_mutant`
#         value, plus the last row of the file;
#   [[3]] the full table as read, with `mutation` recoded to a factor
#         ("0" -> "Switch", "6" -> "Inversion").
interprate_data <- function(path) {
  data <- read.csv(path, header = TRUE, sep = ",")
  data$mutation <- recode_factor(
    as.factor(data$mutation),
    "0" = "Switch",
    "6" = "Inversion"
  )
  # The last row is appended to both reduced tables so that the step curves
  # extend to the final generation.
  last_row <- data[nrow(data), ]
  concentrate_data_prot <- adaptive_distinct(data)
  concentrate_data_fitness <- distinct(
    concentrate_data_prot,
    non_mutant,
    .keep_all = TRUE
  )
  list(
    rbind(concentrate_data_prot, last_row),
    rbind(concentrate_data_fitness, last_row),
    data
  )
}
# Draw the two-panel figure of a single run.
#
# data_list_prot:    list whose elements 1 to 4 are tables with columns
#                    `nb_gen` and `nb_coding_arn`.
# data_list_fitness: list whose elements 1 to 4 are tables with columns
#                    `nb_gen`, `non_mutant` and `mutation`.
# index:             value inserted in the panel titles ("Hikes<index>").
# Elements 1, 2, 3, 4 of both lists are labelled "1", "0.5", "0.1" and "0"
# in the "Probability of inversion" legend.
# Returns a cowplot drawing with the fitness panel in the lower half and the
# protein-count panel in the upper half.
do_nice_plot <- function(data_list_prot,data_list_fitness,index) {
df1 = as.data.frame(data_list_fitness[[1]])
df2 = as.data.frame(data_list_fitness[[2]])
df3 = as.data.frame(data_list_fitness[[3]])
df4 = as.data.frame(data_list_fitness[[4]])
# Fitness panel: one point layer per table, coloured by mutation type.
fig1 = ggplot(df1, aes(x = nb_gen,y = non_mutant)) +
geom_point(data = df1, aes(color = as.factor(mutation), shape = as.factor(mutation),size = as.factor(mutation))) +
geom_point(data = df2, aes(color = as.factor(mutation), shape = as.factor(mutation),size = as.factor(mutation))) +
geom_point(data = df3, aes(color = as.factor(mutation), shape = as.factor(mutation),size = as.factor(mutation))) +
geom_point(data = df4, aes(color = as.factor(mutation), shape = as.factor(mutation),size = as.factor(mutation))) +
scale_color_manual(values = c("Switch" = "Blue", "Inversion"="red"), name = "Type of mutation") +
scale_size_manual(values = c("Inversion" = 2, "Switch" = 2, "one" = 1), guide = "none") +
scale_shape_manual(values = c("Inversion" = 1, "Switch" = 1), guide = "none") +
labs(y = "Fitness", x = "Generation", title = glue("Hikes{index}")) +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)+
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)+ annotation_logticks(base = 10, sides = "b") + theme_bw() +
# annotation_logticks(base = 10, sides = "l") +
# A second colour scale is opened here so the step lines below can be
# coloured by probability while the points keep the mutation colours.
new_scale_color() +
geom_step(data = df1, linetype = "solid", aes(color = "1")) +
geom_step(data = df2, linetype = "solid", aes(color = "0.5")) +
geom_step(data = df3, linetype = "solid", aes(color = "0.1")) +
geom_step(data = df4, linetype = "solid", aes(color = "0")) +
scale_color_brewer(palette = "Dark2", name = "Probability of inversion")
# df1..df4 are reused for the protein-count tables from here on.
df1 = as.data.frame(data_list_prot[[1]])
df2 = as.data.frame(data_list_prot[[2]])
df3 = as.data.frame(data_list_prot[[3]])
df4 = as.data.frame(data_list_prot[[4]])
# Protein panel: step lines only, log scale on the x axis only.
fig2 = ggplot(df1, aes(x = nb_gen, y = nb_coding_arn)) +
geom_step(data = df1, aes(color = "1")) +
geom_step(data = df2, aes(color = "0.5")) +
geom_step(data = df3, aes(color = "0.1")) +
geom_step(data = df4, aes(color = "0")) +
scale_color_brewer(palette = "Dark2", name = "Probability of inversion") +
labs(y = "Number of proteins", x = "Generation", title = glue("Hikes{index}")) +
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)+ annotation_logticks(base = 10, sides = "b") + theme_bw()
# draw_plot(plot, x, y, width, height): fig1 fills the bottom half of the
# canvas (y = 0), fig2 the top half (y = 0.5).
final_fig = ggdraw() + draw_plot(fig1,0,0,1,0.5) + draw_plot(fig2,0,0.5,1,0.5)
return(final_fig)
}
# Gather one column (`param`) of every run into one wide table per probability.
#
# list_list_df: list over runs; each entry is a list over probabilities whose
#               elements are data frames (or objects as.data.frame() can
#               convert) holding an `nb_gen` column and a `param` column.
# nb_files:     number of runs to read from list_list_df (at least 1).
# param:        name of the column to collect.
# Returns a list with one data frame per probability found in the first run:
# columns `nb_gen` and `param` come from run 1, followed by one column
# `hikes<i>` for every further run i.
create_df_all_hikes <- function(list_list_df, nb_files, param) {
  stopifnot(nb_files >= 1, length(list_list_df) >= nb_files)
  # The number of probabilities is read from the data instead of being
  # hard-coded to 4.
  nb_prob <- length(list_list_df[[1]])
  list_of_mean_df <- vector("list", nb_prob)
  for (prob_id in seq_len(nb_prob)) {
    # Collect the pieces first and bind them once: binding inside the loop
    # copies the growing table at every iteration.
    columns <- vector("list", nb_files)
    for (i in seq_len(nb_files)) {
      temp_data <- as.data.frame(list_list_df[[i]][[prob_id]])
      if (i == 1) {
        columns[[i]] <- temp_data[, c("nb_gen", param), drop = FALSE]
      } else {
        run_col <- temp_data[, param, drop = FALSE]
        names(run_col) <- paste0("hikes", i)
        columns[[i]] <- run_col
      }
    }
    # Always a data frame, including when there is a single run.
    list_of_mean_df[[prob_id]] <- do.call(cbind, columns)
  }
  list_of_mean_df
}
# Draw the step curves of the per-generation summary value, one curve per
# probability.
#
# data_list: list whose elements 1 to 4 are tables with columns `nb_gen` and
#            `mean_val`; they are labelled "1", "0.5", "0.1" and "0" in the
#            "Probability of inversion" legend.
# y_name:    label of the y axis. When it is exactly "Median of fitness" the
#            y axis is also put on a log10 scale.
# Returns the ggplot object with the legend placed inside the panel.
do_nice_mean_plot <- function(data_list, y_name) {
df1 = as.data.frame(data_list[[1]])
df2 = as.data.frame(data_list[[2]])
df3 = as.data.frame(data_list[[3]])
df4 = as.data.frame(data_list[[4]])
f_plot = ggplot(df1, aes(x = nb_gen,y = mean_val)) +
new_scale_color() +
geom_step(data = df1, linetype = "solid", aes(color = "1")) +
geom_step(data = df2, linetype = "solid", aes(color = "0.5")) +
geom_step(data = df3, linetype = "solid", aes(color = "0.1")) +
geom_step(data = df4, linetype = "solid", aes(color = "0")) +
scale_color_brewer(palette = "Dark2", name = "Probability of inversion") +
labs(y = y_name, x = "Generation") +
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)+ annotation_logticks(base = 10, sides = "b")
# Only the fitness figure gets a logarithmic y axis; the choice is driven by
# the axis label itself.
if (y_name == "Median of fitness") {
f_plot = f_plot + scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)
}
# Legend drawn in a white, black-bordered box at relative position (1, 0.2).
return (f_plot + theme(
legend.position = c(1,0.2),
legend.justification = c(1, 0),
legend.background = element_rect(fill = "white", colour = "black")))
}
# Keep only the rows whose (non_mutant, nb_coding_arn) pair is not found at
# the previous generation (nb_gen - 1).
#
# df1: data frame with columns `nb_gen`, `non_mutant`, `nb_coding_arn`,
#      `mutant` and `mutation`.
# Returns the retained rows, ordered by the three key columns (merge() sorts
# on them), with columns `nb_gen`, `non_mutant`, `nb_coding_arn`, `mutant`,
# `mutation`.
adaptive_distinct <- function(df1) {
  key_cols <- c("nb_gen", "non_mutant", "nb_coding_arn")
  # Copy of the table moved one generation forward: a row of df1 that finds a
  # partner in it carries the same key values as the generation before.
  shifted <- df1
  shifted$nb_gen <- df1$nb_gen + 1
  shifted$is_ok <- rep("pasok", nrow(shifted))
  merged <- merge(df1, shifted, by = key_cols, all.x = TRUE)
  # Rows without a partner have NA in the marker column.
  changed <- merged[is.na(merged$is_ok), , drop = FALSE]
  drop_cols <- c("mutant.y", "mutation.y", "is_ok")
  changed <- changed[, !(names(changed) %in% drop_cols), drop = FALSE]
  # Strip the ".x" suffix added by merge() rather than assigning names by
  # position, so a different column order in the input cannot swap labels.
  names(changed) <- sub("\\.x$", "", names(changed))
  changed
}
# Keep only the rows whose `mean_val` is not found at the previous generation
# (nb_gen - 1).
#
# df1: data frame with columns `nb_gen` and `mean_val`.
# Returns the retained rows ordered by (`nb_gen`, `mean_val`), since merge()
# sorts on its key columns.
adaptive_median_distinct <- function(df1) {
  # Copy of the table moved one generation forward: a row of df1 that finds a
  # partner in it has the same value as the generation before.
  shifted <- df1
  shifted$nb_gen <- df1$nb_gen + 1
  shifted$is_ok <- rep("pasok", nrow(shifted))
  merged <- merge(df1, shifted, by = c("nb_gen", "mean_val"), all.x = TRUE)
  # Rows without a partner have NA in the marker column.
  changed <- merged[is.na(merged$is_ok), , drop = FALSE]
  changed[, names(changed) != "is_ok", drop = FALSE]
}
# ---- Load every run and save one figure per run ----
# Number of runs to process.
nb_files <- 30
# One entry per inversion probability: (tag used in the file name,
# probability as written in the file name). See select_path().
id_prob <- list(c("100", "1.0"), c("050", "0.5"), c("010", "0.1"), c("000", "0.0"))
# Make sure the output folder exists before ggsave() writes into it.
dir.create("image", showWarnings = FALSE)
# Preallocated: one slot per run, filled with the full tables of that run.
list_of_list_of_big_df <- vector("list", nb_files)
for (i in seq_len(nb_files)) {
  list_of_big_df <- vector("list", length(id_prob))
  list_of_df_prot <- vector("list", length(id_prob))
  list_of_df_fitness <- vector("list", length(id_prob))
  for (k in seq_along(id_prob)) {
    j <- id_prob[[k]]
    path <- select_path(i, j)
    dfs <- interprate_data(path)
    list_of_df_prot[[k]] <- dfs[[1]]
    list_of_df_fitness[[k]] <- dfs[[2]]
    # Stored as a one-element list (dfs[3], not dfs[[3]]) so the structure
    # written to "big_list" at the end of the script stays unchanged.
    list_of_big_df[[k]] <- dfs[3]
    remove(dfs)
  }
  nice_plot <- do_nice_plot(list_of_df_prot, list_of_df_fitness, i)
  ggsave(glue("image/Hikes{i}.png"), nice_plot, width = 29.7, height = 21, units = "cm")
  # Progress trace; j still holds the last probability entry at this point.
  print(glue("{i},{j}"))
  remove(list_of_df_prot)
  remove(list_of_df_fitness)
  remove(nice_plot)
  # print(a)
  list_of_list_of_big_df[[i]] <- list_of_big_df
  remove(list_of_big_df)
  # The tables are large: release memory before reading the next run.
  gc()
}
#
# ---- Median fitness across runs, one curve per probability ----
list_of_mean_df <- create_df_all_hikes(list_of_list_of_big_df, nb_files, param = "non_mutant")
new_df <- vector("list", length(list_of_mean_df))
for (i in seq_along(list_of_mean_df)) {
  data <- list_of_mean_df[[i]]
  # nb_gen is the x axis, not a run: it must stay out of the row-wise median
  # (it used to be included and shifted the result).
  run_values <- as.matrix(data[, names(data) != "nb_gen", drop = FALSE])
  temp_big_data <- data.frame(nb_gen = data$nb_gen, mean_val = rowMedians(run_values))
  remove(data, run_values)
  # Last generation, appended below so the curve reaches the end of the run.
  temp <- temp_big_data[nrow(temp_big_data), ]
  # Keep the first generation at which each median value appears.
  concentrate_data <- distinct(temp_big_data, mean_val, .keep_all = TRUE)
  remove(temp_big_data)
  concentrate_data <- rbind(concentrate_data, temp)
  # Generation 0 cannot be drawn on the log10 x axis used by the plot.
  concentrate_data$nb_gen[concentrate_data$nb_gen == 0] <- 1
  remove(temp)
  new_df[[i]] <- concentrate_data
  remove(concentrate_data)
}
mean_plot <- do_nice_mean_plot(new_df, "Median of fitness")
remove(new_df)
gc()
# ---- Median number of proteins across runs, then the combined figure ----
list_of_mean_df_prot <- create_df_all_hikes(list_of_list_of_big_df, nb_files, param = "nb_coding_arn")
new_df <- vector("list", length(list_of_mean_df_prot))
for (i in seq_along(list_of_mean_df_prot)) {
  data <- list_of_mean_df_prot[[i]]
  # nb_gen is the x axis, not a run: it must stay out of the row-wise median
  # (it used to be included and shifted the result).
  run_values <- as.matrix(data[, names(data) != "nb_gen", drop = FALSE])
  temp_big_data <- data.frame(nb_gen = data$nb_gen, mean_val = rowMedians(run_values))
  remove(data, run_values)
  # Last generation, appended below so the curve reaches the end of the run.
  temp <- temp_big_data[nrow(temp_big_data), ]
  # Keep the generations at which the median differs from the one before.
  concentrate_data <- adaptive_median_distinct(temp_big_data)
  remove(temp_big_data)
  concentrate_data <- rbind(concentrate_data, temp)
  # Generation 0 cannot be drawn on the log10 x axis used by the plot.
  concentrate_data$nb_gen[concentrate_data$nb_gen == 0] <- 1
  remove(temp)
  new_df[[i]] <- concentrate_data
  remove(concentrate_data)
}
mean_plot_prot <- do_nice_mean_plot(new_df, "Number of proteins")
remove(new_df)
# Fitness curve in the lower half of the page, protein curve in the upper half.
final_med_fig <- ggdraw() +
  draw_plot(mean_plot, 0, 0, 1, 0.5) +
  draw_plot(mean_plot_prot, 0, 0.5, 1, 0.5)
# Make sure the output folder exists before ggsave() writes into it.
dir.create("image", showWarnings = FALSE)
ggsave("image/medianHikes.png", final_med_fig, width = 29.7, height = 21, units = "cm")
# Keep the raw tables on disk so they can be reloaded with load("big_list").
save(list_of_list_of_big_df, file = "big_list")
setwd("/home/yanissb/CLionProjects/sswm_prototype/examples/sswm_project6")
library(ggplot2)
library(dplyr)
library(ggnewscale)
library(glue)
library(scales)
library(matrixStats)
library(cowplot)
library(RColorBrewer)
# Build the name of the CSV file produced for one run and one probability.
#
# index:   run number, inserted twice in the file name.
# id_prob: two-element vector; element 1 is the tag placed after "result_",
#          element 2 is the value placed after ".json_".
# Returns the interpolated file name (a glue string).
select_path <- function(index, id_prob) {
  csv_name_template <- "sswm_evol_result_5000000result_{id_prob[1]}_{index}.json_{id_prob[2]}00000_y_{index}_.csv"
  # glue() resolves {index} and {id_prob[...]} from this function's arguments.
  glue(csv_name_template)
}
# Read one result CSV and label its mutation codes.
#
# path: path of the CSV file (comma separated, with a header row).
# Returns a one-element list holding the table, with `mutation` recoded to a
# factor ("0" -> "Switch", "6" -> "Inversion").
interprate_data <- function(path) {
  data <- read.csv(path, header = TRUE, sep = ",")
  data$mutation <- recode_factor(
    as.factor(data$mutation),
    "0" = "Switch",
    "6" = "Inversion"
  )
  list(data)
}
# Gather one column (`param`) of every run into one wide table per probability.
#
# list_list_df: list over runs; each entry is a list over probabilities whose
#               elements are data frames (or objects as.data.frame() can
#               convert) holding an `nb_gen` column and a `param` column.
# nb_files:     number of runs to read from list_list_df (at least 1).
# param:        name of the column to collect.
# Returns a list with one data frame per probability found in the first run:
# columns `nb_gen` and `param` come from run 1, followed by one column
# `hikes<i>` for every further run i. The probability index is printed as a
# progress trace.
create_df_all_hikes <- function(list_list_df, nb_files, param) {
  stopifnot(nb_files >= 1, length(list_list_df) >= nb_files)
  # The number of probabilities is read from the data instead of relying on
  # a global variable.
  nb_prob <- length(list_list_df[[1]])
  list_of_mean_df <- vector("list", nb_prob)
  for (prob_id in seq_len(nb_prob)) {
    print(prob_id)
    # Collect the pieces first and bind them once: binding inside the loop
    # copies the growing table at every iteration.
    columns <- vector("list", nb_files)
    for (i in seq_len(nb_files)) {
      temp_data <- as.data.frame(list_list_df[[i]][[prob_id]])
      if (i == 1) {
        columns[[i]] <- temp_data[, c("nb_gen", param), drop = FALSE]
      } else {
        run_col <- temp_data[, param, drop = FALSE]
        names(run_col) <- paste0("hikes", i)
        columns[[i]] <- run_col
      }
      remove(temp_data)
    }
    # Always a data frame, including when there is a single run.
    list_of_mean_df[[prob_id]] <- do.call(cbind, columns)
    remove(columns)
  }
  list_of_mean_df
}
# Draw the step curves of the per-generation summary value together with a
# band between the first and third quartile, one curve per probability.
#
# data_list: list whose elements 1 to 4 are tables with columns `nb_gen`,
#            `mean_val`, `first_quantile` and `third_quantile`; they are
#            labelled "1", "0.5", "0.1" and "0".
# y_name:    label of the y axis. When it is exactly "Median of fitness" the
#            y axis is also put on a log10 scale.
# Returns the ggplot object with the legend placed inside the panel.
do_nice_mean_plot <- function(data_list, y_name) {
df1 = as.data.frame(data_list[[1]])
df2 = as.data.frame(data_list[[2]])
df3 = as.data.frame(data_list[[3]])
df4 = as.data.frame(data_list[[4]])
f_plot = ggplot(df1, aes(x = nb_gen,y = mean_val)) +
new_scale_color() +
geom_step(data = df1, linetype = "solid", aes(color = "1")) +
geom_step(data = df2, linetype = "solid", aes(color = "0.5")) +
geom_step(data = df3, linetype = "solid", aes(color = "0.1")) +
geom_step(data = df4, linetype = "solid", aes(color = "0")) +
scale_color_brewer(palette = "Dark2", name = "Probability of inversion") +
# A fresh colour scale is opened for the ribbon outlines, which map the
# constant "black" instead of a probability label.
new_scale_color() +
geom_ribbon(data = df1,aes(ymin= first_quantile,ymax = third_quantile, fill = "1", color = "black"), show.legend = F) +
geom_ribbon(data = df2,aes(ymin= first_quantile,ymax = third_quantile, fill = "0.5", color = "black"), show.legend = F) +
geom_ribbon(data = df3,aes(ymin= first_quantile,ymax = third_quantile, fill = "0.1", color = "black"), show.legend = F) +
geom_ribbon(data = df4,aes(ymin= first_quantile,ymax = third_quantile, fill = "0", color = "black"), show.legend = F) +
# NOTE(review): two colour scales are added back to back for the ribbons;
# presumably only the manual one below ends up being used - confirm that
# the brewer scale on the next line is still needed.
scale_color_brewer(palette = "Dark2", name = "Probability of inversion") +
scale_color_manual(values = alpha(c("black" = "black"), .4), guide = "none") +
scale_fill_manual(values = alpha(brewer.pal(n=4, name = "Dark2"), .3), guide = "none") +
scale_alpha(guide = "none") +
labs(y = y_name, x = "Generation") +
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)+ annotation_logticks(base = 10, sides = "b")
# Only the fitness figure gets a logarithmic y axis; the choice is driven by
# the axis label itself.
if (y_name == "Median of fitness") {
f_plot = f_plot + scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
)
}
# Legend drawn in a white, black-bordered box at relative position (0.2, 0.6).
return (f_plot + theme(
legend.position = c(0.2,0.6),
legend.justification = c(1, 0),
legend.background = element_rect(fill = "white", colour = "black")))
}
## -- Needs some changes before it can be run again
# do_nice_plot <- function(data_list_prot,data_list_fitness,index) {
# df1 = as.data.frame(data_list_fitness[[1]])
# df2 = as.data.frame(data_list_fitness[[2]])
# df3 = as.data.frame(data_list_fitness[[3]])
# df4 = as.data.frame(data_list_fitness[[4]])
# fig1 = ggplot(df1, aes(x = nb_gen,y = non_mutant)) +
# geom_point(data = df1, aes(color = as.factor(mutation), shape = as.factor(mutation),size = as.factor(mutation))) +
# geom_point(data = df2, aes(color = as.factor(mutation), shape = as.factor(mutation),size = as.factor(mutation))) +
# geom_point(data = df3, aes(color = as.factor(mutation), shape = as.factor(mutation),size = as.factor(mutation))) +
# geom_point(data = df4, aes(color = as.factor(mutation), shape = as.factor(mutation),size = as.factor(mutation))) +
# scale_color_manual(values = c("Switch" = "Blue", "Inversion"="red"), name = "Type of mutation") +
# scale_size_manual(values = c("Inversion" = 2, "Switch" = 2, "one" = 1), guide = "none") +
# scale_shape_manual(values = c("Inversion" = 1, "Switch" = 1), guide = "none") +
# labs(y = "Fitness", x = "Generation", title = glue("Hikes{index}")) +
# scale_y_log10(
# breaks = scales::trans_breaks("log10", function(x) 10^x),
# labels = scales::trans_format("log10", scales::math_format(10^.x))
# )+
# scale_x_log10(
# breaks = scales::trans_breaks("log10", function(x) 10^x),
# labels = scales::trans_format("log10", scales::math_format(10^.x))
# )+ annotation_logticks(base = 10, sides = "b") + theme_bw() +
# # annotation_logticks(base = 10, sides = "l") +
# new_scale_color() +
# geom_step(data = df1, linetype = "solid", aes(color = "1")) +
# geom_step(data = df2, linetype = "solid", aes(color = "0.5")) +
# geom_step(data = df3, linetype = "solid", aes(color = "0.1")) +
# geom_step(data = df4, linetype = "solid", aes(color = "0")) +
# scale_color_brewer(palette = "Dark2", name = "Probability of inversion")
# df1 = as.data.frame(data_list_prot[[1]])
# df2 = as.data.frame(data_list_prot[[2]])
# df3 = as.data.frame(data_list_prot[[3]])
# df4 = as.data.frame(data_list_prot[[4]])
# fig2 = ggplot(df1, aes(x = nb_gen, y = nb_coding_arn)) +
# geom_step(data = df1, aes(color = "1")) +
# geom_step(data = df2, aes(color = "0.5")) +
# geom_step(data = df3, aes(color = "0.1")) +
# geom_step(data = df4, aes(color = "0")) +
# scale_color_brewer(palette = "Dark2", name = "Probability of inversion") +
# labs(y = "Number of proteins", x = "Generation", title = glue("Hikes{index}")) +
# scale_x_log10(
# breaks = scales::trans_breaks("log10", function(x) 10^x),
# labels = scales::trans_format("log10", scales::math_format(10^.x))
# )+ annotation_logticks(base = 10, sides = "b") + theme_bw()
# final_fig = ggdraw() + draw_plot(fig1,0,0,1,0.5) + draw_plot(fig2,0,0.5,1,0.5)
#
# return(final_fig)
# }
# Keep only the rows whose `mean_val` is not found at the previous generation
# (nb_gen - 1).
#
# df1: data frame with columns `nb_gen` and `mean_val`.
# Returns the retained rows ordered by (`nb_gen`, `mean_val`), since merge()
# sorts on its key columns.
adaptive_median_distinct <- function(df1) {
  # Copy of the table moved one generation forward: a row of df1 that finds a
  # partner in it has the same value as the generation before.
  shifted <- df1
  shifted$nb_gen <- df1$nb_gen + 1
  shifted$is_ok <- rep("pasok", nrow(shifted))
  merged <- merge(df1, shifted, by = c("nb_gen", "mean_val"), all.x = TRUE)
  # Rows without a partner have NA in the marker column.
  changed <- merged[is.na(merged$is_ok), , drop = FALSE]
  changed[, names(changed) != "is_ok", drop = FALSE]
}
# Read the CSV file of every run and every probability.
#
# nb_files:   number of runs; runs are numbered 1..nb_files.
# id_prob:    collection of probability descriptors passed one by one to
#             select_path().
# final_list: list to extend; the loaded runs are appended after its
#             existing elements.
# Returns final_list followed by one entry per run, each entry being the
# unnamed list of tables read for that run (one per element of id_prob).
# The run number is printed after each run as a progress trace.
load_all_data <- function(nb_files, id_prob, final_list) {
  # Filled in place: appending to a growing list copies it at every run.
  loaded_runs <- vector("list", nb_files)
  for (i in seq_len(nb_files)) {
    loaded_runs[[i]] <- unname(lapply(id_prob, function(ids) {
      read.csv(select_path(i, ids), header = TRUE, sep = ",")
    }))
    # The tables are large: release memory before reading the next run.
    gc()
    print(i)
  }
  c(final_list, loaded_runs)
}
# On hold ("en suspens"): not called in the visible part of this script.
#
# Read the CSV file of every run and every probability, keeping only part of
# the rows of each file.
#
# big_big_list: currently unused.
# id_prob:      collection of probability descriptors passed one by one to
#               select_path().
# final_list:   list to extend; the loaded runs are appended after its
#               existing elements.
# Returns final_list followed by one entry per run, each entry being the
# unnamed list of reduced tables read for that run.
# NOTE(review): nb_files is not a parameter; it is looked up outside the
# function (the script defines it at top level).
load_compact_mean_data <- function(big_big_list, id_prob, final_list) {
  loaded_runs <- vector("list", nb_files)
  for (i in seq_len(nb_files)) {
    # Starts from an empty list for each run; the previous `= list` stored
    # the function `list` itself as the first element.
    loaded_runs[[i]] <- unname(lapply(id_prob, function(ids) {
      data <- read.csv(select_path(i, ids), header = TRUE, sep = ",")
      # NOTE(review): this keeps the rows whose nb_prot_kept equals the one
      # of the following row; confirm whether `!= 0` was intended.
      data[which(diff(data$nb_prot_kept) == 0), ]
    }))
    gc()
    print(i)
  }
  c(final_list, loaded_runs)
}
#which better mutation
# Differences between consecutive values of the `nb_prot_kept` column.
#
# df: table with a numeric `nb_prot_kept` column.
# Returns (invisibly) the vector of lagged differences, one element shorter
# than the column.
prot_from_mutation <- function(df) {
  kept_counts <- df$nb_prot_kept
  invisible(diff(kept_counts))
}
# ---- Load every run, then median fitness with its quartile band ----
# Number of runs to process.
nb_files <- 30
# One entry per inversion probability: (tag used in the file name,
# probability as written in the file name). See select_path().
id_prob <- list(c("100", "1.0"), c("050", "0.5"), c("010", "0.1"), c("000", "0.0"))
list_of_list_of_big_df <- load_all_data(nb_files, id_prob, list())
list_of_mean_df <- create_df_all_hikes(list_of_list_of_big_df, nb_files, param = "fitness_kept")
new_df <- vector("list", length(list_of_mean_df))
for (i in seq_along(list_of_mean_df)) {
  data <- list_of_mean_df[[i]]
  # nb_gen is the x axis, not a run: it must stay out of the row-wise
  # statistics (it used to be included and shifted median and quartiles).
  # The matrix is built once and shared by the three summaries.
  run_values <- as.matrix(data[, names(data) != "nb_gen", drop = FALSE])
  temp_big_data <- data.frame(
    nb_gen = data$nb_gen,
    mean_val = rowMedians(run_values),
    first_quantile = rowQuantiles(run_values, probs = 0.25),
    third_quantile = rowQuantiles(run_values, probs = 0.75)
  )
  remove(data, run_values)
  # Last generation, appended below so the curve reaches the end of the run.
  temp <- temp_big_data[nrow(temp_big_data), ]
  # Keep the first generation at which each median value appears.
  concentrate_data <- distinct(temp_big_data, mean_val, .keep_all = TRUE)
  remove(temp_big_data)
  concentrate_data <- rbind(concentrate_data, temp)
  # Generation 0 cannot be drawn on the log10 x axis used by the plot.
  concentrate_data$nb_gen[concentrate_data$nb_gen == 0] <- 1
  remove(temp)
  new_df[[i]] <- concentrate_data
  remove(concentrate_data)
}
mean_plot <- do_nice_mean_plot(new_df, "Median of fitness")
remove(new_df)
gc()
# ---- Median number of proteins with its quartile band, then the figure ----
list_of_mean_df_prot <- create_df_all_hikes(list_of_list_of_big_df, nb_files, param = "nb_prot_kept")
# The raw tables are no longer needed once the wide tables are built.
remove(list_of_list_of_big_df)
new_df <- vector("list", length(list_of_mean_df_prot))
for (i in seq_along(list_of_mean_df_prot)) {
  data <- list_of_mean_df_prot[[i]]
  # nb_gen is the x axis, not a run: it must stay out of the row-wise
  # statistics (it used to be included and shifted median and quartiles).
  # The matrix is built once and shared by the three summaries.
  run_values <- as.matrix(data[, names(data) != "nb_gen", drop = FALSE])
  temp_big_data <- data.frame(
    nb_gen = data$nb_gen,
    mean_val = rowMedians(run_values),
    first_quantile = rowQuantiles(run_values, probs = 0.25),
    third_quantile = rowQuantiles(run_values, probs = 0.75)
  )
  remove(data, run_values)
  # Last generation, appended below so the curve reaches the end of the run.
  temp <- temp_big_data[nrow(temp_big_data), ]
  # NOTE(review): this keeps the rows whose median equals the one of the
  # following row; confirm whether `!= 0` (rows where it changes) was meant.
  concentrate_data <- temp_big_data[which(diff(temp_big_data$mean_val) == 0), ]
  remove(temp_big_data)
  concentrate_data <- rbind(concentrate_data, temp)
  # Generation 0 cannot be drawn on the log10 x axis used by the plot.
  concentrate_data$nb_gen[concentrate_data$nb_gen == 0] <- 1
  remove(temp)
  new_df[[i]] <- concentrate_data
  remove(concentrate_data)
}
mean_plot_prot <- do_nice_mean_plot(new_df, "Number of proteins")
# Fitness curve in the lower half of the page, protein curve in the upper half.
final_med_fig <- ggdraw() +
  draw_plot(mean_plot, 0, 0, 1, 0.5) +
  draw_plot(mean_plot_prot, 0, 0.5, 1, 0.5)
# Make sure the output folder exists before ggsave() writes into it.
dir.create("image", showWarnings = FALSE)
ggsave("image/medianHikes.png", final_med_fig, width = 29.7, height = 21, units = "cm")
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment