From 29c39d94396d033dc346a3abe4fb9d440ad85165 Mon Sep 17 00:00:00 2001 From: Marco Foley <marco.foley@inria.fr> Date: Mon, 28 Sep 2020 14:23:29 +0200 Subject: [PATCH] import neutral mutations related post treatement --- .../aevol_post_change_size_neutral_mut.cpp | 142 ++++++++++ .../aevol_post_neutral_mut_acc.cpp | 127 +++++++++ .../aevol_post_robustness_bias.cpp | 139 ++++++++++ .../libaevol_post/Robustness_bias_output.cpp | 255 ++++++++++++++++++ .../libaevol_post/Robustness_bias_output.h | 52 ++++ .../libaevol_post/neutral_mutation_exp.cpp | 193 +++++++++++++ .../libaevol_post/neutral_mutation_exp.h | 27 ++ .../libaevol_post/neutral_mutation_output.cpp | 144 ++++++++++ .../libaevol_post/neutral_mutation_output.h | 25 ++ src/post_treatments/testJSON.cpp | 19 ++ 10 files changed, 1123 insertions(+) create mode 100644 src/post_treatments/aevol_post_change_size_neutral_mut.cpp create mode 100644 src/post_treatments/aevol_post_neutral_mut_acc.cpp create mode 100644 src/post_treatments/aevol_post_robustness_bias.cpp create mode 100644 src/post_treatments/libaevol_post/Robustness_bias_output.cpp create mode 100644 src/post_treatments/libaevol_post/Robustness_bias_output.h create mode 100644 src/post_treatments/libaevol_post/neutral_mutation_exp.cpp create mode 100644 src/post_treatments/libaevol_post/neutral_mutation_exp.h create mode 100644 src/post_treatments/libaevol_post/neutral_mutation_output.cpp create mode 100644 src/post_treatments/libaevol_post/neutral_mutation_output.h create mode 100644 src/post_treatments/testJSON.cpp diff --git a/src/post_treatments/aevol_post_change_size_neutral_mut.cpp b/src/post_treatments/aevol_post_change_size_neutral_mut.cpp new file mode 100644 index 000000000..0929437c7 --- /dev/null +++ b/src/post_treatments/aevol_post_change_size_neutral_mut.cpp @@ -0,0 +1,142 @@ +// +// Created by duazel on 16/04/2020. +// +#include "IOJson.h" +#include "Individual.h" +#include "neutral_mutation_exp.h" +#include "neutral_mutation_output.h" + +#include <iostream> +#include <cstdio> +#include <cstring> +#include <getopt.h> +#include <string> +#include <vector> + +bool verbose = false; +uint32_t seed_prng = 0; +std::string inputFile = "input.json"; +std::string outputFile = "output.json"; +unsigned int wanted_size = 0; +int delta = 0; + +void print_help(char *prog_path) { + // Get the program file-name in prog_name (strip prog_path of the path) + char *prog_name; // No new, it will point to somewhere inside prog_path + if ((prog_name = strrchr(prog_path, '/'))) { + prog_name++; + } else { + prog_name = prog_path; + } + + printf("******************************************************************************\n"); + printf("* *\n"); + printf("* aevol - Artificial Evolution *\n"); + printf("* *\n"); + printf("* Aevol is a simulation platform that allows one to let populations of *\n"); + printf("* digital organisms evolve in different conditions and study experimentally *\n"); + printf("* the mechanisms responsible for the structuration of the genome and the *\n"); + printf("* transcriptome. *\n"); + printf("* *\n"); + printf("******************************************************************************\n"); + printf("\n"); + printf("%s: Accumulate neutral mutations on an individual to change its size without\n" + "changing its phenotype.\n", prog_name); + printf("\n"); + printf("Usage : %s -h or --help\n", prog_name); + printf(" or : %s -V or --version\n", prog_name); + printf("\nOptions\n"); + printf(" -S, --Size\n\twanted size\n"); + printf(" -d, --delta\n\twanted modification of the size\n"); + printf(" -h, --help\n\tprint this help, then exit\n"); + printf(" -V, --version\n\tprint version number, then exit\n"); +} + +void interpret_cmd_line_options(int argc, char **argv) { + const char *short_options = "hVv"; + static struct option long_options[] = {{"help", no_argument, nullptr, 'h'}, + {"version", no_argument, nullptr, 'V'}, + {"verbose", no_argument, nullptr, 'v'}, + {"Size", required_argument, nullptr, 'S'}, + {"delta", required_argument, nullptr, 'd'}, + {"seed", required_argument, nullptr, 's'}, + {"file", required_argument, nullptr, 'f'}, + {"output", required_argument, nullptr, 'o'}, + {0, 0, 0, 0}}; + + int option; + while ((option = getopt_long(argc, argv, short_options, long_options, + nullptr)) != -1) { + switch (option) { + case 'h' : + print_help(argv[0]); + exit(EXIT_SUCCESS); + case 'V' : + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + case 'v': + verbose = true; + break; + case 'S': + wanted_size = atol(optarg); + break; + case 'd': + delta = atol(optarg); + break; + case 's': + seed_prng = atol(optarg); + break; + case 'f': + inputFile.assign(optarg); + break; + case 'o': + outputFile.assign(optarg); + break; + default: + // An error message is printed in getopt_long, we just need to exit + exit(EXIT_FAILURE); + } + } + + if (wanted_size == 0 && delta == 0) { + Utils::ExitWithUsrMsg("One of the parameters wanted_size or delta is needed"); + } else if (wanted_size != 0 && delta != 0) { + Utils::ExitWithUsrMsg("Only one of the parameters wanted_size or delta has to be defined.\n" + "Otherwise its ambiguous"); + } + if (seed_prng == 0) { + Utils::ExitWithUsrMsg("The parameter seed_prng is needed"); + } + +} + +int main(int argc, char ** argv) { + interpret_cmd_line_options(argc, argv); + + std::cout << argv[0] << " started with :" << std::endl; + if (wanted_size != 0) { + std::cout << " - wanted_size = " << wanted_size << std::endl; + } else { + std::cout << " - delta = " << delta << std::endl; + } + std::cout << " - seed = " << seed_prng << std::endl; + std::cout << " - input file = " << inputFile << std::endl; + std::cout << " - output file = " << outputFile << std::endl; + IOJson inputJson(inputFile); + + out::init("result.txt", "mutation.txt"); + + if (wanted_size == 0) { + wanted_size = inputJson.individuals()[0].amount_of_dna() + delta; + } + + Individual * indiv = run_to_size(wanted_size, inputJson.individuals()[0]); + std::vector<Individual> indiv_vector; + indiv_vector.emplace_back(*indiv); + + inputJson.individuals().clear(); + inputJson.setIndividuals(indiv_vector); + + inputJson.write(outputFile); + return 0; +} diff --git a/src/post_treatments/aevol_post_neutral_mut_acc.cpp b/src/post_treatments/aevol_post_neutral_mut_acc.cpp new file mode 100644 index 000000000..17871749a --- /dev/null +++ b/src/post_treatments/aevol_post_neutral_mut_acc.cpp @@ -0,0 +1,127 @@ +// +// Created by duazel on 16/04/2020. +// +#include "IOJson.h" +#include "Individual.h" +#include "neutral_mutation_exp.h" +#include "neutral_mutation_output.h" + +#include <iostream> +#include <cstdio> +#include <cstring> +#include <getopt.h> +#include <string> +#include <vector> + +bool verbose = false; +uint32_t seed_prng = 0; +std::string inputFile; +std::string outputFile; +unsigned int number_generation; + +void print_help(char *prog_path) { + // Get the program file-name in prog_name (strip prog_path of the path) + char *prog_name; // No new, it will point to somewhere inside prog_path + if ((prog_name = strrchr(prog_path, '/'))) { + prog_name++; + } else { + prog_name = prog_path; + } + + printf("******************************************************************************\n"); + printf("* *\n"); + printf("* aevol - Artificial Evolution *\n"); + printf("* *\n"); + printf("* Aevol is a simulation platform that allows one to let populations of *\n"); + printf("* digital organisms evolve in different conditions and study experimentally *\n"); + printf("* the mechanisms responsible for the structuration of the genome and the *\n"); + printf("* transcriptome. *\n"); + printf("* *\n"); + printf("******************************************************************************\n"); + printf("\n"); + printf("%s: Accumulate neutral mutations on an individual.\n", + prog_name); + printf("\n"); + printf("Usage : %s -h or --help\n", prog_name); + printf(" or : %s -V or --version\n", prog_name); + printf("\nOptions\n"); + printf(" -n, --nb_generations\n\tnumber of generations\n"); + printf(" -h, --help\n\tprint this help, then exit\n"); + printf(" -V, --version\n\tprint version number, then exit\n"); +} + +void interpret_cmd_line_options(int argc, char **argv) { + const char *short_options = "hVv"; + static struct option long_options[] = {{"help", no_argument, nullptr, 'h'}, + {"version", no_argument, nullptr, 'V'}, + {"verbose", no_argument, nullptr, 'v'}, + {"nb_generations", required_argument, nullptr, 'n'}, + {"seed", required_argument, nullptr, 's'}, + {"file", required_argument, nullptr, 'f'}, + {"output_file", required_argument, nullptr, 'o'}, + {0, 0, 0, 0}}; + + int option; + while ((option = getopt_long(argc, argv, short_options, long_options, + nullptr)) != -1) { + switch (option) { + case 'h' : + print_help(argv[0]); + exit(EXIT_SUCCESS); + case 'V' : + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + case 'v': + verbose = true; + break; + case 'n': + number_generation = atol(optarg); + break; + case 's': + seed_prng = atol(optarg); + break; + case 'f': + inputFile.assign(optarg); + break; + case 'o': + outputFile.assign(optarg); + break; + default: + // An error message is printed in getopt_long, we just need to exit + exit(EXIT_FAILURE); + } + } + + if (number_generation == 0) { + Utils::ExitWithUsrMsg("The parameter number_generation is needed"); + } + if (seed_prng == 0) { + Utils::ExitWithUsrMsg("The parameter seed is needed"); + } + +} + +int main(int argc, char ** argv) { + inputFile = "input.json"; + outputFile = "output.json"; + interpret_cmd_line_options(argc, argv); + + std::cout << argv[0] << " started with :" << std::endl; + std::cout << " - number_generation = " << number_generation << std::endl; + std::cout << " - seed = " << seed_prng << std::endl; + std::cout << " - input file = " << inputFile << std::endl; + std::cout << " - output file = " << outputFile << std::endl; + IOJson inputJson(inputFile); + + out::init("result.txt", "mutation.txt"); + + run_generations(number_generation, inputJson.individuals()[0]); +// std::vector<Individual> indiv_vector; +// indiv_vector.emplace_back(*indiv); + +// inputJson.individuals().clear(); +// inputJson.setIndividuals(indiv_vector); + +// inputJson.write(outputFile); + return 0; +} diff --git a/src/post_treatments/aevol_post_robustness_bias.cpp b/src/post_treatments/aevol_post_robustness_bias.cpp new file mode 100644 index 000000000..591c102b1 --- /dev/null +++ b/src/post_treatments/aevol_post_robustness_bias.cpp @@ -0,0 +1,139 @@ +// +// Created by duazel on 27/04/2020. +// + +// +// Created by duazel on 16/04/2020. +// +#include "IOJson.h" +#include "Individual.h" +#include "neutral_mutation_exp.h" +#include "Robustness_bias_output.h" + +#include <iostream> +#include <cstdio> +#include <cstring> +#include <fstream> +#include <getopt.h> +#include <string> +#include <vector> + +unsigned int print_every = 0; +uint32_t seed_prng = 0; +std::string genomeFile; +unsigned int number_replications; + +void print_help(char *prog_path) { + // Get the program file-name in prog_name (strip prog_path of the path) + char *prog_name; // No new, it will point to somewhere inside prog_path + if ((prog_name = strrchr(prog_path, '/'))) { + prog_name++; + } else { + prog_name = prog_path; + } + + printf("******************************************************************************\n"); + printf("* *\n"); + printf("* aevol - Artificial Evolution *\n"); + printf("* *\n"); + printf("* Aevol is a simulation platform that allows one to let populations of *\n"); + printf("* digital organisms evolve in different conditions and study experimentally *\n"); + printf("* the mechanisms responsible for the structuration of the genome and the *\n"); + printf("* transcriptome. *\n"); + printf("* *\n"); + printf("******************************************************************************\n"); + printf("\n"); + printf("%s: Generate replications of an individual \n", + prog_name); + printf("\n"); + printf("Usage : %s -h or --help\n", prog_name); + printf(" or : %s -V or --version\n", prog_name); + printf("\nOptions\n"); + printf(" -n, --nb_replications\n\tnumber of replications\n"); + printf(" -f, --file\n\tinput file\n"); + printf(" -h, --help\n\tprint this help, then exit\n"); + printf(" -V, --version\n\tprint version number, then exit\n"); + printf(" -V, --version\n\tprint version number, then exit\n"); +} + +void interpret_cmd_line_options(int argc, char **argv) { + const char *short_options = "hVv"; + static struct option long_options[] = {{"help", no_argument, nullptr, 'h'}, + {"version", no_argument, nullptr, 'V'}, + {"print", required_argument, nullptr, 'p'}, + {"nb_replications", required_argument, nullptr, 'n'}, + {"seed", required_argument, nullptr, 's'}, + {"file", required_argument, nullptr, 'f'}, + {0, 0, 0, 0}}; + + int option; + while ((option = getopt_long(argc, argv, short_options, long_options, + nullptr)) != -1) { + switch (option) { + case 'h' : + print_help(argv[0]); + exit(EXIT_SUCCESS); + case 'V' : + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + case 'p': + print_every = atol(optarg); + break; + case 'n': + number_replications = atol(optarg); + break; + case 's': + seed_prng = atol(optarg); + break; + case 'f': + genomeFile.assign(optarg); + break; + default: + // An error message is printed in getopt_long, we just need to exit0 + exit(EXIT_FAILURE); + } + } + + if (number_replications == 0) { + Utils::ExitWithUsrMsg("The parameter number_replications is needed"); + } + +} + +int main(int argc, char ** argv) { + seed_prng = 456465; + number_replications = 1000000; + genomeFile = "input.json"; + print_every = 10000; + std::ofstream summary_file("summary.txt"); + interpret_cmd_line_options(argc, argv); + + std::cout << argv[0] << " started with :" << std::endl; + std::cout << " - number_replications = " << number_replications << std::endl; + std::cout << " - seed = " << seed_prng << std::endl; + std::cout << " - file = " << genomeFile << std::endl; + + IOJson inputJson(genomeFile); + + Individual individual = inputJson.individuals().front(); + individual.clear_everything_except_dna_and_promoters(); + individual.compute_phenotype(); + Robustness_bias_output out(individual, "indiv.csv", "mutation.csv"); + + for (unsigned int id = 1; id <= number_replications; ++id) { + std::vector<MutationEvent *> mutations; + std::vector<bool> is_neutral; + Individual * child = new_child(&individual, mutations, is_neutral); + out.record_replication(id, *child, individual, mutations, is_neutral); + delete child; + if (id%print_every == 0){ + out.print_summary(std::cout); + out.print_summary(summary_file); + } else if (id%100 == 0) { + std::cout << id << std::endl; + } + } + out.print_summary(summary_file); + + return 0; +} diff --git a/src/post_treatments/libaevol_post/Robustness_bias_output.cpp b/src/post_treatments/libaevol_post/Robustness_bias_output.cpp new file mode 100644 index 000000000..662eea456 --- /dev/null +++ b/src/post_treatments/libaevol_post/Robustness_bias_output.cpp @@ -0,0 +1,255 @@ +// +// Created by duazel on 27/04/2020. +// + +#include "Robustness_bias_output.h" +#include <iostream> +#include <iomanip> + +Robustness_bias_output::Robustness_bias_output(const Individual & indiv, const char* filename_indiv, + const char* filename_mutation) + : file_indiv_(filename_indiv), file_mutations_(filename_mutation), + file_duplications_("duplications.csv"), individual_(indiv){ + file_indiv_ << "id,"; + file_indiv_ << "seq_length,"; + file_indiv_ << "has_mutate,"; + file_indiv_ << "is_neutral" << std::endl; + + file_mutations_ << "id,"; + file_mutations_ << "seq_length,"; + file_mutations_ << "has_mutate,"; + file_mutations_ << "is_neutral,"; + file_mutations_ << "type,"; + file_mutations_ << "impact_on_length,"; + file_mutations_ << "mutation_neutral" << ","; + file_mutations_ << "nb_non_coding_RNA" << std::endl; + + file_duplications_ << "id,"; + file_duplications_ << "is_neutral,"; + file_duplications_ << "nb_non_coding_RNA,"; + file_duplications_ << "nb_touched_genes," << ","; + file_duplications_ << "percent_touched" << std::endl; +} + +Robustness_bias_output::~Robustness_bias_output() { + file_mutations_.close(); + file_indiv_.close(); + file_duplications_.close(); +} + +int Robustness_bias_output::nb_touched_gene(const Individual & individual, int d1, int f1, int size) { + int count = 0; + + for (auto prot : individual.protein_list()) { + if (is_touched_gene(prot, d1, f1, size)) count++; + } + + return count; +} + +bool Robustness_bias_output::is_touched_gene(Protein *prot, int d1, int f1, int size) { + int d2,f2; + + if (prot->strand() == Strand::LEADING) { + d2 = prot->shine_dal_pos(); + f2 = prot->last_translated_pos(); + } else { + f2 = prot->shine_dal_pos(); + d2 = prot->last_translated_pos(); + } + + if (f1<d1) f1 += size; + if (d2<d1) d2 += size; + if (f2<d1) f2 += size; + + return !(f1 < d2 && d2 < f2); +} + +double Robustness_bias_output::mean_percent_touched(const Individual & individual, int d1, int f1, int size) { + double sum = 0; + int count = 0; + + for (auto prot : individual.protein_list()) { + int d2,f2; + if (prot->strand() == Strand::LEADING) { + d2 = prot->shine_dal_pos(); + f2 = prot->last_translated_pos(); + } else { + f2 = prot->shine_dal_pos(); + d2 = prot->last_translated_pos(); + } + double value = percent_touched(d1,f1,d2,f2,size); + if(value < -100 || value > 100) { + } + sum += value; + if (value>0) count++; + } + + return sum/count; +} + +double Robustness_bias_output::percent_touched(int d1, int f1, int d2, int f2, int size) { + if (f1<d1) f1 += size; + if (d2<d1) d2 += size; + if (f2<d1) f2 += size; + + if (f1<d2 && d2<f2) return 0; + if (f1<f2 && f2<d2) return 100.0*(f1-d1)/(f2+size-d2); + if (d2<f1 && f1<f2) return 100.0*(f1-d2)/(f2-d2); + if (d2<f2 && f2<f1) return 100; + if (f2<f1 && f1<d2) return 100.0*(f2-d1)/(f2+size-d2); + if (f2<d2 && d2<f1) return 100.0*(f2-d1+f1-d2)/(f2+size-d2); +} + +void Robustness_bias_output::add_duplication(int id, + bool is_neutral, + int16_t nb_non_coding_RNA, + int nb_touched_genes, + double percent_touched) { + file_duplications_ << id << ","; + file_duplications_ << is_neutral << ","; + file_duplications_ << nb_non_coding_RNA << ","; + file_duplications_ << nb_touched_genes << ","; + file_duplications_ << percent_touched << std::endl; +} + +void Robustness_bias_output::add_indiv(int id, + int seq_length, + bool has_mutate, + bool is_neutral) { + file_indiv_ << id << ","; + file_indiv_ << seq_length << ","; + file_indiv_ << has_mutate << ","; + file_indiv_ << is_neutral << std::endl; + + nb_replication++; + if (has_mutate) nb_mutant++; + if (is_neutral) nb_neutre++; +} + +void Robustness_bias_output::add_mutation(int id, + int seq_length, + bool has_mutate, + bool is_neutral, + int type, + int impact_on_length, + bool mutation_neutral, + int16_t nb_non_coding_RNA) { + std::string type_name[] = {"switch", "small_insertion", "small_deletion", "duplication", + "translocation", "inversion", "deletion"}; + nb_mutations++; + nb_mutations_type[type]++; + file_mutations_ << id << ","; + file_mutations_ << seq_length << ","; + file_mutations_ << has_mutate << ","; + file_mutations_ << is_neutral << ","; + file_mutations_ << type_name[type] << ","; + file_mutations_ << impact_on_length << ","; + file_mutations_ << mutation_neutral << ","; + file_mutations_ << nb_non_coding_RNA << std::endl; + + if(mutation_neutral) { + nb_neutral_mutations++; + sum_size_variation_neutral_mutation += impact_on_length; + nb_neutral_mutations_type[type]++; + } +} + + +int32_t impact_on_length(MutationEvent &mutationEvent, + unsigned int size) { + int32_t ret = 0; + if (mutationEvent.type() == MutationEventType::DELETION + || mutationEvent.type() == MutationEventType::DUPLICATION) { + if (mutationEvent.pos_2() > mutationEvent.pos_1()) { + ret = mutationEvent.pos_2() - mutationEvent.pos_1(); + } else { + ret = mutationEvent.pos_1() + size - mutationEvent.pos_2(); + } + } else { + if (mutationEvent.type() == MutationEventType::SMALL_INSERTION + || mutationEvent.type() == MutationEventType::SMALL_DELETION) { + ret = mutationEvent.number(); + } + } + if (mutationEvent.type() == MutationEventType::DELETION + || mutationEvent.type() == MutationEventType::SMALL_DELETION){ + return -ret; + } else { + return ret; + } +} + +void Robustness_bias_output::record_replication(unsigned int id, + Individual& child, Individual& parent, std::vector<MutationEvent*>& mutations, std::vector<bool>& mut_is_neutral) { + bool has_mutate; + bool is_neutral; + if(mutations.empty()){ + has_mutate = false; + is_neutral = true; + } else { + //std::cout << "Mutations Not Empty" << std::endl; + has_mutate = true; + child.clear_everything_except_dna_and_promoters(); + child.compute_phenotype(); + is_neutral = individual_.phenotype()->is_identical_to(*child.phenotype(),0); + } + + add_indiv(id, child.amount_of_dna(), has_mutate, is_neutral); + auto mut_neutral = mut_is_neutral.begin(); + + bool unneutral_mut = false; + for (auto mut = mutations.begin(); mut != mutations.end(); mut++, mut_neutral++) { + int length_mut = impact_on_length(**mut, child.amount_of_dna()); + child.compute_statistical_data(); + add_mutation(id, child.amount_of_dna(), has_mutate, is_neutral, (*mut)->type(), length_mut, *mut_neutral, child.nb_non_coding_RNAs()); + + if ((*mut)->type() == MutationEventType::DUPLICATION) { + int d1 = (*mut)->pos_1(); + int f1 = (*mut)->pos_2(); + int size = child.amount_of_dna(); + add_duplication(id,is_neutral, child.nb_non_coding_RNAs(), + nb_touched_gene(parent,d1,f1,size), + mean_percent_touched(parent,d1,f1,size)); + } + + if (!(*mut_neutral)) { + unneutral_mut = true; + } + } + if (mutations.size()>1) { + multiple_mutations++; + if (unneutral_mut && is_neutral) { + mult_unneutral_mut_into_neutral_replication++; + } + } +} + +void Robustness_bias_output::print_summary(std::ostream& os) { + std::string type_name[] = {"switch", "small_insertion", "small_deletion", "duplication", + "translocation", "inversion", "deletion"}; + os << "------------- Summary after " << nb_replication << " replications -------------\n"; + os << "nb neutral replications = " << nb_neutre << " | " << ((double) nb_neutre)/nb_replication*100 << "%\n"; + os << "nb mutants = " << nb_mutant << " | " << ((double) nb_mutant)/nb_replication*100 << "%\n"; + os << "\n"; + os << "+---------------+---------------+---------------+---------------+\n"; + os << "| mutation type | nb neutral | nb mutations | % neutral |\n"; + os << "+---------------+---------------+---------------+---------------+\n"; + for (int i=0; i<7; i++) { + os << "|" << std::setw(15) << type_name[i] << "|" << std::setw(15) << nb_neutral_mutations_type[i] << "|" << std::setw(15) << nb_mutations_type[i]; + os << "|" << std::setw(15) << std::setprecision(2) << ((double)nb_neutral_mutations_type[i])*100 / nb_mutations_type[i] << "|\n"; + os << "+---------------+---------------+---------------+---------------+\n"; + } + os << "|" << std::setw(15) << "Total" << "|" << std::setw(15) << nb_neutral_mutations << "|" << std::setw(15) << nb_mutations; + os << "|" << std::setw(15) << std::setprecision(2) << std::fixed << ((double)nb_neutral_mutations)*100 / nb_mutations << "|\n"; + os << "+---------------+---------------+---------------+---------------+\n"; + os << std::setw(1); + os << "nb mutations = " << nb_mutations << "\n"; + os << "nb neutral mutations = " << nb_neutral_mutations << " | " << ((double) nb_neutral_mutations)/nb_mutations*100 << "%\n"; + os << "mean size variation on neutral mutation = " << ((double) sum_size_variation_neutral_mutation)/nb_neutral_mutations << "\n"; + os << "mean size variation on neutral replication = " << ((double) sum_size_variation_neutral_mutation)/nb_neutre << "\n"; + os << "\n"; + os << "nb multiple mutations = " << multiple_mutations << "\n"; + os << "nb multiple unneutral mutations creating neutral replication = " << mult_unneutral_mut_into_neutral_replication <<"\n"; + os << "------------------------------------------------------------------------" << std::endl; +} diff --git a/src/post_treatments/libaevol_post/Robustness_bias_output.h b/src/post_treatments/libaevol_post/Robustness_bias_output.h new file mode 100644 index 000000000..1cc16c6b3 --- /dev/null +++ b/src/post_treatments/libaevol_post/Robustness_bias_output.h @@ -0,0 +1,52 @@ +// +// Created by duazel on 27/04/2020. +// + +#ifndef AEVOL_ROBUSTNESS_BIAS_OUTPUT_H +#define AEVOL_ROBUSTNESS_BIAS_OUTPUT_H + +#include "Individual.h" +#include "MutationEvent.h" +#include <fstream> + +using namespace aevol; + +class Robustness_bias_output { + public: + Robustness_bias_output(const Individual & indiv, const char * filename_indiv, const char * filename_mutation); + virtual ~Robustness_bias_output(); + + void add_indiv(int id, int seq_length, bool has_mutate, bool is_neutral); + void add_mutation(int id, int seq_length, bool has_mutate, bool is_neutral, int type, + int impact_on_length, bool mutation_neutral, int16_t nb_non_coding_RNA); + void add_duplication(int id, bool is_neutral, int16_t nb_non_coding_RNA, int nb_touched_genes, double percent_touched); + + void record_replication(unsigned int id, Individual& child, Individual& parent, std::vector<MutationEvent*>& mutations, std::vector<bool>& mut_is_neutral); + void print_summary(std::ostream & os); + + private: + std::ofstream file_indiv_; + std::ofstream file_mutations_; + std::ofstream file_duplications_; + Individual individual_; + + unsigned int nb_replication=0; + unsigned int nb_neutre=0; + unsigned int nb_mutant=0; + unsigned int nb_mutations=0; + unsigned int nb_neutral_mutations=0; + unsigned int nb_mutations_type[7]={0,0,0,0,0,0,0}; + unsigned int nb_neutral_mutations_type[7]={0,0,0,0,0,0,0}; + unsigned int sum_size_neutral_duplication=0; + unsigned int sum_size_neutral_deletion=0; + unsigned int multiple_mutations=0; + unsigned int mult_unneutral_mut_into_neutral_replication = 0; + long sum_size_variation_neutral_mutation=0; + double percent_touched(int d1, int f1, int d2, int f2, int size); + int nb_touched_gene(const Individual& individual, int d1, int f1, int size); + bool is_touched_gene(Protein* prot, int d1, int f1, int size); + double + mean_percent_touched(const Individual& individual, int d1, int f1, int size); +}; + +#endif //AEVOL_ROBUSTNESS_BIAS_OUTPUT_H diff --git a/src/post_treatments/libaevol_post/neutral_mutation_exp.cpp b/src/post_treatments/libaevol_post/neutral_mutation_exp.cpp new file mode 100644 index 000000000..0efd95292 --- /dev/null +++ b/src/post_treatments/libaevol_post/neutral_mutation_exp.cpp @@ -0,0 +1,193 @@ +// +// Created by duazel on 11/03/2020. +// + +#include "neutral_mutation_exp.h" + +#include "Dna.h" +#include "DnaMutator.h" +#include "ExpSetup.h" +#include "FuzzyFactory.h" +#include "Individual.h" +#include "JumpingMT.h" +#include "MutationEvent.h" +#include "MutationParams.h" +#include "neutral_mutation_output.h" + +#include <cmath> +#include <cstring> +#include <iostream> +#include <utility> +#include <vector> + +using namespace aevol; + +void perform_mutation(MutationEvent *mutEvent, Dna *dna) { + switch (mutEvent->type()) { + case MutationEventType::DELETION : + dna->do_deletion(mutEvent->pos_1(), mutEvent->pos_2()); + break; + + case MutationEventType::DO_SWITCH : + dna->do_switch(mutEvent->pos_1()); + break; + + case MutationEventType::DUPLICATION : + dna->do_duplication(mutEvent->pos_1(), mutEvent->pos_2(), + mutEvent->pos_3()); + break; + + case MutationEventType::INVERSION : + dna->do_inversion(mutEvent->pos_1(), mutEvent->pos_2()); + break; + + case MutationEventType::SMALL_DELETION : + dna->do_small_deletion(mutEvent->pos_1(), mutEvent->number()); + break; + + case MutationEventType::SMALL_INSERTION : + dna->do_small_insertion(mutEvent->pos_1(), mutEvent->number(), + mutEvent->seq()); + break; + + case MutationEventType::TRANSLOCATION : + dna->do_translocation(mutEvent->pos_1(), mutEvent->pos_2(), + mutEvent->pos_3(), mutEvent->pos_4(), + mutEvent->invert()); + } +} + +Individual *new_child(Individual *individual, std::vector<MutationEvent *> &mutations, + std::vector<int32_t> &lengths) { + auto child = new Individual(*individual); + DnaMutator dnaMutator(child, 0, 0); + dnaMutator.generate_mutations(); + MutationEvent *mutEvent; + mutEvent = dnaMutator.generate_next_mutation(child->amount_of_dna()); + Dna *dna = child->genetic_unit(0).dna(); + while (mutEvent) { + lengths.emplace_back(child->amount_of_dna()); + perform_mutation(mutEvent, dna); + mutations.emplace_back(new MutationEvent(*mutEvent)); + mutEvent = dnaMutator.generate_next_mutation(child->amount_of_dna()); + } + return child; +} + +Individual *new_child(Individual *individual, std::vector<MutationEvent *> &mutations, + std::vector<bool> &is_neutral) { + auto child = new Individual(*individual); + DnaMutator dnaMutator(child, 0, 0); + dnaMutator.generate_mutations(); + MutationEvent *mutEvent; + mutEvent = dnaMutator.generate_next_mutation(child->amount_of_dna()); + Dna *dna = child->genetic_unit(0).dna(); + while (mutEvent) { + perform_mutation(mutEvent, dna); + child->clear_everything_except_dna_and_promoters(); + child->compute_phenotype(); + is_neutral.emplace_back(individual->phenotype()->is_identical_to(*child->phenotype(),0)); + mutations.emplace_back(new MutationEvent(*mutEvent)); + mutEvent = dnaMutator.generate_next_mutation(child->amount_of_dna()); + } + return child; +} + +Individual *neutral_mutation(Individual *individual, unsigned int current_generation) { + Individual *child; + std::vector<MutationEvent *> mutations; + std::vector<int32_t> lengths; + while (true) { + child = new_child(individual, mutations, lengths); + if (mutations.empty()) { + delete child; + return individual; + } else { + child->clear_everything_except_dna_and_promoters(); + child->compute_phenotype(); + if (individual->phenotype()->is_identical_to(*child->phenotype(), 0)) { + break; + } else { + mutations.clear(); + delete child; + } + } + } + typedef std::pair<std::vector<MutationEvent *>::iterator, std::vector<int32_t>::iterator> pairIter; + for (pairIter p(mutations.begin(), lengths.begin()); p.first != mutations.end(); + p.first++, p.second++) { + out::record_mutation_event(**p.first, current_generation, *(p.second)); + } + delete individual; + return child; +} + +Individual *neutral_mutation_closer_to(Individual *individual, int32_t size_wanted, unsigned int current_generation) { + Individual *child; + std::vector<MutationEvent *> mutations; + std::vector<int32_t> lengths; + while (true) { + child = new_child(individual, mutations, lengths); + if (mutations.empty()) { + delete child; + return individual; + } else { + child->clear_everything_except_dna_and_promoters(); + child->compute_phenotype(); + if (individual->phenotype()->is_identical_to(*child->phenotype(), 0) && + std::abs(size_wanted - individual->amount_of_dna()) > + std::abs(size_wanted - child->amount_of_dna())) { + break; + } else { + mutations.clear(); + delete child; + } + } + } + typedef std::pair<std::vector<MutationEvent *>::iterator, std::vector<int32_t>::iterator> pairIter; + for (pairIter p(mutations.begin(), lengths.begin()); p.first != mutations.end(); + p.first++, p.second++) { + out::record_mutation_event(**p.first, current_generation, *(p.second)); + } + delete individual; + return child; +} + +Individual * run_to_size(int32_t wanted_size, const Individual & indiv) { + auto *individual = new Individual(indiv); + auto *expSetup = new ExpSetup(nullptr); + FuzzyFactory::fuzzyFactory = new FuzzyFactory(expSetup); + individual->clear_everything_except_dna_and_promoters(); + individual->compute_phenotype(); + unsigned int current_generation = 0; + while (individual->amount_of_dna() != wanted_size) { + if (current_generation % 1000 == 0) { + std::cout << current_generation << "," << individual->genetic_unit(0).dna()->length() << std::endl; + } + out::result_file << current_generation << "," << individual->genetic_unit(0).dna()->length() << std::endl; + individual = neutral_mutation_closer_to(individual, wanted_size, current_generation); + current_generation++; + } + out::result_file << current_generation << "," << individual->genetic_unit(0).dna()->length() << std::endl; + std::cout << current_generation << "," << individual->genetic_unit(0).dna()->length() << std::endl; + return individual; +} + +Individual * run_generations(unsigned int nb_generations, const Individual & indiv) { + auto *individual = new Individual(indiv); + auto *expSetup = new ExpSetup(nullptr); + FuzzyFactory::fuzzyFactory = new FuzzyFactory(expSetup); + individual->clear_everything_except_dna_and_promoters(); + individual->compute_phenotype(); + + for (unsigned int current_generation = 0; current_generation < nb_generations; ++current_generation) { + if (current_generation % 100 == 0) { + std::cout << current_generation << "," << individual->genetic_unit(0).dna()->length() << std::endl; + } + out::result_file << current_generation << "," << individual->genetic_unit(0).dna()->length() << std::endl; + individual = neutral_mutation(individual, current_generation); + } + out::result_file << nb_generations << "," << individual->genetic_unit(0).dna()->length() << std::endl; + std::cout << nb_generations << "," << individual->genetic_unit(0).dna()->length() << std::endl; + return individual; +} \ No newline at end of file diff --git a/src/post_treatments/libaevol_post/neutral_mutation_exp.h b/src/post_treatments/libaevol_post/neutral_mutation_exp.h new file mode 100644 index 000000000..77bf3431f --- /dev/null +++ b/src/post_treatments/libaevol_post/neutral_mutation_exp.h @@ -0,0 +1,27 @@ +// +// Created by duazel on 11/03/2020. +// + +#ifndef AEVOL_NEUTRAL_MUTATION_EXP_H +#define AEVOL_NEUTRAL_MUTATION_EXP_H + +#include <string> + +#include "ExpManager.h" +#include "Individual.h" +#include "MutationParams.h" + +using namespace aevol; + +Individual *new_child(Individual *individual, + std::vector<MutationEvent *> &mutations, + std::vector<int32_t> &lengths); +Individual *new_child(Individual *individual, std::vector<MutationEvent *> &mutations, + std::vector<bool> &is_neutral); + +Individual *neutral_mutation(Individual *individual); + +Individual * run_generations(unsigned int nb_generations, const Individual & indiv); +Individual * run_to_size(int32_t wanted_size, const Individual & indiv); + +#endif //AEVOL_NEUTRAL_MUTATION_EXP_H diff --git a/src/post_treatments/libaevol_post/neutral_mutation_output.cpp b/src/post_treatments/libaevol_post/neutral_mutation_output.cpp new file mode 100644 index 000000000..38dcc3ffc --- /dev/null +++ b/src/post_treatments/libaevol_post/neutral_mutation_output.cpp @@ -0,0 +1,144 @@ +// +// Created by duazel on 12/03/2020. +// + +#include "neutral_mutation_output.h" + +unsigned int indent_level = 0; + +namespace out { + std::ofstream log; + std::ofstream result_file; + std::ofstream mutation_file; +} + +void out::init(const char *name_result, + const char *name_mutation) { + result_file.open(name_result); + mutation_file.open(name_mutation); + indent_level = 0; + + mutation_file << "# ##############################################################################\n"; + mutation_file << "# Mutations during the accumulations of neutral mutations\n"; + mutation_file << "# ##############################################################################\n"; + mutation_file << "# 1. Generation (mut. occurred when producing the indiv. of this\n" + "# generation)\n"; + mutation_file << "# 2. Genetic unit (which underwent the mutation, 0 = chromosome) \n"; + mutation_file << "# 3. Mutation type (0: switch, 1: smallins, 2: smalldel, 3:dupl, 4: del,\n" + "# 5:trans, 6:inv, 7:insert)\n"; + mutation_file << "# 4. pos_0 (position for the small events,\n" + "# begin_segment for the rearrangements\n"; + mutation_file << "# 5. pos_1 (-1 for the small events, \n" + "# end_segment for the rearrangements\n"; + mutation_file << "# 6. pos_2 (reinsertion point for duplic.,\n" + "# cutting point in segment for transloc.,\n" + "# -1 for other events)\n"; + mutation_file << "# 7. pos_3 (reinsertion point for transloc.,\n" + "# -1 for other events)\n"; + mutation_file << "# 8. invert (transloc, was the segment inverted (0/1)?)\n"; + mutation_file << "# 9. align_score (score that was needed for the rearrangement to occur,\n" + "# char score of the first alignment for ins_HT and repl_HT)\n"; + mutation_file << "# 10. align_score2 (score for the reinsertion for transloc,\n" + "# score of the second alignment for ins_HT and repl_HT)\n"; + mutation_file << "# 11. seg_len (segment length for rearrangement)\n"; + mutation_file << "# 12. repl_seg_len (replaced segment length for repl_HT,\n" + "# -1 for the others)\n"; + mutation_file << "# 13. GU_length (before the event)\n"; + mutation_file << "# 14. Impact of the mutation on the metabolic error\n" + "# (negative value = smaller gap after = beneficial mutation)\n"; + mutation_file << "# 14. Impact of the mutation on the fitness\n" + "# (negative value = smaller gap after = beneficial mutation)\n"; + mutation_file << "# 15. Number of coding RNAs possibly disrupted by the breakpoints\n"; + mutation_file << "# 16. Number of coding RNAs completely included in the segment\n" + "# (donor segment in the case of a transfer)\n"; + mutation_file << "# 17. Number of coding RNAs that were completely included in the replaced segment\n" + "# (meaningful only for repl_HT) \n"; + mutation_file << "################################################################################\n"; + mutation_file << "#\n"; + mutation_file << "# Header for R\n"; + mutation_file << "gener gen_unit mut_type pos_0 pos_1 pos_2 pos_3 invert align_score align_score_2 seg_len" + " repl_seg_len GU_len impact nbgenesatbreak nbgenesinseg nbgenesinreplseg" + << std::endl; +} + +void out::close() { + result_file.close(); + mutation_file.close(); +} + +int32_t mutation_event_pos_2(MutationEvent &mutationEvent) { + if (mutationEvent.type() == MutationEventType::DELETION + || mutationEvent.type() == MutationEventType::DUPLICATION + || mutationEvent.type() == MutationEventType::INVERSION + || mutationEvent.type() == MutationEventType::TRANSLOCATION) { + return mutationEvent.pos_2(); + } else { + return -1; + } +} + +int32_t mutation_event_pos_3(MutationEvent &mutationEvent) { + if (mutationEvent.type() == MutationEventType::DUPLICATION + || mutationEvent.type() == MutationEventType::TRANSLOCATION) { + return mutationEvent.pos_3(); + } else { + return -1; + } +} + +int32_t mutation_event_pos_4(MutationEvent &mutationEvent) { + if (mutationEvent.type() == MutationEventType::TRANSLOCATION) { + return mutationEvent.pos_4(); + } else { + return -1; + } +} + +int32_t mutation_event_invert(MutationEvent &mutationEvent) { + if (mutationEvent.type() == MutationEventType::TRANSLOCATION) { + return mutationEvent.invert(); + } else { + return 0; + } +} + +int32_t mutation_event_length_mutation(MutationEvent &mutationEvent, + unsigned int size) { + if (mutationEvent.type() == MutationEventType::DELETION + || mutationEvent.type() == MutationEventType::DUPLICATION) { + if (mutationEvent.pos_2() > mutationEvent.pos_1()) { + return mutationEvent.pos_2() - mutationEvent.pos_1(); + } else { + return mutationEvent.pos_1() + size - mutationEvent.pos_2(); + } + } else { + if (mutationEvent.type() == MutationEventType::SMALL_INSERTION + || mutationEvent.type() == MutationEventType::SMALL_DELETION) { + return mutationEvent.number(); + } else { + return 0; + } + } +} + +void out::record_mutation_event(MutationEvent &mutationEvent, + unsigned int number_generation, + unsigned int size) { + mutation_file << number_generation << " "; + mutation_file << 0 << " "; // GeneticUnit + mutation_file << mutationEvent.type() << " "; + mutation_file << mutationEvent.pos_1() << " "; + mutation_file << mutation_event_pos_2(mutationEvent) << " "; + mutation_file << mutation_event_pos_3(mutationEvent) << " "; + mutation_file << mutation_event_pos_4(mutationEvent) << " "; + mutation_file << mutation_event_invert(mutationEvent) << " "; + mutation_file << 0 << " "; // align_score + mutation_file << 0 << " "; // align_score2 + mutation_file << mutation_event_length_mutation(mutationEvent, size) << " "; + mutation_file << -1 << " "; // rep_seg_length + mutation_file << size << " "; + mutation_file << 0 << " "; // Impact of the mutation on the metabolic error + mutation_file << 0 << " "; // Nb of coding RNAs possibly disrupted by the breakpoints + mutation_file << 0 << " "; // Nb of coding RNAs included in the segment + mutation_file << 0 << std::endl; // Nb of coding RNAs that were completely included in the replaced segment +} diff --git a/src/post_treatments/libaevol_post/neutral_mutation_output.h b/src/post_treatments/libaevol_post/neutral_mutation_output.h new file mode 100644 index 000000000..c775fabf8 --- /dev/null +++ b/src/post_treatments/libaevol_post/neutral_mutation_output.h @@ -0,0 +1,25 @@ +// +// Created by duazel on 12/03/2020. +// + +#ifndef AEVOL_NEUTRAL_MUTATION_OUTPUT_H +#define AEVOL_NEUTRAL_MUTATION_OUTPUT_H + +#include <fstream> +#include "MutationEvent.h" + +using namespace aevol; + +namespace out { + extern std::ofstream result_file; + extern std::ofstream mutation_file; + + void init(const char *name_result, + const char *name_mutation); + + void close(); + + void record_mutation_event(MutationEvent &mutationEvent, unsigned int number_generation, unsigned int size); +} + +#endif //AEVOL_NEUTRAL_MUTATION_OUTPUT_H diff --git a/src/post_treatments/testJSON.cpp b/src/post_treatments/testJSON.cpp new file mode 100644 index 000000000..22feafaf0 --- /dev/null +++ b/src/post_treatments/testJSON.cpp @@ -0,0 +1,19 @@ +// +// Created by duazel on 27/05/2020. +// + +#include "IOJson.h" +#include "ExpManager.h" +using namespace aevol; + +int main(int argc, char **argv) { + IOJson test("param.in", "chromosome.txt"); + + test.load(new ExpManager(), true, NULL, 0, NULL, 0); + test.write("input.json"); + + +// IOJson test2("extracted.json"); +// test2.load(new ExpManager(), true, NULL, 0, NULL, 0); + // test2.write("output.json"); +} \ No newline at end of file -- GitLab