diff --git a/CMakeLists.txt b/CMakeLists.txt index 936b2ee9d3e5d5d95f4c104f1605e6a682385d6e..48d4d252063f33f1a8471d1060ad55f2e40bc55e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,7 @@ add_definitions(-DVERSION="${PROJECT_VERSION}") set(AUTHORIZED_MTPERIOD 607 1279 2281 4253 11213 19937 44497 86243 132049 216091) + set(with-x ON CACHE BOOL "Whether to enable graphical outputs") set(with-omp OFF CACHE BOOL "Whether to enable OpenMP parallelization") set(with-tbb OFF CACHE BOOL "Whether to enable TBB parallelization") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cea458580e42e96ba6c179d1e4afa23b3df847d6..bb58106337a86500cd9c347dea3f63165261f34a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -26,8 +26,20 @@ target_link_libraries(aevol_create PUBLIC aevol) add_executable(aevol_modify aevol_modify.cpp) target_link_libraries(aevol_modify PUBLIC aevol) -add_executable(aevol_propagate aevol_propagate.cpp) -target_link_libraries(aevol_propagate PUBLIC aevol) +####################### +## Searching for libs +####################### +set(Boost_USE_STATIC_LIBS OFF) +set(Boost_USE_MULTITHREADED ON) +set(Boost_USE_STATIC_RUNTIME OFF) +find_package(Boost 1.65.0 COMPONENTS filesystem) + +if(Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + add_executable(aevol_propagate aevol_propagate.cpp) + target_link_libraries(aevol_propagate PUBLIC aevol ${Boost_LIBRARIES}) +endif() + add_executable(aevol_run aevol_run.cpp) target_link_libraries(aevol_run PUBLIC aevol) diff --git a/src/aevol_modify.cpp b/src/aevol_modify.cpp index f4c2bba044dc792296ed57573d4a7c0aeca9fefc..52fe78475f4b74c5c3b82284692a340ccbdc4048 100644 --- a/src/aevol_modify.cpp +++ b/src/aevol_modify.cpp @@ -501,14 +501,22 @@ int main(int argc, char* argv[]) { int32_t seed = atoi(line->words[1]); std::shared_ptr<JumpingMT> prng = std::make_shared<JumpingMT>(seed); - // Change prngs - #if __cplusplus == 201103L - sel->set_prng(make_unique<JumpingMT>(seed)); - 
world->set_prng(make_unique<JumpingMT>(seed)); - #else - sel->set_prng(std::make_unique<JumpingMT>(seed)); - world->set_prng(std::make_unique<JumpingMT>(seed)); - #endif + for (int16_t x = 0; x < world->width(); x++) { + for (int16_t y = 0; y < world->height(); y++) { + int32_t seed = prng->random(1000000); +#if __cplusplus == 201103L + world->grid(x,y)->set_reprod_prng(make_unique<JumpingMT>(seed)); + world->grid(x,y)->set_reprod_prng_simd(make_unique<JumpingMT>(seed)); + world->grid(x,y)->set_mut_prng(std::make_shared<JumpingMT>(seed)); + world->grid(x,y)->set_stoch_prng(std::make_shared<JumpingMT>(seed)); +#else + world->grid(x,y)->set_reprod_prng(std::make_unique<JumpingMT>(seed)); + world->grid(x,y)->set_reprod_prng_simd(std::make_unique<JumpingMT>(seed)); + world->grid(x,y)->set_mut_prng(std::make_shared<JumpingMT>(seed)); + world->grid(x,y)->set_stoch_prng(std::make_shared<JumpingMT>(seed)); +#endif + } + } printf("\tChange of the seed to %d in selection and world \n", atoi(line->words[1])); @@ -632,7 +640,7 @@ int main(int argc, char* argv[]) { sprintf(tree_file_name, "tree/tree_" TIMESTEP_FORMAT ".ae", timestep); #endif gzFile tree_file = gzopen(tree_file_name, "w"); - tree->write_to_tree_file(tree_file); + tree->write_to_tree_file(timestep,tree_file); gzclose(tree_file); printf("OK\n"); } diff --git a/src/aevol_propagate.cpp b/src/aevol_propagate.cpp index f1ce344e7f609098d08872d0e047adcd76fbd8f4..2ba8cd746155d6efc48bf9343d8c9ce0cf604683 100644 --- a/src/aevol_propagate.cpp +++ b/src/aevol_propagate.cpp @@ -75,6 +75,7 @@ static char* output_dir = nullptr; static bool keep_prng_states = false; static bool verbose = false; +static int32_t selseed = -1; int main(int argc, char* argv[]) { interpret_cmd_line_options(argc, argv); @@ -92,81 +93,73 @@ int main(int argc, char* argv[]) { // ================================================================= // Load the model experiment // ================================================================= - #ifndef 
__NO_X - ExpManager* exp_manager = new ExpManager_X11(); - #else +#ifndef __NO_X + ExpManager *exp_manager = new ExpManager_X11(); +#else ExpManager* exp_manager = new ExpManager(); - #endif +#endif exp_manager->load(input_dir, timestep, verbose, false); if (not keep_prng_states) { auto max = std::numeric_limits<int32_t>::max(); - #if __cplusplus == 201103L - auto prng = make_unique<JumpingMT>(time(nullptr)); - - exp_manager->sel()->set_prng( - make_unique<JumpingMT>(prng->random(max))); - exp_manager->world()->set_prng( - make_unique<JumpingMT>(prng->random(max))); - #else - auto prng = std::make_unique<JumpingMT>(time(nullptr)); - - exp_manager->sel()->set_prng( - std::make_unique<JumpingMT>(prng->random(max))); - exp_manager->world()->set_prng( - std::make_unique<JumpingMT>(prng->random(max))); - #endif - #if __cplusplus == 201103L + auto prng = make_unique<JumpingMT>(time(nullptr)); - for (int16_t x = 0 ; x < exp_manager->world()->width() ; x++) - for (int16_t y = 0 ; y < exp_manager->world()->height() ; y++) { - exp_manager->world()->grid(x,y)->set_reprod_prng(make_unique<JumpingMT>(prng->random(1000000))); - } + exp_manager->sel()->set_prng( + make_unique<JumpingMT>(prng->random(max))); + exp_manager->world()->set_prng( + make_unique<JumpingMT>(prng->random(max))); #else - for (int16_t x = 0 ; x < exp_manager->world()->width() ; x++) - for (int16_t y = 0 ; y < exp_manager->world()->height() ; y++) { - exp_manager->world()->grid(x,y)->set_reprod_prng(std::make_unique<JumpingMT>(prng->random(1000000))); - } -#endif - exp_manager->world()->set_mut_prng( - std::make_shared<JumpingMT>(prng->random(max))); - exp_manager->world()->set_stoch_prng( - std::make_shared<JumpingMT>(prng->random(max))); + auto prng = std::make_unique<JumpingMT>(time(nullptr)); for (int16_t x = 0; x < exp_manager->world()->width(); x++) { for (int16_t y = 0; y < exp_manager->world()->height(); y++) { - exp_manager->world()->grid(x, y)->set_mut_prng( - std::make_shared<JumpingMT>( - 
exp_manager->world()->mut_prng()->random(max))); - exp_manager->world()->grid(x, y)->individual()->set_mut_prng( - exp_manager->world()->grid(x, y)->mut_prng()); - - exp_manager->world()->grid(x, y)->set_stoch_prng( - std::make_shared<JumpingMT>( - exp_manager->world()->stoch_prng()->random(max))); - exp_manager->world()->grid(x, y)->individual()->set_stoch_prng( - exp_manager->world()->grid(x, y)->stoch_prng()); + int32_t seed = prng->random(1000000); +#if __cplusplus == 201103L + exp_manager->world()->grid(x,y)->set_reprod_prng(make_unique<JumpingMT>(seed)); + exp_manager->world()->grid(x,y)->set_reprod_prng_simd(make_unique<JumpingMT>(seed)); + exp_manager->world()->grid(x,y)->set_mut_prng(std::make_shared<JumpingMT>(seed)); + exp_manager->world()->grid(x,y)->set_stoch_prng(std::make_shared<JumpingMT>(seed)); +#else + exp_manager->world()->grid(x, y)->set_reprod_prng(std::make_unique<JumpingMT>(seed)); + exp_manager->world()->grid(x, y)->set_reprod_prng_simd(std::make_unique<JumpingMT>(seed)); + exp_manager->world()->grid(x, y)->set_mut_prng(std::make_shared<JumpingMT>(seed)); + exp_manager->world()->grid(x, y)->set_stoch_prng(std::make_shared<JumpingMT>(seed)); +#endif } } +#endif + exp_manager->world()->set_phen_target_prngs( - std::make_shared<JumpingMT>(prng->random(max)), - std::make_shared<JumpingMT>(prng->random(max))); - } - else - { - if (selseed != -1) - { + std::make_shared<JumpingMT>(prng->random(max)), + std::make_shared<JumpingMT>(prng->random(max))); + } else { + if (selseed != -1) { #if __cplusplus == 201103L auto prng = make_unique<JumpingMT>(selseed); #else auto prng = std::make_unique<JumpingMT>(selseed); #endif - exp_manager->Propagate(output_dir); + for (int16_t x = 0; x < exp_manager->world()->width(); x++) { + for (int16_t y = 0; y < exp_manager->world()->height(); y++) { + int32_t seed = prng->random(1000000); +#if __cplusplus == 201103L + exp_manager->world()->grid(x,y)->set_reprod_prng(make_unique<JumpingMT>(seed)); + 
exp_manager->world()->grid(x,y)->set_reprod_prng_simd(make_unique<JumpingMT>(seed)); +#else + exp_manager->world()->grid(x, y)->set_reprod_prng(std::make_unique<JumpingMT>(seed)); + exp_manager->world()->grid(x, y)->set_reprod_prng_simd(std::make_unique<JumpingMT>(seed)); +#endif + } + } + + exp_manager->save_copy(output_dir, timestep); + } + } } @@ -179,117 +172,117 @@ int main(int argc, char* argv[]) { \brief print help and exist */ -void print_help(char* prog_path) { - // Get the program file-name in prog_name (strip prog_path of the path) - char* prog_name; // No new, it will point to somewhere inside prog_path - if ((prog_name = strrchr(prog_path, '/'))) { - prog_name++; - } - else { - prog_name = prog_path; - } + void print_help(char *prog_path) { + // Get the program file-name in prog_name (strip prog_path of the path) + char *prog_name; // No new, it will point to somewhere inside prog_path + if ((prog_name = strrchr(prog_path, '/'))) { + prog_name++; + } else { + prog_name = prog_path; + } - printf("******************************************************************************\n"); - printf("* *\n"); - printf("* aevol - Artificial Evolution *\n"); - printf("* *\n"); - printf("* Aevol is a simulation platform that allows one to let populations of *\n"); - printf("* digital organisms evolve in different conditions and study experimentally *\n"); - printf("* the mechanisms responsible for the structuration of the genome and the *\n"); - printf("* transcriptome. 
*\n"); - printf("* *\n"); - printf("******************************************************************************\n"); - printf("\n"); - printf("%s:\n", prog_name); - printf("\tCreate a fresh copy of the experiment as it was at the given timestep.\n"); - printf("\tThe timestep number of the copy will be reset to 0.\n"); - printf("\n"); - printf("Usage : %s -h or --help\n", prog_name); - printf(" or : %s -V or --version\n", prog_name); - printf(" or : %s [-t TIMESTEP] [-K] [-o OUTDIR] [-v]\n", - prog_name); - printf("\nOptions\n"); - printf(" -h, --help\n\tprint this help, then exit\n"); - printf(" -V, --version\n\tprint version number, then exit\n"); - printf(" -t, --timestep TIMESTEP\n"); - printf("\tspecify timestep to propagate\n"); - printf(" -K, --keep-prng-st\n\tdo not alter prng states\n"); + printf("******************************************************************************\n"); + printf("* *\n"); + printf("* aevol - Artificial Evolution *\n"); + printf("* *\n"); + printf("* Aevol is a simulation platform that allows one to let populations of *\n"); + printf("* digital organisms evolve in different conditions and study experimentally *\n"); + printf("* the mechanisms responsible for the structuration of the genome and the *\n"); + printf("* transcriptome. 
*\n"); + printf("* *\n"); + printf("******************************************************************************\n"); + printf("\n"); + printf("%s:\n", prog_name); + printf("\tCreate a fresh copy of the experiment as it was at the given timestep.\n"); + printf("\tThe timestep number of the copy will be reset to 0.\n"); + printf("\n"); + printf("Usage : %s -h or --help\n", prog_name); + printf(" or : %s -V or --version\n", prog_name); + printf(" or : %s [-t TIMESTEP] [-K] [-o OUTDIR] [-v]\n", + prog_name); + printf("\nOptions\n"); + printf(" -h, --help\n\tprint this help, then exit\n"); + printf(" -V, --version\n\tprint version number, then exit\n"); + printf(" -t, --timestep TIMESTEP\n"); + printf("\tspecify timestep to propagate\n"); + printf(" -K, --keep-prng-st\n\tdo not alter prng states\n"); // printf(" -i, --in INDIR\n" // "\tspecify input directory (default \".\")\n"); - printf(" -o, --out OUTDIR\n" + printf(" -o, --out OUTDIR\n" "\tspecify output directory (default \"./output\")\n"); - printf(" -v, --verbose\n\tbe verbose\n"); -} + printf(" -v, --verbose\n\tbe verbose\n"); + } -void interpret_cmd_line_options(int argc, char* argv[]) { - // Define allowed options - const char* options_list = "hVt:o:v"; - static struct option long_options_list[] = { - {"help", no_argument, nullptr, 'h'}, - {"version", no_argument, nullptr, 'V'}, - {"timestep", required_argument, nullptr, 't'}, - {"keep-prng-st", no_argument, nullptr, 'K'}, + void interpret_cmd_line_options(int argc, char *argv[]) { + // Define allowed options + const char *options_list = "hVt:o:v"; + static struct option long_options_list[] = { + {"help", no_argument, nullptr, 'h'}, + {"version", no_argument, nullptr, 'V'}, + {"timestep", required_argument, nullptr, 't'}, + {"keep-prng-st", no_argument, nullptr, 'K'}, // {"in", required_argument, nullptr, 'i'}, - {"out", required_argument, nullptr, 'o'}, - {"verbose", no_argument, nullptr, 'v'}, - {0, 0, 0, 0} - }; - - // Get actual values of the CLI 
options - int option; - while ((option = getopt_long(argc, argv, options_list, long_options_list, - nullptr)) != -1) { - switch (option) { - case 'h' : { - print_help(argv[0]); - exit(EXIT_SUCCESS); - } - case 'V' : { - Utils::PrintAevolVersion(); - exit(EXIT_SUCCESS); - } - case 't' : { - timestep = atoi(optarg); - break; - } - case 'K' : { - keep_prng_states = true; - break; - } + {"out", required_argument, nullptr, 'o'}, + {"verbose", no_argument, nullptr, 'v'}, + {0, 0, 0, 0} + }; + + // Get actual values of the CLI options + int option; + while ((option = getopt_long(argc, argv, options_list, long_options_list, + nullptr)) != -1) { + switch (option) { + case 'h' : { + print_help(argv[0]); + exit(EXIT_SUCCESS); + } + case 'V' : { + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + } + case 't' : { + timestep = atoi(optarg); + break; + } + case 'K' : { + keep_prng_states = true; + break; + } // case 'i' : { // input_dir = new char[strlen(optarg) + 1]; // strcpy(input_dir, optarg); // break; // } - case 'o' : { - output_dir = new char[strlen(optarg) + 1]; - strcpy(output_dir, optarg); - break; + case 'o' : { + output_dir = new char[strlen(optarg) + 1]; + strcpy(output_dir, optarg); + break; + } + case 'v' : { + verbose = true; + break; + } + default : { + // An error message is printed in getopt_long, we just need to exit + exit(EXIT_FAILURE); + } + } } - case 'v' : { - verbose = true; - break; + + // If input directory wasn't provided, use default + if (input_dir == nullptr) { + input_dir = new char[255]; + sprintf(input_dir, "%s", "."); } - default : { - // An error message is printed in getopt_long, we just need to exit - exit(EXIT_FAILURE); + // If output directory wasn't provided, use default + if (output_dir == nullptr) { + output_dir = new char[255]; + sprintf(output_dir, "%s", "output"); } - } - } - // If input directory wasn't provided, use default - if (input_dir == nullptr) { - input_dir = new char[255]; - sprintf(input_dir, "%s", "."); - } - // If 
output directory wasn't provided, use default - if (output_dir == nullptr) { - output_dir = new char[255]; - sprintf(output_dir, "%s", "output"); - } + // If timestep wasn't provided, use default + if (timestep == -1) { + timestep = OutputManager::last_gener(); + } + } - // If timestep wasn't provided, use default - if (timestep == -1) { - timestep = OutputManager::last_gener(); - } -} diff --git a/src/aevol_run.cpp b/src/aevol_run.cpp index 6efeb2ce9f061ea0f774a9b8c74b915fc1e146b4..4e94876acdb272d8477c0b53c71eb8db754c225a 100644 --- a/src/aevol_run.cpp +++ b/src/aevol_run.cpp @@ -73,6 +73,9 @@ static bool verbose = false; static int64_t t0 = -1; static int64_t t_end = -1; static int64_t nb_steps = -1; +static int grain_size = 1; +static bool w_mrca = false; + #ifndef __NO_X static bool show_display_on_startup = true; #endif @@ -108,10 +111,6 @@ int main(int argc, char* argv[]) { exp_manager->load(t0, verbose, true); exp_manager->set_t_end(t_end); - int64_t t0 = -1; - int64_t t_end = -1; - int64_t nb_steps = -1; - int grain_size = 1; // Make a numbered copy of each static input file diff --git a/src/libaevol/CMakeLists.txt b/src/libaevol/CMakeLists.txt index f804be2d5891f07f1f9a2dcfe7131ace86026745..a136a248ac9d013ab0bfa5285e803c99693bbddb 100644 --- a/src/libaevol/CMakeLists.txt +++ b/src/libaevol/CMakeLists.txt @@ -144,7 +144,23 @@ add_library(aevol InsertionHT.cpp InsertionHT.h ReplacementHT.cpp - ReplacementHT.h make_unique.h ae_logger.h) + ReplacementHT.h make_unique.h ae_logger.h + SIMD_Individual.h + SIMD_Individual.cpp + SaveWorld.h + SaveWorld.cpp + LightTree.h + LightTree.cpp + Dna_SIMD.h + Dna_SIMD.cpp + DnaMutator.h + DnaMutator.cpp + Stats_SIMD.h + Stats_SIMD.cpp + MutationEvent.h + MutationEvent.cpp + AncestorStats.h + AncestorStats.cpp) # ============================================================================ diff --git a/src/libaevol/ExpManager.cpp b/src/libaevol/ExpManager.cpp index 
72ae72a4f69a88f915f34a342d24ecce4e093a8f..facf4a55525e719d6821dcdd23b3f35b80a1723a 100644 --- a/src/libaevol/ExpManager.cpp +++ b/src/libaevol/ExpManager.cpp @@ -310,6 +310,8 @@ void ExpManager::save_copy(char* dir, int64_t time) const output_m_->WriteLastGenerFile(dir); } + + void ExpManager::step_to_next_generation() { // TODO <david.parsons@inria.fr> Apply phenotypic target variation and noise world_->ApplyHabitatVariation(); diff --git a/src/libaevol/GeneticUnit.h b/src/libaevol/GeneticUnit.h index 6a29296bb73d4b161d4fed12058ac42db1c14955..d528853607c8fd7f669044671551913cc946a1e9 100644 --- a/src/libaevol/GeneticUnit.h +++ b/src/libaevol/GeneticUnit.h @@ -563,7 +563,6 @@ class GeneticUnit { // computation (mainly in post-treaments) bool transcribed_; bool translated_; - bool phenotypic_contributions_computed_; bool non_coding_computed_; bool distance_to_target_computed_; bool fitness_computed_; diff --git a/src/libaevol/Selection.cpp b/src/libaevol/Selection.cpp index 1a6ec21c6af5240473c8a18a59a0bbcd1021d82e..c0eb3a58f8f363cce5cb24b44563c7c857bee354 100644 --- a/src/libaevol/Selection.cpp +++ b/src/libaevol/Selection.cpp @@ -240,7 +240,7 @@ void Selection::step_to_next_generation() { world->grid(x, y)->old_one = Individual::CreateClone(world->indiv_at(x, y), 444444); }*/ if (fitness_function_ == FITNESS_GLOBAL_SUM) { - +#ifdef __REGUL int number_of_phenotypic_target_models = dynamic_cast<const Habitat_R&> (world->grid(0,0)->habitat()).number_of_phenotypic_target_models(); fitness_sum_tab_ = new double[number_of_phenotypic_target_models]; @@ -251,6 +251,10 @@ void Selection::step_to_next_generation() { fitness_sum_tab_[env_id] += dynamic_cast<Individual_R*>(world->indiv_at(i, j))->fitness(env_id); } } +#else + printf("Fitness local sum is not supported for Aevol (only R-Aevol)\n"); + exit(-1); +#endif } // Do local competitions @@ -1090,10 +1094,13 @@ Individual *Selection::do_local_competition (int16_t x, int16_t y) { double sum_local_fit = 0.0; //double* 
loc_phenotype = new double[300]; +#ifdef __REGUL double ** fitness_sum_local_tab_; int number_of_phenotypic_target_models = dynamic_cast<const Habitat_R&> (world->grid(x,y)->habitat()).number_of_phenotypic_target_models(); +#endif if (fitness_function_ == FITNESS_LOCAL_SUM) { +#ifdef __REGUL fitness_sum_local_tab_ = new double*[fitness_function_scope_x_*fitness_function_scope_y_]; for (int tab_id = 0; tab_id < fitness_function_scope_x_*fitness_function_scope_y_; tab_id++) fitness_sum_local_tab_[tab_id] = new double[number_of_phenotypic_target_models]; @@ -1124,6 +1131,10 @@ Individual *Selection::do_local_competition (int16_t x, int16_t y) { } } } +#else + printf("Fitness local sum is not supported for Aevol (only R-Aevol)\n"); + exit(-1); +#endif } int tab_id = 0; @@ -1135,19 +1146,29 @@ Individual *Selection::do_local_competition (int16_t x, int16_t y) { if (fitness_function_ == FITNESS_EXP) local_fit_array[count] = world->indiv_at(cur_x, cur_y)->fitness(); else if (fitness_function_ == FITNESS_GLOBAL_SUM) { +#ifdef __REGUL double composed_fitness = 0; for (int env_id = 0; env_id < number_of_phenotypic_target_models; env_id++) { composed_fitness += dynamic_cast<Individual_R*>(world->indiv_at(cur_x, cur_y))->fitness(env_id) / fitness_sum_tab_[env_id]; } composed_fitness/=number_of_phenotypic_target_models; local_fit_array[count] = composed_fitness; +#else + printf("Fitness local sum is not supported for Aevol (only R-Aevol)\n"); + exit(-1); +#endif } else if (fitness_function_ == FITNESS_LOCAL_SUM) { +#ifdef __REGUL double composed_fitness = 0; for (int env_id = 0; env_id < number_of_phenotypic_target_models; env_id++) { composed_fitness += dynamic_cast<Individual_R*>(world->indiv_at(cur_x, cur_y))->fitness(env_id) / fitness_sum_local_tab_[tab_id][env_id]; } composed_fitness/=number_of_phenotypic_target_models; local_fit_array[count] = composed_fitness; +#else + printf("Fitness local sum is not supported for Aevol (only R-Aevol)\n"); + exit(-1); +#endif } 
local_fit_array[count] = world->indiv_at(cur_x, cur_y)->fitness(); @@ -1161,9 +1182,14 @@ Individual *Selection::do_local_competition (int16_t x, int16_t y) { } if (fitness_function_ == FITNESS_LOCAL_SUM) { +#ifdef __REGUL for (int tab_id = 0; tab_id < fitness_function_scope_x_ * fitness_function_scope_y_; tab_id++) delete[] fitness_sum_local_tab_[tab_id]; delete[] fitness_sum_local_tab_; +#else + printf("Fitness local sum is not supported for Aevol (only R-Aevol)\n"); + exit(-1); +#endif } //printf("Competition 2\n"); // Do the competitions between the individuals, based on one of the 4 methods: diff --git a/src/libaevol/raevol/Individual_R.h b/src/libaevol/raevol/Individual_R.h index 80bb6e1c13576d54ef602a7539e097a2215f285f..3e7cffb88634542c7693d09b962be3b661b12c5c 100644 --- a/src/libaevol/raevol/Individual_R.h +++ b/src/libaevol/raevol/Individual_R.h @@ -120,6 +120,9 @@ class Individual_R : public virtual Individual //void update_phenotype(); void update_phenotype( void ); + double fitness(int env_id) { return fitness_tab_[env_id]; } + double fitness() { return Individual::fitness(); } + void create_csv(char* directory_name); void draw_phenotype(const PhenotypicTarget& target, char* directoryName, int generation); void clear_dist_sum() { _dist_sum = 0; }; diff --git a/src/post_treatments/IndivAnalysis.cpp b/src/post_treatments/IndivAnalysis.cpp new file mode 100644 index 0000000000000000000000000000000000000000..daed86c69d5eef14a2e2ccad911b4c26da7bd825 --- /dev/null +++ b/src/post_treatments/IndivAnalysis.cpp @@ -0,0 +1,370 @@ +// +// Created by dparsons on 31/05/16. 
+//
+
+// ============================================================================
+//                                   Includes
+// ============================================================================
+#include "IndivAnalysis.h"
+
+#include "aevol.h"
+
+namespace aevol {
+
+// ============================================================================
+//                       Definition of static attributes
+// ============================================================================
+
+// ============================================================================
+//                                Constructors
+// ============================================================================
+IndivAnalysis::IndivAnalysis(const Individual& indiv) : Individual(indiv) {
+  // Nothing to add: all the state comes from the Individual copy constructor
+}
+
+// ============================================================================
+//                                 Destructor
+// ============================================================================
+
+// ============================================================================
+//                                   Methods
+// ============================================================================
+/**
+ * Compute the theoretical proportion of neutral offspring.
+ *
+ * The proportion is derived from Carole's formula, i.e. from the mutation and
+ * rearrangement rates and from the neutral regions of the first genetic unit,
+ * not from multiple replications.
+ *
+ * \return theoretical proportion of neutral offspring
+ */
+double IndivAnalysis::compute_theoritical_f_nu() {
+  // We first have to collect information about genome structure.
+  // Abbreviations are chosen according to Carole's formula.
+  // Please notice that compared to the formula we have the beginning
+  // and ends of neutral regions instead of 'functional regions'
+  GeneticUnit& chromosome = genetic_unit_list_.front();
+  int32_t L = chromosome.dna()->length();
+  int32_t N_G = chromosome.nb_neutral_regions(); // which is not exactly Carole's original definition
+  int32_t* b_i = chromosome.beginning_neutral_regions();
+  int32_t* e_i = chromosome.end_neutral_regions();
+  int32_t lambda = chromosome.nb_bases_in_neutral_regions();
+  int32_t l = L - lambda; // nb bases in 'functional regions'
+
+  int32_t* lambda_i = nullptr; // nb bases in ith neutral region
+  if (N_G > 0) // all the chromosome may be functional
+  {
+    lambda_i = new int32_t[N_G];
+
+    for (int32_t i = 0; i < N_G - 1; i++) {
+      lambda_i[i] = e_i[i] - b_i[i] + 1;
+    }
+    // the last neutral region may overlap the beginning of the chromosome
+    if (b_i[N_G - 1] > e_i[N_G - 1])
+    {
+      lambda_i[N_G - 1] = (e_i[N_G - 1] + L) - b_i[N_G - 1] + 1;
+    }
+    else // no overlap
+    {
+      lambda_i[N_G - 1] = e_i[N_G - 1] - b_i[N_G - 1] + 1;
+    }
+  }
+
+  // we now compute the probabilities of neutral reproduction for
+  // each type of mutation and rearrangement and update Fv
+  double Fv = 1;
+
+  // mutation + insertion + deletion
+  double nu_local_mutation = 1 - ((double) l) / L;
+  Fv = pow(1 - point_mutation_rate() * (1 - nu_local_mutation), L);
+  Fv *= pow(1 - small_insertion_rate() * (1 - nu_local_mutation), L);
+  Fv *= pow(1 - small_deletion_rate() * (1 - nu_local_mutation), L);
+
+  // inversion ~ two local mutations
+  double nu_inversion = nu_local_mutation * nu_local_mutation;
+  Fv *= pow(1 - inversion_rate() * (1 - nu_inversion), L);
+
+  // translocation ~ inversion + insertion (mathematically)
+  Fv *= pow(
+      1 - translocation_rate() * (1 - nu_inversion * nu_local_mutation), L);
+
+  // long deletion (products formed in double: they can exceed int32_t range)
+  double nu_deletion = 0; // if N_G == 0, a deletion is always not neutral
+  for (int32_t i = 0; i < N_G; i++) {
+    nu_deletion += static_cast<double>(lambda_i[i]) * (lambda_i[i] + 1);
+  }
+  nu_deletion /= ((double) 2 * L * L);
+  Fv *= pow(1 - deletion_rate() * (1 - nu_deletion), L);
+
+  // duplication ~ big deletion + insertion
+  Fv *= pow(1 - duplication_rate() * (1 - nu_deletion * nu_local_mutation),
+            L);
+
+  delete[] lambda_i; // deleting a null pointer is a no-op
+
+  return Fv;
+}
+
+/**
+ * Estimate the mutational robustness of this individual experimentally.
+ *
+ * nb_indiv mutants are built from this individual; each one undergoes
+ * perform_mutations(), is re-evaluated and is classified according to the
+ * change of its metabolic error with respect to the parent.
+ *
+ * \param nb_indiv number of mutants to generate
+ * \param prng generator handed to every mutant
+ * \param output_summary stream receiving the results (stdout when null)
+ * \param verbose also print the raw counters on stdout
+ * \param full_output write one fitness delta per mutant, not the statistics
+ */
+void IndivAnalysis::compute_experimental_f_nu(
+    int32_t nb_indiv, std::shared_ptr<JumpingMT> prng,
+    FILE* output_summary /* = nullptr*/, bool verbose /* = false*/,
+    bool full_output /* = false*/) {
+  double nb_pos = 0;
+  double cumul_delta_err_pos = 0;
+  double cumul_delta_fitness_pos = 0;
+  double nb_neg = 0;
+  double cumul_delta_err_neg = 0;
+  double cumul_delta_fitness_neg = 0;
+  double max_pos = 0;
+  double max_fitness_pos = 0;
+  double max_neg = 0;
+  double max_fitness_neg = 0;
+  double nb_neutral_genetic = 0;
+  double nb_neutral_phenotypic = 0;
+  int32_t nb_events = 0;
+
+  double parent_metabolic_error = dist_to_target_by_feature(METABOLISM);
+  double parent_fitness = fitness();
+
+  // The declaration defaults output_summary to nullptr: fall back to stdout
+  if (output_summary == nullptr) output_summary = stdout;
+  fprintf(output_summary, "%" PRId64 " ",AeTime::time());
+
+  for (int32_t i = 0; i < nb_indiv; i++) {
+    Individual mutant(this, 0, prng, prng);
+    // Perform transfer, rearrangements and mutations
+    if (not mutant.allow_plasmids()) {
+      const GeneticUnit* chromosome = &(mutant.genetic_unit_list().front());
+      nb_events = chromosome->dna()->perform_mutations(id_);
+    }
+    else {
+      printf("WARNING: Mutational Robustness does not handle multiple "
+                 "Genetic Units\n");
+    }
+    if (nb_events == 0)
+    {
+      nb_neutral_phenotypic++;
+      if (full_output) {
+        fprintf(output_summary,"%.15e ",0.0);
+      }
+    }
+    else
+    {
+      mutant.EvaluateInContext(habitat());
+      double new_metabolic_error = mutant.dist_to_target_by_feature(
+          METABOLISM);
+      double new_fitness = mutant.fitness();
+
+      if (new_metabolic_error == parent_metabolic_error) {
+        nb_neutral_phenotypic++;
+        nb_neutral_genetic++;
+      }
+      if (new_metabolic_error > parent_metabolic_error) {
+        nb_neg++;
+        if ((new_metabolic_error - parent_metabolic_error) > max_neg) {
+          max_neg = new_metabolic_error - parent_metabolic_error;
+        }
+        if ((new_fitness - parent_fitness) < max_fitness_neg) {
+          max_fitness_neg = new_fitness - parent_fitness;
+        }
+        cumul_delta_err_neg += new_metabolic_error - parent_metabolic_error;
+        cumul_delta_fitness_neg += new_fitness - parent_fitness;
+      }
+      if (new_metabolic_error < parent_metabolic_error) {
+        nb_pos++;
+        if ((new_metabolic_error - parent_metabolic_error) < max_pos) {
+          max_pos = new_metabolic_error - parent_metabolic_error;
+        }
+        if ((new_fitness - parent_fitness) > max_fitness_pos) {
+          max_fitness_pos = new_fitness - parent_fitness;
+        }
+        cumul_delta_err_pos += new_metabolic_error - parent_metabolic_error;
+        cumul_delta_fitness_pos += new_fitness - parent_fitness;
+      }
+
+      if (full_output) {
+        fprintf(output_summary,"%.15e ",(new_fitness - parent_fitness));
+      }
+    }
+  }
+
+  if (full_output) {
+    fprintf(output_summary,"\n");
+  }
+
+  if (verbose) {
+    printf("f+: %f f0_Ph: %f f0_Gen: %f f-:%f\n", nb_pos, nb_neutral_phenotypic, nb_neutral_genetic, nb_neg);
+  }
+
+  if (!full_output) {
+    fprintf(output_summary,"%.15e %.15e %.15e %.15e %.15e %.15e %.15e %.15e %.15e %.15e %.15e %.15e\n",
+            nb_pos / nb_indiv, nb_neutral_phenotypic / nb_indiv, nb_neutral_genetic / nb_indiv, nb_neg / nb_indiv,
+            cumul_delta_err_pos / nb_pos, cumul_delta_err_neg / nb_neg,
+            max_pos, max_neg,
+            cumul_delta_fitness_pos / nb_pos, cumul_delta_fitness_neg / nb_neg,
+            max_fitness_pos, max_fitness_neg);
+  }
+}
+
+/**
+ * Measure the effect of one given kind of mutation on this individual.
+ *
+ * Same bookkeeping and output layout as compute_experimental_f_nu(), but each
+ * mutant goes through a single do_*() operator selected by mutation_type
+ * instead of perform_mutations(). Mutants allowing plasmids are skipped.
+ * An unexpected mutation_type is reported on stderr; the mutant is then
+ * evaluated without having been mutated.
+ * The mean deltas are divided by nb_pos / nb_neg without any check: an empty
+ * class yields 0/0 in the summary line.
+ *
+ * \param nb_indiv number of mutants to generate
+ * \param mutation_type one of SWITCH, S_INS, S_DEL, DUPL, DEL, TRANS, INV
+ * \param prng generator handed to every mutant
+ * \param output_summary stream receiving the results (stdout when null)
+ * \param verbose also print the raw counters on stdout
+ * \param full_output write one fitness delta per mutant, not the statistics
+ */
+void IndivAnalysis::compute_experimental_mutagenesis(
+    int32_t nb_indiv, int32_t mutation_type, std::shared_ptr<JumpingMT> prng,
+    FILE* output_summary /* = nullptr*/, bool verbose /* = false*/,
+    bool full_output /* = false*/) {
+  double nb_pos = 0;
+  double cumul_delta_err_pos = 0;
+  double cumul_delta_fitness_pos = 0;
+  double nb_neg = 0;
+  double cumul_delta_err_neg = 0;
+  double cumul_delta_fitness_neg = 0;
+  double max_pos = 0;
+  double max_fitness_pos = 0;
+  double max_neg = 0;
+  double max_fitness_neg = 0;
+  double nb_neutral_genetic = 0;
+  double nb_neutral_phenotypic = 0;
+
+  double parent_metabolic_error = dist_to_target_by_feature(METABOLISM);
+  double parent_fitness = fitness();
+
+  // The declaration defaults output_summary to nullptr: fall back to stdout
+  if (output_summary == nullptr) output_summary = stdout;
+  fprintf(output_summary, "%" PRId64 " ",AeTime::time());
+
+  for (int32_t i = 0; i < nb_indiv; i++) {
+    Individual mutant(this, 0, prng, prng);
+    // Perform one mutation of the specified type
+    if (not mutant.allow_plasmids()) {
+      const GeneticUnit* chromosome = &(mutant.genetic_unit_list().front());
+      switch (mutation_type)
+      {
+        case SWITCH: {
+          chromosome->dna()->do_switch();
+          break;
+        }
+        case S_INS: {
+          chromosome->dna()->do_small_insertion();
+          break;
+        }
+        case S_DEL: {
+          chromosome->dna()->do_small_deletion();
+          break;
+        }
+        case DUPL: {
+          chromosome->dna()->do_duplication();
+          break;
+        }
+        case DEL: {
+          chromosome->dna()->do_deletion();
+          break;
+        }
+        case TRANS: {
+          chromosome->dna()->do_translocation();
+          break;
+        }
+        case INV: {
+          chromosome->dna()->do_inversion();
+          break;
+        }
+
+        default: {
+          fprintf(stderr, "Error, unexpected mutation type\n");
+          break;
+        }
+      }
+
+      mutant.EvaluateInContext(habitat());
+      double new_metabolic_error = mutant.dist_to_target_by_feature(
+          METABOLISM);
+      double new_fitness = mutant.fitness();
+
+      // Classify the mutant by the sign of its change in metabolic error
+      if (new_metabolic_error == parent_metabolic_error) {
+        nb_neutral_phenotypic++;
+        nb_neutral_genetic++;
+      }
+      if (new_metabolic_error > parent_metabolic_error) {
+        nb_neg++;
+        if ((new_metabolic_error - parent_metabolic_error) > max_neg) {
+          max_neg = new_metabolic_error - parent_metabolic_error;
+        }
+        if ((new_fitness - parent_fitness) < max_fitness_neg) {
+          max_fitness_neg = new_fitness - parent_fitness;
+        }
+        cumul_delta_err_neg += new_metabolic_error - parent_metabolic_error;
+        cumul_delta_fitness_neg += new_fitness - parent_fitness;
+      }
+      if (new_metabolic_error < parent_metabolic_error) {
+        nb_pos++;
+        if ((new_metabolic_error - parent_metabolic_error) < max_pos) {
+          max_pos = new_metabolic_error - parent_metabolic_error;
+        }
+        if ((new_fitness - parent_fitness) > max_fitness_pos) {
+          max_fitness_pos = new_fitness - parent_fitness;
+        }
+
+        cumul_delta_err_pos += new_metabolic_error - parent_metabolic_error;
+        cumul_delta_fitness_pos += new_fitness - parent_fitness;
+      }
+
+      if (full_output) {
+        fprintf(output_summary,"%.15e ",(new_fitness - parent_fitness));
+      }
+    }
+  }
+
+  if (full_output) {
+    fprintf(output_summary,"\n");
+  }
+
+  if (verbose) {
+    printf("f+: %f f0_Ph: %f f0_Gen: %f f-:%f\n", nb_pos, nb_neutral_phenotypic, nb_neutral_genetic, nb_neg);
+  }
+
+  if (!full_output) {
+    fprintf(output_summary,
+            "%.15e %.15e %.15e %.15e %.15e %.15e %.15e %.15e %.15e %.15e %.15e %.15e\n",
+            nb_pos / nb_indiv, nb_neutral_phenotypic / nb_indiv, nb_neutral_genetic / nb_indiv, nb_neg / nb_indiv,
+            cumul_delta_err_pos / nb_pos, cumul_delta_err_neg / nb_neg,
+            max_pos, max_neg,
+            cumul_delta_fitness_pos / nb_pos, cumul_delta_fitness_neg / nb_neg,
+            max_fitness_pos, max_fitness_neg);
+  }
+}
+
+
+// ============================================================================
+//                           Non inline accessors
+// ============================================================================
+
+} // namespace aevol
diff --git a/src/post_treatments/IndivAnalysis.h b/src/post_treatments/IndivAnalysis.h
new file mode 100644
index 0000000000000000000000000000000000000000..7e88a97175bb14414a8f0139de71f2ddd5d229b8
--- /dev/null
+++ b/src/post_treatments/IndivAnalysis.h
@@ -0,0 +1,76 @@
+//
+// Created by dparsons on 31/05/16.
+// + +#ifndef AEVOL_INDIVANALYSIS_H__ +#define AEVOL_INDIVANALYSIS_H__ + + +// ============================================================================ +// Includes +// ============================================================================ +#include "Individual.h" + +namespace aevol { + +/** + * + */ +class IndivAnalysis : public Individual { + public : + // ========================================================================== + // Constructors + // ========================================================================== + IndivAnalysis() = default; //< Default ctor + IndivAnalysis(const IndivAnalysis&) = delete; //< Copy ctor + IndivAnalysis(IndivAnalysis&&) = delete; //< Move ctor + + IndivAnalysis(const Individual&); + + // ========================================================================== + // Destructor + // ========================================================================== + virtual ~IndivAnalysis() = default; //< Destructor + + // ========================================================================== + // Operators + // ========================================================================== + /// Copy assignment + IndivAnalysis& operator=(const IndivAnalysis& other) = delete; + /// Move assignment + IndivAnalysis& operator=(IndivAnalysis&& other) = delete; + + // ========================================================================== + // Public Methods + // ========================================================================== + double compute_theoritical_f_nu(); + void compute_experimental_f_nu(int32_t nb_indiv, + std::shared_ptr<JumpingMT> prng, + FILE* output_summary = nullptr, + bool verbose = false, + bool full_output = false); + + void compute_experimental_mutagenesis(int32_t nb_indiv, + int32_t mutation_type, + std::shared_ptr<JumpingMT> prng, + FILE* output_summary = nullptr, + bool verbose = false, + bool full_output = false); + + + // 
========================================================================== + // Accessors + // ========================================================================== + + protected : + // ========================================================================== + // Protected Methods + // ========================================================================== + + // ========================================================================== + // Attributes + // ========================================================================== +}; + +} // namespace aevol +#endif //AEVOL_INDIVANALYSIS_H__ diff --git a/src/post_treatments/anc_network_knockout.cpp b/src/post_treatments/anc_network_knockout.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b2f0d0862baaa67963986166b7d2bff5132150ee --- /dev/null +++ b/src/post_treatments/anc_network_knockout.cpp @@ -0,0 +1,971 @@ +// **************************************************************************** +// +// Aevol - An in silico experimental evolution platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. 
If not, see <http://www.gnu.org/licenses/>. +// +// **************************************************************************** + + + + +// ================================================================= +// Libraries +// ================================================================= +#include <inttypes.h> +#include <getopt.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <zlib.h> +#include <err.h> +#include <errno.h> +#include <sys/stat.h> +#include <unistd.h> +#include <list> + +#include <cstdint> +#include <fstream> +#include <limits> +#include <string> + + +// ================================================================= +// Project Files +// ================================================================= +#include "aevol.h" + + +using namespace aevol; + + +enum check_type +{ + FULL_CHECK = 0, + LIGHT_CHECK = 1, + ENV_CHECK = 2, + NO_CHECK = 3 +}; + + + +// ================================================================= +// Function declarations +// ================================================================= +void print_help(char* prog_path); + +void extract_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void dump_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void filter_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void extract_network_single_target_model(Individual_R* best, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** ptm_fabs_fitness_loss_percent); + + +int main(int argc, char** argv) +{ + // The input file (lineage.ae or lineage.rae) must contain the following 
information: + // + // - common data (ae_common::write_to_backup) + // - begin gener (int64_t) + // - end gener (int64_t) + // - final individual index (int32_t) + // - initial genome size (int32_t) + // - initial ancestor (nb genetic units + sequences) (Individual::write_to_backup) + // - replication report of ancestor at time t0+1 (ae_replic_report::write_to_backup) + // - replication report of ancestor at time t0+2 (ae_replic_report::write_to_backup) + // - replication report of ancestor at time t0+3 (ae_replic_report::write_to_backup) + // - ... + // - replication report of ancestor at time t_end_ (ae_replic_report::write_to_backup) + + + + + // ===================== + // Parse command line + // ===================== + + // Default values + char* lineage_file_name = NULL; + bool verbose = false; + check_type check = LIGHT_CHECK; + double tolerance = 0; + + const char * short_options = "hVvncf:lt:"; + static struct option long_options[] = + { + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V' }, + {"verbose", no_argument, NULL, 'v'}, + {"nocheck", no_argument, NULL, 'n'}, + {"fullcheck", no_argument, NULL, 'c'}, + {"file", required_argument, NULL, 'f'}, + {"tolerance", required_argument, NULL, 't'}, + {0, 0, 0, 0} + }; + + int option; + while ((option = getopt_long(argc, argv, short_options, long_options, NULL)) != -1) + { + switch(option) + { + case 'h' : + { + print_help(argv[0]); + exit(EXIT_SUCCESS); + } + case 'V' : + { + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + } + case 'v' : verbose = true; break; + case 'n' : check = NO_CHECK; break; + case 'c' : check = FULL_CHECK; break; + case 'f' : + { + if (strcmp(optarg, "") == 0) + { + fprintf(stderr, "ERROR : Option -f or --file : missing argument.\n"); + exit(EXIT_FAILURE); + } + lineage_file_name = new char[strlen(optarg) + 1]; + sprintf(lineage_file_name, "%s", optarg); + break; + } + case 't' : + { + if (strcmp(optarg, "") == 0) + { + fprintf(stderr, "ERROR : Option -t or 
--tolerance : missing argument.\n"); + exit(EXIT_FAILURE); + } + check = ENV_CHECK; + tolerance = atof(optarg); + break; + } + default : + { + fprintf(stderr, "ERROR : Unknown option, check your syntax.\n"); + print_help(argv[0]); + exit(EXIT_FAILURE); + } + } + } + + + + if (lineage_file_name == NULL) + { + fprintf(stderr, "ERROR : Option -f or --file missing. \n"); + exit(EXIT_FAILURE); + } + + + printf("\n"); + printf("WARNING : Parameter change during simulation is not managed in general.\n"); + printf(" Only changes in environmental target done with aevol_modify are handled.\n"); + printf("\n"); + + // ======================= + // Open the lineage file + // ======================= + gzFile lineage_file = gzopen(lineage_file_name, "r"); + if (lineage_file == Z_NULL) + { + fprintf(stderr, "ERROR : Could not read the lineage file %s\n", lineage_file_name); + exit(EXIT_FAILURE); + } + + int64_t t0 = 0; + int64_t t_end = 0; + int32_t final_indiv_index = 0; + int32_t final_indiv_rank = 0; + + + gzread(lineage_file, &t0, sizeof(t0)); + gzread(lineage_file, &t_end, sizeof(t_end)); + gzread(lineage_file, &final_indiv_index, sizeof(final_indiv_index)); + gzread(lineage_file, &final_indiv_rank, sizeof(final_indiv_rank)); + + if (verbose) + { + printf("\n\n"); + printf("===============================================================================\n"); + printf(" Statistics of the ancestors of indiv. 
%" PRId32 + " (rank %" PRId32 ") from time %" PRId64 " to %" PRId64 "\n", + final_indiv_index, final_indiv_rank, t0, t_end); + printf("================================================================================\n"); + } + + // ============================ + // Init files + // ============================ + std::ofstream network; + network.open("anc_network_knockout.csv",std::ofstream::trunc); + network<<"Generation,"<<"Enhancer_or_Inhibitor,"<<"Value,"<<"Metaerror_lost,"<<"Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" + <<std::endl; + network.flush(); + network.close(); + + float filter_values[3] = {0.01, 0.001, 0.005}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "anc_network_filtered_" + str_filter_value + ".csv"; + network.open(file_name, std::ofstream::trunc); + network << "Generation," << "Enhancer," << "Inhibitor," << "Both," << "Value" + << "Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" << std::endl; + network.flush(); + network.close(); + + file_name = "anc_network_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::trunc); + network << "Generation," << "nb_enhancing," << "nb_inhibitor," << "nb_both,nb_edges," << "filter_nb_enhancing," + << "filter_nb_inhibitor," << "filter_nb_both,filter_nb_edges" << std::endl; + network.flush(); + network.close(); + } + + float filter_values_2[4] = {0.0, 0.01, 0.001, 0.005}; + + for (float filter_value : filter_values_2) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "anc_network_dump_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::trunc); + network << "Generation," << "Source," << "Destination," << "Enhancer_or_Inhibitor," << + "Value" << "Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" << std::endl; + network.flush(); + network.close(); + } + + 
network.open("anc_network_knockout_single_env.csv",std::ofstream::trunc); + network<<"Generation,"<<"Enhancer_or_Inhibitor,"<<"TargetModel,"<<"Value"<<"Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent"<<std::endl; + network.flush(); + network.close(); + + // ============================= + // Open the experience manager + // ============================= + ExpManager* exp_manager = new ExpManager(); + exp_manager->load(t0, true, false); + + // The current version doesn't allow for phenotypic variation nor for + // different phenotypic targets among the grid + if (not exp_manager->world()->phenotypic_target_shared()) + Utils::ExitWithUsrMsg("sorry, ancestor stats has not yet been implemented " + "for per grid-cell phenotypic target"); + auto phenotypicTargetHandler = + exp_manager->world()->phenotypic_target_handler(); + if (not (phenotypicTargetHandler->var_method() == NO_VAR)) + Utils::ExitWithUsrMsg("sorry, ancestor stats has not yet been implemented " + "for variable phenotypic targets"); + + int64_t backup_step = exp_manager->backup_step(); + + // ================================================== + // Prepare the initial ancestor and write its stats + // ================================================== + GridCell* grid_cell = new GridCell(lineage_file, exp_manager, nullptr); + // Individual*indiv = Individual::CreateIndividual(exp_manager, lineage_file); + auto* indiv = grid_cell->individual(); + + Individual_R* best = dynamic_cast<Individual_R*>(indiv); + best->do_transcription_translation_folding(); + + int nb_edges = 0; + for (auto &rna: best->get_rna_list_coding()) { + nb_edges+=((Rna_R *) rna)->nb_influences(); + } + + double* fabs_metaerror_loss = new double[nb_edges]; + double* fabs_fitness_loss = new double[nb_edges]; + double* fabs_metaerror_loss_percent = new double[nb_edges]; + double* fabs_fitness_loss_percent = new double[nb_edges]; + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] = 0; + 
fabs_fitness_loss[i] = 0; + fabs_metaerror_loss_percent[i] = 0; + fabs_fitness_loss_percent[i] = 0; + } + + + int nb_iteration = 100; + printf("Running %d evals for %d edges\n",nb_iteration,nb_edges); + for (int i = 0; i < nb_iteration; i++) { + printf("Iteration %d\n",i); + exp_manager->world()->ApplyHabitatVariation(); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->Evaluate(); + + fabs_metaerror_loss[i_edges] += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + fabs_fitness_loss[i_edges] += std::fabs(base_fitness-best->fitness()); + + fabs_metaerror_loss_percent[i_edges] += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + fabs_fitness_loss_percent[i_edges] += (std::fabs(base_fitness-best->fitness()))/best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] /= nb_iteration; + fabs_fitness_loss[i] /= nb_iteration; + fabs_metaerror_loss_percent[i] /= nb_iteration; + fabs_fitness_loss_percent[i] /= nb_iteration; + } + + extract_network(best,fabs_metaerror_loss,fabs_fitness_loss, + fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + filter_network(best,fabs_metaerror_loss,fabs_fitness_loss,fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + 
dump_network(best,fabs_metaerror_loss,fabs_fitness_loss,fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + + int nb_phenotypic_target_models = dynamic_cast<PhenotypicTargetHandler_R*>(exp_manager->world()-> + phenotypic_target_handler())->phenotypic_target_models_.size(); + printf("Running with a single phenotypic target models : %d\n",nb_phenotypic_target_models); + + double** ptm_fabs_metaerror_loss = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_fitness_loss = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_metaerror_loss_percent = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_fitness_loss_percent = new double*[nb_phenotypic_target_models]; + + for (int i = 0; i < nb_phenotypic_target_models; i++) { + ptm_fabs_metaerror_loss[i] = new double[nb_edges]; + ptm_fabs_fitness_loss[i] = new double[nb_edges]; + ptm_fabs_metaerror_loss_percent[i] = new double[nb_edges]; + ptm_fabs_fitness_loss_percent[i] = new double[nb_edges]; + } + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) { + + for (int j = 0; j < nb_edges; j++) { + ptm_fabs_metaerror_loss[target_id][j] = 0; + ptm_fabs_fitness_loss[target_id][j] = 0; + ptm_fabs_metaerror_loss_percent[target_id][j] = 0; + ptm_fabs_fitness_loss_percent[target_id][j] = 0; + } + + printf("Testing with phenotypic target model %d\n",target_id); + dynamic_cast<PhenotypicTargetHandler_R*>(exp_manager->world()->phenotypic_target_handler())->set_single_env(target_id); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + 
((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->Evaluate(); + + //printf("Testing with phenotypic target model %d : %lf %lf\n",target_id,base_metaerror,best->dist_to_target_by_feature(METABOLISM)); + + ptm_fabs_metaerror_loss[target_id][i_edges] += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + ptm_fabs_fitness_loss[target_id][i_edges] += std::fabs(base_fitness-best->fitness()); + + ptm_fabs_metaerror_loss_percent[target_id][i_edges] += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + ptm_fabs_fitness_loss_percent[target_id][i_edges] += (std::fabs(base_fitness-best->fitness()))/best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + extract_network_single_target_model(best,nb_phenotypic_target_models,ptm_fabs_metaerror_loss,ptm_fabs_fitness_loss,ptm_fabs_metaerror_loss_percent,ptm_fabs_fitness_loss_percent); + + + // ========================================================================== + // Replay the mutations to get the successive ancestors and analyze them + // ========================================================================== + ReplicationReport* rep = nullptr; + + int32_t index; + + ExpManager* exp_manager_backup = nullptr; + Habitat *backup_habitat = nullptr; + + bool check_now = false; + + aevol::AeTime::plusplus(); + while (time() <= t_end) + { + rep = new ReplicationReport(lineage_file, indiv); + index = rep->id(); // who we are building... + + // Check now? 
+ check_now = ((check == FULL_CHECK && Utils::mod(time(), backup_step) == 0) || + (check == ENV_CHECK && Utils::mod(time(), backup_step) == 0) || + (check == LIGHT_CHECK && time() == t_end)); + + if (verbose) + printf("Rebuilding ancestor at generation %" PRId64 + " (index %" PRId32 ")...", time(), index); + + indiv->Reevaluate(); + + // TODO <david.parsons@inria.fr> Check for phenotypic variation has to be + // done for all the grid cells, disable checking until coded + +// // Check, and possibly update, the environment according to the backup files +// // (update necessary if the env. was modified by aevol_modify at some point) +// if (Utils::mod(time(), backup_step) == 0) +// { +// char world_file_name[255]; +// sprintf(world_file_name, "./" WORLD_FNAME_FORMAT, time()); +// gzFile world_file = gzopen(world_file_name, "r"); +// backup_habitat = new Habitat(world_file, pth); // TODO vld: fix pth +// +// if (! env->is_identical_to(*backup_env, tolerance)) +// { +// printf("Warning: At time()=%" PRId64 ", the replayed environment is not the same\n", time()); +// printf(" as the one saved at time()=%" PRId64 "... 
\n", time()); +// printf(" with tolerance of %lg\n", tolerance); +// printf("Replacing the replayed environment by the one stored in the backup.\n"); +// delete env; +// h = new Habitat(*backup_habitat); +// } +// delete backup_habitat; +// } + + + // Warning: this portion of code won'time() work if the number of units changes + // during the evolution + + // 2) Replay replication (create current individual's child) + GeneticUnit& gen_unit = indiv->genetic_unit_nonconst(0); + GeneticUnit* stored_gen_unit = nullptr; + Individual* stored_indiv = nullptr; + + if (check_now) + { + exp_manager_backup = new ExpManager(); + exp_manager_backup->load(time(), true, false); + stored_indiv = new Individual( + *(Individual*) exp_manager_backup->indiv_by_id(index)); + stored_gen_unit = &(stored_indiv->genetic_unit_nonconst(0)); + } + + // For each genetic unit, replay the replication (undergo all mutations) + // TODO <david.parsons@inria.fr> disabled for multiple GUs + const auto& dnarep = rep->dna_replic_report(); + + for (const auto& mut: dnarep.HT()) + gen_unit.dna()->undergo_this_mutation(*mut); + for (const auto& mut: dnarep.rearrangements()) + gen_unit.dna()->undergo_this_mutation(*mut); + for (const auto& mut: dnarep.mutations()) + gen_unit.dna()->undergo_this_mutation(*mut); + + if (check_now) + { + if (verbose) + { + printf("Checking the sequence of the unit..."); + fflush(NULL); + } + + char * str1 = new char[gen_unit.dna()->length() + 1]; + memcpy(str1, gen_unit.dna()->data(), \ + gen_unit.dna()->length()*sizeof(char)); + str1[gen_unit.dna()->length()] = '\0'; + + char * str2 = new char[(stored_gen_unit->dna())->length() + 1]; + memcpy(str2, (stored_gen_unit->dna())->data(), + (stored_gen_unit->dna())->length()*sizeof(char)); + str2[(stored_gen_unit->dna())->length()] = '\0'; + + if (strncmp(str1, str2, stored_gen_unit->dna()->length()) == 0) { + if (verbose) + printf(" OK\n"); + } + else { + if (verbose) printf(" ERROR !\n"); + fprintf(stderr, "Error: the rebuilt 
genetic unit is not the same as \n"); + fprintf(stderr, "the one saved at generation %" PRId64 "... ", time()); + fprintf(stderr, "Rebuilt unit : %" PRId32 " bp\n %s\n", (int32_t)strlen(str1), str1); + fprintf(stderr, "Stored unit : %" PRId32 " bp\n %s\n", (int32_t)strlen(str2), str2); + + delete [] str1; + delete [] str2; + gzclose(lineage_file); + delete indiv; + delete stored_indiv; + delete exp_manager_backup; + delete exp_manager; + exit(EXIT_FAILURE); + } + + delete [] str1; + delete [] str2; + } + + // 3) All the mutations have been replayed, we can now evaluate the new individual + Individual_R* best = dynamic_cast<Individual_R*>(indiv); + best->do_transcription_translation_folding(); + + nb_edges = 0; + for (auto &rna: best->get_rna_list_coding()) { + nb_edges+=((Rna_R *) rna)->nb_influences(); + } + + delete [] fabs_metaerror_loss; + delete [] fabs_fitness_loss; + delete [] fabs_metaerror_loss_percent; + delete [] fabs_fitness_loss_percent; + + fabs_metaerror_loss = new double[nb_edges]; + fabs_fitness_loss = new double[nb_edges]; + fabs_metaerror_loss_percent = new double[nb_edges]; + fabs_fitness_loss_percent = new double[nb_edges]; + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] = 0; + fabs_fitness_loss[i] = 0; + fabs_metaerror_loss_percent[i] = 0; + fabs_fitness_loss_percent[i] = 0; + } + + printf("Running %d evals for %d edges\n",nb_iteration,nb_edges); + for (int i = 0; i < nb_iteration; i++) { + printf("Iteration %d\n",i); + exp_manager->world()->ApplyHabitatVariation(); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) 
rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->Evaluate(); + + fabs_metaerror_loss[i_edges] += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + fabs_fitness_loss[i_edges] += std::fabs(base_fitness-best->fitness()); + + fabs_metaerror_loss_percent[i_edges] += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + fabs_fitness_loss_percent[i_edges] += (std::fabs(base_fitness-best->fitness()))/best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] /= nb_iteration; + fabs_fitness_loss[i] /= nb_iteration; + fabs_metaerror_loss_percent[i] /= nb_iteration; + fabs_fitness_loss_percent[i] /= nb_iteration; + } + + extract_network(best,fabs_metaerror_loss,fabs_fitness_loss, + fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + filter_network(best,fabs_metaerror_loss,fabs_fitness_loss,fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + dump_network(best,fabs_metaerror_loss,fabs_fitness_loss,fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + + printf("Running with a single phenotypic target models : %d\n",nb_phenotypic_target_models); + + for (int i = 0; i < nb_phenotypic_target_models; i++) { + delete [] ptm_fabs_metaerror_loss[i]; + delete [] ptm_fabs_fitness_loss[i]; + delete [] ptm_fabs_metaerror_loss_percent[i]; + delete [] ptm_fabs_fitness_loss_percent[i]; + } + + for (int i = 0; i < nb_phenotypic_target_models; i++) { + ptm_fabs_metaerror_loss[i] = new double[nb_edges]; + ptm_fabs_fitness_loss[i] = new double[nb_edges]; + ptm_fabs_metaerror_loss_percent[i] = new double[nb_edges]; + ptm_fabs_fitness_loss_percent[i] = new double[nb_edges]; + } + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; 
target_id++) { + + for (int j = 0; j < nb_edges; j++) { + ptm_fabs_metaerror_loss[target_id][j] = 0; + ptm_fabs_fitness_loss[target_id][j] = 0; + ptm_fabs_metaerror_loss_percent[target_id][j] = 0; + ptm_fabs_fitness_loss_percent[target_id][j] = 0; + } + + printf("Testing with phenotypic target model %d\n",target_id); + dynamic_cast<PhenotypicTargetHandler_R*>(exp_manager->world()->phenotypic_target_handler())->set_single_env(target_id); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->Evaluate(); + + //printf("Testing with phenotypic target model %d : %lf %lf\n",target_id,base_metaerror,best->dist_to_target_by_feature(METABOLISM)); + + ptm_fabs_metaerror_loss[target_id][i_edges] += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + ptm_fabs_fitness_loss[target_id][i_edges] += std::fabs(base_fitness-best->fitness()); + + ptm_fabs_metaerror_loss_percent[target_id][i_edges] += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + ptm_fabs_fitness_loss_percent[target_id][i_edges] += (std::fabs(base_fitness-best->fitness()))/best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + 
extract_network_single_target_model(best,nb_phenotypic_target_models,ptm_fabs_metaerror_loss,ptm_fabs_fitness_loss,ptm_fabs_metaerror_loss_percent,ptm_fabs_fitness_loss_percent); + + + if (verbose) printf(" OK\n"); + + delete rep; + + if (check_now) + { + delete stored_indiv; + delete exp_manager_backup; + } + + aevol::AeTime::plusplus(); + } + + gzclose(lineage_file); + + delete exp_manager; + delete indiv; + + exit(EXIT_SUCCESS); +} + + + +void extract_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + std::ofstream network; + network.open("network_knockout.csv",std::ofstream::app); + + int i_edges = 0; + + for (auto& rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R*)rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R*)rna)->_enhancing_coef_list[i] > 0) + { + network<<aevol::AeTime::time()<<",1,"<<((Rna_R*)rna)->_enhancing_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," + <<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + + if (((Rna_R*)rna)->_operating_coef_list[i] > 0) + { + network<<aevol::AeTime::time()<<",0,"<<((Rna_R*)rna)->_operating_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," + <<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + i_edges++; + } + } + + network.flush(); + network.close(); +} + +void filter_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + float filter_values[3] = {0.01, 0.001, 0.005}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = 
std::to_string(filter_value); + std::string file_name = "network_filtered_" + str_filter_value + ".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::app); + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + int both = 0; + if (fabs_metaerror_loss[i_edges] >= filter_value) { + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && + (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + network << aevol::AeTime::time() << ",1,1,1," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << aevol::AeTime::time() << ",1,0,0," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << aevol::AeTime::time() << ",0,1,0," << ((Rna_R *) rna)->_operating_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_operating++; + } + } + filter_nb_edges++; + } + + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && (((Rna_R *) 
rna)->_operating_coef_list[i] > 0)) { + nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + nb_edges_operating++; + } + } + nb_edges++; + + i_edges++; + } + } + + network.flush(); + network.close(); + + file_name = "network_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::app); + network << aevol::AeTime::time() << "," << nb_edges_enhance << "," << nb_edges_operating << "," << nb_edges_both << "," + << nb_edges << "," << + filter_nb_edges_enhance << "," << filter_nb_edges_operating << "," << filter_nb_edges_both << "," + << filter_nb_edges << std::endl; + network.close(); + + } + +} + + + +void dump_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + + float filter_values[4] = {0.0, 0.01, 0.001, 0.005}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "network_dump_"+str_filter_value+".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::app); + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + for (auto& protein : rna->transcribed_proteins()) { + if (fabs_metaerror_loss[i_edges] >= filter_value) { + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << aevol::AeTime::time() << "," << protein->shine_dal_pos() << "," + << 
dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<"," + << "1," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges] << std::endl; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << aevol::AeTime::time() << "," << protein->shine_dal_pos() << "," + << dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<"," + << "0," << ((Rna_R *) rna)->_operating_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges] << std::endl; + } + } + } + + i_edges++; + } + } + + network.flush(); + network.close(); + + } + +} + +void extract_network_single_target_model(Individual_R* indiv, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** ptm_fabs_fitness_loss_percent) { + std::ofstream network; + network.open("network_knockout_single_env.csv",std::ofstream::trunc); + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) { + int i_edges = 0; + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << aevol::AeTime::time() << ",1," <<target_id<<","<< ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << ptm_fabs_metaerror_loss[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss[target_id][i_edges] <<"," + << ptm_fabs_metaerror_loss_percent[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << aevol::AeTime::time() << ",0," <<target_id<<","<< ((Rna_R *) rna)->_operating_coef_list[i] << "," + << 
ptm_fabs_metaerror_loss[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss[target_id][i_edges] << "," + << ptm_fabs_metaerror_loss_percent[target_id][i_edges] <<"," + << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl; + } + i_edges++; + } + } + } + + network.flush(); + network.close(); + +} + +/*! + \brief + +*/ +void print_help(char* prog_path) +{ + printf("\n"); + printf("*********************** aevol - Artificial Evolution ******************* \n"); + printf("* * \n"); + printf("* Ancstats post-treatment program * \n"); + printf("* * \n"); + printf("************************************************************************ \n"); + printf("\n\n"); + printf("This program is Free Software. No Warranty.\n"); + printf("Copyright (C) 2009 LIRIS.\n"); + printf("\n"); +#ifdef __REGUL + printf("Usage : rancstats -h\n"); + printf("or : rancstats [-vn] -f lineage_file \n"); +#else + printf("Usage : ancstats -h\n"); + printf("or : ancstats [-vn] -f lineage_file \n"); +#endif + printf("\n"); + printf("This program compute some statistics for the individuals within lineage_file.\n"); + printf("\n"); + printf("\n"); + printf("\t-h or --help : Display this help.\n"); + printf("\n"); + printf("\t-v or --verbose : Be verbose, listing generations as they are \n"); + printf("\t treated.\n"); + printf("\n"); + printf("\t-n or --nocheck : Disable genome sequence checking. Makes the \n"); + printf("\t program faster, but it is not recommended. \n"); + printf("\t It is better to let the program check that \n"); + printf("\t when we rebuild the genomes of the ancestors\n"); + printf("\t from the lineage file, we get the same sequences\n"); + printf("\t as those stored in the backup files.\n"); + printf("\n"); + printf("\t-c or --fullcheck : Will perform the genome and environment checks every\n"); + printf("\t <BACKUP_STEP> generations. 
Default behaviour is\n"); + printf("\t lighter as it only perform sthese checks at the\n"); + printf("\t ending generation.\n"); + printf("\n"); + printf("\t-f lineage_file or --file lineage_file : \n"); + printf("\t Compute the statistics for the individuals within lineage_file.\n"); + printf("\t-t tolerance or --tolerance tolerance : \n"); + printf("\t Tolerance used to compare the replayed environment to environment in backup\n"); + printf("\n"); +} diff --git a/src/post_treatments/ancestor_mutagenesis.cpp b/src/post_treatments/ancestor_mutagenesis.cpp new file mode 100644 index 0000000000000000000000000000000000000000..31992c0b61f7419603c7be8502012f323982397d --- /dev/null +++ b/src/post_treatments/ancestor_mutagenesis.cpp @@ -0,0 +1,444 @@ +// **************************************************************************** +// +// Aevol - An in silico experimental evolution platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+// +// **************************************************************************** + +#include <cerrno> +#include <cstdlib> +#include <cstdio> +#include <cstring> +#include <cinttypes> +#include <cmath> +#include <cassert> +#include <getopt.h> +#include <sys/stat.h> +#include <list> +#include <zlib.h> + +#include "aevol.h" +#include "IndivAnalysis.h" + +using std::list; + +using namespace aevol; + +// ================================================================= +// Command line option variables +// ================================================================= +int32_t wanted_rank = -1; +int32_t wanted_index = -1; +static char* lineage_file_name = nullptr; +int32_t mutation_type = 0; +int32_t nb_mutants = -1; +static int32_t begin = 0; //< First generation to analyse +static int32_t end = -1; //< Last generation to analyse +static int32_t period = 1; //< Period of analysis +static bool verbose = false; +static bool full_output = false; + +// ================================================================= +// Function declarations +// ================================================================= +void print_help(char* prog_path); +void interpret_cmd_line_options(int argc, char* argv[]); +int write_headers(FILE* output_file,bool full_output); + +int main(int argc, char* argv[]) { + interpret_cmd_line_options(argc, argv); + + // ======================= + // Open the lineage file + // ======================= + gzFile lineage_file = gzopen(lineage_file_name, "r"); + if (lineage_file == Z_NULL) { + Utils::ExitWithUsrMsg(std::string("Could not read lineage file ") + + lineage_file_name + "\n"); + } + + int64_t t0 = 0; + int64_t t_end = 0; + int32_t final_indiv_index = 0; + int32_t final_indiv_rank = 0; + + gzread(lineage_file, &t0, sizeof(t0)); + gzread(lineage_file, &t_end, sizeof(t_end)); + gzread(lineage_file, &final_indiv_index, sizeof(final_indiv_index)); + gzread(lineage_file, &final_indiv_rank, sizeof(final_indiv_rank)); + + if (verbose) { + 
printf("\n\n"); + printf( + "===============================================================================\n"); + printf(" Mutagenesis analysis of the ancestors of indiv. %" + PRId32 + " (rank %" + PRId32 + ") from time %" + PRId64 + " to %" + PRId64 + "\n", + final_indiv_index, final_indiv_rank, t0, t_end); + printf( + "================================================================================\n"); + } + + // ============================= + // Open the experience manager + // ============================= + ExpManager* exp_manager = new ExpManager(); + exp_manager->load(t0, true, false); + + // The current version doesn't allow for phenotypic variation nor for + // different phenotypic targets among the grid + if (not exp_manager->world()->phenotypic_target_shared()) { + Utils::ExitWithUsrMsg("sorry, ancestor stats has not yet been implemented " + "for per grid-cell phenotypic target\n"); + } + auto phenotypicTargetHandler = + exp_manager->world()->phenotypic_target_handler(); + if (phenotypicTargetHandler->var_method() != NO_VAR) { + Utils::ExitWithUsrMsg("sorry, ancestor stats has not yet been implemented " + "for variable phenotypic targets\n"); + } + + // ========================= + // Open the output file(s) + // ========================= + +char mutation_type_name[24]; + switch (mutation_type) { + case SWITCH: { + snprintf(mutation_type_name, 23, "point-mutation"); + break; + } + case S_INS: { + snprintf(mutation_type_name, 23, "small-insertion"); + printf("mutation-type : small insertions\n"); + break; + } + case S_DEL: { + snprintf(mutation_type_name, 23, "small-deletion"); + break; + } + case DUPL: { + snprintf(mutation_type_name, 23, "duplication"); + break; + } + case DEL: { + snprintf(mutation_type_name, 23, "large-deletion"); + break; + } + case TRANS: { + snprintf(mutation_type_name, 23, "translocation"); + break; + } + case INV: { + snprintf(mutation_type_name, 23, "inversion"); + break; + } + default: { + fprintf(stderr, "Error, unexpected 
mutation type.\n"); + exit(EXIT_FAILURE); + } + } + + char output_file_name[256]; + snprintf(output_file_name, 255, + "mutagenesis-n%d-%s.txt",nb_mutants,mutation_type_name); + + printf("%s\n",output_file_name); + + FILE* output_summary = fopen(output_file_name, "w"); + if (output_summary == nullptr) { + Utils::ExitWithUsrMsg(std::string("Could not create ") + output_file_name); + } + + printf("ouput file opened\n"); + write_headers(output_summary,full_output); + + std::shared_ptr <JumpingMT> prng = std::make_shared<JumpingMT>(9695); + + // ============================== + // Prepare the initial ancestor + // ============================== + GridCell* grid_cell = new GridCell(lineage_file, exp_manager, nullptr); + IndivAnalysis indiv(*(grid_cell->individual())); + indiv.Evaluate(); + // indiv->compute_statistical_data(); + // indiv->compute_non_coding(); + + + + if(begin == 0) { + indiv.compute_experimental_mutagenesis(nb_mutants, mutation_type, prng, output_summary, verbose,full_output); + } + + // ========================================================================== + // Replay the mutations to get the successive ancestors and analyze them + // ========================================================================== + ReplicationReport* rep = nullptr; + + int32_t index; + + aevol::AeTime::plusplus(); + while ((time() <= t_end) && (((time() < end) || (end == -1)))) { + rep = new ReplicationReport(lineage_file, &indiv); + index = rep->id(); // who we are building... 
+ indiv.Reevaluate(); + + if (verbose) { + printf("Ancestor at generation %" + PRId64 + " has index %" + PRId32 + "\n", time(), index); + } + + + // 2) Replay replication (create current individual's child) + GeneticUnit& gen_unit = indiv.genetic_unit_nonconst(0); + + + // For each genetic unit, replay the replication (undergo all mutations) + // TODO <david.parsons@inria.fr> disabled for multiple GUs + const auto& dnarep = rep->dna_replic_report(); + + for (const auto& mut: dnarep.HT()) + gen_unit.dna()->undergo_this_mutation(*mut); + for (const auto& mut: dnarep.rearrangements()) + { + gen_unit.dna()->undergo_this_mutation(*mut); + // 3) All the mutations have been replayed, we can now evaluate the new individual + indiv.Reevaluate(); + + // if we are between "begin" and "end" and at the correct period, compute robustness + + if ((time() >= begin) && ((time() < end) || (end == -1)) && + (((time() - begin) % period) == 0)) { + indiv.compute_experimental_mutagenesis(nb_mutants, mutation_type, prng, output_summary, + verbose,full_output); + } + } + for (const auto& mut: dnarep.mutations()) + { + gen_unit.dna()->undergo_this_mutation(*mut); + // 3) All the mutations have been replayed, we can now evaluate the new individual + indiv.Reevaluate(); + + // if we are between "begin" and "end" and at the correct period, compute robustness + + if ((time() >= begin) && ((time() < end) || (end == -1)) && + (((time() - begin) % period) == 0)) { + indiv.compute_experimental_mutagenesis(nb_mutants, mutation_type, prng, output_summary,verbose,full_output); + } + } + // 3) All the mutations have been replayed, we can now evaluate the new individual + //GB indiv.Reevaluate(); + + // if we are between "begin" and "end" and at the correct period, compute robustness + + //GB if ((time() >= begin) && ((time() < end) || (end == -1)) && + //GB (((time() - begin) % period) == 0)) { + //GB indiv.compute_experimental_f_nu(nb_mutants, prng, output_summary, nullptr, + //GB verbose); + //GB} + 
delete rep; + + aevol::AeTime::plusplus(); + } + + gzclose(lineage_file); + fclose(output_summary); + delete exp_manager; + return EXIT_SUCCESS; +} + + +void interpret_cmd_line_options(int argc, char* argv[]) { + const char* short_options = "hVvfm:n:b:e:P:o:"; + static struct option long_options[] = { + {"help", no_argument, nullptr, 'h'}, + {"version", no_argument, nullptr, 'V'}, + {"verbose", no_argument, nullptr, 'v'}, + {"full", no_argument, nullptr, 'f'}, + {"mutation-type", required_argument, nullptr, 'm'}, + {"nb-mutants", required_argument, nullptr, 'n'}, + {"begin", required_argument, nullptr, 'b'}, + {"end", required_argument, nullptr, 'e'}, + {"period", required_argument, nullptr, 'P'}, + {0, 0, 0, 0} + }; + + int option; + while ((option = getopt_long(argc, argv, short_options, long_options, + nullptr)) != -1) { + switch (option) { + case 'h' : + print_help(argv[0]); + exit(EXIT_SUCCESS); + case 'V' : + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + case 'v' : + verbose = true; + break; + case 'f' : + full_output = true; + break; + case 'b' : + begin = atol(optarg); + break; + case 'e' : + end = atol(optarg); + break; + case 'm': + mutation_type = (MutationType) atol(optarg); + if (mutation_type == SWITCH) { + } + else if ((mutation_type == S_INS) || (mutation_type == S_DEL) || + (mutation_type == DUPL) || (mutation_type == DEL) || + (mutation_type == TRANS) || (mutation_type == INV)) { + } + else { + fprintf(stderr, + "%s: error: So far, mutagenesis is implemented only for " + "point mutations, small insertions, \n" + " small deletions, duplications, deletions, " + "translocations or inversions.\n" + " It is not available yet for lateral transfer.\n", + argv[0]); + exit(EXIT_FAILURE); + } + break; + case 'n' : + nb_mutants = atol(optarg); + break; + case 'P' : + period = atol(optarg); + break; + default: + // An error message is printed in getopt_long, we just need to exit + exit(EXIT_FAILURE); + } + } + + // There should be only one remaining 
arg: the lineage file + if (optind != argc - 1) { + Utils::ExitWithUsrMsg("please specify a lineage file"); + } + + lineage_file_name = new char[strlen(argv[optind]) + 1]; + sprintf(lineage_file_name, "%s", argv[optind]); +} + +void print_help(char* prog_path) { + // Get the program file-name in prog_name (strip prog_path of the path) + char* prog_name; // No new, it will point to somewhere inside prog_path + if ((prog_name = strrchr(prog_path, '/'))) { + prog_name++; + } + else { + prog_name = prog_path; + } + + printf("******************************************************************************\n"); + printf("* *\n"); + printf("* aevol - Artificial Evolution *\n"); + printf("* *\n"); + printf("* Aevol is a simulation platform that allows one to let populations of *\n"); + printf("* digital organisms evolve in different conditions and study experimentally *\n"); + printf("* the mechanisms responsible for the structuration of the genome and the *\n"); + printf("* transcriptome. *\n"); + printf("* *\n"); + printf("******************************************************************************\n"); + printf("\n"); + printf("%s: generate and analyse mutants for the provided lineage.\n", + prog_name); + printf("\n"); + printf("Usage : %s -h or --help\n", prog_name); + printf(" or : %s -V or --version\n", prog_name); + printf(" or : %s LINEAGE_FILE [-b TIMESTEP] [-e TIMESTEP] [-n NB_MUTANTS] [-P PERIOD] [-o output] [-v]\n", + prog_name); + printf("\nOptions\n"); + printf(" -h, --help\n\tprint this help, then exit\n"); + printf(" -V, --version\n\tprint version number, then exit\n"); + printf(" -b, --begin TIMESTEP\n"); + printf("\ttimestep at which to start the analysis\n"); + printf(" -e, --end TIMESTEP\n"); + printf("\ttimestep at which to stop the analysis\n"); + printf(" -n, --nb-mutants NB_MUTANTS\n"); + printf("\tnumber of mutants to be generated\n"); + printf("\t-m MUTATIONTYPE or --mutation-type MUTATIONTYPE : \n"); + printf( + "\t Integer type of the mutation 
carried by each mutant: 0 for a point mutation, 1 for a \n"); + printf( + "\t small insertion, 2 for small deletions, 3 for a duplication, 4 for a large deletion, \n"); + printf("\t 5 for a translocation or 6 for an inversion. \n"); + printf("\n"); + printf(" -f, --full\n"); + printf("\tfull output (otherwize synthetic output will be produced). -f option must be used with care as it may produce very large output files\n"); + + printf(" -P, --period\n"); + printf("\tperiod with which to perform the analysis\n"); + printf(" -v, --verbose\n\tbe verbose\n"); +} + + +int write_headers(FILE* output_file,bool full_output) { + // -------------------------------------- + // Write headers in robustness files + // -------------------------------------- + if (!full_output) + { + fprintf(output_file,"# ------------------------------------------------------------------\n"); + fprintf(output_file,"# Evolvability, Robustness and Antirobustness statistics for mutants\n"); + fprintf(output_file,"# ------------------------------------------------------------------\n"); + fprintf(output_file,"# \n"); + fprintf(output_file,"# 1. Generation \n"); + fprintf(output_file,"# 2. Fraction of positive mutants (2*10 is Evolvability) \n"); + fprintf(output_file,"# 3. Fraction of neutral mutants (aka reproductive robustness) \n"); + fprintf(output_file,"# 4. Fraction of neutral mutants (aka mutational robustness) \n"); + fprintf(output_file,"# 5. Fraction of negative mutants \n"); + fprintf(output_file,"# 8. Cumul of delta-gaps of positive mutants\n"); + fprintf(output_file,"# 9. Cumul of delta-gaps of negative mutants\n"); + fprintf(output_file,"# 6. Delta-gap for the best mutants \n"); + fprintf(output_file,"# 7. Delta-gap for the worst mutants \n"); + fprintf(output_file,"# 10. Cumul of delta-fitness of positive mutants (2*10 is Evolvability)\n"); + fprintf(output_file,"# 11. Cumum of delta-fitness of negative mutants\n"); + fprintf(output_file,"# 12. 
Delta-fitness for the best mutants\n"); + fprintf(output_file,"# 13. Delta-fitness for the worst mutants\n\n\n"); + } + else + { + fprintf(output_file,"# ------------------------------------------------------------------\n"); + fprintf(output_file,"# Evolvability, Robustness and Antirobustness statistics for mutants\n"); + fprintf(output_file,"# ------------------------------------------------------------------\n"); + fprintf(output_file,"# \n"); + fprintf(output_file,"# 1. Generation \n"); + fprintf(output_file,"# 2 to n+2. delta-fitness of each tested mutants \n\n\n"); + + } + return 0; +} diff --git a/src/post_treatments/ancestor_robustness.cpp b/src/post_treatments/ancestor_robustness.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8117851d981fe9369b3c97c609c652120cfe129a --- /dev/null +++ b/src/post_treatments/ancestor_robustness.cpp @@ -0,0 +1,375 @@ +// **************************************************************************** +// +// Aevol - An in silico experimental evolution platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. 
If not, see <http://www.gnu.org/licenses/>. +// +// **************************************************************************** + +// ============================================================================ +// Includes +// ============================================================================ +#include <list> +#include <getopt.h> + +#include "aevol.h" +#include "IndivAnalysis.h" + +using namespace aevol; + +// Helper functions +void interpret_cmd_line_options(int argc, char* argv[]); +void print_help(char* prog_path); +int write_headers(FILE* output_file,bool full_output); + +// Command-line option variables +static char* lineage_file_name = nullptr; +static int32_t nb_mutants = 1000; //< Number of mutants per individual +static int32_t begin = 0; //< First generation to analyse +static int32_t end = -1; //< Last generation to analyse +static int32_t period = 1; //< Period of analysis +static char* output_file_name = "robustness_summary.txt"; +static bool verbose = false; +static bool full_output = false; + +int main(int argc, char* argv[]) { + interpret_cmd_line_options(argc, argv); + + // ======================= + // Open the lineage file + // ======================= + gzFile lineage_file = gzopen(lineage_file_name, "r"); + if (lineage_file == Z_NULL) { + Utils::ExitWithUsrMsg(std::string("Could not read lineage file ") + + lineage_file_name + "\n"); + } + + int64_t t0 = 0; + int64_t t_end = 0; + int32_t final_indiv_index = 0; + int32_t final_indiv_rank = 0; + + gzread(lineage_file, &t0, sizeof(t0)); + gzread(lineage_file, &t_end, sizeof(t_end)); + gzread(lineage_file, &final_indiv_index, sizeof(final_indiv_index)); + gzread(lineage_file, &final_indiv_rank, sizeof(final_indiv_rank)); + + if (verbose) { + printf("\n\n"); + printf( + "===============================================================================\n"); + printf(" Robustness of the ancestors of indiv. 
%" + PRId32 + " (rank %" + PRId32 + ") from time %" + PRId64 + " to %" + PRId64 + "\n", + final_indiv_index, final_indiv_rank, t0, t_end); + printf( + "================================================================================\n"); + } + + // ============================= + // Open the experience manager + // ============================= + ExpManager* exp_manager = new ExpManager(); + exp_manager->load(t0, true, false); + + // The current version doesn't allow for phenotypic variation nor for + // different phenotypic targets among the grid + if (not exp_manager->world()->phenotypic_target_shared()) { + Utils::ExitWithUsrMsg("sorry, ancestor stats has not yet been implemented " + "for per grid-cell phenotypic target\n"); + } + auto phenotypicTargetHandler = + exp_manager->world()->phenotypic_target_handler(); + if (phenotypicTargetHandler->var_method() != NO_VAR) { + Utils::ExitWithUsrMsg("sorry, ancestor stats has not yet been implemented " + "for variable phenotypic targets\n"); + } + + // ========================= + // Open the output file(s) + // ========================= + + // // Create missing directories + // int status; + // status = mkdir("stats/ancestor_stats/", 0755); + // if ((status == -1) && (errno != EEXIST)) + // err(EXIT_FAILURE, "stats/ancestor_stats/"); + + FILE* output_summary = fopen(output_file_name, "w"); + if (output_summary == nullptr) { + Utils::ExitWithUsrMsg(std::string("Could not create ") + output_file_name); + } + + write_headers(output_summary,full_output); + + std::shared_ptr <JumpingMT> prng = std::make_shared<JumpingMT>(9695); + + // ============================== + // Prepare the initial ancestor + // ============================== + GridCell* grid_cell = new GridCell(lineage_file, exp_manager, nullptr); + IndivAnalysis indiv(*(grid_cell->individual())); + indiv.Evaluate(); + // indiv->compute_statistical_data(); + // indiv->compute_non_coding(); + + // ============================== + // Compute robustness of the initial 
ancestor + // ============================== + + if (begin == 0) { + indiv.compute_experimental_f_nu(nb_mutants, prng, output_summary, + verbose,full_output); + } + + // ========================================================================== + // Replay the mutations to get the successive ancestors and analyze them + // ========================================================================== + ReplicationReport* rep = nullptr; + + int32_t index; + + aevol::AeTime::plusplus(); + while ((time() <= t_end) && (((time() < end) || (end == -1)))) { + rep = new ReplicationReport(lineage_file, &indiv); + index = rep->id(); // who we are building... + indiv.Reevaluate(); + + if (verbose) { + printf("Ancestor at generation %" + PRId64 + " has index %" + PRId32 + "\n", time(), index); + } + + + // 2) Replay replication (create current individual's child) + GeneticUnit& gen_unit = indiv.genetic_unit_nonconst(0); + + + // For each genetic unit, replay the replication (undergo all mutations) + // TODO <david.parsons@inria.fr> disabled for multiple GUs + const auto& dnarep = rep->dna_replic_report(); + + for (const auto& mut: dnarep.HT()) + gen_unit.dna()->undergo_this_mutation(*mut); + for (const auto& mut: dnarep.rearrangements()) + { + gen_unit.dna()->undergo_this_mutation(*mut); + // 3) All the mutations have been replayed, we can now evaluate the new individual + indiv.Reevaluate(); + + // if we are between "begin" and "end" and at the correct period, compute robustness + + if ((time() >= begin) && ((time() < end) || (end == -1)) && + (((time() - begin) % period) == 0)) { + indiv.compute_experimental_f_nu(nb_mutants, prng, output_summary, + verbose,full_output); + } + } + for (const auto& mut: dnarep.mutations()) + { + gen_unit.dna()->undergo_this_mutation(*mut); + // 3) All the mutations have been replayed, we can now evaluate the new individual + indiv.Reevaluate(); + + // if we are between "begin" and "end" and at the correct period, compute robustness + + if ((time() 
>= begin) && ((time() < end) || (end == -1)) && + (((time() - begin) % period) == 0)) { + indiv.compute_experimental_f_nu(nb_mutants, prng, output_summary, + verbose,full_output); + } + } + // 3) All the mutations have been replayed, we can now evaluate the new individual + //GB indiv.Reevaluate(); + + // if we are between "begin" and "end" and at the correct period, compute robustness + + //GB if ((time() >= begin) && ((time() < end) || (end == -1)) && + //GB (((time() - begin) % period) == 0)) { + //GB indiv.compute_experimental_f_nu(nb_mutants, prng, output_summary, nullptr, + //GB verbose); + //GB} + delete rep; + + aevol::AeTime::plusplus(); + } + + gzclose(lineage_file); + fclose(output_summary); + delete exp_manager; + return EXIT_SUCCESS; +} + +void interpret_cmd_line_options(int argc, char* argv[]) { + const char* short_options = "hVvfn:b:e:P:o:"; + static struct option long_options[] = { + {"help", no_argument, nullptr, 'h'}, + {"version", no_argument, nullptr, 'V'}, + {"verbose", no_argument, nullptr, 'v'}, + {"full", no_argument, nullptr, 'f'}, + {"nb-mutants", required_argument, nullptr, 'n'}, + {"begin", required_argument, nullptr, 'b'}, + {"end", required_argument, nullptr, 'e'}, + {"period", required_argument, nullptr, 'P'}, + {"output", required_argument, nullptr, 'o'}, + {0, 0, 0, 0} + }; + + int option; + while ((option = getopt_long(argc, argv, short_options, long_options, + nullptr)) != -1) { + switch (option) { + case 'h' : + print_help(argv[0]); + exit(EXIT_SUCCESS); + case 'V' : + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + case 'v' : + verbose = true; + break; + case 'f' : + full_output = true; + break; + case 'b' : + begin = atol(optarg); + break; + case 'e' : + end = atol(optarg); + break; + case 'n' : + nb_mutants = atol(optarg); + break; + case 'P' : + period = atol(optarg); + break; + case 'o' : + output_file_name = new char[strlen(optarg) + 1]; + sprintf(output_file_name, "%s", optarg); + break; + default: + // An error 
message is printed in getopt_long, we just need to exit + exit(EXIT_FAILURE); + } + } + + // There should be only one remaining arg: the lineage file + if (optind != argc - 1) { + Utils::ExitWithUsrMsg("please specify a lineage file"); + } + + lineage_file_name = new char[strlen(argv[optind]) + 1]; + sprintf(lineage_file_name, "%s", argv[optind]); +} + +void print_help(char* prog_path) { + // Get the program file-name in prog_name (strip prog_path of the path) + char* prog_name; // No new, it will point to somewhere inside prog_path + if ((prog_name = strrchr(prog_path, '/'))) { + prog_name++; + } + else { + prog_name = prog_path; + } + + printf("******************************************************************************\n"); + printf("* *\n"); + printf("* aevol - Artificial Evolution *\n"); + printf("* *\n"); + printf("* Aevol is a simulation platform that allows one to let populations of *\n"); + printf("* digital organisms evolve in different conditions and study experimentally *\n"); + printf("* the mechanisms responsible for the structuration of the genome and the *\n"); + printf("* transcriptome. 
*\n"); + printf("* *\n"); + printf("******************************************************************************\n"); + printf("\n"); + printf("%s: generate and analyse mutants for the provided lineage.\n", + prog_name); + printf("\n"); + printf("Usage : %s -h or --help\n", prog_name); + printf(" or : %s -V or --version\n", prog_name); + printf(" or : %s LINEAGE_FILE [-b TIMESTEP] [-e TIMESTEP] [-n NB_MUTANTS] [-P PERIOD] [-o output] [-v]\n", + prog_name); + printf("\nOptions\n"); + printf(" -h, --help\n\tprint this help, then exit\n"); + printf(" -V, --version\n\tprint version number, then exit\n"); + printf(" -b, --begin TIMESTEP\n"); + printf("\ttimestep at which to start the analysis\n"); + printf(" -e, --end TIMESTEP\n"); + printf("\ttimestep at which to stop the analysis\n"); + printf(" -n, --nb-mutants NB_MUTANTS\n"); + printf("\tnumber of mutants to be generated\n"); + printf(" -f, --full\n"); + printf("\tfull output (otherwize synthetic output will be produced). -f option must be used with care as it may produce very large output files\n"); + printf(" -P, --period\n"); + printf("\tperiod with which to perform the analysis\n"); + printf(" -o, --output\n"); + printf("\toutput file name\n"); + printf(" -v, --verbose\n\tbe verbose\n"); +} + + + +int write_headers(FILE* output_file,bool full_output) { + // -------------------------------------- + // Write headers in robustness files + // -------------------------------------- + if (!full_output) + { + fprintf(output_file,"# ------------------------------------------------------\n"); + fprintf(output_file,"# Evolvability, Robustness and Antirobustness statistics\n"); + fprintf(output_file,"# ------------------------------------------------------\n"); + fprintf(output_file,"# \n"); + fprintf(output_file,"# 1. Generation \n"); + fprintf(output_file,"# 2. Fraction of positive offspring (2*10 is Evolvability) \n"); + fprintf(output_file,"# 3. 
/*!
  \brief Write the commented legend that opens a robustness output file.

  \param output_file stream opened for writing; the legend is appended at its
                     current position.
  \param full_output true for the "full" layout (one delta-fitness column per
                     tested offspring), false for the 13-column summary.
  \return 0 on success, -1 when output_file is null or the stream reports a
          write error.
*/
int write_headers(FILE* output_file, bool full_output) {
  if (output_file == nullptr) {
    return -1;
  }

  // Title block and first column: identical for both layouts.
  fputs("# ------------------------------------------------------\n", output_file);
  fputs("# Evolvability, Robustness and Antirobustness statistics\n", output_file);
  fputs("# ------------------------------------------------------\n", output_file);
  fputs("# \n", output_file);
  fputs("# 1. Generation \n", output_file);

  if (full_output) {
    fputs("# 2 to n+2. delta-fitness of each tested offspring \n\n\n", output_file);
  }
  else {
    // NOTE(review): entries 8-9 are listed before 6-7. The code that writes
    // the data rows is not visible from here, so the numbering is left
    // untouched; confirm which of the labels or the order is the intended one.
    fputs("# 2. Fraction of positive offspring (2*10 is Evolvability) \n", output_file);
    fputs("# 3. Fraction of neutral offspring (aka reproductive robustness) \n", output_file);
    fputs("# 4. Fraction of neutral mutants (aka mutational robustness) \n", output_file);
    fputs("# 5. Fraction of negative offspring \n", output_file);
    fputs("# 8. Cumul of delta-gaps of positive offspring\n", output_file);
    fputs("# 9. Cumul of delta-gaps of negative offspring\n", output_file);
    fputs("# 6. Delta-gap for the best offspring \n", output_file);
    fputs("# 7. Delta-gap for the worst offspring \n", output_file);
    fputs("# 10. Cumul of delta-fitness of positive offspring (2*10 is Evolvability)\n", output_file);
    fputs("# 11. Cumul of delta-fitness of negative offspring\n", output_file);
    fputs("# 12. Delta-fitness for the best offspring\n", output_file);
    fputs("# 13. Delta-fitness for the worst offspring\n\n\n", output_file);
  }

  // Report a failed write instead of pretending everything went fine.
  return ferror(output_file) ? -1 : 0;
}
grid_height = exp_manager->world()->height(); @@ -445,11 +455,15 @@ int main(int argc, char* argv[]) { } } +#else + printf("Fitness local sum is not supported for Aevol (only R-Aevol)\n"); + exit(-1); +#endif } - - int16_t cur_x = (xx + grid_width) % grid_width; - int16_t cur_y = (yy + grid_height) % grid_height; +#ifdef __REGUL + int16_t cur_x = (xx + exp_manager->world()->width()) % exp_manager->world()->width(); + int16_t cur_y = (yy + exp_manager->world()->height()) % exp_manager->world()->height(); if (fitness_function_ == FITNESS_GLOBAL_SUM) { double composed_fitness = 0; @@ -468,7 +482,11 @@ int main(int argc, char* argv[]) { composed_fitness/=number_of_phenotypic_target_models; fitmeta<<t0<<","<<"-1"<<","<<"1"<<","<<composed_fitness<<std::endl; } - +#else + printf("Fitness local sum is not supported for Aevol (only R-Aevol)\n"); + exit(-1); +#endif + indiv->Reevaluate(); indiv->compute_statistical_data(); indiv->compute_non_coding(); diff --git a/src/post_treatments/checkpoint_network_knockout.cpp b/src/post_treatments/checkpoint_network_knockout.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f8751faa960266bd9019c9d2bc321664581ebebb --- /dev/null +++ b/src/post_treatments/checkpoint_network_knockout.cpp @@ -0,0 +1,1120 @@ +// **************************************************************************** +// +// Aevol - An in silico experimental evolution platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. 
// Level of genome checking requested on the command line.
// main() starts from LIGHT_CHECK and switches level when it meets
// -c/--fullcheck or -n/--nocheck.
enum check_type
{
  FULL_CHECK = 0,   // selected by -c / --fullcheck
  LIGHT_CHECK = 1,  // default when neither -c nor -n is given
  NO_CHECK = 2      // selected by -n / --nocheck
};
Individual_R* best, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** ptm_fabs_fitness_loss_percent); + +int main(int argc, char** argv) +{ + // The output file (lineage.ae or lineage.rae) contains the following information: + // + // - common data (ae_common::write_to_backup) + // - begin gener (int32_t) + // - end gener (int32_t) + // - final individual index (int32_t) + // - initial genome size (int32_t) + // - initial ancestor (nb genetic units + sequences) (Individual::write_to_backup) + // - replication report of ancestor at generation begin_gener+1 (ae_replic_report::write_to_backup) + // - replication report of ancestor at generation begin_gener+2 (ae_replic_report::write_to_backup) + // - replication report of ancestor at generation begin_gener+3 (ae_replic_report::write_to_backup) + // - ... + // - replication report of ancestor at generation end_gener (ae_replic_report::write_to_backup) + + + printf("\n WARNING : Parameters' change in the middle of a simulation is not managed.\n"); + + + // ===================== + // Parse command line + // ===================== + + // Default values + check_type check_genome = LIGHT_CHECK; + bool verbose = false; + int64_t t0 = 0; + int64_t t_end = -1; + int32_t final_indiv_index = -1; + int32_t final_indiv_rank = -1; + char tree_file_name[50]; + + const char * short_options = "hVvncb:i:r:e:"; + static struct option long_options[] = { + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {"verbose", no_argument, NULL, 'v'}, + {"nocheck", no_argument, NULL, 'n'}, + {"fullcheck", no_argument, NULL, 'c'}, + {"begin", required_argument, NULL, 'b'}, + {"index", required_argument, NULL, 'i'}, + {"rank", required_argument, NULL, 'r'}, + {"end", required_argument, NULL, 'e' }, + {0, 0, 0, 0} + }; + + int option; + while((option = getopt_long(argc, argv, short_options, long_options, NULL)) != -1) + { + 
switch(option) + { + case 'h' : + { + print_help(argv[0]); + exit(EXIT_SUCCESS); + } + case 'V' : + { + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + } + case 'v' : verbose = true; break; + case 'n' : check_genome = NO_CHECK; break; + case 'c' : check_genome = FULL_CHECK; break; + case 'b' : t0 = atol(optarg); break; + case 'i' : final_indiv_index = atol(optarg); break; + case 'r' : final_indiv_rank = atol(optarg); break; + case 'e' : + { + if (strcmp(optarg, "") == 0) + { + printf("%s: error: Option -e or --end : missing argument.\n", argv[0]); + exit(EXIT_FAILURE); + } + + t_end = atol(optarg); + + break; + } + } + } + + // Set undefined command line parameters to default values + if (t_end == -1) { + // Set t_end to the content of the LAST_GENER file if it exists. + // If it doesn't, print help and exit + FILE* lg_file = fopen(LAST_GENER_FNAME, "r"); + if (lg_file != NULL) { + if (fscanf(lg_file, "%" PRId64, &t_end) == EOF) { + printf("ERROR: failed to read last generation from file %s\n", + LAST_GENER_FNAME); + exit(EXIT_FAILURE); + } + fclose(lg_file); + } + else { + printf("%s: error: You must provide a generation number.\n", argv[0]); + exit(EXIT_FAILURE); + } + } + + // Load the simulation + ExpManager* exp_manager = new ExpManager(); + exp_manager->load(t_end, true, false); + + World* world = exp_manager->world(); + int16_t grid_width = world->width(); + int16_t grid_height = world->height(); + // Check that the tree was recorded + if (not exp_manager->record_tree()) { + Utils::ExitWithUsrMsg("The phylogenetic tree wasn't recorded during " + "evolution, could not reconstruct the lineage"); + } + + int64_t tree_step = exp_manager->tree_step(); + + //delete exp_manager; + + + // The tree + Tree* tree = NULL; + + // Indices, ranks and replication reports of the individuals in the lineage + int32_t* indices = new int32_t[t_end - t0 + 1]; + //~ int32_t * ranks = new int32_t[end_gener - begin_gener + 1]; + ReplicationReport** reports = new 
ReplicationReport*[t_end - t0]; + // NB: we do not need the report of the ancestor at generation begin_gener + // (it might be the generation 0, for which we have no reports) + // reports[0] = how ancestor at generation begin_gener + 1 was created + // reports[i] = how ancestor at generation begin_gener + i + 1 was created + // reports[end_gener - begin_gener - 1] = how the final individual was created + // + // ----------------------------------------------------------------------------------------- + // reports | gener_0 => gener_1 | gener_1 => gener_2 | ... | gener_n-1 => gener_n | //////////////// | + // ----------------------------------------------------------------------------------------- + // indices | index at gener_0 | index at gener_1 | ... | index at gener_n-1 | index at gener_n | + // ----------------------------------------------------------------------------------------- + + + // ============================ + // Init files + // ============================ + std::ofstream network; + network.open("checkpoint_network_knockout.csv",std::ofstream::trunc); + network<<"Generation,"<<"Enhancer_or_Inhibitor,"<<"Value,"<<"Metaerror_lost,"<<"Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" + <<std::endl; + network.flush(); + network.close(); + + float filter_values[3] = {0.01, 0.001, 0.005}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "checkpoint_network_filtered_" + str_filter_value + ".csv"; + network.open(file_name, std::ofstream::trunc); + network << "Generation," << "Enhancer," << "Inhibitor," << "Both," << "Value" + << "Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" << std::endl; + network.flush(); + network.close(); + + file_name = "checkpoint_network_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::trunc); + network << "Generation," << "nb_enhancing," << "nb_inhibitor," << "nb_both,nb_edges," 
<< "filter_nb_enhancing," + << "filter_nb_inhibitor," << "filter_nb_both,filter_nb_edges" << std::endl; + network.flush(); + network.close(); + } + + float filter_values_2[4] = {0.0, 0.01, 0.001, 0.005}; + + for (float filter_value : filter_values_2) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "checkpoint_network_dump_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::trunc); + network << "Generation," << "Source," << "Destination," << "Enhancer_or_Inhibitor," << + "Value" << "Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" << std::endl; + network.flush(); + network.close(); + } + + network.open("checkpoint_network_knockout_single_env.csv",std::ofstream::trunc); + network<<"Generation,"<<"Enhancer_or_Inhibitor,"<<"TargetModel,"<<"Value"<<"Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent"<<std::endl; + network.flush(); + network.close(); + + + // ========================= + // Load the last tree file + // ========================= + + if (verbose) + { + printf("\n\n"); + printf("====================================\n"); + printf(" Loading the last tree file ... "); + fflush(stdout); + } + + + // Example for ae_common::rec_params->tree_step() == 100 : + // + // tree_000100.ae ==> generations 1 to 100. + // tree_000200.ae ==> generations 101 to 200. + // tree_000300.ae ==> generations 201 to 300. + // etc. + // + // Thus, the information for generation end_gener are located + // in the file called (end_gener/ae_common::rec_params->tree_step() + 1) * ae_common::rec_params->tree_step(), + // except if end_gener%ae_common::rec_params->tree_step()==0. 
+ + #ifdef __REGUL + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t_end); + #else + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t_end); + #endif + + tree = new Tree(exp_manager, tree_file_name); + + if (verbose) + { + printf("OK\n"); + printf("====================================\n"); + } + + + // ============================================================================ + // Find the index of the final individual and retrieve its replication report + // ============================================================================ + if (final_indiv_index != -1) + { + // The index was directly provided, get the replication report and update the indices and ranks tables + reports[t_end - t0 - 1] = + new ReplicationReport(*(tree->report_by_index(t_end, + final_indiv_index))); + final_indiv_rank = reports[t_end - t0 - 1]->rank(); + + indices[t_end - t0] = final_indiv_index; + } + else + { + if (final_indiv_rank == -1) + { + // No index nor rank was given in the command line. + // By default, we construct the lineage of the best individual, the rank of which + // is simply the number of individuals in the population. 
+ final_indiv_rank = exp_manager->nb_indivs(); + } + + // Retrieve the replication report of the individual of interest (at t_end) + reports[t_end - t0 - 1] = new ReplicationReport(*(tree->report_by_rank(t_end, final_indiv_rank))); + final_indiv_index = reports[t_end - t0 - 1]->id(); + + indices[t_end - t0] = final_indiv_index; + //~ ranks[end_gener - begin_gener] = final_indiv_rank; + } + + if (verbose) printf("The final individual has the index %" PRId32 " (rank %" PRId32 ")\n", final_indiv_index, final_indiv_rank); + + + // ======================= + // Open the output file + // ======================= + char output_file_name[101]; + + #ifdef __REGUL + snprintf(output_file_name, 100, + "lineage-b%06" PRId64 "-e%06" PRId64 "-i%" PRId32 "-r%" PRId32 ".ae", + t0, t_end, final_indiv_index, final_indiv_rank); + #else + snprintf(output_file_name, 100, + "lineage-b%06" PRId64 "-e%06" PRId64 "-i%" PRId32 "-r%" PRId32 ".ae", + t0, t_end, final_indiv_index, final_indiv_rank); + #endif + + gzFile lineage_file = gzopen(output_file_name, "w"); + if (lineage_file == NULL) + { + fprintf(stderr, "File %s could not be created, exiting.\n", output_file_name); + fprintf(stderr, "Please check your permissions in this directory.\n"); + exit(EXIT_FAILURE); + } + + + + + // =================================================== + // Retrieve the replication reports of the ancestors + // =================================================== + + if (verbose) + { + printf("\n\n\n"); + printf("======================================================================\n"); + printf(" Parsing tree files to retrieve the ancestors' replication reports... 
\n"); + printf("======================================================================\n"); + } + + + // Retrieve the index of the first ancestor from the last replication report + indices[t_end - t0 -1] = reports[t_end - t0 - 1]->parent_id(); + + // For each generation (going backwards), retrieve the index of the parent and + // the corresponding replication report + for (int64_t i = t_end - t0 - 2 ; i >= 0 ; i--) + { + int64_t t = t0 + i + 1; + + // We want to fill reports[i], that is to say, how the ancestor + // at generation begin_gener + i + 1 was created + if (verbose) + printf("Getting the replication report for the ancestor at generation %" PRId64 "\n", t); + + // If we've exhausted the current tree file, load the next one + if (Utils::mod(t, tree_step) == 0) + { + // Change the tree file + delete tree; + + #ifdef __REGUL + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t); + #else + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t); + #endif + + tree = new Tree(exp_manager, tree_file_name); + } + + // Copy the replication report of the ancestor + //printf("Looking for the report %d at %d\n",t,indices[i + 1]); + //bool notfound = true; + for (int16_t x = 0 ; x < grid_width ; x++) + for (int16_t y = 0 ; y < grid_height ; y++) { + ReplicationReport* rep = new ReplicationReport(*(tree->report_by_index(t, + x * grid_height + y))); + if (rep->id() == indices[i + 1]) { + reports[i] = rep; + //printf("FOUND !!\n"); + //notfound=false; + break; + } else + delete rep; + } + + //if (notfound) printf("ERROR NOT FOUND\n"); + + + + // Retreive the index and rank of the next ancestor from the report + indices[i] = reports[i]->parent_id(); + } + delete exp_manager; + + + if (verbose) printf("OK\n"); + + + // ============================================================================= + // Get the initial genome from the backup file and write it in the output file + // ============================================================================= + + if 
(verbose) + { + printf("\n\n\n"); + printf("=============================================== \n"); + printf(" Getting the initial genome sequence... "); + fflush(NULL); + } + + printf("Computing Network Knockout for generation %d\n",t0); + // Load the simulation + exp_manager = new ExpManager(); + exp_manager->load(t0, true, false); + + // Copy the initial ancestor + // NB : The list of individuals is sorted according to the index + Individual* initial_ancestor; + //= exp_manager->indiv_by_id(indices[0]); + for (int16_t x = 0 ; x < grid_width ; x++) + for (int16_t y = 0 ; y < grid_height ; y++) { + if (exp_manager->world()->indiv_at(x,y)->id() == indices[0]) { + initial_ancestor = exp_manager->world()->indiv_at(x,y); + } + } + + + Individual_R* best = dynamic_cast<Individual_R*>(initial_ancestor); + best->do_transcription_translation_folding(); + + int nb_edges = 0; + for (auto &rna: best->get_rna_list_coding()) { + nb_edges+=((Rna_R *) rna)->nb_influences(); + } + + double* fabs_metaerror_loss = new double[nb_edges]; + double* fabs_fitness_loss = new double[nb_edges]; + double* fabs_metaerror_loss_percent = new double[nb_edges]; + double* fabs_fitness_loss_percent = new double[nb_edges]; + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] = 0; + fabs_fitness_loss[i] = 0; + fabs_metaerror_loss_percent[i] = 0; + fabs_fitness_loss_percent[i] = 0; + } + + + int nb_iteration = 10; + printf("Running %d evals for %d edges\n",nb_iteration,nb_edges); + for (int i = 0; i < nb_iteration; i++) { + printf("Iteration %d\n",i); + exp_manager->world()->ApplyHabitatVariation(); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double 
operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->Evaluate(); + + fabs_metaerror_loss[i_edges] += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + fabs_fitness_loss[i_edges] += std::fabs(base_fitness-best->fitness()); + + fabs_metaerror_loss_percent[i_edges] += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + fabs_fitness_loss_percent[i_edges] += (std::fabs(base_fitness-best->fitness()))/best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] /= nb_iteration; + fabs_fitness_loss[i] /= nb_iteration; + fabs_metaerror_loss_percent[i] /= nb_iteration; + fabs_fitness_loss_percent[i] /= nb_iteration; + } + + extract_network(t0,best,fabs_metaerror_loss,fabs_fitness_loss, + fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + filter_network(t0,best,fabs_metaerror_loss,fabs_fitness_loss,fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + dump_network(t0,best,fabs_metaerror_loss,fabs_fitness_loss,fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + + int nb_phenotypic_target_models = dynamic_cast<PhenotypicTargetHandler_R*>(exp_manager->world()-> + phenotypic_target_handler())->phenotypic_target_models_.size(); + printf("Running with a single phenotypic target models : %d\n",nb_phenotypic_target_models); + + double** ptm_fabs_metaerror_loss = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_fitness_loss = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_metaerror_loss_percent = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_fitness_loss_percent = new double*[nb_phenotypic_target_models]; + + for (int i = 0; i < 
nb_phenotypic_target_models; i++) { + ptm_fabs_metaerror_loss[i] = new double[nb_edges]; + ptm_fabs_fitness_loss[i] = new double[nb_edges]; + ptm_fabs_metaerror_loss_percent[i] = new double[nb_edges]; + ptm_fabs_fitness_loss_percent[i] = new double[nb_edges]; + } + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) { + + for (int j = 0; j < nb_edges; j++) { + ptm_fabs_metaerror_loss[target_id][j] = 0; + ptm_fabs_fitness_loss[target_id][j] = 0; + ptm_fabs_metaerror_loss_percent[target_id][j] = 0; + ptm_fabs_fitness_loss_percent[target_id][j] = 0; + } + + printf("Testing with phenotypic target model %d\n",target_id); + dynamic_cast<PhenotypicTargetHandler_R*>(exp_manager->world()->phenotypic_target_handler())->set_single_env(target_id); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->Evaluate(); + + //printf("Testing with phenotypic target model %d : %lf %lf\n",target_id,base_metaerror,best->dist_to_target_by_feature(METABOLISM)); + + ptm_fabs_metaerror_loss[target_id][i_edges] += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + ptm_fabs_fitness_loss[target_id][i_edges] += std::fabs(base_fitness-best->fitness()); + + ptm_fabs_metaerror_loss_percent[target_id][i_edges] += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + ptm_fabs_fitness_loss_percent[target_id][i_edges] += 
(std::fabs(base_fitness-best->fitness()))/best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + extract_network_single_target_model(t0,best,nb_phenotypic_target_models,ptm_fabs_metaerror_loss,ptm_fabs_fitness_loss,ptm_fabs_metaerror_loss_percent,ptm_fabs_fitness_loss_percent); + + + delete best; + + if (verbose) + { + printf("OK\n"); + printf("=============================================== \n"); + } + + + // =============================================================================== + // Write the replication reports of the successive ancestors in the output file + // (and, optionally, check that the rebuilt genome is correct each time a backup + // is available) + // =============================================================================== + + if (verbose) + { + printf("\n\n\n"); + printf("============================================================ \n"); + printf(" Write the replication reports in the output file... \n"); + printf("============================================================ \n"); + } + + std::list<GeneticUnit>::const_iterator unit; + + Individual* stored_indiv = nullptr; + std::list<GeneticUnit>::const_iterator stored_gen_unit; + + ExpManager* exp_manager_backup = NULL; + + // NB: I must keep the genome encapsulated inside an Individual, because + // replaying the mutations has side effects on the list of promoters, + // which is stored in the individual + bool check_genome_now = false; + + for (int64_t i = 0 ; i < t_end - t0 ; i++) + { + // Where are we in time... 
+ int64_t t = t0 + i + 1; + + if (Utils::mod(t, exp_manager->backup_step()) == 0) { + + printf("Computing Network Knockout for generation %d\n",t); + + // Load the simulation + exp_manager_backup = new ExpManager(); + exp_manager_backup->load(t, true, false); + + // Copy the ancestor from the backup + for (int16_t x = 0; x < grid_width; x++) + for (int16_t y = 0; y < grid_height; y++) { + if (exp_manager_backup->world()->indiv_at(x, y)->id() == indices[i + 1]) { + stored_indiv = exp_manager_backup->world()->indiv_at(x, y); + break; + } + } + + Individual_R *best = dynamic_cast<Individual_R *>(stored_indiv); + best->do_transcription_translation_folding(); + + nb_edges = 0; + for (auto &rna: best->get_rna_list_coding()) { + nb_edges += ((Rna_R *) rna)->nb_influences(); + } + + delete[] fabs_metaerror_loss; + delete[] fabs_fitness_loss; + delete[] fabs_metaerror_loss_percent; + delete[] fabs_fitness_loss_percent; + + fabs_metaerror_loss = new double[nb_edges]; + fabs_fitness_loss = new double[nb_edges]; + fabs_metaerror_loss_percent = new double[nb_edges]; + fabs_fitness_loss_percent = new double[nb_edges]; + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] = 0; + fabs_fitness_loss[i] = 0; + fabs_metaerror_loss_percent[i] = 0; + fabs_fitness_loss_percent[i] = 0; + } + + printf("Running %d evals for %d edges\n", nb_iteration, nb_edges); + for (int i = 0; i < nb_iteration; i++) { + printf("Iteration %d\n", i); + exp_manager->world()->ApplyHabitatVariation(); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) 
rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->Evaluate(); + + fabs_metaerror_loss[i_edges] += std::fabs( + base_metaerror - best->dist_to_target_by_feature(METABOLISM)); + fabs_fitness_loss[i_edges] += std::fabs(base_fitness - best->fitness()); + + fabs_metaerror_loss_percent[i_edges] += + (std::fabs(base_metaerror - best->dist_to_target_by_feature(METABOLISM))) / + best->dist_to_target_by_feature(METABOLISM); + fabs_fitness_loss_percent[i_edges] += + (std::fabs(base_fitness - best->fitness())) / best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] /= nb_iteration; + fabs_fitness_loss[i] /= nb_iteration; + fabs_metaerror_loss_percent[i] /= nb_iteration; + fabs_fitness_loss_percent[i] /= nb_iteration; + } + + extract_network(t,best, fabs_metaerror_loss, fabs_fitness_loss, + fabs_metaerror_loss_percent, fabs_fitness_loss_percent); + filter_network(t,best, fabs_metaerror_loss, fabs_fitness_loss, fabs_metaerror_loss_percent, + fabs_fitness_loss_percent); + dump_network(t,best, fabs_metaerror_loss, fabs_fitness_loss, fabs_metaerror_loss_percent, + fabs_fitness_loss_percent); + + printf("Running with a single phenotypic target models : %d\n", nb_phenotypic_target_models); + + for (int i = 0; i < nb_phenotypic_target_models; i++) { + delete[] ptm_fabs_metaerror_loss[i]; + delete[] ptm_fabs_fitness_loss[i]; + delete[] ptm_fabs_metaerror_loss_percent[i]; + delete[] ptm_fabs_fitness_loss_percent[i]; + } + + for (int i = 0; i < nb_phenotypic_target_models; i++) { + ptm_fabs_metaerror_loss[i] = new double[nb_edges]; + ptm_fabs_fitness_loss[i] = new double[nb_edges]; + ptm_fabs_metaerror_loss_percent[i] = new double[nb_edges]; + ptm_fabs_fitness_loss_percent[i] = new double[nb_edges]; + } + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) { 
+ + for (int j = 0; j < nb_edges; j++) { + ptm_fabs_metaerror_loss[target_id][j] = 0; + ptm_fabs_fitness_loss[target_id][j] = 0; + ptm_fabs_metaerror_loss_percent[target_id][j] = 0; + ptm_fabs_fitness_loss_percent[target_id][j] = 0; + } + + printf("Testing with phenotypic target model %d\n", target_id); + dynamic_cast<PhenotypicTargetHandler_R *>(exp_manager->world()->phenotypic_target_handler())->set_single_env( + target_id); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->Evaluate(); + + //printf("Testing with phenotypic target model %d : %lf %lf\n",target_id,base_metaerror,best->dist_to_target_by_feature(METABOLISM)); + + ptm_fabs_metaerror_loss[target_id][i_edges] += std::fabs( + base_metaerror - best->dist_to_target_by_feature(METABOLISM)); + ptm_fabs_fitness_loss[target_id][i_edges] += std::fabs(base_fitness - best->fitness()); + + ptm_fabs_metaerror_loss_percent[target_id][i_edges] += + (std::fabs(base_metaerror - best->dist_to_target_by_feature(METABOLISM))) / + best->dist_to_target_by_feature(METABOLISM); + ptm_fabs_fitness_loss_percent[target_id][i_edges] += + (std::fabs(base_fitness - best->fitness())) / best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + extract_network_single_target_model(t,best, nb_phenotypic_target_models, ptm_fabs_metaerror_loss, + ptm_fabs_fitness_loss, ptm_fabs_metaerror_loss_percent, + 
ptm_fabs_fitness_loss_percent); + + + delete best; + delete exp_manager_backup; + } + + } + + + gzclose(lineage_file); + delete [] reports; + delete exp_manager; + + exit(EXIT_SUCCESS); +} + +/*! + \brief + +*/ +void print_help(char* prog_path) +{ + // default values : + // begin_gener = 0 + // indiv = best individual at generation end_gener + + // there must be a genome backup file for begin_gener + + // not relevant if crossover + + printf("\n"); + printf("*********************** aevol - Artificial Evolution ******************* \n"); + printf("* * \n"); + printf("* Lineage post-treatment program * \n"); + printf("* * \n"); + printf("************************************************************************ \n"); + printf("\n\n"); + printf("This program is Free Software. No Warranty.\n"); + printf("Copyright (C) 2009 LIRIS.\n"); + printf("\n"); +#ifdef __REGUL + printf("Usage : rlineage -h\n"); + printf("or : rlineage [-vn] [-i index | -r rank] [-b gener1] -e end_gener \n"); +#else + printf("Usage : lineage -h\n"); + printf("or : lineage [-vn] [-i index | -r rank] [-b gener1] -e end_gener \n"); +#endif + printf("\n"); +#ifdef __REGUL + printf("This program retrieves the ancestral lineage of an individual and writes \n"); + printf("it in an output file called lineage.rae. Specifically, it retrieves the \n"); + printf("lineage of the individual of end_gener whose index is index, going \n"); + printf("back in time up to gener1. This program requires at least one population backup\n"); + printf("file (for the generation gener1), one environment backup file (for the generation gener1)\n"); + printf("and all tree files for generations gener1 to end_gener.\n"); +#else + printf("This program retrieves the ancestral lineage of an individual and writes \n"); + printf("it in an output file called lineage.ae. Specifically, it retrieves the \n"); + printf("lineage of the individual of end_gener whose index is index, going \n"); + printf("back in time up to gener1. 
This program requires at least one population backup\n"); + printf("file (for the generation gener1), one environment backup file (for the generation gener1)\n"); + printf("and all tree files for generations gener1 to end_gener.\n"); +#endif + printf("\n"); + printf("WARNING: This program should not be used for simulations run with lateral\n"); + printf("transfer. When an individual has more than one parent, the notion of lineage\n"); + printf("used here is not relevant.\n"); + printf("\n"); + printf("\t-h or --help : Display this help.\n"); + printf("\n"); + printf("\t-v or --verbose : Be verbose, listing generations as they are \n"); + printf("\t treated.\n"); + printf("\n"); + printf("\t-n or --nocheck : Disable genome sequence checking. Makes the \n"); + printf("\t program faster, but it is not recommended. \n"); + printf("\t It is better to let the program check that \n"); + printf("\t when we rebuild the genomes of the ancestors\n"); + printf("\t from the lineage file, we get the same sequences\n"); + printf("\t as those stored in the backup files.\n"); + printf("\n"); + printf("\t-c or --fullcheck : Will perform the genome checks every <BACKUP_STEP>\n"); + printf("\t generations. Default behaviour is lighter as it\n"); + printf("\t only performs these checks at the ending generation.\n"); + printf("\n"); + printf("\t-i index or --index index : \n"); + printf("\t Retrieve the lineage of the individual whose\n"); + printf("\t index is index. The index must be comprised \n"); + printf("\t between 0 and N-1, with N the size of the \n"); + printf("\t population at the ending generation. If neither\n"); + printf("\t index nor rank are specified, the program computes \n"); + printf("\t the lineage of the best individual of the ending \n"); + printf("\t generation.\n"); + printf("\n"); + printf("\t-r rank or --rank rank : \n"); + printf("\t Retrieve the lineage of the individual whose\n"); + printf("\t rank is rank. 
The rank must be comprised \n"); + printf("\t between 1 and N, with N the size of the \n"); + printf("\t population at the endind generation. If neither\n"); + printf("\t index nor rank are specified, the program computes \n"); + printf("\t the lineage of the best individual of the ending \n"); + printf("\t generation.\n"); + printf("\n"); + printf("\t-b gener1 or --begin gener1 : \n"); + printf("\t Retrieve the lineage up to generation gener1.\n"); + printf("\t There must be a genome backup file for this\n"); + printf("\t generation. If not specified, the program \n"); + printf("\t retrieves the lineage up to generation 0.\n"); + printf("\n"); + printf("\t-e end_gener or --end end_gener : \n"); + printf("\t Retrieve the lineage of the individual of end_gener \n"); + printf("\t (default: that contained in file last_gener.txt, if any)\n"); + printf("\n"); +} + + + +void extract_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + std::ofstream network; + network.open("checkpoint_network_knockout.csv",std::ofstream::app); + + int i_edges = 0; + + for (auto& rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R*)rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R*)rna)->_enhancing_coef_list[i] > 0) + { + network<<time<<",1,"<<((Rna_R*)rna)->_enhancing_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," + <<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + + if (((Rna_R*)rna)->_operating_coef_list[i] > 0) + { + network<<time<<",0,"<<((Rna_R*)rna)->_operating_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," + 
<<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + i_edges++; + } + } + + network.flush(); + network.close(); +} + +void filter_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + float filter_values[3] = {0.01, 0.001, 0.005}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "checkpoint_network_filtered_" + str_filter_value + ".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::app); + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + int both = 0; + if (fabs_metaerror_loss[i_edges] >= filter_value) { + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && + (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + network << time << ",1,1,1," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << time << ",1,0,0," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + 
filter_nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << time << ",0,1,0," << ((Rna_R *) rna)->_operating_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_operating++; + } + } + filter_nb_edges++; + } + + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + nb_edges_operating++; + } + } + nb_edges++; + + i_edges++; + } + } + + network.flush(); + network.close(); + + file_name = "checkpoint_network_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::app); + network << time << "," << nb_edges_enhance << "," << nb_edges_operating << "," << nb_edges_both << "," + << nb_edges << "," << + filter_nb_edges_enhance << "," << filter_nb_edges_operating << "," << filter_nb_edges_both << "," + << filter_nb_edges << std::endl; + network.close(); + + } + +} + + + +void dump_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + + float filter_values[4] = {0.0, 0.01, 0.001, 0.005}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "checkpoint_network_dump_"+str_filter_value+".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::app); + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int 
filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + for (auto& protein : rna->transcribed_proteins()) { + if (fabs_metaerror_loss[i_edges] >= filter_value) { + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << time << "," << protein->shine_dal_pos() << "," + << dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<"," + << "1," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges] << std::endl; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << time << "," << protein->shine_dal_pos() << "," + << dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<"," + << "0," << ((Rna_R *) rna)->_operating_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges] << std::endl; + } + } + } + + i_edges++; + } + } + + network.flush(); + network.close(); + + } + +} + +void extract_network_single_target_model(int time, Individual_R* indiv, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** ptm_fabs_fitness_loss_percent) { + std::ofstream network; + network.open("checkpoint_network_knockout_single_env.csv",std::ofstream::trunc); + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) { + int i_edges = 0; + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << time << ",1," <<target_id<<","<< ((Rna_R *) rna)->_enhancing_coef_list[i] 
<< "," + << ptm_fabs_metaerror_loss[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss[target_id][i_edges] <<"," + << ptm_fabs_metaerror_loss_percent[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << time << ",0," <<target_id<<","<< ((Rna_R *) rna)->_operating_coef_list[i] << "," + << ptm_fabs_metaerror_loss[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss[target_id][i_edges] << "," + << ptm_fabs_metaerror_loss_percent[target_id][i_edges] <<"," + << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl; + } + i_edges++; + } + } + } + + network.flush(); + network.close(); + +} diff --git a/src/post_treatments/checkpoint_stats.cpp b/src/post_treatments/checkpoint_stats.cpp new file mode 100644 index 0000000000000000000000000000000000000000..be6bcd9f8b89ab064b2e7073b5846f8c5d95f04c --- /dev/null +++ b/src/post_treatments/checkpoint_stats.cpp @@ -0,0 +1,835 @@ +// **************************************************************************** +// +// Aevol - An in silico experimental evolution platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// +// **************************************************************************** + +// ================================================================= +// Libraries +// ================================================================= +#include <errno.h> +#include <inttypes.h> +#include <getopt.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <zlib.h> +#include <sys/stat.h> + +#include <list> + +#include <cstdint> +#include <fstream> +#include <limits> +#include <string> +// ================================================================= +// Project Files +// ================================================================= +#include "aevol.h" + +using namespace aevol; + +enum check_type +{ + FULL_CHECK = 0, + LIGHT_CHECK = 1, + NO_CHECK = 2 +}; + +// ================================================================= +// Function declarations +// ================================================================= +void print_help(char* prog_path); +void extract_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void dump_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void filter_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void extract_network_single_target_model(Individual_R* best, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** ptm_fabs_fitness_loss_percent); + +int main(int argc, char** argv) +{ + // The output file (lineage.ae or lineage.rae) 
contains the following information: + // + // - common data (ae_common::write_to_backup) + // - begin gener (int32_t) + // - end gener (int32_t) + // - final individual index (int32_t) + // - initial genome size (int32_t) + // - initial ancestor (nb genetic units + sequences) (Individual::write_to_backup) + // - replication report of ancestor at generation begin_gener+1 (ae_replic_report::write_to_backup) + // - replication report of ancestor at generation begin_gener+2 (ae_replic_report::write_to_backup) + // - replication report of ancestor at generation begin_gener+3 (ae_replic_report::write_to_backup) + // - ... + // - replication report of ancestor at generation end_gener (ae_replic_report::write_to_backup) + + + printf("\n WARNING : Parameters' change in the middle of a simulation is not managed.\n"); + + + // ===================== + // Parse command line + // ===================== + + // Default values + check_type check_genome = LIGHT_CHECK; + bool verbose = false; + int64_t t0 = 0; + int64_t t_end = -1; + int32_t final_indiv_index = -1; + int32_t final_indiv_rank = -1; + char tree_file_name[50]; + + const char * short_options = "hVvncb:i:r:e:"; + static struct option long_options[] = { + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {"verbose", no_argument, NULL, 'v'}, + {"nocheck", no_argument, NULL, 'n'}, + {"fullcheck", no_argument, NULL, 'c'}, + {"begin", required_argument, NULL, 'b'}, + {"index", required_argument, NULL, 'i'}, + {"rank", required_argument, NULL, 'r'}, + {"end", required_argument, NULL, 'e' }, + {0, 0, 0, 0} + }; + + int option; + while((option = getopt_long(argc, argv, short_options, long_options, NULL)) != -1) + { + switch(option) + { + case 'h' : + { + print_help(argv[0]); + exit(EXIT_SUCCESS); + } + case 'V' : + { + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + } + case 'v' : verbose = true; break; + case 'n' : check_genome = NO_CHECK; break; + case 'c' : check_genome = FULL_CHECK; break; + case 
'b' : t0 = atol(optarg); break; + case 'i' : final_indiv_index = atol(optarg); break; + case 'r' : final_indiv_rank = atol(optarg); break; + case 'e' : + { + if (strcmp(optarg, "") == 0) + { + printf("%s: error: Option -e or --end : missing argument.\n", argv[0]); + exit(EXIT_FAILURE); + } + + t_end = atol(optarg); + + break; + } + } + } + + // Set undefined command line parameters to default values + if (t_end == -1) { + // Set t_end to the content of the LAST_GENER file if it exists. + // If it doesn't, print help and exit + FILE* lg_file = fopen(LAST_GENER_FNAME, "r"); + if (lg_file != NULL) { + if (fscanf(lg_file, "%" PRId64, &t_end) == EOF) { + printf("ERROR: failed to read last generation from file %s\n", + LAST_GENER_FNAME); + exit(EXIT_FAILURE); + } + fclose(lg_file); + } + else { + printf("%s: error: You must provide a generation number.\n", argv[0]); + exit(EXIT_FAILURE); + } + } + + // Load the simulation + ExpManager* exp_manager = new ExpManager(); + exp_manager->load(t_end, true, false); + + World* world = exp_manager->world(); + int16_t grid_width = world->width(); + int16_t grid_height = world->height(); + // Check that the tree was recorded + if (not exp_manager->record_tree()) { + Utils::ExitWithUsrMsg("The phylogenetic tree wasn't recorded during " + "evolution, could not reconstruct the lineage"); + } + + int64_t tree_step = exp_manager->tree_step(); + + //delete exp_manager; + + + // The tree + Tree* tree = NULL; + + // Indices, ranks and replication reports of the individuals in the lineage + int32_t* indices = new int32_t[t_end - t0 + 1]; + //~ int32_t * ranks = new int32_t[end_gener - begin_gener + 1]; + ReplicationReport** reports = new ReplicationReport*[t_end - t0]; + // NB: we do not need the report of the ancestor at generation begin_gener + // (it might be the generation 0, for which we have no reports) + // reports[0] = how ancestor at generation begin_gener + 1 was created + // reports[i] = how ancestor at generation begin_gener + i 
+ 1 was created + // reports[end_gener - begin_gener - 1] = how the final individual was created + // + // ----------------------------------------------------------------------------------------- + // reports | gener_0 => gener_1 | gener_1 => gener_2 | ... | gener_n-1 => gener_n | //////////////// | + // ----------------------------------------------------------------------------------------- + // indices | index at gener_0 | index at gener_1 | ... | index at gener_n-1 | index at gener_n | + // ----------------------------------------------------------------------------------------- + + + // ============================ + // Init files + // ============================ + std::ofstream network; + network.open("checkpoint_stats.csv",std::ofstream::trunc); + network<<"Generation,"<<"genome_size,"<<"Fitness,"<<"Metaerror,"<<"nb_func_genes,"<<"nb_non_func_genes," + <<"nb_coding_RNAs,"<<"nb_non_coding_RNAs,"<<"nb_links,"<<"nb_enhancing_links,"<<"nb_operating_links," + <<std::endl; + + + + // ========================= + // Load the last tree file + // ========================= + + if (verbose) + { + printf("\n\n"); + printf("====================================\n"); + printf(" Loading the last tree file ... "); + fflush(stdout); + } + + + // Example for ae_common::rec_params->tree_step() == 100 : + // + // tree_000100.ae ==> generations 1 to 100. + // tree_000200.ae ==> generations 101 to 200. + // tree_000300.ae ==> generations 201 to 300. + // etc. + // + // Thus, the information for generation end_gener are located + // in the file called (end_gener/ae_common::rec_params->tree_step() + 1) * ae_common::rec_params->tree_step(), + // except if end_gener%ae_common::rec_params->tree_step()==0. 
+ + #ifdef __REGUL + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t_end); + #else + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t_end); + #endif + + tree = new Tree(exp_manager, tree_file_name); + + if (verbose) + { + printf("OK\n"); + printf("====================================\n"); + } + + + // ============================================================================ + // Find the index of the final individual and retrieve its replication report + // ============================================================================ + if (final_indiv_index != -1) + { + // The index was directly provided, get the replication report and update the indices and ranks tables + reports[t_end - t0 - 1] = + new ReplicationReport(*(tree->report_by_index(t_end, + final_indiv_index))); + final_indiv_rank = reports[t_end - t0 - 1]->rank(); + + indices[t_end - t0] = final_indiv_index; + } + else + { + if (final_indiv_rank == -1) + { + // No index nor rank was given in the command line. + // By default, we construct the lineage of the best individual, the rank of which + // is simply the number of individuals in the population. 
+ final_indiv_rank = exp_manager->nb_indivs(); + } + + // Retrieve the replication report of the individual of interest (at t_end) + reports[t_end - t0 - 1] = new ReplicationReport(*(tree->report_by_rank(t_end, final_indiv_rank))); + final_indiv_index = reports[t_end - t0 - 1]->id(); + + indices[t_end - t0] = final_indiv_index; + //~ ranks[end_gener - begin_gener] = final_indiv_rank; + } + + if (verbose) printf("The final individual has the index %" PRId32 " (rank %" PRId32 ")\n", final_indiv_index, final_indiv_rank); + + + // ======================= + // Open the output file + // ======================= + char output_file_name[101]; + + #ifdef __REGUL + snprintf(output_file_name, 100, + "lineage-b%06" PRId64 "-e%06" PRId64 "-i%" PRId32 "-r%" PRId32 ".ae", + t0, t_end, final_indiv_index, final_indiv_rank); + #else + snprintf(output_file_name, 100, + "lineage-b%06" PRId64 "-e%06" PRId64 "-i%" PRId32 "-r%" PRId32 ".ae", + t0, t_end, final_indiv_index, final_indiv_rank); + #endif + + gzFile lineage_file = gzopen(output_file_name, "w"); + if (lineage_file == NULL) + { + fprintf(stderr, "File %s could not be created, exiting.\n", output_file_name); + fprintf(stderr, "Please check your permissions in this directory.\n"); + exit(EXIT_FAILURE); + } + + + + + // =================================================== + // Retrieve the replication reports of the ancestors + // =================================================== + + if (verbose) + { + printf("\n\n\n"); + printf("======================================================================\n"); + printf(" Parsing tree files to retrieve the ancestors' replication reports... 
\n"); + printf("======================================================================\n"); + } + + + // Retrieve the index of the first ancestor from the last replication report + indices[t_end - t0 -1] = reports[t_end - t0 - 1]->parent_id(); + + // For each generation (going backwards), retrieve the index of the parent and + // the corresponding replication report + for (int64_t i = t_end - t0 - 2 ; i >= 0 ; i--) + { + int64_t t = t0 + i + 1; + + // We want to fill reports[i], that is to say, how the ancestor + // at generation begin_gener + i + 1 was created + if (verbose) + printf("Getting the replication report for the ancestor at generation %" PRId64 "\n", t); + + // If we've exhausted the current tree file, load the next one + if (Utils::mod(t, tree_step) == 0) + { + // Change the tree file + delete tree; + + #ifdef __REGUL + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t); + #else + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t); + #endif + + tree = new Tree(exp_manager, tree_file_name); + } + + // Copy the replication report of the ancestor + //printf("Looking for the report %d at %d\n",t,indices[i + 1]); + //bool notfound = true; + for (int16_t x = 0 ; x < grid_width ; x++) + for (int16_t y = 0 ; y < grid_height ; y++) { + ReplicationReport* rep = new ReplicationReport(*(tree->report_by_index(t, + x * grid_height + y))); + if (rep->id() == indices[i + 1]) { + reports[i] = rep; + //printf("FOUND !!\n"); + //notfound=false; + break; + } else + delete rep; + } + + //if (notfound) printf("ERROR NOT FOUND\n"); + + + + // Retreive the index and rank of the next ancestor from the report + indices[i] = reports[i]->parent_id(); + } + delete exp_manager; + + + if (verbose) printf("OK\n"); + + + // ============================================================================= + // Get the initial genome from the backup file and write it in the output file + // ============================================================================= + + if 
(verbose) + { + printf("\n\n\n"); + printf("=============================================== \n"); + printf(" Getting the initial genome sequence... "); + fflush(NULL); + } + + printf("Computing stats for generation %d\n",t0); + // Load the simulation + exp_manager = new ExpManager(); + exp_manager->load(t0, true, false); + + // Copy the initial ancestor + // NB : The list of individuals is sorted according to the index + Individual* initial_ancestor; + //= exp_manager->indiv_by_id(indices[0]); + for (int16_t x = 0 ; x < grid_width ; x++) + for (int16_t y = 0 ; y < grid_height ; y++) { + if (exp_manager->world()->indiv_at(x,y)->id() == indices[0]) { + initial_ancestor = exp_manager->world()->indiv_at(x,y); + } + } + + + Individual_R* best = dynamic_cast<Individual_R*>(initial_ancestor); + best->do_transcription_translation_folding(); + + best->evaluated_ = false; + best->Evaluate(); + + + int nb_edges = 0, nb_edges_enhance = 0, nb_edges_operating=0; + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + nb_edges ++; + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + nb_edges_operating++; + } + } + + } + + + network<<t0<<","<<best->amount_of_dna()<<","<<best->fitness()<<","<<best->dist_to_target_by_feature(METABOLISM)<<"," + <<best->nb_functional_genes()<<","<<best->nb_non_functional_genes()<<"," + <<best->nb_coding_RNAs()<<","<<best->nb_non_coding_RNAs()<<","<<nb_edges<<","<<nb_edges_enhance<<","<< + nb_edges_operating<<std::endl; + + delete best; + + if (verbose) + { + printf("OK\n"); + printf("=============================================== \n"); + } + + + // =============================================================================== + // Write the replication reports of the successive ancestors in the output file + // (and, optionally, check that the rebuilt genome is correct each time a backup + // is 
available) + // =============================================================================== + + if (verbose) + { + printf("\n\n\n"); + printf("============================================================ \n"); + printf(" Write the replication reports in the output file... \n"); + printf("============================================================ \n"); + } + + std::list<GeneticUnit>::const_iterator unit; + + Individual* stored_indiv = nullptr; + std::list<GeneticUnit>::const_iterator stored_gen_unit; + + ExpManager* exp_manager_backup = NULL; + + // NB: I must keep the genome encapsulated inside an Individual, because + // replaying the mutations has side effects on the list of promoters, + // which is stored in the individual + bool check_genome_now = false; + + for (int64_t i = 0 ; i < t_end - t0 ; i++) + { + // Where are we in time... + int64_t t = t0 + i + 1; + + if (Utils::mod(t, exp_manager->backup_step()) == 0) { + + printf("Computing Network Knockout for generation %d\n",t); + + // Load the simulation + exp_manager_backup = new ExpManager(); + exp_manager_backup->load(t, true, false); + + // Copy the ancestor from the backup + for (int16_t x = 0; x < grid_width; x++) + for (int16_t y = 0; y < grid_height; y++) { + if (exp_manager_backup->world()->indiv_at(x, y)->id() == indices[i + 1]) { + stored_indiv = exp_manager_backup->world()->indiv_at(x, y); + break; + } + } + + Individual_R *best = dynamic_cast<Individual_R *>(stored_indiv); + best->do_transcription_translation_folding(); + + best->evaluated_ = false; + best->Evaluate(); + + + network<<t<<","<<best->amount_of_dna()<<","<<best->fitness()<<","<<best->dist_to_target_by_feature(METABOLISM)<<"," + <<best->nb_functional_genes()<<","<<best->nb_non_functional_genes()<<"," + <<best->nb_coding_RNAs()<<","<<best->nb_non_coding_RNAs()<<","<<nb_edges<<","<<nb_edges_enhance<<","<< + nb_edges_operating<<std::endl; + + best->clear_everything_except_dna_and_promoters(); + 
best->genetic_unit_list_nonconst().clear(); + + delete best; + delete exp_manager_backup; + } + + } + + network.flush(); + network.close(); + + gzclose(lineage_file); + delete [] reports; + delete exp_manager; + + exit(EXIT_SUCCESS); +} + +/*! + \brief + +*/ +void print_help(char* prog_path) +{ + // default values : + // begin_gener = 0 + // indiv = best individual at generation end_gener + + // there must be a genome backup file for begin_gener + + // not relevant if crossover + + printf("\n"); + printf("*********************** aevol - Artificial Evolution ******************* \n"); + printf("* * \n"); + printf("* Lineage post-treatment program * \n"); + printf("* * \n"); + printf("************************************************************************ \n"); + printf("\n\n"); + printf("This program is Free Software. No Warranty.\n"); + printf("Copyright (C) 2009 LIRIS.\n"); + printf("\n"); +#ifdef __REGUL + printf("Usage : rlineage -h\n"); + printf("or : rlineage [-vn] [-i index | -r rank] [-b gener1] -e end_gener \n"); +#else + printf("Usage : lineage -h\n"); + printf("or : lineage [-vn] [-i index | -r rank] [-b gener1] -e end_gener \n"); +#endif + printf("\n"); +#ifdef __REGUL + printf("This program retrieves the ancestral lineage of an individual and writes \n"); + printf("it in an output file called lineage.rae. Specifically, it retrieves the \n"); + printf("lineage of the individual of end_gener whose index is index, going \n"); + printf("back in time up to gener1. This program requires at least one population backup\n"); + printf("file (for the generation gener1), one environment backup file (for the generation gener1)\n"); + printf("and all tree files for generations gener1 to end_gener.\n"); +#else + printf("This program retrieves the ancestral lineage of an individual and writes \n"); + printf("it in an output file called lineage.ae. 
Specifically, it retrieves the \n"); + printf("lineage of the individual of end_gener whose index is index, going \n"); + printf("back in time up to gener1. This program requires at least one population backup\n"); + printf("file (for the generation gener1), one environment backup file (for the generation gener1)\n"); + printf("and all tree files for generations gener1 to end_gener.\n"); +#endif + printf("\n"); + printf("WARNING: This program should not be used for simulations run with lateral\n"); + printf("transfer. When an individual has more than one parent, the notion of lineage\n"); + printf("used here is not relevant.\n"); + printf("\n"); + printf("\t-h or --help : Display this help.\n"); + printf("\n"); + printf("\t-v or --verbose : Be verbose, listing generations as they are \n"); + printf("\t treated.\n"); + printf("\n"); + printf("\t-n or --nocheck : Disable genome sequence checking. Makes the \n"); + printf("\t program faster, but it is not recommended. \n"); + printf("\t It is better to let the program check that \n"); + printf("\t when we rebuild the genomes of the ancestors\n"); + printf("\t from the lineage file, we get the same sequences\n"); + printf("\t as those stored in the backup files.\n"); + printf("\n"); + printf("\t-c or --fullcheck : Will perform the genome checks every <BACKUP_STEP>\n"); + printf("\t generations. Default behaviour is lighter as it\n"); + printf("\t only performs these checks at the ending generation.\n"); + printf("\n"); + printf("\t-i index or --index index : \n"); + printf("\t Retrieve the lineage of the individual whose\n"); + printf("\t index is index. The index must be comprised \n"); + printf("\t between 0 and N-1, with N the size of the \n"); + printf("\t population at the ending generation. 
If neither\n"); + printf("\t index nor rank are specified, the program computes \n"); + printf("\t the lineage of the best individual of the ending \n"); + printf("\t generation.\n"); + printf("\n"); + printf("\t-r rank or --rank rank : \n"); + printf("\t Retrieve the lineage of the individual whose\n"); + printf("\t rank is rank. The rank must be comprised \n"); + printf("\t between 1 and N, with N the size of the \n"); + printf("\t population at the endind generation. If neither\n"); + printf("\t index nor rank are specified, the program computes \n"); + printf("\t the lineage of the best individual of the ending \n"); + printf("\t generation.\n"); + printf("\n"); + printf("\t-b gener1 or --begin gener1 : \n"); + printf("\t Retrieve the lineage up to generation gener1.\n"); + printf("\t There must be a genome backup file for this\n"); + printf("\t generation. If not specified, the program \n"); + printf("\t retrieves the lineage up to generation 0.\n"); + printf("\n"); + printf("\t-e end_gener or --end end_gener : \n"); + printf("\t Retrieve the lineage of the individual of end_gener \n"); + printf("\t (default: that contained in file last_gener.txt, if any)\n"); + printf("\n"); +} + + + +void extract_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + std::ofstream network; + network.open("checkpoint_network_knockout.csv",std::ofstream::app); + + int i_edges = 0; + + for (auto& rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R*)rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R*)rna)->_enhancing_coef_list[i] > 0) + { + network<<aevol::AeTime::time()<<",1,"<<((Rna_R*)rna)->_enhancing_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," + 
<<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + + if (((Rna_R*)rna)->_operating_coef_list[i] > 0) + { + network<<aevol::AeTime::time()<<",0,"<<((Rna_R*)rna)->_operating_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," + <<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + i_edges++; + } + } + + network.flush(); + network.close(); +} + +void filter_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + float filter_values[3] = {0.01, 0.001, 0.005}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "checkpoint_network_filtered_" + str_filter_value + ".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::app); + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + int both = 0; + if (fabs_metaerror_loss[i_edges] >= filter_value) { + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && + (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + network << aevol::AeTime::time() << ",1,1,1," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if 
(((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << aevol::AeTime::time() << ",1,0,0," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << aevol::AeTime::time() << ",0,1,0," << ((Rna_R *) rna)->_operating_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_operating++; + } + } + filter_nb_edges++; + } + + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + nb_edges_operating++; + } + } + nb_edges++; + + i_edges++; + } + } + + network.flush(); + network.close(); + + file_name = "checkpoint_network_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::app); + network << aevol::AeTime::time() << "," << nb_edges_enhance << "," << nb_edges_operating << "," << nb_edges_both << "," + << nb_edges << "," << + filter_nb_edges_enhance << "," << filter_nb_edges_operating << "," << filter_nb_edges_both << "," + << filter_nb_edges << std::endl; + network.close(); + + } + +} + + + +void dump_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + + float filter_values[4] = {0.0, 0.01, 0.001, 0.005}; + + for (float filter_value : 
filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "checkpoint_network_dump_"+str_filter_value+".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::app); + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + for (auto& protein : rna->transcribed_proteins()) { + if (fabs_metaerror_loss[i_edges] >= filter_value) { + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << aevol::AeTime::time() << "," << protein->shine_dal_pos() << "," + << dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<"," + << "1," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges] << std::endl; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << aevol::AeTime::time() << "," << protein->shine_dal_pos() << "," + << dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<"," + << "0," << ((Rna_R *) rna)->_operating_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges] << std::endl; + } + } + } + + i_edges++; + } + } + + network.flush(); + network.close(); + + } + +} + +void extract_network_single_target_model(Individual_R* indiv, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** ptm_fabs_fitness_loss_percent) { + std::ofstream network; + network.open("checkpoint_network_knockout_single_env.csv",std::ofstream::trunc); + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) { + int i_edges = 0; + for (auto &rna: indiv->get_rna_list_coding()) { + for 
(unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << aevol::AeTime::time() << ",1," <<target_id<<","<< ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << ptm_fabs_metaerror_loss[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss[target_id][i_edges] <<"," + << ptm_fabs_metaerror_loss_percent[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << aevol::AeTime::time() << ",0," <<target_id<<","<< ((Rna_R *) rna)->_operating_coef_list[i] << "," + << ptm_fabs_metaerror_loss[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss[target_id][i_edges] << "," + << ptm_fabs_metaerror_loss_percent[target_id][i_edges] <<"," + << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl; + } + i_edges++; + } + } + } + + network.flush(); + network.close(); + +} diff --git a/src/post_treatments/coalescence.cpp b/src/post_treatments/coalescence.cpp new file mode 100644 index 0000000000000000000000000000000000000000..19b600d6d8b02bfee1878eff5d3f14c1326b1bf7 --- /dev/null +++ b/src/post_treatments/coalescence.cpp @@ -0,0 +1,411 @@ +// **************************************************************************** +// +// Aevol - An in silico experimental evolution platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by 
+// the Free Software Foundation, either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ****************************************************************************
+
+// =================================================================
+// Libraries
+// =================================================================
+#include <errno.h>
+#include <inttypes.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <zlib.h>
+#include <sys/stat.h>
+
+#include <list>
+#include <vector>
+#include <unordered_map>
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+// =================================================================
+// Project Files
+// =================================================================
+#include "aevol.h"
+
+using namespace aevol;
+
+// =================================================================
+// Function declarations
+// =================================================================
+void print_help(char* prog_path);
+// Reads a lineage file (sole positional argument) and writes coalescence.csv: for each generation whose lineage report holds >= 1 mutational event, the number of generations until the whole population descends from the lineage individual.
+int main(int argc, char** argv)
+{
+  printf("\n WARNING : Parameters' change in the middle of a simulation is not managed.\n");
+
+
+  // =====================
+  // Parse command line
+  // =====================
+
+  // Default values
+  bool verbose = false;
+
+  char tree_file_name[50];  // scratch buffer for the "tree/tree_<timestep>.ae" paths
+
+  const char * short_options = "hVv::";
+  static struct option long_options[] = {
+      {"help", no_argument, NULL, 'h'},
+      {"version", no_argument, NULL, 'V'},
+      {"verbose", no_argument, NULL, 'v'},
+      {0, 0, 0, 0}
+  };
+
+  int option;
+  while((option = getopt_long(argc, argv, short_options, long_options, NULL)) != -1)
+  {
+    switch(option)
+    {
+      case 'h' :
+      {
+        print_help(argv[0]);
+        exit(EXIT_SUCCESS);
+      }
+      case 'V' :
+      {
+        Utils::PrintAevolVersion();
+        exit(EXIT_SUCCESS);
+      }
+      case 'v' : verbose = true; break;
+      //case 'n' : check_genome = NO_CHECK; break;
+      //case 'c' : check_genome = FULL_CHECK; break;
+      //case 'b' : t0 = atol(optarg); break;
+      //case 'i' : final_indiv_index = atol(optarg); break;
+      //case 'r' : final_indiv_rank = atol(optarg); break;
+    }
+  }
+
+  // There should be only one remaining arg: the lineage file
+  if (optind != argc - 1) {
+    Utils::ExitWithUsrMsg("please specify a lineage file");
+  }
+
+
+
+  // Private copy of the lineage file path; released with delete [] before exiting
+  char* lineage_file_name = new char[strlen(argv[optind]) + 1];
+  sprintf(lineage_file_name, "%s", argv[optind]);
+
+  // =======================
+  // Open the lineage file
+  // =======================
+  gzFile lineage_file = gzopen(lineage_file_name, "r");
+  if (lineage_file == Z_NULL) {
+    fprintf(stderr, "ERROR : Could not read the lineage file %s\n", lineage_file_name);
+    exit(EXIT_FAILURE);
+  }
+
+  int64_t t0 = 0;
+  int64_t t_end = 0;
+  int32_t final_indiv_index = 0;
+  int32_t final_indiv_rank = 0;
+  // Lineage header: t0, t_end, then the final individual's index and rank
+  gzread(lineage_file, &t0, sizeof(t0));
+  gzread(lineage_file, &t_end, sizeof(t_end));
+  gzread(lineage_file, &final_indiv_index, sizeof(final_indiv_index));
+  gzread(lineage_file, &final_indiv_rank, sizeof(final_indiv_rank));
+  // NOTE(review): the gzread() results above are not checked
+
+
+
+  // Load the simulation
+  ExpManager* exp_manager = new ExpManager();
+  exp_manager->load(t0, true, false);
+
+  // Check that the tree was recorded
+  if (not exp_manager->record_tree()) {
+    Utils::ExitWithUsrMsg("The phylogenetic tree wasn't recorded during "
+                          "evolution, could not reconstruct the lineage");
+  }
+
+  int64_t tree_step = exp_manager->tree_step();
+
+  //delete exp_manager;
+
+
+  // The tree
+  Tree* tree = NULL;
+
+
+  // =========================
+  // Read the first grid cell stored in the lineage file
+  // =========================
+
+  GridCell* grid_cell = new GridCell(lineage_file, exp_manager, nullptr);
+  auto* indiv = grid_cell->individual();
+  int32_t index = indiv->id();
+
+
+  int nb_muts = 0;
+
+  ReplicationReport* rep_f = nullptr;
+
+  World* world = exp_manager->world();
+  int16_t grid_width = world->width();
+  int16_t grid_height = world->height();
+  unsigned int pop_size = grid_height * grid_width;
+
+  // coalescence_time[t]: coalescence time found for generation t (stays 0 when none was found)
+  std::vector<int> coalescence_time;
+  coalescence_time.resize(t_end);
+
+  aevol::AeTime::set_time(t0);
+  std::ofstream coalescence_file;
+  coalescence_file.open("coalescence.csv",std::ofstream::trunc);
+  coalescence_file<<"Generation,"<<"Coalescence"<<std::endl;
+
+  map<int,Tree*> map_tree;  // cache of loaded tree files, keyed by tree timestep
+
+  while (time() < t_end)
+  {
+    if (verbose)
+      printf("Computing Coalescence at generation %" PRId64
+             " for the lineage (index %" PRId32 ")...", time(), index);
+
+    if (time() != t0) {
+      rep_f = new ReplicationReport(lineage_file, indiv);
+      index = rep_f->id(); // who we are building...
+      //printf("Update index %d\n",index);
+
+
+      // Count the mutational events (rearrangements + mutations) carried by this report
+      const auto &dnarep = rep_f->dna_replic_report();
+
+      nb_muts = dnarep.rearrangements().size() + dnarep.mutations().size();
+    }
+
+    if (nb_muts >= 1) {
+      // Search the coalescence time for this individual
+      // previous: ids descending from it one generation earlier; current: ids whose parent is in previous
+      std::vector<int> previous;
+      previous.push_back(index);
+      std::vector<int> current;
+
+      bool coal_found = false;
+      int coal_time = 1;
+      int64_t local_time = time()+1;
+      // Release the cached trees whose key is <= the current generation
+      for (auto it = map_tree.begin(); it != map_tree.end(); ) {
+        if (it->first <= time()) {
+          delete it->second;
+          it = map_tree.erase(it);  // erase() returns the next valid iterator
+        } else {
+          ++it;
+        }
+      }
+      int64_t tree_key = ((int) ((local_time - 1) / tree_step) + 1) * tree_step;  // smallest multiple of tree_step >= local_time; names the tree file
+
+      if (map_tree.find(tree_key) == map_tree.end()) {
+        snprintf(tree_file_name, sizeof(tree_file_name), "tree/tree_"
+                 TIMESTEP_FORMAT
+                 ".ae", tree_key);
+        map_tree[tree_key] = new Tree(exp_manager, tree_file_name);
+        tree = map_tree[tree_key];
+
+        printf("Loading tree %" PRId64 "\n", tree_key);
+      } else
+        tree = map_tree[tree_key];
+
+      while (!coal_found) {
+        if (local_time >= t_end)
+          break;
+
+
+        if (Utils::mod(local_time-1, tree_step) == 0) {
+          tree_key = ((int) ((local_time - 1) / tree_step) + 1) * tree_step;
+          if (map_tree.find(tree_key) == map_tree.end()) {
+            snprintf(tree_file_name, sizeof(tree_file_name), "tree/tree_"
+                     TIMESTEP_FORMAT
+                     ".ae", tree_key);
+            map_tree[tree_key] = new Tree(exp_manager, tree_file_name);
+            tree = map_tree[tree_key];
+            printf("Loading tree %" PRId64 "\n", tree_key);
+          } else {
+            tree = map_tree[tree_key];
+          }
+        }
+
+
+        const std::map<int32_t, ReplicationReport*>& reports = tree->reports(local_time);
+
+        #pragma omp parallel for
+        for (int i = 0; i < static_cast<int>(pop_size); i++) {
+          ReplicationReport* rep = new ReplicationReport(*(reports.at(i)));  // at(): const lookup, never inserts from a worker thread
+
+          auto foundPrevious = std::find(previous.begin(),previous.end(),rep->parent_id());
+
+          if ( foundPrevious != previous.end() ) {
+            #pragma omp critical
+            {
+              current.push_back(rep->id());
+            }
+          }
+
+          delete rep;
+        }
+
+        // Coalescence is reached once every individual descends from the lineage individual
+        if (current.size() == pop_size) {
+          coalescence_time[time()] = coal_time;
+          coal_found = true;
+        } else {
+          local_time++;
+          coal_time++;
+          previous.swap(current);
+          current.clear();
+        }
+
+
+      }
+
+      // One row per mutated generation; the value is 0 if t_end was reached first
+      coalescence_file<<AeTime::time()<<","<<coalescence_time[AeTime::time()]<<std::endl;
+    }
+
+    // Release the report every generation, including those without any mutation
+    delete rep_f;
+    rep_f = nullptr;
+    aevol::AeTime::plusplus();
+    if (verbose) printf(" OK\n");
+  }
+
+
+
+
+
+// for (int gen = 0; gen < t_end; gen++) {
+//
+// }
+
+  coalescence_file.flush();
+  coalescence_file.close();
+  gzclose(lineage_file);
+  //delete exp_manager;
+
+  delete [] lineage_file_name;  // matches the new[] above
+
+  exit(EXIT_SUCCESS);
+}
+
+/*!
+ \brief + +*/ +void print_help(char* prog_path) +{ + // default values : + // begin_gener = 0 + // indiv = best individual at generation end_gener + + // there must be a genome backup file for begin_gener + + // not relevant if crossover + + printf("\n"); + printf("*********************** aevol - Artificial Evolution ******************* \n"); + printf("* * \n"); + printf("* Lineage post-treatment program * \n"); + printf("* * \n"); + printf("************************************************************************ \n"); + printf("\n\n"); + printf("This program is Free Software. No Warranty.\n"); + printf("Copyright (C) 2009 LIRIS.\n"); + printf("\n"); +#ifdef __REGUL + printf("Usage : rlineage -h\n"); + printf("or : rlineage [-vn] [-i index | -r rank] [-b gener1] -e end_gener \n"); +#else + printf("Usage : lineage -h\n"); + printf("or : lineage [-vn] [-i index | -r rank] [-b gener1] -e end_gener \n"); +#endif + printf("\n"); +#ifdef __REGUL + printf("This program retrieves the ancestral lineage of an individual and writes \n"); + printf("it in an output file called lineage.rae. Specifically, it retrieves the \n"); + printf("lineage of the individual of end_gener whose index is index, going \n"); + printf("back in time up to gener1. This program requires at least one population backup\n"); + printf("file (for the generation gener1), one environment backup file (for the generation gener1)\n"); + printf("and all tree files for generations gener1 to end_gener.\n"); +#else + printf("This program retrieves the ancestral lineage of an individual and writes \n"); + printf("it in an output file called lineage.ae. Specifically, it retrieves the \n"); + printf("lineage of the individual of end_gener whose index is index, going \n"); + printf("back in time up to gener1. 
This program requires at least one population backup\n"); + printf("file (for the generation gener1), one environment backup file (for the generation gener1)\n"); + printf("and all tree files for generations gener1 to end_gener.\n"); +#endif + printf("\n"); + printf("WARNING: This program should not be used for simulations run with lateral\n"); + printf("transfer. When an individual has more than one parent, the notion of lineage\n"); + printf("used here is not relevant.\n"); + printf("\n"); + printf("\t-h or --help : Display this help.\n"); + printf("\n"); + printf("\t-v or --verbose : Be verbose, listing generations as they are \n"); + printf("\t treated.\n"); + printf("\n"); + printf("\t-n or --nocheck : Disable genome sequence checking. Makes the \n"); + printf("\t program faster, but it is not recommended. \n"); + printf("\t It is better to let the program check that \n"); + printf("\t when we rebuild the genomes of the ancestors\n"); + printf("\t from the lineage file, we get the same sequences\n"); + printf("\t as those stored in the backup files.\n"); + printf("\n"); + printf("\t-c or --fullcheck : Will perform the genome checks every <BACKUP_STEP>\n"); + printf("\t generations. Default behaviour is lighter as it\n"); + printf("\t only performs these checks at the ending generation.\n"); + printf("\n"); + printf("\t-i index or --index index : \n"); + printf("\t Retrieve the lineage of the individual whose\n"); + printf("\t index is index. The index must be comprised \n"); + printf("\t between 0 and N-1, with N the size of the \n"); + printf("\t population at the ending generation. If neither\n"); + printf("\t index nor rank are specified, the program computes \n"); + printf("\t the lineage of the best individual of the ending \n"); + printf("\t generation.\n"); + printf("\n"); + printf("\t-r rank or --rank rank : \n"); + printf("\t Retrieve the lineage of the individual whose\n"); + printf("\t rank is rank. 
The rank must be comprised \n"); + printf("\t between 1 and N, with N the size of the \n"); + printf("\t population at the endind generation. If neither\n"); + printf("\t index nor rank are specified, the program computes \n"); + printf("\t the lineage of the best individual of the ending \n"); + printf("\t generation.\n"); + printf("\n"); + printf("\t-b gener1 or --begin gener1 : \n"); + printf("\t Retrieve the lineage up to generation gener1.\n"); + printf("\t There must be a genome backup file for this\n"); + printf("\t generation. If not specified, the program \n"); + printf("\t retrieves the lineage up to generation 0.\n"); + printf("\n"); + printf("\t-e end_gener or --end end_gener : \n"); + printf("\t Retrieve the lineage of the individual of end_gener \n"); + printf("\t (default: that contained in file last_gener.txt, if any)\n"); + printf("\n"); +} diff --git a/src/post_treatments/extract.cpp b/src/post_treatments/extract.cpp index b1217f785e866af0f8828563d469f368431c9fc7..22d0012a3593b1f3535545c50a55a2332286520c 100644 --- a/src/post_treatments/extract.cpp +++ b/src/post_treatments/extract.cpp @@ -230,47 +230,47 @@ inline void analyse_gu(GeneticUnit* gen_unit, int32_t gen_unit_number, void print_help(char* prog_path) { - // Get the program file-name in prog_name (strip prog_path of the path) - char* prog_name; // No new, it will point to somewhere inside prog_path - if ((prog_name = strrchr(prog_path, '/'))) prog_name++; - else prog_name = prog_path; - - printf("******************************************************************************\n"); - printf("* *\n"); - printf("* aevol - Artificial Evolution *\n"); - printf("* *\n"); - printf("* Aevol is a simulation platform that allows one to let populations of *\n"); - printf("* digital organisms evolve in different conditions and study experimentally *\n"); - printf("* the mechanisms responsible for the structuration of the genome and the *\n"); - printf("* transcriptome. 
*\n"); - printf("* *\n"); - printf("******************************************************************************\n"); - printf("\n"); - printf("%s:\n", prog_name); - printf("\tExtracts the genotype and/or data about the phenotype of individuals\n"); - printf("\tin the provided population and write them into text files easy to parse\n"); - printf("\twith e.g. matlab.\n"); - printf("\n"); - printf("Usage : %s -h\n", prog_name); - printf(" or : %s -V or --version\n", prog_name); - printf(" or : %s [-t TIMESTEP] [-S SEQ_FILE] [-T TRIANGLE_FILE] [-U NUM_GU] [-a]\n", - prog_name); - printf("\nOptions\n"); - printf(" -h, --help\n\tprint this help, then exit\n"); - printf(" -V, --version\n\tprint version number, then exit\n"); - printf(" -t TIMESTEP\n"); - printf("\tspecify timestep of the individual(s) of interest\n"); - printf(" -S SEQ_FILE\n"); - printf("\textract sequences into file SEQ_FILE\n"); - printf(" -T TRIANGLE_FILE\n"); - printf("\textract phenotypic data into file TRIANGLE_FILE\n"); - printf(" -U NUM_GU\n"); - printf("\tonly treat genetic unit #NUM_GU (default: treat all genetic units)\n"); - printf(" -a\n"); - printf("\ttreat all the individuals (default: treat only the best)\n"); -} - - printf("\n\ + // Get the program file-name in prog_name (strip prog_path of the path) + char *prog_name; // No new, it will point to somewhere inside prog_path + if ((prog_name = strrchr(prog_path, '/'))) prog_name++; + else prog_name = prog_path; + + printf("******************************************************************************\n"); + printf("* *\n"); + printf("* aevol - Artificial Evolution *\n"); + printf("* *\n"); + printf("* Aevol is a simulation platform that allows one to let populations of *\n"); + printf("* digital organisms evolve in different conditions and study experimentally *\n"); + printf("* the mechanisms responsible for the structuration of the genome and the *\n"); + printf("* transcriptome. 
*\n"); + printf("* *\n"); + printf("******************************************************************************\n"); + printf("\n"); + printf("%s:\n", prog_name); + printf("\tExtracts the genotype and/or data about the phenotype of individuals\n"); + printf("\tin the provided population and write them into text files easy to parse\n"); + printf("\twith e.g. matlab.\n"); + printf("\n"); + printf("Usage : %s -h\n", prog_name); + printf(" or : %s -V or --version\n", prog_name); + printf(" or : %s [-t TIMESTEP] [-S SEQ_FILE] [-T TRIANGLE_FILE] [-U NUM_GU] [-a]\n", + prog_name); + printf("\nOptions\n"); + printf(" -h, --help\n\tprint this help, then exit\n"); + printf(" -V, --version\n\tprint version number, then exit\n"); + printf(" -t TIMESTEP\n"); + printf("\tspecify timestep of the individual(s) of interest\n"); + printf(" -S SEQ_FILE\n"); + printf("\textract sequences into file SEQ_FILE\n"); + printf(" -T TRIANGLE_FILE\n"); + printf("\textract phenotypic data into file TRIANGLE_FILE\n"); + printf(" -U NUM_GU\n"); + printf("\tonly treat genetic unit #NUM_GU (default: treat all genetic units)\n"); + printf(" -a\n"); + printf("\ttreat all the individuals (default: treat only the best)\n"); + + + printf("\n\ This program extracts some data about the individuals and write\n\ them into text files easy to parse with e.g. 
matlab.\n\ \n\ @@ -312,6 +312,7 @@ seq_020000_best :\n\ \n\ extract -b -r 20000 -s seq_020000_best\n\ or extract -b -p populations/pop_020000.ae -s seq_020000_best\n"); +} void interpret_cmd_line_options(int argc, char* argv[]) { // Define allowed options diff --git a/src/post_treatments/extract_network.cpp b/src/post_treatments/extract_network.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e585e6a7d4e2c9c6c158d6bbdbc5c545a8839cdf --- /dev/null +++ b/src/post_treatments/extract_network.cpp @@ -0,0 +1,162 @@ +// **************************************************************************** +// +// Aevol - An in silico experimental evolution platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons, Jonathan Rouzaud-Cornabas +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+//
+// ****************************************************************************
+
+// Post-treatment tool: loads one generation of an experiment and writes the
+// positive enhancing/operating influence coefficients of its coding RNAs to network.csv.
+
+// =================================================================
+// Libraries
+// =================================================================
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+
+#include <cstdint>
+#include <fstream>
+#include <limits>
+
+// =================================================================
+// Project Files
+// =================================================================
+#include "aevol.h"
+
+using namespace aevol;
+
+// =================================================================
+// Function declarations
+// =================================================================
+void print_help(char* prog_path);
+void extract_network(Individual_R* indiv);
+// Entry point: parses -g/--gener (default: last generation), loads that generation and dumps the best individual's network.
+int main(int argc, char* argv[])
+{
+  // Initialize command-line option variables with default values
+  bool best_only = true;  // -b/--best can only re-assert this default, so the all-individuals branch below is currently unreachable
+  int32_t num_gener = -1;
+
+  // 2) Define allowed options ('b' takes no argument, matching the --best long option)
+  const char * options_list = ":hVv:bg:";
+  static struct option long_options_list[] = {
+      {"help", no_argument, NULL, 'h'},
+      {"version", no_argument, NULL, 'V' },
+      {"best", no_argument, NULL, 'b' },
+      {"gener", required_argument, NULL, 'g' },
+      { 0, 0, 0, 0 }
+  };
+
+  // 3) Get actual values of the command-line options
+  int option;
+  while ((option = getopt_long(argc, argv, options_list, long_options_list, NULL)) != -1)
+  {
+    switch (option)
+    {
+      case 'h' :
+      {
+        //print_help(argv[0]);
+        exit(EXIT_SUCCESS);
+      }
+      case 'V' :
+      {
+        Utils::PrintAevolVersion();
+        exit(EXIT_SUCCESS);
+      }
+      case 'b' :
+      {
+        best_only = true;
+        break;
+      }
+      case 'g' :
+      {
+        if (optarg[0] == '\0')
+        {
+          printf("%s: error: Option -g or --gener : missing argument.\n", argv[0]);
+          exit(EXIT_FAILURE);
+        }
+
+        num_gener = atol(optarg);
+
+        break;
+      }
+    }
+  }
+
+  // If num_gener is not provided, assume last gener
+  if (num_gener == -1) {
+    num_gener = OutputManager::last_gener();
+  }
+
+  // Open the files
+  auto exp_manager = new ExpManager();
+  exp_manager->load(num_gener, false, false);
+
+
+  // The best individual is already known because it is the last in the list
+  // Thus we do not need to know anything about the environment and to evaluate the individuals
+  // NOTE(review): the dynamic_cast results below are used without a null check
+  // Parse the individuals
+  if (best_only)
+  {
+    Individual_R* best = dynamic_cast<Individual_R*>(exp_manager->best_indiv());
+    best->do_transcription_translation_folding(); // We need to recompute proteins if not already done (ie if using a population file and not a full backup)
+    extract_network(best);
+  }
+  else
+  {
+    for (const auto& indiv: exp_manager->indivs()) {
+      indiv->do_transcription_translation_folding();
+      extract_network(dynamic_cast<Individual_R*>(indiv));
+    }
+  }
+
+  delete exp_manager;
+
+  return EXIT_SUCCESS;
+}
+// Writes indiv's positive influence coefficients to network.csv (truncated on every call): one row per coefficient, column 2 is 1 for enhancing and 0 for operating.
+void extract_network(Individual_R* indiv) {
+  std::ofstream network;
+  network.open("network.csv",std::ofstream::trunc);
+  network<<"Individual,"<<"Enhancer_or_Inhibitor,"<<"Value"<<std::endl;
+
+  for (auto& rna: indiv->get_rna_list_coding()) {
+    for (unsigned int i = 0; i < ((Rna_R*)rna)->nb_influences(); i++) {
+      // an influence may yield an enhancing row, an operating row, or both
+      if (((Rna_R*)rna)->_enhancing_coef_list[i] > 0)
+      {
+        network<<indiv->id()<<",1,"<<((Rna_R*)rna)->_enhancing_coef_list[i]<<std::endl;
+      }
+      // operating (inhibiting) influence: report its own coefficient
+      if (((Rna_R*)rna)->_operating_coef_list[i] > 0)
+      {
+        network<<indiv->id()<<",0,"<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl;
+      }
+    }
+  }
+
+  network.flush();
+  network.close();
+}
\ No newline at end of file
diff --git a/src/post_treatments/lucas_network_knockout.cpp b/src/post_treatments/lucas_network_knockout.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fc345947500844d361c776555ca5c06d444cafc1
--- /dev/null
+++ b/src/post_treatments/lucas_network_knockout.cpp
@@ -0,0 +1,1132 @@
+// ****************************************************************************
+//
+// Aevol - An in silico experimental evolution
platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// +// **************************************************************************** + +// ================================================================= +// Libraries +// ================================================================= +#include <errno.h> +#include <inttypes.h> +#include <getopt.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <zlib.h> +#include <sys/stat.h> + +#include <list> +#include <vector> +#include <unordered_map> +#include <algorithm> +#include <iostream> +#include <fstream> +// ================================================================= +// Project Files +// ================================================================= +#include "aevol.h" + +using namespace aevol; + +class Node { + public: + Node(unsigned long long lid) { id = lid; }; + + unsigned long long id; + std::unordered_map<unsigned long long, Node*> next_nodes; + Node* root = nullptr; + int dist_to_parent = 0; + bool to_delete = false; + bool is_last = false; + std::string 
nhx = ""; +}; + +// ================================================================= +// Function declarations +// ================================================================= +void print_help(char* prog_path); +void extract_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void dump_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void filter_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void regul_or_not(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, + double* fabs_fitness_loss_percent); +void extract_network_single_target_model(int time, Individual_R* best, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** ptm_fabs_fitness_loss_percent); +void shuffle_randomized(int time, Individual_R* indiv, double fabs_metaerror_loss, double fabs_fitness_loss, + double fabs_metaerror_loss_percent, double fabs_fitness_loss_percent); + +int main(int argc, char** argv) +{ + // The output file (lineage.ae or lineage.rae) contains the following information: + // + // - common data (ae_common::write_to_backup) + // - begin gener (int32_t) + // - end gener (int32_t) + // - final individual index (int32_t) + // - initial genome size (int32_t) + // - initial ancestor (nb genetic units + sequences) (Individual::write_to_backup) + // - replication report of ancestor at generation begin_gener+1 (ae_replic_report::write_to_backup) + // - replication report of ancestor at generation begin_gener+2 (ae_replic_report::write_to_backup) + // - 
replication report of ancestor at generation begin_gener+3 (ae_replic_report::write_to_backup) + // - ... + // - replication report of ancestor at generation end_gener (ae_replic_report::write_to_backup) + + + printf("\n WARNING : Parameters' change in the middle of a simulation is not managed.\n"); + + + // ===================== + // Parse command line + // ===================== + + // Default values + //check_type check_genome = LIGHT_CHECK; + bool verbose = false; + int64_t t0 = 0; + int64_t t_end = -1; + + char tree_file_name[50]; + + const char * short_options = "hVv:e:"; + static struct option long_options[] = { + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {"verbose", no_argument, NULL, 'v'}, + {"end", required_argument, NULL, 'e'}, + {0, 0, 0, 0} + }; + + int option; + while((option = getopt_long(argc, argv, short_options, long_options, NULL)) != -1) + { + switch(option) + { + case 'h' : + { + print_help(argv[0]); + exit(EXIT_SUCCESS); + } + case 'V' : + { + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + } + case 'v' : verbose = true; break; + //case 'n' : check_genome = NO_CHECK; break; + //case 'c' : check_genome = FULL_CHECK; break; + case 'b' : t0 = atol(optarg); break; + //case 'i' : final_indiv_index = atol(optarg); break; + //case 'r' : final_indiv_rank = atol(optarg); break; + case 'e' : + { + if (strcmp(optarg, "") == 0) + { + printf("%s: error: Option -e or --end : missing argument.\n", argv[0]); + exit(EXIT_FAILURE); + } + + t_end = atol(optarg); + + break; + } + } + } + + //verbose=true; + + // Set undefined command line parameters to default values + if (t_end == -1) { + // Set t_end to the content of the LAST_GENER file if it exists. 
+ // If it doesn't, print help and exit + FILE* lg_file = fopen(LAST_GENER_FNAME, "r"); + if (lg_file != NULL) { + if (fscanf(lg_file, "%" PRId64, &t_end) == EOF) { + printf("ERROR: failed to read last generation from file %s\n", + LAST_GENER_FNAME); + exit(EXIT_FAILURE); + } + fclose(lg_file); + } + else { + printf("%s: error: You must provide a generation number.\n", argv[0]); + exit(EXIT_FAILURE); + } + } + + // printf("Loading at generation %d\n",t_end); + + // Load the simulation + ExpManager* exp_manager = new ExpManager(); + exp_manager->load(t_end, false, false); + + // Check that the tree was recorded + if (not exp_manager->record_tree()) { + Utils::ExitWithUsrMsg("The phylogenetic tree wasn't recorded during " + "evolution, could not reconstruct the lineage"); + } + + int64_t tree_step = exp_manager->tree_step(); + + //delete exp_manager; + + + // The tree + Tree* tree = NULL; + + // ============================ + // Init files + // ============================ + std::ofstream network; + network.open("lucas_network_knockout.csv",std::ofstream::trunc); + network<<"Generation,"<<"Enhancer_or_Inhibitor,"<<"Value,"<<"Metaerror_lost,"<<"Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" + <<std::endl; + network.flush(); + network.close(); + + network.open("lucas_network_knockout_shuffle_randomized.csv",std::ofstream::trunc); + network<<"Generation,"<<"Metaerror_lost,"<<"Fitness_lost,"<<"Metaerror_lost_percent,"<<"Fitness_lost_percent" + <<std::endl; + network.flush(); + network.close(); + + float filter_values[3] = {0.00001, 0.0001, 0.001}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "lucas_network_filtered_" + str_filter_value + ".csv"; + network.open(file_name, std::ofstream::trunc); + network << "Generation," << "Enhancer," << "Inhibitor," << "Both," << "Value" + << "Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" << std::endl; + 
network.flush(); + network.close(); + + file_name = "lucas_network_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::trunc); + network << "Generation," << "nb_enhancing," << "nb_inhibitor," << "nb_both,nb_edges," << "filter_nb_enhancing," + << "filter_nb_inhibitor," << "filter_nb_both,filter_nb_edges" << std::endl; + network.flush(); + network.close(); + } + + float filter_values_a[4] = {0.0, 0.00001, 0.0001, 0.001}; + + for (float filter_value : filter_values_a) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "lucas_network_regul_or_not_" + str_filter_value + ".csv"; + network.open(file_name, std::ofstream::trunc); + network << "Generation," + << "Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" << std::endl; + network.flush(); + network.close(); + + file_name = "lucas_network_regul_or_not_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::trunc); + network << "Generation," << "nb_enhancing," << "nb_inhibitor," << "nb_both,nb_edges," << "filter_nb_enhancing," + << "filter_nb_inhibitor," << "filter_nb_both,filter_nb_edges" << std::endl; + network.flush(); + network.close(); + } + + float filter_values_2[4] ={0.0, 0.00001, 0.0001, 0.001}; + + for (float filter_value : filter_values_2) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "lucas_network_dump_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::trunc); + network << "Generation," << "Source," << "Destination," << "Enhancer_or_Inhibitor," << + "Value" << "Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" << std::endl; + network.flush(); + network.close(); + } + + network.open("lucas_network_knockout_single_env.csv",std::ofstream::trunc); + 
network<<"Generation,"<<"Enhancer_or_Inhibitor,"<<"TargetModel,"<<"Value"<<"Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent"<<std::endl; + network.flush(); + network.close(); + + // ========================= + // Load the last tree file + // ========================= + + if (verbose) + { + printf("\n\n"); + printf("====================================\n"); + printf(" Loading the last tree file ... "); + fflush(stdout); + } + + + // Example for ae_common::rec_params->tree_step() == 100 : + // + // tree_000100.ae ==> generations 1 to 100. + // tree_000200.ae ==> generations 101 to 200. + // tree_000300.ae ==> generations 201 to 300. + // etc. + // + // Thus, the information for generation end_gener are located + // in the file called (end_gener/ae_common::rec_params->tree_step() + 1) * ae_common::rec_params->tree_step(), + // except if end_gener%ae_common::rec_params->tree_step()==0. + + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t_end); + + tree = new Tree(exp_manager, tree_file_name); + + if (verbose) + { + printf("OK\n"); + printf("====================================\n"); + } + + + World* world = exp_manager->world(); + int16_t grid_width = world->width(); + int16_t grid_height = world->height(); + int32_t pop_size = grid_height * grid_width; + + int32_t lucas = t_end; + + // ============================================================================ + // Find the index of the final individual and retrieve its replication report + // ============================================================================ + std::set<unsigned long long> current; + + for (int16_t x = 0 ; x < grid_width ; x++) + for (int16_t y = 0 ; y < grid_height ; y++) { + ReplicationReport* rep = new ReplicationReport(*(tree->report_by_index(t_end, + x * grid_height + y))); + + current.insert(rep->parent_id()); + } + + // ======================= + // Open the output file + // ======================= + + + + // 
=================================================== + // Retrieve the replication reports of the ancestors + // =================================================== + + if (verbose) + { + printf("\n\n\n"); + printf("======================================================================\n"); + printf(" Parsing tree files to retrieve the ancestors' replication reports... \n"); + printf("======================================================================\n"); + } + + // For each generation (going backwards), retrieve the index of the parent and + // the corresponding replication report + for (int64_t t = t_end - 1 ; t > 0 ; t--) + { + + if (current.size() == 1 && (Utils::mod(t, exp_manager->backup_step()) == 0)) { + + Individual* initial_ancestor = exp_manager->best_indiv(); + printf("LUCAS FOUND at %d (%d)\n",t,(*current.begin())); + + ExpManager* exp_manager_2 = new ExpManager(); + exp_manager_2->load(t, false, false); + + //= exp_manager->indiv_by_id(indices[0]); + for (int16_t x = 0 ; x < grid_width ; x++) + for (int16_t y = 0 ; y < grid_height ; y++) { + if (exp_manager_2->world()->indiv_at(x,y)->id() == (*current.begin())) { + initial_ancestor = exp_manager_2->world()->indiv_at(x,y); + break; + } + } + + delete exp_manager; + exp_manager = exp_manager_2; + + printf("-------------> Loading BEST lucas\n"); + Individual_R* best = dynamic_cast<Individual_R*>(initial_ancestor); + //best->clear_everything_except_dna_and_promoters(); + best->do_transcription_translation_folding(); + + int nb_edges = 0; + for (auto &rna: best->get_rna_list_coding()) { + nb_edges+=((Rna_R *) rna)->nb_influences(); + } + + double* fabs_metaerror_loss = new double[nb_edges]; + double* fabs_fitness_loss = new double[nb_edges]; + double* fabs_metaerror_loss_percent = new double[nb_edges]; + double* fabs_fitness_loss_percent = new double[nb_edges]; + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] = 0; + fabs_fitness_loss[i] = 0; + fabs_metaerror_loss_percent[i] = 0; + 
fabs_fitness_loss_percent[i] = 0; + } + + + int nb_iteration = 10; +/* best->evaluated_ = false; + best->EvaluateInContext(exp_manager->world()->grid(0,0)->habitat());*/ + + printf("Running %d evals for %d edges : %e (%d %d) %e\n",nb_iteration,nb_edges,best->fitness(),best->amount_of_dna(), + best->protein_list().size(),best->dist_to_target_by_feature(METABOLISM)); + for (int i = 0; i < nb_iteration; i++) { + printf("Iteration %d\n",i); + exp_manager->world()->ApplyHabitatVariation(); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + //best->EvaluateInContext(exp_manager->world()->grid(0,0)->habitat()); + best->Evaluate(); + fabs_metaerror_loss[i_edges] += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + fabs_fitness_loss[i_edges] += std::fabs(base_fitness-best->fitness()); + + fabs_metaerror_loss_percent[i_edges] += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + fabs_fitness_loss_percent[i_edges] += (std::fabs(base_fitness-best->fitness()))/best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] /= nb_iteration; + fabs_fitness_loss[i] /= nb_iteration; + fabs_metaerror_loss_percent[i] /= nb_iteration; + fabs_fitness_loss_percent[i] /= nb_iteration; + } + + 
extract_network(t,best,fabs_metaerror_loss,fabs_fitness_loss, + fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + filter_network(t,best,fabs_metaerror_loss,fabs_fitness_loss,fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + dump_network(t,best,fabs_metaerror_loss,fabs_fitness_loss,fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + + /*** regul or not ***/ + + double* rornot_fabs_metaerror_loss = new double[4]; + double* rornot_fabs_fitness_loss = new double[4]; + double* rornot_fabs_metaerror_loss_percent = new double[4]; + double* rornot_fabs_fitness_loss_percent = new double[4]; + + for (int j = 0; j < 4; j++) { + fabs_metaerror_loss[j] = 0; + fabs_fitness_loss[j] = 0; + fabs_metaerror_loss_percent[j] = 0; + fabs_fitness_loss_percent[j] = 0; + } + + float filter_values[4] = {0.0, 0.00001, 0.0001, 0.001}; + + int i_filter = 0; + for (float filter_value : filter_values) { + printf("Evaluate if/how much organism uses regulation : filter at %f\n", filter_value); + for (int i = 0; i < nb_iteration; i++) { + printf("Iteration %d\n", i); + exp_manager->world()->ApplyHabitatVariation(); + + best->evaluated_ = false; + best->EvaluateInContext(exp_manager->world()->grid(0,0)->habitat()); + + int i_edges = 0; + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + double **enhance_backup = new double*[best->get_rna_list_coding().size()]; + double **operate_backup = new double*[best->get_rna_list_coding().size()]; + int rna_idx = 0; + for (auto &rna: best->get_rna_list_coding()) { + enhance_backup[rna_idx] = new double[((Rna_R *) rna)->nb_influences()]; + operate_backup[rna_idx] = new double[((Rna_R *) rna)->nb_influences()]; + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + enhance_backup[rna_idx][i] = ((Rna_R *) rna)->_enhancing_coef_list[i]; + operate_backup[rna_idx][i] = ((Rna_R *) rna)->_operating_coef_list[i]; + if (filter_value == 0) { + ((Rna_R *) 
rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + } else if (fabs_fitness_loss_percent[i_edges] < filter_value) { + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + } + i_edges++; + } + rna_idx++; + } + + best->evaluated_ = false; + best->EvaluateInContext(exp_manager->world()->grid(0,0)->habitat()); + + //printf("Fit %e %e Meta %f %f\n",base_fitness,best->fitness(),base_metaerror,best->dist_to_target_by_feature(METABOLISM)); + rornot_fabs_metaerror_loss[i_filter] += std::fabs( + base_metaerror - best->dist_to_target_by_feature(METABOLISM)); + rornot_fabs_fitness_loss[i_filter] += std::fabs(base_fitness - best->fitness()); + + rornot_fabs_metaerror_loss_percent[i_filter] += + (std::fabs(base_metaerror - best->dist_to_target_by_feature(METABOLISM))) / + best->dist_to_target_by_feature(METABOLISM); + rornot_fabs_fitness_loss_percent[i_filter] += + (std::fabs(base_fitness - best->fitness())) / best->fitness(); + + rna_idx = 0; + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup[rna_idx][i]; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup[rna_idx][i]; + } + delete [] enhance_backup[rna_idx]; + delete [] operate_backup[rna_idx]; + rna_idx++; + } + + delete [] enhance_backup; + delete [] operate_backup; + } + i_filter++; + } + + i_filter = 0; + for (float filter_value : filter_values) { + /*printf("%f -- M %e F %e MP %e FP %e\n",filter_value,rornot_fabs_metaerror_loss[i_filter], + rornot_fabs_fitness_loss[i_filter], + rornot_fabs_metaerror_loss_percent[i_filter],rornot_fabs_fitness_loss_percent[i_filter]);*/ + + rornot_fabs_metaerror_loss[i_filter] /= nb_iteration; + rornot_fabs_fitness_loss[i_filter] /= nb_iteration; + rornot_fabs_metaerror_loss_percent[i_filter] /= nb_iteration; + rornot_fabs_fitness_loss_percent[i_filter] /= nb_iteration; + + 
i_filter++; + } + + regul_or_not(t,best,rornot_fabs_metaerror_loss,rornot_fabs_fitness_loss,rornot_fabs_metaerror_loss_percent, + rornot_fabs_fitness_loss_percent); + + /// Write stats files + + //// Single env + + int nb_phenotypic_target_models = dynamic_cast<PhenotypicTargetHandler_R*>(exp_manager->world()-> + phenotypic_target_handler())->phenotypic_target_models_.size(); + printf("Running with a single phenotypic target models : %d\n",nb_phenotypic_target_models); + + double** ptm_fabs_metaerror_loss = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_fitness_loss = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_metaerror_loss_percent = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_fitness_loss_percent = new double*[nb_phenotypic_target_models]; + + for (int i = 0; i < nb_phenotypic_target_models; i++) { + ptm_fabs_metaerror_loss[i] = new double[nb_edges]; + ptm_fabs_fitness_loss[i] = new double[nb_edges]; + ptm_fabs_metaerror_loss_percent[i] = new double[nb_edges]; + ptm_fabs_fitness_loss_percent[i] = new double[nb_edges]; + } + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) { + + for (int j = 0; j < nb_edges; j++) { + ptm_fabs_metaerror_loss[target_id][j] = 0; + ptm_fabs_fitness_loss[target_id][j] = 0; + ptm_fabs_metaerror_loss_percent[target_id][j] = 0; + ptm_fabs_fitness_loss_percent[target_id][j] = 0; + } + + printf("Testing with phenotypic target model %d\n",target_id); + dynamic_cast<PhenotypicTargetHandler_R*>(exp_manager->world()->phenotypic_target_handler())->set_single_env(target_id); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double 
operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->EvaluateInContext(exp_manager->world()->grid(0,0)->habitat()); + + //printf("Testing with phenotypic target model %d : %lf %lf\n",target_id,base_metaerror,best->dist_to_target_by_feature(METABOLISM)); + + ptm_fabs_metaerror_loss[target_id][i_edges] += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + ptm_fabs_fitness_loss[target_id][i_edges] += std::fabs(base_fitness-best->fitness()); + + ptm_fabs_metaerror_loss_percent[target_id][i_edges] += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + ptm_fabs_fitness_loss_percent[target_id][i_edges] += (std::fabs(base_fitness-best->fitness()))/best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + extract_network_single_target_model(t,best,nb_phenotypic_target_models,ptm_fabs_metaerror_loss,ptm_fabs_fitness_loss,ptm_fabs_metaerror_loss_percent,ptm_fabs_fitness_loss_percent); + + /** Regul or not with shuffle randomized signals **/ + + double sh_rand_fabs_metaerror_loss = 0; + double sh_rand_fabs_fitness_loss = 0; + double sh_rand_fabs_metaerror_loss_percent = 0; + double sh_rand_fabs_fitness_loss_percent = 0; + + /*for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] = 0; + fabs_fitness_loss[i] = 0; + fabs_metaerror_loss_percent[i] = 0; + fabs_fitness_loss_percent[i] = 0; + }*/ + + + nb_iteration = 100; +/* best->evaluated_ = false; + best->EvaluateInContext(exp_manager->world()->grid(0,0)->habitat());*/ + + printf("Running %d evals for %d edges : %e (%d %d) %e\n",nb_iteration,nb_edges,best->fitness(),best->amount_of_dna(), + best->protein_list().size(),best->dist_to_target_by_feature(METABOLISM)); + + + double 
base_metaerror = 0; + double base_fitness = 0; + for (int i = 0; i < nb_iteration; i++) { + exp_manager->world()->ApplyHabitatVariation(); + + best->evaluated_ = false; + best->Evaluate(); + + base_metaerror += best->dist_to_target_by_feature(METABOLISM); + base_fitness += best->fitness(); + printf("MF R -- %e %e (%e %e)\n",base_metaerror,base_fitness,best->dist_to_target_by_feature(METABOLISM),best->fitness()); + } + + base_metaerror /= nb_iteration; + base_fitness /= nb_iteration; + + for (int i = 0; i < nb_iteration; i++) { + printf("Iteration %d -- %e %e %e %e\n",i,base_metaerror,base_fitness,sh_rand_fabs_metaerror_loss,sh_rand_fabs_fitness_loss); + exp_manager->world()->ApplyHabitatVariation(); + dynamic_cast<PhenotypicTargetHandler_R*>(exp_manager->world()->phenotypic_target_handler())->ShuffleRandomlySignals(); + + best->evaluated_ = false; + best->Evaluate(); + + + + sh_rand_fabs_metaerror_loss += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + sh_rand_fabs_fitness_loss += std::fabs(base_fitness-best->fitness()); + + sh_rand_fabs_metaerror_loss_percent += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + sh_rand_fabs_fitness_loss_percent += (std::fabs(base_fitness-best->fitness()))/best->fitness(); + + printf("MF R -- %e %e (%e %e) -- %e %e\n",sh_rand_fabs_metaerror_loss,sh_rand_fabs_fitness_loss, + best->dist_to_target_by_feature(METABOLISM),best->fitness(),sh_rand_fabs_metaerror_loss_percent, + sh_rand_fabs_fitness_loss_percent); + } + + + sh_rand_fabs_metaerror_loss /= nb_iteration; + sh_rand_fabs_fitness_loss /= nb_iteration; + sh_rand_fabs_metaerror_loss_percent /= nb_iteration; + sh_rand_fabs_fitness_loss_percent /= nb_iteration; + + printf("MF R -- %e %e -- %e %e\n",sh_rand_fabs_metaerror_loss,sh_rand_fabs_fitness_loss, + sh_rand_fabs_metaerror_loss_percent, + sh_rand_fabs_fitness_loss_percent); + + 
shuffle_randomized(t,best,sh_rand_fabs_metaerror_loss,sh_rand_fabs_fitness_loss, + sh_rand_fabs_metaerror_loss_percent, + sh_rand_fabs_fitness_loss_percent); + + + delete best; + + exit(1); + } else + lucas = t; + + if (verbose) + printf("Getting the replication report for the ancestor at generation %" PRId64 "\n", t); + + // If we've exhausted the current tree file, load the next one + if (Utils::mod(t, tree_step) == 0) + { + // Change the tree file + delete tree; + + + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t); + + tree = new Tree(exp_manager, tree_file_name); + } + + std::set<unsigned long long> previous = current; + current.clear(); + + for (int16_t x = 0 ; x < grid_width ; x++) + for (int16_t y = 0 ; y < grid_height ; y++) { + ReplicationReport* rep = new ReplicationReport(*(tree->report_by_index(t, + x * grid_height + y))); + + + auto foundFilter = std::find(previous.begin(),previous.end(),rep->id()); + + if ( foundFilter != previous.end() ) { + current.insert(rep->parent_id()); + } + } + } + + + if (verbose) printf("OK\n"); + + // Dump the tre into NHX format + + //delete exp_manager; + + exit(EXIT_SUCCESS); +} + +/*! + \brief + +*/ +void print_help(char* prog_path) +{ + // default values : + // begin_gener = 0 + // indiv = best individual at generation end_gener + + // there must be a genome backup file for begin_gener + + // not relevant if crossover + + printf("\n"); + printf("*********************** aevol - Artificial Evolution ******************* \n"); + printf("* * \n"); + printf("* Lineage post-treatment program * \n"); + printf("* * \n"); + printf("************************************************************************ \n"); + printf("\n\n"); + printf("This program is Free Software. 
No Warranty.\n"); + printf("Copyright (C) 2009 LIRIS.\n"); + printf("\n"); +#ifdef __REGUL + printf("Usage : rlineage -h\n"); + printf("or : rlineage [-vn] [-i index | -r rank] [-b gener1] -e end_gener \n"); +#else + printf("Usage : lineage -h\n"); + printf("or : lineage [-vn] [-i index | -r rank] [-b gener1] -e end_gener \n"); +#endif + printf("\n"); +#ifdef __REGUL + printf("This program retrieves the ancestral lineage of an individual and writes \n"); + printf("it in an output file called lineage.rae. Specifically, it retrieves the \n"); + printf("lineage of the individual of end_gener whose index is index, going \n"); + printf("back in time up to gener1. This program requires at least one population backup\n"); + printf("file (for the generation gener1), one environment backup file (for the generation gener1)\n"); + printf("and all tree files for generations gener1 to end_gener.\n"); +#else + printf("This program retrieves the ancestral lineage of an individual and writes \n"); + printf("it in an output file called lineage.ae. Specifically, it retrieves the \n"); + printf("lineage of the individual of end_gener whose index is index, going \n"); + printf("back in time up to gener1. This program requires at least one population backup\n"); + printf("file (for the generation gener1), one environment backup file (for the generation gener1)\n"); + printf("and all tree files for generations gener1 to end_gener.\n"); +#endif + printf("\n"); + printf("WARNING: This program should not be used for simulations run with lateral\n"); + printf("transfer. When an individual has more than one parent, the notion of lineage\n"); + printf("used here is not relevant.\n"); + printf("\n"); + printf("\t-h or --help : Display this help.\n"); + printf("\n"); + printf("\t-v or --verbose : Be verbose, listing generations as they are \n"); + printf("\t treated.\n"); + printf("\n"); + printf("\t-n or --nocheck : Disable genome sequence checking. 
Makes the \n"); + printf("\t program faster, but it is not recommended. \n"); + printf("\t It is better to let the program check that \n"); + printf("\t when we rebuild the genomes of the ancestors\n"); + printf("\t from the lineage file, we get the same sequences\n"); + printf("\t as those stored in the backup files.\n"); + printf("\n"); + printf("\t-c or --fullcheck : Will perform the genome checks every <BACKUP_STEP>\n"); + printf("\t generations. Default behaviour is lighter as it\n"); + printf("\t only performs these checks at the ending generation.\n"); + printf("\n"); + printf("\t-i index or --index index : \n"); + printf("\t Retrieve the lineage of the individual whose\n"); + printf("\t index is index. The index must be comprised \n"); + printf("\t between 0 and N-1, with N the size of the \n"); + printf("\t population at the ending generation. If neither\n"); + printf("\t index nor rank are specified, the program computes \n"); + printf("\t the lineage of the best individual of the ending \n"); + printf("\t generation.\n"); + printf("\n"); + printf("\t-r rank or --rank rank : \n"); + printf("\t Retrieve the lineage of the individual whose\n"); + printf("\t rank is rank. The rank must be comprised \n"); + printf("\t between 1 and N, with N the size of the \n"); + printf("\t population at the endind generation. If neither\n"); + printf("\t index nor rank are specified, the program computes \n"); + printf("\t the lineage of the best individual of the ending \n"); + printf("\t generation.\n"); + printf("\n"); + printf("\t-b gener1 or --begin gener1 : \n"); + printf("\t Retrieve the lineage up to generation gener1.\n"); + printf("\t There must be a genome backup file for this\n"); + printf("\t generation. 
If not specified, the program \n"); + printf("\t retrieves the lineage up to generation 0.\n"); + printf("\n"); + printf("\t-e end_gener or --end end_gener : \n"); + printf("\t Retrieve the lineage of the individual of end_gener \n"); + printf("\t (default: that contained in file last_gener.txt, if any)\n"); + printf("\n"); +} + +void shuffle_randomized(int time, Individual_R* indiv, double fabs_metaerror_loss, double fabs_fitness_loss, + double fabs_metaerror_loss_percent, double fabs_fitness_loss_percent) { + std::ofstream network; + network.open("lucas_network_knockout_shuffle_randomized.csv",std::ofstream::app); + + std::cout<<time<<","<<fabs_metaerror_loss<<"," + <<fabs_fitness_loss<<","<<fabs_metaerror_loss_percent<<","<<fabs_fitness_loss_percent<<std::endl; + + network<<time<<","<<fabs_metaerror_loss<<"," + <<fabs_fitness_loss<<","<<fabs_metaerror_loss_percent<<","<<fabs_fitness_loss_percent<<std::endl; + + network.flush(); + network.close(); +} + +void extract_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + std::ofstream network; + network.open("lucas_network_knockout.csv",std::ofstream::app); + + int i_edges = 0; + + for (auto& rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R*)rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R*)rna)->_enhancing_coef_list[i] > 0) + { + network<<time<<",1,"<<((Rna_R*)rna)->_enhancing_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," + <<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + + if (((Rna_R*)rna)->_operating_coef_list[i] > 0) + { + network<<time<<",0,"<<((Rna_R*)rna)->_operating_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," 
+ <<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + i_edges++; + } + } + + network.flush(); + network.close(); +} + +void filter_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + float filter_values[3] = {0.00001, 0.0001, 0.001}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "lucas_network_filtered_" + str_filter_value + ".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::app); + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + int both = 0; + if (fabs_fitness_loss_percent[i_edges] >= filter_value) { + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && + (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + network << time << ",1,1,1," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << time << ",1,0,0," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + 
filter_nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << time << ",0,1,0," << ((Rna_R *) rna)->_operating_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_operating++; + } + } + filter_nb_edges++; + } + + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + nb_edges_operating++; + } + } + nb_edges++; + + i_edges++; + } + } + + network.flush(); + network.close(); + + file_name = "lucas_network_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::app); + network << time << "," << nb_edges_enhance << "," << nb_edges_operating << "," << nb_edges_both << "," + << nb_edges << "," << + filter_nb_edges_enhance << "," << filter_nb_edges_operating << "," << filter_nb_edges_both << "," + << filter_nb_edges << std::endl; + network.close(); + + } + +} + + + +void regul_or_not(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, + double* fabs_fitness_loss_percent) { + + float filter_values[4] = {0.0, 0.00001, 0.0001, 0.001}; + + int i_filter = 0; + for (float filter_value : filter_values) { + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "lucas_network_regul_or_not_" + str_filter_value + ".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::app); + + network << time << "," + << fabs_metaerror_loss[i_filter] << "," << 
fabs_fitness_loss[i_filter]<< + ","<<fabs_metaerror_loss_percent[i_filter]<<","<<fabs_fitness_loss_percent[i_filter]<< std::endl; + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + int both = 0; + if (fabs_fitness_loss_percent[i_edges] >= filter_value) { + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && + (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + filter_nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + filter_nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + filter_nb_edges_operating++; + } + } + filter_nb_edges++; + } + + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + nb_edges_operating++; + } + } + nb_edges++; + + i_edges++; + } + } + + network.flush(); + network.close(); + + file_name = "lucas_network_regul_or_not_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::app); + network << time << "," << nb_edges_enhance << "," << nb_edges_operating << "," << nb_edges_both << "," + << nb_edges << "," << + filter_nb_edges_enhance << "," << filter_nb_edges_operating << "," << filter_nb_edges_both << "," + << filter_nb_edges 
<< std::endl; + network.close(); + + } + +} + + + +void dump_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + + float filter_values[4] = {0.0, 0.00001, 0.0001, 0.001}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "lucas_network_dump_"+str_filter_value+".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::app); + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + for (auto& protein : rna->transcribed_proteins()) { + if (fabs_fitness_loss_percent[i_edges] >= filter_value) { + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << time << "," << protein->shine_dal_pos() << "," + << dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<"," + << "1," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges] << std::endl; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << time << "," << protein->shine_dal_pos() << "," + << dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<"," + << "0," << ((Rna_R *) rna)->_operating_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges] << std::endl; + } + } + } + + i_edges++; + } + } + + network.flush(); + network.close(); + + } + +} + +void extract_network_single_target_model(int time, Individual_R* indiv, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** 
ptm_fabs_fitness_loss_percent) { + std::ofstream network; + network.open("lucas_network_knockout_single_env.csv",std::ofstream::trunc); + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) { + int i_edges = 0; + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << time << ",1," <<target_id<<","<< ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << ptm_fabs_metaerror_loss[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss[target_id][i_edges] <<"," + << ptm_fabs_metaerror_loss_percent[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << time << ",0," <<target_id<<","<< ((Rna_R *) rna)->_operating_coef_list[i] << "," + << ptm_fabs_metaerror_loss[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss[target_id][i_edges] << "," + << ptm_fabs_metaerror_loss_percent[target_id][i_edges] <<"," + << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl; + } + i_edges++; + } + } + } + + network.flush(); + network.close(); + +} diff --git a/src/post_treatments/network_knockout.cpp b/src/post_treatments/network_knockout.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aaba6c44baf326a6c529938ad7c67d3841c01ab7 --- /dev/null +++ b/src/post_treatments/network_knockout.cpp @@ -0,0 +1,518 @@ +// **************************************************************************** +// +// Aevol - An in silico experimental evolution platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: 
http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons, Jonathan Rouzaud-Cornabas +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// +// **************************************************************************** + + + + +// ================================================================= +// Libraries +// ================================================================= +#include <getopt.h> +#include <stdlib.h> +#include <stdio.h> +#include <signal.h> + +#include <cstdint> +#include <fstream> +#include <limits> +#include <string> +// ================================================================= +// Project Files +// ================================================================= +#include "aevol.h" + +using namespace aevol; + +// ================================================================= +// Function declarations +// ================================================================= +void print_help(char* prog_path); +void extract_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void dump_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void 
filter_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void extract_network_single_target_model(Individual_R* best, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** ptm_fabs_fitness_loss_percent); + +int main(int argc, char* argv[]) +{ + // Initialize command-line option variables with default values + bool best_only = true; + int32_t num_gener = -1; + + // 2) Define allowed options + const char * options_list = ":hVv:b:g:"; + static struct option long_options_list[] = { + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V' }, + {"best", no_argument, NULL, 'b' }, + {"gener", required_argument, NULL, 'g' }, + { 0, 0, 0, 0 } + }; + + // 3) Get actual values of the command-line options + int option; + while ((option = getopt_long(argc, argv, options_list, long_options_list, NULL)) != -1) + { + switch (option) + { + case 'h' : + { + //print_help(argv[0]); + exit(EXIT_SUCCESS); + } + case 'V' : + { + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + } + case 'b' : + { + best_only = true; + break; + } + case 'g' : + { + if (strcmp(optarg, "") == 0) + { + printf("%s: error: Option -g or --gener : missing argument.\n", argv[0]); + exit(EXIT_FAILURE); + } + + num_gener = atol(optarg); + + break; + } + } + } + + // If num_gener is not provided, assume last gener + if (num_gener == -1) { + num_gener = OutputManager::last_gener(); + } + + // Open the files + auto exp_manager = new ExpManager(); + exp_manager->load(num_gener, false, false); + + + // The best individual is already known because it is the last in the list + // Thus we do not need to know anything about the environment and to evaluate the individuals + + // Parse the individuals + if (best_only) + { + Individual_R* best = dynamic_cast<Individual_R*>(exp_manager->best_indiv()); 
+ best->do_transcription_translation_folding(); + + int nb_edges = 0; + for (auto &rna: best->get_rna_list_coding()) { + nb_edges+=((Rna_R *) rna)->nb_influences(); + } + + double* fabs_metaerror_loss = new double[nb_edges]; + double* fabs_fitness_loss = new double[nb_edges]; + double* fabs_metaerror_loss_percent = new double[nb_edges]; + double* fabs_fitness_loss_percent = new double[nb_edges]; + + for (int i = 0; i < nb_edges; i++) { + fabs_metaerror_loss[i] = 0; + fabs_fitness_loss[i] = 0; + fabs_metaerror_loss_percent[i] = 0; + fabs_fitness_loss_percent[i] = 0; + } + + + int nb_iteration = 100; + printf("Running %d evals for %d edges\n",nb_iteration,nb_edges); + for (int i = 0; i < nb_iteration; i++) { + printf("Iteration %d\n",i); + exp_manager->world()->ApplyHabitatVariation(); + + best->evaluated_ = false; + best->Evaluate(); + + double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->Evaluate(); + + fabs_metaerror_loss[i_edges] += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + fabs_fitness_loss[i_edges] += std::fabs(base_fitness-best->fitness()); + + fabs_metaerror_loss_percent[i_edges] += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + fabs_fitness_loss_percent[i_edges] += (std::fabs(base_fitness-best->fitness()))/best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + for (int i = 0; i < 
nb_edges; i++) { + fabs_metaerror_loss[i] /= nb_iteration; + fabs_fitness_loss[i] /= nb_iteration; + fabs_metaerror_loss_percent[i] /= nb_iteration; + fabs_fitness_loss_percent[i] /= nb_iteration; + } + + extract_network(best,fabs_metaerror_loss,fabs_fitness_loss, + fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + filter_network(best,fabs_metaerror_loss,fabs_fitness_loss,fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + dump_network(best,fabs_metaerror_loss,fabs_fitness_loss,fabs_metaerror_loss_percent,fabs_fitness_loss_percent); + + int nb_phenotypic_target_models = dynamic_cast<PhenotypicTargetHandler_R*>(exp_manager->world()-> + phenotypic_target_handler())->phenotypic_target_models_.size(); + printf("Running with a single phenotypic target models : %d\n",nb_phenotypic_target_models); + + double** ptm_fabs_metaerror_loss = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_fitness_loss = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_metaerror_loss_percent = new double*[nb_phenotypic_target_models]; + double** ptm_fabs_fitness_loss_percent = new double*[nb_phenotypic_target_models]; + + for (int i = 0; i < nb_phenotypic_target_models; i++) { + ptm_fabs_metaerror_loss[i] = new double[nb_edges]; + ptm_fabs_fitness_loss[i] = new double[nb_edges]; + ptm_fabs_metaerror_loss_percent[i] = new double[nb_edges]; + ptm_fabs_fitness_loss_percent[i] = new double[nb_edges]; + } + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) { + + for (int j = 0; j < nb_edges; j++) { + ptm_fabs_metaerror_loss[target_id][j] = 0; + ptm_fabs_fitness_loss[target_id][j] = 0; + ptm_fabs_metaerror_loss_percent[target_id][j] = 0; + ptm_fabs_fitness_loss_percent[target_id][j] = 0; + } + + printf("Testing with phenotypic target model %d\n",target_id); + dynamic_cast<PhenotypicTargetHandler_R*>(exp_manager->world()->phenotypic_target_handler())->set_single_env(target_id); + + best->evaluated_ = false; + best->Evaluate(); + + 
double base_metaerror = best->dist_to_target_by_feature(METABOLISM); + double base_fitness = best->fitness(); + + int i_edges = 0; + + for (auto &rna: best->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + double enhance_backup = ((Rna_R *) rna)->_enhancing_coef_list[i]; + double operate_backup = ((Rna_R *) rna)->_operating_coef_list[i]; + ((Rna_R *) rna)->_enhancing_coef_list[i] = 0; + ((Rna_R *) rna)->_operating_coef_list[i] = 0; + + best->evaluated_ = false; + best->Evaluate(); + + //printf("Testing with phenotypic target model %d : %lf %lf\n",target_id,base_metaerror,best->dist_to_target_by_feature(METABOLISM)); + + ptm_fabs_metaerror_loss[target_id][i_edges] += std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)); + ptm_fabs_fitness_loss[target_id][i_edges] += std::fabs(base_fitness-best->fitness()); + + ptm_fabs_metaerror_loss_percent[target_id][i_edges] += (std::fabs(base_metaerror-best->dist_to_target_by_feature(METABOLISM)))/best->dist_to_target_by_feature(METABOLISM); + ptm_fabs_fitness_loss_percent[target_id][i_edges] += (std::fabs(base_fitness-best->fitness()))/best->fitness(); + + ((Rna_R *) rna)->_enhancing_coef_list[i] = enhance_backup; + ((Rna_R *) rna)->_operating_coef_list[i] = operate_backup; + + i_edges++; + } + } + } + + extract_network_single_target_model(best,nb_phenotypic_target_models,ptm_fabs_metaerror_loss,ptm_fabs_fitness_loss,ptm_fabs_metaerror_loss_percent,ptm_fabs_fitness_loss_percent); + + } + else + { +/* for (const auto& indiv: exp_manager->indivs()) { + indiv->do_transcription_translation_folding(); + //indiv->Evalutate(); + extract_network(dynamic_cast<Individual_R*>(indiv)); + }*/ + } + + delete exp_manager; + + return EXIT_SUCCESS; +} + +void extract_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + std::ofstream network; + 
network.open("network_knockout.csv",std::ofstream::trunc); + network<<"Individual,"<<"Enhancer_or_Inhibitor,"<<"Value,"<<"Metaerror_lost,"<<"Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" <<std::endl; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int i_edges = 0; + + for (auto& rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R*)rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + + if (((Rna_R*)rna)->_enhancing_coef_list[i] > 0) + { + network<<indiv->id()<<",1,"<<((Rna_R*)rna)->_enhancing_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," + <<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + + if (((Rna_R*)rna)->_operating_coef_list[i] > 0) + { + network<<indiv->id()<<",0,"<<((Rna_R*)rna)->_operating_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," + <<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + nb_edges_operating++; + } + } + + i_edges++; + nb_edges++; + } + } + + network.flush(); + network.close(); + + float filter_value = 0.0; + std::string str_filter_value = std::to_string(filter_value); + + std::string file_name = "network_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::trunc); + network << "Individual," << 
"nb_enhancing," << "nb_inhibitor," << "nb_both,nb_edges"<< std::endl; + network << indiv->id() << "," << nb_edges_enhance << "," << nb_edges_operating << "," << nb_edges_both << "," + << nb_edges << std::endl; + network.close(); +} + +void filter_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + float filter_values[3] = {0.00001, 0.0001, 0.001}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "network_filtered_" + str_filter_value + ".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::trunc); + network << "Individual," << "Enhancer," << "Inhibitor," << "Both," << "Value," + << "Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" << std::endl; + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + int both = 0; + if (fabs_fitness_loss_percent[i_edges] >= filter_value) { + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && + (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + network << indiv->id() << ",1,1,1," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << indiv->id() << ",1,0,0," << ((Rna_R *) 
rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << indiv->id() << ",0,1,0," << ((Rna_R *) rna)->_operating_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<< + ","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<< std::endl; + filter_nb_edges_operating++; + } + } + filter_nb_edges++; + } + + if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && (((Rna_R *) rna)->_operating_coef_list[i] > 0)) { + nb_edges_both++; + } else { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + nb_edges_enhance++; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + nb_edges_operating++; + } + } + nb_edges++; + + i_edges++; + } + } + + network.flush(); + network.close(); + + file_name = "network_edges_" + str_filter_value + ".csv"; + + network.open(file_name, std::ofstream::trunc); + network << "Individual," << "nb_enhancing," << "nb_inhibitor," << "nb_both,nb_edges," << "filter_nb_enhancing," + << "filter_nb_inhibitor," << "filter_nb_both,filter_nb_edges" << std::endl; + network << indiv->id() << "," << nb_edges_enhance << "," << nb_edges_operating << "," << nb_edges_both << "," + << nb_edges << "," << + filter_nb_edges_enhance << "," << filter_nb_edges_operating << "," << filter_nb_edges_both << "," + << filter_nb_edges << std::endl; + network.close(); + + } + +} + + + +void dump_network(Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + + float filter_values[4] = {0.0, 0.00001, 
0.0001, 0.001}; + + for (float filter_value : filter_values) { + + std::string str_filter_value = std::to_string(filter_value); + std::string file_name = "network_dump_"+str_filter_value+".csv"; + std::ofstream network; + network.open(file_name, std::ofstream::trunc); + network << "Individual," << "Source,"<<"Destination,"<<"Enhancer_or_Inhibitor," << + "Value," << "Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent" << std::endl; /* "Value," keeps its separator so the header names 9 columns, like each data row written below */ + + int i_edges = 0; + + int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0; + int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0; + + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + for (auto& protein : rna->transcribed_proteins()) { + if (fabs_fitness_loss_percent[i_edges] >= filter_value) { + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << indiv->id() << "," << protein->shine_dal_pos() << "," + << dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<"," + << "1," << ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges]<<"," + << fabs_metaerror_loss_percent[i_edges] << "," << fabs_fitness_loss_percent[i_edges] << std::endl; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << indiv->id() << "," << protein->shine_dal_pos() << "," + << dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<"," + << "0," << ((Rna_R *) rna)->_operating_coef_list[i] << "," + << fabs_metaerror_loss[i_edges] << "," << fabs_fitness_loss[i_edges] <<"," + << fabs_metaerror_loss_percent[i_edges] << "," << fabs_fitness_loss_percent[i_edges]<< std::endl; + } + } + } + + i_edges++; + } + } + + network.flush(); + network.close(); + + } + +} + +void extract_network_single_target_model(Individual_R* indiv, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, 
double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** ptm_fabs_fitness_loss_percent) { + std::ofstream network; + network.open("network_knockout_single_env.csv",std::ofstream::trunc); + network<<"Individual,"<<"Enhancer_or_Inhibitor,"<<"TargetModel,"<<"Value,"<<"Metaerror_lost,Fitness_lost,Metaerror_lost_percent,Fitness_lost_percent"<<std::endl; /* "Value," keeps its separator so the header names 8 columns, like each data row written below */ + + + + for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) { + int i_edges = 0; + for (auto &rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) { + network << indiv->id() << ",1," <<target_id<<","<< ((Rna_R *) rna)->_enhancing_coef_list[i] << "," + << ptm_fabs_metaerror_loss[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss[target_id][i_edges] <<"," + << ptm_fabs_metaerror_loss_percent[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl; + } + + if (((Rna_R *) rna)->_operating_coef_list[i] > 0) { + network << indiv->id() << ",0," <<target_id<<","<< ((Rna_R *) rna)->_operating_coef_list[i] << "," + << ptm_fabs_metaerror_loss[target_id][i_edges]<<"," + << ptm_fabs_fitness_loss[target_id][i_edges] << "," + << ptm_fabs_metaerror_loss_percent[target_id][i_edges] <<"," + << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl; + } + i_edges++; + } + } + } + + network.flush(); + network.close(); + +} \ No newline at end of file diff --git a/src/post_treatments/polymorphism.cpp b/src/post_treatments/polymorphism.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8d830be2f200225d92688a95e9f02b0d79e3adc9 --- /dev/null +++ b/src/post_treatments/polymorphism.cpp @@ -0,0 +1,791 @@ +// 
**************************************************************************** +// +// Aevol - An in silico experimental evolution platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+// +// **************************************************************************** + +// ================================================================= +// Libraries +// ================================================================= +#include <errno.h> +#include <inttypes.h> +#include <getopt.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <zlib.h> +#include <sys/stat.h> + +#include <list> +#include <vector> +#include <unordered_map> +#include <algorithm> +#include <iostream> +#include <fstream> +// ================================================================= +// Project Files +// ================================================================= +#include "aevol.h" + +using namespace aevol; + +class Node { + public: + Node(unsigned long long lid) { id = lid; }; + + unsigned long long id; + std::unordered_map<unsigned long long, Node*> next_nodes; + Node* root = nullptr; + int dist_to_parent = 0; + bool to_delete = false; + bool is_last = false; + std::string nhx = ""; +}; + +// ================================================================= +// Function declarations +// ================================================================= +void print_help(char* prog_path); +void extract_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void dump_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void filter_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, + double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent); +void regul_or_not(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, + double* fabs_fitness_loss_percent); +void 
extract_network_single_target_model(int time, Individual_R* best, int nb_phenotypic_target_models, + double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss, + double** ptm_fabs_metaerror_loss_percent, + double** ptm_fabs_fitness_loss_percent); +void shuffle_randomized(int time, Individual_R* indiv, double fabs_metaerror_loss, double fabs_fitness_loss, + double fabs_metaerror_loss_percent, double fabs_fitness_loss_percent); + +int main(int argc, char** argv) +{ + // The output file (lineage.ae or lineage.rae) contains the following information: + // + // - common data (ae_common::write_to_backup) + // - begin gener (int32_t) + // - end gener (int32_t) + // - final individual index (int32_t) + // - initial genome size (int32_t) + // - initial ancestor (nb genetic units + sequences) (Individual::write_to_backup) + // - replication report of ancestor at generation begin_gener+1 (ae_replic_report::write_to_backup) + // - replication report of ancestor at generation begin_gener+2 (ae_replic_report::write_to_backup) + // - replication report of ancestor at generation begin_gener+3 (ae_replic_report::write_to_backup) + // - ... 
+ // - replication report of ancestor at generation end_gener (ae_replic_report::write_to_backup) + + + printf("\n WARNING : Parameters' change in the middle of a simulation is not managed.\n"); + + + // ===================== + // Parse command line + // ===================== + + // Default values + //check_type check_genome = LIGHT_CHECK; + bool verbose = false; + int64_t t0 = 0; + int64_t t_end = -1; + + char tree_file_name[50]; + + const char * short_options = "hVv:e:"; + static struct option long_options[] = { + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {"verbose", no_argument, NULL, 'v'}, + {"end", required_argument, NULL, 'e'}, + {0, 0, 0, 0} + }; + + int option; + while((option = getopt_long(argc, argv, short_options, long_options, NULL)) != -1) + { + switch(option) + { + case 'h' : + { + print_help(argv[0]); + exit(EXIT_SUCCESS); + } + case 'V' : + { + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + } + case 'v' : verbose = true; break; + //case 'n' : check_genome = NO_CHECK; break; + //case 'c' : check_genome = FULL_CHECK; break; + case 'b' : t0 = atol(optarg); break; + //case 'i' : final_indiv_index = atol(optarg); break; + //case 'r' : final_indiv_rank = atol(optarg); break; + case 'e' : + { + if (strcmp(optarg, "") == 0) + { + printf("%s: error: Option -e or --end : missing argument.\n", argv[0]); + exit(EXIT_FAILURE); + } + + t_end = atol(optarg); + + break; + } + } + } + + //verbose=true; + + // Set undefined command line parameters to default values + if (t_end == -1) { + // Set t_end to the content of the LAST_GENER file if it exists. 
+ // If it doesn't, print help and exit + FILE* lg_file = fopen(LAST_GENER_FNAME, "r"); + if (lg_file != NULL) { + if (fscanf(lg_file, "%" PRId64, &t_end) == EOF) { + printf("ERROR: failed to read last generation from file %s\n", + LAST_GENER_FNAME); + exit(EXIT_FAILURE); + } + fclose(lg_file); + } + else { + printf("%s: error: You must provide a generation number.\n", argv[0]); + exit(EXIT_FAILURE); + } + } + + // printf("Loading at generation %d\n",t_end); + + // Load the simulation + ExpManager* exp_manager = new ExpManager(); + exp_manager->load(t_end, false, false); + + // Check that the tree was recorded + if (not exp_manager->record_tree()) { + Utils::ExitWithUsrMsg("The phylogenetic tree wasn't recorded during " + "evolution, could not reconstruct the lineage"); + } + + int64_t tree_step = exp_manager->tree_step(); + + //delete exp_manager; + + + // The tree + Tree* tree = NULL; + + // ============================ + // Init files + // ============================ + std::ofstream network; + network.open("polymorphism.csv",std::ofstream::trunc); + network<<"Generation,"<<"Individual,"<<"nb_edges,genome_size,nb_protein,nb_rnas,"<<"Fitness,"<<"Metaerror"<<std::endl; + network.flush(); + network.close(); + + // ========================= + // Load the last tree file + // ========================= + + if (verbose) + { + printf("\n\n"); + printf("====================================\n"); + printf(" Loading the last tree file ... "); + fflush(stdout); + } + + + // Example for ae_common::rec_params->tree_step() == 100 : + // + // tree_000100.ae ==> generations 1 to 100. + // tree_000200.ae ==> generations 101 to 200. + // tree_000300.ae ==> generations 201 to 300. + // etc. + // + // Thus, the information for generation end_gener are located + // in the file called (end_gener/ae_common::rec_params->tree_step() + 1) * ae_common::rec_params->tree_step(), + // except if end_gener%ae_common::rec_params->tree_step()==0. 
+ + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t_end); + + tree = new Tree(exp_manager, tree_file_name); + + if (verbose) + { + printf("OK\n"); + printf("====================================\n"); + } + + + World* world = exp_manager->world(); + int16_t grid_width = world->width(); + int16_t grid_height = world->height(); + int32_t pop_size = grid_height * grid_width; + + int32_t lucas = t_end; + + // ============================================================================ + // Find the index of the final individual and retrieve its replication report + // ============================================================================ + std::set<unsigned long long> current; + + for (int16_t x = 0 ; x < grid_width ; x++) + for (int16_t y = 0 ; y < grid_height ; y++) { + ReplicationReport* rep = new ReplicationReport(*(tree->report_by_index(t_end, + x * grid_height + y))); + + current.insert(rep->parent_id()); + delete rep; + } + + // ======================= + // Open the output file + // ======================= + + + + // =================================================== + // Retrieve the replication reports of the ancestors + // =================================================== + + if (verbose) + { + printf("\n\n\n"); + printf("======================================================================\n"); + printf(" Parsing tree files to retrieve the ancestors' replication reports... 
\n"); + printf("======================================================================\n"); + } + + // For each generation (going backwards), retrieve the index of the parent and + // the corresponding replication report + for (int64_t t = t_end - 1 ; t > 0 ; t--) + { + + if ((t % 1000) == 0) { + printf("%d -- Number of ancestor %d\n",t,current.size()); + } + + if (current.size() == 2 && (Utils::mod(t, exp_manager->backup_step()) == 0)) { + + Individual* initial_ancestor; // = exp_manager->best_indiv(); + //printf("LUCAS FOUND at %d (%d)\n",t,(*current.begin())); + + ExpManager* exp_manager_2 = new ExpManager(); + exp_manager_2->load(t, false, false); + + delete exp_manager; + exp_manager = exp_manager_2; + + for (auto cur_indiv_id : current) { + //= exp_manager->indiv_by_id(indices[0]); + for (int16_t x = 0; x < grid_width; x++) + for (int16_t y = 0; y < grid_height; y++) { + if (exp_manager_2->world()->indiv_at(x, y)->id() == cur_indiv_id) { + initial_ancestor = exp_manager_2->world()->indiv_at(x, y); + //break; + } + } + + + + printf("-------------> Loading BEST lucas\n"); + Individual_R *best = dynamic_cast<Individual_R *>(initial_ancestor); + //best->clear_everything_except_dna_and_promoters(); + best->do_transcription_translation_folding(); + + int nb_edges = 0; + for (auto &rna: best->get_rna_list_coding()) { + nb_edges+=((Rna_R *) rna)->nb_influences(); + } + + int nb_iteration = 100; + + printf("Running %d evals for %d edges : %e (%d %d) %e\n", nb_iteration, nb_edges, best->fitness(), + best->amount_of_dna(), + best->protein_list().size(), best->dist_to_target_by_feature(METABOLISM)); + + + double base_metaerror = 0; + double base_fitness = 0; + for (int i = 0; i < nb_iteration; i++) { + exp_manager->world()->ApplyHabitatVariation(); + + best->evaluated_ = false; + best->Evaluate(); + + base_metaerror += best->dist_to_target_by_feature(METABOLISM); + base_fitness += best->fitness(); + } + + base_metaerror /= nb_iteration; + base_fitness /= nb_iteration; + 
+ network.open("polymorphism.csv",std::ofstream::app); + network<<t<<","<<cur_indiv_id<<","<<nb_edges<<","<<best->amount_of_dna()<<","<<best->protein_list().size() + <<","<<best->get_rna_list_coding().size()<<","<<base_fitness<<","<<base_metaerror<<std::endl; + network.flush(); + network.close(); + + } + + exit(1); + } else + lucas = t; + + if (verbose) + printf("Getting the replication report for the ancestor at generation %" PRId64 "\n", t); + + // If we've exhausted the current tree file, load the next one + if (Utils::mod(t, tree_step) == 0) + { + // Change the tree file + delete tree; + + + sprintf(tree_file_name,"tree/tree_%06" PRId64 ".ae", t); + + tree = new Tree(exp_manager, tree_file_name); + } + + std::set<unsigned long long> previous = current; + current.clear(); + + for (int16_t x = 0 ; x < grid_width ; x++) + for (int16_t y = 0 ; y < grid_height ; y++) { + ReplicationReport* rep = new ReplicationReport(*(tree->report_by_index(t, + x * grid_height + y))); + + + auto foundFilter = std::find(previous.begin(),previous.end(),rep->id()); + + if ( foundFilter != previous.end() ) { + current.insert(rep->parent_id()); + } + delete rep; + } + } + + + if (verbose) printf("OK\n"); + + // Dump the tre into NHX format + + //delete exp_manager; + + exit(EXIT_SUCCESS); +} + +/*! + \brief + +*/ +void print_help(char* prog_path) +{ + // default values : + // begin_gener = 0 + // indiv = best individual at generation end_gener + + // there must be a genome backup file for begin_gener + + // not relevant if crossover + + printf("\n"); + printf("*********************** aevol - Artificial Evolution ******************* \n"); + printf("* * \n"); + printf("* Lineage post-treatment program * \n"); + printf("* * \n"); + printf("************************************************************************ \n"); + printf("\n\n"); + printf("This program is Free Software. 
No Warranty.\n");
+  printf("Copyright (C) 2009 LIRIS.\n");
+  // NOTE(review): everything printed from here on is the help text of the
+  // lineage tool. It names the binary "lineage"/"rlineage", describes a
+  // lineage.ae/lineage.rae output file and documents -n, -c, -i, -r and -b,
+  // whereas the option table in main() of this file only declares
+  // help, version, verbose and end.
+  printf("\n");
+#ifdef __REGUL
+  printf("Usage : rlineage -h\n");
+  printf("or : rlineage [-vn] [-i index | -r rank] [-b gener1] -e end_gener \n");
+#else
+  printf("Usage : lineage -h\n");
+  printf("or : lineage [-vn] [-i index | -r rank] [-b gener1] -e end_gener \n");
+#endif
+  printf("\n");
+  // The two branches below differ only by the name of the output file
+#ifdef __REGUL
+  printf("This program retrieves the ancestral lineage of an individual and writes \n");
+  printf("it in an output file called lineage.rae. Specifically, it retrieves the \n");
+  printf("lineage of the individual of end_gener whose index is index, going \n");
+  printf("back in time up to gener1. This program requires at least one population backup\n");
+  printf("file (for the generation gener1), one environment backup file (for the generation gener1)\n");
+  printf("and all tree files for generations gener1 to end_gener.\n");
+#else
+  printf("This program retrieves the ancestral lineage of an individual and writes \n");
+  printf("it in an output file called lineage.ae. Specifically, it retrieves the \n");
+  printf("lineage of the individual of end_gener whose index is index, going \n");
+  printf("back in time up to gener1. This program requires at least one population backup\n");
+  printf("file (for the generation gener1), one environment backup file (for the generation gener1)\n");
+  printf("and all tree files for generations gener1 to end_gener.\n");
+#endif
+  printf("\n");
+  printf("WARNING: This program should not be used for simulations run with lateral\n");
+  printf("transfer. When an individual has more than one parent, the notion of lineage\n");
+  printf("used here is not relevant.\n");
+  printf("\n");
+  // Per-option descriptions
+  printf("\t-h or --help : Display this help.\n");
+  printf("\n");
+  printf("\t-v or --verbose : Be verbose, listing generations as they are \n");
+  printf("\t treated.\n");
+  printf("\n");
+  printf("\t-n or --nocheck : Disable genome sequence checking. Makes the \n");
+  printf("\t program faster, but it is not recommended. \n");
+  printf("\t It is better to let the program check that \n");
+  printf("\t when we rebuild the genomes of the ancestors\n");
+  printf("\t from the lineage file, we get the same sequences\n");
+  printf("\t as those stored in the backup files.\n");
+  printf("\n");
+  printf("\t-c or --fullcheck : Will perform the genome checks every <BACKUP_STEP>\n");
+  printf("\t generations. Default behaviour is lighter as it\n");
+  printf("\t only performs these checks at the ending generation.\n");
+  printf("\n");
+  printf("\t-i index or --index index : \n");
+  printf("\t Retrieve the lineage of the individual whose\n");
+  printf("\t index is index. The index must be comprised \n");
+  printf("\t between 0 and N-1, with N the size of the \n");
+  printf("\t population at the ending generation. If neither\n");
+  printf("\t index nor rank are specified, the program computes \n");
+  printf("\t the lineage of the best individual of the ending \n");
+  printf("\t generation.\n");
+  printf("\n");
+  printf("\t-r rank or --rank rank : \n");
+  printf("\t Retrieve the lineage of the individual whose\n");
+  printf("\t rank is rank. The rank must be comprised \n");
+  printf("\t between 1 and N, with N the size of the \n");
+  printf("\t population at the endind generation. If neither\n");
+  printf("\t index nor rank are specified, the program computes \n");
+  printf("\t the lineage of the best individual of the ending \n");
+  printf("\t generation.\n");
+  printf("\n");
+  printf("\t-b gener1 or --begin gener1 : \n");
+  printf("\t Retrieve the lineage up to generation gener1.\n");
+  printf("\t There must be a genome backup file for this\n");
+  printf("\t generation. 
If not specified, the program \n"); + printf("\t retrieves the lineage up to generation 0.\n"); + printf("\n"); + printf("\t-e end_gener or --end end_gener : \n"); + printf("\t Retrieve the lineage of the individual of end_gener \n"); + printf("\t (default: that contained in file last_gener.txt, if any)\n"); + printf("\n"); +} + +void shuffle_randomized(int time, Individual_R* indiv, double fabs_metaerror_loss, double fabs_fitness_loss, + double fabs_metaerror_loss_percent, double fabs_fitness_loss_percent) { + std::ofstream network; + network.open("lucas_network_knockout_shuffle_randomized.csv",std::ofstream::app); + + std::cout<<time<<","<<fabs_metaerror_loss<<"," + <<fabs_fitness_loss<<","<<fabs_metaerror_loss_percent<<","<<fabs_fitness_loss_percent<<std::endl; + + network<<time<<","<<fabs_metaerror_loss<<"," + <<fabs_fitness_loss<<","<<fabs_metaerror_loss_percent<<","<<fabs_fitness_loss_percent<<std::endl; + + network.flush(); + network.close(); +} + +void extract_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) { + std::ofstream network; + network.open("lucas_network_knockout.csv",std::ofstream::app); + + int i_edges = 0; + + for (auto& rna: indiv->get_rna_list_coding()) { + for (unsigned int i = 0; i < ((Rna_R*)rna)->nb_influences(); i++) { + //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl; + //compute the activity + if (((Rna_R*)rna)->_enhancing_coef_list[i] > 0) + { + network<<time<<",1,"<<((Rna_R*)rna)->_enhancing_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," + <<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl; + } + + if (((Rna_R*)rna)->_operating_coef_list[i] > 0) + { + network<<time<<",0,"<<((Rna_R*)rna)->_operating_coef_list[i]<<","<<fabs_metaerror_loss[i_edges]<<"," 
+               <<fabs_fitness_loss[i_edges]<<","<<fabs_metaerror_loss_percent[i_edges]<<","<<fabs_fitness_loss_percent[i_edges]<<std::endl;
+      }
+      i_edges++;
+    }
+  }
+
+  network.flush();
+  network.close();
+}
+
+/*!
+  \brief Dump the regulation edges whose knockout cost reaches a threshold
+
+  For each threshold in {0.00001, 0.0001, 0.001}:
+   - appends to lucas_network_filtered_<threshold>.csv one row per edge whose
+     fabs_fitness_loss_percent is >= threshold and whose enhancing and/or
+     operating coefficient is positive; the three flag columns are 1,1,1 when
+     both coefficients are positive, 1,0,0 for enhancing only and 0,1,0 for
+     operating only;
+   - appends to lucas_network_edges_<threshold>.csv one row holding the edge
+     counts (enhancing only, operating only, both, total), first for all
+     edges, then for the edges that passed the threshold.
+
+  The four arrays are indexed by edge, edges being numbered in the order in
+  which the influences of the coding RNAs of `indiv` are visited.
+*/
+void filter_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) {
+  float filter_values[3] = {0.00001, 0.0001, 0.001};
+
+  for (float threshold : filter_values) {
+    const std::string threshold_str = std::to_string(threshold);
+
+    std::ofstream kept_edges_file;
+    kept_edges_file.open("lucas_network_filtered_" + threshold_str + ".csv", std::ofstream::app);
+
+    // Counters over every edge ...
+    int all_enhancing = 0, all_operating = 0, all_both = 0, all_edges = 0;
+    // ... and over the edges that reach the threshold
+    int kept_enhancing = 0, kept_operating = 0, kept_both = 0, kept_edges = 0;
+
+    int edge = 0;
+
+    // One CSV row for the current edge; `flags` is the ",x,y,z," column triple
+    auto write_row = [&](const char* flags, const auto& coef) {
+      kept_edges_file << time << flags << coef << ","
+                      << fabs_metaerror_loss[edge] << "," << fabs_fitness_loss[edge] <<
+                      "," << fabs_metaerror_loss_percent[edge] << "," << fabs_fitness_loss_percent[edge] << std::endl;
+    };
+
+    for (auto& rna : indiv->get_rna_list_coding()) {
+      Rna_R* regul_rna = (Rna_R*) rna;
+      for (unsigned int i = 0; i < regul_rna->nb_influences(); i++) {
+        const bool enhances = regul_rna->_enhancing_coef_list[i] > 0;
+        const bool operates = regul_rna->_operating_coef_list[i] > 0;
+
+        if (fabs_fitness_loss_percent[edge] >= threshold) {
+          if (enhances && operates) {
+            write_row(",1,1,1,", regul_rna->_enhancing_coef_list[i]);
+            kept_both++;
+          } else if (enhances) {
+            write_row(",1,0,0,", regul_rna->_enhancing_coef_list[i]);
+            kept_enhancing++;
+          } else if (operates) {
+            write_row(",0,1,0,", regul_rna->_operating_coef_list[i]);
+            kept_operating++;
+          }
+          kept_edges++;
+        }
+
+        if (enhances && operates) {
+          all_both++;
+        } else if (enhances) {
+          all_enhancing++;
+        } else if (operates) {
+          all_operating++;
+        }
+        all_edges++;
+
+        edge++;
+      }
+    }
+
+    kept_edges_file.flush();
+    kept_edges_file.close();
+
+    std::ofstream counts_file;
+    counts_file.open("lucas_network_edges_" + threshold_str + ".csv", std::ofstream::app);
+    counts_file << time << "," << all_enhancing << "," << all_operating << "," << all_both << ","
+                << all_edges << ","
+                << kept_enhancing << "," << kept_operating << "," << kept_both << ","
+                << kept_edges << std::endl;
+    counts_file.close();
+
+  }
+
+}
+
+
+
+void regul_or_not(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent,
+                  double* fabs_fitness_loss_percent) {
+
+  float filter_values[4] = {0.0, 0.00001, 0.0001, 0.001};
+
+  int i_filter = 0;
+  for (float filter_value : filter_values) {
+    std::string str_filter_value = std::to_string(filter_value);
+    std::string file_name = "lucas_network_regul_or_not_" + str_filter_value + ".csv";
+    std::ofstream network;
+    network.open(file_name, std::ofstream::app);
+
+    network << time << ","
+            << fabs_metaerror_loss[i_filter] << "," << 
fabs_fitness_loss[i_filter]<<
+            ","<<fabs_metaerror_loss_percent[i_filter]<<","<<fabs_fitness_loss_percent[i_filter]<< std::endl;
+    // NOTE(review): i_filter is never incremented in this function, so the
+    // row written above uses element 0 of the four arrays for every filter
+    // value. The same arrays are indexed per edge (i_edges) below; confirm
+    // with the caller which layout is intended and whether "i_filter++" is
+    // missing.
+
+    int i_edges = 0;
+
+    // nb_*        : counts over every edge
+    // filter_nb_* : counts over the edges whose fitness loss percentage
+    //               reaches filter_value
+    int nb_edges_enhance = 0, nb_edges_operating = 0, nb_edges_both = 0, nb_edges = 0;
+    int filter_nb_edges_enhance = 0, filter_nb_edges_operating = 0, filter_nb_edges_both = 0, filter_nb_edges = 0;
+
+    for (auto &rna: indiv->get_rna_list_coding()) {
+      for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) {
+        int both = 0;  // never read
+        if (fabs_fitness_loss_percent[i_edges] >= filter_value) {
+          // Edge passes the filter: classify it as both / enhancing / operating
+          if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) &&
+              (((Rna_R *) rna)->_operating_coef_list[i] > 0)) {
+            filter_nb_edges_both++;
+          } else {
+            //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl;
+            //compute the activity
+            if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) {
+              filter_nb_edges_enhance++;
+            }
+
+            if (((Rna_R *) rna)->_operating_coef_list[i] > 0) {
+              filter_nb_edges_operating++;
+            }
+          }
+          filter_nb_edges++;
+        }
+
+        // Same classification, this time regardless of the filter
+        if ((((Rna_R *) rna)->_enhancing_coef_list[i] > 0) && (((Rna_R *) rna)->_operating_coef_list[i] > 0)) {
+          nb_edges_both++;
+        } else {
+          //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl;
+          //compute the activity
+          if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) {
+            nb_edges_enhance++;
+          }
+
+          if (((Rna_R *) rna)->_operating_coef_list[i] > 0) {
+            nb_edges_operating++;
+          }
+        }
+        nb_edges++;
+
+        i_edges++;
+      }
+    }
+
+    network.flush();
+    network.close();
+
+    // The edge counts go to a second file, one appended row per call
+    file_name = "lucas_network_regul_or_not_edges_" + str_filter_value + ".csv";
+
+    network.open(file_name, std::ofstream::app);
+    network << time << "," << nb_edges_enhance << "," << nb_edges_operating << "," << nb_edges_both << ","
+            << nb_edges << "," <<
+            filter_nb_edges_enhance << "," << filter_nb_edges_operating << "," << filter_nb_edges_both << ","
+            << filter_nb_edges 
<< std::endl;
+    network.close();
+
+  }
+
+}
+
+
+
+/*!
+  \brief Dump the regulation edges as (source protein, target protein) pairs
+
+  For each threshold in {0.0, 0.00001, 0.0001, 0.001}, appends rows to
+  lucas_network_dump_<threshold>.csv. For every influence i of every coding
+  RNA whose fabs_fitness_loss_percent reaches the threshold, and for every
+  protein transcribed from that RNA, one row is written per positive
+  coefficient:
+    time, shine_dal_pos of the transcribed protein, shine_dal_pos of
+    _protein_list[i], 1 (enhancing) or 0 (operating), coefficient,
+    metaerror loss, fitness loss.
+
+  The arrays are indexed by edge, edges being numbered in visiting order.
+  fabs_metaerror_loss_percent is not used.
+*/
+void dump_network(int time, Individual_R* indiv, double* fabs_metaerror_loss, double* fabs_fitness_loss, double* fabs_metaerror_loss_percent, double* fabs_fitness_loss_percent) {
+
+  float filter_values[4] = {0.0, 0.00001, 0.0001, 0.001};
+
+  for (float threshold : filter_values) {
+
+    const std::string threshold_str = std::to_string(threshold);
+    std::ofstream dump_file;
+    dump_file.open("lucas_network_dump_"+threshold_str+".csv", std::ofstream::app);
+
+    int edge = 0;
+
+    for (auto &rna: indiv->get_rna_list_coding()) {
+      for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) {
+        for (auto& source_protein : rna->transcribed_proteins()) {
+          // Edges below the threshold are left out of the dump
+          if (!(fabs_fitness_loss_percent[edge] >= threshold)) {
+            continue;
+          }
+
+          // `kind` is the literal column "1," (enhancing) or "0," (operating)
+          auto write_row = [&](const char* kind, const auto& coef) {
+            dump_file << time << "," << source_protein->shine_dal_pos() << ","
+                      << dynamic_cast<Rna_R*>(rna)->_protein_list[i]->shine_dal_pos()<<","
+                      << kind << coef << ","
+                      << fabs_metaerror_loss[edge] << "," << fabs_fitness_loss[edge] << std::endl;
+          };
+
+          if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) {
+            write_row("1,", ((Rna_R *) rna)->_enhancing_coef_list[i]);
+          }
+
+          if (((Rna_R *) rna)->_operating_coef_list[i] > 0) {
+            write_row("0,", ((Rna_R *) rna)->_operating_coef_list[i]);
+          }
+        }
+
+        edge++;
+      }
+    }
+
+    dump_file.flush();
+    dump_file.close();
+
+  }
+
+}
+
+void extract_network_single_target_model(int time, Individual_R* indiv, int nb_phenotypic_target_models,
+                                         double** ptm_fabs_metaerror_loss, double** ptm_fabs_fitness_loss,
+                                         double** ptm_fabs_metaerror_loss_percent,
+                                         double** 
ptm_fabs_fitness_loss_percent) {
+  // Writes, for every target model and every regulation edge of `indiv`, the
+  // knockout losses held in the ptm_* tables ([target_id][edge]).
+  // The file is opened with trunc: each call replaces the previous content
+  // (the other network dumps of this file append instead).
+  std::ofstream network;
+  network.open("lucas_network_knockout_single_env.csv",std::ofstream::trunc);
+
+
+  for (int target_id = 0; target_id < nb_phenotypic_target_models; target_id++) {
+    // Edge numbering restarts for every target model
+    int i_edges = 0;
+    for (auto &rna: indiv->get_rna_list_coding()) {
+      for (unsigned int i = 0; i < ((Rna_R *) rna)->nb_influences(); i++) {
+        //std::cout<<"Influence "<<i<<" value is "<<((Rna_R*)rna)->_enhancing_coef_list[i]<<" "<<((Rna_R*)rna)->_operating_coef_list[i]<<std::endl;
+        //compute the activity
+        // Row tagged 1: positive enhancing coefficient
+        if (((Rna_R *) rna)->_enhancing_coef_list[i] > 0) {
+          network << time << ",1," <<target_id<<","<< ((Rna_R *) rna)->_enhancing_coef_list[i] << ","
+                  << ptm_fabs_metaerror_loss[target_id][i_edges]<<","
+                  << ptm_fabs_fitness_loss[target_id][i_edges] <<","
+                  << ptm_fabs_metaerror_loss_percent[target_id][i_edges]<<","
+                  << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl;
+        }
+
+        // Row tagged 0: positive operating coefficient (an edge with both
+        // coefficients positive produces two rows)
+        if (((Rna_R *) rna)->_operating_coef_list[i] > 0) {
+          network << time << ",0," <<target_id<<","<< ((Rna_R *) rna)->_operating_coef_list[i] << ","
+                  << ptm_fabs_metaerror_loss[target_id][i_edges]<<","
+                  << ptm_fabs_fitness_loss[target_id][i_edges] << ","
+                  << ptm_fabs_metaerror_loss_percent[target_id][i_edges] <<","
+                  << ptm_fabs_fitness_loss_percent[target_id][i_edges] << std::endl;
+        }
+        i_edges++;
+      }
+    }
+  }
+
+  network.flush();
+  network.close();
+
+}
diff --git a/src/post_treatments/protein_map.cpp b/src/post_treatments/protein_map.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2e24d146e14a42201b620df23f601be457e4104c
--- /dev/null
+++ b/src/post_treatments/protein_map.cpp
@@ -0,0 +1,397 @@
+// ****************************************************************************
+//
+// Aevol - An in silico experimental evolution platform
+//
+// ****************************************************************************
+//
+// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr>
+// Web: http://www.aevol.fr/
+// 
E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// +// **************************************************************************** + +// The input file is produced by the lineage post-treatment, please refer to it +// for e.g. the file format/content + +// ============================================================================ +// Includes +// ============================================================================ +#include <cinttypes> +#include <getopt.h> +#include <cstdlib> +#include <cstdio> +#include <cstring> +#include <zlib.h> +#include <err.h> +#include <cerrno> +#include <sys/stat.h> +#include <unistd.h> +#include <list> +#include <iostream> +#include <fstream> + +#include "aevol.h" + +using namespace aevol; + +class ProteinMap { +public: + ProteinMap(int nb_prot, int dna_length) { + nb_prot_ = nb_prot; + start_pos_.resize(nb_prot_); + length_.resize(nb_prot_); + basal_level_.resize(nb_prot_); + hamming_dist_.resize(nb_prot_); + dist_next_prot_.resize(nb_prot_); + dna_length_ = dna_length; + } + + void add_protein(int32_t start_pos, int32_t length, int32_t basal_level, int32_t hamming_dist, int32_t dist_next_prot) { + start_pos_[cpt_] = start_pos; + length_[cpt_] = length; + basal_level_[cpt_] = basal_level; + hamming_dist_[cpt_] = 
hamming_dist; + dist_next_prot_[cpt_] = dist_next_prot; + cpt_++; + } + + std::vector<int32_t> start_pos_; + std::vector<int32_t> length_; + std::vector<double> basal_level_; + std::vector<int32_t> hamming_dist_; + std::vector<int32_t> dist_next_prot_; + int nb_prot_; + int cpt_ = 0; + int dna_length_; +}; + +// Helper functions +void interpret_cmd_line_options(int argc, char* argv[]); +void print_help(char* prog_path); +ProteinMap* compute_protein_map(Individual* indiv); + +// Command-line option variables +static char* lineage_file_name = nullptr; +static bool verbose = false; + +static long pt_begin = -1; +static long pt_end = -1; + +int main(int argc, char* argv[]) { + interpret_cmd_line_options(argc, argv); + + printf("\n" + "WARNING : Parameter change during simulation is not managed in general.\n" + " Only changes in environmental target done with aevol_modify are handled.\n" + "\n"); + + // ======================= + // Open the lineage file + // ======================= + gzFile lineage_file = gzopen(lineage_file_name, "r"); + if (lineage_file == Z_NULL) { + fprintf(stderr, "ERROR : Could not read the lineage file %s\n", lineage_file_name); + exit(EXIT_FAILURE); + } + + int64_t t0 = 0; + int64_t t_end = 0; + int32_t final_indiv_index = 0; + int32_t final_indiv_rank = 0; + + gzread(lineage_file, &t0, sizeof(t0)); + gzread(lineage_file, &t_end, sizeof(t_end)); + gzread(lineage_file, &final_indiv_index, sizeof(final_indiv_index)); + gzread(lineage_file, &final_indiv_rank, sizeof(final_indiv_rank)); + + if (pt_begin == -1) pt_begin = t0; + if (pt_end == -1) pt_end = t_end; + + if (verbose) { + printf("\n\n""===============================================================================\n"); + printf(" Statistics of the ancestors of indiv. 
%" PRId32 + " (rank %" PRId32 ") from time %" PRId64 " to %" PRId64 "\n", + final_indiv_index, final_indiv_rank, t0, t_end); + printf("================================================================================\n"); + } + + + + // ============================= + // Open the experiment manager + // ============================= + ExpManager* exp_manager = new ExpManager(); + exp_manager->load(t0, true, false); + + // The current version doesn't allow for phenotypic variation nor for + // different phenotypic targets among the grid + if (not exp_manager->world()->phenotypic_target_shared()) + Utils::ExitWithUsrMsg("sorry, ancestor stats has not yet been implemented " + "for per grid-cell phenotypic target"); + auto phenotypicTargetHandler = + exp_manager->world()->phenotypic_target_handler(); + if (not (phenotypicTargetHandler->var_method() == NO_VAR)) + Utils::ExitWithUsrMsg("sorry, ancestor stats has not yet been implemented " + "for variable phenotypic targets"); + + int64_t backup_step = exp_manager->backup_step(); + + + // ========================= + // Open the output file(s) + // ========================= + // Create missing directories + int status; + status = mkdir("stats/ancestor_stats/", 0755); + if ((status == -1) && (errno != EEXIST)) { + err(EXIT_FAILURE, "stats/ancestor_stats/"); + } + + // ========================= + // Create data structure + // ========================= + ProteinMap** list_prot_map = new ProteinMap*[t_end-t0+1]; + + // ================================================== + // Prepare the initial ancestor and write its stats + // ================================================== + GridCell* grid_cell = new GridCell(lineage_file, exp_manager, nullptr); + auto* indiv = grid_cell->individual(); + indiv->Evaluate(); + indiv->compute_statistical_data(); + indiv->compute_non_coding(); + + list_prot_map[time()] = compute_protein_map(indiv); + + // ========================================================================== + // Replay the 
mutations to get the successive ancestors and analyze them + // ========================================================================== + ReplicationReport* rep = nullptr; + int32_t index; + ExpManager* exp_manager_backup = nullptr; + int32_t unitlen_before; + double metabolic_error_before; + double impact_on_metabolic_error; + char mut_descr_string[255]; + + + aevol::AeTime::plusplus(); + while (time() <= t_end) + { + rep = new ReplicationReport(lineage_file, indiv); + index = rep->id(); // who we are building... + + if (verbose) + printf("Rebuilding ancestor at generation %" PRId64 + " (index %" PRId32 ")...", time(), index); + + indiv->Reevaluate(); + + // 2) Replay replication (create current individual's child) + GeneticUnit& gen_unit = indiv->genetic_unit_nonconst(0); + GeneticUnit* stored_gen_unit = nullptr; + Individual* stored_indiv = nullptr; + + // For each genetic unit, replay the replication (undergo all mutations) + // TODO <david.parsons@inria.fr> disabled for multiple GUs + const auto& dnarep = rep->dna_replic_report(); + + // TODO(dpa) The following 3 for loops should be factorized. 
+ // However, this is not as easy as it sounds :-D + // see std::list::splice + for (const auto& mut: dnarep.HT()) + gen_unit.dna()->undergo_this_mutation(*mut); + + for (const auto& mut: dnarep.rearrangements()) { + // Apply mutation + gen_unit.dna()->undergo_this_mutation(*mut); + } + + for (const auto& mut: dnarep.mutations()) { + // Apply mutation + gen_unit.dna()->undergo_this_mutation(*mut); + + } + + // 3) All the mutations have been replayed, we can now evaluate the new individual + indiv->Reevaluate(); + indiv->compute_statistical_data(); + indiv->compute_non_coding(); + + list_prot_map[time()] = compute_protein_map(indiv); + + if (verbose) printf(" OK\n"); + + delete rep; + + aevol::AeTime::plusplus(); + } + + gzclose(lineage_file); + + std::ofstream proteins_map_file; + proteins_map_file.open("proteins_map_gen.csv",std::ofstream::trunc); + proteins_map_file<<"generation,protein_id,shine_dal,length,concentration,hamming_dist,dist_next_protein,nb_proteins,dna_length"<<std::endl; + + for (int i = t0; i <= t_end; i++) { + for (int i_prot = 0; i_prot < list_prot_map[i]->nb_prot_; i_prot++) { + proteins_map_file<<i<<","<<i_prot<<","<<list_prot_map[i]->start_pos_[i_prot] + <<","<<list_prot_map[i]->length_[i_prot] + <<","<<list_prot_map[i]->basal_level_[i_prot] + <<","<<list_prot_map[i]->hamming_dist_[i_prot] + <<","<<list_prot_map[i]->dist_next_prot_[i_prot] + <<","<<list_prot_map[i]->nb_prot_<<","<<list_prot_map[i]->dna_length_<<std::endl; + } + } + + proteins_map_file.flush(); + proteins_map_file.close(); + + // Additional outputs + + delete exp_manager; + delete indiv; + + return EXIT_SUCCESS; +} + +void interpret_cmd_line_options(int argc, char* argv[]) { + // ===================== + // Parse command line + // ===================== + const char * short_options = "hVb:e:v"; + static struct option long_options[] = { + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {"verbose", no_argument, NULL, 'v'}, + {"begin", required_argument, 
nullptr, 'b'}, + {"end", required_argument, nullptr, 'e'}, + {0, 0, 0, 0} + }; + + int option; + while((option = getopt_long(argc, argv, short_options, + long_options, nullptr)) != -1) { + switch(option) { + case 'h': + print_help(argv[0]); + exit(EXIT_SUCCESS); + case 'V': + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + case 'v': + verbose = true; + break; + case 'b': + pt_begin = atol(optarg); + break; + case 'e': + pt_end = atol(optarg); + break; + default: + // An error message is printed in getopt_long, we just need to exit + exit(EXIT_FAILURE); + } + } + + // There should be only one remaining arg: the lineage file + if (optind != argc - 1) { + Utils::ExitWithUsrMsg("please specify a lineage file"); + } + + lineage_file_name = new char[strlen(argv[optind]) + 1]; + sprintf(lineage_file_name, "%s", argv[optind]); +} + +void print_help(char* prog_path) { + // Get the program file-name in prog_name (strip prog_path of the path) + char* prog_name; // No new, it will point to somewhere inside prog_path + if ((prog_name = strrchr(prog_path, '/'))) { + prog_name++; + } + else { + prog_name = prog_path; + } + + printf("******************************************************************************\n"); + printf("* *\n"); + printf("* aevol - Artificial Evolution *\n"); + printf("* *\n"); + printf("* Aevol is a simulation platform that allows one to let populations of *\n"); + printf("* digital organisms evolve in different conditions and study experimentally *\n"); + printf("* the mechanisms responsible for the structuration of the genome and the *\n"); + printf("* transcriptome. 
*\n"); + printf("* *\n"); + printf("******************************************************************************\n"); + printf("\n"); + printf("%s: create an experiment with setup as specified in PARAM_FILE.\n", + prog_name); + printf("\n"); + printf("Usage : %s -h or --help\n", prog_name); + printf(" or : %s -V or --version\n", prog_name); + printf(" or : %s LINEAGE_FILE [-FMv]\n", + prog_name); + printf("\nOptions\n"); + printf(" -h, --help\n\tprint this help, then exit\n"); + printf(" -V, --version\n\tprint version number, then exit\n"); + printf(" -v, --verbose\n\tbe verbose\n"); +} + +ProteinMap* compute_protein_map(Individual* indiv) { + ProteinMap* pmap = new ProteinMap(indiv->protein_list().size(),indiv->amount_of_dna()); + + // Make a copy of each genetic unit's protein list + for (auto& gen_unit: indiv->genetic_unit_list_nonconst()) { + // append all proteins from `gen_unit` to `protein_list_` + for (auto& strand_id: {LEADING, LAGGING}) { + auto& strand = gen_unit.protein_list(strand_id); + int pos_next = std::prev(gen_unit.protein_list(strand_id).end())->shine_dal_pos(); + bool first = true; + int pos_first = gen_unit.protein_list(strand_id).begin()->shine_dal_pos(); + int pos_prev = -1; + for (auto& p: strand) { + int dist = -1; + if (first) { + if (strand_id == LEADING) + dist = p.shine_dal_pos() + (indiv->amount_of_dna() - pos_next); + else + dist = (indiv->amount_of_dna() - p.shine_dal_pos()) + pos_next; + + first = false; + } else if (p.shine_dal_pos() == std::prev(gen_unit.protein_list(strand_id).end())->shine_dal_pos()) { + if (strand_id == LEADING) + dist = (indiv->amount_of_dna() - p.shine_dal_pos()) + pos_first; + else + dist = p.shine_dal_pos() + (indiv->amount_of_dna() - pos_first); + } else { + if (strand_id == LEADING) + dist = p.shine_dal_pos() - pos_prev; + else + dist = pos_prev - p.shine_dal_pos(); + } + pos_prev = p.shine_dal_pos(); + int8_t prom_dist; + gen_unit.is_promoter(LEADING, (*p.rna_list().begin())->promoter_pos(), + 
prom_dist); + + pmap->add_protein(p.shine_dal_pos(),p.length(),p.concentration(),prom_dist,dist); + } + } + } + + return pmap; +} \ No newline at end of file diff --git a/src/post_treatments/view.cpp b/src/post_treatments/view.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ea0051247eeb39108d07507e7187cc5b79e8830d --- /dev/null +++ b/src/post_treatments/view.cpp @@ -0,0 +1,158 @@ +// **************************************************************************** +// +// Aevol - An in silico experimental evolution platform +// +// **************************************************************************** +// +// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr> +// Web: http://www.aevol.fr/ +// E-mail: See <http://www.aevol.fr/contact/> +// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+// +// **************************************************************************** + +// ============================================================================ +// Includes +// ============================================================================ +#include <cstdlib> +#include <cstdio> + +#include <getopt.h> + +#ifdef __NO_X + #error This program requires graphics libraries +#else + #include <X11/Xlib.h> +#endif + +#include "aevol.h" + +using namespace aevol; + +// Helper functions +void print_help(char* prog_path); +void interpret_cmd_line_options(int argc, char* argv[]); + +// Command-line option variables +static int64_t timestep = -1; + + +int main(int argc, char* argv[]) { + interpret_cmd_line_options(argc, argv); + + // If timestep wasn't provided, use default + if (timestep < 0) { + timestep = OutputManager::last_gener(); + } + + printf("Displaying timestep %" PRId64 "...\n", timestep); + + // ================================================================= + // Read the backup file + // ================================================================= + // Load simulation from backup + ExpManager_X11* exp_manager = new ExpManager_X11(); + exp_manager->load(timestep, false, false); + + // ================================================================= + // Draw the windows + // ================================================================= + // Display is off by default, switch it on + exp_manager->toggle_display_on_off(); + + // Display is usually triggered in ExpManager::run_evolution(), here we want + // to call it manually + exp_manager->display(); + + // Handle user events until he quits + while (not exp_manager->quit_signal_received()) { + exp_manager->handle_events(); + } + + delete exp_manager; + return EXIT_SUCCESS; +} + + +void print_help(char* prog_path) { + // Get the program file-name in prog_name (strip prog_path of the path) + char* prog_name; // No new, it will point to somewhere inside prog_path + if ((prog_name = 
strrchr(prog_path, '/'))) { + prog_name++; + } + else { + prog_name = prog_path; + } + + printf("******************************************************************************\n"); + printf("* *\n"); + printf("* aevol - Artificial Evolution *\n"); + printf("* *\n"); + printf("* Aevol is a simulation platform that allows one to let populations of *\n"); + printf("* digital organisms evolve in different conditions and study experimentally *\n"); + printf("* the mechanisms responsible for the structuration of the genome and the *\n"); + printf("* transcriptome. *\n"); + printf("* *\n"); + printf("******************************************************************************\n"); + printf("\n"); + printf("%s: view the simulation at the provided timestep\n", + prog_name); + printf("\n"); + printf("Usage : %s -h or --help\n", prog_name); + printf(" or : %s -V or --version\n", prog_name); + printf(" or : %s [-t TIMESTEP]\n", + prog_name); + printf("\nOptions\n"); + printf(" -h, --help\n\tprint this help, then exit\n\n"); + printf(" -V, --version\n\tprint version number, then exit\n\n"); + printf(" -t, --timestep TIMESTEP\n"); + printf("\tspecify timestep to display (default value read in last_gener.txt)\n"); +} + +void interpret_cmd_line_options(int argc, char* argv[]) { + // Define allowed options + const char* options_list = "hVt:"; + static struct option long_options_list[] = { + {"help", no_argument, nullptr, 'h'}, + {"version", no_argument, nullptr, 'V'}, + {"timestep", required_argument, nullptr, 't'}, + {0, 0, 0, 0} + }; + + // Get actual values of the CLI options + int option; + while ((option = getopt_long(argc, argv, options_list, long_options_list, + nullptr)) != -1) { + switch (option) { + case 'h' : { + print_help(argv[0]); + exit(EXIT_SUCCESS); + } + case 'V' : { + Utils::PrintAevolVersion(); + exit(EXIT_SUCCESS); + } + case 't' : { + timestep = atol(optarg); + break; + } + default : { + // An error message is printed in getopt_long, we just need to exit + 
exit(EXIT_FAILURE); + } + } + } +}