Forked from
aevol / aevol
3384 commits behind, 125 commits ahead of the upstream repository.
-
David Parsons authoredDavid Parsons authored
lineage.cpp 18.64 KiB
// ****************************************************************************
//
// Aevol - An in silico experimental evolution platform
//
// ****************************************************************************
//
// Copyright: See the AUTHORS file provided with the package or <www.aevol.fr>
// Web: http://www.aevol.fr/
// E-mail: See <http://www.aevol.fr/contact/>
// Original Authors : Guillaume Beslon, Carole Knibbe, David Parsons
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// ****************************************************************************
// ============================================================================
// Includes
// ============================================================================
#include <cerrno>
#include <cinttypes>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <zlib.h>
#include <sys/stat.h>
#include <getopt.h>
#include <list>
#include "aevol.h"
using namespace aevol;
// Helper functions
void print_help(char* prog_path);
void interpret_cmd_line_options(int argc, char* argv[]);
// Command-line option variables
static bool full_check = false;
static bool verbose = false;
static int64_t t0 = 0;
static int64_t t_end = -1;
static int32_t final_indiv_index = -1;
static int32_t final_indiv_rank = -1;
static char tree_file_name[255]; // TODO(dpa) remove magic number
int main(int argc, char** argv) {
// The output file (lineage.ae) contains the following information:
// You may check that this information is up-to-date by searching
// "lineage_file" in this source file
//
// - t0
// - t_end
// - final individual index
// - final individual rank
// - initial_ancestor information (including its genome)
// - replication report of ancestor at generation t0+1
// - replication report of ancestor at generation t0+2
// - replication report of ancestor at generation t0+3
// - ...
// - replication report of ancestor at generation t_end
interpret_cmd_line_options(argc, argv);
printf("\n WARNING : Parameter change in the middle of a simulation is not managed.\n");
// Load the simulation
ExpManager* exp_manager = new ExpManager();
exp_manager->load(t_end, true, false);
// Check that the tree was recorded
if (not exp_manager->record_tree()) {
Utils::ExitWithUsrMsg("The phylogenetic tree wasn't recorded during "
"evolution, could not reconstruct the lineage");
}
int64_t tree_step = exp_manager->tree_step();
//delete exp_manager;
// The tree
Tree* tree = nullptr;
// Indices, ranks and replication reports of the individuals in the lineage
int32_t* indices = new int32_t[t_end - t0 + 1];
ReplicationReport** reports = new ReplicationReport*[t_end - t0];
// NB: we do not need the report of the ancestor at time t0 since we have
// retrieved the individual itself from the initial backup
// (plus it might be the generation 0, for which we have no reports)
// reports[0] = how ancestor at t0 + 1 was created
// reports[i] = how ancestor at t0 + i + 1 was created
// reports[t_end - t0 - 1] = how the final individual was created
//
// ---------------------------------------------------------------
// reports | t0 => t1 | t1 => t2 |...| t_n-1 => t_n | XXXXXXXXXXXX |
// ---------------------------------------------------------------
// indices | index at t0 | index at t1 |...| index at t_n-1 | index at t_n |
// ---------------------------------------------------------------
// =========================
// Load the last tree file
// =========================
if (verbose) {
printf("\n\n");
printf("====================================\n");
printf(" Loading the last tree file ... ");
fflush(stdout);
}
// Example for ae_common::rec_params->tree_step() == 100 :
//
// tree_000100.ae ==> timesteps 1 to 100.
// tree_000200.ae ==> timesteps 101 to 200.
// tree_000300.ae ==> timesteps 201 to 300.
// etc.
//
#ifdef __REGUL
sprintf(tree_file_name,"tree/tree_" TIMESTEP_FORMAT ".rae", t_end);
#else
sprintf(tree_file_name,"tree/tree_" TIMESTEP_FORMAT ".ae", t_end);
#endif
tree = new Tree(exp_manager, tree_file_name);
if (verbose) {
printf("OK\n");
printf("====================================\n");
}
// ============================================================================
// Find the index of the final individual and retrieve its replication report
// ============================================================================
if (final_indiv_index != -1) {
// The index was directly provided, get the replication report and update the indices and ranks tables
reports[t_end - t0 - 1] =
new ReplicationReport(*(tree->report_by_index(t_end,
final_indiv_index)));
final_indiv_rank = reports[t_end - t0 - 1]->rank();
indices[t_end - t0] = final_indiv_index;
}
else {
if (final_indiv_rank == -1) {
// No index nor rank was given in the command line.
// By default, we construct the lineage of the best individual, the rank of which
// is simply the number of individuals in the population.
final_indiv_rank = exp_manager->nb_indivs();
}
// Retrieve the replication report of the individual of interest (at t_end)
reports[t_end - t0 - 1] =
new ReplicationReport(*(tree->report_by_rank(t_end, final_indiv_rank)));
final_indiv_index = reports[t_end - t0 - 1]->id();
indices[t_end - t0] = final_indiv_index;
//~ ranks[end_gener - begin_gener] = final_indiv_rank;
}
if (verbose) {
printf("The final individual has index %" PRId32
" (rank %" PRId32 ")\n", final_indiv_index, final_indiv_rank);
}
// =======================
// Open the output file
// =======================
char output_file_name[101];
#ifdef __REGUL
snprintf(output_file_name, 100,
"lineage-b" TIMESTEP_FORMAT "-e" TIMESTEP_FORMAT "-i%" PRId32 "-r%" PRId32 ".rae",
t0, t_end, final_indiv_index, final_indiv_rank);
#else
snprintf(output_file_name, 100,
"lineage-b" TIMESTEP_FORMAT "-e" TIMESTEP_FORMAT "-i%" PRId32 "-r%" PRId32 ".ae",
t0, t_end, final_indiv_index, final_indiv_rank);
#endif
gzFile lineage_file = gzopen(output_file_name, "w");
if (lineage_file == nullptr) {
fprintf(stderr, "File %s could not be created.\n", output_file_name);
fprintf(stderr, "Please check your permissions in this directory.\n");
exit(EXIT_FAILURE);
}
// ===================================================
// Retrieve the replication reports of the ancestors
// ===================================================
if (verbose) {
printf("\n\n\n");
printf("======================================================================\n");
printf(" Parsing tree files to retrieve the ancestors' replication reports... \n");
printf("======================================================================\n");
}
// Retrieve the index of the first ancestor from the last replication report
indices[t_end - t0 -1] = reports[t_end - t0 - 1]->parent_id();
// For each generation (going backwards), retrieve the index of the parent and
// the corresponding replication report
for (int64_t i = t_end - t0 - 2 ; i >= 0 ; i--) {
int64_t t = t0 + i + 1;
// We want to fill reports[i], that is to say, how the ancestor
// at generation begin_gener + i + 1 was created
if (verbose)
printf("Getting the replication report for the ancestor at generation %" PRId64 "\n", t);
// If we've exhausted the current tree file, load the next one
if (Utils::mod(t, tree_step) == 0) {
// Change the tree file
delete tree;
#ifdef __REGUL
sprintf(tree_file_name,"tree/tree_" TIMESTEP_FORMAT ".rae", t);
#else
sprintf(tree_file_name,"tree/tree_" TIMESTEP_FORMAT ".ae", t);
#endif
tree = new Tree(exp_manager, tree_file_name);
}
// Copy the replication report of the ancestor
reports[i] =
new ReplicationReport(*(tree->report_by_index(t, indices[i + 1])));
// Retreive the index and rank of the next ancestor from the report
indices[i] = reports[i]->parent_id();
}
delete exp_manager;
if (verbose) printf("OK\n");
// =============================================================================
// Get the initial genome from the backup file and write it in the output file
// =============================================================================
if (verbose) {
printf("\n\n\n");
printf("=============================================== \n");
printf(" Getting the initial genome sequence... ");
fflush(nullptr);
}
// Load the simulation
exp_manager = new ExpManager();
exp_manager->load(t0, true, false);
// Copy the initial ancestor
// NB : The list of individuals is sorted according to the index
const Individual& initial_ancestor = *(exp_manager->indiv_by_id(indices[0]));
// Write file "header"
gzwrite(lineage_file, &t0, sizeof(t0));
gzwrite(lineage_file, &t_end, sizeof(t_end));
gzwrite(lineage_file, &final_indiv_index, sizeof(final_indiv_index));
gzwrite(lineage_file, &final_indiv_rank, sizeof(final_indiv_rank));
initial_ancestor.grid_cell()->save(lineage_file);
if (verbose) {
printf("OK\n");
printf("=============================================== \n");
}
// ===============================================================================
// Write the replication reports of the successive ancestors in the output file
// (and, optionally, check that the rebuilt genome is correct each time a backup
// is available)
// ===============================================================================
if (verbose) {
printf("\n\n\n");
printf("============================================================ \n");
printf(" Write the replication reports in the output file... \n");
printf("============================================================ \n");
}
std::list<GeneticUnit>::const_iterator unit;
Individual* stored_indiv = nullptr;
std::list<GeneticUnit>::const_iterator stored_gen_unit;
ExpManager* exp_manager_backup = nullptr;
// NB: I must keep the genome encapsulated inside an Individual, because
// replaying the mutations has side effects on the list of promoters,
// which is stored in the individual
bool check_genome_now = false;
for (int64_t i = 0 ; i < t_end - t0 ; i++) {
// Where are we in time...
int64_t t = t0 + i + 1;
// Do we need to check the genome now?
check_genome_now = t == t_end ||
(full_check && Utils::mod(t, exp_manager->backup_step()) == 0);
// Write the replication report of the ancestor for current generation
if (verbose) {
printf("Writing the replication report for t= %" PRId64
" (built from indiv %" PRId32 " at t= %" PRId64 ")\n",
t, indices[i], t-1);
}
reports[i]->write_to_tree_file(lineage_file);
if (verbose) printf(" OK\n");
if (check_genome_now) {
// Load the simulation
exp_manager_backup = new ExpManager();
exp_manager_backup->load(t, true, false);
// Copy the ancestor from the backup
stored_indiv = exp_manager_backup->indiv_by_id(indices[i + 1]);
stored_gen_unit = stored_indiv->genetic_unit_list().cbegin();
}
// Warning: this portion of code won't work if the number of units changes
// during the evolution
// Replay the mutations stored in the current replication report on the
// current genome
unit = initial_ancestor.genetic_unit_list().cbegin();
for (const auto& mut: reports[i]->dna_replic_report().HT()) {
(unit->dna())->undergo_this_mutation(*mut);
}
for (const auto& mut: reports[i]->dna_replic_report().rearrangements()) {
(unit->dna())->undergo_this_mutation(*mut);
}
for (const auto& mut: reports[i]->dna_replic_report().mutations()) {
unit->dna()->undergo_this_mutation(*mut);
}
if (check_genome_now) {
if (verbose) {
printf("Checking the sequence of the unit...");
fflush(stdout);
}
assert(stored_gen_unit != stored_indiv->genetic_unit_list().cend());
char * str1 = new char[unit->dna()->length() + 1];
memcpy(str1, unit->dna()->data(),
unit->dna()->length() * sizeof(char));
str1[unit->dna()->length()] = '\0';
char * str2 = new char[stored_gen_unit->dna()->length() + 1];
memcpy(str2, stored_gen_unit->dna()->data(),
stored_gen_unit->dna()->length() * sizeof(char));
str2[stored_gen_unit->dna()->length()] = '\0';
if (strncmp(str1, str2, stored_gen_unit->dna()->length()) == 0) {
if (verbose) printf(" OK\n");
}
else {
if (verbose) printf(" ERROR !\n");
fprintf(stderr, "Error: the rebuilt unit is not the same as \n");
fprintf(stderr, "the one stored in backup file at %" PRId64 "\n", t);
fprintf(stderr, "Rebuilt unit : %" PRId32 " bp\n %s\n",
(int32_t)strlen(str1), str1);
fprintf(stderr, "Stored unit : %" PRId32 " bp\n %s\n",
(int32_t)strlen(str2), str2);
delete [] str1;
delete [] str2;
gzclose(lineage_file);
delete exp_manager_backup;
delete exp_manager;
delete [] reports;
fflush(stdout);
exit(EXIT_FAILURE);
}
delete [] str1;
delete [] str2;
++stored_gen_unit;
}
++unit;
assert(unit == initial_ancestor.genetic_unit_list().cend());
if (check_genome_now) {
assert(stored_gen_unit == stored_indiv->genetic_unit_list().cend());
delete exp_manager_backup;
}
}
gzclose(lineage_file);
delete [] reports;
delete exp_manager;
exit(EXIT_SUCCESS);
}
/**
* \brief print help and exist
*/
void print_help(char* prog_path) {
// Get the program file-name in prog_name (strip prog_path of the path)
char* prog_name; // No new, it will point to somewhere inside prog_path
if ((prog_name = strrchr(prog_path, '/'))) {
prog_name++;
}
else {
prog_name = prog_path;
}
printf("******************************************************************************\n");
printf("* *\n");
printf("* aevol - Artificial Evolution *\n");
printf("* *\n");
printf("* Aevol is a simulation platform that allows one to let populations of *\n");
printf("* digital organisms evolve in different conditions and study experimentally *\n");
printf("* the mechanisms responsible for the structuration of the genome and the *\n");
printf("* transcriptome. *\n");
printf("* *\n");
printf("******************************************************************************\n");
printf("\n");
printf("%s:\n", prog_name);
printf("\tReconstruct the lineage of a given individual from the tree files\n");
printf("\n");
printf("Usage : %s -h or --help\n", prog_name);
printf(" or : %s -V or --version\n", prog_name);
printf(" or : %s [-b TIMESTEP] [-e TIMESTEP] [-I INDEX | -R RANK] [-F] [-v]\n",
prog_name);
printf("\nOptions\n");
printf(" -h, --help\n\tprint this help, then exit\n");
printf(" -V, --version\n\tprint version number, then exit\n");
printf(" -b, --begin TIMESTEP\n");
printf("\tspecify time t0 up to which to reconstruct the lineage\n");
printf(" -e, --end TIMESTEP\n");
printf("\tspecify time t_end of the indiv whose lineage is to be reconstructed\n");
printf(" -I, --index INDEX\n");
printf("\tspecify the index of the indiv whose lineage is to be reconstructed\n");
printf(" -R, --rank RANK\n");
printf("\tspecify the rank of the indiv whose lineage is to be reconstructed\n");
printf(" -F, --full-check\n");
printf("\tperform genome checks whenever possible\n");
printf(" -v, --verbose\n\tbe verbose\n");
}
void interpret_cmd_line_options(int argc, char* argv[]) {
// Define allowed options
const char * short_options = "hVb:e:FI:R:v";
static struct option long_options[] = {
{"help", no_argument, nullptr, 'h'},
{"version", no_argument, nullptr, 'V'},
{"begin", required_argument, nullptr, 'b'},
{"end", required_argument, nullptr, 'e'},
{"fullcheck", no_argument, nullptr, 'F'},
{"index", required_argument, nullptr, 'I'},
{"rank", required_argument, nullptr, 'R'},
{"verbose", no_argument, nullptr, 'v'},
{0, 0, 0, 0}
};
// Get actual values of the command-line options
int option;
while((option = getopt_long(argc, argv, short_options,
long_options, nullptr)) != -1) {
switch(option) {
case 'h' : {
print_help(argv[0]);
exit(EXIT_SUCCESS);
}
case 'V' : {
Utils::PrintAevolVersion();
exit(EXIT_SUCCESS);
}
case 'b' : {
if (strcmp(optarg, "") == 0) {
printf("%s: error: Option -b or --begin : missing argument.\n",
argv[0]);
exit(EXIT_FAILURE);
}
t0 = atol(optarg);
break;
}
case 'e' : {
if (strcmp(optarg, "") == 0) {
printf("%s: error: Option -e or --end : missing argument.\n",
argv[0]);
exit(EXIT_FAILURE);
}
t_end = atol(optarg);
break;
}
case 'F' : {
full_check = true;
break;
}
case 'I' : {
final_indiv_index = atoi(optarg);
break;
}
case 'R' : {
final_indiv_rank = atoi(optarg);
break;
}
case 'v' : {
verbose = true;
break;
}
default : {
// An error message is printed in getopt_long, we just need to exit
exit(EXIT_FAILURE);
}
}
}
// If t_end wasn't provided, use default
if (t_end < 0) {
t_end = OutputManager::last_gener();
}
}