Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 9ab558df authored by BOULLE Olivier's avatar BOULLE Olivier
Browse files

cheaper way to get clique

parent e656fcf3
Branches
No related tags found
No related merge requests found
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
#include <stack> #include <stack>
#include <string> #include <string>
#include <algorithm> #include <algorithm>
#include <random>
#include <chrono>
#include <vector> #include <vector>
#include <map> #include <map>
...@@ -23,7 +25,7 @@ public: ...@@ -23,7 +25,7 @@ public:
void add_edge(int node_a, int node_b); void add_edge(int node_a, int node_b);
// get a big clique with an heuristique // get a big clique with an heuristique
vector<int> get_clique(); vector<int> get_clique_from_graph();
}; };
Graph::Graph(int g_size) { Graph::Graph(int g_size) {
...@@ -39,7 +41,7 @@ void Graph::add_edge(int node_a, int node_b) { ...@@ -39,7 +41,7 @@ void Graph::add_edge(int node_a, int node_b) {
//sort the vector by edge number //sort the vector by edge number
vector<int> Graph::get_clique() { vector<int> Graph::get_clique_from_graph() {
// heuristique here (maximum clique is NP-complete) // heuristique here (maximum clique is NP-complete)
// sort nodes by number of edges // sort nodes by number of edges
// start a clique from the node with most edges // start a clique from the node with most edges
...@@ -114,20 +116,21 @@ string rev_comp(string sequence) { ...@@ -114,20 +116,21 @@ string rev_comp(string sequence) {
} }
bool test_hybridation(string primer_a, string primer_b) { bool test_hybridation(string primer_a, string primer_b) {
// true if the 2 primers do not have any hybridation >=4
int max_hybridisation_value = 4;
int max_hybridisation_value = 6; // maximum autorized number of consecutive common bases
int primer_size = 20; int primer_size = 20;
for (int i = 0; i <= primer_size-max_hybridisation_value; ++i) { string rev_primer_a = rev_comp(primer_a);
string kmer_a = primer_a.substr(i, max_hybridisation_value);
for (int i = 0; i <= primer_size-max_hybridisation_value-1; ++i) {
string kmer_a = primer_a.substr(i, max_hybridisation_value+1);
if (primer_b.find(kmer_a) != string::npos) { if (primer_b.find(kmer_a) != string::npos) {
//cout << primer_b << " " << kmer_a << endl; //cout << primer_b << " " << kmer_a << endl;
return false; return false;
} }
} string kmer_rev_a = rev_primer_a.substr(i, max_hybridisation_value+1);
string rev_primer_a = rev_comp(primer_a);
for (int i = 0; i <= primer_size-max_hybridisation_value; ++i) {
string kmer_rev_a = rev_primer_a.substr(i, max_hybridisation_value);
if (primer_b.find(kmer_rev_a) != string::npos) { if (primer_b.find(kmer_rev_a) != string::npos) {
//cout << primer_b << " " << kmer_rev_a << endl; //cout << primer_b << " " << kmer_rev_a << endl;
return false; return false;
...@@ -138,6 +141,8 @@ bool test_hybridation(string primer_a, string primer_b) { ...@@ -138,6 +141,8 @@ bool test_hybridation(string primer_a, string primer_b) {
Graph compute_hybridation(vector<string> primers_list) { Graph compute_hybridation(vector<string> primers_list) {
// generate a graph of primers intercompatiblity
// too long for >20k primers
int primers_n = primers_list.size(); int primers_n = primers_list.size();
Graph g(primers_n); Graph g(primers_n);
...@@ -158,13 +163,47 @@ Graph compute_hybridation(vector<string> primers_list) { ...@@ -158,13 +163,47 @@ Graph compute_hybridation(vector<string> primers_list) {
} }
int main() { vector<string> get_clique(vector<string> primers_list) {
// since it is too long to generate the graph of intercompatibility between primers
// start from a random primer and add the others to the clique by testing hybridation
vector<string> clique_list;
bool compatible;
for(const string& tested_primer : primers_list) {
compatible = true;
for(const string& clique_primer : clique_list) {
if (test_hybridation(tested_primer, clique_primer) == false) { // tested node is not linked to a node from the clique
compatible = false;
break; // cannot be added to the clique -> test the next node
}
}
// the tested node is linked to every node in the clique
if (compatible) {
clique_list.push_back(tested_primer); // add to the clique
}
}
return clique_list;
}
int main(int argc,char* argv[]) {
//args = primers_list.txt clique_size
if(argc != 4) {
printf("usage : get_clique primers_list.txt output_path.txt clique_size\n");
return 1;
}
printf("get clique of compatible primers...\n");
string primers_path = argv[1]; //txt file of the primers //"primer_generator/temp/checked_primers.txt"
string output_path = argv[2];
int clique_size = stoi(argv[3]); // size of the clique to find
// read list of primers // read list of primers
vector<string> primers_list; vector<string> primers_list;
ifstream input_file; ifstream input_file;
input_file.open("primer_generator/temp/checked_primers.txt"); input_file.open(primers_path);
string myline; string myline;
if ( input_file.is_open() ) { if ( input_file.is_open() ) {
while ( input_file ) { while ( input_file ) {
...@@ -175,21 +214,34 @@ int main() { ...@@ -175,21 +214,34 @@ int main() {
} else { } else {
cout << "Couldn't open file\n"; cout << "Couldn't open file\n";
} }
// use indexes of primers in the list
Graph g = compute_hybridation(primers_list);
cout << "get clique : hybridation graph created" << endl;
vector<int> clique_list = g.get_clique();
cout << "get clique : found clique of " << clique_list.size() << " primers" << endl;
vector<string> clique_list;
for( int i = 1; i <= 10; i++) {
// get a time-based seed
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
// random shuffle of the primers list
std::shuffle(std::begin(primers_list), std::end(primers_list), std::default_random_engine(seed));
// generate a clique
clique_list = get_clique(primers_list);
cout << "get clique : " << i << "/10; found clique of " << clique_list.size() << " primers" << endl;
if (clique_list.size() >= clique_size) {
// save the resulting clique
ofstream output_file;
output_file.open (output_path);
for(const string& primer : clique_list) {
output_file << primer << "\n";
}
output_file.close();
// successfull end
return 0;
}
}
ofstream output_file; cout << "get clique : couldn't find a clique of the required size" << endl;
output_file.open ("test_primers.txt");
for(const int& node : clique_list) {
output_file << primers_list[node] << "\n";
}
output_file.close();
return 0; // failed end
return 1;
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment