From e656fcf32b6937cd505ed6275e815e86d799f789 Mon Sep 17 00:00:00 2001 From: oboulle <olivier.boulle@inria.fr> Date: Tue, 18 Apr 2023 16:46:01 +0200 Subject: [PATCH] init --- get_clique.cpp | 195 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 get_clique.cpp diff --git a/get_clique.cpp b/get_clique.cpp new file mode 100644 index 0000000..c7d145c --- /dev/null +++ b/get_clique.cpp @@ -0,0 +1,195 @@ + + +// g++ -std=c++11 get_clique.cpp -o get_clique + +#include <iostream> +#include <fstream> +#include <stack> +#include <string> +#include <algorithm> +#include <vector> +#include <map> + +using namespace std; + +class Graph { + int g_size; // No. of vertices + vector<int> *adj; // An array of adjacency lists + vector<int> test_clique(vector<pair<int, int>> sorted_node_edges, int node_index, vector<int> clique_list); + + +public: + Graph(int g_size); + void add_edge(int node_a, int node_b); + + // get a big clique with an heuristique + vector<int> get_clique(); +}; + +Graph::Graph(int g_size) { + this->g_size = g_size; + adj = new vector<int>[g_size]; +} + + +void Graph::add_edge(int node_a, int node_b) { + adj[node_a].push_back(node_b); // Add w to v’s list. +} + +//sort the vector by edge number + + +vector<int> Graph::get_clique() { + // heuristique here (maximum clique is NP-complete) + // sort nodes by number of edges + // start a clique from the node with most edges + // loop over next nodes with the most edges + // add to the clique if it is linked to all members + + + vector<pair<int, int>> node_edges; //number of edges for each node + + // fill the vector + for (int node = 0; node < g_size; node++) { + node_edges.push_back(make_pair(node, adj[node].size())); + } + + // sort by number of edges + sort(node_edges.begin(), node_edges.end(), + [](const pair<int,int> &a, const pair<int,int> &b) { // lambda function to sort vector pair by 2nd element descending + return (a.second > b.second); + }); + + + vector<int> clique_list; + // fill the clique + clique_list = test_clique(node_edges, 0, clique_list); + + return clique_list; +} + + +vector<int> Graph::test_clique(vector<pair<int, int>> sorted_node_edges, int node_index, vector<int> clique_list) { + // test if a node can be added to the clique list + // continue the test to the next node with the updated clique list + + if (node_index > sorted_node_edges.size()) { + return clique_list; + } + + int tested_node = sorted_node_edges[node_index].first; + + for (int i = 0; i < clique_list.size(); i++) { + vector<int> neigboors_array = adj[clique_list[i]]; // list of neigboors of this node + + if (find(begin(neigboors_array), end(neigboors_array), tested_node) == end(neigboors_array)) { // tested node is not linked to a node from the clique + return test_clique(sorted_node_edges, node_index+1, clique_list); // cannot be added to the clique -> test the next node + } + } + // the tested node is linked to every node in the clique + clique_list.push_back(tested_node); // add to the clique + return test_clique(sorted_node_edges, node_index+1, clique_list); // test the next node +} + + +string rev_comp(string sequence) { + reverse(sequence.begin(), sequence.end()); + for (int i = 0; i < sequence.length(); ++i) { + switch (sequence[i]) { + case 'A': + sequence[i] = 'T'; + break; + case 'C': + sequence[i] = 'G'; + break; + case 'G': + sequence[i] = 'C'; + break; + case 'T': + sequence[i] = 'A'; + break; + } + } + return sequence; +} + +bool test_hybridation(string primer_a, string primer_b) { + + int max_hybridisation_value = 4; + int primer_size = 20; + for (int i = 0; i <= primer_size-max_hybridisation_value; ++i) { + string kmer_a = primer_a.substr(i, max_hybridisation_value); + + if (primer_b.find(kmer_a) != string::npos) { + //cout << primer_b << " " << kmer_a << endl; + return false; + } + } + string rev_primer_a = rev_comp(primer_a); + for (int i = 0; i <= primer_size-max_hybridisation_value; ++i) { + string kmer_rev_a = rev_primer_a.substr(i, max_hybridisation_value); + if (primer_b.find(kmer_rev_a) != string::npos) { + //cout << primer_b << " " << kmer_rev_a << endl; + return false; + } + } + return true; +} + + +Graph compute_hybridation(vector<string> primers_list) { + int primers_n = primers_list.size(); + + Graph g(primers_n); + + for (int i = 0; i < primers_n; ++i) { + string primer_a = primers_list[i]; + + for (int j = i+1; j < primers_n; ++j) { + string primer_b = primers_list[j]; + //cout << "a"; + if (test_hybridation(primer_a, primer_b)) { + g.add_edge(i,j); + g.add_edge(j,i); + } + } + } + return g; +} + + +int main() { + + // read list of primers + vector<string> primers_list; + + ifstream input_file; + input_file.open("primer_generator/temp/checked_primers.txt"); + string myline; + if ( input_file.is_open() ) { + while ( input_file ) { + getline (input_file, myline); + if (myline.size() > 0) // last line is empty + primers_list.push_back(myline); + } + } else { + cout << "Couldn't open file\n"; + } + // use indexes of primers in the list + Graph g = compute_hybridation(primers_list); + cout << "get clique : hybridation graph created" << endl; + vector<int> clique_list = g.get_clique(); + cout << "get clique : found clique of " << clique_list.size() << " primers" << endl; + + + ofstream output_file; + output_file.open ("test_primers.txt"); + for(const int& node : clique_list) { + output_file << primers_list[node] << "\n"; + } + output_file.close(); + + + return 0; +} + -- GitLab