From e656fcf32b6937cd505ed6275e815e86d799f789 Mon Sep 17 00:00:00 2001
From: oboulle <olivier.boulle@inria.fr>
Date: Tue, 18 Apr 2023 16:46:01 +0200
Subject: [PATCH] init

---
 get_clique.cpp | 195 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100644 get_clique.cpp

diff --git a/get_clique.cpp b/get_clique.cpp
new file mode 100644
index 0000000..c7d145c
--- /dev/null
+++ b/get_clique.cpp
@@ -0,0 +1,195 @@
+
+
+// g++ -std=c++11 get_clique.cpp -o get_clique
+
+#include <iostream>
+#include <fstream>
+#include <stack>
+#include <string>
+#include <algorithm>
+#include <vector>
+#include <map>
+
+using namespace std;
+ 
+class Graph {
+    int g_size;    // No. of vertices
+    vector<int> *adj;    // An array of adjacency lists
+    vector<int> test_clique(vector<pair<int, int>> sorted_node_edges, int node_index, vector<int> clique_list);
+
+    
+public:
+    Graph(int g_size);
+    void add_edge(int node_a, int node_b);
+ 
+    // get a big clique with an heuristique
+    vector<int> get_clique();
+};
+ 
+Graph::Graph(int g_size) {
+    this->g_size = g_size;
+    adj = new vector<int>[g_size];
+}
+ 
+
+void Graph::add_edge(int node_a, int node_b) {
+    adj[node_a].push_back(node_b); // Add w to v’s list.
+}
+
+//sort the vector by edge number
+
+	
+vector<int> Graph::get_clique() {
+	// heuristique here (maximum clique is NP-complete)
+	// sort nodes by number of edges
+	// start a clique from the node with most edges
+	// loop over next nodes with the most edges
+	// add to the clique if it is linked to all members 
+
+	
+	vector<pair<int, int>> node_edges; //number of edges for each node
+	
+	// fill the vector
+	for (int node = 0; node < g_size; node++) { 
+		node_edges.push_back(make_pair(node, adj[node].size()));
+	}
+	
+	// sort by number of edges
+	sort(node_edges.begin(), node_edges.end(), 
+			[](const pair<int,int> &a, const pair<int,int> &b) { // lambda function to sort vector pair by 2nd element descending
+		 		return (a.second > b.second);
+			}); 
+	
+	
+	vector<int> clique_list;
+	// fill the clique
+    clique_list = test_clique(node_edges, 0, clique_list);
+    
+    return clique_list;
+}
+
+	
+vector<int> Graph::test_clique(vector<pair<int, int>> sorted_node_edges, int node_index, vector<int> clique_list) {
+	// test if a node can be added to the clique list
+	// continue the test to the next node with the updated clique list
+	
+	if (node_index > sorted_node_edges.size()) {
+		return clique_list;
+	}
+	
+	int tested_node = sorted_node_edges[node_index].first;
+	
+	for (int i = 0; i < clique_list.size(); i++) {
+		vector<int> neigboors_array = adj[clique_list[i]]; // list of neigboors of this node
+		
+		if (find(begin(neigboors_array), end(neigboors_array), tested_node) == end(neigboors_array)) { // tested node is not linked to a node from the clique
+			return test_clique(sorted_node_edges, node_index+1, clique_list); // cannot be added to the clique -> test the next node
+		}
+	}
+	// the tested node is linked to every node in the clique
+	clique_list.push_back(tested_node); // add to the clique
+	return test_clique(sorted_node_edges, node_index+1, clique_list); // test the next node
+}
+
+
+string rev_comp(string sequence) {
+	reverse(sequence.begin(), sequence.end());
+    for (int i = 0; i < sequence.length(); ++i) {
+        switch (sequence[i]) {
+        case 'A':
+        	sequence[i] = 'T';
+            break;    
+        case 'C':
+        	sequence[i] = 'G';
+            break;
+        case 'G':
+        	sequence[i] = 'C';
+            break;
+        case 'T':
+        	sequence[i] = 'A';
+            break;
+        }
+    }
+    return sequence;
+}
+
+bool test_hybridation(string primer_a, string primer_b) {
+
+	int max_hybridisation_value = 4;
+	int primer_size = 20;
+	for (int i = 0; i <= primer_size-max_hybridisation_value; ++i) {
+		string kmer_a = primer_a.substr(i, max_hybridisation_value);
+		
+		if (primer_b.find(kmer_a) != string::npos) {
+			//cout << primer_b << " " << kmer_a << endl;
+			return false;
+		}
+	}
+	string rev_primer_a = rev_comp(primer_a);
+	for (int i = 0; i <= primer_size-max_hybridisation_value; ++i) {
+		string kmer_rev_a = rev_primer_a.substr(i, max_hybridisation_value);
+		if (primer_b.find(kmer_rev_a) != string::npos) {
+			//cout << primer_b << " " << kmer_rev_a << endl;
+			return false;
+		}
+	}
+	return true;
+}
+
+
+Graph compute_hybridation(vector<string> primers_list) {
+	int primers_n = primers_list.size();
+	
+	Graph g(primers_n);
+	
+	for (int i = 0; i < primers_n; ++i) {
+		string primer_a = primers_list[i];
+		
+		for (int j = i+1; j < primers_n; ++j) {
+			string primer_b = primers_list[j];
+			//cout << "a";
+			if (test_hybridation(primer_a, primer_b)) {
+				g.add_edge(i,j);
+				g.add_edge(j,i);
+			}
+		}
+	}
+	return g;
+}
+
+
+int main() {
+	
+	// read list of primers
+	vector<string> primers_list;
+	
+	ifstream input_file;
+	input_file.open("primer_generator/temp/checked_primers.txt");
+	string myline;
+	if ( input_file.is_open() ) {
+		while ( input_file ) {
+			getline (input_file, myline);
+			if (myline.size() > 0) // last line is empty
+				primers_list.push_back(myline);
+		}
+	} else {
+		cout << "Couldn't open file\n";
+	}
+	// use indexes of primers in the list
+	Graph g = compute_hybridation(primers_list);
+	cout << "get clique : hybridation graph created" << endl;
+	vector<int> clique_list = g.get_clique();
+	cout << "get clique : found clique of " << clique_list.size() << " primers" << endl;
+
+	
+	ofstream output_file;
+	output_file.open ("test_primers.txt");
+	for(const int& node : clique_list) {
+		output_file << primers_list[node] << "\n";
+    }
+	output_file.close();
+	
+	
+    return 0;
+}
+
-- 
GitLab