Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Synthesis_Modules
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
dnarXiv
Synthesis_Modules
Commits
9ab558df
Commit
9ab558df
authored
2 years ago
by
BOULLE Olivier
Browse files
Options
Downloads
Patches
Plain Diff
cheaper way to get clique
parent
e656fcf3
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
get_clique.cpp
+77
-25
77 additions, 25 deletions
get_clique.cpp
with
77 additions
and
25 deletions
get_clique.cpp
+
77
−
25
View file @
9ab558df
...
@@ -7,6 +7,8 @@
...
@@ -7,6 +7,8 @@
#include
<stack>
#include
<stack>
#include
<string>
#include
<string>
#include
<algorithm>
#include
<algorithm>
#include
<random>
#include
<chrono>
#include
<vector>
#include
<vector>
#include
<map>
#include
<map>
...
@@ -23,7 +25,7 @@ public:
...
@@ -23,7 +25,7 @@ public:
void
add_edge
(
int
node_a
,
int
node_b
);
void
add_edge
(
int
node_a
,
int
node_b
);
// get a big clique with an heuristique
// get a big clique with an heuristique
vector
<
int
>
get_clique
();
vector
<
int
>
get_clique
_from_graph
();
};
};
Graph
::
Graph
(
int
g_size
)
{
Graph
::
Graph
(
int
g_size
)
{
...
@@ -39,7 +41,7 @@ void Graph::add_edge(int node_a, int node_b) {
...
@@ -39,7 +41,7 @@ void Graph::add_edge(int node_a, int node_b) {
//sort the vector by edge number
//sort the vector by edge number
vector
<
int
>
Graph
::
get_clique
()
{
vector
<
int
>
Graph
::
get_clique
_from_graph
()
{
// heuristique here (maximum clique is NP-complete)
// heuristique here (maximum clique is NP-complete)
// sort nodes by number of edges
// sort nodes by number of edges
// start a clique from the node with most edges
// start a clique from the node with most edges
...
@@ -114,20 +116,21 @@ string rev_comp(string sequence) {
...
@@ -114,20 +116,21 @@ string rev_comp(string sequence) {
}
}
bool
test_hybridation
(
string
primer_a
,
string
primer_b
)
{
bool
test_hybridation
(
string
primer_a
,
string
primer_b
)
{
// true if the 2 primers do not have any hybridation >=4
int
max_hybridisation_value
=
4
;
int
max_hybridisation_value
=
6
;
// maximum autorized number of consecutive common bases
int
primer_size
=
20
;
int
primer_size
=
20
;
for
(
int
i
=
0
;
i
<=
primer_size
-
max_hybridisation_value
;
++
i
)
{
string
rev_primer_a
=
rev_comp
(
primer_a
);
string
kmer_a
=
primer_a
.
substr
(
i
,
max_hybridisation_value
);
for
(
int
i
=
0
;
i
<=
primer_size
-
max_hybridisation_value
-
1
;
++
i
)
{
string
kmer_a
=
primer_a
.
substr
(
i
,
max_hybridisation_value
+
1
);
if
(
primer_b
.
find
(
kmer_a
)
!=
string
::
npos
)
{
if
(
primer_b
.
find
(
kmer_a
)
!=
string
::
npos
)
{
//cout << primer_b << " " << kmer_a << endl;
//cout << primer_b << " " << kmer_a << endl;
return
false
;
return
false
;
}
}
}
string
kmer_rev_a
=
rev_primer_a
.
substr
(
i
,
max_hybridisation_value
+
1
);
string
rev_primer_a
=
rev_comp
(
primer_a
);
for
(
int
i
=
0
;
i
<=
primer_size
-
max_hybridisation_value
;
++
i
)
{
string
kmer_rev_a
=
rev_primer_a
.
substr
(
i
,
max_hybridisation_value
);
if
(
primer_b
.
find
(
kmer_rev_a
)
!=
string
::
npos
)
{
if
(
primer_b
.
find
(
kmer_rev_a
)
!=
string
::
npos
)
{
//cout << primer_b << " " << kmer_rev_a << endl;
//cout << primer_b << " " << kmer_rev_a << endl;
return
false
;
return
false
;
...
@@ -138,6 +141,8 @@ bool test_hybridation(string primer_a, string primer_b) {
...
@@ -138,6 +141,8 @@ bool test_hybridation(string primer_a, string primer_b) {
Graph
compute_hybridation
(
vector
<
string
>
primers_list
)
{
Graph
compute_hybridation
(
vector
<
string
>
primers_list
)
{
// generate a graph of primers intercompatiblity
// too long for >20k primers
int
primers_n
=
primers_list
.
size
();
int
primers_n
=
primers_list
.
size
();
Graph
g
(
primers_n
);
Graph
g
(
primers_n
);
...
@@ -158,13 +163,47 @@ Graph compute_hybridation(vector<string> primers_list) {
...
@@ -158,13 +163,47 @@ Graph compute_hybridation(vector<string> primers_list) {
}
}
int
main
()
{
vector
<
string
>
get_clique
(
vector
<
string
>
primers_list
)
{
// since it is too long to generate the graph of intercompatibility between primers
// start from a random primer and add the others to the clique by testing hybridation
vector
<
string
>
clique_list
;
bool
compatible
;
for
(
const
string
&
tested_primer
:
primers_list
)
{
compatible
=
true
;
for
(
const
string
&
clique_primer
:
clique_list
)
{
if
(
test_hybridation
(
tested_primer
,
clique_primer
)
==
false
)
{
// tested node is not linked to a node from the clique
compatible
=
false
;
break
;
// cannot be added to the clique -> test the next node
}
}
// the tested node is linked to every node in the clique
if
(
compatible
)
{
clique_list
.
push_back
(
tested_primer
);
// add to the clique
}
}
return
clique_list
;
}
int
main
(
int
argc
,
char
*
argv
[])
{
//args = primers_list.txt clique_size
if
(
argc
!=
4
)
{
printf
(
"usage : get_clique primers_list.txt output_path.txt clique_size
\n
"
);
return
1
;
}
printf
(
"get clique of compatible primers...
\n
"
);
string
primers_path
=
argv
[
1
];
//txt file of the primers //"primer_generator/temp/checked_primers.txt"
string
output_path
=
argv
[
2
];
int
clique_size
=
stoi
(
argv
[
3
]);
// size of the clique to find
// read list of primers
// read list of primers
vector
<
string
>
primers_list
;
vector
<
string
>
primers_list
;
ifstream
input_file
;
ifstream
input_file
;
input_file
.
open
(
"
primer
_generator/temp/checked_primers.txt"
);
input_file
.
open
(
primer
s_path
);
string
myline
;
string
myline
;
if
(
input_file
.
is_open
()
)
{
if
(
input_file
.
is_open
()
)
{
while
(
input_file
)
{
while
(
input_file
)
{
...
@@ -175,21 +214,34 @@ int main() {
...
@@ -175,21 +214,34 @@ int main() {
}
else
{
}
else
{
cout
<<
"Couldn't open file
\n
"
;
cout
<<
"Couldn't open file
\n
"
;
}
}
// use indexes of primers in the list
Graph
g
=
compute_hybridation
(
primers_list
);
cout
<<
"get clique : hybridation graph created"
<<
endl
;
vector
<
int
>
clique_list
=
g
.
get_clique
();
cout
<<
"get clique : found clique of "
<<
clique_list
.
size
()
<<
" primers"
<<
endl
;
vector
<
string
>
clique_list
;
for
(
int
i
=
1
;
i
<=
10
;
i
++
)
{
// get a time-based seed
unsigned
seed
=
std
::
chrono
::
system_clock
::
now
().
time_since_epoch
().
count
();
// random shuffle of the primers list
std
::
shuffle
(
std
::
begin
(
primers_list
),
std
::
end
(
primers_list
),
std
::
default_random_engine
(
seed
));
// generate a clique
clique_list
=
get_clique
(
primers_list
);
cout
<<
"get clique : "
<<
i
<<
"/10; found clique of "
<<
clique_list
.
size
()
<<
" primers"
<<
endl
;
if
(
clique_list
.
size
()
>=
clique_size
)
{
// save the resulting clique
ofstream
output_file
;
output_file
.
open
(
output_path
);
for
(
const
string
&
primer
:
clique_list
)
{
output_file
<<
primer
<<
"
\n
"
;
}
output_file
.
close
();
// successfull end
return
0
;
}
}
ofstream
output_file
;
cout
<<
"get clique : couldn't find a clique of the required size"
<<
endl
;
output_file
.
open
(
"test_primers.txt"
);
for
(
const
int
&
node
:
clique_list
)
{
output_file
<<
primers_list
[
node
]
<<
"
\n
"
;
}
output_file
.
close
();
return
0
;
// failed end
return
1
;
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment