Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Source_Encoding
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
dnarXiv
Source_Encoding
Commits
acaba874
Commit
acaba874
authored
3 years ago
by
BOULLE Olivier
Browse files
Options
Downloads
Patches
Plain Diff
comments and refactors
parent
e907312c
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
sequence_control.py
+12
-11
12 additions, 11 deletions
sequence_control.py
with
12 additions
and
11 deletions
sequence_control.py
+
12
−
11
View file @
acaba874
...
...
@@ -6,13 +6,12 @@ import source_encoding
import
source_decoding
def
check_homopolymere
(
sequence
,
max_h
):
"""
count the number of homopolymeres in the sequence
count the number of homopolymeres larger than h_max in the sequence
"""
h_nbr
=
0
#number of homopolymere larger than h_max found
row_size
=
0
#size of the current row
row_size
=
0
#size of the current row
of consecutive nucleotides
last_nucleotide
=
""
#previous nucleotide in the sequence
for
nucleotide
in
sequence
:
if
nucleotide
==
last_nucleotide
:
...
...
@@ -30,7 +29,7 @@ def check_homopolymere(sequence, max_h):
def
check_GC
(
sequence
,
window_size
):
"""
returns the min and max GC percentage
in
all windows of the sequence
returns the minimum and maximum GC percentage for all the windows of the sequence
"""
def
check_GC_Window
(
window
):
...
...
@@ -44,7 +43,7 @@ def check_GC(sequence, window_size):
if
len
(
sequence
)
<=
window_size
:
GC_percent
=
check_GC_Window
(
sequence
)
return
GC_percent
,
GC_percent
return
GC_percent
,
GC_percent
max_GC_percent
=
0
min_GC_percent
=
100
...
...
@@ -77,7 +76,7 @@ def check_loop(sequence, loop_size, window_size):
def
sequence_check
(
sequence
,
window_size
=
60
,
verbose
=
False
):
"""
test if a the conditions for a correct sequence are met
test if the conditions for a correct sequence are met, return True if all 3 constraints are valid
"""
h_nbr
=
check_homopolymere
(
sequence
,
3
)
if
verbose
:
print
(
"
number of homopolymere larger than
"
,
3
,
"
:
"
,
h_nbr
)
...
...
@@ -86,7 +85,7 @@ def sequence_check(sequence, window_size=60, verbose=False):
loop_nbr
=
check_loop
(
sequence
,
6
,
window_size
)
if
verbose
:
print
(
"
number of potential loop :
"
,
loop_nbr
)
if
h_nbr
==
0
and
min_GC_percent
>=
4
5
and
max_GC_percent
<=
55
and
loop_nbr
==
0
:
if
h_nbr
==
0
and
min_GC_percent
>=
4
0
and
max_GC_percent
<=
55
and
loop_nbr
==
0
:
if
verbose
:
print
(
"
sequence is correct
"
)
return
True
else
:
...
...
@@ -112,14 +111,15 @@ def hash_until_correct(sequence, start_key=0):
def
find_hash_keys
(
sequence
):
"""
find the keys to hash the sequence to pass the conditions
the sequence is divided in sub sequences and each one is hashed to pass the conditions
the sequence is divided in sub sequences and each one is hashed until it passes the conditions
"""
tot_hash
=
""
hash_keys
=
[]
sub_seq_size
=
2
00
sub_seq_size
=
2
50
#higher -> less keys, but more time consuming
sub_seq_nbr
=
int
(
math
.
ceil
(
len
(
sequence
)
/
sub_seq_size
))
for
i
in
range
(
sub_seq_nbr
):
#display progress bar
k
=
int
(
20
*
len
(
hash_keys
)
/
sub_seq_nbr
)
sys
.
stdout
.
write
(
'
\r
'
)
sys
.
stdout
.
write
(
"
[%-20s] %d%%
"
%
(
'
=
'
*
k
,
5
*
k
))
...
...
@@ -139,7 +139,7 @@ def find_hash_keys(sequence):
print
(
"
\n
"
+
tot_hash
)
print
(
hash_keys
)
sequence_check
(
tot_hash
,
60
,
True
)
sequence_check
(
tot_hash
,
60
,
True
)
#will be valid
def
decode_poeme
(
sequence
):
hash_keys
=
[
2137
,
4123
,
1324
,
833
,
3
]
...
...
@@ -150,7 +150,8 @@ def decode_poeme(sequence):
decoded_sub_seq
=
source_decoding
.
remove_filter
(
sub_seq
,
hash_filter
)
decoded_seq
+=
decoded_sub_seq
print
(
decoded_seq
)
# =================== main ======================= #
if
__name__
==
'
__main__
'
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment