Mentions légales du service

Skip to content
Snippets Groups Projects
Commit acaba874 authored by BOULLE Olivier's avatar BOULLE Olivier
Browse files

comments and refactors

parent e907312c
No related branches found
No related tags found
No related merge requests found
......@@ -6,13 +6,12 @@ import source_encoding
import source_decoding
def check_homopolymere(sequence, max_h):
"""
count the number of homopolymeres in the sequence
count the number of homopolymeres larger than max_h in the sequence
"""
h_nbr = 0 #number of homopolymere larger than h_max found
row_size = 0 #size of the current row
row_size = 0 #size of the current row of consecutive nucleotides
last_nucleotide = "" #previous nucleotide in the sequence
for nucleotide in sequence:
if nucleotide == last_nucleotide:
......@@ -30,7 +29,7 @@ def check_homopolymere(sequence, max_h):
def check_GC(sequence, window_size):
"""
returns the min and max GC percentage in all windows of the sequence
returns the minimum and maximum GC percentage for all the windows of the sequence
"""
def check_GC_Window(window):
......@@ -44,7 +43,7 @@ def check_GC(sequence, window_size):
if len(sequence) <= window_size:
GC_percent = check_GC_Window(sequence)
return GC_percent,GC_percent
return GC_percent, GC_percent
max_GC_percent = 0
min_GC_percent = 100
......@@ -77,7 +76,7 @@ def check_loop(sequence, loop_size, window_size):
def sequence_check(sequence, window_size=60, verbose=False):
"""
test if a the conditions for a correct sequence are met
test whether the conditions for a correct sequence are met, return True if all 3 constraints are valid
"""
h_nbr = check_homopolymere(sequence, 3)
if verbose: print("number of homopolymere larger than",3,":",h_nbr)
......@@ -86,7 +85,7 @@ def sequence_check(sequence, window_size=60, verbose=False):
loop_nbr = check_loop(sequence, 6, window_size)
if verbose: print("number of potential loop :",loop_nbr)
if h_nbr == 0 and min_GC_percent >= 45 and max_GC_percent <= 55 and loop_nbr == 0:
if h_nbr == 0 and min_GC_percent >= 40 and max_GC_percent <= 55 and loop_nbr == 0:
if verbose: print("sequence is correct")
return True
else:
......@@ -112,14 +111,15 @@ def hash_until_correct(sequence, start_key=0):
def find_hash_keys(sequence):
"""
find the keys to hash the sequence to pass the conditions
the sequence is divided in sub sequences and each one is hashed to pass the conditions
the sequence is divided into subsequences and each one is hashed until it passes the conditions
"""
tot_hash = ""
hash_keys = []
sub_seq_size = 200
sub_seq_size = 250 #higher -> less keys, but more time consuming
sub_seq_nbr = int(math.ceil(len(sequence)/sub_seq_size))
for i in range(sub_seq_nbr):
#display progress bar
k=int(20*len(hash_keys)/sub_seq_nbr)
sys.stdout.write('\r')
sys.stdout.write("[%-20s] %d%%" % ('='*k, 5*k))
......@@ -139,7 +139,7 @@ def find_hash_keys(sequence):
print("\n"+tot_hash)
print(hash_keys)
sequence_check(tot_hash, 60, True)
sequence_check(tot_hash, 60, True) #will be valid
def decode_poeme(sequence):
hash_keys = [2137,4123,1324,833,3]
......@@ -150,7 +150,8 @@ def decode_poeme(sequence):
decoded_sub_seq = source_decoding.remove_filter(sub_seq, hash_filter)
decoded_seq += decoded_sub_seq
print(decoded_seq)
# =================== main ======================= #
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment