Mentions légales du service

Skip to content
Snippets Groups Projects
Commit c88849e0 authored by BOULLE Olivier's avatar BOULLE Olivier
Browse files

improved compatible pair selection, added more comments

parent 914ae312
No related branches found
No related tags found
No related merge requests found
...@@ -16,7 +16,7 @@ import dna_file_reader as dfr ...@@ -16,7 +16,7 @@ import dna_file_reader as dfr
""" """
manage primers generation and selection, manage primers generation and selection,
use the primer_generator script use the dna_storage_primer_tools script from
""" """
def generate_compatible_primers(primer_number: int, output_primers_path=None): def generate_compatible_primers(primer_number: int, output_primers_path=None):
...@@ -30,41 +30,38 @@ def generate_compatible_primers(primer_number: int, output_primers_path=None): ...@@ -30,41 +30,38 @@ def generate_compatible_primers(primer_number: int, output_primers_path=None):
primers_gen_script = primer_generator_dir+"/bin/DSPgen" # path to the script primers_gen_script = primer_generator_dir+"/bin/DSPgen" # path to the script
primers_param_path = primer_generator_dir+"/data/param_dnarxiv_V2.txt" # file containing the parameters for the primer_generator scripts primers_param_path = primer_generator_dir+"/data/param_dnarxiv_V2.txt" # file containing the parameters for the primer_generator scripts
if output_primers_path is None: if output_primers_path is None:
output_primers_path = primer_generator_dir+"/data/temp/primers.fasta" # output path for generated primers output_primers_path = primer_generator_dir+"/data/temp/primers.fasta" # default output path for generated primers
scm_option = "1" # salt/magnesium correction method: 0 = Santalucia | 1 = Owczarzy | 2 = IDT scm_option = "1" # salt/magnesium correction method: 0 = Santalucia | 1 = Owczarzy | 2 = IDT
SEED = "0" SEED = "0"
#TODO enable dtg ?
# generate a file containing a lot of primers fullfilling the general conditions set in param_dnarxiv.txt # generate a file containing a lot of primers fullfilling the general conditions set in param_dnarxiv.txt
primers_gen_command = primers_gen_script+' '+str(primer_number)+' '+primers_param_path+' '+output_primers_path+' -scm '+scm_option+' -seed '+SEED+' -dtg 0' primers_gen_command = primers_gen_script+' '+str(primer_number)+' '+primers_param_path+' '+output_primers_path+' -scm '+scm_option+' -seed '+SEED+' -dtg 1'
subprocess.call('/bin/bash -c "$PRIMERGEN"', shell=True, env={'PRIMERGEN': primers_gen_command}) subprocess.call('/bin/bash -c "$PRIMERGEN"', shell=True, env={'PRIMERGEN': primers_gen_command})
def filter_primers_with_oligos(oligo_path, input_primers_path=None, output_filtered_primers_path=None): def filter_primers_with_oligos(oligo_path, input_primers_path=None, output_filtered_primers_path=None):
""" """
select primers without hybridation with the encoded data select primers without hybridation with the encoded oligos
""" """
#---call the hybridization check script---#
primer_generator_dir = currentdir+"/dna-storage-primer-tools" primer_generator_dir = currentdir+"/dna-storage-primer-tools"
primers_filter_script = primer_generator_dir+"/bin/DSPhyb" # path to the script primers_filter_script = primer_generator_dir+"/bin/DSPhyb" # path to the script
primers_param_path = primer_generator_dir+"/data/param_dnarxiv_V2.txt" # file containing the parameters for the primer_generator scripts primers_param_path = primer_generator_dir+"/data/param_dnarxiv_V2.txt" # file containing the parameters for the primer_generator scripts
hybridation_results = primer_generator_dir+"/data/temp/hybridations.txt" hybridation_results = primer_generator_dir+"/data/temp/hybridations.txt" # result file listing primers with hybridizations
if input_primers_path is None: if input_primers_path is None:
input_primers_path = primer_generator_dir+"/data/temp/primers.fasta" # input path for generated primers from generate_compatible_primers() input_primers_path = primer_generator_dir+"/data/temp/primers.fasta" # default path for input generated primers from generate_compatible_primers()
if output_filtered_primers_path is None: if output_filtered_primers_path is None:
output_filtered_primers_path = primer_generator_dir+"/data/temp/compatible_primers.fasta" # output path for filtered primers that doesn't hybridize with the assembly output_filtered_primers_path = primer_generator_dir+"/data/temp/compatible_primers.fasta" # default path for output filtered primers that doesn't hybridize with the oligos
# filter the primers that hybridize with the whole sequence of encoded data assembled with the overhangs
filter_primers_command = primers_filter_script+' '+primers_param_path+' '+input_primers_path+' '+oligo_path+' '+hybridation_results filter_primers_command = primers_filter_script+' '+primers_param_path+' '+input_primers_path+' '+oligo_path+' '+hybridation_results
subprocess.call('/bin/bash -c "$PRIMERFILTER"', shell=True, env={'PRIMERFILTER': filter_primers_command}) subprocess.call('/bin/bash -c "$PRIMERFILTER"', shell=True, env={'PRIMERFILTER': filter_primers_command})
#print() # needed cause the printer go at the start of the last printed line
# get the list of hybrided primers from the output file
#---get the list of hybrided primers from the output file---#
with open(hybridation_results, 'r') as file: with open(hybridation_results, 'r') as file:
list_of_rejected_primers = [] list_of_rejected_primers = []
line = file.readline() line = file.readline()
...@@ -86,28 +83,30 @@ def filter_primers_with_oligos(oligo_path, input_primers_path=None, output_filte ...@@ -86,28 +83,30 @@ def filter_primers_with_oligos(oligo_path, input_primers_path=None, output_filte
checked_primers_dict = dict((name, primer) for name, primer in primers_dict.items() if sequence_control.sequence_check(primer)) # check for potential constraints in primers checked_primers_dict = dict((name, primer) for name, primer in primers_dict.items() if sequence_control.sequence_check(primer)) # check for potential constraints in primers
#random.shuffle(checked_primers_list) #random.shuffle(checked_primers_list)
# save the non hybrided primers
dfr.save_dict_to_fasta(checked_primers_dict, output_filtered_primers_path) dfr.save_dict_to_fasta(checked_primers_dict, output_filtered_primers_path)
def test_hybridation_and_GC(primer: str, selected_primers_list: list, max_hybridisation_value=4) -> bool: def test_hybridation_and_GC(primer: str, selected_primers_list: list, max_hybridisation_value=4) -> bool:
""" """
test the hybridisation and GC% of the primer with a list of other primers test the hybridisation and GC% of the primer with a list of other primers
return True if same GC and no hybriization found with any other primers
""" """
primer_rc = dfr.reverse_complement(primer) # get the reverse complement of the primer primer_rc = dfr.reverse_complement(primer) # get the reverse complement of the primer
for selected_primer in selected_primers_list: for other_primer in selected_primers_list:
# test if the GC% is the same # test if the GC% is the same
if not compare_GC(primer, selected_primer): if not compare_GC(primer, other_primer):
return False return False
for i in range(len(primer)-max_hybridisation_value): for i in range(len(primer)-max_hybridisation_value):
# test if any part of the primer is contained in another primer from the list (= hybridisation) # test if any part of the primer is contained in another primer from the list (= hybridisation)
if primer[i:i+max_hybridisation_value+1] in selected_primer: if primer[i:i+max_hybridisation_value+1] in other_primer:
return False return False
# test also for the reverse complement # test also for the reverse complement
if primer_rc[i:i+max_hybridisation_value+1] in selected_primer: if primer_rc[i:i+max_hybridisation_value+1] in other_primer:
return False return False
# return true if no hybridisation at all
return True return True
...@@ -124,21 +123,24 @@ def compare_GC(primer_A: str, primer_B: str) -> bool: ...@@ -124,21 +123,24 @@ def compare_GC(primer_A: str, primer_B: str) -> bool:
def get_pair_with_lowest_temp_diff(compatible_primers_list): def get_pair_with_lowest_temp_diff(compatible_primers_list):
""" """
find the pair of primers with the least difference in temperature find the pair of primers with the least difference in temperature
use list of inter compatible primers
""" """
length_of_compatibilities = [len(k) for k in compatible_primers_list] length_of_compatibilities = [len(k) for k in compatible_primers_list]
# get largest group of intercompatible primers
largest_compatibility_dict = compatible_primers_list[length_of_compatibilities.index(max(length_of_compatibilities))] largest_compatibility_dict = compatible_primers_list[length_of_compatibilities.index(max(length_of_compatibilities))]
temp_dict = {} # key: primer_name; value: primer temperature
temp_dict = {} # extract the temperature from the primers name
# get the temp per primers
for primer_name in largest_compatibility_dict.keys(): for primer_name in largest_compatibility_dict.keys():
temp_dict[primer_name] = primer_name.split(" Tm: ")[1].split(" ")[0] temp_dict[primer_name] = primer_name.split(" Tm: ")[1].split(" ")[0]
lowest_temp_diff = 1000 lowest_temp_diff = 1000
best_primer_couple = "", "" best_primer_couple = "", ""
# sort primers per temperature (ascending)
sorted_items = sorted(temp_dict.items(), key=lambda x: x[1]) sorted_items = sorted(temp_dict.items(), key=lambda x: x[1])
# find the couple of primers with the least difference in temperature
for i in range(len(sorted_items)-1): for i in range(len(sorted_items)-1):
primer_a, temp_a = sorted_items[i] primer_a, temp_a = sorted_items[i]
primer_b, temp_b = sorted_items[i+1] primer_b, temp_b = sorted_items[i+1]
...@@ -152,20 +154,20 @@ def get_pair_with_lowest_temp_diff(compatible_primers_list): ...@@ -152,20 +154,20 @@ def get_pair_with_lowest_temp_diff(compatible_primers_list):
def get_pair_with_closest_temp(compatible_primers_list, other_primers_dict): def get_pair_with_closest_temp(compatible_primers_list, other_primers_dict):
""" """
find the pair of primers with the least difference in temperature compared to the other primers pair find the pair of primers with the least difference in temperature compared to the other primers
other_primers_dict must only contain 2 primers
""" """
# get the medium temp of the other primers pair to aim # get the medium temp of the other primers to aim
medium_temp = 0.5*sum((float(primer_name.split(" Tm: ")[1].split(" ")[0]) for primer_name in other_primers_dict.keys())) medium_temp = sum((float(primer_name.split(" Tm: ")[1].split(" ")[0]) for primer_name in other_primers_dict.keys())) / len(other_primers_dict.keys())
lowest_temp_diff = 1000 lowest_temp_diff = 1000
best_primer_couple = "", "" best_primer_couple = "", ""
# compare the distance of any pair temperature to the medium temperature, for evey intercompatible primers group
for compatibility_dict in compatible_primers_list: for compatibility_dict in compatible_primers_list:
temp_dict = {} temp_dict = {} # key: primer_name; value: primer temperature
# get the temp per primers # extract the temperature from the primers name
for primer_name in compatibility_dict.keys(): for primer_name in compatibility_dict.keys():
temp_dict[primer_name] = primer_name.split(" Tm: ")[1].split(" ")[0] temp_dict[primer_name] = primer_name.split(" Tm: ")[1].split(" ")[0]
...@@ -173,59 +175,58 @@ def get_pair_with_closest_temp(compatible_primers_list, other_primers_dict): ...@@ -173,59 +175,58 @@ def get_pair_with_closest_temp(compatible_primers_list, other_primers_dict):
for i in range(len(sorted_items)-1): for i in range(len(sorted_items)-1):
primer_a, temp_a = sorted_items[i] primer_a, temp_a = sorted_items[i]
primer_b, temp_b = sorted_items[i+1] primer_b, temp_b = sorted_items[i+1]
# get absolute value of difference between medium pair temp and best difference found until now # get difference with the medium
#pair_temp_diff = abs(medium_temp - (0.5 * (float(temp_a) + float(temp_b))))
pair_temp_diff = abs(medium_temp-float(temp_a)) + abs(medium_temp-float(temp_b)) pair_temp_diff = abs(medium_temp-float(temp_a)) + abs(medium_temp-float(temp_b))
if pair_temp_diff < lowest_temp_diff: if pair_temp_diff < lowest_temp_diff:
best_primer_couple = primer_a, primer_b best_primer_couple = primer_a, primer_b
lowest_temp_diff = pair_temp_diff lowest_temp_diff = pair_temp_diff
print(medium_temp, pair_temp_diff)
return best_primer_couple return best_primer_couple
def get_best_compatible_pair(source_primers_dict = None, other_primers_dict = None) -> list: def get_best_compatible_pair(source_primers_dict = None, other_primers_dict = None) -> list:
""" """
select a list of primers (2 primers for each assembly) select a list of primers (2 primers for each assembly)
primers needs to not hybridize with each other output primers needs to not hybridize with each other
can add a dict of other primers that the result has to be compatible with
""" """
primer_generator_dir = currentdir+"/dna-storage-primer-tools" primer_generator_dir = currentdir+"/dna-storage-primer-tools"
if source_primers_dict is None: if source_primers_dict is None:
source_primers_dict = dfr.read_fasta(primer_generator_dir+"/data/temp/compatible_primers.fasta") source_primers_dict = dfr.read_fasta(primer_generator_dir+"/data/temp/compatible_primers.fasta") # default input path
if other_primers_dict is None: if other_primers_dict is None:
other_primers_dict = {} other_primers_dict = {}
compatible_primers_list = [] compatible_primers_list = [] # list of intercompatible primers groups found
primers_keys = list(source_primers_dict.keys()) primers_keys = [] # list of primers names that do not hybridize with any of the other primers
for primer_name, primer_seq in source_primers_dict.items():
if test_hybridation_and_GC(primer_seq, list(other_primers_dict.values()), 4):
primers_keys.append(primer_name)
# get all group of primers with no inter hybridization
for index_primer, primer_name in enumerate(primers_keys): for index_primer, primer_name in enumerate(primers_keys):
primer_sequence = source_primers_dict[primer_name] primer_sequence = source_primers_dict[primer_name]
# the primer must not hybridise with any previous selected primers compatible_primers_dict = {primer_name: primer_sequence} # init a group of inter compatible primers
if test_hybridation_and_GC(primer_sequence, list(other_primers_dict.values()), 4):
for other_primer_name in primers_keys[index_primer+1:]: # search a corresponding stop primer in the rest of list
other_primer_sequence = source_primers_dict[other_primer_name]
compatible_primers_dict = {primer_name: primer_sequence} # the second primer needs the same %GC, and no hybridization with the group
if test_hybridation_and_GC(other_primer_sequence, list(compatible_primers_dict.values())):
compatible_primers_dict[other_primer_name] = other_primer_sequence
for other_primer_name in primers_keys[index_primer+1:]: # search a corresponding stop primer in the rest of list if len(compatible_primers_dict) >= 2: # keep groups of at least 2 primers
other_primer_sequence = source_primers_dict[other_primer_name] compatible_primers_list.append(compatible_primers_dict)
# the 2 primers needs the same %GC, and a lower risk of hybridation
if test_hybridation_and_GC(other_primer_sequence, list(compatible_primers_dict.values())):
# the primer must not hybridise with any previous selected primers
if test_hybridation_and_GC(other_primer_sequence, list(other_primers_dict.values()), 4):
compatible_primers_dict[other_primer_name] = other_primer_sequence
if len(compatible_primers_dict) >= 2:
compatible_primers_list.append(compatible_primers_dict)
if other_primers_dict == {}: if other_primers_dict == {}:
# get the pair with the closest temperature # get the pair with the closest temperature
best_primer_couple = get_pair_with_lowest_temp_diff(compatible_primers_list) best_primer_couple = get_pair_with_lowest_temp_diff(compatible_primers_list)
else: else:
# get the pair with the cosest temperature to the other selected primers # get the pair with the closest temperature to the other selected primers
best_primer_couple = get_pair_with_closest_temp(compatible_primers_list, other_primers_dict) best_primer_couple = get_pair_with_closest_temp(compatible_primers_list, other_primers_dict)
selected_primers_dict = {best_primer_couple[0]:source_primers_dict[best_primer_couple[0]], best_primer_couple[1]:source_primers_dict[best_primer_couple[1]]} selected_primers_dict = {best_primer_couple[0]:source_primers_dict[best_primer_couple[0]], best_primer_couple[1]:source_primers_dict[best_primer_couple[1]]}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment