def test_hybridation(primer: str, selected_primers_list: list, max_hybridisation_value=4) -> bool:
    """
    check that a primer does not hybridise with any primer of a list

    every window of max_hybridisation_value+1 consecutive bases, taken from the primer
    and from its reverse complement, is searched in each primer of the list
    return False as soon as one window is found in a listed primer, True otherwise
    """
    reverse_complement = dfr.reverse_complement(primer)  # reverse complement of the tested primer
    window_size = max_hybridisation_value + 1

    # the windows only depend on the tested primer, so they are collected once
    windows = []
    for start in range(len(primer) - max_hybridisation_value):
        windows.append(primer[start:start + window_size])
        windows.append(reverse_complement[start:start + window_size])

    # a single shared window with any already selected primer counts as a hybridisation
    return not any(
        window in other_primer
        for other_primer in selected_primers_list
        for window in windows
    )
def compare_GC(primer_A: str, primer_B: str) -> bool:
    """
    primers have a special temperature in PCR depending on %GC, a couple (start;stop) needs to have a close tmp, so the same %GC is better
    return true if the %GC is the same for the 2 primers

    only the uppercase letters A, C, G and T are counted
    the comparison is done on integers (cross-multiplication), so a primer without any A or T
    (100% GC) no longer raises a ZeroDivisionError and no float equality is involved
    a primer containing none of the 4 bases has no defined %GC, the function returns False for it
    """
    def count_gc_and_total(primer: str) -> tuple:
        # number of G/C bases and total number of counted bases of a primer
        gc_count = primer.count("C") + primer.count("G")
        return gc_count, gc_count + primer.count("A") + primer.count("T")

    gc_A, total_A = count_gc_and_total(primer_A)
    gc_B, total_B = count_gc_and_total(primer_B)

    if total_A == 0 or total_B == 0:
        return False  # no base counted, the %GC is undefined

    # gc_A/total_A == gc_B/total_B, written without any division
    return gc_A * total_B == gc_B * total_A