improved buffer selection

de50d39c · BOULLE Olivier · 7b570ad0 · de50d39c
Commit de50d39c authored 2 years ago by BOULLE Olivier
--- a/eBlocks_design.py
+++ b/eBlocks_design.py
@@ -164,11 +164,8 @@ def add_bsaI(block_dict: dict, output_path: str = None) -> dict:
    bsaI_block_dict = {}
    bsaI_block_list = list(block_dict.values())
-    for i in range(len(bsaI_block_list)):
+    for key, value in block_dict.items():
-        bsaI_block_list[i] = start_bsaI + bsaI_block_list[i] + stop_bsaI
+        bsaI_block_dict[key] = start_bsaI + value + stop_bsaI
-    for i, key in enumerate(block_dict.keys()):
-        bsaI_block_dict[key] = bsaI_block_list[i]
    if output_path:
        dfr.save_dict_to_fasta(bsaI_block_dict, output_path)
@@ -194,10 +191,24 @@ def add_buffer(block_dict: str, output_path: str = None) -> dict:
    # choose some payload in the next fragment
    for i, frag_name in enumerate(list(block_dict.keys())):
-        start_buffer = block_list[i+1][100:100+buffer_size]
+        mid_block_index = len(block_list[i+1])//2
-        stop_buffer = block_list[i+1][150:150+buffer_size]
+        start_buffer = block_list[i+1][mid_block_index-buffer_size:mid_block_index-1]
+        stop_buffer = block_list[i+1][mid_block_index:mid_block_index+buffer_size-1]
+        # adding a buffer next to the payload can create a homopolymer at the junction,
+        # so insert in between a base that differs from both the payload extremity and the adjacent buffer base
+        alphabet = [ a for a in ["A", "G", "C", "T"] if a not in [block_dict[frag_name][0], start_buffer[-1]] ]
+        start_buffer += alphabet[0]
+        alphabet = [ a for a in ["A", "G", "C", "T"] if a not in [block_dict[frag_name][-1], stop_buffer[0]] ]
+        stop_buffer = alphabet[0] + stop_buffer
        buffered_block = start_buffer + block_dict[frag_name] + stop_buffer
        #TODO test for no inverse repeat region between buffer and payload
        buffer_block_dict[frag_name] = buffered_block
@@ -219,6 +230,7 @@ def eBlocks_design(payload_fragments_dir_path: str, output_dir_path: str) -> Non
    overhang_list = list(dfr.read_fasta(os.path.abspath(os.path.dirname(__file__))+"/overhangs_eBlocks.fasta").values())
    full_assembly_total_sequences = [] # save the total sequences of each assembly
+    total_blocks_number = 0 # number of blocks in the directory
    # loop over payload sequences files
    for filename in os.listdir(payload_fragments_dir_path):
@@ -235,6 +247,7 @@ def eBlocks_design(payload_fragments_dir_path: str, output_dir_path: str) -> Non
            # read the sequences
            eBlock_dict = dfr.read_fasta(file_path)
+            total_blocks_number += len(eBlock_dict.values())
            # add the overhangs for each eBlock and save it into a file
            _, blocks_assembly_sequence = add_intern_overhangs(eBlock_dict, overhang_list, output_subdir_path+"/1_blocks_overhang.fasta")
@@ -249,6 +262,7 @@ def eBlocks_design(payload_fragments_dir_path: str, output_dir_path: str) -> Non
    all_total_sequences_concat = "".join(full_assembly_total_sequences)
    print("total sequences len :",str(len(all_total_sequences_concat)))
+    print("in",str(total_blocks_number),"blocks")
    # generate primers compatible with the full joined sequence
    compatible_primers_list = pg.generate_compatible_primers(all_total_sequences_concat)