Mentions légales du service

Skip to content
Snippets Groups Projects
Commit a06df5ca authored by BOULLE Olivier's avatar BOULLE Olivier
Browse files

add overhangs at extremities

parent c82e2092
Branches
No related tags found
No related merge requests found
......@@ -17,6 +17,9 @@ bsaI_size = 7
def remove_buffer(eBlocks_dict: dict) -> dict:
"""
buffer at the beginning and end of each blocks
"""
result_dict = {}
for key, sequence in eBlocks_dict.items():
......@@ -26,14 +29,14 @@ def remove_buffer(eBlocks_dict: dict) -> dict:
def digest_bsaI(eBlocks_dict: dict) -> dict:
"""
bsaI next to any overhangs, beginning and end of each blocks
"""
result_dict = {}
bsaI_block_list = list(eBlocks_dict.values())
bsaI_block_list[0] = bsaI_block_list[0][:-bsaI_size]
bsaI_block_list[-1] = bsaI_block_list[-1][bsaI_size:]
for i in range(1, len(bsaI_block_list)-1):
for i in range(len(bsaI_block_list)):
bsaI_block_list[i] = bsaI_block_list[i][bsaI_size:-bsaI_size]
for i, key in enumerate(eBlocks_dict.keys()):
......@@ -55,6 +58,9 @@ def assemble_blocks(eBlocks_dict: dict) -> str:
print("error assembly simulation, overhangs not compatibles",result_string[-overhang_size:],sequence[:overhang_size])
exit(1)
# remove overhangs before start primer and after stop primers in extremities blocks
result_string = result_string[overhang_size:-overhang_size]
return result_string
......@@ -86,7 +92,7 @@ def container_assembly(input_dir_path, output_dir_path, nbr_mol):
for filename in os.listdir(input_dir_path):
file_path = os.path.join(input_dir_path, filename)
if os.path.isdir(file_path):
input_file = file_path+"/4_blocks_buffer.fasta"
input_file = file_path+"/5_blocks_buffer.fasta"
output_file = os.path.join(output_dir_path, filename+".fasta")
eBlocks_assembly(input_file, filename, output_file, nbr_mol)
......
......@@ -33,7 +33,7 @@ OVERHANG_LENGTH = 4 # CONSTANT for length of overhangs
def sac_a_dos(eBlock_dicts_dict: dict, overhang_list: list):
"""
dict {filename:dict(eBlocks)}
dict {filename:dict(eBlocks)} #TODO
"""
sizes_dicts = {}
......@@ -55,7 +55,7 @@ def sac_a_dos(eBlock_dicts_dict: dict, overhang_list: list):
sacs_list.append([filename])
def add_overhangs_blocks(eBlock_dict: dict, overhang_list: list, output_path: str) -> (dict, str):
def add_intern_overhangs(block_dict: dict, overhang_list: list, output_path: str = None) -> (dict, str):
"""
add predefined overhangs at each blocks to make the intra-Block joints
B1 + ov1
......@@ -65,11 +65,11 @@ def add_overhangs_blocks(eBlock_dict: dict, overhang_list: list, output_path: st
ov9 + B10
"""
block_names = list(eBlock_dict.keys())
block_names = list(block_dict.keys())
if len(block_names) == 1: # no overhang addition for single blocks
dfr.save_dict_to_fasta(eBlock_dict, output_path)
return eBlock_dict, list(eBlock_dict.values())[0]
dfr.save_dict_to_fasta(block_dict, output_path)
return block_dict, list(block_dict.values())[0]
used_overhangs = overhang_list[:len(block_names)-1] # overhangs that will be used for this assembly
......@@ -78,27 +78,28 @@ def add_overhangs_blocks(eBlock_dict: dict, overhang_list: list, output_path: st
# first eBlock of a bigBlock : overhang only at the end
b_name = block_names[0]
overhang_block_dict[b_name] = eBlock_dict[b_name] + used_overhangs[0]
blocks_assembly_sequence += eBlock_dict[b_name] + used_overhangs[0]
overhang_block_dict[b_name] = block_dict[b_name] + used_overhangs[0]
blocks_assembly_sequence += block_dict[b_name] + used_overhangs[0]
# other eBlocks : overhang at the beginning + at the end
# the overhang at the end of eBlock N is the same as the overhang at the beginning of eBlock N+1
for num_block in range(1, len(block_names)-1):
b_name = block_names[num_block]
overhang_block_dict[b_name] = used_overhangs[num_block-1] + eBlock_dict[b_name] + used_overhangs[num_block]
blocks_assembly_sequence += eBlock_dict[b_name] + used_overhangs[num_block] # do not add the overhang at the start of the bock since it is already at the end of previous block
overhang_block_dict[b_name] = used_overhangs[num_block-1] + block_dict[b_name] + used_overhangs[num_block]
blocks_assembly_sequence += block_dict[b_name] + used_overhangs[num_block] # do not add the overhang at the start of the bock since it is already at the end of previous block
# last fragment : overhang only at the start
b_name = block_names[-1]
overhang_block_dict[b_name] = used_overhangs[-1] + eBlock_dict[b_name]
blocks_assembly_sequence += eBlock_dict[b_name]
overhang_block_dict[b_name] = used_overhangs[-1] + block_dict[b_name]
blocks_assembly_sequence += block_dict[b_name]
dfr.save_dict_to_fasta(overhang_block_dict, output_path)
if output_path:
dfr.save_dict_to_fasta(overhang_block_dict, output_path)
return overhang_block_dict, blocks_assembly_sequence
def add_primers(overhang_block_dict: dict, start_primer: str, stop_primer: str, output_path: str) -> dict:
def add_primers(block_dict: dict, start_primer: str, stop_primer: str, output_path: str = None) -> dict:
"""
add the primers to the first and last eBlocks
p1 + B1
......@@ -106,50 +107,76 @@ def add_primers(overhang_block_dict: dict, start_primer: str, stop_primer: str,
"""
primers_block_dict = {}
primer_block_list = list(overhang_block_dict.values())
primer_block_list = list(block_dict.values())
primer_block_list[0] = start_primer + primer_block_list[0] # start primer at the beginning
primer_block_list[-1] = primer_block_list[-1] + dfr.reverse_complement(stop_primer) # reverse complement of stop primer at the end
for i, key in enumerate(overhang_block_dict.keys()):
for i, key in enumerate(block_dict.keys()):
primers_block_dict[key] = primer_block_list[i]
dfr.save_dict_to_fasta(primers_block_dict, output_path)
if output_path:
dfr.save_dict_to_fasta(primers_block_dict, output_path)
return primers_block_dict
def add_bsaI(primers_block_dict: dict, output_path: str) -> dict:
def add_extern_overhangs(block_dict: dict, overhang_list: list, output_path: str = None) -> dict:
    """
    Add one overhang at each extremity of the assembly.

    The first entry of overhang_list is prepended to the first block and the
    second entry is appended to the last block. These extremity overhangs are
    used to create a single strand part to avoid random assembly of the
    molecules together.

    block_dict: {block name: sequence}, in assembly order
    overhang_list: list of overhangs; only the first two entries are used
        (an IndexError is raised if there are fewer than two)
    output_path: optional fasta path; the result is saved only when it is given

    return: a new dict with the same keys; block_dict itself is not modified
    """
    # TODO choose overhangs depending on the other molecules
    first_overhang, last_overhang = overhang_list[0], overhang_list[1]

    overhang_block_dict = block_dict.copy()

    if overhang_block_dict:  # nothing to extend when there is no block
        block_names = list(overhang_block_dict)
        first_name, last_name = block_names[0], block_names[-1]

        # overhang at the beginning of first block
        overhang_block_dict[first_name] = first_overhang + overhang_block_dict[first_name]

        # overhang at the end of last block
        # read the sequence back from the updated dict: when there is a single
        # block, first and last are the same entry and it must keep both overhangs
        overhang_block_dict[last_name] = overhang_block_dict[last_name] + last_overhang

    if output_path:
        dfr.save_dict_to_fasta(overhang_block_dict, output_path)

    return overhang_block_dict
def add_bsaI(block_dict: dict, output_path: str = None) -> dict:
    """
    Surround every block with bsaI restriction sites.

    The sites sit next to every overhang and are used in the biology part to
    remove dna in order to make the overhang parts single stranded.
    Every block, including the first and the last one, gets a site on both sides:
        start_bsaI + B1 + stop_bsaI
        start_bsaI + B2 + stop_bsaI
        ...
        start_bsaI + B10 + stop_bsaI

    block_dict: {block name: sequence}
    output_path: optional fasta path; the result is saved only when it is given

    return: a new dict with the same keys and the flanked sequences
    """
    start_bsaI = "GGTCTCG"  # bsaI site + 1 base # goes before an overhang
    stop_bsaI = "TGAGACC"  # 1 base + bsaI site # goes after an overhang

    # each block is flanked exactly once on each side; no special case for the
    # extremity blocks, so no site is added twice
    bsaI_block_dict = {
        name: start_bsaI + sequence + stop_bsaI
        for name, sequence in block_dict.items()
    }

    if output_path:
        dfr.save_dict_to_fasta(bsaI_block_dict, output_path)

    return bsaI_block_dict
def add_buffer(bsaI_block_dict: str, output_path) -> dict:
def add_buffer(block_dict: str, output_path: str = None) -> dict:
"""
add a buffering zone around each block
just pick some payload of an other block, the buffer will be removed in biology
......@@ -159,22 +186,23 @@ def add_buffer(bsaI_block_dict: str, output_path) -> dict:
"""
buffer_size = 15
block_list = list(bsaI_block_dict.values()) # list of blocks where the buffer is taken, usually take a buffer in the N+1 block to use in the N block
block_list = list(block_dict.values()) # list of blocks where the buffer is taken, usually take a buffer in the N+1 block to use in the N block
block_list.append(block_list[0]) # add the first block at the end so the last buffer can use it
buffer_block_dict = {}
# choose some payload in the next fragment
for i, frag_name in enumerate(list(bsaI_block_dict.keys())):
for i, frag_name in enumerate(list(block_dict.keys())):
start_buffer = block_list[i+1][100:100+buffer_size]
stop_buffer = block_list[i+1][150:150+buffer_size]
buffered_block = start_buffer + bsaI_block_dict[frag_name] + stop_buffer
buffered_block = start_buffer + block_dict[frag_name] + stop_buffer
#TODO test for no inverse repeat region between buffer and payload
buffer_block_dict[frag_name] = buffered_block
dfr.save_dict_to_fasta(buffer_block_dict, output_path)
if output_path:
dfr.save_dict_to_fasta(buffer_block_dict, output_path)
return buffer_block_dict
......@@ -209,7 +237,7 @@ def eBlocks_design(payload_fragments_dir_path: str, output_dir_path: str) -> Non
eBlock_dict = dfr.read_fasta(file_path)
# add the overhangs for each eBlock and save it into a file
overhang_block_dict, blocks_assembly_sequence = add_overhangs_blocks(eBlock_dict, overhang_list, output_subdir_path+"/1_blocks_overhang.fasta")
_, blocks_assembly_sequence = add_intern_overhangs(eBlock_dict, overhang_list, output_subdir_path+"/1_blocks_overhang.fasta")
# create a sequence containing the concatenation of all eBlocks with overhangs
full_assembly_total_sequences.append(blocks_assembly_sequence)
......@@ -258,11 +286,13 @@ def eBlocks_design(payload_fragments_dir_path: str, output_dir_path: str) -> Non
primers_block_dict = add_primers(overhang_block_dict, start_primer, stop_primer, output_subdir_path+"/2_blocks_overhang_primers.fasta")
extern_overhang_blocks = add_extern_overhangs(primers_block_dict, overhang_list, output_subdir_path+"/3_blocks_overhang_primers_overhangs.fasta")
# add the bsaI sites to each eBlock
bsaI_block_dict = add_bsaI(primers_block_dict, output_subdir_path+"/3_blocks_bsaI.fasta")
bsaI_block_dict = add_bsaI(extern_overhang_blocks, output_subdir_path+"/4_blocks_bsaI.fasta")
# add the buffers to each eBlock
buffer_block_dict = add_buffer(bsaI_block_dict, output_subdir_path+"/4_blocks_buffer.fasta")
buffer_block_dict = add_buffer(bsaI_block_dict, output_subdir_path+"/5_blocks_buffer.fasta")
dfr.save_dict_to_fasta(assigned_primers_dict, output_dir_path+"/assigned_primers.fasta") # save the selected primers
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment