Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 5bc5b5e3 authored by BOULLE Olivier's avatar BOULLE Olivier
Browse files

new random assembly simulation

parent 4deca44d
Branches
No related tags found
No related merge requests found
......@@ -2,6 +2,8 @@
import os
import argparse
import random
import dna_file_reader as dfr
......@@ -45,25 +47,67 @@ def digest_bsaI(eBlocks_dict: dict) -> dict:
return result_dict
def assemble_blocks(assembly_name: str, eBlocks_dict: dict) -> dict:
    """
    just a single perfect assembly of blocks

    The block sequences are taken in the iteration order of eBlocks_dict and
    joined one after the other: the first overhang_size characters of each
    block must equal the last overhang_size characters of the assembly built
    so far, and that shared overhang is only kept once.

    :param assembly_name: key under which the assembled sequence is returned
    :param eBlocks_dict: dict whose values are the block sequences to join
        (presumably already digested by digest_bsaI - confirm with the caller)
    :return: a dict with a single entry {assembly_name: assembled sequence}
    :raises SystemExit: with code 1 when two successive blocks do not share
        the same overhang
    """
    block_sequences = list(eBlocks_dict.values())
    assembly_sequence = block_sequences[0]

    for sequence in block_sequences[1:]:
        if sequence[:overhang_size] != assembly_sequence[-overhang_size:]:
            print("error assembly simulation, overhangs not compatibles", assembly_sequence[-overhang_size:], sequence[:overhang_size])
            # same effect as exit(1), without relying on the site-provided builtin
            raise SystemExit(1)
        # the overhang is shared by both blocks, append only what follows it
        assembly_sequence += sequence[overhang_size:]

    # remove overhangs before start primer and after stop primers in extremities blocks
    assembly_sequence = assembly_sequence[overhang_size:-overhang_size]

    return {assembly_name: assembly_sequence}
def assemble_blocks_simulation(eBlocks_dict: dict, nbr_mol: int) -> dict:
    """
    use the statistics of overhang assemblies to simulate the assembly of blocks

    For each of the nbr_mol assembly tries, the blocks are linked one after the
    other in the iteration order of eBlocks_dict. Each link succeeds with the
    probability read from the overhang statistics file for the overhang at the
    end of the current assembly. When a link fails, the current assembly is
    stored as a finished molecule and a new assembly is started from the block
    that could not be linked.

    :param eBlocks_dict: dict whose values are the block sequences to assemble
    :param nbr_mol: number of assembly tries to simulate
    :return: dict mapping an integer index to each resulting assembly sequence;
        empty when eBlocks_dict is empty
    :raises KeyError: when the overhang at the end of an assembly has no entry
        in the statistics file
    """
    # presumably a dict {overhang: link probability as a string}; it is indexed
    # by overhang and converted with float() below
    overhang_assembly_stats_dict = dfr.read_fasta("file_test_assembly/overhangs_stats.txt")

    blocks_list = list(eBlocks_dict.values())
    assembly_dict = {}
    if not blocks_list:
        # nothing to assemble
        return assembly_dict

    # NOTE(review): the keys are integer indexes; confirm that
    # dfr.save_dict_to_fasta accepts non-string sequence names
    i = 0
    for _ in range(nbr_mol):  # an assembly try can result in multiple molecules
        current_assembly = blocks_list[0]  # start an assembly from the first block
        for block in blocks_list[1:]:
            last_overhang = current_assembly[-overhang_size:]
            overhang_link_proba = float(overhang_assembly_stats_dict[last_overhang])
            # random.random() is in [0.0, 1.0): a strict comparison makes a
            # probability of 0 never link and a probability of 1 always link
            if random.random() < overhang_link_proba:  # chance to succeed overhang linking
                print("link success")
                current_assembly += block[overhang_size:]  # link the block to the current assembly
            else:
                print("link fail")
                # the current assembly is not linked to the block, so it is completed
                assembly_dict[i] = current_assembly
                i += 1
                current_assembly = block[overhang_size:]  # begin a new assembly starting from this block
        # add the last assembly of this try to the dict
        assembly_dict[i] = current_assembly
        i += 1

    for value in assembly_dict.values():
        print(value)

    # return the simulated assemblies, not the unchanged input blocks
    return assembly_dict
def eBlocks_assembly(input_path: str, assembly_name, output_path: str, nbr_mol: int) -> None:
"""
......@@ -81,12 +125,13 @@ def eBlocks_assembly(input_path: str, assembly_name, output_path: str, nbr_mol:
# simulate bsaI digestion
eBlocks_dict = digest_bsaI(eBlocks_dict)
# simulate the assembly by overhangs, fatal error if the overhangs of successive blocks are not compatible
assembly_sequence = assemble_blocks(eBlocks_dict)
#assembly_dict = assemble_blocks(assembly_name, eBlocks_dict)
assembly_dict = assemble_blocks_simulation(eBlocks_dict, nbr_mol)
# save the assemblies into file
dfr.save_sequence_to_fasta(assembly_name, assembly_sequence, output_path)
dfr.save_dict_to_fasta(assembly_dict, output_path)
def container_assembly(input_dir_path, output_dir_path, nbr_mol):
for filename in os.listdir(input_dir_path):
......@@ -112,7 +157,7 @@ if __name__ == '__main__':
# ---------- input list ---------------#
arg = parser.parse_args()
print("fragment assembly...")
print("fragment assembly simulation...")
container_assembly(arg.input_dir_path, arg.output_dir_path, arg.nbr_mol)
......
>block_1
buffer_________bsaI___ov0_primer_____________block_1_ov1_bsaI___buffer_________
>block_2
buffer_________bsaI___ov1_block_2_ov2_bsaI___buffer_________
>block_3
buffer_________bsaI___ov2_block_3_ov3_bsaI___buffer_________
>block_4
buffer_________bsaI___ov3_block_4_ov4_bsaI___buffer_________
>block_5
buffer_________bsaI___ov4_block_5_ov5_bsaI___buffer_________
>block_6
buffer_________bsaI___ov5_block_6_ov6_bsaI___buffer_________
>block_7
buffer_________bsaI___ov6_block_7_ov7_bsaI___buffer_________
>block_8
buffer_________bsaI___ov7_block_8_ov8_bsaI___buffer_________
>block_9
buffer_________bsaI___ov8_block_9_ov9_bsaI___buffer_________
>block_10
buffer_________bsaI___ov9_block_10primer_____________ov10bsaI___buffer_________
>ov1_
0.958
>ov2_
0.97
>ov3_
0.935
>ov4_
0.99
>ov5_
0.995
>ov6_
0.95
>ov7_
0.96
>ov8_
0.98
>ov9_
0.93
>ov10
0.992
>ov11
0.95
>ov12
0.95
>ov13
0.95
>ov14
0.95
>ov15
0.95
>ov16
0.95
>ov17
0.95
>ov18
0.95
>ov19
0.95
>ov20
0.95
>ov21
0.95
>ov22
0.95
>ov23
0.95
>ov24
0.95
>ov25
0.95
>ov26
0.95
>ov27
0.95
>ov28
0.95
>ov29
0.95
>ov30
0.95
>ov31
0.95
>ov32
0.95
>ov33
0.95
>ov34
0.95
>ov35
0.95
>ov36
0.95
>ov37
0.95
>ov38
0.95
>ov39
0.95
>ov40
0.95
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment