Mentions légales du service

Skip to content
Snippets Groups Projects
Commit c631c4c5 authored by BOULLE Olivier
Browse files

apply to all sequences in the input fasta

parent f1e0e3c4
No related branches found
No related tags found
No related merge requests found
......@@ -14,7 +14,7 @@ import file_to_dna as ftd # get the biological constants for block size, etc...
def removes_non_payload_stuff(input_path: str, output_file: str) -> str:
def removes_non_payload_stuff(consensus_sequence: str) -> str:
""""
remove the non payload parts added for the block assembly
......@@ -27,7 +27,6 @@ def removes_non_payload_stuff(input_path: str, output_file: str) -> str:
n can only be from 1 to 10
X is integer between min_total_size and max_total_size, and the max possible with a minimal number of blocks
"""
consensus_name, consensus_sequence = dfr.read_single_sequence_fasta(input_path)
assembly_size = len(consensus_sequence)
......@@ -48,9 +47,9 @@ def removes_non_payload_stuff(input_path: str, output_file: str) -> str:
# no possible valid couple (block number, block size) for this assembly : invalid consensus
if block_number == 0:
print("warning no possible corresponding assembly for the consensus",consensus_name)
#print("warning no possible corresponding assembly for the consensus")
# TODO use another possible consensus for this file
return
return ""
# extract the payload from the assembly
payload_sequence = ""
......@@ -75,20 +74,29 @@ def removes_non_payload_stuff(input_path: str, output_file: str) -> str:
payload_sequence += consensus_sequence[sequence_index: sequence_index + block_extremity_payload_size]
dfr.save_sequence_to_fasta(consensus_name+"_payload", payload_sequence, output_file)
return payload_sequence
def extract_payload_container(input_dir_path: str, output_dir_path: str) -> None:
"""
remove overhangs and primers from the consensus sequence files and save the resulting payload
apply to all sequences contained in the fasta files
"""
for filename in os.listdir(input_dir_path):
file_path = os.path.join(input_dir_path, filename)
output_file = os.path.join(output_dir_path, filename)
result_sequence = removes_non_payload_stuff(file_path, output_file)
sequences_dict = dfr.read_fasta(file_path)
payload_dict = {}
for sequence_name, sequence in sequences_dict.items():
payload = removes_non_payload_stuff(sequence)
if payload != "": # ignore failed extractions
payload_dict[sequence_name+"_payload"] = payload
dfr.save_dict_to_fasta(payload_dict, output_file)
if __name__ == "__main__":
......@@ -104,6 +112,7 @@ if __name__ == "__main__":
print("payload extraction...")
#removes_non_payload_stuff(arg.input_dir_path, arg.output_dir_path)
extract_payload_container(arg.input_dir_path, arg.output_dir_path)
print("\tcompleted !")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment