Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 5a217f07 authored by BOULLE Olivier's avatar BOULLE Olivier
Browse files

function for forbidden rests

parent cb2c89ef
No related branches found
No related tags found
No related merge requests found
......@@ -29,6 +29,13 @@ def bit_to_dna_balance_GC(bit_char: str, reference_base: str):
return bit_to_dna_AT[bit_char]
def forbidden_rest_abaab(binary_string_size: int) -> bool:
    """
    return True if the binary is of a length that cannot be converted by the abaab method

    :param binary_string_size: length of the binary string (number of bits), as an integer
    :return: True when the length modulo 8 is 1, 4 or 6, False for any other rest
    """
    # the size is a bit count (callers pass len(binary_string)), hence int and not str
    return binary_string_size % 8 in (1, 4, 6)
def binary_to_dna_abaab(binary_string: str) -> str:
"""
convert binaries into dna sequence with some properties
......@@ -49,7 +56,7 @@ def binary_to_dna_abaab(binary_string: str) -> str:
#TODO warning : if ending with bsaI -> end can currently be aa when a rest of 4 bits -> will break with banword removal
"""
if len(binary_string) % 8 in [1, 4, 6]:
if forbidden_rest_abaab(len(binary_string)):
print("error binary dna conversion, need a binary string multiple of 8, or with a rest of 2, 3, 7 ("+str(len(binary_string) % 8)+")")
exit(0)
......@@ -231,6 +238,13 @@ def size_binary_from_dna_len_abaab(dna_length):
return base_length + 7
def forbidden_rest_baa(binary_string_size: int) -> bool:
    """
    return True if the binary is of a length that cannot be converted by the baa method

    :param binary_string_size: length of the binary string (number of bits), as an integer
    :return: True when the length modulo 5 is 2 or 4, False for a rest of 0, 1 or 3
    """
    # the size is a bit count (callers pass len(binary_string)), hence int and not str
    return binary_string_size % 5 in (2, 4)
def binary_to_dna_baa(binary_string: str, GC_window=20) -> str:
"""
convert binaries into dna sequence with some properties
......@@ -249,9 +263,9 @@ def binary_to_dna_baa(binary_string: str, GC_window=20) -> str:
only allowing rests multiple of 5 removes ambiguity (also possible 0, 1 or 3 modulo 5)
"""
if len(binary_string) % 5 not in [0, 1, 3]:
print("error binary dna conversion, need a binary string multiple of 5, or with a rest of 1, 3")
return
if forbidden_rest_baa(len(binary_string)):
print("error binary dna conversion, need a binary string multiple of 5, or with a rest of 1, 3 ("+str(len(binary_string) % 5)+")")
return
sequence = ""
n_quintuplets, rest = divmod(len(binary_string), 5)
......@@ -339,6 +353,7 @@ def dna_to_binary_baa(sequence: str) -> str:
return binary_string
def remove_ban_words_baa_encoding(sequence: str, baa_method_offset=0) -> str:
"""
remove banned words from a sequence encoded with the binary_conversion.binary_to_dna_baa() method
......
......@@ -112,11 +112,12 @@ def encode_file(input_path: str, output_path: str) -> None:
objective is to fit the payload in the lowest number of blocks, but all blocks must be the same size after addition of non coding stuff (primer/buffer/overhang/bsaI)
"""
#TODO
#TODO
binary_string = convert_file_to_bits(input_path) # get the binary string representing the file content
#binary_string = input_path #TODO REMOVE
if (len(binary_string) + CHECK_SUM_SIZE) % 8 in [1, 4, 6]: # add a non coding 0 when size of incompatible length for dna conversion
if bdc.forbidden_rest_abaab(len(binary_string) + CHECK_SUM_SIZE):
# add a non coding 0 when size of incompatible length for dna conversion
binary_string = "0" + binary_string
dna_payload_size = bdc.size_dna_from_bit_len_abaab(len(binary_string) + CHECK_SUM_SIZE) # length of the payload after conversion in dna
......@@ -155,8 +156,9 @@ def encode_file(input_path: str, output_path: str) -> None:
#print("updated binary size", str(len(binary_string)))
# apply a filter to the binary string -> shuffle the data to avoid long rows of 0 or 1, and avoid rows repetitions
binary_string = binary_string[::-1] # reverse the binary string, because 2 files can have the same start with ziping methods
# apply a filter to the binary string -> shuffle the data to avoid long rows of 0 or 1, and avoid rows repetitions
filtered_binary_string = apply_binary_filter(binary_string)
......@@ -166,9 +168,7 @@ def encode_file(input_path: str, output_path: str) -> None:
# convert binaries into dna sequence
sequence = bdc.binary_to_dna_abaab(filtered_binary_string)
#if add_base_at_end: sequence += bdc.bit_to_dna_balance_GC("0", sequence[-1]) # add a non coding base different from the preceding one
total_sequence_size = len(sequence)+ dna_for_assembly_size
# test for errors that should never occur (I hope ...)
......@@ -217,8 +217,10 @@ def decode_file(input_path: str, output_path: str) -> None:
input : fasta format file with the payload sequences extracted from the block assembly
"""
#TODO
sub_sequences_dict = dfr.read_fasta(input_path)
#TODO
sequence = "".join(sub_sequences_dict.values())
#sequence = input_path #TODO REMOVE
......@@ -268,7 +270,6 @@ if __name__ == '__main__':
#binary_string = sys.argv[1]
#seq = encode_file(doc_path)
#print(get_max_binary_len())
#exit(0)
for i in range(400,10000, 8):
......@@ -281,13 +282,13 @@ if __name__ == '__main__':
binary_result = decode_file(seq, "")
if binary != binary_result:
#print(binary)
#print(seq)
#print(binary_result)
print(binary)
print(seq)
print(binary_result)
exit(0)
#encode_file("", "test")
#seq = binary_to_dna_abaab(binary_string)
#seq = binary_to_dna_baa(binary_string)
#print(seq)
#print(dna_to_binary_abaab(seq))
#print(dna_to_binary_baa(seq))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment