Mentions légales du service

Skip to content
Snippets Groups Projects
Commit f10225b0 authored by BOULLE Olivier
Browse files

switch to abaab, decode all sequences in input

parent 3bd22658
Branches
No related tags found
No related merge requests found
......@@ -28,8 +28,8 @@ bsaI_size = 7
extern_extremity_size = primer_size + overhang_size + bsaI_size + buffer_size # size of non-payload dna part at the extern extremities (start of first block and end of last block)
intern_extremity_size = overhang_size + bsaI_size + buffer_size # size of non-payload dna part at the extremities between blocks
max_total_block_size = 1000 # maximum allowed size for a complete block
min_total_block_size = 1000 # minimum allowed size for a complete block
max_total_block_size = 500 # maximum allowed size for a complete block
min_total_block_size = 500 # minimum allowed size for a complete block
n_block_max = 10 # maximum assembled number of blocks
......@@ -97,7 +97,7 @@ def get_max_binary_len() -> int:
max_payload_dna_len = max_total_dna_len - 2*extern_extremity_size - 2*(n_block_max-1)*intern_extremity_size # without the assembly stuff
max_binary_len = bdc.size_binary_from_dna_len_baa(max_payload_dna_len) - CHECK_SUM_SIZE # convert the remaining bases to binary and remove the checksum bits
max_binary_len = bdc.size_binary_from_dna_len_abaab(max_payload_dna_len) - CHECK_SUM_SIZE # convert the remaining bases to binary and remove the checksum bits
return math.floor(max_binary_len) # round down
......@@ -115,11 +115,11 @@ def encode_file(input_path: str, output_path: str) -> None:
binary_string = convert_file_to_bits(input_path) # get the binary string representing the file content
#binary_string = input_path #TODO REMOVE
if bdc.forbidden_rest_baa(len(binary_string) + CHECK_SUM_SIZE):
if bdc.forbidden_rest_abaab(len(binary_string) + CHECK_SUM_SIZE):
# add a non coding 0 when size of incompatible length for dna conversion
binary_string = "0" + binary_string
dna_payload_size = bdc.size_dna_from_bit_len_baa(len(binary_string) + CHECK_SUM_SIZE) # length of the payload after conversion in dna
dna_payload_size = bdc.size_dna_from_bit_len_abaab(len(binary_string) + CHECK_SUM_SIZE) # length of the payload after conversion in dna
#print("payload size",str(dna_payload_size))
# estimate number of blocks, round up to next int
......@@ -145,9 +145,9 @@ def encode_file(input_path: str, output_path: str) -> None:
# calculate the number of bases to add to the payload to get a round number of equal length blocks
dna_payload_needed_size = final_assembly_size - dna_for_assembly_size
#print(bdc.size_binary_from_dna_len_abaab(dna_payload_needed_size) )
#print(bdc.size_binary_from_dna_len_aabaabb(dna_payload_needed_size) )
# get number of bits to add
filler_length = bdc.size_binary_from_dna_len_baa(dna_payload_needed_size) - len(binary_string) - CHECK_SUM_SIZE
filler_length = bdc.size_binary_from_dna_len_abaab(dna_payload_needed_size) - len(binary_string) - CHECK_SUM_SIZE
#print("dna_payload_needed_size",str(dna_payload_needed_size)," +",filler_length,"bits")
# fill with '0' at the beginning of the binary, not at the end: some zip files can end with octets of 0, which makes it difficult to remove only the non-coding '0's
......@@ -166,7 +166,7 @@ def encode_file(input_path: str, output_path: str) -> None:
filtered_binary_string += binary_check_sum
# convert binaries into dna sequence
sequence = bdc.binary_to_dna_baa(filtered_binary_string)
sequence = bdc.binary_to_dna_abaab(filtered_binary_string)
total_sequence_size = len(sequence)+ dna_for_assembly_size
......@@ -216,51 +216,47 @@ def decode_file(input_path: str, output_path: str) -> None:
input : fasta format file with the payload sequences extracted from the block assembly
"""
#TODO
sub_sequences_dict = dfr.read_fasta(input_path)
sequences_dict = dfr.read_fasta(input_path) # get all the sequences from the fasta file
#TODO
sequence = "".join(sub_sequences_dict.values())
#sequence = input_path #TODO REMOVE
for seq_name, sequence in sequences_dict.items():
# convert the dna sequence into a binary string
binary_from_dna_string = bdc.dna_to_binary_baa(sequence)
if not binary_from_dna_string:
print("warning file conversion, decoding an empty file")
return
# test if the check_sum corresponds to the binary string
binary_string = binary_from_dna_string[:-CHECK_SUM_SIZE]
binary_check_sum = binary_from_dna_string[-CHECK_SUM_SIZE:]
if compute_check_sum(binary_string) != binary_check_sum:
print(compute_check_sum(binary_string),"!=",binary_check_sum)
print("Invalid check sum for",input_path)
exit(1)
# convert the dna sequence into a binary string
binary_from_dna_string = bdc.dna_to_binary_abaab(sequence)
# apply the same filter used in the encoding to the binary string to remove it
binary_string = apply_binary_filter(binary_string)
binary_string = binary_string[::-1] # reverse the binary string to get the original
# case binaries length is not multiple of 8 -> remove the excess bits at the beginning that have been added in the encoding to get a round number of blocks
rest = len(binary_string) % 8
if rest != 0:
binary_string = binary_string[rest:]
# remove octets of zeros at the beginning (the start of the sequence can be filled with zeros to get a round number of blocks)
while binary_string.startswith(8*"0"): # 1/256 (2**8) chance to remove actual data ! but 8*0 is ascii char NULL
binary_string = binary_string[8:]
#return binary_string #TODO REMOVE
# convert binaries into bytes
n = int(binary_string, 2)
bytes = n.to_bytes((n.bit_length() + 7) // 8, 'big')
if not binary_from_dna_string:
print("warning file conversion, decoding an empty sequence",seq_name,"in",input_path)
continue
# write the bytes into the file
with open(output_path, "wb") as f:
f.write(bytes)
# test if the check_sum corresponds to the binary string
binary_string = binary_from_dna_string[:-CHECK_SUM_SIZE]
binary_check_sum = binary_from_dna_string[-CHECK_SUM_SIZE:]
if compute_check_sum(binary_string) != binary_check_sum:
print(compute_check_sum(binary_string),"!=",binary_check_sum)
print("Invalid check sum for",seq_name,"in",input_path)
continue
# apply the same filter used in the encoding to the binary string to remove it
binary_string = apply_binary_filter(binary_string)
binary_string = binary_string[::-1] # reverse the binary string to get the original
# case binaries length is not multiple of 8 -> remove the excess bits at the beginning that have been added in the encoding to get a round number of blocks
rest = len(binary_string) % 8
if rest != 0:
binary_string = binary_string[rest:]
# remove octets of zeros at the beginning (the start of the sequence can be filled with zeros to get a round number of blocks)
while binary_string.startswith(8*"0"): # 1/256 (2**8) chance to remove actual data ! but 8*0 is ascii char NULL
binary_string = binary_string[8:]
# convert binaries into bytes
n = int(binary_string, 2)
bytes = n.to_bytes((n.bit_length() + 7) // 8, 'big')
# write the bytes into the file
with open(output_path, "wb") as f:
f.write(bytes)
return # end the decoding, since the sequence passed the checksum
# =================== main ======================= #
......@@ -286,8 +282,8 @@ if __name__ == '__main__':
print(binary_result)
exit(0)
#encode_file("", "test")
#seq = binary_to_dna_baa(binary_string)
#seq = binary_to_dna_abaab(binary_string)
#print(seq)
#print(dna_to_binary_baa(seq))
#print(dna_to_binary_abaab(seq))
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment