Mentions légales du service

Skip to content
Snippets Groups Projects
Commit cb2c89ef authored by BOULLE Olivier's avatar BOULLE Olivier
Browse files

reword ending of abaab conversion, greatly simplified

parent eed3fa8c
No related branches found
No related tags found
No related merge requests found
......@@ -112,9 +112,13 @@ def encode_file(input_path: str, output_path: str) -> None:
The objective is to fit the payload into the smallest possible number of blocks, while all blocks must have the same size after the addition of the non-coding parts (primer/buffer/overhang/BsaI)
"""
#TODO
binary_string = convert_file_to_bits(input_path) # get the binary string representing the file content
#binary_string = input_path #TODO REMOVE
if (len(binary_string) + CHECK_SUM_SIZE) % 8 in [1, 4, 6]: # add a non coding 0 when size of incompatible length for dna conversion
binary_string = "0" + binary_string
dna_payload_size = bdc.size_dna_from_bit_len_abaab(len(binary_string) + CHECK_SUM_SIZE) # length of the payload after conversion in dna
#print("payload size",str(dna_payload_size))
......@@ -132,14 +136,11 @@ def encode_file(input_path: str, output_path: str) -> None:
# estimate the total dna sequence length after addition of non coding parts
total_dna_size = dna_payload_size + dna_for_assembly_size
#print("total_dna_size",str(total_dna_size))
# optionally add a non-coding base at the end of the sequence so that the last block reaches the correct size when this cannot be achieved by adding 0 bits (very rare case)
add_base_at_end = False
# round the number of base per blocks, make sure it is above the minimal block size
final_assembly_size = block_number * max(math.ceil(total_dna_size / block_number), min_total_block_size)
# case when some bits need to be added to increase the number of bases
# case when some bits needs to be added to increase the number of bases
if final_assembly_size != total_dna_size:
# calculate the number of bases to add to the payload to get a round number of equal length blocks
......@@ -153,12 +154,6 @@ def encode_file(input_path: str, output_path: str) -> None:
binary_string = math.ceil(filler_length) * "0" + binary_string
#print("updated binary size", str(len(binary_string)))
# rare case where adding 0 can not solve the problem of round blocks
if dna_payload_needed_size % 5 == 3:
# need a non coding base at the end
#print("added non coding base")
add_base_at_end = True
# apply a filter to the binary string -> shuffle the data to avoid long runs of 0s or 1s, and to avoid repeated patterns
binary_string = binary_string[::-1] # reverse the binary string, because two files can share the same start when produced by zipping methods
......@@ -172,7 +167,7 @@ def encode_file(input_path: str, output_path: str) -> None:
# convert binaries into dna sequence
sequence = bdc.binary_to_dna_abaab(filtered_binary_string)
if add_base_at_end: sequence += bdc.bit_to_dna_balance_GC("0", sequence[-1]) # add a non coding base different from the preceding one
#if add_base_at_end: sequence += bdc.bit_to_dna_balance_GC("0", sequence[-1]) # add a non coding base different from the preceding one
total_sequence_size = len(sequence)+ dna_for_assembly_size
......@@ -189,7 +184,6 @@ def encode_file(input_path: str, output_path: str) -> None:
# split the sequence into blocks of correct size to add the non payload stuff later # start the block count at 1
sub_sequences_dict = {}
if block_number == 1:
sub_sequences_dict["1"] = sequence
elif block_number == 2:
......@@ -226,7 +220,7 @@ def decode_file(input_path: str, output_path: str) -> None:
sub_sequences_dict = dfr.read_fasta(input_path)
sequence = "".join(sub_sequences_dict.values())
#sequence = input_path
#sequence = input_path #TODO REMOVE
# convert the dna sequence into a binary string
binary_from_dna_string = bdc.dna_to_binary_abaab(sequence)
......@@ -283,8 +277,8 @@ if __name__ == '__main__':
continue
print("i=",str(i))
seq = encode_file("", binary)
binary_result = decode_file(seq, "toto")
seq = encode_file(binary, "")
binary_result = decode_file(seq, "")
if binary != binary_result:
#print(binary)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment