diff --git a/file_to_dna.py b/file_to_dna.py index a4304dc7da7eb0f3869a56ee1291e610c54463aa..b6a041d07bddc75a4b1ff613e758967d98dfd574 100755 --- a/file_to_dna.py +++ b/file_to_dna.py @@ -114,7 +114,7 @@ def encode_file(input_path: str, output_path: str) -> None: binary_string = convert_file_to_bits(input_path) # get the binary string representing the file content - dna_payload_size = bdc.size_of_dna_from_bit_len_abaab(len(binary_string) + CHECK_SUM_SIZE) # length of the payload after conversion in dna + dna_payload_size = bdc.size_dna_from_bit_len_abaab(len(binary_string) + CHECK_SUM_SIZE) # length of the payload after conversion in dna #print("payload size",str(dna_payload_size)) # estimate number of blocks, round up to next int @@ -138,8 +138,8 @@ def encode_file(input_path: str, output_path: str) -> None: exit(1) pass - # add a non coding A at the end of the sequence to make the last block at correct size when it is not possible to do by adding bits of 0 (very rare case) - add_A_at_end = False + # add a non coding base at the end of the sequence to make the last block at correct size when it is not possible to do by adding bits of 0 (very rare case) + add_base_at_end = False if force_size_to_max or total_dna_size % block_number > 0: # need to add non payload bits to adjust the size of blocks @@ -158,16 +158,16 @@ def encode_file(input_path: str, output_path: str) -> None: filler_length = bdc.size_binary_from_dna_len_abaab(dna_payload_needed_size) - len(binary_string) - CHECK_SUM_SIZE #print("dna_payload_needed_size",str(dna_payload_needed_size)," +",filler_length,"bits") - # fill with '0' at the beginning of the binary # not the end because some zip can end with octets of 0, wich makes difficult to remove only the non coding '0' + # fill with '0' at the beginning of the binary # not the end because some zip can end with octets of 0, which makes difficult to remove only the non coding '0' binary_string = math.ceil(filler_length) * "0" + binary_string #print("updated binary size", str(len(binary_string))) # rare case where adding 0 can not solve the problem of round blocks if dna_payload_needed_size % 5 == 3: - # need a non coding A at the end - #print("added A") - add_A_at_end = True + # need a non coding base at the end + #print("added non coding base") + add_base_at_end = True # apply a filter to the binary string -> shuffle the data to avoid long rows of 0 or 1, and avoid rows repetitions @@ -182,7 +182,7 @@ def encode_file(input_path: str, output_path: str) -> None: # convert binaries into dna sequence sequence = bdc.binary_to_dna_abaab(filtered_binary_string) - if add_A_at_end: sequence += "A" + if add_base_at_end: sequence += bdc.bit_to_dna_balance_GC("0", sequence[-1]) # add a non coding base different from the preceding one total_sequence_size = len(sequence)+ dna_non_payload_size