Mentions légales du service

Skip to content
Snippets Groups Projects
Commit d1a1079c authored by BOULLE Olivier
Browse files

force maximum block size option, add block number and size in filename

parent 0a4f3f94
No related branches found
No related tags found
No related merge requests found
......@@ -27,8 +27,9 @@ bsaI_size = 7
extern_extremity_size = primer_size + buffer_size # size of non-payload dna part at the extern extremities (start of first block and end of last block)
intern_extremity_size = overhang_size + bsaI_size + buffer_size # size of non-payload dna part at the extremities between blocks
max_total_block_size = 1000 # maximum allowed size for a complete block
max_total_block_size = 500 # maximum allowed size for a complete block
min_total_block_size = 300 # minimum allowed size for a complete block
force_size_to_max = True # set to false if blocks can have any size between min and max, else force the size of all blocks to be the max size
n_block_max = 10 # maximum assembled number of blocks
......@@ -160,20 +161,23 @@ def encode_file(input_path: str, output_path: str) -> None:
# add a non-coding A at the end of the sequence so that the last block reaches the correct size when this cannot be achieved by adding bits of 0 (very rare case)
add_A_at_end = False
rest = total_dna_size % block_number # rest of dna if the sequence is divided equally in the blocks
if rest > 0: # fill up with some bits of 0 to have a round number of blocks
# find the next length the total sequence needs to be to have equal division in blocks
next_length_for_round_blocks = block_number * math.ceil(total_dna_size / block_number)
#print("next_length_for_round_blocks",str(next_length_for_round_blocks))
if force_size_to_max or total_dna_size % block_number > 0: # need to add non payload bits to adjust the size of blocks
# all blocks must be the maximum size
if force_size_to_max: # find the next length the total sequence needs to be to have a division in blocks of maximum size
final_assembly_size = block_number * max_total_block_size
else: # all blocks still need to have the same round size
# find the next length the total sequence needs to be to have equal division in blocks
final_assembly_size = block_number * math.ceil(total_dna_size / block_number)
#print("final_assembly_size",str(final_assembly_size))
# calculate the number of bits to add to get a round number of equal length blocks
dna_payload_needed_size = next_length_for_round_blocks - dna_non_payload_size
dna_payload_needed_size = final_assembly_size - dna_non_payload_size
#print(size_binary_from_dna_len(dna_payload_needed_size) )
filler_length = size_binary_from_dna_len(dna_payload_needed_size) - len(binary_string) - CHECK_SUM_SIZE
#print("dna_payload_needed_size",str(dna_payload_needed_size)," +",filler_length,"bits")
# fill with '0' at the beginning of the binary, not at the end, because some zip files can end with octets of 0, which makes it difficult to remove only the non-coding '0'
binary_string = math.ceil(filler_length) * "0" + binary_string
......@@ -204,11 +208,13 @@ def encode_file(input_path: str, output_path: str) -> None:
# test for errors that should never occur (I hope ...)
# round number of blocks, no blocks too large, no blocks too small
if round(total_sequence_size/block_number) != total_sequence_size/block_number or total_sequence_size/block_number > max_total_block_size or total_sequence_size/block_number < min_total_block_size:
print("error file to dna")
print("error file to dna", input_path)
print("\tseq payload size",str(len(sequence)))
print("\ttotal estimated seq size",total_sequence_size)
print("\t",str(block_number),"blocks of",str(total_sequence_size/block_number))
print("TODO remove me")
#TODO
return
exit(1)
# split the sequence into blocks of correct size to add the non payload stuff later # start the block count at 1
......@@ -233,6 +239,9 @@ def encode_file(input_path: str, output_path: str) -> None:
sub_sequences_dict[str(block_number)] = sequence[index_sequence:]
# add number and blocks size to path, add type
output_path = output_path + "_" + str(block_number) + "x" + str(total_sequence_size//block_number) + ".fasta"
dfr.save_dict_to_fasta(sub_sequences_dict, output_path)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment