Mentions légales du service

Skip to content
Snippets Groups Projects
Commit be62c47f authored by BOULLE Olivier's avatar BOULLE Olivier
Browse files

functions to add and remove the compression extension

parent eb178ae1
No related branches found
No related tags found
No related merge requests found
......@@ -12,6 +12,12 @@ do the opposite of pre_processing.py
take dna sequences and convert them into files
"""
def compressed_name(file_path):
    """
    return the name a file gets once it is compressed :
    the given path (or bare file name) followed by the ".gz" extension
    """
    gzip_extension = ".gz"
    return "".join((file_path, gzip_extension))
def uncompressed_name(file_path):
    """
    return the name a compressed file gets once it is uncompressed :
    the given path (or bare file name) without its trailing ".gz" extension

    only the final extension is removed; a ".gz" appearing elsewhere in the
    path (a directory called "out.gz", a name like "a.gzip.txt.gz") is kept,
    where a plain str.replace would have removed every occurrence
    a path that does not end with ".gz" is returned unchanged
    """
    gzip_extension = ".gz"
    if file_path.endswith(gzip_extension):
        # slice instead of str.removesuffix to stay compatible with python < 3.9
        return file_path[:-len(gzip_extension)]
    return file_path
def convert_to_binary(input_dir_path, compressed_dir_path):
"""
......@@ -20,7 +26,7 @@ def convert_to_binary(input_dir_path, compressed_dir_path):
for filename in os.listdir(input_dir_path):
file_path = os.path.join(input_dir_path, filename)
result_file_path = os.path.join(compressed_dir_path, filename)+".gz"
result_file_path = os.path.join(compressed_dir_path, compressed_name(filename))
# checking if it is a file
if os.path.isfile(file_path):
......@@ -38,14 +44,14 @@ def uncompress_files(compressed_dir_path, uncompressed_dir_path):
"""
for filename in os.listdir(compressed_dir_path):
file_path = os.path.join(compressed_dir_path, filename)
result_file_path = os.path.join(uncompressed_dir_path, filename.replace(".gz",""))
result_file_path = os.path.join(uncompressed_dir_path, uncompressed_name(filename))
# checking if it is a file
if os.path.isfile(file_path):
pre_processing.unzip_file(file_path)
# move the unzipped file
os.replace(file_path.replace(".gz", ""), result_file_path)
os.replace(uncompressed_name(file_path), result_file_path)
elif os.path.isdir(file_path):
print("error post processing (uncompress_files) : directory found in compressed_dir_path", filename)
......
......@@ -33,6 +33,13 @@ def unzip_file(file_path):
subprocess.run('/bin/bash -c "$COMMAND"', shell=True, env={'COMMAND': decompression_command})
def compressed_name(file_path):
    """
    build the compressed counterpart of a file name or path
    by appending the ".gz" extension to it
    """
    extension = ".gz"
    compressed_path = file_path + extension
    return compressed_path
def uncompressed_name(file_path: str) -> str:
    """
    build the uncompressed counterpart of a compressed file name or path
    by removing its trailing ".gz" extension

    only the extension at the very end is stripped, so a ".gz" located in a
    parent directory name or in the middle of the file name is left intact
    (replacing every ".gz" occurrence would corrupt such paths)
    a path without a trailing ".gz" is returned as it is
    """
    extension = ".gz"
    if not file_path.endswith(extension):
        return file_path
    # drop exactly the extension characters from the end of the path
    return file_path[:len(file_path) - len(extension)]
def insert_path_in_files(input_dir_path: str, rearanged_files_dir_path: str) -> None:
"""
copy all files from a directory and paste them at the same level in output directory, with their relative path written before first line
......@@ -90,7 +97,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
exit(0)
# get binary size of the compressed file
binary_len = len(file_to_dna.convert_file_to_bits(compressed_file_path+".gz"))
binary_len = len(file_to_dna.convert_file_to_bits(compressed_name(compressed_file_path)))
if binary_len <= max_binary_length: # if acceptable length, it's perfect
files_compressed_size[filename] = binary_len # save the compressed size for this file
......@@ -98,7 +105,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
else:
# file too large, need to split it
os.remove(compressed_file_path+".gz") # delete the compressed file
os.remove(compressed_name(compressed_file_path)) # delete the compressed file
# read the original file as bytes
with open(file_path, "rb") as input_file:
......@@ -143,7 +150,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
# compress the split_file to the compressed directory
zip_file(split_file_path, compressed_file_path + split_file_footer)
compressed_subfile_path = compressed_file_path + split_file_footer + ".gz"
compressed_subfile_path = compressed_name(compressed_file_path + split_file_footer)
# check the size of the subfile
binary_len = len(file_to_dna.convert_file_to_bits(compressed_subfile_path))
......@@ -197,7 +204,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
# add the merged file path to the bis dict with its compressed size
zip_file(merged_file_path, compressed_merged_file_path)
merged_binary_len = len(file_to_dna.convert_file_to_bits(compressed_merged_file_path+".gz"))
merged_binary_len = len(file_to_dna.convert_file_to_bits(compressed_name(compressed_merged_file_path)))
if merged_binary_len >= max_binary_length:
print("error merging result too large", compressed_merged_file_path)
......@@ -207,8 +214,8 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
files_compressed_size[merged_file_name] = merged_binary_len
# remove the 2 compressed files of the 2 files
os.remove(os.path.join(compressed_dir_path, filename)+".gz")
os.remove(os.path.join(compressed_dir_path, filename_2)+".gz")
os.remove(os.path.join(compressed_dir_path, compressed_name(filename)))
os.remove(os.path.join(compressed_dir_path, compressed_name(filename_2)))
# set the compressed size of the 2 files to a too high number to avoid them to be reused for merging
files_compressed_size[filename] = 2*max_binary_length
......@@ -238,7 +245,7 @@ def convert_to_sequence(compressed_dir_path, payload_fragments_dir_path):
# checking if it is a file
if os.path.isfile(file_path):
output_file_path = os.path.join(payload_fragments_dir_path, filename.replace(".gz", ""))
output_file_path = os.path.join(payload_fragments_dir_path, uncompressed_name(filename))
dna_sequence = file_to_dna.encode_file(file_path, output_file_path) # convert binaries into a dna sequence and save result in the output file
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment