diff --git a/post_processing.py b/post_processing.py index 03c3afa8f4e0f3c1f18f78c3e098d8a4e5348645..72339c292e99e38c381d1eaed4469ccfe8a438a0 100755 --- a/post_processing.py +++ b/post_processing.py @@ -12,6 +12,14 @@ do the opposite of pre_processing.py take dna sequences and convert them into files """ +def compressed_name(file_path): + """Return file_path with the ".gz" suffix appended.""" + return file_path+".gz" + +def uncompressed_name(file_path): + """Return file_path without its trailing ".gz" suffix; any other path is returned unchanged.""" + return file_path[:-len(".gz")] if file_path.endswith(".gz") else file_path + def convert_to_binary(input_dir_path, compressed_dir_path): """ @@ -20,7 +28,7 @@ def convert_to_binary(input_dir_path, compressed_dir_path): for filename in os.listdir(input_dir_path): file_path = os.path.join(input_dir_path, filename) - result_file_path = os.path.join(compressed_dir_path, filename)+".gz" + result_file_path = os.path.join(compressed_dir_path, compressed_name(filename)) # checking if it is a file if os.path.isfile(file_path): @@ -38,14 +46,14 @@ def uncompress_files(compressed_dir_path, uncompressed_dir_path): """ for filename in os.listdir(compressed_dir_path): file_path = os.path.join(compressed_dir_path, filename) - result_file_path = os.path.join(uncompressed_dir_path, filename.replace(".gz","")) + result_file_path = os.path.join(uncompressed_dir_path, uncompressed_name(filename)) # checking if it is a file if os.path.isfile(file_path): pre_processing.unzip_file(file_path) # move the unzipped file - os.replace(file_path.replace(".gz", ""), result_file_path) + os.replace(uncompressed_name(file_path), result_file_path) elif os.path.isdir(file_path): print("error post processing (uncompress_files) : directory found in compressed_dir_path", filename) diff --git a/pre_processing.py b/pre_processing.py index bae60193ecc4d86d40cf01447e86698927316a36..dfbc2640c08912c832bff01b05eb545cbcab995a 100755 --- a/pre_processing.py +++ b/pre_processing.py @@ -33,6 +33,15 @@ def unzip_file(file_path): subprocess.run('/bin/bash -c "$COMMAND"', shell=True, env={'COMMAND': decompression_command}) +def compressed_name(file_path): + """Return file_path with the ".gz" suffix appended.""" + 
return file_path+".gz" + +def uncompressed_name(file_path): + """Return file_path without its trailing ".gz" suffix; any other path is returned unchanged.""" + return file_path[:-len(".gz")] if file_path.endswith(".gz") else file_path + + def insert_path_in_files(input_dir_path: str, rearanged_files_dir_path: str) -> None: """ copy all files from a directory and paste them at the same level in output directory, with their relative path written before first line @@ -90,7 +99,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str) exit(0) # get binary size of the compressed file - binary_len = len(file_to_dna.convert_file_to_bits(compressed_file_path+".gz")) + binary_len = len(file_to_dna.convert_file_to_bits(compressed_name(compressed_file_path))) if binary_len <= max_binary_length: # if acceptable length, it's perfect files_compressed_size[filename] = binary_len # save the compressed size for this file @@ -98,7 +107,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str) else: # file too large, nedd to split it - os.remove(compressed_file_path+".gz") # delete the compressed file + os.remove(compressed_name(compressed_file_path)) # delete the compressed file # read the original file as bytes with open(file_path, "rb") as input_file: @@ -143,7 +152,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str) # compress the split_file to the compressed directory zip_file(split_file_path, compressed_file_path + split_file_footer) - compressed_subfile_path = compressed_file_path + split_file_footer + ".gz" + compressed_subfile_path = compressed_name(compressed_file_path + split_file_footer) # check the size of the subfile binary_len = len(file_to_dna.convert_file_to_bits(compressed_subfile_path)) @@ -197,7 +206,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str) # add the merged file path to the bis dict with it's compressed size zip_file(merged_file_path, compressed_merged_file_path) - merged_binary_len = len(file_to_dna.convert_file_to_bits(compressed_merged_file_path+".gz")) + 
merged_binary_len = len(file_to_dna.convert_file_to_bits(compressed_name(compressed_merged_file_path))) if merged_binary_len >= max_binary_length: print("error merging result too large", compressed_merged_file_path) @@ -207,8 +216,8 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str) files_compressed_size[merged_file_name] = merged_binary_len # remove the 2 compressed files of the 2 files - os.remove(os.path.join(compressed_dir_path, filename)+".gz") - os.remove(os.path.join(compressed_dir_path, filename_2)+".gz") + os.remove(os.path.join(compressed_dir_path, compressed_name(filename))) + os.remove(os.path.join(compressed_dir_path, compressed_name(filename_2))) # set the compressed size of the 2 files to a too high number to avoid them to be reused for merging files_compressed_size[filename] = 2*max_binary_length @@ -238,7 +247,7 @@ def convert_to_sequence(compressed_dir_path, payload_fragments_dir_path): # checking if it is a file if os.path.isfile(file_path): - output_file_path = os.path.join(payload_fragments_dir_path, filename.replace(".gz", "")) + output_file_path = os.path.join(payload_fragments_dir_path, uncompressed_name(filename)) dna_sequence = file_to_dna.encode_file(file_path, output_file_path) # convert binaries into a dna sequence and save result in the output file