Add helper functions to add and remove the compression extension

be62c47f · BOULLE Olivier · eb178ae1 · be62c47f · be62c47f
Commit be62c47f authored 1 year ago by BOULLE Olivier
--- a/post_processing.py
+++ b/post_processing.py
@@ -12,6 +12,12 @@ do the opposite of pre_processing.py
 take dna sequences and convert them into files
 """
+def compressed_name(file_path):
+    """
+    return the name of the compressed version of file_path (file_path followed by the ".gz" extension)
+    """
+    extension = ".gz"
+    return file_path + extension
+def uncompressed_name(file_path):
+    """
+    return file_path without its trailing ".gz" extension
+    a path that does not end with ".gz" is returned unchanged
+    """
+    extension = ".gz"
+    # only strip the final extension: str.replace would also delete any ".gz"
+    # found elsewhere in the path (directory names, "x.gz.bak", ...)
+    if file_path.endswith(extension):
+        return file_path[:-len(extension)]
+    return file_path
 def convert_to_binary(input_dir_path, compressed_dir_path):
    """
@@ -20,7 +26,7 @@ def convert_to_binary(input_dir_path, compressed_dir_path):
    for filename in os.listdir(input_dir_path):
        file_path = os.path.join(input_dir_path, filename)
-        result_file_path = os.path.join(compressed_dir_path, filename)+".gz"
+        result_file_path = os.path.join(compressed_dir_path, compressed_name(filename))
        # checking if it is a file
        if os.path.isfile(file_path):
@@ -38,14 +44,14 @@ def uncompress_files(compressed_dir_path, uncompressed_dir_path):
    """
    for filename in os.listdir(compressed_dir_path):
        file_path = os.path.join(compressed_dir_path, filename)
-        result_file_path = os.path.join(uncompressed_dir_path, filename.replace(".gz",""))
+        result_file_path = os.path.join(uncompressed_dir_path, uncompressed_name(filename))
        # checking if it is a file
        if os.path.isfile(file_path):
            pre_processing.unzip_file(file_path)
            # move the unzipped file
-            os.replace(file_path.replace(".gz", ""), result_file_path)
+            os.replace(uncompressed_name(file_path), result_file_path)
        elif os.path.isdir(file_path):
            print("error post processing (uncompress_files) : directory found in compressed_dir_path", filename)

--- a/pre_processing.py
+++ b/pre_processing.py
@@ -33,6 +33,13 @@ def unzip_file(file_path):
    subprocess.run('/bin/bash -c "$COMMAND"', shell=True, env={'COMMAND': decompression_command})
+def compressed_name(file_path):
+    """
+    return the name of the compressed version of file_path (file_path followed by the ".gz" extension)
+    """
+    extension = ".gz"
+    return file_path + extension
+def uncompressed_name(file_path):
+    """
+    return file_path without its trailing ".gz" extension
+    a path that does not end with ".gz" is returned unchanged
+    """
+    extension = ".gz"
+    # only strip the final extension: str.replace would also delete any ".gz"
+    # found elsewhere in the path (directory names, "x.gz.bak", ...)
+    if file_path.endswith(extension):
+        return file_path[:-len(extension)]
+    return file_path
 def insert_path_in_files(input_dir_path: str, rearanged_files_dir_path: str) -> None:
    """
    copy all files from a directory and paste them at the same level in output directory, with their relative path written before first line
@@ -90,7 +97,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
            exit(0)
        # get binary size of the compressed file
-        binary_len = len(file_to_dna.convert_file_to_bits(compressed_file_path+".gz"))
+        binary_len = len(file_to_dna.convert_file_to_bits(compressed_name(compressed_file_path)))
        if binary_len <= max_binary_length: # if acceptable length, it's perfect
            files_compressed_size[filename] = binary_len # save the compressed size for this file
@@ -98,7 +105,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
        else:
            # file too large, nedd to split it
-            os.remove(compressed_file_path+".gz") # delete the compressed file
+            os.remove(compressed_name(compressed_file_path)) # delete the compressed file
            # read the original file as bytes
            with open(file_path, "rb") as input_file:
@@ -143,7 +150,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
                    # compress the split_file to the compressed directory
                    zip_file(split_file_path, compressed_file_path + split_file_footer)
-                    compressed_subfile_path = compressed_file_path + split_file_footer + ".gz"
+                    compressed_subfile_path = compressed_name(compressed_file_path + split_file_footer)
                    # check the size of the subfile
                    binary_len = len(file_to_dna.convert_file_to_bits(compressed_subfile_path))
@@ -197,7 +204,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
                    # add the merged file path to the bis dict with it's compressed size
                    zip_file(merged_file_path, compressed_merged_file_path)
-                    merged_binary_len = len(file_to_dna.convert_file_to_bits(compressed_merged_file_path+".gz"))
+                    merged_binary_len = len(file_to_dna.convert_file_to_bits(compressed_name(compressed_merged_file_path)))
                    if merged_binary_len >= max_binary_length: 
                        print("error merging result too large", compressed_merged_file_path)
@@ -207,8 +214,8 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
                    files_compressed_size[merged_file_name] = merged_binary_len
                    # remove the 2 compressed files of the 2 files
-                    os.remove(os.path.join(compressed_dir_path, filename)+".gz")
+                    os.remove(os.path.join(compressed_dir_path, compressed_name(filename)))
-                    os.remove(os.path.join(compressed_dir_path, filename_2)+".gz")
+                    os.remove(os.path.join(compressed_dir_path, compressed_name(filename_2)))
                    # set the compressed size of the 2 files to a too high number to avoid them to be reused for merging
                    files_compressed_size[filename] = 2*max_binary_length
@@ -238,7 +245,7 @@ def convert_to_sequence(compressed_dir_path, payload_fragments_dir_path):
        # checking if it is a file
        if os.path.isfile(file_path):
-            output_file_path = os.path.join(payload_fragments_dir_path, filename.replace(".gz", ""))
+            output_file_path = os.path.join(payload_fragments_dir_path, uncompressed_name(filename))
            dna_sequence = file_to_dna.encode_file(file_path, output_file_path) # convert binaries into a dna sequence and save result in the output file