diff --git a/pre_processing.py b/pre_processing.py
index 53c645273972aace9049e95ee43769e59351ac30..6e774e51b9088725f57d54314fb00b248fa46326 100755
--- a/pre_processing.py
+++ b/pre_processing.py
@@ -16,8 +16,10 @@ zipping all files
 splitting files that are too large for 1 assembly
 """
 
+compression_type = "gzip" # set the used compression method
 
-def zip_file(file_path, output_path, compression_type="gzip"):
+
+def zip_file(file_path, output_path):
     """
     compress the file and write it at the output path
     """
@@ -39,7 +41,7 @@ def zip_file(file_path, output_path, compression_type="gzip"):
     exit(0)
 
 
-def unzip_file(file_path, output_path, compression_type="gzip"):
+def unzip_file(file_path, output_path):
     """
     uncompress the file and write it just where it is
     """
@@ -48,7 +50,7 @@ def unzip_file(file_path, output_path, compression_type="gzip"):
         decompression_command = "gzip -d "+ file_path
         subprocess.run('/bin/bash -c "$COMMAND"', shell=True, env={'COMMAND': decompression_command})
         # move the unzipped file to the defined output path
-        os.replace(get_uncompressed_name(file_path, "gzip"), output_path)
+        os.replace(get_uncompressed_name(file_path), output_path)
         return
     
     if compression_type == "cmix":
@@ -60,7 +62,7 @@ def unzip_file(file_path, output_path, compression_type="gzip"):
     exit(0)
 
         
-def get_compressed_name(file_path, compression_type="gzip"):
+def get_compressed_name(file_path):
     
     if compression_type == "gzip":
         return file_path+".gz"
@@ -72,7 +74,7 @@ def get_compressed_name(file_path, compression_type="gzip"):
     exit(0)
     
     
-def get_uncompressed_name(file_path, compression_type="gzip"):
+def get_uncompressed_name(file_path):
     
     if compression_type == "gzip":
         return file_path.replace(".gz", "")
@@ -227,56 +229,58 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
         for i, filename in enumerate(list(files_compressed_size.keys())[:-1]):
             file_compressed_size = files_compressed_size[filename] # get size of the compressed file
             
-            if file_compressed_size is None or file_compressed_size >= max_binary_length: # impossible to merge because too large, or has already be used in a merge (set to None)
+            if file_compressed_size is None or file_compressed_size > max_binary_length: # impossible to merge because it is too large, or it has already been used in a merge (set to None)
                 continue # skip this file
             
             for filename_2 in list(files_compressed_size.keys())[i+1:]:
                 file_compressed_size_2 = files_compressed_size[filename_2]
                 
-                if file_compressed_size_2 is None or file_compressed_size + file_compressed_size_2 <= max_binary_length:
-                    # the sum of the 2 compressed files is lower than what can be stored,
-                    # so the original files will be merged and recompressed,
-                    # the compression of a merging is supposed to be smaller than the sum of compressions of each file
-                    
-                    # get the binary content of each file
-                    with open(os.path.join(rearanged_files_dir_path, filename), "rb") as input_file:
-                        bytes_content = b"".join(input_file.readlines())
-                    with open(os.path.join(rearanged_files_dir_path, filename_2), "rb") as input_file:
-                        bytes_content_2 = b"".join(input_file.readlines())
-                        
-                    # remove the "merged_" from the name of already merged files for visibility
-                    merged_file_name = "merged_" + filename.replace("merged_","") + "+" + filename_2.replace("merged_","") 
-                    merged_file_path = os.path.join(rearanged_files_dir_path, merged_file_name)
-                    compressed_merged_file_path = get_compressed_name(os.path.join(compressed_dir_path, merged_file_name))
-                    print("new merge :",filename,"and",filename_2)
-                    
-                    with open(merged_file_path, "wb") as f: # write the sum of bytes content
-                        f.write(bytes_content + bytes_content_2)
-                    
-                    # compress the merged file created
-                    zip_file(merged_file_path, compressed_merged_file_path)
-                    
-                    # test its size just in case, but it should fit in a molecule
-                    merged_binary_len = len(file_to_dna.convert_file_to_bits(compressed_merged_file_path))
-        
-                    if merged_binary_len >= max_binary_length: 
-                        print("error merging result too large", compressed_merged_file_path)
-                        exit(0)
-                    
-                    # add the merged file to the dict because it can still be used for other merging if it's short enough
-                    files_compressed_size[merged_file_name] = merged_binary_len
-                    
-                    # remove the 2 old compressed files of the 2 files
-                    os.remove(os.path.join(compressed_dir_path, get_compressed_name(filename)))
-                    os.remove(os.path.join(compressed_dir_path, get_compressed_name(filename_2)))
-                    
-                    # set the compressed size of the 2 files to None to avoid them to be reused for merging
-                    files_compressed_size[filename] = None
-                    files_compressed_size[filename_2] = None
-                    
-                    new_merge = True # keep in memory that at least one new merge has been made in this loop
+                if file_compressed_size_2 is None or file_compressed_size + file_compressed_size_2 > max_binary_length:
+                    continue # skip this candidate pair
+                
+                # the sum of the 2 compressed sizes fits within what can be stored,
+                # so the original files will be merged and recompressed;
+                # compressing the merged file should produce a smaller result than the sum of the separate compressions
+                
+                # get the binary content of each file
+                with open(os.path.join(rearanged_files_dir_path, filename), "rb") as input_file:
+                    bytes_content = input_file.read()
+                with open(os.path.join(rearanged_files_dir_path, filename_2), "rb") as input_file:
+                    bytes_content_2 = input_file.read()
                     
-                    break # leave the second loop, but others merges can still be done in the continuation of the first loop
+                # remove the "merged_" from the name of already merged files for visibility
+                merged_file_name = "merged_" + filename.replace("merged_","") + "+" + filename_2.replace("merged_","") 
+                merged_file_path = os.path.join(rearanged_files_dir_path, merged_file_name)
+                compressed_merged_file_path = get_compressed_name(os.path.join(compressed_dir_path, merged_file_name))
+                print("new merge :",filename,"and",filename_2)
+                
+                with open(merged_file_path, "wb") as f: # write the concatenated bytes of both files
+                    f.write(bytes_content + bytes_content_2)
+                
+                # compress the newly created merged file
+                zip_file(merged_file_path, compressed_merged_file_path)
+
+                # check its size just in case, even though it should fit in a molecule
+                merged_binary_len = len(file_to_dna.convert_file_to_bits(compressed_merged_file_path))
+
+                if merged_binary_len > max_binary_length:
+                    print("error: merging result too large", compressed_merged_file_path)
+                    exit(1)
+                
+                # add the merged file to the dict because it can still take part in further merges if it is short enough
+                files_compressed_size[merged_file_name] = merged_binary_len
+                
+                # remove the 2 now-redundant compressed files
+                os.remove(os.path.join(compressed_dir_path, get_compressed_name(filename)))
+                os.remove(os.path.join(compressed_dir_path, get_compressed_name(filename_2)))
+                
+                # set the compressed size of the 2 files to None so they are not reused for merging
+                files_compressed_size[filename] = None
+                files_compressed_size[filename_2] = None
+                
+                new_merge = True # remember that at least one new merge was made in this pass
+
+                break # leave the inner loop; other merges can still be made as the outer loop continues
                 
         # continue to try to create other merging if at least one merge has been made
         if new_merge:
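
For reference, a minimal round-trip sketch of the reworked parameterless helpers, assuming the module-level compression_type is left at "gzip", the gzip binary is on PATH, and "example.txt" is a hypothetical input file:

    import pre_processing

    pre_processing.compression_type = "gzip"  # module-level setting replaces the old per-call argument
    compressed_path = pre_processing.get_compressed_name("example.txt")  # -> "example.txt.gz"
    pre_processing.zip_file("example.txt", compressed_path)
    pre_processing.unzip_file(compressed_path, "example_restored.txt")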