diff --git a/post_processing.py b/post_processing.py
index 72339c292e99e38c381d1eaed4469ccfe8a438a0..6703f71fe98a07a0c8a66e8072e89270d47e739b 100755
--- a/post_processing.py
+++ b/post_processing.py
@@ -12,12 +12,6 @@ do the opposite of pre_processing.py
 take dna sequences and convert them into files
 """
 
-def compressed_name(file_path):
-    return file_path+".gz"
-
-def uncompressed_name(file_path):
-    return file_path.replace(".gz", "")
-
 
 def convert_to_binary(input_dir_path, compressed_dir_path):
     """
@@ -26,7 +20,7 @@ def convert_to_binary(input_dir_path, compressed_dir_path):
     
     for filename in os.listdir(input_dir_path):
         file_path = os.path.join(input_dir_path, filename)
-        result_file_path = os.path.join(compressed_dir_path, compressed_name(filename))
+        result_file_path = os.path.join(compressed_dir_path, pre_processing.get_compressed_name(filename))
         
         # checking if it is a file
         if os.path.isfile(file_path):
@@ -44,14 +38,12 @@ def uncompress_files(compressed_dir_path, uncompressed_dir_path):
     """
     for filename in os.listdir(compressed_dir_path):
         file_path = os.path.join(compressed_dir_path, filename)
-        result_file_path = os.path.join(uncompressed_dir_path, uncompressed_name(filename))
+        uncompressed_file_path = os.path.join(uncompressed_dir_path, pre_processing.get_uncompressed_name(filename))
 
         
         # checking if it is a file
         if os.path.isfile(file_path):
-            pre_processing.unzip_file(file_path)
-            # move the unzipped file
-            os.replace(uncompressed_name(file_path), result_file_path)
+            pre_processing.unzip_file(file_path, uncompressed_file_path)
             
         elif os.path.isdir(file_path):
             print("error post processing (uncompress_files) : directory found in compressed_dir_path", filename)
diff --git a/pre_processing.py b/pre_processing.py
index dfbc2640c08912c832bff01b05eb545cbcab995a..53c645273972aace9049e95ee43769e59351ac30 100755
--- a/pre_processing.py
+++ b/pre_processing.py
@@ -17,28 +17,78 @@ splitting files that are too large for 1 assembly
 """
 
 
-def zip_file(file_path, output_path):
+def zip_file(file_path, output_path, compression_type="gzip"):
     """
-    split the file with gzip and write it at the output path
+    compress the file and write it at the output path
     """
-    compression_command = "gzip -c9 "+ file_path + " > "+output_path+".gz"
-    subprocess.run('/bin/bash -c "$COMMAND"', shell=True, env={'COMMAND': compression_command})
+    
+    if compression_type == "gzip":
+        compression_command = "gzip -c9 "+ file_path + " > "+output_path
+        subprocess.run('/bin/bash -c "$COMMAND"', shell=True, env={'COMMAND': compression_command})
+        return
+    
+    if compression_type == "cmix":
+        if not os.path.isfile(output_path):
+            compression_command = "/udd/oboulle/Documents/result_analysis/compression_analysis/cmix/cmix -c "+ file_path + " "+output_path
+            subprocess.run('/bin/bash -c "$COMMAND"', shell=True, env={'COMMAND': compression_command})
+        else:
+            print("already done",output_path)
+        return
+    # type not supported
+    print("compression error, unknown format:",compression_type)
+    exit(0)
 
 
-def unzip_file(file_path):
+def unzip_file(file_path, output_path, compression_type="gzip"):
     """
-    unzip the file and write it just where it is
+    uncompress the file and write it at the output path
     """
-    decompression_command = "gzip -d "+ file_path
-    subprocess.run('/bin/bash -c "$COMMAND"', shell=True, env={'COMMAND': decompression_command})
-
-
-def compressed_name(file_path):
-    return file_path+".gz"
-
-def uncompressed_name(file_path):
-    return file_path.replace(".gz", "")
+    
+    if compression_type == "gzip":
+        decompression_command = "gzip -d "+ file_path
+        subprocess.run('/bin/bash -c "$COMMAND"', shell=True, env={'COMMAND': decompression_command})
+        # move the unzipped file to the defined output path
+        os.replace(get_uncompressed_name(file_path, "gzip"), output_path)
+        return
+    
+    if compression_type == "cmix":
+        decompression_command = "/udd/oboulle/Documents/result_analysis/compression_analysis/cmix/cmix -d "+ file_path + " "+output_path
+        subprocess.run('/bin/bash -c "$COMMAND"', shell=True, env={'COMMAND': decompression_command})
+        return
+    # type not supported
+    print("decompression error, unknown format:",compression_type)
+    exit(0)
 
+
+def get_compressed_name(file_path, compression_type="gzip"):
+    """
+    return the file path with the compression format extension added
+    """
+
+    if compression_type == "gzip":
+        return file_path+".gz"
+    
+    if compression_type == "cmix":
+        return file_path+".cx"
+    # type not supported
+    print("get_compressed_name error, unknown format:",compression_type)
+    exit(0)
+    
+    
+def get_uncompressed_name(file_path, compression_type="gzip"):
+    """
+    return the file path without the compression format extension
+    """
+
+    if compression_type == "gzip":
+        return file_path.replace(".gz", "")
+    
+    if compression_type == "cmix":
+        return file_path.replace(".cx", "")
+    # type not supported
+    print("get_uncompressed_name error, unknown format:",compression_type)
+    exit(0)
+    
 
 def insert_path_in_files(input_dir_path: str, rearanged_files_dir_path: str) -> None:
     """
@@ -86,18 +130,18 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
         
         # compress the file
         file_path = os.path.join(rearanged_files_dir_path, filename)
-        compressed_file_path = os.path.join(compressed_dir_path, filename)
+        comp_file_path = get_compressed_name(os.path.join(compressed_dir_path, filename))
         
         # checking if it is a file
         if os.path.isfile(file_path):
-            zip_file(file_path, compressed_file_path)
+            zip_file(file_path, comp_file_path)
             
         elif os.path.isdir(file_path):
             print("error pre processing (compress_all) : directory found in rearanged dir path", filename)
             exit(0)
             
         # get binary size of the compressed file
-        binary_len = len(file_to_dna.convert_file_to_bits(compressed_name(compressed_file_path)))
+        binary_len = len(file_to_dna.convert_file_to_bits(comp_file_path))
         
         if binary_len <= max_binary_length: # if acceptable length, it's perfect
             files_compressed_size[filename] = binary_len # save the compressed size for this file
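
file_to_dna.convert_file_to_bits is not part of this diff; the only property relied on here is that len() of its result gives the size of the compressed file in bits. A minimal stand-in consistent with that usage, assuming each byte maps to 8 bits:

    def convert_file_to_bits(file_path):
        # hypothetical stand-in: expand each byte of the file to its 8-bit string
        with open(file_path, "rb") as f:
            return "".join(format(byte, "08b") for byte in f.read())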
@@ -105,7 +149,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
         else:
             # file too large, need to split it
             
-            os.remove(compressed_name(compressed_file_path)) # delete the compressed file
+            os.remove(comp_file_path) # delete the compressed file
             
             # read the original file as bytes
             with open(file_path, "rb") as input_file:
@@ -147,10 +191,10 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
                     with open(split_file_path, "wb") as f: # write the bytes content
                         f.write(split_file_bytes_content)
                     
+                    compressed_subfile_path = get_compressed_name(get_uncompressed_name(comp_file_path) + split_file_footer)
                     # compress the split_file to the compressed directory
-                    zip_file(split_file_path, compressed_file_path + split_file_footer)
+                    zip_file(split_file_path, compressed_subfile_path)
 
-                    compressed_subfile_path = compressed_name(compressed_file_path + split_file_footer)
                     # check the size of the subfile
                     binary_len = len(file_to_dna.convert_file_to_bits(compressed_subfile_path))
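
The reordering above computes the sub-file name before compressing, splicing the footer in front of the extension; this reproduces the name the old code obtained implicitly when zip_file still appended ".gz" itself. For example (the footer value is hypothetical):

    comp_file_path = "compressed/reads.fasta.gz"
    split_file_footer = "_2"
    get_compressed_name(get_uncompressed_name(comp_file_path) + split_file_footer)
    # -> "compressed/reads.fasta_2.gz"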
         
@@ -166,28 +210,35 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
                         # if the sub file is recreated, the new size will overwrite this one
 
     def merge_short_files(files_compressed_size: dict):
+        """
+        each molecule can store a maximum fixed number of bits
+        since it's more efficient to have a few long molecule than a lot of short molecule,
+        small files can be merged in one same molecule
+        """
     
         #sorted_files_sizes = sorted(files_compressed_size.items(), key=lambda item: item[1], reverse=True) # sort dict by sizes from highest to lowest
         
-        files_compressed_size_bis = {}
+        #files_compressed_size_bis = {}
         
-        merged_files_paths = []
+        #merged_files_paths = []
         
-        new_merge = False # true if at least one new merge has been made
+        new_merge = False # set to true if at least one new merge has been made
         
         for i, filename in enumerate(list(files_compressed_size.keys())[:-1]):
-            unmerged = True # set to false if a merge is made
-            file_compressed_size = files_compressed_size[filename]
+            file_compressed_size = files_compressed_size[filename] # get size of the compressed file
             
-            if file_compressed_size >= max_binary_length: # impossible to merge because too large
-                continue
+            if file_compressed_size is None or file_compressed_size >= max_binary_length: # impossible to merge because too large, or already used in a merge (set to None)
+                continue # skip this file
             
             for filename_2 in list(files_compressed_size.keys())[i+1:]:
                 file_compressed_size_2 = files_compressed_size[filename_2]
                 
-                if file_compressed_size + file_compressed_size_2 <= max_binary_length:
+                if file_compressed_size_2 is not None and file_compressed_size + file_compressed_size_2 <= max_binary_length:
+                    # the sum of the 2 compressed files is lower than what can be stored,
+                    # so the original files will be merged and recompressed,
+                    # the compression of the merged file is expected to be smaller than the sum of the individual compressed sizes
                     
-                    # merge the 2 files
+                    # get the binary content of each file
                     with open(os.path.join(rearanged_files_dir_path, filename), "rb") as input_file:
                         bytes_content = b"".join(input_file.readlines())
                     with open(os.path.join(rearanged_files_dir_path, filename_2), "rb") as input_file:
@@ -196,15 +247,17 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
                     # remove the "merged_" from the name of already merged files for visibility
                     merged_file_name = "merged_" + filename.replace("merged_","") + "+" + filename_2.replace("merged_","") 
                     merged_file_path = os.path.join(rearanged_files_dir_path, merged_file_name)
-                    with open(merged_file_path, "wb") as f: # write the bytes content
+                    compressed_merged_file_path = get_compressed_name(os.path.join(compressed_dir_path, merged_file_name))
+                    print("new merge :",filename,"and",filename_2)
+                    
+                    with open(merged_file_path, "wb") as f: # write the sum of bytes content
                         f.write(bytes_content + bytes_content_2)
                     
-                    compressed_merged_file_path = os.path.join(compressed_dir_path, merged_file_name)
-
-                    # add the merged file path to the bis dict with it's compressed size
+                    # compress the merged file created
                     zip_file(merged_file_path, compressed_merged_file_path)
                     
-                    merged_binary_len = len(file_to_dna.convert_file_to_bits(compressed_name(compressed_merged_file_path)))
+                    # test its size just in case, but it should fit in a molecule
+                    merged_binary_len = len(file_to_dna.convert_file_to_bits(compressed_merged_file_path))
         
                     if merged_binary_len >= max_binary_length: 
                         print("error merging result too large", compressed_merged_file_path)
@@ -213,23 +266,25 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
                     # add the merged file to the dict because it can still be used for other merging if it's short enough
                     files_compressed_size[merged_file_name] = merged_binary_len
                     
-                    # remove the 2 compressed files of the 2 files
-                    os.remove(os.path.join(compressed_dir_path, compressed_name(filename)))
-                    os.remove(os.path.join(compressed_dir_path, compressed_name(filename_2)))
+                    # remove the 2 old compressed files of the 2 files
+                    os.remove(os.path.join(compressed_dir_path, get_compressed_name(filename)))
+                    os.remove(os.path.join(compressed_dir_path, get_compressed_name(filename_2)))
+                    
+                    # set the compressed size of the 2 files to None to prevent them from being reused for merging
+                    files_compressed_size[filename] = None
+                    files_compressed_size[filename_2] = None
                     
-                    # set the compressed size of the 2 files to a too high number to avoid them to be reused for merging
-                    files_compressed_size[filename] = 2*max_binary_length
-                    files_compressed_size[filename_2] = 2*max_binary_length
+                    new_merge = True # remember that at least one new merge has been made in this pass
                     
-                    new_merge = True
-                    break
+                    break # leave the inner loop; other merges can still be made as the outer loop continues
                 
         # keep trying to create other merges if at least one merge has been made
-        # otherwise, the loop can end since it no longer find possible merges
         if new_merge:
             print("continue merging...")
+            print(files_compressed_size)
             merge_short_files(files_compressed_size)
-            
+        # otherwise, the recursion can end since no more merges can be found
+        
     print(files_compressed_size)
     merge_short_files(files_compressed_size)
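
Stripped of the file I/O, merge_short_files is a greedy first-fit pairing that recurses until a pass produces no new merge. A self-contained sketch of just that control flow, using the summed compressed sizes as a stand-in for recompressing the merged file (names and sizes are hypothetical):

    def merge_pass(sizes, max_len):
        # sizes maps name -> compressed size in bits, or None once consumed by a merge
        new_merge = False
        names = list(sizes.keys())
        for i, a in enumerate(names[:-1]):
            if sizes[a] is None or sizes[a] >= max_len:
                continue  # already merged away, or too large to pair
            for b in names[i + 1:]:
                if sizes[b] is not None and sizes[a] + sizes[b] <= max_len:
                    sizes["merged_" + a + "+" + b] = sizes[a] + sizes[b]  # upper bound on the real recompressed size
                    sizes[a] = sizes[b] = None  # consume both inputs
                    new_merge = True
                    break  # a is consumed; move on in the outer loop
        if new_merge:
            merge_pass(sizes, max_len)  # merged entries can themselves be paired in the next pass

    sizes = {"f1": 40, "f2": 30, "f3": 25, "f4": 90}
    merge_pass(sizes, max_len=100)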
 
@@ -245,7 +300,7 @@ def convert_to_sequence(compressed_dir_path, payload_fragments_dir_path):
         # checking if it is a file
         if os.path.isfile(file_path):
             
-            output_file_path = os.path.join(payload_fragments_dir_path, uncompressed_name(filename))
+            output_file_path = os.path.join(payload_fragments_dir_path, get_uncompressed_name(filename))
 
             dna_sequence = file_to_dna.encode_file(file_path, output_file_path) # convert binaries into a dna sequence and save result in the output file
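
Taken together, the naming logic now lives in one place and both directions use the same helpers. A hedged sketch of the intended round trip, assuming the directory-based entry points shown in this diff (directory names are hypothetical):

    import pre_processing
    import post_processing

    # encode: compress (splitting/merging to fit a molecule), then convert to DNA sequences
    pre_processing.compress_and_split("rearranged_files/", "compressed/")
    pre_processing.convert_to_sequence("compressed/", "payload_fragments/")

    # decode: sequences back to compressed binaries, then back to the original files
    post_processing.convert_to_binary("decoded_sequences/", "recompressed/")
    post_processing.uncompress_files("recompressed/", "restored_files/")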