moved filtering

3cc52a7b · BOULLE Olivier · 73544462 · 3cc52a7b · 3cc52a7b
Commit 3cc52a7b authored 2 years ago by BOULLE Olivier
--- a/binary_dna_conversion.py
+++ b/binary_dna_conversion.py
@@ -265,22 +265,6 @@ def remove_ban_words_z_encoding(sequence: str, z_method_offset=0) -> str:
    return sequence_wo_bans


-def apply_binary_filter(binary_string: str) -> str:
-    """
-    apply a filter to the binary string
-    use the length of the string as a default hash key, so 2 files of different size have a different hash key
-    this is because some zip results starts with the same octets
-    we just need to use the same key for the un-hashing
-    """
-    filter = hashing.hash_string_to_formated_base2(str(len(binary_string)), len(binary_string))
-    filtered_binary_string = ""
-    for i in range(len(binary_string)):
-        filter_bit = int(filter[i]) # get filter bit of index i
-        filtered_binary_string += str((int(binary_string[i])+filter_bit) % 2) # get reverse binary string of index i and XOR it with the filter bit
-         
-    return filtered_binary_string
-
-
 # =================== main ======================= #
 if __name__ == '__main__':
    #doc_path = sys.argv[1]

--- a/file_to_dna.py
+++ b/file_to_dna.py
@@ -49,6 +49,22 @@ def compute_check_sum(binary_string: str) -> str:
        
    return bin_sum.zfill(CHECK_SUM_SIZE) # fill the beginning with 0 to get the correct size

+
+def apply_binary_filter(binary_string: str) -> str:
+    """
+    XOR the binary string, bit by bit, with a mask derived from its own length.
+
+    The mask comes from hashing.hash_string_to_formated_base2, keyed on the
+    string length, so two inputs of different sizes get different masks (some
+    zip outputs start with the same octets). Applying the filter again with
+    the same key removes it; the mask is assumed to be a '0'/'1' string.
+    """
+    size = len(binary_string)
+    mask = hashing.hash_string_to_formated_base2(str(size), size)
+    # join once instead of growing a string with += on every bit
+    return "".join(str((int(bit) + int(mask[i])) % 2)
+                   for i, bit in enumerate(binary_string))
+
  
 def convert_file_to_bits(input_path: str) -> str:
    """
@@ -192,7 +208,7 @@ def encode_file(input_path: str, output_path: str) -> None:
    
    # apply a filter to the binary string -> shuffle the data to avoid long rows of 0 or 1, and avoid rows repetitions 
    binary_string = binary_string[::-1] # reverse the binary string, because 2 files can have the same start with ziping methods
-    filtered_binary_string = bdc.apply_binary_filter(binary_string)
+    filtered_binary_string = apply_binary_filter(binary_string)
    
    
    # calculate and add the binary check_sum at the end
@@ -274,7 +290,7 @@ def decode_file(input_path: str, output_path: str) -> None:
        exit(1)
        
    # apply the same filter used in the encoding to the binary string to remove it  
-    binary_string = bdc.apply_binary_filter(binary_string)
+    binary_string = apply_binary_filter(binary_string)
    binary_string = binary_string[::-1] # reverse the binary string to get the original

    # case binaries length is not multiple of 8 -> remove the excess bits at the beginning that have been added in the encoding to get a round number of blocks