Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 3cc52a7b authored by BOULLE Olivier
Browse files

moved filtering

parent 73544462
No related branches found
No related tags found
No related merge requests found
......@@ -265,22 +265,6 @@ def remove_ban_words_z_encoding(sequence: str, z_method_offset=0) -> str:
return sequence_wo_bans
def apply_binary_filter(binary_string: str) -> str:
    """
    XOR a binary string, bit by bit, with a mask derived from its length.

    The mask comes from hashing.hash_string_to_formated_base2, keyed on the
    length of the input, so two inputs of different sizes get different
    masks (some zip results start with the same octets). The output has the
    same length as the input, so calling this function again on the result
    uses the same key; this removes the mask, assuming the hash is
    deterministic.

    :param binary_string: string of binary digits (expected '0' / '1')
    :return: the masked binary string, same length as the input
    :raises IndexError: if the generated mask is shorter than the input
    :raises ValueError: if a character cannot be converted with int()
    """
    length = len(binary_string)
    # "mask" rather than "filter" to avoid shadowing the builtin filter()
    mask = hashing.hash_string_to_formated_base2(str(length), length)
    # addition modulo 2 is a XOR of the two bits; the mask is indexed (not
    # zipped) so that a too-short mask still fails loudly
    return "".join(
        str((int(mask[i]) + int(bit)) % 2)
        for i, bit in enumerate(binary_string)
    )
# =================== main ======================= #
if __name__ == '__main__':
#doc_path = sys.argv[1]
......
......@@ -49,6 +49,22 @@ def compute_check_sum(binary_string: str) -> str:
return bin_sum.zfill(CHECK_SUM_SIZE) # fill the beginning with 0 to get the correct size
def apply_binary_filter(binary_string: str) -> str:
    """
    XOR a binary string, bit by bit, with a mask derived from its length.

    The mask comes from hashing.hash_string_to_formated_base2, keyed on the
    length of the input, so two inputs of different sizes get different
    masks (some zip results start with the same octets). The output has the
    same length as the input, so calling this function again on the result
    uses the same key; this removes the mask, assuming the hash is
    deterministic.

    :param binary_string: string of binary digits (expected '0' / '1')
    :return: the masked binary string, same length as the input
    :raises IndexError: if the generated mask is shorter than the input
    :raises ValueError: if a character cannot be converted with int()
    """
    length = len(binary_string)
    # "mask" rather than "filter" to avoid shadowing the builtin filter()
    mask = hashing.hash_string_to_formated_base2(str(length), length)
    # addition modulo 2 is a XOR of the two bits; the mask is indexed (not
    # zipped) so that a too-short mask still fails loudly
    return "".join(
        str((int(mask[i]) + int(bit)) % 2)
        for i, bit in enumerate(binary_string)
    )
def convert_file_to_bits(input_path: str) -> str:
"""
......@@ -192,7 +208,7 @@ def encode_file(input_path: str, output_path: str) -> None:
# apply a filter to the binary string -> shuffle the data to avoid long rows of 0 or 1, and avoid rows repetitions
binary_string = binary_string[::-1] # reverse the binary string, because 2 files can have the same start with ziping methods
filtered_binary_string = bdc.apply_binary_filter(binary_string)
filtered_binary_string = apply_binary_filter(binary_string)
# calculate and add the binary check_sum at the end
......@@ -274,7 +290,7 @@ def decode_file(input_path: str, output_path: str) -> None:
exit(1)
# apply the same filter used in the encoding to the binary string to remove it
binary_string = bdc.apply_binary_filter(binary_string)
binary_string = apply_binary_filter(binary_string)
binary_string = binary_string[::-1] # reverse the binary string to get the original
# case binaries length is not multiple of 8 -> remove the excess bits at the beginning that have been added in the encoding to get a round number of blocks
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment