Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 084deaa3 authored by BOULLE Olivier's avatar BOULLE Olivier
Browse files

hashing to base 2 for binary filter

parent a3583fdf
No related branches found
No related tags found
No related merge requests found
......@@ -17,13 +17,13 @@ def base10_to_lower_base_convert(i: int, base: int) -> str:
return "".join(result)
def base16_to_baseX(base16_hash: str, baseX: int) -> str:
    """
    convert a string representing a number in base 16 to a string representing the same number in base baseX

    :param base16_hash: number written in base 16
    :param baseX: target base, passed unchanged to base10_to_lower_base_convert
    :return: the result of base10_to_lower_base_convert for the parsed value
    """
    # int() keeps only the numeric value, so leading zeros of base16_hash are not carried into base10_hash
    base10_hash = int(base16_hash, base=16)
    return base10_to_lower_base_convert(base10_hash, baseX)


def base16_to_base4(base16_hash: str) -> str:
    """
    convert a string representing a number in base 16 to a string representing a number in base 4

    kept as a thin wrapper around base16_to_baseX so that callers of the former name keep working
    """
    return base16_to_baseX(base16_hash, 4)
def hash_string_to_formated_base4(input_string: str, hash_size: int) -> str:
......@@ -44,7 +44,30 @@ def hash_string_to_formated_base4(input_string: str, hash_size: int) -> str:
# even if the total hashing cannot be the same, we try to avoid having some k_hash parts (of size 128) that are exactly the same between two different total hashes
k_hash = sha256_hash(input_string + "A" + str(k))
total_hash += k_hash
base4_total_hash = base16_to_base4(total_hash)
base4_total_hash = base16_to_baseX(total_hash, 4)
return base4_total_hash[:hash_size] # remove excess size
def hash_string_to_formated_base2(input_string: str, hash_size: int) -> str:
    """
    apply SHA256 hashing to a string to get a hash in base 2 and of a specified size

    a basic hash is 64 long in base 16, so 256 long when converted to base 2 (each base 16 digit gives 4 base 2 digits)
    the input string is hashed several times, each time with "A" + k added at the end (k is an incremented hash number)
    enough basic hashes are concatenated to cover more than hash_size base 2 digits
    the total hash is then converted to base 2 and cut to the wanted size

    :param input_string: string to hash
    :param hash_size: wanted number of base 2 digits in the result
    :return: string of base 2 digits, cut to at most hash_size characters
    """
    # 64 base 16 digits * 4 = 256 base 2 digits per basic hash; integer form of (hash_size/4) // 64 + 1
    hash_number = hash_size // 256 + 1
    k_hashes = []
    for k in range(hash_number):
        # hash the string with a "A" and a number added, the "A" avoids getting the same hash for different strings
        # ex : without it, input = "a" with k = 10 and input = "a1" with k = 0 would both hash "a10"
        # with it, "aA10" and "a1A0" are hashed, so the results are completely different
        # this keeps two different total hashes from sharing an identical k_hash part (64 base 16 digits each)
        k_hashes.append(sha256_hash(input_string + "A" + str(k)))
    total_hash = "".join(k_hashes)
    base2_total_hash = base16_to_baseX(total_hash, 2)
    # NOTE(review): base16_to_baseX goes through int(), which drops leading zero digits of total_hash;
    # unless the converter pads its output, the result can be shorter than hash_size - confirm
    return base2_total_hash[:hash_size]  # remove excess size
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment