Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Source_Encoding
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
GitLab upgrade completed. Current version is 17.11.3.
Show more breadcrumbs
dnarXiv
Source_Encoding
Commits
3cc52a7b
Commit
3cc52a7b
authored
2 years ago
by
BOULLE Olivier
Browse files
Options
Downloads
Patches
Plain Diff
moved filtering
parent
73544462
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
binary_dna_conversion.py
+0
-16
0 additions, 16 deletions
binary_dna_conversion.py
file_to_dna.py
+18
-2
18 additions, 2 deletions
file_to_dna.py
with
18 additions
and
18 deletions
binary_dna_conversion.py
+
0
−
16
View file @
3cc52a7b
...
...
@@ -265,22 +265,6 @@ def remove_ban_words_z_encoding(sequence: str, z_method_offset=0) -> str:
return
sequence_wo_bans
def apply_binary_filter(binary_string: str) -> str:
    """
    Apply a XOR mask to a binary string and return the masked string.

    The mask is obtained from hashing.hash_string_to_formated_base2, using the
    length of the input (as a string) as the hash key, so 2 files of different
    sizes get a different mask. This is because some zip results start with
    the same octets.

    Each output bit is (input bit + mask bit) modulo 2, so the output has the
    same length as the input and applying this function a second time to the
    output restores the original string (same length -> same key -> same mask).

    :param binary_string: string made only of '0' and '1' characters
    :return: the masked binary string, same length as the input
    :raises ValueError: if a character cannot be converted with int()
    :raises IndexError: if the mask is shorter than the input
        (NOTE(review): the hash helper is presumably expected to return at
        least len(binary_string) bits -- confirm against the hashing module)
    """
    size = len(binary_string)
    # named "mask" rather than "filter" so the builtin filter() is not shadowed
    mask = hashing.hash_string_to_formated_base2(str(size), size)

    # build every bit first and join once, instead of growing a string with +=
    # the mask is indexed explicitly so a too-short mask still raises an error
    return "".join(
        str((int(mask[i]) + int(bit)) % 2)  # addition modulo 2 == XOR with the mask bit
        for i, bit in enumerate(binary_string)
    )
# =================== main ======================= #
if
__name__
==
'
__main__
'
:
#doc_path = sys.argv[1]
...
...
This diff is collapsed.
Click to expand it.
file_to_dna.py
+
18
−
2
View file @
3cc52a7b
...
...
@@ -49,6 +49,22 @@ def compute_check_sum(binary_string: str) -> str:
return
bin_sum
.
zfill
(
CHECK_SUM_SIZE
)
# fill the beginning with 0 to get the correct size
def apply_binary_filter(binary_string: str) -> str:
    """
    Apply a XOR mask to a binary string and return the masked string.

    The mask is obtained from hashing.hash_string_to_formated_base2, using the
    length of the input (as a string) as the hash key, so 2 files of different
    sizes get a different mask. This is because some zip results start with
    the same octets.

    Each output bit is (input bit + mask bit) modulo 2, so the output has the
    same length as the input and applying this function a second time to the
    output restores the original string (same length -> same key -> same mask).

    :param binary_string: string made only of '0' and '1' characters
    :return: the masked binary string, same length as the input
    :raises ValueError: if a character cannot be converted with int()
    :raises IndexError: if the mask is shorter than the input
        (NOTE(review): the hash helper is presumably expected to return at
        least len(binary_string) bits -- confirm against the hashing module)
    """
    size = len(binary_string)
    # named "mask" rather than "filter" so the builtin filter() is not shadowed
    mask = hashing.hash_string_to_formated_base2(str(size), size)

    # build every bit first and join once, instead of growing a string with +=
    # the mask is indexed explicitly so a too-short mask still raises an error
    return "".join(
        str((int(mask[i]) + int(bit)) % 2)  # addition modulo 2 == XOR with the mask bit
        for i, bit in enumerate(binary_string)
    )
def
convert_file_to_bits
(
input_path
:
str
)
->
str
:
"""
...
...
@@ -192,7 +208,7 @@ def encode_file(input_path: str, output_path: str) -> None:
# apply a filter to the binary string -> shuffle the data to avoid long rows of 0 or 1, and avoid rows repetitions
binary_string
=
binary_string
[::
-
1
]
# reverse the binary string, because 2 files can have the same start with ziping methods
filtered_binary_string
=
bdc
.
apply_binary_filter
(
binary_string
)
filtered_binary_string
=
apply_binary_filter
(
binary_string
)
# calculate and add the binary check_sum at the end
...
...
@@ -274,7 +290,7 @@ def decode_file(input_path: str, output_path: str) -> None:
exit
(
1
)
# apply the same filter used in the encoding to the binary string to remove it
binary_string
=
bdc
.
apply_binary_filter
(
binary_string
)
binary_string
=
apply_binary_filter
(
binary_string
)
binary_string
=
binary_string
[::
-
1
]
# reverse the binary string to get the original
# case binaries length is not multiple of 8 -> remove the excess bits at the beginning that have been added in the encoding to get a round number of blocks
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment