Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Source_Encoding
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
GitLab upgrade completed. Current version is 17.11.3.
Show more breadcrumbs
dnarXiv
Source_Encoding
Commits
be62c47f
Commit
be62c47f
authored
1 year ago
by
BOULLE Olivier
Browse files
Options
Downloads
Patches
Plain Diff
function to add,remove the compression extension
parent
eb178ae1
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
post_processing.py
+9
-3
9 additions, 3 deletions
post_processing.py
pre_processing.py
+14
-7
14 additions, 7 deletions
pre_processing.py
with
23 additions
and
10 deletions
post_processing.py
+
9
−
3
View file @
be62c47f
...
@@ -12,6 +12,12 @@ do the opposite of pre_processing.py
...
@@ -12,6 +12,12 @@ do the opposite of pre_processing.py
take dna sequences and convert them into files
take dna sequences and convert them into files
"""
"""
def compressed_name(file_path):
    """
    Return the path of the gzip-compressed counterpart of file_path.

    The result is file_path with the ".gz" extension appended.
    """
    gzip_extension = ".gz"
    return file_path + gzip_extension
def uncompressed_name(file_path):
    """
    Return the path of the uncompressed counterpart of file_path.

    Only a trailing ".gz" extension is removed. A ".gz" appearing anywhere
    else in the path (a directory name, or a name such as "data.gz.bak") is
    left untouched, and a path that does not end in ".gz" is returned
    unchanged.
    """
    gzip_extension = ".gz"
    # str.replace would strip every ".gz" occurrence, corrupting paths that
    # contain the extension in a directory or in the middle of the file name
    if file_path.endswith(gzip_extension):
        return file_path[:-len(gzip_extension)]
    return file_path
def
convert_to_binary
(
input_dir_path
,
compressed_dir_path
):
def
convert_to_binary
(
input_dir_path
,
compressed_dir_path
):
"""
"""
...
@@ -20,7 +26,7 @@ def convert_to_binary(input_dir_path, compressed_dir_path):
...
@@ -20,7 +26,7 @@ def convert_to_binary(input_dir_path, compressed_dir_path):
for
filename
in
os
.
listdir
(
input_dir_path
):
for
filename
in
os
.
listdir
(
input_dir_path
):
file_path
=
os
.
path
.
join
(
input_dir_path
,
filename
)
file_path
=
os
.
path
.
join
(
input_dir_path
,
filename
)
result_file_path
=
os
.
path
.
join
(
compressed_dir_path
,
filename
)
+
"
.gz
"
result_file_path
=
os
.
path
.
join
(
compressed_dir_path
,
compressed_name
(
filename
)
)
# checking if it is a file
# checking if it is a file
if
os
.
path
.
isfile
(
file_path
):
if
os
.
path
.
isfile
(
file_path
):
...
@@ -38,14 +44,14 @@ def uncompress_files(compressed_dir_path, uncompressed_dir_path):
...
@@ -38,14 +44,14 @@ def uncompress_files(compressed_dir_path, uncompressed_dir_path):
"""
"""
for
filename
in
os
.
listdir
(
compressed_dir_path
):
for
filename
in
os
.
listdir
(
compressed_dir_path
):
file_path
=
os
.
path
.
join
(
compressed_dir_path
,
filename
)
file_path
=
os
.
path
.
join
(
compressed_dir_path
,
filename
)
result_file_path
=
os
.
path
.
join
(
uncompressed_dir_path
,
filename
.
replace
(
"
.gz
"
,
""
))
result_file_path
=
os
.
path
.
join
(
uncompressed_dir_path
,
uncompressed_name
(
filename
))
# checking if it is a file
# checking if it is a file
if
os
.
path
.
isfile
(
file_path
):
if
os
.
path
.
isfile
(
file_path
):
pre_processing
.
unzip_file
(
file_path
)
pre_processing
.
unzip_file
(
file_path
)
# move the unzipped file
# move the unzipped file
os
.
replace
(
file_path
.
replace
(
"
.gz
"
,
""
),
result_file_path
)
os
.
replace
(
uncompressed_name
(
file_path
),
result_file_path
)
elif
os
.
path
.
isdir
(
file_path
):
elif
os
.
path
.
isdir
(
file_path
):
print
(
"
error post processing (uncompress_files) : directory found in compressed_dir_path
"
,
filename
)
print
(
"
error post processing (uncompress_files) : directory found in compressed_dir_path
"
,
filename
)
...
...
This diff is collapsed.
Click to expand it.
pre_processing.py
+
14
−
7
View file @
be62c47f
...
@@ -33,6 +33,13 @@ def unzip_file(file_path):
...
@@ -33,6 +33,13 @@ def unzip_file(file_path):
subprocess
.
run
(
'
/bin/bash -c
"
$COMMAND
"'
,
shell
=
True
,
env
=
{
'
COMMAND
'
:
decompression_command
})
subprocess
.
run
(
'
/bin/bash -c
"
$COMMAND
"'
,
shell
=
True
,
env
=
{
'
COMMAND
'
:
decompression_command
})
def compressed_name(file_path):
    """
    Return the path of the gzip-compressed counterpart of file_path.

    The result is file_path with the ".gz" extension appended.
    """
    gzip_extension = ".gz"
    return file_path + gzip_extension
def uncompressed_name(file_path):
    """
    Return the path of the uncompressed counterpart of file_path.

    Only a trailing ".gz" extension is removed. A ".gz" appearing anywhere
    else in the path (a directory name, or a name such as "data.gz.bak") is
    left untouched, and a path that does not end in ".gz" is returned
    unchanged.
    """
    gzip_extension = ".gz"
    # str.replace would strip every ".gz" occurrence, corrupting paths that
    # contain the extension in a directory or in the middle of the file name
    if file_path.endswith(gzip_extension):
        return file_path[:-len(gzip_extension)]
    return file_path
def
insert_path_in_files
(
input_dir_path
:
str
,
rearanged_files_dir_path
:
str
)
->
None
:
def
insert_path_in_files
(
input_dir_path
:
str
,
rearanged_files_dir_path
:
str
)
->
None
:
"""
"""
copy all files from a directory and paste them at the same level in output directory, with their relative path written before first line
copy all files from a directory and paste them at the same level in output directory, with their relative path written before first line
...
@@ -90,7 +97,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
...
@@ -90,7 +97,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
exit
(
0
)
exit
(
0
)
# get binary size of the compressed file
# get binary size of the compressed file
binary_len
=
len
(
file_to_dna
.
convert_file_to_bits
(
compressed_file_path
+
"
.gz
"
))
binary_len
=
len
(
file_to_dna
.
convert_file_to_bits
(
compressed_
name
(
compressed_
file_path
)
))
if
binary_len
<=
max_binary_length
:
# if acceptable length, it's perfect
if
binary_len
<=
max_binary_length
:
# if acceptable length, it's perfect
files_compressed_size
[
filename
]
=
binary_len
# save the compressed size for this file
files_compressed_size
[
filename
]
=
binary_len
# save the compressed size for this file
...
@@ -98,7 +105,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
...
@@ -98,7 +105,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
else
:
else
:
# file too large, need to split it
# file too large, need to split it
os
.
remove
(
compressed_file_path
+
"
.gz
"
)
# delete the compressed file
os
.
remove
(
compressed_
name
(
compressed_
file_path
)
)
# delete the compressed file
# read the original file as bytes
# read the original file as bytes
with
open
(
file_path
,
"
rb
"
)
as
input_file
:
with
open
(
file_path
,
"
rb
"
)
as
input_file
:
...
@@ -143,7 +150,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
...
@@ -143,7 +150,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
# compress the split_file to the compressed directory
# compress the split_file to the compressed directory
zip_file
(
split_file_path
,
compressed_file_path
+
split_file_footer
)
zip_file
(
split_file_path
,
compressed_file_path
+
split_file_footer
)
compressed_subfile_path
=
compressed_file_path
+
split_file_footer
+
"
.gz
"
compressed_subfile_path
=
compressed_name
(
compressed_file_path
+
split_file_footer
)
# check the size of the subfile
# check the size of the subfile
binary_len
=
len
(
file_to_dna
.
convert_file_to_bits
(
compressed_subfile_path
))
binary_len
=
len
(
file_to_dna
.
convert_file_to_bits
(
compressed_subfile_path
))
...
@@ -197,7 +204,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
...
@@ -197,7 +204,7 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
# add the merged file path to the bis dict with its compressed size
# add the merged file path to the bis dict with its compressed size
zip_file
(
merged_file_path
,
compressed_merged_file_path
)
zip_file
(
merged_file_path
,
compressed_merged_file_path
)
merged_binary_len
=
len
(
file_to_dna
.
convert_file_to_bits
(
compressed_merged_file_path
+
"
.gz
"
))
merged_binary_len
=
len
(
file_to_dna
.
convert_file_to_bits
(
compressed_
name
(
compressed_
merged_file_path
)
))
if
merged_binary_len
>=
max_binary_length
:
if
merged_binary_len
>=
max_binary_length
:
print
(
"
error merging result too large
"
,
compressed_merged_file_path
)
print
(
"
error merging result too large
"
,
compressed_merged_file_path
)
...
@@ -207,8 +214,8 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
...
@@ -207,8 +214,8 @@ def compress_and_split(rearanged_files_dir_path: str, compressed_dir_path: str)
files_compressed_size
[
merged_file_name
]
=
merged_binary_len
files_compressed_size
[
merged_file_name
]
=
merged_binary_len
# remove the 2 compressed files of the 2 files
# remove the 2 compressed files of the 2 files
os
.
remove
(
os
.
path
.
join
(
compressed_dir_path
,
filename
)
+
"
.gz
"
)
os
.
remove
(
os
.
path
.
join
(
compressed_dir_path
,
compressed_name
(
filename
)
)
)
os
.
remove
(
os
.
path
.
join
(
compressed_dir_path
,
filename_2
)
+
"
.gz
"
)
os
.
remove
(
os
.
path
.
join
(
compressed_dir_path
,
compressed_name
(
filename_2
)
)
)
# set the compressed size of the 2 files to an excessively high number to prevent them from being reused for merging
# set the compressed size of the 2 files to an excessively high number to prevent them from being reused for merging
files_compressed_size
[
filename
]
=
2
*
max_binary_length
files_compressed_size
[
filename
]
=
2
*
max_binary_length
...
@@ -238,7 +245,7 @@ def convert_to_sequence(compressed_dir_path, payload_fragments_dir_path):
...
@@ -238,7 +245,7 @@ def convert_to_sequence(compressed_dir_path, payload_fragments_dir_path):
# checking if it is a file
# checking if it is a file
if
os
.
path
.
isfile
(
file_path
):
if
os
.
path
.
isfile
(
file_path
):
output_file_path
=
os
.
path
.
join
(
payload_fragments_dir_path
,
filename
.
replace
(
"
.gz
"
,
""
))
output_file_path
=
os
.
path
.
join
(
payload_fragments_dir_path
,
uncompressed_name
(
filename
))
dna_sequence
=
file_to_dna
.
encode_file
(
file_path
,
output_file_path
)
# convert binaries into a dna sequence and save result in the output file
dna_sequence
=
file_to_dna
.
encode_file
(
file_path
,
output_file_path
)
# convert binaries into a dna sequence and save result in the output file
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment