Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Sequencing_Modules
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
dnarXiv
Sequencing_Modules
Commits
91431ae1
Commit
91431ae1
authored
4 years ago
by
BOULLE Olivier
Browse files
Options
Downloads
Patches
Plain Diff
commentaires, comptage des sequences non associées
parent
03d53cb2
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
demultiplexing/demultiplexing.py
+18
-10
18 additions, 10 deletions
demultiplexing/demultiplexing.py
with
18 additions
and
10 deletions
demultiplexing/demultiplexing.py
+
18
−
10
View file @
91431ae1
...
...
@@ -24,7 +24,7 @@ class Fastq_seq:
self
.
value
=
list
[
1
]
self
.
description
=
list
[
2
]
self
.
score
=
list
[
3
]
self
.
primers_points
=
{}
self
.
primers_points
=
{}
# 1 point is added for a primer for each kmer the sequence contains
def
add_point_primer
(
self
,
primer_name
):
if
primer_name
in
self
.
primers_points
:
...
...
@@ -46,33 +46,38 @@ for file in os.listdir(primers_dir_path):
primers_dict
.
update
(
dfr
.
read_fasta
(
primers_dir_path
+
"
/
"
+
file
))
sorted_seq_by_primers
=
{}
# dictionary containing the fastq_sequences sorted by the corresponding primers
fastq_seq_list
=
[]
kmer_size
=
5
#
size of the sub_sequences of the primers to search in the fastq_sequences
point_threshold
=
5
# threshold of the minimum number of kmer found in the fastq_sequences to link it to a primer
kmer_size
=
10
# size of the sub_sequences of the primers to search in the fastq_sequences
point_threshold
=
10
#
threshold of the minimum number of kmer found in the fastq_sequences to link it to a primer
unlinked_sequences_counter
=
0
for
fq
in
dfr
.
read_fastq
(
input_path
):
fastq_seq
=
Fastq_seq
(
fq
)
# create the fastq object
fastq_seq_list
.
append
(
fastq_seq
)
for
primer_name
,
primer_value
in
primers_dict
.
items
():
primer_size
=
len
(
primer_value
)
if
"
left
"
in
primer_name
:
# look at the beginning of the fastq_sequence
# look at the beginning of the fastq_sequence
for left_part primers
fastq_part
=
fastq_seq
.
value
[:
primer_size
+
5
]
primer_name
=
primer_name
.
replace
(
"
left_
"
,
""
)
else
:
# look at the end of the fastq_sequence
# look at the end of the fastq_sequence
for right_part primers
fastq_part
=
fastq_seq
.
value
[
-
primer_size
-
5
:]
primer_name
=
primer_name
.
replace
(
"
right_
"
,
""
)
kmer_list
=
[]
# kmers contained in this primer
# search if the subsequences of the primer are in the sequence
for
k
in
range
(
primer_size
-
kmer_size
+
1
):
kmer
=
primer_value
[
k
:
k
+
kmer_size
]
if
kmer
in
fastq_part
:
fastq_seq
.
add_point_primer
(
primer_name
)
# if no kmers has been found, the sequence cannot be linked to a primer
if
fastq_seq
.
primers_points
==
{}:
unlinked_sequences_counter
+=
1
continue
# primer with the highest number of kmers found in the sequence
primer_with_most_points
=
max
(
fastq_seq
.
primers_points
,
key
=
fastq_seq
.
primers_points
.
get
)
if
fastq_seq
.
primers_points
[
primer_with_most_points
]
>
point_threshold
:
if
fastq_seq
.
primers_points
[
primer_with_most_points
]
<
point_threshold
:
unlinked_sequences_counter
+=
1
else
:
if
primer_with_most_points
in
sorted_seq_by_primers
:
sorted_seq_by_primers
[
primer_with_most_points
].
append
(
fastq_seq
)
else
:
...
...
@@ -84,8 +89,10 @@ except OSError:
shutil
.
rmtree
(
output_dir_path
)
os
.
mkdir
(
output_dir_path
)
# write the fastq files for the linked sequences
for
primer_name
,
fastq_sequences_list
in
sorted_seq_by_primers
.
items
():
sequences_output
=
open
(
output_dir_path
+
"
/
"
+
primer_name
+
"
.fastq
"
,
"
w+
"
)
print
(
primer_name
,
"
:
"
,
len
(
fastq_sequences_list
),
"
lectures associées
"
)
for
fastq_sequence
in
fastq_sequences_list
:
sequences_output
.
write
(
fastq_sequence
.
name
+
"
\n
"
)
sequences_output
.
write
(
fastq_sequence
.
value
+
"
\n
"
)
...
...
@@ -93,4 +100,5 @@ for primer_name, fastq_sequences_list in sorted_seq_by_primers.items():
sequences_output
.
write
(
fastq_sequence
.
score
+
"
\n
"
)
sequences_output
.
close
()
print
(
unlinked_sequences_counter
,
"
lectures non associées
"
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment