Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
vidjil
vidjil
Commits
8424cc73
Commit
8424cc73
authored
Nov 30, 2017
by
Mathieu Giraud
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
get-CD.py: refactor
Prepares something else.
parent
0d486d1c
Pipeline
#21514
failed with stages
in 49 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
12 deletions
+18
-12
germline/get-CD.py
germline/get-CD.py
+18
-12
No files found.
germline/get-CD.py
View file @
8424cc73
...
...
@@ -9,9 +9,10 @@ HUGO_REQUEST = 'http://www.genenames.org/cgi-bin/download?'
HUGO_COLS
=
'&col=gd_hgnc_id&col=md_refseq_id&col=gd_other_ids_list&col=gd_app_sym&col=gd_app_name&col=gd_status&col=gd_prev_sym&col=gd_aliases&col=gd_pub_chrom_map&col=gd_pub_acc_ids&col=gd_pub_refseq_ids'
# HUGO query on 'hcdm.org' entries
HUGO_QUERY
=
'&status=Approved&status=Entry+Withdrawn&status_opt=2&where=gd_other_ids+LIKE+%27%25hcdm.org%25%27&order_by=gd_app_sym_sort&format=text&limit=&hgnc_dbtag=on&submit=submit'
HUGO_QUERY_HCDM
=
'&status=Approved&status=Entry+Withdrawn&status_opt=2&where=gd_other_ids+LIKE+%27%25hcdm.org%25%27&order_by=gd_app_sym_sort&format=text&limit=&hgnc_dbtag=on&submit=submit'
HUGO_URL_HCDM
=
HUGO_REQUEST
+
HUGO_COLS
+
HUGO_QUERY_HCDM
HUGO_URL
=
HUGO_REQUEST
+
HUGO_COLS
+
HUGO_QUERY
NCBI_API
=
'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&rettype=fasta&retmode=text'
+
'&id=%s'
...
...
@@ -28,7 +29,19 @@ out = open(OUT, 'w')
print
"==>"
,
SORTING_OUT
sorting_out
=
open
(
SORTING_OUT
,
'w'
)
for
l
in
urllib
.
urlopen
(
HUGO_URL
).
readlines
():
def
ncbi_and_write
(
ncbi
,
hugo
,
cd_id
,
outs
):
print
cd_id
,
hugo
,
ncbi
fasta
=
urllib
.
urlopen
(
NCBI_API
%
ncbi
).
read
()
fasta_with_id
=
fasta
.
replace
(
'>'
,
'>%s|%s|'
%
(
hugo
,
cd_id
))
for
out
in
outs
:
out
.
write
(
fasta_with_id
)
for
l
in
urllib
.
urlopen
(
HUGO_URL_HCDM
).
readlines
():
ll
=
l
.
split
(
'
\t
'
)
try
:
...
...
@@ -37,15 +50,8 @@ for l in urllib.urlopen(HUGO_URL).readlines():
except
:
print
"!"
,
l
continue
print
cd_id
,
hugo
,
ncbi
fasta
=
urllib
.
urlopen
(
NCBI_API
%
ncbi
).
read
()
fasta_with_id
=
fasta
.
replace
(
'>'
,
'>%s|%s|'
%
(
hugo
,
cd_id
))
out
.
write
(
fasta_with_id
)
ncbi_and_write
(
ncbi
,
hugo
,
cd_id
,
[
out
]
+
([
sorting_out
]
if
cd_id
in
SORTING_CD
else
[]))
if
cd_id
in
SORTING_CD
:
sorting_out
.
write
(
fasta_with_id
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment