Commit 63683dc4 authored by BELCOUR Arnaud

Extract only the genus and species from the GenBank organism name.

Fix typo.
parent dfa98c3f
......@@ -65,6 +65,8 @@ def check_organism_kegg(genbank_pathname, df_orga):
first_seq_record = next(SeqIO.parse(gbk, "genbank"))
species_name = first_seq_record.annotations['organism']
# Extract only genus and species and not strain.
species_name = ' '.join(species_name.split(' ')[0:2])
except KeyError:
raise KeyError('No organism in the Genbank. In the SOURCE you must have: ORGANISM Species name')
......@@ -72,7 +74,7 @@ def check_organism_kegg(genbank_pathname, df_orga):
if species_name in row[2]:
orga_code = row[1]
if verbose:
print('{0} has been found in KEEG with id {1}.'.format(species_name, orga_code))
print('{0} has been found in KEGG with id {1}.'.format(species_name, orga_code))
return orga_code
return None
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment