Commit 051721e5 authored by VIGNET Pierre
Browse files

Ability to map identifiers with cadbiom names

parent 1986884b
......@@ -147,6 +147,20 @@ def solutions_2_occcurrences_matrix(args):
transposed=params['transpose_csv']
)
def identifiers_mapping(args):
    """Mapping of identifiers from external databases.
    This function exports a CSV formated file presenting the list of known
    Cadbiom identifiers for each given external identifier.
    """
    # NOTE: the docstring above is reused as the help text of the
    # 'identifiers_mapping' sub-command (help=identifiers_mapping.__doc__),
    # so its wording is user-facing.

    # Function-scope import, as in the other command wrappers
    # (presumably to avoid loading the module for unrelated commands - confirm).
    import solution_repr

    # Turn the argparse namespace into keyword arguments and delegate.
    solution_repr.identifiers_mapping(**args_to_param(args))
def model_comparison(args):
"""Isomorphism test.
......@@ -437,6 +451,25 @@ def main():
## Model-related commands ##################################################
# subparser: Mapping of identifiers
# output: CSV file
parser_identifiers_mapping = subparsers.add_parser(
'identifiers_mapping',
help=identifiers_mapping.__doc__,
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser_identifiers_mapping.add_argument('model_file',
help="bcx model file.")
group = parser_identifiers_mapping.add_mutually_exclusive_group(required=True)
group.add_argument('--external_file',
help="File with 1 external identifiers to be mapped per line."
)
group.add_argument('--external_identifiers', nargs='+',
help="Multiple external identifiers to be mapped."
)
parser_identifiers_mapping.set_defaults(func=identifiers_mapping)
# subparser: Model comparison
# 2 models
parser_model_comparison = subparsers.add_parser(
......
......@@ -54,7 +54,8 @@ from tools.solutions import load_solutions, convert_solutions_to_json
from tools.models import Reporter
from tools.models import get_transitions, \
get_transitions_from_model_file, \
get_frontier_places
get_frontier_places, \
get_identifiers_mapping
from tools.models import get_places_data
from tools.graphs import build_graph, get_json_graph, export_graph
......@@ -553,6 +554,34 @@ def low_model_info(model_file,
return info
def identifiers_mapping(model_file, *args, **kwargs):
    """Entry point for the mapping of identifiers from external databases

    The external identifiers are taken either from a file (one identifier per
    line) or from a list given directly. The result is written to the file
    "mapping.csv" in the current working directory, with one row per
    identifier found: ``external_id;cadbiom_name_1|cadbiom_name_2|...``.

    :param model_file: File for the model.
    :key external_file: File with 1 external identifier per line.
        Takes precedence over `external_identifiers` when set.
    :key external_identifiers: List of external identifiers to be mapped.
    :type model_file: <str>
    :type external_file: <str>
    :type external_identifiers: <list>
    :raises ValueError: If neither `external_file` nor
        `external_identifiers` is provided.
    """
    # The key is 'external_file' (see the command line option of the same
    # name); .get() tolerates callers that pass only one of the two keys.
    external_file = kwargs.get('external_file')
    if external_file:
        with open(external_file, 'r') as f_d:
            # Strip surrounding whitespace (incl. '\r' of CRLF files) and
            # skip blank lines so they are not treated as identifiers.
            stripped_lines = (line.strip() for line in f_d)
            external_identifiers = set(
                identifier for identifier in stripped_lines if identifier
            )
    else:
        given_identifiers = kwargs.get('external_identifiers')
        if given_identifiers is None:
            raise ValueError(
                "Either 'external_file' or 'external_identifiers' is required."
            )
        external_identifiers = set(given_identifiers)

    mapping = get_identifiers_mapping(model_file, external_identifiers)

    # Make CSV file
    with open("mapping.csv", 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter=str(';'))
        # Join multiple Cadbiom names with a |
        # Rows and names are sorted so that the output is reproducible.
        rows = (
            (external_id, "|".join(sorted(cadbiom_names)))
            for external_id, cadbiom_names in sorted(mapping.items())
        )
        writer.writerows(rows)
def model_graph(model_file, output_dir='./graphs/',
centralities=False,
**kwargs):
......
......@@ -547,3 +547,45 @@ def get_places_data(places, model):
# v1 model: return only the name of the place
return [{'cadbiomName': place_name} for place_name in places]
def get_identifiers_mapping(model_file, external_identifiers):
    """Get Cadbiom names corresponding to the given external identifiers (xrefs)

    .. note:: This function works only on v2 formatted models with JSON
        additional data.

    Identifiers that match no place of the model are reported through the
    module logger (INFO level) and are absent from the returned mapping.

    :param model_file: Model file.
    :param external_identifiers: Set (or any iterable) of external
        identifiers to be mapped.
    :type model_file: <str>
    :type external_identifiers: <set>
    :return: Mapping dictionary with external identifiers as keys
        and sets of cadbiom names as values.
    :rtype: <defaultdict <str>:<set>>
    :raises AssertionError: If the model is not a v2 model.
    """
    # The set operators below require a real set on the right-hand side;
    # converting here lets callers pass a list or a tuple as well.
    external_identifiers = set(external_identifiers)

    # Get the model
    parser = MakeModelFromXmlFile(model_file)
    model = parser.handler.model
    assert model.xml_namespace == 'http://cadbiom.genouest.org/v2/', \
        "Operation not supported: Only v2 models are supported."

    # Get all nodes
    places_data = get_places_data(parser.handler.node_dict.iterkeys(), model)

    # Each place looks like:
    # {'xrefs': {'bdd': [values],}, 'cadbiomName': '',}
    # Flatten the xrefs of all databases into one set per Cadbiom name.
    xrefs_by_place = {
        place['cadbiomName']:
            frozenset(it.chain.from_iterable(place['xrefs'].itervalues()))
        for place in places_data
    }

    # Mapping: external_identifiers as keys and Cadbiom names as values
    mapping = defaultdict(set)
    for place, identifiers in xrefs_by_place.iteritems():
        for common_id in identifiers & external_identifiers:
            mapping[common_id].add(place)

    not_found_identifiers = external_identifiers - set(mapping)
    if not_found_identifiers:
        LOGGER.info("Some identifiers were not found: %s", not_found_identifiers)
    return mapping
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment