Commit bbc6e04a authored by VIGNET Pierre's avatar VIGNET Pierre
Browse files

Add useful functions: get_cam_lines, get_all_cams

parent 39972ebd
......@@ -34,7 +34,8 @@ from __future__ import print_function
# Standard imports
import os
from collections import defaultdict
import glob
from collections import defaultdict, Counter
# Library imports
import cadbiom.commons as cm
......@@ -322,3 +323,90 @@ def get_solutions(file_descriptor):
# print(stripped_line)
# Sort in lower case, remove ' ' empty elements
yield line, stripped_line
def get_cam_lines(filepath):
    """Collect the stripped MAC/CAM lines of a single solution file in a set.

    The file is opened for reading and handed to
    :meth:`~cadbiom_cmd.tools.solutions.get_solutions`, which yields
    ``(line, stripped_line)`` pairs; only the stripped line of each pair
    is kept.

    .. note:: :meth:`~cadbiom_cmd.tools.solutions.get_all_cams` is usually
        the better entry point: it accepts a directory as well as a single
        file and reports duplicated lines.

    .. note:: The MAC lines are assumed to be already sorted in
        alphabetical order at this point; nothing is sorted here.

    .. note:: The elements of the returned set are LINES (strings), not
        sets of places.

    :param filepath: Path of the file whose solutions are read.
    :type filepath: <str>
    :return: Set of MAC/CAM lines found in the given file.
    :rtype: <set <str>>
    """
    cam_lines = set()
    with open(filepath, 'r') as solutions_file:
        # The first item of each pair (the raw line) is not needed here
        for _raw_line, stripped in get_solutions(solutions_file):
            cam_lines.add(stripped)
    return cam_lines
def get_all_cams(path):
    """Return the set of all MAC lines found in a file or in a directory.

    This function relies on
    :meth:`~cadbiom_cmd.tools.solutions.get_cam_lines`, which returns the
    set of stripped MAC lines of one file.

    - If ``path`` is a file, its MAC lines are loaded.
    - If ``path`` is a directory, every entry of it matching ``*cam.txt``
      is loaded (with or without a trailing separator in ``path``).
    - Otherwise nothing is loaded and an empty set is returned.

    .. note:: Some verifications are logged along the way:

        - Number of MACS per file (directory case)
        - Number of files processed (directory case)
        - Duplicated MACS with their number of occurrences
        - Number of MACS loaded, and number of unique MACS returned

        Duplicates are only reported in the log; no exception is raised.

    :param path: Path of a solution file, or of a directory of
        ``*cam.txt`` files.
    :type path: <str>
    :return: Set of unique MAC/CAM lines from the given path.
    :rtype: <set <str>>
    """
    # Keep every line in a list, not a set
    # => allows the detection of MACS duplicated among several cam files
    total_cams = list()
    file_number = 0

    if os.path.isfile(path):
        total_cams = list(get_cam_lines(path))
    elif os.path.isdir(path):
        # os.path.join() makes the pattern valid whether or not `path` ends
        # with a separator; plain concatenation would otherwise match
        # siblings of the directory instead of its content.
        pattern = os.path.join(path, '*cam.txt')
        for file_number, cam_file in enumerate(glob.glob(pattern), 1):
            temp_cams = get_cam_lines(cam_file)
            total_cams += list(temp_cams)
            # Log the number of cams for the given file
            LOGGER.info("File %s: MACS: %s", cam_file, len(temp_cams))

        # file_number stays at 0 when no file matches the pattern
        LOGGER.info("Files processed: %s", file_number)

    # Check for duplicated cams (number of occurrences > 1)
    unique_cams = set(total_cams)
    duplicated_cams = {
        (cam, count) for cam, count in Counter(total_cams).items()
        if count != 1
    }
    LOGGER.info("Duplicated MACS: %s", duplicated_cams)
    # Every extra logging argument needs its own placeholder in the message
    LOGGER.info("Number of MACS loaded: %s", len(total_cams))
    LOGGER.info("Number of unique MACS returned: %s", len(unique_cams))

    return unique_cams
## Handle *.json files #########################################################
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment