#!/usr/bin/python
# vidjilparser.py
import json

import ijson

class VidjilWriter(object):
    """Turn a stream of ijson-style events back into JSON text.

    Each call to :meth:`write` renders one ``(prefix, event, value)`` triple
    as a JSON token.  Tokens go straight to the output file, or, while
    buffering is active, accumulate in ``self.buffer`` so that the caller can
    later decide to keep them (``conserveBuffer``) or throw them away.

    The writer is a context manager: the output file is opened on entry and
    closed on exit.
    """

    # Tokens whose text does not depend on the event value.
    _STRUCTURAL_TOKENS = {
        'start_map': '{',
        'end_map': '}',
        'start_array': '[',
        'end_array': ']',
    }
    # No separating comma is needed when the previous event is one of these...
    _NO_COMMA_AFTER = ('', 'map_key', 'start_map', 'start_array')
    # ...or when the current event closes a container.
    _CLOSING_EVENTS = ('end_map', 'end_array')

    def __init__(self, filepath, pretty=False):
        """Remember the output path; the file itself is opened by ``__enter__``.

        :param filepath: path of the file to write.
        :param pretty: when true, emit one token per line, tab-indented.
        """
        self._filepath = filepath
        self.pretty = pretty
        self.buffer = ""
        self.buffering = False
        self.conserveBuffer = False

    def __enter__(self):
        self.file = open(self._filepath, 'w')
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.file.close()

    def write(self, prefix, event, value, previous):
        """Render one event and either buffer it or write it to the file.

        :returns: the text written to the file, or ``""`` when the text was
            buffered instead.
        """
        res = self._write(prefix, event, value, previous)
        if self.buffering:
            self.buffer += res
            return ""
        self.file.write(res)
        return res

    def _write(self, prefix, event, value, previous):
        """Return the JSON text for a single event.

        :param prefix: dotted path of the event; its depth drives the
            indentation in pretty mode.
        :param event: event name (``start_map``, ``map_key``, ``string``,
            ``boolean``, ``null``, a numeric event, ...).
        :param value: value attached to the event, if any.
        :param previous: name of the previously written event (``''`` for the
            first one); used to decide whether a comma is needed.
        """
        if event in self._STRUCTURAL_TOKENS:
            token = self._STRUCTURAL_TOKENS[event]
        elif event == 'map_key':
            # json.dumps escapes quotes, backslashes and control characters,
            # which a plain '"%s"' wrapping would leave as invalid JSON.
            token = json.dumps(value, ensure_ascii=False) + ':'
        elif event == 'string':
            token = json.dumps(value, ensure_ascii=False)
        elif event == 'boolean':
            token = str(value).lower()
        elif event == 'null' or value is None:
            # str(None) would give 'None', which is not JSON.
            token = 'null'
        else:
            token = str(value)

        if previous not in self._NO_COMMA_AFTER \
                and event not in self._CLOSING_EVENTS:
            token = ',' + token

        padding = ''
        end = ''
        if self.pretty:
            # A key stays on the same line as the value that follows it.
            if event != 'map_key':
                end = '\n'
            if previous != 'map_key' and prefix:
                padding = '\t' * len(prefix.split('.'))
        return padding + token + end

    def purgeBuffer(self):
        """Drop the buffered text and clear the ``conserveBuffer`` flag."""
        self.buffer = ""
        self.conserveBuffer = False

    def writeBuffer(self):
        """Write the buffered text to the file and return it.

        The buffer is purged afterwards, even if the write fails.
        """
        try:
            self.file.write(self.buffer)
            return self.buffer
        finally:
            self.purgeBuffer()

    def startBuffering(self):
        """Route subsequent :meth:`write` calls into the buffer."""
        self.buffering = True

    def endBuffering(self):
        """Stop buffering; flush the buffer if it was marked to be kept.

        :returns: the flushed text when ``conserveBuffer`` was set, otherwise
            ``""`` (the buffered text is discarded).
        """
        self.buffering = False
        if self.conserveBuffer:
            self.conserveBuffer = False
            return self.writeBuffer()
        self.purgeBuffer()
        return ""

86 87 88 89 90 91 92 93 94 95 96 97 98 99
class Predicate(object):
    """A condition of the form ``comparator(candidate, value)`` on one field.

    A predicate built without a comparator accepts everything.
    """

    def __init__(self, field, comparator, value):
        """Store the field name, the comparison callable and the reference value.

        :param field: name (prefix) of the field the predicate applies to.
        :param comparator: callable taking ``(candidate, value)``, or ``None``
            for an unconditional predicate.
        :param value: reference value passed as second argument to the
            comparator.
        """
        self.comp = comparator
        self.field = field
        self.value = value

    def compare(self, field, other):
        """Tell whether ``other``, found at ``field``, satisfies the predicate.

        :returns: ``True`` when there is no comparator; ``False`` when the
            field differs or the comparator raises; otherwise whatever the
            comparator returns.
        """
        if self.comp is None:
            return True
        try:
            return field == self.field and self.comp(other, self.value)
        except Exception:
            # A comparator that cannot handle the candidate (wrong type, ...)
            # counts as "no match".  Only Exception is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            return False
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131


class VidjilParser(object):
    """Extract selected parts of a JSON file using streaming ijson events.

    The parts to keep are registered with :meth:`addPrefix` as dotted ijson
    prefixes, each paired with a :class:`Predicate`.  :meth:`extract` then
    replays the matching events through the writer.
    """

    def __init__(self, writer=None):
        """Create a parser.

        :param writer: object used to emit the output; defaults to a
            ``VidjilWriter`` targeting ``out.json``.
        """
        if writer is not None:
            self._writer = writer
        else:
            self._writer = VidjilWriter('out.json')
        self._model_prefixes = []
        # Entries are (prefix, Predicate) pairs; see addPrefix().  The list
        # starts empty: a bare string here would break every consumer, which
        # index entries as item[0] / item[1].
        self.prefixes = []

    def initModel(self, model_path):
        """Record every distinct ``(prefix, event)`` pair found in a model file.

        :param model_path: path of the JSON file used as a model.
        """
        with open(model_path, 'r') as model:
            # Set mirror of the list, for constant-time membership tests.
            known = set(self._model_prefixes)
            for prefix, event, _value in ijson.parse(model):
                pair = (prefix, event)
                if pair not in known:
                    known.add(pair)
                    self._model_prefixes.append(pair)

    def validate(self, filepath):
        """Check that a file contains every pair recorded by :meth:`initModel`.

        :param filepath: path of the JSON file to check.
        :returns: ``True`` when each model pair occurs in the file.
        """
        with open(filepath, 'r') as vfile:
            missing = list(self._model_prefixes)
            for prefix, event, _value in ijson.parse(vfile):
                pair = (prefix, event)
                if pair in missing:
                    missing.remove(pair)
            return len(missing) == 0

    def writer(self):
        """Return the writer used for output."""
        return self._writer

    def addPrefix(self, prefix, conditional = None, comp = None, value = None):
        """Register a prefix to extract, optionally guarded by a condition.

        :param prefix: dotted ijson prefix to keep.
        :param conditional: field the condition applies to.
        :param comp: comparison callable, or ``None`` for no condition.
        :param value: reference value for the comparison.
        """
        self.prefixes.append((prefix, Predicate(conditional, comp, value)))

    def reset(self):
        """Forget the registered prefixes and purge the writer's buffer."""
        self.prefixes = []
        self._writer.purgeBuffer()

    def extract(self, filepath):
        """Extract the registered prefixes from ``filepath``.

        :param filepath: path of the JSON file to read.
        :returns: the text that was written to the output.
        """
        # Both the input file and the writer are closed on the way out.
        with open(filepath, 'r') as vidjilfile:
            parser = ijson.parse(vidjilfile)
            with self.writer() as writer:
                return self._extract(parser, writer)

    def _isSelected(self, prefix, value):
        """Tell whether an event at ``prefix`` carrying ``value`` must be kept.

        An event is kept when it lies at or below a registered prefix, or
        when it lies on the way to one: its prefix is the start of a
        registered prefix and it either has no value, or its value names the
        next step towards that registered prefix.
        """
        wanted = [item[0] for item in self.prefixes]
        if any(prefix.startswith(path) for path in wanted):
            return True
        if not any(path.startswith(prefix) for path in wanted):
            return False
        if value is None:
            return True
        text = str(value)
        # The value may complete the current prefix, or be a top-level key.
        candidates = (prefix + '.' + text, text)
        return any(path.startswith(candidates) for path in wanted)

    def _extract(self, parser, writer):
        """Replay the selected events of ``parser`` through ``writer``.

        :param parser: iterable of ``(prefix, event, value)`` triples.
        :param writer: writer receiving the selected events.
        :returns: the concatenation of everything the writer reported as
            written.
        """
        previous = ''
        parts = []
        for prefix, event, value in parser:
            if not self._isSelected(prefix, value):
                continue

            # One satisfied predicate is enough to keep the current buffer.
            if not writer.conserveBuffer \
                    and any(item[1].compare(prefix, value) for item in self.prefixes):
                writer.conserveBuffer = True

            # Maps located exactly at a registered prefix (or at its array
            # items) are buffered until their end is reached.
            bufferOn = any(prefix == item[0] or prefix == item[0] + '.item'
                           for item in self.prefixes)
            if bufferOn and event == "start_map":
                writer.startBuffering()

            parts.append(writer.write(prefix, event, value, previous))

            if bufferOn and event == "end_map":
                parts.append(writer.endBuffering())
            previous = event
        return "".join(parts)