vidjilparser.py 4.27 KB
Newer Older
1 2 3 4 5 6 7 8
#!/usr/bin/python
import json

import ijson

class VidjilWriter(object):
    """Write a stream of (prefix, event, value) parser events as JSON text.

    The writer is a context manager: entering it opens ``filepath`` for
    writing, leaving it closes the file.  Each event is rendered by
    :meth:`write`, which either sends the text straight to the file or,
    while buffering is active, accumulates it in ``self.buffer``.

    Attributes:
        pretty: when true, tokens are tab-indented and newline-terminated.
        buffer: text accumulated while ``buffering`` is true.
        buffering: whether :meth:`write` appends to ``buffer`` instead of
            writing to the file.
        conserveBuffer: when true at :meth:`endBuffering`, the buffer is
            flushed to the file instead of being discarded.  Nothing in
            this class sets it to true; callers are expected to.
    """

    # Text types of the running interpreter: (str, unicode) on Python 2,
    # (str, str) on Python 3.  Avoids naming ``unicode`` directly.
    _string_types = (type(""), type(u""))

    # Events rendered as a fixed token, independent of the value.
    _structural_tokens = {
        'start_map': '{',
        'end_map': '}',
        'start_array': '[',
        'end_array': ']',
    }

    # No separating comma is needed right after these "previous" events...
    _no_comma_after = ('', 'map_key', 'start_map', 'start_array')
    # ...nor in front of these closing events.
    _closing_events = ('end_map', 'end_array')

    def __init__(self, filepath, pretty=False):
        """Remember the output path; the file is only opened on ``__enter__``."""
        self._filepath = filepath
        self.pretty = pretty
        self.file = None
        self.buffer = ""
        self.buffering = False
        self.conserveBuffer = False

    def __enter__(self):
        """Open the output file for writing and return the writer itself."""
        self.file = open(self._filepath, 'w')
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the output file if it was opened; exceptions propagate."""
        if self.file is not None:
            self.file.close()

    def write(self, prefix, event, value, previous):
        """Render one event and either buffer it or write it to the file.

        Returns the text written to the file, or ``""`` when the text was
        appended to the buffer instead.
        """
        rendered = self._write(prefix, event, value, previous)
        if self.buffering:
            self.buffer += rendered
            return ""
        self.file.write(rendered)
        return rendered

    def _quote(self, value):
        """Return ``value`` as a JSON string literal (quoted and escaped)."""
        if not isinstance(value, self._string_types):
            # Non-text values are still emitted as a quoted string.
            value = str(value)
        return json.dumps(value)

    def _write(self, prefix, event, value, previous):
        """Build the JSON text for a single event.

        Args:
            prefix: dotted path of the event; its depth drives indentation
                in pretty mode.
            event: event name ('start_map', 'map_key', 'string', ...).
            value: value attached to the event (``None`` for structural
                events).
            previous: name of the event rendered just before this one, or
                ``''`` for the first event; decides whether a comma is
                needed.

        Returns:
            The text for this event, including comma, indentation and line
            ending as applicable.
        """
        end = '\n' if self.pretty else ''

        token = self._structural_tokens.get(event)
        if token is None:
            if event == 'map_key':
                token = self._quote(value) + ':'
                # The value follows its key on the same line.
                end = ''
            elif event == 'string':
                token = self._quote(value)
            elif event == 'null' or value is None:
                # Python's None must be spelled ``null`` in JSON.
                token = 'null'
            elif event == 'boolean':
                token = str(value).lower()
            else:
                token = '{}'.format(value)

        if (previous not in self._no_comma_after
                and event not in self._closing_events):
            token = ',' + token

        padding = ''
        if self.pretty and previous != 'map_key' and len(prefix) > 0:
            # One tab per component of the dotted prefix.
            padding = '\t' * len(prefix.split('.'))

        return padding + token + end

    def purgeBuffer(self):
        """Discard the buffered text and reset ``conserveBuffer``."""
        self.buffer = ""
        self.conserveBuffer = False

    def writeBuffer(self):
        """Write the buffered text to the file and return it.

        The buffer is purged afterwards, even if the write fails.
        """
        try:
            self.file.write(self.buffer)
            return self.buffer
        finally:
            self.purgeBuffer()

    def startBuffering(self):
        """Make subsequent :meth:`write` calls accumulate into the buffer."""
        self.buffering = True

    def endBuffering(self):
        """Stop buffering; flush the buffer if ``conserveBuffer`` is set.

        Returns the flushed text, or ``""`` when the buffer was discarded.
        """
        self.buffering = False
        if self.conserveBuffer:
            self.conserveBuffer = False
            return self.writeBuffer()
        self.purgeBuffer()
        return ""

86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124


class VidjilParser(object):
    """Filter a JSON document down to a set of wanted dotted prefixes.

    The document is read as a stream of ``(prefix, event, value)`` triples
    from ``ijson.parse``; events that lie on the path to, or underneath,
    one of ``self.prefixes`` are forwarded to the writer.

    A second, independent feature checks a file against a "model":
    :meth:`initModel` records the (prefix, event) pairs of a reference
    file and :meth:`validate` checks that another file contains them all.
    """

    def __init__(self, writer=None):
        """Use ``writer`` for output, or a ``VidjilWriter('out.json')``."""
        if writer is None:
            writer = VidjilWriter('out.json')
        self._writer = writer
        self._model_prefixes = []
        self.prefixes = ['clones.item.seg.affectSigns']

    def initModel(self, model_path):
        """Record every distinct (prefix, event) pair found in ``model_path``.

        Pairs are appended to the model in order of first appearance; pairs
        already recorded by a previous call are not duplicated.
        """
        known = set(self._model_prefixes)
        # ijson consumes byte streams, hence binary mode.
        with open(model_path, 'rb') as model:
            for prefix, event, _value in ijson.parse(model):
                pair = (prefix, event)
                if pair not in known:
                    known.add(pair)
                    self._model_prefixes.append(pair)

    def validate(self, filepath):
        """Return True if ``filepath`` contains every pair of the model.

        The whole file is always parsed, so malformed JSON still surfaces
        as a parser error even once all model pairs have been seen.
        """
        missing = set(self._model_prefixes)
        with open(filepath, 'rb') as vfile:
            for prefix, event, _value in ijson.parse(vfile):
                missing.discard((prefix, event))
        return not missing

    def writer(self):
        """Return the writer used for output."""
        return self._writer

    def addPrefix(self, prefix):
        """Add a dotted prefix to the list of prefixes to extract."""
        self.prefixes.append(prefix)

    def extract(self, filepath):
        """Extract the wanted prefixes from ``filepath`` through the writer.

        Returns whatever :meth:`_extract` returns: the concatenation of the
        writer's return values.
        """
        # The input file is closed when done, even if parsing fails.
        with open(filepath, 'rb') as vidjilfile:
            parser = ijson.parse(vidjilfile)
            with self.writer() as writer:
                return self._extract(parser, writer)

    @staticmethod
    def _is_wanted(prefix, value, wanted):
        """Tell whether an event at ``prefix`` carrying ``value`` is kept.

        An event is kept when its prefix starts with a wanted prefix, or
        when its prefix is the beginning of a wanted prefix and either it
        carries no value or its value continues towards a wanted prefix.
        ``wanted`` is a tuple of the wanted prefixes.
        """
        if prefix.startswith(wanted):
            return True
        if not any(item.startswith(prefix) for item in wanted):
            return False
        if value is None:
            return True
        text = str(value)
        candidates = (prefix + '.' + text, text)
        return any(item.startswith(candidates) for item in wanted)

    def _extract(self, parser, writer):
        """Forward the wanted events of ``parser`` to ``writer``.

        Args:
            parser: iterable of ``(prefix, event, value)`` triples.
            writer: object whose ``write(prefix, event, value, previous)``
                returns the text it emitted.

        Returns:
            The concatenation of the writer's return values.
        """
        wanted = tuple(self.prefixes)
        previous = ''
        chunks = []
        for prefix, event, value in parser:
            if self._is_wanted(prefix, value, wanted):
                chunks.append(writer.write(prefix, event, value, previous))
                # Track the last event actually written: the writer uses it
                # to decide whether a separating comma is required.
                previous = event
        return "".join(chunks)