vidjil_utils.py 12.6 KB
Newer Older
1
import math
2
import re
3
import defs
4
import json
5
import datetime
6
from gluon import current
7
from datetime import date
8

9
def format_size(n, unit='B'):
    '''
    Takes an integer n, representing a filesize and returns a string
    where the size is formatted with the correct SI prefix and
    with a constant number of significant digits.

    n: the size to format; 0 is returned as the bare string '0'
    unit: suffix appended after the SI prefix (default: 'B')

    Sizes too large for the biggest known prefix ('P') are still
    expressed with that prefix.

    Example:
    >>> format_size(42)
    '42 B'
    >>> format_size(123456)
    '123 kB'
    >>> format_size(100000)
    '100 kB'
    >>> format_size(1000*1000)
    '1.00 MB'
    >>> format_size(1024*1024*1024)
    '1.07 GB'
    >>> format_size(42*(2**40))
    '46.2 TB'
    '''

    if n == 0:
        return '0'

    size = float(n)
    PREFIXES = ['', 'k', 'M', 'G', 'T', 'P']

    prefix = PREFIXES[0]
    for prefix in PREFIXES:
        # Also stop on the last prefix: there is no larger prefix to attach,
        # so dividing once more would silently drop a factor 1000.
        if size < 1000 or prefix == PREFIXES[-1]:
            break
        size /= 1000

    # Three significant digits; sizes without prefix are shown as integers.
    if size >= 100 or not prefix:
        fmt = '%.0f'
    elif size >= 10:
        fmt = '%.1f'
    else:
        fmt = '%.2f'

    return fmt % size + ' ' + prefix + unit

49

50
51
52
53


def age_years_months(birth, months_below_year=4, today=None):
    '''
    Get the age in years, and possibly months.

    birth: birth date (a datetime.date); any other value yields '-/-'
    months_below_year: ages (in years) strictly below this value are
    completed with the number of months, eg. '2y 3m'
    today: date at which the age is computed (default: the current date)

    >>> age_years_months(date(2010, 5, 20), today=date(2020, 5, 19))
    '9y'
    >>> age_years_months(date(2018, 11, 15), today=date(2020, 1, 10))
    '1y 1m'
    >>> age_years_months('unknown')
    '-/-'
    '''
    if not isinstance(birth, datetime.date):
        return '-/-'

    if today is None:
        today = date.today()

    # One year less as long as the birthday did not occur yet this year
    birthday_pending = (today.month, today.day) < (birth.month, birth.day)
    years = today.year - birth.year - birthday_pending
    age = '%dy' % years

    if years >= months_below_year:
        return age

    # Same idea for months: the current month only counts once the
    # day of birth has been reached.
    months = today.month - birth.month - (today.day < birth.day)
    if months < 0:
        months += 12

    return age + ' %dm' % months

72
73
74
75
76
77
def anon_birth(patient_id, user_id):
    '''
    Anonymize birth date. Only the 'anon' access see the full birth date.

    Returns an empty string when the patient lookup yields nothing or when
    the patient has no birth date. Otherwise returns the age (as given by
    age_years_months), prefixed by the birth date when user_id has the
    'anon' permission on this patient.
    '''
    db = current.db
    auth = current.auth

    patient = db.patient[patient_id]
    # A missing row is handled like a missing birth date rather than
    # failing on the attribute access.
    if patient is None or patient.birth is None:
        return ""

    birth = patient.birth
    age = age_years_months(birth)

    if auth.get_permission("anon", "patient", patient_id, user_id):
        return "%s (%s)" % (birth, age)
    return age

89
def anon_ids(patient_id):
    '''
    Anonymize patient name. Only the 'anon' access see the full patient name.

    Reads the first and last names of the patient and delegates the
    anonymization itself to anon_names.
    '''
    # Fetch the row once instead of once per field
    patient = current.db.patient[patient_id]

    return anon_names(patient_id, patient.first_name, patient.last_name)

def anon_names(patient_id, first_name, last_name, can_view=None):
    '''
    Anonymize the given names of the patient whose ID is patient_id.

    can_view tells if one can view the patient's personal informations.
    When it is None, auth.can_view_patient_info(patient_id) is asked
    instead; when it is provided that call is not performed.

    Returns "<last name> <first name>" when the informations can be viewed,
    otherwise only the first three characters of the (encoded) last name.
    For admins the patient id is appended between parentheses.
    '''
    auth = current.auth

    ln = safe_encoding(last_name)
    fn = safe_encoding(first_name)

    if can_view is None:
        can_view = auth.can_view_patient_info(patient_id)

    if can_view:
        name = ln + " " + fn
    else:
        # NOTE(review): ln is already encoded, so this slice may cut in the
        # middle of a multi-byte character -- to be confirmed.
        name = ln[:3]

    # Admins also see the patient id
    if auth.is_admin():
        name += ' (%s)' % patient_id

    return name
121

122
123
124
125
126
127
def safe_encoding(string):
    '''
    Try to encode the string in UTF-8 but if it fails just
    returns the string.

    Byte strings are returned unchanged. Any other value is first
    converted to text, then encoded.
    '''
    if isinstance(string, bytes):
        # Already encoded: converting it to text could only yield the same
        # bytes or fail, and in both cases the input was returned as is.
        return string

    try:
        text_type = unicode
    except NameError:
        # No separate 'unicode' builtin: str is the text type
        text_type = str

    try:
        return text_type(string).encode('utf-8')
    except UnicodeDecodeError:
        return string

Mikaël Salson's avatar
Mikaël Salson committed
132
133
134
135
136
137
138
139
140
141
142
143
144
def prevent_none(value, replacement_value):
    '''
    Substitute replacement_value for a None value.

    Any value other than None (including falsy ones such as 0 or '')
    is returned untouched.

    >>> prevent_none(None, 2)
    2
    >>> prevent_none('toto', 2)
    'toto'
    '''
    return replacement_value if value is None else value
145

146
def advanced_filter(list_searched, filter_str):
    '''
    Tell whether a list of strings respects a filter.

    list_searched: the strings to check
    filter_str: space-separated words. A plain word must be contained in at
    least one of the strings. A word prefixed with "-" must not be contained
    in any of them. The comparison is case-insensitive.

    Returns True if every word of the filter is respected.

    >>> advanced_filter(['Hello World', 'foo'], 'hello -bar')
    True
    >>> advanced_filter(['Hello World', 'foo'], 'hello -foo')
    False
    '''
    lowered = [s.lower() for s in list_searched]

    for word in filter_str.split(" "):
        excluded = word.startswith("-")
        pattern = (word[1:] if excluded else word).lower()
        found = any(pattern in item for item in lowered)

        if excluded:
            # A lone "-" excludes nothing
            if pattern and found:
                return False
        elif not found:
            return False
    return True
161
162


163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def put_properties_in_dict(src_dict, dest_dict, properties):
    '''
    Put the values of src_dict in dest_dict.
    Only keys that are keys in properties are copied to dest_dict.
    The key in dest_dict is determined by properties[key]

    dest_dict is modified in place and also returned.

    >>> put_properties_in_dict({'toto': [1, 2], 'tutu': 'A'}, {'toto': 3, 'machin': 2}, {'toto': 'toto', 'titi': 'titi', 'tutu': 'truc'}) == {'toto': [1, 2], 'truc': 'A', 'machin': 2}
    True
    '''
    for src_key, dest_key in properties.items():
        if src_key in src_dict:
            dest_dict[dest_key] = src_dict[src_key]
    return dest_dict


178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#### Utilities on regex
def search_first_regex_in_file(regex, filename, max_nb_line=None):
    '''
    Search compiled regular expressions in a file and gather their
    named groups.

    regex: iterable of compiled regular expressions
    filename: the file to read. If it cannot be opened, nothing is matched.
    max_nb_line: if not None, it is given to readlines() as its size hint,
    which limits how much of the file is read.

    For each regular expression only the first matching line is used.
    Returns a dict mapping each matched named group to its value, with
    backslashes removed. Named groups that did not take part in the match
    are left out.
    '''
    try:
        with open(filename) as searched_file:
            if max_nb_line is None:
                lines = searched_file.readlines()
            else:
                lines = searched_file.readlines(max_nb_line)
    except IOError:
        lines = []

    matched_keys = {}
    for r in regex:
        for line in lines:
            m = r.search(line)
            if m:
                for (key, val) in m.groupdict().items():
                    # Optional groups that did not match are None
                    if val is not None:
                        matched_keys[key] = val.replace('\\', '')
                break
    return matched_keys

198
199
200

#### Utilities on JSON

201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def cleanup_json_sample(json_string):
    '''
    Takes a JSON sample and close the ) ] " ' so that
    the string can be parsed by a JSON parser.

    A trailing comma and a trailing incomplete property of a dict
    (a key without its value) are removed before closing the delimiters.

    Every { [ " ' character is considered as a delimiter, wherever it is.

    >>> cleanup_json_sample('"toto": [ [ 1 ], "t')
    '"toto": [ [ 1 ], "t"]'
    >>> cleanup_json_sample('"toto": [ [ 1 ], ')
    '"toto": [ [ 1 ]]'
    >>> cleanup_json_sample('{"germlines": {"custom": {"3": [')
    '{"germlines": {"custom": {"3": []}}}'
    >>> cleanup_json_sample('{"germlines": {"custom": {"3":')
    '{"germlines": {"custom": {}}}'
    >>> cleanup_json_sample('{"germlines": {"custom": {"3')
    '{"germlines": {"custom": {}}}'
    >>> cleanup_json_sample('{"germlines": {"custom": {"3": [2], "2')
    '{"germlines": {"custom": {"3": [2]}}}'
    >>> cleanup_json_sample('{"germlines": {"custom": {"3": [2], "2": "truc"')
    '{"germlines": {"custom": {"3": [2], "2": "truc"}}}'

    '''
    start_delimiters = ['{', '[', '"', "'"]
    end_delimiters = ['}', ']', '"', "'"]

    # Delimiters that are opened and not closed yet
    delimiter_stack = []
    # Position of the last comma that is not followed by any delimiter
    pos_isolated_comma = None

    for i, char in enumerate(json_string):
        if char in start_delimiters or char in end_delimiters:
            # Opening delimiter that char would close
            # (None when char can only open)
            if char in end_delimiters:
                corresponding_delimiter = start_delimiters[end_delimiters.index(char)]
            else:
                corresponding_delimiter = None

            if delimiter_stack and delimiter_stack[-1] == corresponding_delimiter:
                delimiter_stack.pop()
            else:
                delimiter_stack.append(char)
            pos_isolated_comma = None
        elif char == ',':
            pos_isolated_comma = i

    if pos_isolated_comma is not None:
        json_string = json_string[:pos_isolated_comma]
    json_string = json_string.strip()

    # Closing delimiters to append, innermost first.
    # Must be a real list: its length is tested and it may be popped.
    delimiter_stack.reverse()
    end_delimiter_stack = [end_delimiters[start_delimiters.index(c)]
                           for c in delimiter_stack]

    if (len(end_delimiter_stack) > 0 and end_delimiter_stack[0] == '}')\
       or (len(end_delimiter_stack) > 1 and end_delimiter_stack[0] in ['"', "'"] and end_delimiter_stack[1] == '}'):
        # We didn't close a dict. Are we in the middle of a property (eg. "toto": )
        last_colon = json_string.rfind(':')
        last_bracket = json_string.rfind('{')
        # -1 so that the comma itself is dropped by the truncation below
        last_comma = json_string.rfind(',')-1
        property_start = max(last_comma, last_bracket)
        if last_colon == len(json_string)-1\
           or property_start > last_colon:
            json_string = json_string[:property_start+1]
            # The unfinished key was removed: do not close its quote
            if len(end_delimiter_stack) > 1 and end_delimiter_stack[0] != '}':
                end_delimiter_stack.pop(0)

    return json_string + ''.join(end_delimiter_stack)



266
267
268
269
270
271
def extract_value_from_json_path(json_path, json):
    '''
    Highly inspired from http://stackoverflow.com/a/7320664/1192742

    Takes a path (for instance field1/field2/field3) and returns
    the value at that path.
    The path also support indexed operations (such as field1/field2[3]/field4)

    json_path: the path, whose leading and trailing '/' are ignored
    json: the parsed JSON data (this parameter hides the json module
    inside this function)

    If the value doesn't exist None will be returned.
    '''
    elem = json
    try:
        for x in json_path.strip("/").split("/"):
            list_pos = re.search(r'\[(\d+)\]', x)
            if list_pos is not None:
                index = int(list_pos.group(1))
                field = x[:list_pos.start()]
                elem = elem.get(field)[index]
            else:
                elem = elem.get(x)
    except (AttributeError, IndexError, KeyError, TypeError):
        # The path cannot be followed until its end: the value doesn't
        # exist (the element reached so far must not be returned)
        return None

    return elem

292
def extract_fields_from_json(json_fields, pos_in_list, filename, max_bytes = None):
    '''
    Takes a map of JSON fields (the key is a common name
    and the value is a path) and return a similar map
    where the values are the values from the JSON filename.

    If the value retrieved from a JSON is an array, we will
    get only the item at position <pos_in_list> (if None, will
    get all of them). An array too short to have such an item
    is kept whole.

    If max_bytes is not None, only that amount of the file is read and
    the sample goes through cleanup_json_sample before being parsed.

    Fields whose value is not found are left out of the result. When the
    file cannot be read or parsed, the failure is logged and the result
    is empty.
    '''
    try:
        with open(filename) as json_file:
            if max_bytes is None:
                json_dict = json.loads(json_file.read())
            else:
                json_dict = json.loads(cleanup_json_sample(json_file.read(max_bytes)))
    except IOError:
        current.log.debug('JSON loading failed')
        json_dict = {}
    except ValueError as e:
        current.log.debug(str(e))
        # Without this the lookups below would fail on an unbound name
        json_dict = {}

    matched_keys = {}
    for field in json_fields:
        value = extract_value_from_json_path(json_fields[field], json_dict)
        if value is None:
            continue

        # Only arrays are indexed: strings, numbers and objects are kept whole
        if isinstance(value, list) and pos_in_list is not None\
           and len(value) > pos_in_list:
            matched_keys[field] = value[pos_in_list]
        else:
            matched_keys[field] = value

    return matched_keys

####

326
327
328
329
330
# Template of the links to the sources: takes the path of the file
# (relative to the application directory) and the line number
SOURCES = "https://github.com/vidjil/vidjil/blob/master/server/web2py/applications/vidjil/%s#L%s"

# Directory of the source files: the default one, and the ones of the
# files that are located elsewhere
SOURCES_DIR_DEFAULT = 'controllers/'
SOURCES_DIR = {
    'task.py': 'models/',
    'db.py': 'models/',
    'sequence_file.py': 'models/',
    'vidjil_utils.py': 'modules/',
}

# Patterns looked for in a log line: '(<digits>)', ' c<digits>',
# '[<digits>]' and '<something>.py:<digits>'
log_patient = re.compile(r'\((\d+)\)')
log_config = re.compile(r' c(\d+)')
log_task = re.compile(r'\[(\d+)\]')
log_py = re.compile(r'(.*[.]py):(\d+)')
340
341
342
343
344
345

def log_links(s):
    '''Add HTML links to a log string

    At most one link is added. The following rules are tried in that
    order, and the last one that applies gives both the target of the
    link and the part of the string that is linked:
     - '(<patient>)' together with ' c<config>': href on 'c<config>',
       otherwise '(<patient>)' alone: call to patient/info
     - '[<task>]': call to admin/showlog on the log of the task
     - '<file>.py:<line>': href to that line in the sources

    >>> log_links("abcdef")
    'abcdef'
    >>> log_links("[1234]abcdef")
    '[<a class="loglink pointer" onclick="db.call(\\'admin/showlog\\', {\\'file\\': \\'../..//mnt/result/tmp/out-001234/001234.vidjil.log\\', \\'format\\': \\'raw\\'})">1234</a>]abcdef'
    >>> log_links("abcdef(234)")
    'abcdef(<a class="loglink pointer" onclick="db.call(\\'patient/info\\', {\\'id\\': \\'234\\'})">234</a>)'
    >>> log_links("abcdef(234)abcdef c11")
    'abcdef(234)abcdef <a class="loglink pointer" href="?patient=234&config=11">c11</a>'
    '''

    ### Parses the input string

    m_patient = log_patient.search(s)
    patient = m_patient.group(1) if m_patient else None

    m_config = log_config.search(s)
    config = m_config.group(1) if m_config else None

    m_task = log_task.search(s)
    task = int(m_task.group(1)) if m_task else None

    m_py = log_py.search(s)

    ### Rules

    # The attribute of the link and the span it applies to are always set
    # together, so that a rule can not reuse the span of another rule.
    onclick = '''onclick="db.call('%s', %s)"'''
    link = ''
    start = end = 0

    if patient and config:
        link = 'href="%s"' % ("?patient=%s&config=%s" % (patient, config))
        (start, end) = m_config.span()
        start += 1  # the leading space is not part of the link

    elif patient:
        link = onclick % ("patient/info", str({'id': patient}))
        (start, end) = m_patient.span(1)

    if task:
        args = {'file': '../../' + defs.DIR_OUT_VIDJIL_ID % task + defs.BASENAME_OUT_VIDJIL_ID % task + '.vidjil.log', 'format': 'raw'}
        link = onclick % ("admin/showlog", str(args))
        (start, end) = m_task.span(1)

    if m_py:
        source = m_py.group(1)
        source = SOURCES_DIR.get(source, SOURCES_DIR_DEFAULT) + source
        link = 'href="%s"' % (SOURCES % (source, m_py.group(2)))
        (start, end) = m_py.span(2)

    ### Build final string

    if link:
        s = '%s<a class="loglink pointer" %s>%s</a>%s' % (s[:start], link, s[start:end], s[end:])

    return s
413
414
415
416
417

def check_enough_space(directory):
    '''
    Tell whether the filesystem holding directory has enough space left.

    The sizes are read from the output of the 'df' command. Returns True
    when the available space is at least defs.FS_LOCK_THRESHHOLD percent
    of the size of the filesystem.
    '''
    import subprocess
    # -P gives one line per filesystem, even with a long device name.
    # universal_newlines gives the output as text.
    df = subprocess.Popen(["df", "-P", directory], stdout=subprocess.PIPE,
                          universal_newlines=True)
    output = df.communicate()[0]

    # The first line is the header. The mount point comes last and is
    # the only field that may contain spaces.
    device, size, used, available, percent, mountpoint = output.split("\n")[1].split(None, 5)
    available = int(available)
    size = int(size)

    # 100.0: with an integer threshold the division would be truncated
    result = available >= (size * (defs.FS_LOCK_THRESHHOLD / 100.0))
    return result