# analysis-windows.py

import json
import argparse

# Command-line interface. The arguments are parsed as soon as this module is
# executed, and the result is kept in `args` for the rest of the script.
parser = argparse.ArgumentParser(description = 'Change the window in "id" fields within .analysis files\nTo be used for maintenance purposes only')
# argparse %-formats help strings against a mapping of the argument's
# attributes, so the default value is spelled '%(default)s'. The optparse
# spelling '%default' is read as a '%d' conversion and breaks the help output.
parser.add_argument('--window', '-w', type=int, default=50, help='new window length (%(default)s)')
parser.add_argument('--overwrite', '-o', action='store_true', help='overwrite existing .analysis files')
# NOTE(review): this help text says '.vidjil files' while the description and
# --overwrite talk about '.analysis files' - confirm which one is intended.
parser.add_argument('file', nargs='+', help='''.vidjil files''')

args = parser.parse_args()

12 13
# Windows already given to a clone by change_window(). A candidate window found
# in here is refused, so that two clones never end up with the same 'id'.
# Only membership tests and insertions are needed, hence a set.
used_windows = set()


def change_window(clone, w):
    '''Replace the window stored in clone['id'] by a window of length `w`.

    The current window is looked up in clone['sequence']; the new window is
    the slice of `w` characters of that sequence centered on the same place
    (rounded towards the start of the sequence when the length difference is
    odd). On success clone['id'] is updated in place and the new window is
    recorded in `used_windows`.

    The clone is left unchanged, and a message is printed, when:
    - the clone has no 'sequence',
    - the current 'id' is not found in the sequence,
    - the new window would extend past either end of the sequence,
    - the new window was already given to another clone.

    Returns None in every case.
    '''
    old_window = clone['id']
    sequence = clone.get('sequence')

    # Without a sequence there is nothing to cut a new window from
    if sequence is None:
        print("! Clone without sequence - clone '%s' unchanged" % old_window)
        return

    pos = sequence.find(old_window)

    if pos < 0:
        print("! Clone without window in 'id' - clone '%s' unchanged" % old_window)
        return

    # Floor division: new_start is a slice index and has to stay an integer
    # ('/' yields a float on Python 3, which slicing rejects)
    new_start = pos + (len(old_window) - w) // 2

    if new_start < 0 or new_start + w > len(sequence):
        print("! Sequence too short for new window - clone '%s' unchanged" % old_window)
        return

    new_window = sequence[new_start:new_start + w]

    if new_window in used_windows:
        print("! Window '%s' is already used - clone '%s' unchanged" % (new_window, old_window))
        return

    clone['id'] = new_window
    used_windows.add(new_window)
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70


class Analysis():
    '''JSON document held in memory; iterating over it gives its clones.'''

    def __init__(self):
        # Parsed JSON document, empty until load() is called
        self.d = {}

    def load(self, f):
        '''Replace the content by the JSON document read from the file object `f`.'''
        self.d = json.load(f)

    def save(self, f):
        '''Write the content as JSON, indented by 2 spaces, to the file object `f`.'''
        json.dump(self.d, f, indent=2)

    def __iter__(self):
        '''Iter on clones

        The stored clone objects themselves are yielded, so modifying one
        modifies the content that save() writes. Nothing is yielded when the
        "clones" entry is missing or null (e.g. before any load()).
        '''
        return iter(self.d.get("clones") or [])


# Results are written next to the input with a '.new' suffix, unless
# --overwrite asks for the input files themselves to be replaced
if args.overwrite:
    suffix = ''
else:
    suffix = '.new'

for path in args.file:
    target = path + suffix
    print("  %s ==> %s" % (path, target))

    analysis = Analysis()

    # The input is read entirely and closed before the output is opened,
    # so `target` may be the very same file
    with open(path) as source:
        analysis.load(source)

    # change_window() updates each clone in place, inside the loaded document.
    # NOTE(review): `used_windows` is not reset between files, so a window
    # taken in one file is refused in the following ones - confirm intended.
    for clone in analysis:
        change_window(clone, args.window)

    with open(target, 'w') as destination:
        analysis.save(destination)