gb-to-should.py 983 Bytes
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
'''Rough conversion from .gb to .should-vdj.fa'''

# Usage: python gb-to-should.py *.gb

import sys

def parse(stream):
    '''Write one rough .should-vdj.fa record for the .gb lines in `stream`.

    `stream` is any iterable of text lines (an open file, a list of strings).
    Each line is stripped of surrounding whitespace, then handled according
    to the current phase:

      0  before any marker: lines are ignored
      1  after a line starting with "FEATURES": for every line starting with
         '/label', the text after the first '=' is written to stdout followed
         by a space, but only when that text contains 'TR'
      2  after a line equal to "ORIGIN": the first space-separated token of
         each line is dropped and the remaining tokens are concatenated and
         written on a new line
      3  after a line equal to "//": lines are ignored

    The output starts with '>' and ends with two newlines. Everything is
    written to sys.stdout; nothing is returned. Lines that match no rule are
    silently skipped.
    '''
    sys.stdout.write(">")

    phase = 0
    for line in stream:
        line = line.strip()

        # Section markers only switch the phase; they carry no data.
        if line.startswith("FEATURES"):
            phase = 1
            continue
        if line == "ORIGIN":
            phase = 2
            continue
        if line == "//":
            phase = 3
            continue

        if phase == 1:
            if not line.startswith('/label'):
                continue
            # partition() keeps the whole value even when it contains '='
            # and yields '' (then skipped below) when there is no '=' at
            # all, where indexing the result of split('=') would raise.
            what = line.partition('=')[2]
            if 'TR' in what:
                sys.stdout.write(what + ' ')
        elif phase == 2:
            # Drop the leading token and glue the remaining chunks together.
            seq = ''.join(line.split(' ')[1:])
            sys.stdout.write('\n' + seq)

    sys.stdout.write('\n\n')



# Convert every file named on the command line, in order. sys.argv[0] is the
# script's own path, so it is skipped rather than parsed as a .gb file.
if __name__ == "__main__":
    for path in sys.argv[1:]:
        # A '#<file name>' line precedes each converted record.
        sys.stdout.write('#%s\n' % path)
        # The with-block closes each input file as soon as it is converted.
        with open(path) as stream:
            parse(stream)