# buildBrowserGermline.py
import json
import sys

def get_required_files(germlines_data):
    '''
    Parse the germlines data and get all the files that are required by that
    file.

    germlines_data is the path of a JSON file. Each top-level entry must hold
    a 'recombinations' list; every recombination may list file names under
    the keys '5', '4' and '3'.

    The function returns a list of the files, in order of first appearance
    (uniqueness is guaranteed)
    '''
    # Close the description file as soon as it has been parsed.
    with open(germlines_data, 'r') as germlines_file:
        germlines_json = json.load(germlines_file)

    files = []
    # Set mirroring `files`, so that the duplicate check does not scan the list.
    seen = set()
    for germline in germlines_json:
        for recombination in germlines_json[germline]['recombinations']:
            for gene in ['5', '4', '3']:
                if gene not in recombination:
                    continue
                for f in recombination[gene]:
                    if f not in seen:
                        seen.add(f)
                        files.append(f)
    return files


# Exactly two arguments are expected: the input file and the output file.
if len(sys.argv) != 3:
    # sys.exit() called with a string prints it on stderr and exits with
    # status 1, so that a caller can detect the misuse.
    sys.exit("Usage: %s <JSON/DATA germline file> <JSON output file>" % sys.argv[0])
# Positional command-line arguments: germline description, then output path.
data_file, output_name = sys.argv[1], sys.argv[2]

# table[system][identifier] = sequence, filled while the files are read.
table = {}

# Identifier and sequence of the record currently being read.
identifiant, sequence = "", ""

# Files referenced by the germline description, each of them listed once.
germline_files = get_required_files(data_file)

# Read every required file and store its sequences in table[system].
for current_file in germline_files:
    try:
        fasta = open(current_file, "r")
    except IOError as e:
        # Raise the same exception type with a troubleshooting hint appended.
        # This call form is valid on both Python 2 and Python 3, unlike the
        # Python 2-only three-expression ``raise``; on Python 3 the original
        # error stays visible through implicit exception chaining.
        raise type(e)(str(e) + '\nDid you forget to run ``make\'\' in the germline directory?\n'
                      + 'Otherwise, please tell us about the problem at contact@vidjil.org')

    # The system name is the base name of the file, up to its first dot.
    system = current_file.split('/')[-1].split('.')[0]
    table[system] = {}

    # Start each file from a clean state, so that nothing read from the
    # previous file can leak into this one.
    identifiant = ""
    sequence = ""

    # The file is closed even when reading fails.
    with fasta:
        for ligne in fasta:
            ligne = ligne.rstrip('\n\r')
            if not ligne:
                continue

            if ligne[0] == '>':
                # A new header ends the record being read: store it.
                if sequence:
                    table[system][identifiant] = sequence

                identifiant = ligne[1:]

                # Keep the second '|'-separated field when there is one...
                if '|' in identifiant:
                    identifiant = identifiant.split('|')[1]

                # ...and drop everything from the first '_' onwards.
                if '_' in identifiant:
                    identifiant = identifiant.split('_')[0]

                sequence = ""
            else:
                sequence += ligne

    # Store the last record of the file. Records without any sequence are
    # never stored.
    if sequence:
        table[system][identifiant] = sequence


# Write two assignments to the output file: ``germline`` holds the sequences
# read above, ``germline_data`` the content of the description file verbatim.
with open(output_name, "w") as output:
    output.write("germline = ")
    json.dump(table, output, indent=2, sort_keys=True)

    # Both files are closed on exit, including when a write fails.
    with open(data_file, "r") as data:
        output.write("\n\n")
        output.write("germline_data = ")
        output.write(data.read())