# buildBrowserGermline.py
import json
import sys

4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
def get_required_files(germlines_data):
    '''
    Parse the germlines data and get all the files that are required by that
    file.

    germlines_data is the path to a JSON file. Each top-level entry must
    have a 'recombinations' list; every recombination may provide file
    lists under the '5', '4' and '3' keys (looked up in that order).

    The function returns a list of the files, in order of first appearance
    (uniqueness is guaranteed)
    '''
    # A context manager closes the handle even when the JSON is malformed.
    with open(germlines_data, 'r') as germlines_file:
        germlines_json = json.load(germlines_file)

    files = []
    # Constant-time duplicate detection; `files` keeps the encounter order.
    seen = set()
    for germline in germlines_json.values():
        for recombination in germline['recombinations']:
            for gene in ('5', '4', '3'):
                if gene not in recombination:
                    continue
                for filename in recombination[gene]:
                    if filename not in seen:
                        seen.add(filename)
                        files.append(filename)
    return files
22
    
23 24
def _parse_fasta(lines):
    '''
    Read FASTA-formatted lines and return a dict mapping each sequence
    identifier to its sequence (multi-line sequences are concatenated).

    The identifier is taken from the header line, after the leading '>':
    when it contains '|' only the second '|'-separated field is kept, and
    everything from the first '_' onwards is then dropped.
    Headers that are not followed by any sequence are not stored; when an
    identifier appears several times, the last non-empty sequence wins.
    '''
    sequences = {}
    # Parser state is local to one file, so nothing leaks from a previous
    # file. Sequence lines met before any header end up under the '' key.
    identifier = ""
    chunks = []

    for line in lines:
        line = line.rstrip('\n\r')
        if not line:
            continue

        if line[0] == '>':
            # A new record starts: store the one that was being read.
            if chunks:
                sequences[identifier] = ''.join(chunks)

            identifier = line[1:]
            if '|' in identifier:
                identifier = identifier.split('|')[1]
            if '_' in identifier:
                identifier = identifier.split('_')[0]
            chunks = []
        else:
            chunks.append(line)

    # Store the last record of the file.
    if chunks:
        sequences[identifier] = ''.join(chunks)

    return sequences


if len(sys.argv) < 2:
    print("Usage: %s JSON/DATA germline file [JSON output file]" % sys.argv[0])
    sys.exit()

data_file = sys.argv[1]

# The output file is optional: default to None so that the final check does
# not raise a NameError when the argument is omitted.
output_name = sys.argv[2] if len(sys.argv) > 2 else None

# Maps each system name to a {sequence identifier: sequence} dict.
table = {}

germline_files = get_required_files(data_file)

for current_file in germline_files:
    # The system name is the base name of the file, up to its first '.'
    system = current_file.split('/')[-1].split('.')[0]

    with open(current_file, "r") as fasta:
        table[system] = _parse_fasta(fasta)


if output_name:
    with open(output_name, "w") as output:
        output.write("germline = ")
        json.dump(table, output, indent=2, sort_keys=True)

        output.write("\n\n")
        output.write("germline_data = ")
        # The germline description file is copied verbatim.
        with open(data_file, "r") as data:
            output.write(data.read())