"""Build the germline file from FASTA germline sequences.

Usage:
    buildBrowserGermline.py <FASTA input files> <JSON/DATA germline file> <JSON output file>

Every FASTA file becomes one "system", named after the file's base name up
to its first dot.  The output file contains two assignments:

    germline = <JSON object: system -> identifier -> sequence>

    germline_data = <verbatim content of the JSON/DATA germline file>
"""
import json
import sys


USAGE = "Usage: %s <FASTA input files> <JSON/DATA germline file> <JSON output file>"


def extract_identifier(header):
    """Return the short identifier for a FASTA header (leading '>' removed).

    When the header contains '|', the second '|'-separated field is kept.
    Whatever remains is then cut at its first '_'.
    """
    identifier = header
    if '|' in identifier:
        identifier = identifier.split('|')[1]
    if '_' in identifier:
        identifier = identifier.split('_')[0]
    return identifier


def system_name(path):
    """Return the system name of a FASTA path: its base name up to the first dot."""
    return path.split('/')[-1].split('.')[0]


def parse_fasta(lines):
    """Parse FASTA lines into a dict mapping identifier -> sequence.

    lines is any iterable of text lines (an open file works).  Multi-line
    sequences are concatenated, empty lines are ignored, and records without
    any sequence are not stored.  When an identifier appears several times,
    the last record wins.  Sequence lines that precede the first header are
    stored under the empty identifier.
    """
    sequences = {}
    identifier = ""
    chunks = []

    for raw_line in lines:
        line = raw_line.rstrip('\n\r')
        if not line:
            continue

        if line[0] == '>':
            # A new record starts: store the one that has just ended.
            if chunks:
                sequences[identifier] = "".join(chunks)
            identifier = extract_identifier(line[1:])
            chunks = []
        else:
            chunks.append(line)

    # Store the last record of the file.
    if chunks:
        sequences[identifier] = "".join(chunks)

    return sequences


def main(argv):
    """Run the script on argv and return the process exit status.

    argv follows the layout of sys.argv: program name, one or more FASTA
    files, the JSON/DATA germline file, and finally the output file.
    Returns 1 after printing the usage message when arguments are missing,
    0 on success.
    """
    # The last two arguments are always the data file and the output file,
    # so at least one FASTA file plus those two are required.
    if len(argv) < 4:
        print(USAGE % argv[0])
        return 1

    fasta_paths = argv[1:-2]
    data_file = argv[-2]
    output_name = argv[-1]

    table = {}
    for path in fasta_paths:
        # Parsing state is local to each file, so nothing leaks from one
        # FASTA file into the next one.
        with open(path, "r") as fasta:
            table[system_name(path)] = parse_fasta(fasta)

    # Read the data file before touching the output, so that a missing data
    # file does not leave a truncated output file behind.
    with open(data_file, "r") as data:
        germline_data = data.read()

    with open(output_name, "w") as output:
        output.write("germline = ")
        json.dump(table, output, indent=2, sort_keys=True)
        output.write("\n\n")
        output.write("germline_data = ")
        output.write(germline_data)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))