Commit fe096698 authored by Mikaël Salson

generate-recombinations.py: Update following changes in .g file

parent a0e02b5a
...@@ -5,6 +5,7 @@ import json ...@@ -5,6 +5,7 @@ import json
import fasta import fasta
import random import random
import argparse import argparse
import os.path
random.seed(33328778554) random.seed(33328778554)
...@@ -245,7 +246,7 @@ def list_int(s): ...@@ -245,7 +246,7 @@ def list_int(s):
if __name__ == '__main__': if __name__ == '__main__':
DESCRIPTION='Script generating fake V(D)J recombinations' DESCRIPTION='Script generating fake V(D)J recombinations'
parser = argparse.ArgumentParser(description=DESCRIPTION) parser = argparse.ArgumentParser(description=DESCRIPTION)
parser.add_argument('-g', '--germlines', type=file, default='germlines.data', help='path to the germlines.data file') parser.add_argument('-g', '--germlines', type=file, default='homo-sapiens.g', help='path to the germlines.data file')
parser.add_argument('--deletions', '-d', type=list_int, default = [(lambda: 5)], help='List -- separated by colons -- of the number of deletions at junctions (or single value, if the number is the same everywhere).') parser.add_argument('--deletions', '-d', type=list_int, default = [(lambda: 5)], help='List -- separated by colons -- of the number of deletions at junctions (or single value, if the number is the same everywhere).')
parser.add_argument('--insertions', '-i', type=list_int, default = [(lambda: 3)], help='List -- separated by colons -- of the number of insertions at junctions (or single value, if the number is the same everywhere') parser.add_argument('--insertions', '-i', type=list_int, default = [(lambda: 3)], help='List -- separated by colons -- of the number of insertions at junctions (or single value, if the number is the same everywhere')
parser.add_argument('--random-deletions', '-D', type=list_random_tuple, help='List of random deletions at junctions under the format mean,standard_deviation (or single value, if the number is the same everywhere') parser.add_argument('--random-deletions', '-D', type=list_random_tuple, help='List of random deletions at junctions under the format mean,standard_deviation (or single value, if the number is the same everywhere')
...@@ -258,23 +259,23 @@ if __name__ == '__main__': ...@@ -258,23 +259,23 @@ if __name__ == '__main__':
germlines_json = args.germlines.read().replace('germline_data = ', '') germlines_json = args.germlines.read().replace('germline_data = ', '')
germlines = json.loads(germlines_json) germlines = json.loads(germlines_json)
for code in germlines: for code in germlines["systems"]:
g = germlines[code] g = germlines["systems"][code]
print("--- %s - %-4s - %s" % (g['shortcut'], code, g['description'])) print("--- %s - %-4s - %s" % (g['shortcut'], code, g['description']))
basepath = germlines["path"] + os.path.sep
# Read germlines # Read germlines
nb_recomb = 0 nb_recomb = 0
for recomb in g['recombinations']: for recomb in g['recombinations']:
labels = ['V'] labels = ['V']
files = [recomb['5']] files = [[basepath + f for f in recomb['5']]]
if '4' in recomb: if '4' in recomb:
labels.append('D') labels.append('D')
files.append(recomb['4']) files.append([basepath + f for f in recomb['4']])
labels.append('J') labels.append('J')
files.append(recomb['3']) files.append([basepath + f for f in recomb['3']])
repertoire = vdj_repertoire.files(labels, files) repertoire = vdj_repertoire.files(labels, files)
print(" 5: %3d sequences\n" % repertoire.nb_sequences('V'), print(" 5: %3d sequences\n" % repertoire.nb_sequences('V'),
...@@ -286,14 +287,14 @@ if __name__ == '__main__': ...@@ -286,14 +287,14 @@ if __name__ == '__main__':
code_in_filename = code_in_filename + '-%d' % (nb_recomb+1) code_in_filename = code_in_filename + '-%d' % (nb_recomb+1)
# Generate recombinations # Generate recombinations
recombination0 = vdj_recombination() # recombination0 = vdj_recombination()
generate_to_file(repertoire, recombination0, code, '../data/gen/0-removes-%s.should-vdj.fa' % code_in_filename, 1) # generate_to_file(repertoire, recombination0, code, '../data/gen/0-removes-%s.should-vdj.fa' % code_in_filename, 1)
deletions = args.deletions if args.random_deletions is None else args.random_deletions deletions = args.deletions if args.random_deletions is None else args.random_deletions
insertions = args.insertions if args.random_insertions is None else args.random_insertions insertions = args.insertions if args.random_insertions is None else args.random_insertions
recombination5 = vdj_recombination(deletions=deletions, insertions=insertions, processing = [(lambda s: mutate_sequence(s, args.error))]) recombination5 = vdj_recombination(deletions=deletions, insertions=insertions, processing = [(lambda s: mutate_sequence(s, args.error))])
generate_to_file(repertoire, recombination5, code, '../data/gen/5-removes-%s.should-vdj.fa' % code_in_filename, args.nb_recombinations) generate_to_file(repertoire, recombination5, code, '../data/gen/generated-%s.should-vdj.fa' % code_in_filename, args.nb_recombinations)
print() print()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment