Commit 558bf710, authored by Ryan Herbert: add function to compute clones above min abundance

Takes a minimum threshold (a percentage) and returns a dict with the number
of clones per locus whose reads are at or above that percentage of the total reads for that locus.

See #3454
parent ddc885aa
......@@ -91,6 +91,23 @@ def compute_contamination(sequence_file_id, results_file_id, config_id):
return result
def compute_num_clones(results_file_id, min_threshold):
    """Count, per locus, the clones whose reads reach a minimum abundance.

    The results file referenced by ``results_file_id`` is parsed as JSON.
    For every locus listed under ``reads.germline`` the cut-off is
    ``min_threshold`` percent of the first value of that locus' totals;
    a clone is counted when the first value of its ``reads`` list is at
    or above the cut-off of its locus.

    Args:
        results_file_id: id of the row in ``db.results_file`` whose
            ``data_file`` is read.
        min_threshold: minimum abundance, expressed as a percentage
            (it is divided by 100 before use).

    Returns:
        A dict mapping each locus to the number of clones at or above
        the cut-off for that locus.

    Raises:
        KeyError: if a clone refers to a locus that is absent from
            ``reads.germline``.
    """
    results_file = db.results_file[results_file_id]

    # Read-only: opening in write mode would truncate the results file
    # before it could be parsed.
    with open(results_file.data_file, 'rb') as results:
        d = json.load(results)

    loci_threshold = {}
    loci_min = {}
    loci_totals = d['reads']['germline']
    for locus in loci_totals:
        loci_threshold[locus] = 0
        loci_min[locus] = loci_totals[locus][0] * (min_threshold / 100.0)

    for clone in d["clones"]:
        # NOTE(review): assumes each clone stores its locus under the
        # 'germline' key; clone['reads'] is indexed as a list below, so it
        # cannot hold that key itself -- confirm against the file format.
        germline = clone['germline']
        if clone['reads'][0] >= loci_min[germline]:
            loci_threshold[germline] += 1

    return loci_threshold
def schedule_run(id_sequence, id_config, grep_reads=None):
from subprocess import Popen, PIPE, STDOUT, os
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment