Commit c8bc243f authored by flothoni's avatar flothoni

spike-normalization; Update fuse for better handling of MRD fields

Also update tests for it.
parent e7bf843a
Pipeline #178580 failed with stages
in 1 minute and 42 seconds
......@@ -218,19 +218,26 @@ class Window:
"R2": [0],
"family": ["None"],
"norm_coeff": [0]}
if "mrd" in self.d:
first = self.d["mrd"]
else:
first = zeroed
if "mrd" in other.d:
second = other.d["mrd"]
else:
second = zeroed
obj.d["mrd"] = {}
concatenate_with_padding(obj.d["mrd"],
first, len(self.d["reads"]),
second, len(other.d["reads"]))
if "mrd" in self.d or "mrd" in other.d:
if "mrd" in self.d:
first = self.d["mrd"]
else:
first = zeroed
for key in first.keys():
first[key] = first[key] * len(self.d["reads"])
if "mrd" in other.d:
second = other.d["mrd"]
else:
second = zeroed
for key in second.keys():
second[key] = second[key] * len(other.d["reads"])
obj.d["mrd"] = {}
concatenate_with_padding(obj.d["mrd"],
first, len(self.d["reads"]),
second, len(other.d["reads"]))
# All other data, including 'top'
# When there are conflicting keys, keep data from the 'topmost' clone
order = [other, self] if other.d["top"] < self.d["top"] else [self, other]
......@@ -454,9 +461,9 @@ class Samples:
class MRD:
def __init__(self):
def __init__(self, number=1):
self.d={}
self.d["number"] = 0
self.d["number"] = number
def __add__(self, other):
obj=MRD()
......@@ -611,7 +618,6 @@ class ListWindows(VidjilJson):
self.d["clones"] = []
self.d["clusters"] = []
self.d["germlines"] = {}
self.d["mrd"] = MRD()
self.d["vidjil_json_version"] = VIDJIL_JSON_VERSION
self.d["producer"] = FUSE_VERSION
......@@ -769,7 +775,12 @@ class ListWindows(VidjilJson):
obj.d["samples"] = self.d["samples"] + other.d["samples"]
obj.d["reads"] = self.d["reads"] + other.d["reads"]
obj.d["diversity"] = self.d["diversity"] + other.d["diversity"]
if "mrd" in self.d and "mrd" in other.d:
if "mrd" in self.d or "mrd" in other.d:
if not "mrd" in self.d:
self.d["mrd"] = MRD()
if not "mrd" in other.d:
other.d["mrd"] = MRD()
obj.d["mrd"] = self.d["mrd"] + other.d["mrd"]
try:
......
......@@ -69,7 +69,7 @@
"3": 51
},
"sequence": "GTTCAGTGGCAGTGGGTCAGGCACTGATTTCACACTGAAAATCAGCAGGGTGGAGGCTGAGGATGTTGGAGTTTATTACTGCATGCAACGCATAGAGTTTCCCTGGGGTACTTTTGGCCAGGGGACCAAGGTGGAGATCAAACGTAAG",
"top": 44,
"top": 3,
"warn": [
{
"code": "W53",
......@@ -152,7 +152,7 @@
"3": 31
},
"sequence": "GTTCAGTGGCAGTGGGTCAGGCACTGATTTCACACTGAAAATCAGCAGGGTGGAGGCTGAGGATGTTGGAGTTTATTACTGCATGCAACGTATAGAATTTCCCTGGGGTACTTTTGGCCAGGGGACCAAGGTGGAGATCAAACGTAAG",
"top": 83,
"top": 4,
"warn": [
{
"code": "W53",
......@@ -313,7 +313,7 @@
"3": 401
},
"sequence": "GTTCAGTGGCAGTGGGTCAGGCACTGATTTCACACTGAAAATCAGCAGGGTGGAGGCTGAGGATGTTGGAGTTTATTACTGCATGCAACGTATAGAGTTTCCCTGGGGTACTTTTGGCCAGGGGACCAGGTGGAGATCAAACGTAAG",
"top": 4,
"top": 2,
"warn": [
{
"code": "W53",
......@@ -337,7 +337,7 @@
"3": 1
},
"sequence": 0,
"top": 3074
"top": 9
},
{
"germline": "TRG",
......@@ -349,7 +349,7 @@
"3": 1
},
"sequence": 0,
"top": 3073
"top": 8
},
{
"germline": "IGK",
......@@ -361,7 +361,7 @@
"3": 3
},
"sequence": 0,
"top": 1284
"top": 6
},
{
"germline": "TRD+",
......@@ -373,7 +373,7 @@
"3": 20
},
"sequence": 0,
"top": 146
"top": 5
},
{
"germline": "TRD+",
......@@ -385,7 +385,7 @@
"3": 1
},
"sequence": 0,
"top": 1943
"top": 7
}
],
"config": {
......
......@@ -10,7 +10,7 @@
"3": 2
},
"sequence": 0,
"top": 13375
"top": 6
},
{
"germline": "IGH",
......@@ -22,7 +22,7 @@
"3": 17
},
"sequence": 0,
"top": 2930
"top": 3
},
{
"germline": "IGH+",
......@@ -34,7 +34,7 @@
"3": 5
},
"sequence": 0,
"top": 4514
"top": 5
},
{
"germline": "TRG",
......@@ -46,7 +46,7 @@
"3": 1
},
"sequence": 0,
"top": 58163
"top": 8
},
{
"germline": "unexpected",
......@@ -58,7 +58,7 @@
"3": 1
},
"sequence": 0,
"top": 52899
"top": 7
},
{
"_average_read_length": [
......@@ -138,7 +138,7 @@
"3": 16303
},
"sequence": "CGTCCAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACCGCTGCGGACACGGCCGTGTATTACTGTGCGAGAAGGGGTATAGCAGCAGCTGGTGCCTACTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCAGGTAAG",
"top": 6
"top": 2
},
{
"_average_read_length": [
......@@ -218,7 +218,7 @@
"3": 25527
},
"sequence": "TACCATGACCAACATGGACCCTGTGGACACAGCCACATATTACTGTGCACGGAAGGGACCTTTACGATTTTTGGAGTGGTTATTAGGCCCTCTTACTACGGTATGGACGTCTGGGGCCAAGGGACCACGGTCACCGTCTCCTCAGGTAAG",
"top": 3,
"top": 1,
"warn": [
{
"code": "W69",
......@@ -237,7 +237,7 @@
"3": 2
},
"sequence": 0,
"top": 7007
"top": 5
}
],
"config": {
......
......@@ -10,7 +10,7 @@
"3": 269
},
"sequence": 0,
"top": 231
"top": 3
},
{
"germline": "IGH",
......@@ -22,7 +22,7 @@
"3": 7
},
"sequence": 0,
"top": 6639
"top": 4
},
{
"germline": "TRG",
......@@ -34,7 +34,7 @@
"3": 1
},
"sequence": 0,
"top": 47580
"top": 7
},
{
"_average_read_length": [
......@@ -105,7 +105,7 @@
"3": 1054
},
"sequence": "CAAGGTTCAGCGGCAGTGGATCTGGGACAGATTTCACTCTCACCATCAGCAGTCTGCAACCTGAAGATTTTGCAACTTACTACTGTCAACAGAGTTACAGTACCCCTCGGACGTTCGGCCAAGGGACCAAGGTGGAGATCAAACGTAAG",
"top": 10,
"top": 2,
"warn": [
{
"code": "W69",
......@@ -184,7 +184,7 @@
"3": 1
},
"sequence": 0,
"top": 38430
"top": 6
},
{
"germline": "IGH",
......@@ -196,7 +196,7 @@
"3": 1
},
"sequence": 0,
"top": 15850
"top": 5
}
],
"config": {
......
##### Test on fuse file with spike normalization
!LAUNCH: python ../../fuse.py spike_out2.vidjil spike_out2.vidjil -o spike_fuse_2_2.vidjil && cat spike_fuse_2_2.vidjil
$ clones in the output, same as in the input
8: "seg_stat"
$ "normalized_reads" expected in this sample
6: "normalized_reads"
$ mrd field; 6 for clones, 1 for sample
7: "mrd"
$ Good normalized_reads value for the first clone (and 6th); present 2 times after fuse
4: 0.3739975328711452
$ ampl_coeff value is present 2 times
2: 16.69631843174755
##### Test on fuse file with NO spike normalization (or no MRD)
!OUTPUT_FILE: spike_fuse_1_1.vidjil
!LAUNCH: python ../../fuse.py spike_out1.vidjil spike_out1.vidjil -o spike_fuse_1_1.vidjil;
$ MRD key is NOT present
j0:mrd
### One file without MRD + 1 file with MRD
!OUTPUT_FILE: spike_fuse_1_2.vidjil
!LAUNCH: python ../../fuse.py spike_out1.vidjil spike_out2.vidjil -o spike_fuse_1_2.vidjil;
$ MRD field is present at top level
j:mrd
$ MRD key is NOT present for clone 0
j0:clones[0].mrd
$ MRD key is present for clone 1
j:clones[1].mrd
$ UNI_COEFF values for mixed fused files
j: mrd.UNI_COEFF[0]: 0
j: mrd.UNI_COEFF[1]: 0.02677504183600287
$ ampl_coeff values for mixed fused files
j: mrd.ampl_coeff[0]: 0
j: mrd.ampl_coeff[1]: 16.69631843174755
$ prevalent values for mixed fused files
j: mrd.prevalent[0]: 0
j: mrd.prevalent[1]: IGH
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment