...
  View open merge request
Commits (3)
#!/usr/bin/env python
# #-*- coding: utf-8 -*-
# ===============================
# Script by Florian Thonier
# florian.thonier@inria.fr
# ===============================
from __future__ import division
from __future__ import print_function
from optparse import OptionParser
from collections import defaultdict
import operator
import math
import json
import sys
# ===============================
#########################
###### VidjilRep #####
#########################
class Morisita(object):
    """Add pairwise sample-similarity indices to a vidjil JSON file.

    On construction the input file is parsed, a Morisita value and a Jaccard
    index are computed for every ordered pair of samples, the two resulting
    matrices are stored under the ``"morisita"`` and ``"jaccard"`` keys of
    the loaded data, and the augmented JSON is written to ``foName``.

    Attributes:
        fiName: path of the input file.
        foName: path of the output file (``.vidjil`` replaced by
            ``_morisita.vidjil``).
        fi, fo: the input and output file objects; both are closed once the
            constructor returns.
        data: the parsed JSON content, including the computed matrices.
    """

    def __init__(self, fiName, verbose=False):
        """Load ``fiName``, compute the indices and write the result.

        Args:
            fiName: path to the vidjil JSON file to read.
            verbose: when true, print a confirmation once the JSON is parsed.

        Raises:
            SystemError: if the file content cannot be parsed as JSON.
        """
        self.fiName = fiName
        self.foName = fiName.replace(".vidjil", "_morisita.vidjil")
        if self.foName == self.fiName:
            # No ".vidjil" in the path: without this, the output path would
            # be the input path and writing would destroy the input.
            self.foName = fiName + "_morisita.vidjil"
        self.fo = None

        self.fi = open(self.fiName, "r")
        try:
            self.data = json.load(self.fi)
        except Exception:  # pragma: no cover
            print("Error 10 : vidjil json file cannot be loaded")
            raise SystemError("Error 10 : vidjil json file cannot be loaded")
        finally:
            self.fi.close()
        if verbose:  # pragma: no cover
            print("json loaded")

        print("Il y a %s clones [%s]" % (len(self.data["clones"]), fiName))
        self.computeMorisita()

        # The output is opened only once there is something to write, so a
        # failed run does not leave an empty result file behind.
        self.fo = open(self.foName, "w")
        try:
            self.export_result(self.fo)
        finally:
            self.fo.close()

    def computeMorisita(self):
        """Fill ``data["morisita"]`` and ``data["jaccard"]``.

        Both matrices are nested mappings indexed by sample position
        (``matrix[pos_0][pos_1]``), covering every ordered pair of the
        ``data["samples"]["number"]`` samples, diagonal included.
        """
        morisita = defaultdict(lambda: defaultdict(lambda: False))
        jaccard = defaultdict(lambda: defaultdict(lambda: False))
        nb_sample = self.data["samples"]["number"]
        for pos_0 in range(nb_sample):
            for pos_1 in range(nb_sample):
                morisita[pos_0][pos_1] = self.compute_one_morisita(pos_0, pos_1)
                jaccard[pos_0][pos_1] = self.compute_one_Jaccard_index(pos_0, pos_1)
        self.data["morisita"] = morisita
        self.data["jaccard"] = jaccard

    def compute_one_morisita(self, pos_0, pos_1):
        """Return the Morisita similarity between two samples.

        Unlike the presence/absence based Jaccard index, this index uses the
        read counts of each clone. The Morisita-Horn index is defined as

            CMH = 2 * sum(ai * bi) / ((da + db) * Na * Nb)
            da  = sum(ai^2) / Na^2
            db  = sum(bi^2) / Nb^2

        where ``ai`` / ``bi`` are the reads of clone ``i`` in each sample and
        ``Na`` / ``Nb`` are the per-sample totals, taken here from
        ``data["reads"]["segmented"]``. The value is 1 for identical
        compositions and 0 when no clone is shared.

        NOTE(review): the denominator actually evaluated below is
        ``sum(ai^2) * Nb^2 / Na^2 + sum(bi^2) * Na^2 / Nb^2``. It equals the
        formula above only when ``Na == Nb`` (the diagonal in particular).
        The arithmetic is kept unchanged so that previously produced values
        stay reproducible; confirm which form is intended.

        Args:
            pos_0: position of the first sample.
            pos_1: position of the second sample.

        Returns:
            The index as a float, or ``0.0`` when it is undefined (a sample
            total is zero, or no clone has reads in either sample).
        """
        clones = self.data["clones"]
        reads = self.data["reads"]["segmented"]

        total_a_sq = reads[pos_0] * reads[pos_0]
        total_b_sq = reads[pos_1] * reads[pos_1]
        if not total_a_sq or not total_b_sq:
            # An empty sample would otherwise abort the whole run with a
            # ZeroDivisionError.
            return 0.0

        cross = 0
        da = 0
        db = 0
        for clone in clones:
            ai = clone["reads"][pos_0]
            bi = clone["reads"][pos_1]
            cross += ai * bi
            da += (ai * ai) / total_a_sq
            db += (bi * bi) / total_b_sq

        denominator = ((da / total_a_sq) + (db / total_b_sq)) * (total_a_sq * total_b_sq)
        if not denominator:
            return 0.0
        return (2 * cross) / denominator

    def compute_one_Jaccard_index(self, pos_0, pos_1):
        """Return the Jaccard index between two samples.

        The index compares two samples on presence/absence only:

            I = Nc / (N1 + N2 - Nc)

        where ``Nc`` is the number of clones with reads in both samples and
        ``N1`` / ``N2`` are the numbers of clones with reads in each sample.
        It ranges from 0 (no shared clone) to 1 (same set of clones).

        Args:
            pos_0: position of the first sample.
            pos_1: position of the second sample.

        Returns:
            The index as a float, or ``0.0`` when neither sample contains
            any clone (the ratio is undefined in that case).
        """
        N1 = self.get_nb_species_of_sample(pos_0)
        N2 = self.get_nb_species_of_sample(pos_1)
        Nc = sum(
            1
            for clone in self.data["clones"]
            if clone["reads"][pos_0] and clone["reads"][pos_1]
        )
        union = N1 + N2 - Nc
        if not union:
            return 0.0
        return Nc / union

    def export_result(self, fo):
        """Serialise ``self.data`` as indented, key-sorted JSON into ``fo``."""
        json.dump(self.data, fo, sort_keys=True, indent=4, ensure_ascii=False)

    def get_nb_species_of_sample(self, pos):
        """Return how many clones have a non-zero read count at ``pos``."""
        return sum(1 for clone in self.data["clones"] if clone["reads"][pos])
# ===============================
if __name__ == '__main__':  # pragma: no cover
    # Command-line entry point: parse the options, then build a Morisita
    # object, which reads the input file and writes the result file.
    description = "\nCalcul divers indice de similarité et d'overlap entre les sample d'un fichier vidjil."
    description += "\nPour l'instant sur la version 2016a des fichier vidjil."
    DEFAULT_verbose = False
    DEFAULT_TOP_CLONE = 0  # maximum number of clones to work on
    usage = "Compilation d'analyses d'assignations effectuees par Vidjil pour differentes iterations.\n"
    usage += "usage: python %prog [options] -i $path"
    parser = OptionParser(usage=usage)

    # Optional arguments
    parser.add_option("-v", "--verbose", action="store_true",
                      dest="verbose", default=DEFAULT_verbose,
                      help="make lots of noise (%(DEFAULT_verbose)s)." % vars())
    parser.add_option("-t", "--topclone", default=DEFAULT_TOP_CLONE)

    # Required argument
    parser.add_option("-i", "--vidjil", default=False,
                      metavar="PATH", help="Vidjil file to use")

    argv = sys.argv
    (options, argv) = parser.parse_args(argv)

    INPUT_REP = options.vidjil
    if not INPUT_REP:
        # Without this check the default value False reaches Morisita and
        # fails with an AttributeError instead of a usage message.
        parser.error("no input file given, use -i PATH")

    vdjfi = Morisita(INPUT_REP, verbose=options.verbose)
{
"clones": [
{
"germline": "IGK",
"id": "TACTGTCAACAGAGTTACAGTACCCCGTACACTTTTGGCCAGGGGACCAA",
"name": "IGKV1-39*01 3//1 IGKJ2*01",
"reads": [
5000,
0
],
"top": 1
},
{
"germline": "IGK",
"id": "TACTGTCAACAGAGTTACAGTACCCCGTACACTTTTGGCCAGGGGACCAA",
"name": "IGKV1-39*01 3//1 IGKJ2*01",
"reads": [
4500,
3000
],
"top": 2
},
{
"germline": "IGK",
"id": "TACTGTCAACAGAGTTACAGTACCCCGTACACTTTTGGCCAGGGGACCAA",
"name": "IGKV1-39*01 3//1 IGKJ2*01",
"reads": [
4000,
2500
],
"top": 3
},
{
"germline": "IGK",
"id": "TACTGTCAACAGAGTTACAGTACCCCGTACACTTTTGGCCAGGGGACCAA",
"name": "IGKV1-39*01 3//1 IGKJ2*01",
"reads": [
3500,
2500
],
"top": 4
},
{
"germline": "IGK",
"id": "TACTGTCAACAGAGTTACAGTACCCCGTACACTTTTGGCCAGGGGACCAA",
"name": "IGKV1-39*01 3//1 IGKJ2*01",
"reads": [
2000,
2000
],
"top": 5
}
],
"clusters": [],
"diversity": {
"index_Ds_diversity": [
0.999757289886475,
0.999773621559143
],
"index_E_equitability": [
0.710613250732422,
0.718931078910828
],
"index_H_entropy": [
9.92569569544503,
10.0674523124544
]
},
"germlines": {},
"jaccard": {
"0": {
"0": 1.0,
"1": 0.8
},
"1": {
"0": 0.8,
"1": 1.0
}
},
"morisita": {
"0": {
"0": 1.0,
"1": 0.6386362940379345
},
"1": {
"0": 0.6386362940379345,
"1": 1.0
}
},
"producer": "vidjil fuse",
"reads": {
"distribution": {
"0.0000001": [
35684,
56680
],
"0.000001": [
191092,
219033
],
"0.00001": [
536497,
542406
],
"0.0001": [
314742,
312260
],
"0.001": [
86469,
76275
],
"0.01": [
0,
0
],
"0.1": [
0,
0
]
},
"germline": {
"IGH": [
190947,
191888
],
"IGH+": [
78925,
80162
],
"IGK": [
657338,
665446
],
"IGK+": [
0,
0
],
"IGL": [
235104,
265933
],
"TRA": [
0,
0
],
"TRA+D": [
0,
0
],
"TRB": [
2,
7
],
"TRB+": [
0,
0
],
"TRD": [
0,
0
],
"TRD+": [
0,
0
],
"TRG": [
0,
0
],
"unexpected": [
2168,
3218
]
},
"segmented": [
19000,
10000
],
"total": [
105515154,
105515154
]
},
"samples": {
"commandline": [
"/home/vidjil-ci/releases/vidjil/latest//vidjil-algo -o /mnt/data/prod/result/tmp/out-037293/ -b 037293 -c clones -3 -z 100 -r 1 -g/home/vidjil-ci/releases/vidjil/latest//germline/homo-sapiens.g -e 1 -2 -d -w 50 /mnt/data/prod/upload/uploads/sequence_file.data_file.99a5f66500f1a303.4d3634395f52445f4230304a3644395f315f315f484c473246424258582e494e4431392e66617374712e677a.gz ",
"/home/vidjil-ci/releases/vidjil/latest//vidjil-algo -o /mnt/data/prod/result/tmp/out-037294/ -b 037294 -c clones -3 -z 100 -r 1 -g/home/vidjil-ci/releases/vidjil/latest//germline/homo-sapiens.g -e 1 -2 -d -w 50 /mnt/data/prod/upload/uploads/sequence_file.data_file.bbebc4f976be1a7d.4d3634395f52445f4230304a3644395f315f325f484c473246424258582e494e4431392e66617374712e677a.gz "
],
"log": [
" ==> junction detected in 1164484 reads (1.1%)\n ==> found 94208 windows in 1164484 reads (1.1% of 105515154 reads)\n ! There are not so many CDR3 windows found in this set of reads.\n ! Please check the unsegmentation causes below and refer to the documentation.\n reads av. len clones clo/rds\n IGH -> 190947 76.0 20065 0.105\n IGH+ -> 78925 76.0 10363 0.131\n IGK -> 657338 76.0 43921 0.067\n IGK+ -> 0 - 0 -\n IGL -> 235104 76.0 19478 0.083\n TRA -> 0 - 0 -\n TRA+D -> 0 - 0 -\n TRB -> 2 76.0 1 0.500\n TRB+ -> 0 - 0 -\n TRD -> 0 - 0 -\n TRD+ -> 0 - 0 -\n TRG -> 0 - 0 -\n unexpected -> 2168 76.0 380 0.175\n\n SEG -> 1164484 76.0\n SEG_+ -> 2858 76.0\n SEG_- -> 1161626 76.0\n SEG changed w -> 412847 76.0\n\n UNSEG too short -> 0 -\n UNSEG strand -> 395256 76.0\n UNSEG too few V/J -> 91046588 76.0\n UNSEG only V/5' -> 11147607 76.0\n UNSEG only J/3' -> 1760542 76.0\n UNSEG < delta_min -> 0 -\n UNSEG ambiguous -> 677 76.0\n UNSEG too short w -> 0 -\n",
" ==> junction detected in 1206654 reads (1.14%)\n ==> found 118031 windows in 1206654 reads (1.14% of 105515154 reads)\n ! There are not so many CDR3 windows found in this set of reads.\n ! Please check the unsegmentation causes below and refer to the documentation.\n reads av. len clones clo/rds\n IGH -> 191888 76.0 24279 0.127\n IGH+ -> 80162 76.0 12664 0.158\n IGK -> 665446 76.0 56047 0.084\n IGK+ -> 0 - 0 -\n IGL -> 265933 76.0 24295 0.091\n TRA -> 0 - 0 -\n TRA+D -> 0 - 0 -\n TRB -> 7 76.0 3 0.429\n TRB+ -> 0 - 0 -\n TRD -> 0 - 0 -\n TRD+ -> 0 - 0 -\n TRG -> 0 - 0 -\n unexpected -> 3218 76.0 743 0.231\n\n SEG -> 1206654 76.0\n SEG_+ -> 1204728 76.0\n SEG_- -> 1926 76.0\n SEG changed w -> 373689 76.0\n\n UNSEG too short -> 0 -\n UNSEG strand -> 406722 76.0\n UNSEG too few V/J -> 91159272 76.0\n UNSEG only V/5' -> 11119940 76.0\n UNSEG only J/3' -> 1622500 76.0\n UNSEG < delta_min -> 0 -\n UNSEG ambiguous -> 66 76.0\n UNSEG too short w -> 0 -\n"
],
"number": 2,
"original_names": [
"/mnt/data/prod/upload/uploads/sequence_file.data_file.99a5f66500f1a303.4d3634395f52445f4230304a3644395f315f315f484c473246424258582e494e4431392e66617374712e677a.gz",
"/mnt/data/prod/upload/uploads/sequence_file.data_file.bbebc4f976be1a7d.4d3634395f52445f4230304a3644395f315f325f484c473246424258582e494e4431392e66617374712e677a.gz"
],
"producer": [
"vidjil-algo 2018.02",
"vidjil-algo 2018.02"
],
"run_timestamp": [
"2018-03-28 10:26:09",
"2018-03-28 10:26:11"
],
"timestamp": [
"2018-04-06 13:48:45",
"2018-04-06 13:48:48"
]
},
"similarity": [
[
0.0,
9.0,
7.0,
6.0,
19.0,
13.0,
14.0,
7.0,
12.0,
18.0,
20.0,
18.0,
13.0,
5.0,
12.0,
6.0,
18.0,
15.0,
27.0,
19.0
],
[
9.0,
0.0,
5.0,
6.0,
16.0,
11.0,
5.0,
7.0,
6.0,
16.0,
29.0,
15.0,
10.0,
14.0,
12.0,
15.0,
18.0,
6.0,
20.0,
11.0
],
[
7.0,
5.0,
0.0,
6.0,
16.0,
6.0,
10.0,
5.0,
10.0,
11.0,
27.0,
15.0,
10.0,
12.0,
12.0,
13.0,
20.0,
11.0,
25.0,
16.0
],
[
6.0,
6.0,
6.0,
0.0,
16.0,
12.0,
11.0,
6.0,
9.0,
16.0,
25.0,
15.0,
10.0,
11.0,
6.0,
12.0,
19.0,
12.0,
26.0,
17.0
],
[
19.0,
16.0,
16.0,
16.0,
0.0,
10.0,
16.0,
17.0,
11.0,
11.0,
30.0,
6.0,
6.0,
19.0,
10.0,
13.0,
4.0,
10.0,
28.0,
15.0
],
[
13.0,
11.0,
6.0,
12.0,
10.0,
0.0,
11.0,
11.0,
16.0,
5.0,
30.0,
16.0,
16.0,
13.0,
6.0,
7.0,
14.0,
5.0,
28.0,
15.0
],
[
14.0,
5.0,
10.0,
11.0,
16.0,
11.0,
0.0,
12.0,
11.0,
16.0,
30.0,
10.0,
15.0,
9.0,
12.0,
15.0,
18.0,
6.0,
24.0,
6.0
],
[
7.0,
7.0,
5.0,
6.0,
17.0,
11.0,
12.0,
0.0,
11.0,
16.0,
26.0,
16.0,
11.0,
12.0,
12.0,
13.0,
20.0,
13.0,
27.0,
17.0
],
[
12.0,
6.0,
10.0,
9.0,
11.0,
16.0,
11.0,
11.0,
0.0,
15.0,
28.0,
10.0,
5.0,
17.0,
15.0,
18.0,
13.0,
11.0,
26.0,
10.0
],
[
18.0,
16.0,
11.0,
16.0,
11.0,
5.0,
16.0,
16.0,
15.0,
0.0,
30.0,
15.0,
15.0,
18.0,
11.0,
12.0,
15.0,
10.0,
31.0,
10.0
],
[
20.0,
29.0,
27.0,
25.0,
30.0,
30.0,
30.0,
26.0,
28.0,
30.0,
0.0,
29.0,
28.0,
24.0,
28.0,
23.0,
30.0,
31.0,
13.0,
30.0
],
[
18.0,
15.0,
15.0,
15.0,
6.0,
16.0,
10.0,
16.0,
10.0,
15.0,
29.0,
0.0,
5.0,
13.0,
16.0,
19.0,
10.0,
16.0,
28.0,
11.0
],
[
13.0,
10.0,
10.0,
10.0,
6.0,
16.0,
15.0,
11.0,
5.0,
15.0,
28.0,
5.0,
0.0,
18.0,
16.0,
19.0,
10.0,
16.0,
28.0,
15.0
],
[
5.0,
14.0,
12.0,
11.0,
19.0,
13.0,
9.0,
12.0,
17.0,
18.0,
24.0,
13.0,
18.0,
0.0,
12.0,
6.0,
18.0,
15.0,
29.0,
15.0
],
[
12.0,
12.0,
12.0,
6.0,
10.0,
6.0,
12.0,
12.0,
15.0,
11.0,
28.0,
16.0,
16.0,
12.0,
0.0,
6.0,
13.0,
6.0,
28.0,
16.0
],
[
6.0,
15.0,
13.0,
12.0,
13.0,
7.0,
15.0,
13.0,
18.0,
12.0,
23.0,
19.0,
19.0,
6.0,
6.0,
0.0,
12.0,
9.0,
27.0,
19.0
],
[
18.0,
18.0,
20.0,
19.0,
4.0,
14.0,
18.0,
20.0,
13.0,
15.0,
30.0,
10.0,
10.0,
18.0,
13.0,
12.0,
0.0,
12.0,
28.0,
17.0
],
[
15.0,
6.0,
11.0,
12.0,
10.0,
5.0,
6.0,
13.0,
11.0,
10.0,
31.0,
16.0,
16.0,
15.0,
6.0,
9.0,
12.0,
0.0,
23.0,
10.0
],
[
27.0,
20.0,
25.0,
26.0,
28.0,
28.0,
24.0,
27.0,
26.0,
31.0,
13.0,
28.0,
28.0,
29.0,
28.0,
27.0,
28.0,
23.0,
0.0,
28.0
],
[
19.0,
11.0,
16.0,
17.0,
15.0,
15.0,
6.0,
17.0,
10.0,
10.0,
30.0,
11.0,
15.0,
15.0,
16.0,
19.0,
17.0,
10.0,
28.0,
0.0
],
[
0.0,
9.0,
7.0,
13.0,
6.0,
25.0,
5.0,
12.0,
13.0,
14.0,
26.0,
7.0,
25.0,
11.0,
21.0,
19.0,
18.0,
18.0,
26.0,
26.0
],
[
9.0,
0.0,
5.0,
10.0,
6.0,
23.0,
14.0,
6.0,
11.0,
5.0,
25.0,
7.0,
24.0,
13.0,
20.0,
16.0,
15.0,
16.0,
25.0,
25.0
],
[
7.0,
5.0,
0.0,
10.0,
6.0,
24.0,
12.0,
10.0,
6.0,
10.0,
26.0,
5.0,
25.0,
12.0,
23.0,
16.0,
15.0,
11.0,
25.0,
26.0
],
[
13.0,
10.0,
10.0,
0.0,
10.0,
26.0,
18.0,
5.0,
16.0,
15.0,
25.0,
11.0,
23.0,
15.0,
24.0,
6.0,
5.0,
15.0,
25.0,
25.0
],
[
6.0,
6.0,
6.0,
10.0,
0.0,
23.0,
11.0,
9.0,
12.0,
11.0,
27.0,
6.0,
26.0,
10.0,
22.0,
16.0,
15.0,
16.0,
26.0,
27.0
],
[
25.0,
23.0,
24.0,
26.0,
23.0,
0.0,
21.0,
24.0,
20.0,
19.0,
16.0,
25.0,
13.0,
25.0,
14.0,
21.0,
22.0,
20.0,
13.0,
15.0
],
[
5.0,
14.0,
12.0,
18.0,
11.0,
21.0,
0.0,
17.0,
13.0,
9.0,
22.0,
12.0,
22.0,
16.0,
17.0,
19.0,
13.0,
18.0,
23.0,
22.0
],
[
12.0,
6.0,
10.0,
5.0,
9.0,
24.0,
17.0,
0.0,
16.0,
11.0,
24.0,
11.0,
23.0,
14.0,
23.0,
11.0,
10.0,
15.0,
24.0,
24.0
],
[
13.0,
11.0,
6.0,
16.0,
12.0,
20.0,
13.0,
16.0,
0.0,
11.0,
22.0,
11.0,
21.0,
18.0,
19.0,
10.0,
16.0,
5.0,
21.0,
22.0
],
[
14.0,
5.0,
10.0,
15.0,
11.0,
19.0,
9.0,
11.0,
11.0,
0.0,
21.0,
12.0,
21.0,
18.0,
16.0,
16.0,
10.0,
16.0,
22.0,
21.0
],
[
26.0,
25.0,
26.0,
25.0,
27.0,
16.0,
22.0,
24.0,
22.0,
21.0,
0.0,
25.0,
11.0,
23.0,
16.0,
21.0,
21.0,
20.0,
3.0,
1.0
],
[
7.0,
7.0,
5.0,
11.0,
6.0,
25.0,
12.0,
11.0,
11.0,
12.0,
25.0,
0.0,
26.0,
14.0,
24.0,
17.0,
16.0,
16.0,
25.0,
26.0
],
[
25.0,
24.0,
25.0,
23.0,
26.0,
13.0,
22.0,
23.0,
21.0,
21.0,
11.0,
26.0,
0.0,
26.0,
16.0,
20.0,
21.0,
22.0,
8.0,
10.0
],
[
11.0,
13.0,
12.0,
15.0,
10.0,
25.0,
16.0,
14.0,
18.0,
18.0,
23.0,
14.0,
26.0,
0.0,
22.0,
21.0,
20.0,
18.0,
23.0,
24.0
],
[
21.0,
20.0,
23.0,
24.0,
22.0,
14.0,
17.0,
23.0,
19.0,
16.0,
16.0,
24.0,
16.0,
22.0,
0.0,
19.0,
20.0,
18.0,
14.0,
16.0
],
[
19.0,
16.0,
16.0,
6.0,
16.0,
21.0,
19.0,
11.0,
10.0,
16.0,
21.0,
17.0,
20.0,
21.0,
19.0,
0.0,
6.0,
11.0,
21.0,
21.0
],
[
18.0,
15.0,
15.0,
5.0,
15.0,
22.0,
13.0,
10.0,
16.0,
10.0,
21.0,
16.0,
21.0,
20.0,
20.0,
6.0,
0.0,
15.0,
22.0,
21.0
],
[
18.0,
16.0,
11.0,
15.0,
16.0,
20.0,
18.0,
15.0,
5.0,
16.0,
20.0,
16.0,
22.0,
18.0,
18.0,
11.0,
15.0,
0.0,
20.0,
20.0
],
[
26.0,
25.0,
25.0,
25.0,
26.0,
13.0,
23.0,
24.0,
21.0,
22.0,
3.0,
25.0,
8.0,
23.0,
14.0,
21.0,
22.0,
20.0,
0.0,
4.0
],
[
26.0,
25.0,
26.0,
25.0,
27.0,
15.0,
22.0,
24.0,
22.0,
21.0,
1.0,
26.0,
10.0,
24.0,
16.0,
21.0,
21.0,
20.0,
4.0,
0.0
]
],
"timestamp": "2019-04-08 15:18:00",
"vidjil_json_version": "2016b",
"warn": [
{
"code": "W20",
"msg": "Very few V(D)J recombinations found: 1.10%"
},
{
"code": "W20",
"msg": "Very few V(D)J recombinations found: 1.14%"
}
]
}
\ No newline at end of file
{
"clones": [
{
"germline": "IGK",
"id": "TACTGTCAACAGAGTTACAGTACCCCGTACACTTTTGGCCAGGGGACCAA",
"name": "IGKV1-39*01 3//1 IGKJ2*01",
"reads": [
5000,
0
],
"top": 1
}, {
"germline": "IGK",
"id": "TACTGTCAACAGAGTTACAGTACCCCGTACACTTTTGGCCAGGGGACCAA",
"name": "IGKV1-39*01 3//1 IGKJ2*01",
"reads": [
4500,
3000
],
"top": 2
}, {
"germline": "IGK",
"id": "TACTGTCAACAGAGTTACAGTACCCCGTACACTTTTGGCCAGGGGACCAA",
"name": "IGKV1-39*01 3//1 IGKJ2*01",
"reads": [
4000,
2500
],
"top": 3
}, {
"germline": "IGK",
"id": "TACTGTCAACAGAGTTACAGTACCCCGTACACTTTTGGCCAGGGGACCAA",
"name": "IGKV1-39*01 3//1 IGKJ2*01",
"reads": [
3500,
2500
],
"top": 4
}, {
"germline": "IGK",
"id": "TACTGTCAACAGAGTTACAGTACCCCGTACACTTTTGGCCAGGGGACCAA",
"name": "IGKV1-39*01 3//1 IGKJ2*01",
"reads": [
2000,
2000
],
"top": 5
}
],
"clusters": [],
"diversity": {
"index_Ds_diversity": [
0.999757289886475,
0.999773621559143
],
"index_E_equitability": [
0.710613250732422,
0.718931078910828
],
"index_H_entropy": [
9.92569569544503,
10.0674523124544
]
},
"germlines": {},
"producer": "vidjil fuse",
"reads": {
"distribution": {
"0.0000001": [
35684,
56680
],
"0.000001": [
191092,
219033
],
"0.00001": [
536497,
542406
],
"0.0001": [
314742,
312260
],
"0.001": [
86469,
76275
],
"0.01": [
0,
0
],
"0.1": [
0,
0
]
},
"germline": {
"IGH": [
190947,
191888
],
"IGH+": [
78925,
80162
],
"IGK": [
657338,
665446
],
"IGK+": [
0,
0
],
"IGL": [
235104,
265933
],
"TRA": [
0,
0
],
"TRA+D": [
0,
0
],
"TRB": [
2,
7
],
"TRB+": [
0,
0
],
"TRD": [
0,
0
],
"TRD+": [
0,
0
],
"TRG": [
0,
0
],
"unexpected": [
2168,
3218
]
},
"segmented": [
19000,
10000
],
"total": [
105515154,
105515154
]
},
"samples": {
"commandline": [
"/home/vidjil-ci/releases/vidjil/latest//vidjil-algo -o /mnt/data/prod/result/tmp/out-037293/ -b 037293 -c clones -3 -z 100 -r 1 -g/home/vidjil-ci/releases/vidjil/latest//germline/homo-sapiens.g -e 1 -2 -d -w 50 /mnt/data/prod/upload/uploads/sequence_file.data_file.99a5f66500f1a303.4d3634395f52445f4230304a3644395f315f315f484c473246424258582e494e4431392e66617374712e677a.gz ",
"/home/vidjil-ci/releases/vidjil/latest//vidjil-algo -o /mnt/data/prod/result/tmp/out-037294/ -b 037294 -c clones -3 -z 100 -r 1 -g/home/vidjil-ci/releases/vidjil/latest//germline/homo-sapiens.g -e 1 -2 -d -w 50 /mnt/data/prod/upload/uploads/sequence_file.data_file.bbebc4f976be1a7d.4d3634395f52445f4230304a3644395f315f325f484c473246424258582e494e4431392e66617374712e677a.gz "
],
"log": [
" ==> junction detected in 1164484 reads (1.1%)\n ==> found 94208 windows in 1164484 reads (1.1% of 105515154 reads)\n ! There are not so many CDR3 windows found in this set of reads.\n ! Please check the unsegmentation causes below and refer to the documentation.\n reads av. len clones clo/rds\n IGH -> 190947 76.0 20065 0.105\n IGH+ -> 78925 76.0 10363 0.131\n IGK -> 657338 76.0 43921 0.067\n IGK+ -> 0 - 0 -\n IGL -> 235104 76.0 19478 0.083\n TRA -> 0 - 0 -\n TRA+D -> 0 - 0 -\n TRB -> 2 76.0 1 0.500\n TRB+ -> 0 - 0 -\n TRD -> 0 - 0 -\n TRD+ -> 0 - 0 -\n TRG -> 0 - 0 -\n unexpected -> 2168 76.0 380 0.175\n\n SEG -> 1164484 76.0\n SEG_+ -> 2858 76.0\n SEG_- -> 1161626 76.0\n SEG changed w -> 412847 76.0\n\n UNSEG too short -> 0 -\n UNSEG strand -> 395256 76.0\n UNSEG too few V/J -> 91046588 76.0\n UNSEG only V/5' -> 11147607 76.0\n UNSEG only J/3' -> 1760542 76.0\n UNSEG < delta_min -> 0 -\n UNSEG ambiguous -> 677 76.0\n UNSEG too short w -> 0 -\n",
" ==> junction detected in 1206654 reads (1.14%)\n ==> found 118031 windows in 1206654 reads (1.14% of 105515154 reads)\n ! There are not so many CDR3 windows found in this set of reads.\n ! Please check the unsegmentation causes below and refer to the documentation.\n reads av. len clones clo/rds\n IGH -> 191888 76.0 24279 0.127\n IGH+ -> 80162 76.0 12664 0.158\n IGK -> 665446 76.0 56047 0.084\n IGK+ -> 0 - 0 -\n IGL -> 265933 76.0 24295 0.091\n TRA -> 0 - 0 -\n TRA+D -> 0 - 0 -\n TRB -> 7 76.0 3 0.429\n TRB+ -> 0 - 0 -\n TRD -> 0 - 0 -\n TRD+ -> 0 - 0 -\n TRG -> 0 - 0 -\n unexpected -> 3218 76.0 743 0.231\n\n SEG -> 1206654 76.0\n SEG_+ -> 1204728 76.0\n SEG_- -> 1926 76.0\n SEG changed w -> 373689 76.0\n\n UNSEG too short -> 0 -\n UNSEG strand -> 406722 76.0\n UNSEG too few V/J -> 91159272 76.0\n UNSEG only V/5' -> 11119940 76.0\n UNSEG only J/3' -> 1622500 76.0\n UNSEG < delta_min -> 0 -\n UNSEG ambiguous -> 66 76.0\n UNSEG too short w -> 0 -\n"
],
"number": 2,
"original_names": [
"/mnt/data/prod/upload/uploads/sequence_file.data_file.99a5f66500f1a303.4d3634395f52445f4230304a3644395f315f315f484c473246424258582e494e4431392e66617374712e677a.gz",
"/mnt/data/prod/upload/uploads/sequence_file.data_file.bbebc4f976be1a7d.4d3634395f52445f4230304a3644395f315f325f484c473246424258582e494e4431392e66617374712e677a.gz"
],
"producer": [
"vidjil-algo 2018.02",
"vidjil-algo 2018.02"
],
"run_timestamp": [
"2018-03-28 10:26:09",
"2018-03-28 10:26:11"
],
"timestamp": [
"2018-04-06 13:48:45",
"2018-04-06 13:48:48"
]
},
"similarity": [
[
0.0,
9.0,
7.0,
6.0,
19.0,
13.0,
14.0,
7.0,
12.0,
18.0,
20.0,
18.0,
13.0,
5.0,
12.0,
6.0,
18.0,
15.0,
27.0,
19.0
],
[
9.0,
0.0,
5.0,
6.0,
16.0,
11.0,
5.0,
7.0,
6.0,
16.0,
29.0,
15.0,
10.0,
14.0,
12.0,
15.0,
18.0,
6.0,
20.0,
11.0
],
[
7.0,
5.0,
0.0,
6.0,
16.0,
6.0,
10.0,
5.0,
10.0,
11.0,
27.0,
15.0,
10.0,
12.0,
12.0,
13.0,
20.0,
11.0,
25.0,
16.0
],
[
6.0,
6.0,
6.0,
0.0,
16.0,
12.0,
11.0,
6.0,
9.0,
16.0,
25.0,
15.0,
10.0,
11.0,
6.0,
12.0,
19.0,
12.0,
26.0,
17.0
],
[
19.0,
16.0,
16.0,
16.0,
0.0,
10.0,
16.0,
17.0,
11.0,
11.0,
30.0,
6.0,
6.0,
19.0,
10.0,
13.0,
4.0,
10.0,
28.0,
15.0
],
[
13.0,
11.0,
6.0,
12.0,
10.0,
0.0,
11.0,
11.0,
16.0,
5.0,
30.0,
16.0,
16.0,
13.0,
6.0,
7.0,
14.0,
5.0,
28.0,
15.0
],
[
14.0,
5.0,
10.0,
11.0,
16.0,
11.0,
0.0,
12.0,
11.0,
16.0,
30.0,
10.0,
15.0,
9.0,
12.0,
15.0,
18.0,
6.0,
24.0,
6.0
],
[
7.0,
7.0,
5.0,
6.0,
17.0,
11.0,
12.0,
0.0,
11.0,
16.0,
26.0,
16.0,
11.0,
12.0,
12.0,
13.0,
20.0,
13.0,
27.0,
17.0
],
[
12.0,
6.0,
10.0,
9.0,
11.0,
16.0,
11.0,
11.0,
0.0,
15.0,
28.0,
10.0,
5.0,
17.0,
15.0,
18.0,
13.0,
11.0,
26.0,
10.0
],
[
18.0,
16.0,
11.0,
16.0,
11.0,
5.0,
16.0,
16.0,
15.0,
0.0,
30.0,
15.0,
15.0,
18.0,
11.0,
12.0,
15.0,
10.0,
31.0,
10.0
],
[
20.0,
29.0,
27.0,
25.0,
30.0,
30.0,
30.0,
26.0,
28.0,
30.0,
0.0,
29.0,
28.0,
24.0,
28.0,
23.0,
30.0,
31.0,
13.0,
30.0
],
[
18.0,
15.0,
15.0,
15.0,
6.0,
16.0,
10.0,
16.0,
10.0,
15.0,
29.0,
0.0,
5.0,
13.0,
16.0,
19.0,
10.0,
16.0,
28.0,
11.0
],
[
13.0,
10.0,
10.0,
10.0,
6.0,
16.0,
15.0,
11.0,
5.0,
15.0,
28.0,
5.0,
0.0,
18.0,
16.0,
19.0,
10.0,
16.0,
28.0,
15.0
],
[
5.0,
14.0,
12.0,
11.0,
19.0,
13.0,
9.0,
12.0,
17.0,
18.0,
24.0,
13.0,
18.0,
0.0,
12.0,
6.0,
18.0,
15.0,
29.0,
15.0
],
[
12.0,
12.0,
12.0,
6.0,
10.0,
6.0,
12.0,
12.0,
15.0,
11.0,
28.0,
16.0,
16.0,
12.0,
0.0,
6.0,
13.0,
6.0,
28.0,
16.0
],
[
6.0,
15.0,
13.0,
12.0,
13.0,
7.0,
15.0,
13.0,
18.0,
12.0,
23.0,
19.0,
19.0,
6.0,
6.0,
0.0,
12.0,
9.0,
27.0,
19.0
],
[
18.0,
18.0,
20.0,
19.0,
4.0,
14.0,
18.0,
20.0,
13.0,
15.0,
30.0,
10.0,
10.0,
18.0,
13.0,
12.0,
0.0,
12.0,
28.0,
17.0