From b9903704c637f94b1711be5b1947802339a228c2 Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Thu, 6 Jan 2022 18:19:26 +0100 Subject: [PATCH 1/9] core/windows.cpp: factorize division by nb_seg_nb_seg_m1 --- algo/core/windows.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/algo/core/windows.cpp b/algo/core/windows.cpp index 5366dfa4e..6445157da 100644 --- a/algo/core/windows.cpp +++ b/algo/core/windows.cpp @@ -238,7 +238,6 @@ json WindowsStorage::computeDiversity(int nb_segmented) { double index_H_entropy = 0.0 ; double index_1_minus_Ds_diversity = 0.0 ; - double nb_seg_nb_seg_m1 = (double) nb_segmented * ((double) nb_segmented - 1); for (auto it = seqs_by_window.begin(); it != seqs_by_window.end(); ++it) { size_t clone_nb_reads = it->second.getNbInserted(); @@ -246,11 +245,13 @@ json WindowsStorage::computeDiversity(int nb_segmented) { float ratio = (float) clone_nb_reads / nb_segmented ; index_H_entropy -= ratio * log(ratio) ; - index_1_minus_Ds_diversity += ((double) clone_nb_reads * ((double) clone_nb_reads - 1)) / nb_seg_nb_seg_m1 ; + index_1_minus_Ds_diversity += ((double) clone_nb_reads * ((double) clone_nb_reads - 1)); } + double nb_seg_nb_seg_m1 = (double) nb_segmented * ((double) nb_segmented - 1); + float index_Ds_diversity = 1 - index_1_minus_Ds_diversity / nb_seg_nb_seg_m1 ; + float index_E_equitability = index_H_entropy / log(nb_segmented) ; - float index_Ds_diversity = 1 - index_1_minus_Ds_diversity ; cout << "Diversity measures" << endl << " H = " << index_H_entropy << endl // Shannon's diversity -- GitLab From c8a5b9c77cd7de36f06014b471eb7a36f4c0a783 Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Thu, 6 Jan 2022 19:11:04 +0100 Subject: [PATCH 2/9] core/segment.h, core/windows.cpp: computeDiversity for each locus (and ALL_LOCI) see #1781 --- algo/core/segment.h | 2 ++ algo/core/windows.cpp | 42 +++++++++++++++++++++++++----------------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/algo/core/segment.h b/algo/core/segment.h index 4641e0baa..a9e0e4bc5 100644 --- a/algo/core/segment.h +++ b/algo/core/segment.h @@ -89,6 +89,8 @@ const char* const segmented_mesg[] = { "?", "UNSEG too short w", } ; +#define ALL_LOCI "all" + // Unproductivity causes #define UNPROD_TOO_SHORT "too-short" #define UNPROD_OUT_OF_FRAME "out-of-frame" diff --git a/algo/core/windows.cpp b/algo/core/windows.cpp index 6445157da..641ca926d 100644 --- a/algo/core/windows.cpp +++ b/algo/core/windows.cpp @@ -235,35 +235,43 @@ ostream &WindowsStorage::printSortedWindows(ostream &os) { json WindowsStorage::computeDiversity(int nb_segmented) { - double index_H_entropy = 0.0 ; - double index_1_minus_Ds_diversity = 0.0 ; + map index_H_entropy; + map index_1_minus_Ds_diversity; - - for (auto it = seqs_by_window.begin(); it != seqs_by_window.end(); ++it) { + for (auto it = seqs_by_window.begin(); it != seqs_by_window.end(); ++it) + { size_t clone_nb_reads = it->second.getNbInserted(); + string code = getGermline(it->first)->code; float ratio = (float) clone_nb_reads / nb_segmented ; - index_H_entropy -= ratio * log(ratio) ; + float ratio_logratio = ratio * log(ratio) ; + + index_H_entropy[ALL_LOCI] -= ratio_logratio; + index_H_entropy[code] -= ratio_logratio; - index_1_minus_Ds_diversity += ((double) clone_nb_reads * ((double) clone_nb_reads - 1)); + double inc_diversity = ((double) clone_nb_reads * ((double) clone_nb_reads - 1)); + index_1_minus_Ds_diversity[ALL_LOCI] += inc_diversity; + index_1_minus_Ds_diversity[code] += inc_diversity; } - double nb_seg_nb_seg_m1 = (double) nb_segmented * ((double) nb_segmented - 1); - float index_Ds_diversity = 1 - index_1_minus_Ds_diversity / nb_seg_nb_seg_m1 ; + json jsonDiversity; - float index_E_equitability = index_H_entropy / log(nb_segmented) ; + for (const auto& kv: index_H_entropy) + { + string code = kv.first ; + jsonDiversity["index_H_entropy"][code] = kv.second; + + double nb_seg_nb_seg_m1 = (double) nb_segmented * ((double) nb_segmented - 1); + jsonDiversity["index_Ds_diversity"][code] = 1 - index_1_minus_Ds_diversity[code] / nb_seg_nb_seg_m1 ; + jsonDiversity["index_E_equitability"][code] = index_H_entropy[code] / log(nb_segmented) ; + } cout << "Diversity measures" << endl - << " H = " << index_H_entropy << endl // Shannon's diversity - << " E = " << index_E_equitability << endl // Shannon's equitability - << " Ds = " << index_Ds_diversity << endl // Simpson's diversity + << " H = " << jsonDiversity["index_H_entropy"] << endl // Shannon's diversity + << " E = " << jsonDiversity["index_E_equitability"] << endl // Shannon's equitability + << " Ds = " << jsonDiversity["index_Ds_diversity"] << endl // Simpson's diversity << endl; - json jsonDiversity; - jsonDiversity["index_H_entropy"] = index_H_entropy ; - jsonDiversity["index_E_equitability"] = index_E_equitability ; - jsonDiversity["index_Ds_diversity"] = index_Ds_diversity ; - return jsonDiversity; } -- GitLab From 2075e6feac92215a1cb61e6ab20e2f6182200d03 Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Thu, 6 Jan 2022 19:14:31 +0100 Subject: [PATCH 3/9] core/windows.{h,cpp}, vidjil.cpp: use the correct denominator, by germline, to compute index_Ds_diversity see #1781 --- algo/core/windows.cpp | 19 +++++++++++-------- algo/core/windows.h | 2 +- algo/vidjil.cpp | 19 +++++++++++-------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/algo/core/windows.cpp b/algo/core/windows.cpp index 641ca926d..6bc7ded3f 100644 --- a/algo/core/windows.cpp +++ b/algo/core/windows.cpp @@ -233,7 +233,7 @@ ostream &WindowsStorage::printSortedWindows(ostream &os) { -json WindowsStorage::computeDiversity(int nb_segmented) { +json WindowsStorage::computeDiversity(map nb_segmented) { map index_H_entropy; map index_1_minus_Ds_diversity; @@ -243,11 +243,10 @@ json WindowsStorage::computeDiversity(int nb_segmented) { size_t clone_nb_reads = it->second.getNbInserted(); string code = getGermline(it->first)->code; - float ratio = (float) clone_nb_reads / nb_segmented ; - float ratio_logratio = ratio * log(ratio) ; - - index_H_entropy[ALL_LOCI] -= ratio_logratio; - index_H_entropy[code] -= ratio_logratio; + float ratio_all = (float) clone_nb_reads / nb_segmented[ALL_LOCI] ; + float ratio_code = (float) clone_nb_reads / nb_segmented[code] ; + index_H_entropy[ALL_LOCI] -= ratio_all * log(ratio_all) ; + index_H_entropy[code] -= ratio_code * log(ratio_code) ; double inc_diversity = ((double) clone_nb_reads * ((double) clone_nb_reads - 1)); index_1_minus_Ds_diversity[ALL_LOCI] += inc_diversity; @@ -259,11 +258,15 @@ json WindowsStorage::computeDiversity(int nb_segmented) { for (const auto& kv: index_H_entropy) { string code = kv.first ; + // Shannon's diversity jsonDiversity["index_H_entropy"][code] = kv.second; - double nb_seg_nb_seg_m1 = (double) nb_segmented * ((double) nb_segmented - 1); + // Shannon's equitability + double nb_seg_nb_seg_m1 = nb_segmented[code] * (nb_segmented[code] - 1); + jsonDiversity["index_E_equitability"][code] = index_H_entropy[code] / log(nb_segmented[code]) ; + + // Simpson's diversity jsonDiversity["index_Ds_diversity"][code] = 1 - index_1_minus_Ds_diversity[code] / nb_seg_nb_seg_m1 ; - jsonDiversity["index_E_equitability"][code] = index_H_entropy[code] / log(nb_segmented) ; } cout << "Diversity measures" << endl diff --git a/algo/core/windows.h b/algo/core/windows.h index e4f65a892..9c73eeeb0 100644 --- a/algo/core/windows.h +++ b/algo/core/windows.h @@ -192,7 +192,7 @@ class WindowsStorage { * @pre should be called before keepInterestingWindows() * Compute, display, and return some diversity measures */ - json computeDiversity(int nb_segmented); + json computeDiversity(map nb_segmented_by_germline); /** * @pre sort() must have been called. diff --git a/algo/vidjil.cpp b/algo/vidjil.cpp index ca2f6f43c..f757a15f7 100644 --- a/algo/vidjil.cpp +++ b/algo/vidjil.cpp @@ -1298,7 +1298,17 @@ int main (int argc, char **argv) cout << endl; //$$ compute, display and store diversity measures - json jsonDiversity = windowsStorage->computeDiversity(nb_segmented); + json reads_germline; + map nb_segmented_by_germline; + for (list::const_iterator it = multigermline->germlines.begin(); it != multigermline->germlines.end(); ++it){ + Germline *germline = *it ; + size_t nb = we.getNbReadsGermline(germline->code); + nb_segmented_by_germline[germline->code] = nb; + reads_germline[germline->code] = {nb}; + } + + nb_segmented_by_germline[ALL_LOCI] = nb_segmented; + json jsonDiversity = windowsStorage->computeDiversity(nb_segmented_by_germline); ////////////////////////////////// //$$ min_reads_clone (ou label) @@ -1764,13 +1774,6 @@ int main (int argc, char **argv) windowsStorage->clearSequences(); windowsStorage->sortedWindowsToOutput(&output, max_clones_id); - - json reads_germline; - for (list::const_iterator it = multigermline->germlines.begin(); it != multigermline->germlines.end(); ++it){ - Germline *germline = *it ; - reads_germline[germline->code] = {we.getNbReadsGermline(germline->code)}; - } - // Complete main output output.set("config", j_config); -- GitLab From 111ea244865d2c9c9d006ce648d1a754da457f2e Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Thu, 6 Jan 2022 19:58:28 +0100 Subject: [PATCH 4/9] core/windows.cpp: stdout, pretty-print diversity indices see #1781 --- algo/core/windows.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/algo/core/windows.cpp b/algo/core/windows.cpp index 6bc7ded3f..3211e8e07 100644 --- a/algo/core/windows.cpp +++ b/algo/core/windows.cpp @@ -269,11 +269,19 @@ json WindowsStorage::computeDiversity(map nb_segmented) { jsonDiversity["index_Ds_diversity"][code] = 1 - index_1_minus_Ds_diversity[code] / nb_seg_nb_seg_m1 ; } - cout << "Diversity measures" << endl - << " H = " << jsonDiversity["index_H_entropy"] << endl // Shannon's diversity - << " E = " << jsonDiversity["index_E_equitability"] << endl // Shannon's equitability - << " Ds = " << jsonDiversity["index_Ds_diversity"] << endl // Simpson's diversity - << endl; + // Pretty-print + cout << setw(24) << "Diversity measures" ; + for (const auto& kv: index_H_entropy) + cout << setw(6) << kv.first ; + cout << endl; + + for (const string index: {"index_H_entropy", "index_E_equitability", "index_Ds_diversity"}) + { + cout << " " << setw(22) << index ; + for (const auto& kv: index_H_entropy) + cout << fixed << setprecision(3) << setw(6) << (float) jsonDiversity[index][kv.first] ; + cout << endl; + } return jsonDiversity; } -- GitLab From fa623a0806a9cdd5d3e2fddbf26116b1b056093a Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Thu, 6 Jan 2022 20:01:47 +0100 Subject: [PATCH 5/9] tests: update --- algo/tests/should-get-tests/large-r.should | 4 ++-- algo/tests/should-get-tests/multi-detect.should | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/algo/tests/should-get-tests/large-r.should b/algo/tests/should-get-tests/large-r.should index 31a50de50..f9edcf209 100644 --- a/algo/tests/should-get-tests/large-r.should +++ b/algo/tests/should-get-tests/large-r.should @@ -8,5 +8,5 @@ $ Find a representative $ Compute the diversity. No diversity here. 1: 1 0.000 -1: E = 0.000 -1: Ds = 0.000 +1: E_.* 0.000 +1: Ds_.* 0.000 diff --git a/algo/tests/should-get-tests/multi-detect.should b/algo/tests/should-get-tests/multi-detect.should index c7bf95284..b77f29534 100644 --- a/algo/tests/should-get-tests/multi-detect.should +++ b/algo/tests/should-get-tests/multi-detect.should @@ -26,8 +26,8 @@ $ Detect one read on IGL $ Compute the diversity. All windows have only one read, full diversity. 7: 1 1.000 -1: E = 1.000 -1: Ds = 1.000 +1: E.* 1.000 +1: Ds.* 1.000 ### Focusing on Ig recombinations with -g:IGH,IGK,IGL !LAUNCH: $VIDJIL_DIR/$EXEC -g $VIDJIL_DIR/germline/homo-sapiens.g:IGH,IGK,IGL $VIDJIL_DATA/multi-complete.fa -- GitLab From b7d878c6a01f2934ed40d2e6200cde0293da791e Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Sun, 9 Jan 2022 01:05:55 +0100 Subject: [PATCH 6/9] tests: diversity-index --- algo/tests/data/diversity.fa | 51 +++++++++++++++++++ .../should-get-tests/diversity-index.should | 34 +++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 algo/tests/data/diversity.fa create mode 100644 algo/tests/should-get-tests/diversity-index.should diff --git a/algo/tests/data/diversity.fa b/algo/tests/data/diversity.fa new file mode 100644 index 000000000..b3b6a8686 --- /dev/null +++ b/algo/tests/data/diversity.fa @@ -0,0 +1,51 @@ + +# TRA: no diversity, 3 reads / 1 clone + +# 1 - Ds = 1 +# H = 0 +# E = 0 + +>TRA--V1-1*01--J1*01--1 +cctccttctacaggagctccagatgaaagactctgcctcttacttctgcgctgtgagaga +gtatgaaagtattacctcccagttgcaatttggcaaaggaaccagagtttccacttctcc +>TRA--V1-1*01--J1*01--2 +cctccttctacaggagctccagatgaaagactctgcctcttacttctgcgctgtgagaga +gtatgaaagtattacctcccagttgcaatttggcaaaggaaccagagtttccacttctcc +>TRA--V1-1*01--J1*01--3 +cctccttctacaggagctccagatgaaagactctgcctcttacttctgcgctgtgagaga +gtatgaaagtattacctcccagttgcaatttggcaaaggaaccagagtttccacttctcc + +# TRG: full diversity, 3 reads / 3 clones + +# 1 - Ds = 0 +# H = 3 * (- 1/3 ln 1/3) = ln 3 = 1.09861... +# E = H / ln 3 = 1 + +>TRG--V1*01--J1*01--1 +agactgcaaaatctaattaaaaatgattctgggttctattactgtgccacctgggacagg +gaattattataagaaactctttggcagtggaacaacactggttgtcacag +>TRG--V1*01--ACT--J1*01--2 +agactgcaaaatctaattaaaaatgattctgggttctattactgtgccacctgggacagg +ACT +gaattattataagaaactctttggcagtggaacaacactggttgtcacag +>TRG--V1*01--GG--J1*01--3 +agactgcaaaatctaattaaaaatgattctgggttctattactgtgccacctgggacagg +GG +gaattattataagaaactctttggcagtggaacaacactggttgtcacag + +# IGK: 1 clone with 2 reads, 1 clone with 1 read + +# 1 - Ds = (2*1 + 1*0) / (3*2) = 1/3 +# H = - 2/3 ln 2/3 - 1/3 ln 1/3 = 0.63651... +# E = H / ln 3 = 0.57938... + +>IGK--V1-12*01--J1*01--1 +cagcctgcagcctgaagattttgcaacttactattgtcaacaggctaacagtttccctcc +gtggacgttcggccaagggaccaaggtggaaatcaaac +>IGK--V1-12*01--J1*01--2 +cagcctgcagcctgaagattttgcaacttactattgtcaacaggctaacagtttccctcc +gtggacgttcggccaagggaccaaggtggaaatcaaac +>IGK--V1-12*01--CCG--J1*01--3 +cagcctgcagcctgaagattttgcaacttactattgtcaacaggctaacagtttccctcc +CCG +gtggacgttcggccaagggaccaaggtggaaatcaaac diff --git a/algo/tests/should-get-tests/diversity-index.should b/algo/tests/should-get-tests/diversity-index.should new file mode 100644 index 000000000..f63f03c7b --- /dev/null +++ b/algo/tests/should-get-tests/diversity-index.should @@ -0,0 +1,34 @@ + +!LAUNCH: $VIDJIL_DIR/$EXEC -c clones -g $VIDJIL_DIR/germline/homo-sapiens.g:IGK ../data/diversity.fa + +$ Focusing on IGK recombination, three reads are detected +1: junction detected in 3 reads + +$ Diversity measures are correct +1:index_Ds_diversity 0.667 0.667 +1:index_H_entropy 0.637 0.637 +1:index_E_equitability 0.579 0.579 + + +!LAUNCH: $VIDJIL_DIR/$EXEC -c clones -g $VIDJIL_DIR/germline/homo-sapiens.g:TRA,TRG,IGK ../data/diversity.fa + +$ Reads are all detected +1: junction detected in 9 reads + +$ Diversity measures are reported by germline +1:Diversity measures IGK TRA TRG + +$ Diversity measures are correct +1:index_Ds_diversity 0.667 0.000 1.000 +1:index_H_entropy 0.637 0.000 1.099 +1:index_E_equitability 0.579 0.000 1.000 + + +!NO_LAUNCHER: +!LAUNCH: cat out/diversity.vidjil +!OPTIONS: --mod jR + +$ Diversity measures are reported in the json +:diversity.index_Ds_diversity.all: 0\.88[89] +:diversity.index_H_entropy.IGK: 0\.63[67] +:diversity.index_E_equitability.TRA: 0\.0 -- GitLab From 71a0b3fe3cd8daef5c82397eb529d2f218e1c34d Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Sun, 9 Jan 2022 11:59:14 +0100 Subject: [PATCH 7/9] tdd: some diversity indices are not defined with only one read --- algo/tests/data/diversity.fa | 10 ++++++++++ .../should-get-tests/diversity-index.should | 17 +++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/algo/tests/data/diversity.fa b/algo/tests/data/diversity.fa index b3b6a8686..0a8ee1ad6 100644 --- a/algo/tests/data/diversity.fa +++ b/algo/tests/data/diversity.fa @@ -49,3 +49,13 @@ gtggacgttcggccaagggaccaaggtggaaatcaaac cagcctgcagcctgaagattttgcaacttactattgtcaacaggctaacagtttccctcc CCG gtggacgttcggccaagggaccaaggtggaaatcaaac + +# IGL: only 1 read, some indices are not defined + +# 1 - Ds = 0 +# H and E not defined + +>IGLV2-14*01 12/4/0 IGLJ3*02 +TAATCGCTTCTCTGGCTCCAAGTCTGGCAACACGGCCTCCCTGACCATCTCTGG +GCTCCAGGCGAGGACGAGGCTGATTATTACTGCAGCTCATATACAAGCTTTCTT +GGGTGTTCGGCGGAGGGACCAAGCTG \ No newline at end of file diff --git a/algo/tests/should-get-tests/diversity-index.should b/algo/tests/should-get-tests/diversity-index.should index f63f03c7b..d8f802a2f 100644 --- a/algo/tests/should-get-tests/diversity-index.should +++ b/algo/tests/should-get-tests/diversity-index.should @@ -10,25 +10,26 @@ $ Diversity measures are correct 1:index_E_equitability 0.579 0.579 -!LAUNCH: $VIDJIL_DIR/$EXEC -c clones -g $VIDJIL_DIR/germline/homo-sapiens.g:TRA,TRG,IGK ../data/diversity.fa +!LAUNCH: $VIDJIL_DIR/$EXEC -c clones -g $VIDJIL_DIR/germline/homo-sapiens.g:TRA,TRG,IGK,IGL ../data/diversity.fa $ Reads are all detected -1: junction detected in 9 reads +1: junction detected in 10 reads $ Diversity measures are reported by germline -1:Diversity measures IGK TRA TRG +1:Diversity measures IGK IGL TRA TRG $ Diversity measures are correct -1:index_Ds_diversity 0.667 0.000 1.000 -1:index_H_entropy 0.637 0.000 1.099 -1:index_E_equitability 0.579 0.000 1.000 +1:index_Ds_diversity 0.667 - 0.000 1.000 +1:index_H_entropy 0.637 - 0.000 1.099 +1:index_E_equitability 0.579 - 0.000 1.000 !NO_LAUNCHER: !LAUNCH: cat out/diversity.vidjil !OPTIONS: --mod jR -$ Diversity measures are reported in the json -:diversity.index_Ds_diversity.all: 0\.88[89] +$ Diversity measures are reported in the json, except when there are not defined +:diversity.index_Ds_diversity.all: 0\.911 :diversity.index_H_entropy.IGK: 0\.63[67] :diversity.index_E_equitability.TRA: 0\.0 +0:diversity.index_E_equitability.IGL: -- GitLab From 3245bb369b4f5a77a5de092008f5599c064f6cb9 Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Sun, 9 Jan 2022 13:09:15 +0100 Subject: [PATCH 8/9] core/windows.cpp: do not export diversity measures when they are not defined see #1781 --- algo/core/windows.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/algo/core/windows.cpp b/algo/core/windows.cpp index 3211e8e07..d1c673065 100644 --- a/algo/core/windows.cpp +++ b/algo/core/windows.cpp @@ -258,6 +258,11 @@ json WindowsStorage::computeDiversity(map nb_segmented) { for (const auto& kv: index_H_entropy) { string code = kv.first ; + + // Only one read + if (nb_segmented[code] <= 1) + continue ; + // Shannon's diversity jsonDiversity["index_H_entropy"][code] = kv.second; @@ -279,7 +284,13 @@ json WindowsStorage::computeDiversity(map nb_segmented) { { cout << " " << setw(22) << index ; for (const auto& kv: index_H_entropy) - cout << fixed << setprecision(3) << setw(6) << (float) jsonDiversity[index][kv.first] ; + { + cout << setw(6) ; + if (jsonDiversity[index].contains(kv.first)) + cout << fixed << setprecision(3) << (float) jsonDiversity[index][kv.first]; + else + cout << "-" ; + } cout << endl; } -- GitLab From 61f2c12f5b60776ee1cb7d769ee2e1ef57bc640a Mon Sep 17 00:00:00 2001 From: Mathieu Giraud Date: Mon, 10 Jan 2022 08:52:37 +0100 Subject: [PATCH 9/9] doc/vidjil-algo.md: diversity by locus closes #1781 --- doc/vidjil-algo.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/vidjil-algo.md b/doc/vidjil-algo.md index 3e798a9ef..a58123b64 100644 --- a/doc/vidjil-algo.md +++ b/doc/vidjil-algo.md @@ -675,7 +675,8 @@ The `--out-reads` option produces large files, and is not recommended in general ## Diversity measures -Several [diversity indices](https://en.wikipedia.org/wiki/Diversity_index) are reported, both on the standard output and in the `.vidjil` file: +Several [diversity indices](https://en.wikipedia.org/wiki/Diversity_index) are reported, both on the standard output and in the `.vidjil` file, +for each germline/locus as well as for the entire data: - H (`index_H_entropy`): Shannon's diversity - E (`index_E_equitability`): Shannon's equitability @@ -683,7 +684,7 @@ Several [diversity indices](https://en.wikipedia.org/wiki/Diversity_index) are r E ans Ds values are between 0 (no diversity, one clone clusters all analyzed reads) and 1 (full diversity, each analyzed read belongs to a different clone). -These values are now computed on the windows, before any further clustering. +These values are computed on the full list of clones, before any further clustering. PCR and sequencing errors can thus lead to slightly over-estimate the diversity. ## Reads without detected recombinations -- GitLab