Commit 3139b094 authored by Mathieu Giraud's avatar Mathieu Giraud

Merge branch 'feature-a/3866-random-sample-by-default' into 'dev'

core/representative.h, vidjil.cpp: RandomScore by default

See merge request !464
parents ed299a22 18565743
Pipeline #78605 failed with stages
in 1 minute and 25 seconds
......@@ -14,7 +14,7 @@ using namespace std;
#define THRESHOLD_BAD_COVERAGE .5 /* Threshold below which the representatie
coverage is considered bad */
static ReadQualityScore DEFAULT_READ_SCORE;
static RandomScore DEFAULT_READ_SCORE;
/**
* Compute a representative sequence from a list of sequences.
......
......@@ -26,7 +26,7 @@ $ First clone -- find the good number of reads
2:clone-001--.*--0000008
$ First clone -- find the good representative
1:clone-001--.*--lcl.FLN1FA001BQ9J5.1.-.88,232.-.4
1:GACAATTCCAAGAACACGCTGTACCTGCAAATGAACAGCCTGCGAGCCGAGGACACGGCCACCTATTACTGTACCCGGGAGGAACAATATAGCAGCTGGTACTTTGACTTCTGGGGCCAGGGGATCCTGGTCACCGTCTCCTCAG
$ First clone -- find the good coverage
1:clone-001--.* 145 bp .62. of 232.0 bp.
!LAUNCH: $VIDJIL_DIR/$EXEC -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-default.log
!LAUNCH: $VIDJIL_DIR/$EXEC -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG --consensus-on-random-sample $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-random.log
!LAUNCH: $VIDJIL_DIR/$EXEC -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG --consensus-on-longest-sequences $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-longest.log
!LAUNCH: $VIDJIL_DIR/$EXEC -w 20 -g $VIDJIL_DIR/germline/homo-sapiens.g:TRG $VIDJIL_DATA/test-random-consensus.fa.gz > consensus-random.log
!NO_LAUNCHER:
!LAUNCH: diff consensus-default.log consensus-random.log
!LAUNCH: diff consensus-longest.log consensus-random.log
!EXIT_CODE: 1
$ Output should differ: default has a consensus of 52bp (with the spurious insertion)
$ Output should differ: ReadQualityScore gives a consensus of 52bp (with the spurious insertion)
# Appears twice in the header of the consensus sequence and in the similarity matrix
2:^< .* 52 bp
1:^< CTTTT
......
#include "core/bioreader.hpp"
#include "core/representative.h"
#include "core/read_score.h"
void testRepresentative() {
list<Sequence> reads = BioReader("data/representative.fq").getAll();
......@@ -13,7 +14,8 @@ void testRepresentative() {
krc.setCoverageReferenceLength(50);
krc.setRequiredSequence("CCGGGGGGGGGGTTT");
krc.compute();
ReadQualityScore vrs = ReadQualityScore();
krc.compute(vrs);
Sequence representative = krc.getRepresentative();
// Seq3 is the longest it should be taken when performing 0 extra iteration
......@@ -21,20 +23,20 @@ void testRepresentative() {
"If we take the first representative we should have seq3, and not at the beginning (" << representative.label << " instead)");
krc.setStabilityLimit(1);
krc.compute();
krc.compute(vrs);
representative = krc.getRepresentative();
TAP_TEST_EQUAL(representative.label.find("seq3-[37,73]"), 0, TEST_KMER_REPRESENTATIVE,
"When allowing one step before stability, we should still have seq3 (" << representative.label << " instead)");
krc.setStabilityLimit(2);
krc.compute();
krc.compute(vrs);
representative = krc.getRepresentative();
TAP_TEST_EQUAL(representative.label.find("seq1-[0,41]"), 0, TEST_KMER_REPRESENTATIVE,
"When allowing two steps before stability, we should reach seq1 (" << representative.label << " instead)");
krc.setRevcomp(true);
krc.setRequiredSequence("ATCGCGCCCT"); // revcomp
krc.compute();
krc.compute(vrs);
representative = krc.getRepresentative();
string quality = krc.getQuality();
TAP_TEST_EQUAL(representative.label.find("seq2-[33,52]"), 0, TEST_KMER_REPRESENTATIVE_REQUIRED_SEQ,
......@@ -47,7 +49,7 @@ void testRepresentative() {
krc.setRevcomp(false);
krc.compute();
krc.compute(vrs);
TAP_TEST(! krc.hasRepresentative(), TEST_KMER_REPRESENTATIVE_REQUIRED_SEQ,
"When requiring sequence AGGGCGCGAT and revcomp=false, we shouldn't find anything (the sequence is revcomp-ed)");
}
......
#include <core/windows.h>
#include <core/germline.h>
#include <core/bioreader.hpp>
#include <core/read_score.h>
#include <map>
void testWSAdd() {
......@@ -110,6 +111,8 @@ void testWSAdd() {
void testWSAddWithLimit() {
map<string, string> labels;
WindowsStorage ws(labels);
ReadQualityScore rqs;
ws.setScorer(&rqs);
ws.setMaximalNbReadsPerWindow(3);
ws.setBinParameters(1, 20);
Sequence seq = {"label", "l", "GATACATTAGACAGCT", "", 0};
......
......@@ -471,12 +471,12 @@ int main (int argc, char **argv)
-> group(group);
VirtualReadScore *readScorer = &DEFAULT_READ_SCORE;
RandomScore randomScore;
app.add_flag_function("--consensus-on-random-sample",
[&readScorer, &randomScore](size_t n) {
ReadQualityScore readQualityScore;
app.add_flag_function("--consensus-on-longest-sequences",
[&readScorer, &readQualityScore](size_t n) {
UNUSED(n);
readScorer = &randomScore;
}, "for large clones, use a random sample of reads to compute the consensus sequence (instead of a sample of the longest and highest quality reads)")
readScorer = &readQualityScore;
}, "for large clones, use a sample of the longest and highest quality reads to compute the consensus sequence (instead of a random sample)")
->group(group) -> level();
// ----------------------------------------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment