 ...

Commits (9)
 ... ... @@ -357,11 +357,57 @@ int DynProg::compute(bool onlyBottomTriangle, int onlyBottomTriangleShift) { int tbest = best.score ; if (mode == LocalEndWithSomeDeletions) tbest += cost.deletion_end*(n-j); if (mode == LocalEndWithSomeDeletions) { int context_length = 10; pair back = go_back(context_length, B[i][j]); int score_back = back.first.score; // Try to guess the nb of differences (here only mismatches) based on scores // Just a consequence of the formula (n - e) * match + e * mismatch = score int nb_mismatch_left_part = max(0., (score_back - (back.first.j+1) * cost.match) * 1. / (cost.mismatch - cost.match)); int standardized_mismatch_left_part = nb_mismatch_left_part * back.second / max(1, (back.first.j + 1)); float percent_error_right_part = 1 - nb_mismatch_left_part * 1. / max(1, (back.first.j+1)); // Heuristically guess the number of mismatches in the back.second last aligned letters int nb_mismatch_right_part = max(0., ((tbest - score_back) - back.second * cost.match)*1. / (cost.mismatch - cost.match)); float proba_error = 0; // proba_error : probability that the error scheme at the end is different from what we had before // probability that we had more errors in the right part than the left part for (int i = standardized_mismatch_left_part+1; i <= nb_mismatch_right_part; i++) { proba_error += pow(1 - percent_error_right_part, i) * pow(percent_error_right_part, back.second - i) * nChoosek(back.second, i); } float del_end_cost = cost.deletion_end * (1 - proba_error) + cost.deletion * proba_error; #ifdef DEBUG_DEL_END_SCORE // if (n-j <= 15 && m -i <= 5) { PRINT_VAR(nb_mismatch); PRINT_VAR(back.second); PRINT_VAR(nb_mismatch_left_part); PRINT_VAR(back.first.j); PRINT_VAR(standardized_mismatch_left_part); PRINT_VAR(tbest); PRINT_VAR(percent_error_right_part); // PRINT_VAR(percent_score_back); PRINT_VAR(score_back); PRINT_VAR(proba_error); PRINT_VAR(del_end_cost); PRINT_VAR(i); PRINT_VAR(j); cerr << (n-j) << " deletions at the end" << endl; cerr << "score would be " << tbest + del_end_cost * (n - j) << endl << endl; // } #endif tbest += del_end_cost*(n-j); } if (tbest > best_score) { #ifdef DEBUG_DEL_END_SCORE cerr << " *** This is the best score for now " << tbest << " *** " << endl; PRINT_VAR(i); PRINT_VAR(j); #endif best_score = tbest ; best_i = i ; best_j = j ; ... ... @@ -447,6 +493,14 @@ int DynProg::compute(bool onlyBottomTriangle, int onlyBottomTriangleShift) return best_score ; } pair DynProg::go_back(int nb_positions, operation cell) { int nb; for (nb = 0; nb < nb_positions && cell.type != FIN && cell.i >= 0 && cell.j >= 0; nb++) { cell = B[cell.i][cell.j]; } return make_pair(cell, nb); } int DynProg::best_score_on_i(int i, int *best_j) { int best_score = MINUS_INF ; ... ...
 ... ... @@ -74,7 +74,7 @@ const Cost Levenshtein = Cost(0, -1, -1); const Cost DNA = Cost(+5, -4, -10); // Vidjil costs const Cost VDJ = Cost(+4, -6, -10, -1); const Cost VDJ = Cost(+40, -100, -160, -3); const Cost VDJaffine = Cost(+4, -6, -15, -1, -1, MINUS_INF); const Cost IdentityDirty = Cost(+1000, -1, -1); // pour avoir une estimation de longueur de l'alignement, utilise dans compare-all ... ... @@ -147,6 +147,15 @@ class DynProg int compute(bool onlyBottomTriangle = false, int onlyBottomTriangleShift = 0); void backtrack(); /** * Go back in the alignment by nb_positions (at most) by starting at * the given cell. * @return the corresponding cell in the matrix and the number of * steps that have been performed (<= nb_positions) */ pair go_back(int nb_positions, operation cell); void SemiGlobal_hits_threshold(vector &hits, int threshold, int shift_pos, int verbose); string SemiGlobal_extract_best(); ... ...
 # For the moment 9/CTA/2 >TRGV2*01 12/CCCCTA/2 TRGJ1*02 [TRG] GGAAGGCCCCACAGCGTCTTCAGTACTATGACTCCTACAACTCCAAGGTTGTGTTGGAATCAGGAGTCAGTCCAGGGAAGTATTATACTTACGCAAGCACAAGGAACAACTTGAGATTGATACTGCGAAATCTAATTGAAAATGACTCTGGGGTCTATTACTGTGCCCCC CTA ATTATAAGAAACTCTTTGGCAGTGGAACAACAC # For the moment 0//10 >TRGV4*01 5/ATGGG/10 TRGJP1*01 [TRG] GGAAGGCCCCACAGCGTCTTCTGTACTATGACTCCTACACCTCCAGCGTTGTGTTGGAATCAGGAATCAGCCCAGGGAAGTATGATACTTATGGAAGCACAAGGAAGAACTTGAGAATGATACTGCGAAATCTTATTGAAAATGACTCTGGAGTCTATTACTGTGCCACCTGGGCTCGGTTGGTTCAAGATATTTGCTGAAGGGACTAAGCTCATAGTAACTTCGCCTGGTAA
 # IGHV3-30 GGAGGTCCCTGAGACTCTCCTGTGCAGCCTCTGGATTCACCTTCAGTAGCTATGCTATGCACTGGGTCCGCCAGGCTCCAGGCAAGGGGCTAGAGTGGGTGGCAGTTATATCATATGATGGAAGTAATAAATACTACGCAGACTCCGTGAAGGGCCGATTCACCATCTCCAG # |||||||||||||||||||||||||||||||||||||||| # |||||||||| |||||| |||||||||||||||||||||| # read GGAGGTCCCTAAGACTCCCCTGTGCAGCCTCTGGATTCACTGGTCACCGTCTCCTCAGGTAAG # ||||||||||||||||||| # IGHJ4 ACTACTTTGACTACTGGGGCCAAGGAACCCTGGTCACCGTCTCCTCAG ... ...
 #include "core/dynprog.h" void testEndDeletions() { string v = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; vector seq = { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "AAAAAAAAGAAAGAAAAGAAAGAAAAGAAAAGAAAGAAAGAAAGAAAGAA", "AAAAAAAAGAAAGAAAAGAAAGAAAAGAAAAGAAAGAAAGAAAGAAAGGG", "AAAAAAAAGAAAGAAAAGAAAGAAAAGAAAAGAAAGAAAGAAAGAAAGGA", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAGAAGAAGAGAGAGAGGGA", }; vector del_ends = {0, 0, 3, 3, 3, 20}; for (size_t i = 0; i < seq.size(); i++) { DynProg dp(v, seq[i], DynProg::LocalEndWithSomeDeletions, VDJ); dp.compute(); TAP_TEST_EQUAL(seq[i].length()-1 - dp.best_j, del_ends[i], TEST_DYNPROG_DEL_END, "dp.best_j = " << dp.best_j <<"\ti=" << i); } } void testDynProg() { testEndDeletions(); }
 ... ... @@ -15,6 +15,7 @@ #include "testWindowsStorage.cpp" #include "testReadStorage.cpp" #include "testAutomaton.cpp" #include "testDynProg.cpp" int main(void) { TAP_START(NB_TESTS); ... ... @@ -35,6 +36,7 @@ int main(void) { testWindowStorage(); testReadStorage(); testAutomaton(); testDynProg(); TAP_END_TEST_EXIT }
 ... ... @@ -172,6 +172,9 @@ enum { TEST_BRS_GET_NB_SCORES, TEST_BRS_GET_BEST_READS, /* Dynprog */ TEST_DYNPROG_DEL_END, /* Bugs */ TEST_BUG_SEGMENTATION, TEST_SEGMENT_POSITION, ... ... @@ -337,6 +340,10 @@ inline void declare_tests() { RECORD_TAP_TEST(TEST_KMER_REPRESENTATIVE_REQUIRED_SEQ, "Test KmerRepresentativeComputer computations with a required sequence"); RECORD_TAP_TEST(TEST_KMER_REPRESENTATIVE_REVCOMP, "Test KmerRepresentativeComputer computations on a dataset and its revcomp"); RECORD_TAP_TEST(TEST_KMER_REPRESENTATIVE_QUALITY, "Test KmerRepresentativeComputer quality computations"); RECORD_TAP_TEST(TEST_DYNPROG_DEL_END, "Test number of end deletions for Dynamic Programming"); RECORD_TAP_TEST(TEST_BUG_SEGMENTATION, "Test segmentation bug"); RECORD_TAP_TEST(TEST_SEGMENT_POSITION, "Test V,D,J position"); RECORD_TAP_TEST(TEST_KMER_SEGMENT_OVERLAP, "Test kmer segmentation with an overlap"); ... ...