dynprog.h 4.08 KB
Newer Older
Mikaël Salson's avatar
Mikaël Salson committed
1 2 3 4 5 6 7 8 9 10
#ifndef DYNPROG_H
#define DYNPROG_H

#include <string>
#include <vector>

#include <iostream>
#include <iomanip>


Mikaël Salson's avatar
Mikaël Salson committed
11
#define MINUS_INF -9999
Mikaël Salson's avatar
Mikaël Salson committed
12 13 14 15 16 17

using namespace std;


float identity_percent(int score);

18
typedef struct {
19
  char type;
20 21
  int i;
  int j;
22 23 24
  int score;
} operation;

25

Mikaël Salson's avatar
Mikaël Salson committed
26 27 28
class Cost
{
 public:
29 30
  bool debug = false;

Mikaël Salson's avatar
Mikaël Salson committed
31 32 33 34 35 36 37 38 39
  int match;
  int mismatch;

  /**
   * @param homopolymer: if MINUS_INF => same score as indel 
   */

  // del_end -> utilise seulement pour LocalEndWithSomeDeletions
  Cost (int match, int mismatch, int indel, int del_end = 0, int homopolymer = MINUS_INF);
40 41
  // affine gaps
  Cost(int match, int mismatch, int open_gap, int extend_gap, int del_end, int homopolymer);
Mikaël Salson's avatar
Mikaël Salson committed
42 43 44 45 46 47 48 49
  Cost ();

  int insertion;
  int deletion;
  int deletion_end;
  int homopolymer;
  int substitution(char a, char b);
  int homo2(char xa, char xb, char y);
50

51 52 53
  /**
   * @return p-value of having a random alignment of the given score
   */
54
  double toPValue(const int score);
55

56 57 58 59 60 61
  int open_insertion;
  int open_deletion;
  int extend_insertion;
  int extend_deletion;

  bool affine_gap;
62 63 64 65

  void estimate_K_lambda();
  float K;
  float lambda;
Mikaël Salson's avatar
Mikaël Salson committed
66 67 68 69
};


ostream& operator<<(ostream& out, const Cost& cost);
70
string string_of_cost(const Cost cost);
Mikaël Salson's avatar
Mikaël Salson committed
71

72 73 74 75
// Usual costs
const Cost Hamming = Cost(0, -1, MINUS_INF);
const Cost Levenshtein = Cost(0, -1, -1);
const Cost DNA = Cost(+5, -4, -10);
Mikaël Salson's avatar
Mikaël Salson committed
76

77
// Vidjil costs
78 79
const Cost VDJ = Cost(+4, -6, -10, -1);
const Cost VDJaffine = Cost(+4, -6, -15, -1, -1, MINUS_INF);
Mikaël Salson's avatar
Mikaël Salson committed
80 81 82 83

const Cost IdentityDirty = Cost(+1000, -1, -1); // pour avoir une estimation de longueur de l'alignement, utilise dans compare-all
const Cost Cluster = Cost(+1, -4, -4, 0, 0);

84 85 86 87 88 89 90 91 92 93 94

const char* const mode_description[] = {
  "XXX",
  "Local",
  "LocalEndWithSomeDeletions",
  "SemiGlobalTrans",
  "SemiGlobal",
  "GlobalButMostlyLocal",
  "Global"
} ;

Mikaël Salson's avatar
Mikaël Salson committed
95 96 97 98
class DynProg
{
 public:
  enum DynProgMode {
99
    XXX,
Mikaël Salson's avatar
Mikaël Salson committed
100 101 102 103
    Local,            // partial x / partial y
    LocalEndWithSomeDeletions, // local + some deletions on __
    SemiGlobalTrans,  // start-to-partial x / partial-to-end y 
    SemiGlobal,       // complete x / partial y
Mathieu Giraud's avatar
Mathieu Giraud committed
104
    GlobalButMostlyLocal,  // complete x / complete y, but deletions at begin (DONE) and end (TODO) are cheaper
Mikaël Salson's avatar
Mikaël Salson committed
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
    Global            // complete x / complete y
  } ;

  DynProgMode mode ;
  Cost cost;

 private:
  string x ;
  string y ;
  int m ;
  int n ;
  bool reverse_x ;
  bool reverse_y ;

 public:
  int best_score ;
  int best_i ;                  /* Start at 1 */
  int best_j ;                  /* Start at 1 */
  int first_i ;                 /* Start at 1 */
  int first_j ;                 /* Start at 1 */
125

Mikaël Salson's avatar
Mikaël Salson committed
126
  string str_back ;
127 128
  int marked_pos_i ; // To be computed (in x)
  int marked_pos_j ; // Given (in y)
Mikaël Salson's avatar
Mikaël Salson committed
129

130 131 132
  DynProg(const string &x, const string &y, DynProgMode mode, const Cost &c,
          const bool reverse_x=false, const bool reverse_y=false,
          const int marked_pos_j=0);
Mikaël Salson's avatar
Mikaël Salson committed
133 134
  ~DynProg();
  void init();
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149


  /*
   * Launch the DP matrix computation
   *
   * @param onlyBottomTriangle:      limits the DP matrix computation to its bottom 'half'
   *                                 (the reference point being the 'last' point (m,n))
   *                                 (if the matrix is not square, this 'half' will be larger or smaller than 50%)
   *
   * @param onlyBottomTriangleShift: shifts the border between the top 'half' and the bottom 'half' of the DP matrix
   *                                 (when this value is positive, slighty more than 'half' of the matrix will be computed)
   */

  int compute(bool onlyBottomTriangle = false, int onlyBottomTriangleShift = 0);

Mikaël Salson's avatar
Mikaël Salson committed
150 151
  void backtrack();
  void SemiGlobal_hits_threshold(vector<int> &hits, int threshold, int shift_pos, int verbose);
Mikaël Salson's avatar
Mikaël Salson committed
152

Mikaël Salson's avatar
Mikaël Salson committed
153 154
  string SemiGlobal_extract_best();

155 156
  int best_score_on_i(int i, int *best_j);

Mikaël Salson's avatar
Mikaël Salson committed
157 158
  friend ostream& operator<<(ostream& out, const DynProg& dp);
  
159
  operation **B;  // Score and backtrack matrix
160 161
  operation **Bins ; // affine gap
  operation **Bdel ; // affine gap
Mikaël Salson's avatar
Mikaël Salson committed
162 163 164
  int *gap1 ;
  int *linkgap ;
  int *gap2 ;
Mikaël Salson's avatar
Mikaël Salson committed
165 166 167 168 169 170 171 172 173

};

ostream& operator<<(ostream& out, const DynProg& dp);

Cost strToCost(string str, Cost default_cost);

#endif