dynprog.h 2.97 KB
Newer Older
Mikael Salson's avatar
Mikael Salson committed
1 2 3 4 5 6 7 8 9 10
#ifndef DYNPROG_H
#define DYNPROG_H

#include <string>
#include <vector>

#include <iostream>
#include <iomanip>


Mikael Salson's avatar
Mikael Salson committed
11
#define MINUS_INF -9999
Mikael Salson's avatar
Mikael Salson committed
12 13 14 15 16 17

using namespace std;


float identity_percent(int score);

18
typedef struct {
19
  char type;
20 21
  int i;
  int j;
22 23 24
  int score;
} operation;

25

Mikael Salson's avatar
Mikael Salson committed
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
class Cost
{
 public:
  int match;
  int mismatch;

  /**
   * @param homopolymer: if MINUS_INF => same score as indel 
   */

  // del_end -> utilise seulement pour LocalEndWithSomeDeletions
  Cost (int match, int mismatch, int indel, int del_end = 0, int homopolymer = MINUS_INF);
  Cost ();

  int insertion;
  int deletion;
  int deletion_end;
  int homopolymer;
  int substitution(char a, char b);
  //int ins(char current, char next);
  //int del(char current, char next);
  int homo2(char xa, char xb, char y);
};


ostream& operator<<(ostream& out, const Cost& cost);


/* const Cost DNA = Cost(+5, -4, -10); */
/* const Cost VDJ = Cost(+5, -8, -8, -1); */

const Cost DNA = Cost(+5, -4, -10, 0, 0);
Mathieu Giraud's avatar
Mathieu Giraud committed
58
const Cost VDJ = Cost(+4, -6, -10, -1, -2);
Mikael Salson's avatar
Mikael Salson committed
59 60 61 62 63 64 65
const Cost Identity = Cost(+1, -1, -1, 0, 0);

const Cost Homopolymers = Cost(+1, MINUS_INF, -1); // TODO: true homopolymer
const Cost IdentityToto= Cost(+1, -1, -1); // avec seuil de length-2: un homopoly xou une substituion
/* const Cost Identity = Cost(+1, 0, 0); */
const Cost IdentityDirty = Cost(+1000, -1, -1); // pour avoir une estimation de longueur de l'alignement, utilise dans compare-all
const Cost Hamming = Cost(0, -1, MINUS_INF);
WebTogz's avatar
WebTogz committed
66
const Cost Levenshtein = Cost(0, -1, -1);
Mikael Salson's avatar
Mikael Salson committed
67 68 69 70 71 72 73 74 75 76 77 78
const Cost Cluster = Cost(+1, -4, -4, 0, 0);

//const Cost Hamming = Cost();

class DynProg
{
 public:
  enum DynProgMode {
    Local,            // partial x / partial y
    LocalEndWithSomeDeletions, // local + some deletions on __
    SemiGlobalTrans,  // start-to-partial x / partial-to-end y 
    SemiGlobal,       // complete x / partial y
Mathieu Giraud's avatar
Mathieu Giraud committed
79
    GlobalButMostlyLocal,  // complete x / complete y, but deletions at begin (DONE) and end (TODO) are cheaper
Mikael Salson's avatar
Mikael Salson committed
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
    Global            // complete x / complete y
  } ;

  DynProgMode mode ;
  Cost cost;

 private:
  string x ;
  string y ;
  int m ;
  int n ;
  bool reverse_x ;
  bool reverse_y ;

 public:
  int best_score ;
  int best_i ;                  /* Start at 1 */
  int best_j ;                  /* Start at 1 */
  int first_i ;                 /* Start at 1 */
  int first_j ;                 /* Start at 1 */
  string str_back ;

  DynProg(const string &x, const string &y, DynProgMode mode, const Cost &c, const bool reverse_x=false, const bool reverse_y=false);
  ~DynProg();
  void init();
  int compute();
  void backtrack();
  void SemiGlobal_hits_threshold(vector<int> &hits, int threshold, int shift_pos, int verbose);
Mikael Salson's avatar
Mikael Salson committed
108

Mikael Salson's avatar
Mikael Salson committed
109 110 111 112
  string SemiGlobal_extract_best();

  friend ostream& operator<<(ostream& out, const DynProg& dp);
  
113 114
  int **S;        // Score matrix
  operation **B;  // Backtrack matrix
Mikael Salson's avatar
Mikael Salson committed
115 116 117
  int *gap1 ;
  int *linkgap ;
  int *gap2 ;
Mikael Salson's avatar
Mikael Salson committed
118 119 120 121 122 123 124 125 126

};

ostream& operator<<(ostream& out, const DynProg& dp);

Cost strToCost(string str, Cost default_cost);

#endif