align.cpp 3.48 KB
Newer Older
Mikaël Salson's avatar
Mikaël Salson committed
1
2
3
4
5
6

#include <fstream>
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <cstdlib>
7
#include <stdlib.h>   
WebTogz's avatar
WebTogz committed
8
#include <core/dynprog.h>
9
#include <core/bioreader.hpp>
10
#include "CLI11.hpp"
Mikaël Salson's avatar
Mikaël Salson committed
11
12
13
14

using namespace std;


15
16
17
// Should be moved in core/dynprog

DynProg::DynProgMode getdpMode(int mode)
Mikaël Salson's avatar
Mikaël Salson committed
18
19
{
  DynProg::DynProgMode dpMode = DynProg::Local;
20
21
22
23
24
25
26
27
28
29
30
  if (mode == 1) dpMode = DynProg::Local;
  if (mode == 2) dpMode = DynProg::LocalEndWithSomeDeletions;
  if (mode == 3) dpMode = DynProg::SemiGlobalTrans;
  if (mode == 4) dpMode = DynProg::SemiGlobal;
  if (mode == 5) dpMode = DynProg::GlobalButMostlyLocal;
  if (mode == 6) dpMode = DynProg::Global;
  return dpMode ;
}

Cost getCost(int cost)
{
Mikaël Salson's avatar
Mikaël Salson committed
31
32
  Cost dpCost = VDJ;

33
34
35
36
37
38
39
40
41
42
43
44
  if (cost == 1) dpCost = DNA;
  if (cost == 2) dpCost = VDJ;
  if (cost == 9) dpCost = VDJaffine;
  if (cost == 5) dpCost = IdentityDirty;
  if (cost == 6) dpCost = Hamming;
  if (cost == 7) dpCost = Levenshtein;
  if (cost == 8) dpCost = Cluster;

  return dpCost ;
}  


45
int main(int argc, char** argv)
46
{
47
  CLI::App app{"Align sequences using core/dynprog.cpp.\n The two sequences can be taken in either one or two fasta files."};
48

49
50
  int i = 0;
  int j = 0;
Mathieu Giraud's avatar
Mathieu Giraud committed
51
52
53
54
55
  app.add_option("-i", i, "Sequence number in file1", true)
    ->group("Sequence selection");

  app.add_option("-j", j, "Sequence number in file2", true)
    ->group("Sequence selection");
Mikaël Salson's avatar
Mikaël Salson committed
56
  
57
  // Files
58
59
  string file1 = "" ;
  string file2 = "" ;
60
61
62
  app.add_option("file1", file1, "file1 (.fa)")
    ->check(CLI::ExistingFile)
    ->required();
63
  app.add_option("file2", file2, "file2 (.fa) [when not given, take file1 twice]")
64
    ->check(CLI::ExistingFile);
65
66
67

  // Mode
  int m = 0;
68
69
70
71
72
73
74
75
  app.add_option("-m,--mode", m, R"(Mode
    0       loop on all modes
    1       Local
    2       LocalEndWithSomeDeletions
    3       SemiGlobalTrans
    4       SemiGlobal
    5       GlobalButMostlyLocal
    6       Global)", true);
76
77
78

  // Cost
  int cost = 0;
79
80
81
82
83
84
85
86
  app.add_option("-c,--cost", cost, R"(Cost
    1       DNA
    2       VDJ
    9       VDJaffine
    5       IdentityDirty
    6       Hammong
    7       Levenshtein
    8       Cluster)", true);
87
88
89
90

  // Matrix
  bool matrix = false;
  app.add_flag("-x,--matrix", matrix, "Display matrix");
Mikaël Salson's avatar
Mikaël Salson committed
91
  
92
93
94
  // Options parsing
  CLI11_PARSE(app, argc, argv);

95
96
97
  if (!file2.size())
    file2 = file1;

98
  // ----------------------------------------------------------
Mikaël Salson's avatar
Mikaël Salson committed
99
  
100
  // Read files
101
102
  BioReader fasta1(file1, 1, " ");
  BioReader fasta2(file2, 1, " ");
Mikaël Salson's avatar
Mikaël Salson committed
103
  
104
105
  // Sequences
  if (i >= fasta1.size())
Mikaël Salson's avatar
Mikaël Salson committed
106
    {
107
      cerr << "ERROR : no sequence #" << i << " in " << file1 << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
108
109
110
      exit(1);
    }
  
111
  if (j >= fasta2.size())
Mikaël Salson's avatar
Mikaël Salson committed
112
    {
113
      cerr << "ERROR : no sequence #" << j << " in " << file2 << endl ;
Mikaël Salson's avatar
Mikaël Salson committed
114
115
116
      exit(1);
    }

117
118
119
120
  string seq1 = fasta1.sequence(i);
  string seq2 = fasta2.sequence(j);

  cout << ">" << fasta1.label(i) << "\t" << file1 << " " << i << endl << seq1 << endl;
121
  cout << ">" << fasta2.label(j) << "\t" << file2 << " " << j << endl << seq2 << endl;
122
123
124
125
  cout << endl;

  
  // Cost
126
  Cost dpCost = getCost(cost);
127
  dpCost.debug = true ;
128
129
130
131
132
133
134
135
136
137
138
  cout << "Cost: " << dpCost << endl;
  cout << endl;

  
  // Mode
  for (int mm=1; mm<=6; mm++)
    {      
      int this_m = m > 0 ? m : mm ;
      cout << "===== -m " << this_m << " : " << mode_description[this_m] << endl ;
      DynProg::DynProgMode dpMode = getdpMode(this_m);
    DynProg dp = DynProg(seq1, seq2, dpMode, dpCost);
Mikaël Salson's avatar
Mikaël Salson committed
139
140
141

    dp.compute(); 
    dp.backtrack();
142

143
    if (matrix)
144
      cout << dp;
Mikaël Salson's avatar
Mikaël Salson committed
145
146
    
    cout << dp.str_back << endl;
147
148
149

    if (m)
      break ;      
Mikaël Salson's avatar
Mikaël Salson committed
150
151
  }
}