align.cpp 3.8 KB
Newer Older
Mikael Salson's avatar
Mikael Salson committed
1 2 3 4 5 6

#include <fstream>
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <cstdlib>
7
#include <stdlib.h>   
WebTogz's avatar
WebTogz committed
8 9
#include <core/dynprog.h>
#include <core/fasta.h>
10
#include "docopt.h"
Mikael Salson's avatar
Mikael Salson committed
11 12 13 14

using namespace std;


15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55

// très sensible aux espaces, tabulations...

static const char VERSION[] = "align beta" ;

static const char USAGE[] =
R"(Align sequences using core/dynprog.cpp
The two sequences can be taken in either one or two fasta files.

    Usage:       
      align [options] <file>         
      align [options] <file> <file>  

    Options:
      -h --help     Show help
      --version     Show version
      -i <i>        Sequence number in file1 [default: 0]
      -j <j>        Sequence number in file2 [default: 0]
      -m <mode>     Mode [default: 0]
                0       loop on all modes
                1       Local 
                2       LocalEndWithSomeDeletions 
                3       SemiGlobalTrans 
                4       SemiGlobal
                5       GlobalButMostlyLocal
                6       Global
      -c <cost>     Cost [default: 2]
                1       DNA
                2       VDJ
                9       VDJaffine
                5       IdentityDirty
                6       Hammong
                7       Levenshtein
                8       Cluster
      -x --matrix    Display matrix
)";


// Should be moved in core/dynprog

DynProg::DynProgMode getdpMode(int mode)
Mikael Salson's avatar
Mikael Salson committed
56 57
{
  DynProg::DynProgMode dpMode = DynProg::Local;
58 59 60 61 62 63 64 65 66 67 68
  if (mode == 1) dpMode = DynProg::Local;
  if (mode == 2) dpMode = DynProg::LocalEndWithSomeDeletions;
  if (mode == 3) dpMode = DynProg::SemiGlobalTrans;
  if (mode == 4) dpMode = DynProg::SemiGlobal;
  if (mode == 5) dpMode = DynProg::GlobalButMostlyLocal;
  if (mode == 6) dpMode = DynProg::Global;
  return dpMode ;
}

Cost getCost(int cost)
{
Mikael Salson's avatar
Mikael Salson committed
69 70
  Cost dpCost = VDJ;

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
  if (cost == 1) dpCost = DNA;
  if (cost == 2) dpCost = VDJ;
  if (cost == 9) dpCost = VDJaffine;
  if (cost == 5) dpCost = IdentityDirty;
  if (cost == 6) dpCost = Hamming;
  if (cost == 7) dpCost = Levenshtein;
  if (cost == 8) dpCost = Cluster;

  return dpCost ;
}  


int main(int argc, const char** argv)
{
  // Options parsing
  std::map<std::string, docopt::value> args
    = docopt::docopt(USAGE, { argv + 1, argv + argc }, true, VERSION); 

  for(auto const& arg : args) 
    cout << arg.first << ":" <<arg.second << endl;
  cout << endl ;
Mikael Salson's avatar
Mikael Salson committed
92
  
93 94
  // Files
  vector<string> files = args["<file>"].asStringList();
Mikael Salson's avatar
Mikael Salson committed
95
  
96 97
  const char* file1 = files.front().c_str();
  const char* file2 = files.back().c_str();
Mikael Salson's avatar
Mikael Salson committed
98
  
99 100
  Fasta fasta1(file1, 1, " ");
  Fasta fasta2(file2, 1, " ");
Mikael Salson's avatar
Mikael Salson committed
101
  
102 103 104 105 106
  // Sequences
  int i = atoi(args["-i"].asString().c_str());
  int j = atoi(args["-j"].asString().c_str());

  if (i >= fasta1.size())
Mikael Salson's avatar
Mikael Salson committed
107
    {
108
      cerr << "ERROR : no sequence #" << i << " in " << file1 << endl ;
Mikael Salson's avatar
Mikael Salson committed
109 110 111
      exit(1);
    }
  
112
  if (j >= fasta2.size())
Mikael Salson's avatar
Mikael Salson committed
113
    {
114
      cerr << "ERROR : no sequence #" << j << " in " << file2 << endl ;
Mikael Salson's avatar
Mikael Salson committed
115 116 117
      exit(1);
    }

118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
  string seq1 = fasta1.sequence(i);
  string seq2 = fasta2.sequence(j);

  cout << ">" << fasta1.label(i) << "\t" << file1 << " " << i << endl << seq1 << endl;
  cout << ">" << fasta2.label(i) << "\t" << file2 << " " << j << endl << seq2 << endl;
  cout << endl;

  
  // Cost
  Cost dpCost = getCost( atoi(args["-c"].asString().c_str()));
  cout << "Cost: " << dpCost << endl;
  cout << endl;

  
  // Mode
  int m = atoi(args["-m"].asString().c_str());

  for (int mm=1; mm<=6; mm++)
    {      
      int this_m = m > 0 ? m : mm ;
      cout << "===== -m " << this_m << " : " << mode_description[this_m] << endl ;
      DynProg::DynProgMode dpMode = getdpMode(this_m);
    DynProg dp = DynProg(seq1, seq2, dpMode, dpCost);
Mikael Salson's avatar
Mikael Salson committed
141 142 143

    dp.compute(); 
    dp.backtrack();
144 145 146

    if (args["--matrix"].asBool())
      cout << dp;
Mikael Salson's avatar
Mikael Salson committed
147 148
    
    cout << dp.str_back << endl;
149 150 151

    if (m)
      break ;      
Mikael Salson's avatar
Mikael Salson committed
152 153
  }
}