Mise à jour terminée. Pour connaître les apports de la version 13.8.4 par rapport à notre ancienne version vous pouvez lire les "Release Notes" suivantes :
https://about.gitlab.com/releases/2021/02/11/security-release-gitlab-13-8-4-released/
https://about.gitlab.com/releases/2021/02/05/gitlab-13-8-3-released/

Commit e7573a2d authored by Mathieu Giraud's avatar Mathieu Giraud

core/dynprog.{cpp,h}: refactoring the score computation and backtrack loops

Edit operations, with their score computation and their backtracking information,
are now defined in a unique location.
The code should thus be more flexible and easier to read and maintain.
parent 6f50dfe8
......@@ -28,12 +28,14 @@
#include <cstdlib>
#include <string>
#define SUBST 0
#define INSER 1
#define DELET 2
#define HOMO2X 3
#define HOMO2Y 4
#define FIN 5
#define SUBST '|'
#define MISMATCH '.'
#define INSER 'i'
#define DELET 'd'
#define HOMO2X 'h'
#define HOMO2Y 'h'
#define FIN ' '
#define BEGIN 'B'
#define BACKSIZE 120
using namespace std;
......@@ -106,9 +108,9 @@ DynProg::DynProg(const string &x, const string &y, DynProgMode mode, const Cost&
S[i] = new int[n+1];
}
B = new backtrack_info*[m+1];
B = new operation*[m+1];
for (int i = 0; i <= m; i++) {
B[i] = new backtrack_info[n+1];
B[i] = new operation[n+1];
}
this -> mode = mode;
......@@ -130,20 +132,32 @@ DynProg::~DynProg() {
for (int i = 0; i <= m; i++) {
delete [] S[i];
}
delete [] S;
delete [] S;
for (int i = 0; i <= m; i++) {
delete [] B[i];
}
delete [] B;
delete [] B;
delete [] gap1;
delete [] gap2;
delete [] linkgap;
}
void DynProg::init()
{
{
for (int i=1; i<=m; i++)
{
B[i][0].type = INSER ;
B[i][0].i = i-1 ;
B[i][0].j = 0 ;
}
for (int j=1; j<=n; j++)
{
B[0][j].type = DELET ;
B[0][j].i = 0 ;
B[0][j].j = j-1 ;
}
if (mode == Local || mode == LocalEndWithSomeDeletions)
for (int i=0; i<=m; i++)
S[i][0] = 0 ;
......@@ -165,34 +179,55 @@ void DynProg::init()
S[0][j] = j * cost.deletion ;
}
inline void try_operation(operation &best, int type, int i, int j, int score)
{
if (score > best.score)
{
best.type = type ;
best.i = i ;
best.j = j ;
best.score = score ;
}
}
int DynProg::compute()
{
best_score = MINUS_INF ;
best_i = 0 ;
best_j = 0 ;
operation best ;
for (int i=1; i<=m; i++)
for (int j=1; j<=n; j++)
{
int subst = S[i-1][j-1] + cost.substitution(x[i-1], y[j-1]);
best.score = MINUS_INF ;
int inser = S[i-1][j] + cost.insertion;
int delet = S[i][j-1] + cost.deletion;
// The edit operations, with their backtracking information and their score
// Also dealing with homopolymers (in a dirty way!)
// int inser = S[i-1][j] + (i > 1 ? cost.ins(x[i-2], x[i-1]) : cost.ins(0,1));
// int delet = S[i][j-1] + (j > 1 ? cost.del(y[j-2], y[j-1]) : cost.del(0,1));
try_operation(best, SUBST, i-1, j-1, S[i-1][j-1] + cost.substitution(x[i-1], y[j-1]));
try_operation(best, INSER, i-1, j , S[i-1][j ] + cost.insertion);
try_operation(best, DELET, i , j-1, S[i ][j-1] + cost.deletion);
try_operation(best, HOMO2X, i-2, j-1, i > 1 ? S[i-2][j-1] + cost.homo2(x[i-2], x[i-1], y[j-1]) : MINUS_INF);
try_operation(best, HOMO2Y, i-1, j-2, j > 1 ? S[i-1][j-2] + cost.homo2(y[j-2], y[j-1], x[i-1]) : MINUS_INF);
if (mode == Local || mode == LocalEndWithSomeDeletions)
try_operation(best, BEGIN, 0, 0, 0);
int best = max(max(subst, inser), delet);
if ((best.type == SUBST) && (x[i-1] != y[j-1]))
best.type = MISMATCH ;
// Homopolymers (2 <-> 1)
int homo2x = i > 1 ? S[i-2][j-1] + cost.homo2(x[i-2], x[i-1], y[j-1]) : MINUS_INF ;
int homo2y = j > 1 ? S[i-1][j-2] + cost.homo2(y[j-2], y[j-1], x[i-1]) : MINUS_INF ;
best = max(max(homo2x, homo2y), best);
// Fill the score and the backtrack information
S[i][j] = best.score ;
B[i][j] = best ;
// cout << " " << i << "," << j << " " << best_op.type << " " << best_op.i << "," << best_op.j << " " << best_op.score << " " << best << endl ;
if (mode == Local || mode == LocalEndWithSomeDeletions)
{
best = max(0, best);
int tbest = best ;
int tbest = best.score ;
if (mode == LocalEndWithSomeDeletions)
tbest += cost.deletion_end*(n-j);
......@@ -204,55 +239,26 @@ int DynProg::compute()
best_j = j ;
}
if (best == 0){
if (best.score == 0){
B[i][j].type = FIN;
}
}
S[i][j] = best ;
if (best==subst){
B[i][j].i = i-1 ;
B[i][j].j = j-1 ;
B[i][j].type = SUBST ;
}
if (best==inser){
B[i][j].i = i-1 ;
B[i][j].j = j ;
B[i][j].type = INSER ;
}
if (best==delet){
B[i][j].i = i ;
B[i][j].j = j-1 ;
B[i][j].type = DELET ;
}
if (best==homo2x){
B[i][j].i = i-2 ;
if (B[i][j].i < 0) B[i][j].i = 0 ;
B[i][j].j = j-1 ;
B[i][j].type = HOMO2X ;
}
if (best==homo2y){
B[i][j].i = i-1 ;
B[i][j].j = j-2 ;
if (B[i][j].j < 0) B[i][j].j = 0 ;
B[i][j].type = HOMO2Y ;
}
if (mode == Local || mode == LocalEndWithSomeDeletions)
{
if (best == 0){
if (best.score == 0){
B[i][j].type = FIN;
}
}
}
// End. Find best_i and best_j, put FIN keywords where the backtrack should stop
if (mode == Local || mode == LocalEndWithSomeDeletions)
{
for (int i=0; i<=n; i++){
B[0][i].type = FIN;
for (int j=0; j<=n; j++){
B[0][j].type = FIN;
}
for (int i=0; i<=m; i++){
B[i][0].type = FIN;
......@@ -269,11 +275,6 @@ int DynProg::compute()
best_score = S[m][j] ;
best_j = j ;
}
for (int i=1; i<=n; i++){
B[0][i].i = 0 ;
B[0][i].j = i-1 ;
B[0][i].type = DELET;
}
for (int i=0; i<=m; i++){
B[i][0].type = FIN;
}
......@@ -292,32 +293,14 @@ int DynProg::compute()
for (int i=0; i<=n; i++){
B[0][i].type = FIN;
}
for (int i=0; i<=m; i++){
B[i][0].i = i-1 ;
B[i][0].j = 0 ;
B[i][0].type = INSER;
}
}
}
if ((mode == Global) || (mode == GlobalButMostlyLocal))
{
best_i = m ;
best_j = n ;
best_score = S[m][n];
for (int i=0; i<=m; i++){
B[i][0].i = i-1 ;
B[i][0].j = 0 ;
B[i][0].type = INSER;
}
for (int i=1; i<=n; i++){
B[0][i].i = 0 ;
B[0][i].j = i-1 ;
B[0][i].type = DELET;
}
B[0][0].type = FIN;
}
if (reverse_x)
......@@ -326,6 +309,8 @@ int DynProg::compute()
if (reverse_y)
best_j = n - best_j + 1;
B[0][0].type = FIN;
// In the matrix positions start at 1, but start positions may start at 0
best_i -= 1;
best_j -= 1;
......@@ -351,86 +336,83 @@ void DynProg::backtrack()
int g1=x.size();
int g2=y.size();
int ti=best_i+1;
int tj=best_j+1;
int i=best_i+1;
int j=best_j+1;
int tmpi, tmpj;
tmpi=ti;
tmpj=tj;
tmpi=i;
tmpj=j;
str_back = "";
string str1, str2, str3;
// Compute backtrack strings
ostringstream back_s1;
ostringstream back_s2;
ostringstream back_tr;
ostringstream back;
while ( B[ti][tj].type != FIN ){
tmpi=B[ti][tj].i;
tmpj=B[ti][tj].j;
if (B[ti][tj].type == SUBST ){
linkgap[g1]=g2;
back_s1 << x[ti-1];
g1--;
back_s2 << y[tj-1];
g2--;
if(x[ti-1]==y[tj-1]) {
back_tr << "|";
}else{
back_tr << ":";
while (1) {
if ((i<0) || (j<0))
{
cout << "Invalid backtrack: " << i << "," << j << endl ;
exit(1);
}
if (B[i][j].type == FIN)
break ;
}
if (B[ti][tj].type == INSER ){
linkgap[g1]=g2;
back_s1 << x[ti-1];
g1--;
back_s2 << " ";
gap2[g2]++;
back_tr << ".";
}
if (B[ti][tj].type == DELET){
linkgap[g1]=g2;
back_s1 << " ";
gap1[g1]++;
back_s2 << y[tj-1];
g2--;
back_tr << ".";
}
if (B[ti][tj].type == HOMO2X ){
linkgap[g1]=g2;
back_s1 << x[ti-1] << x[ti-2];
g1--;
linkgap[g1]=g2;
g1--;
back_s2 << " " << y[tj-1];
gap2[g2]++;
g2--;
back_tr << " h";
// cout << "bt " << i << "/" << j << " " << B[i][j].type << " " << B[i][j].i << "," << B[i][j].j << endl ;
tmpi=B[i][j].i;
tmpj=B[i][j].j;
}
int max_k = max(i - tmpi, j - tmpj);
if (B[ti][tj].type == HOMO2Y ){
linkgap[g1]=g2;
back_s1 << " " << x[ti-1] ;
gap1[g1]++;
g1--;
back_s2 << y[tj-1] << y[tj-2] ;
g2--;
g2--;
back_tr << " h";
}
for (int k=0; k<max_k; k++)
{
linkgap[g1]=g2;
if (i-k > tmpi)
{
back_s1 << x[i-1-k];
g1--;
}
else
{
back_s1 << " " ;
gap1[g1]++ ;
}
if (j-k > tmpj)
{
back_s2 << y[j-1-k];
g2--;
}
else
{
back_s2 << " " ;
gap2[g2]++ ;
}
if (k < max_k-1)
back_tr << " " ;
}
ti=tmpi;
tj=tmpj;
back_tr << B[i][j].type ;
i=tmpi;
j=tmpj;
/*
back_s1 << " " ;
back_s2 << " " ;
back_tr << " " ;
*/
}
// Format backtrack strings
string str1, str2, str3;
str1=back_s1.str();
str1 =string (str1.rbegin(), str1.rend());
str1 = str1;
......@@ -439,17 +421,17 @@ void DynProg::backtrack()
str3=back_s2.str();
str3 = string (str3.rbegin(), str3.rend());
back << ti << " >> " << str1.substr(0, BACKSIZE-8) << endl;
back << i << " >> " << str1.substr(0, BACKSIZE-8) << endl;
back << " diff " << str2.substr(0, BACKSIZE-8) << endl;
back << tj << " >> " << str3.substr(0, BACKSIZE-8) << endl << endl;
back << j << " >> " << str3.substr(0, BACKSIZE-8) << endl << endl;
for (size_t k=0 ; (BACKSIZE-8+k*BACKSIZE)< str1.length() ; k++){
back << str1.substr(BACKSIZE-8+k*BACKSIZE, BACKSIZE) << endl;
back << str2.substr(BACKSIZE-8+k*BACKSIZE, BACKSIZE) << endl;
back << str3.substr(BACKSIZE-8+k*BACKSIZE, BACKSIZE) << endl << endl;
}
first_i=ti;
first_j=tj;
first_i=i;
first_j=j;
str_back=back.str();
......@@ -488,8 +470,12 @@ ostream& operator<<(ostream& out, const DynProg& dp)
out << " " ;
for (int j=0; j<=dp.n; j++)
out << setw(4) << dp.S[i][j] << " ";
{
if (dp.S[i][j])
out << setw(4) << dp.S[i][j] << dp.B[i][j].type ;
else
out << " " << dp.B[i][j].type ;
}
out << endl ;
}
......
......@@ -16,10 +16,12 @@ using namespace std;
float identity_percent(int score);
typedef struct {
char type;
int i;
int j;
int type;
} backtrack_info;
int score;
} operation;
class Cost
{
......@@ -108,8 +110,8 @@ class DynProg
friend ostream& operator<<(ostream& out, const DynProg& dp);
int **S;
backtrack_info **B;
int **S; // Score matrix
operation **B; // Backtrack matrix
int *gap1 ;
int *linkgap ;
int *gap2 ;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment