From 016089aac16f76f213c7b24e17c9c080bbf58ace Mon Sep 17 00:00:00 2001 From: Christian Raymond <christian.raymond@irisa.fr> Date: Fri, 10 Jun 2022 13:29:43 +0200 Subject: [PATCH] better alignment with almost similar objects --- README.md | 2 ++ SequenceAligner.hpp | 34 +++++++++++++++++++++++++++++++++- example.cpp | 3 ++- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1b82d63..0eaa119 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,7 @@ The C++ version is template and can align any Objects with the folwing propertie 3) a `size()` method that return the number of character used to print the object 4) an `operator==()` to compare objects 5) the `operator<<` in order to print the object in `size()` character +6) the `operator-` to favor substitution over insertion/deletion when different objects share similarity: it returns the percentage of similarity ### Exemple @@ -109,6 +110,7 @@ class Integer int size() const {return std::to_string(_i).length();} //size() should provide the nbsymbols to write object on screen bool empty() const {return _i==std::numeric_limits<int>::max();}//empty() should say if the object is not empty (not constructed by the constructor without arg) bool operator==(const Integer& g) const {return _i==g._i;} //must provide a comparator + int operator-(const Integer& g) const {return std::min(_i,g._i)*100/std::max(_i,g._i);} //to favor a substitution error over insertion/deletion when operator== says false but the objects share similarity (returns the PERCENTAGE of similarity; always return 0 to disable this behavior) friend std::ostream& operator<<(std::ostream& o,const Integer& i) {return o<<i._i;} }; ``` diff --git a/SequenceAligner.hpp b/SequenceAligner.hpp index 507c0dd..44e824f 100644 --- a/SequenceAligner.hpp +++ b/SequenceAligner.hpp @@ -12,6 +12,8 @@ #include "asciitablestream.hpp" + + template<class T=std::string> class SequenceAligner { @@ -316,7 +318,9 @@ class SequenceAligner else 
{ subOp = SUB; - cs = cost[i-1][j-1] + substitutionPenalty; + mint discountCost=abs(reference[i-1]-hypothesis[j-1]); + if(discountCost>0) discountCost = substitutionPenalty*discountCost/100; + cs = cost[i-1][j-1] + substitutionPenalty - discountCost ; } cmint ci = cost[i][j-1] + insertionPenalty; cmint cd = cost[i-1][j] + deletionPenalty; @@ -401,3 +405,31 @@ class SequenceAligner } }; +//utility for std::string: Char wraps one character so that two strings can be aligned character by character + +class Char +{ + char _c; + public: + Char(): _c(0) {} //default constructor marks the object as empty by storing 0 + Char(const char c): _c(c){} //wraps the character c + int size() const {return 1;} //number of symbols needed to print the object on screen: always 1 + bool empty() const {return _c==0;}//true when the stored character is 0 (the value set by the default constructor) + bool operator==(const Char& g) const {return _c==g._c;} //two Char are equal when they wrap the same character + int operator-(const Char& g) const {return 0;} //similarity percentage used to favor a substitution error over insertion/deletion when operator== says false; Char always returns 0, so the substitution cost gets no discount + friend std::ostream& operator<<(std::ostream& o,const Char& i) + { + if(i._c==0) return o<<""; + return o<<i._c; + } +}; + + +inline int operator-(const std::string& one,const std::string& two) //similarity of two strings: aligns them character by character and returns res.f1() converted to int; inline because this function is defined in a header and would otherwise be defined once per including translation unit +{ + SequenceAligner<Char> wordAligner(false); + std::vector<Char> un{one.begin(),one.end()}; + std::vector<Char> deux{two.begin(),two.end()}; + const auto& res=wordAligner.align(un ,deux); + return res.f1(); //NOTE(review): align() divides the result of operator- by 100, i.e. expects a percentage - confirm that f1() is on a 0-100 scale +} diff --git a/example.cpp b/example.cpp index 2ce041d..f92aa63 100644 --- a/example.cpp +++ b/example.cpp @@ -15,6 +15,7 @@ class Integer int size() const {return std::to_string(_i).length();} //size() should provide the nbsymbols to write object on screen bool empty() const {return _i==std::numeric_limits<int>::max();}//empty() should say if the object is not empty (not constructed by the constructor without arg) bool operator==(const Integer& g) const {return _i==g._i;} //must provide a comparator + int 
operator-(const Integer& g) const {const long long a=_i<0?-static_cast<long long>(_i):_i; const long long b=g._i<0?-static_cast<long long>(g._i):g._i; if(a==0||b==0||(_i<0)!=(g._i<0)) return 0; return static_cast<int>(std::min(a,b)*100/std::max(a,b));} //similarity percentage (0-100) used to favor a substitution error over insertion/deletion when operator== says false but the objects share similarity (always return 0 to disable this behavior); computed in long long on the magnitudes so that a zero operand, operands of opposite sign or large values cannot divide by zero, overflow or leave the 0-100 range friend std::ostream& operator<<(std::ostream& o,const Integer& i) { if(i._i==std::numeric_limits<int>::max()) return o<<""; @@ -71,7 +72,7 @@ int main(int argc,char*argv[0]) std::cout<<"\n\n"<<ss<<std::endl; - std::vector<Integer> num1={1,2,3}; + std::vector<Integer> num1={400,2,3,300,308,306}; std::vector<Integer> num2={2,3,400}; SequenceAligner<Integer> numEval; -- GitLab