From 7cfcd2f9a31f5af7ee18daafb947db1bd7069261 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Laurent?= <clemence@opensquare.eu>
Date: Tue, 14 Feb 2023 11:53:53 +0100
Subject: [PATCH] Fix Python sentence splitting: double literal braces in
 regex f-strings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 alSentenceSplitting.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/alSentenceSplitting.py b/alSentenceSplitting.py
index 3c04792..3c48289 100644
--- a/alSentenceSplitting.py
+++ b/alSentenceSplitting.py
@@ -116,10 +116,10 @@ def altok_split_sentences(s, lang, weak_sbound = 0, less_lists = 0, noxml = 0, a
 		s = re.sub(rf"([…\.:;\?\!])(  +)([\"“”\˝] {maj.pattern}[^\"“”\˝<>]*[\.:;\?\!] [\"“”\˝])(  +)({maj.pattern})", r"\1</s>\2<s>\3</s>\4<s>\5", s) # detection of sentences entirely surrounded by double quotes
 		s = re.sub(rf"([…\.:;\?\!])(  +)([\"“”\˝] {maj.pattern}[^\"“”\˝<>]*[\.:;\?\!] [\"“”\˝])( +)$", r"\1</s>\2<s>\3</s>\4", s) # detection of sentences entirely surrounded by double quotes
 		#s = re.sub(rf"(\.(?: \.)*|…)( +)(\( (?:\. \. \.|…) \))( +)({maj.pattern}|[\[_{{\.])", r"\1</s>\2<s>\3</s>\4<s>\5", s)
-		s = re.sub(rf"(\.(?: \.)*|…)( +)(\( (?:\. \. \.|[…。]) \))( +)({maj.pattern}|[\[_\{\.])", r"\1</s>\2<s>\3</s>\4<s>\5", s)
+		s = re.sub(rf"(\.(?: \.)*|…)( +)(\( (?:\. \. \.|[…。]) \))( +)({maj.pattern}|[\[_{{\.])", r"\1</s>\2<s>\3</s>\4<s>\5", s)
 		#s = re.sub(rf"([^\.][0-9}} ](?:\.(?: \.)*|…))(  +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE
 		
-		s = re.sub(rf"([^\.0-9][\} ](?:\.(?: \.)*|[…。]))(  +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE 1
+		s = re.sub(rf"([^\.0-9][}} ](?:\.(?: \.)*|[…。]))(  +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE 1
 		s = re.sub(rf"([^\.][0-9](?:\.(?: \.)+|[…。]))(  +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE 2
 		s = re.sub(rf"^ ([^\.][0-9]\.)(  +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE 3
 		s = re.sub(rf"([\.;\!\?…]  [0-9]+ \.)(  +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE 4
-- 
GitLab