From fbc4a9602bcf6b19a8f2d43bdad9da52ca0945ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Laurent?= <clemence@opensquare.eu> Date: Mon, 25 Jul 2022 18:10:12 +0200 Subject: [PATCH] debug --- alSentenceSplitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alSentenceSplitting.py b/alSentenceSplitting.py index 70976f5..f39b5c3 100644 --- a/alSentenceSplitting.py +++ b/alSentenceSplitting.py @@ -90,7 +90,7 @@ def altok_split_sentences(s, lang, weak_sbound = 0, less_lists = 0, noxml = 0, a s = re.sub(r" ([:;:;]|)( +)", r" \1</s>\2<s>", s) elif re.search(r"^km(_|$)", lang): s = re.sub(r" ([។៕])( +)", r" \1</s>\2<s>", s) - elif re.search(r"^th(_|$)", lang):: + elif re.search(r"^th(_|$)", lang): pass else: s = re.sub(rf"([…\.:;\?\!])( +)([\"“â€\Ë] {maj.pattern}[^\"“â€\Ë<>]*[\.:;\?\!] [\"“â€\Ë])( +)({maj.pattern})", r"\1</s>\2<s>\3</s>\4<s>\5", s) # detection of sentences entirely surrounded by double quotes -- GitLab