From e2b49433de14368a1d6f5ac4449e1e48c8b7c1d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Laurent?= <clemence@opensquare.eu>
Date: Wed, 4 Jan 2023 12:00:18 +0100
Subject: [PATCH] Fix inverted language check (ja|zh|th|km) in Python sentence splitting

---
 alSentenceSplitting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/alSentenceSplitting.py b/alSentenceSplitting.py
index f39b5c3..11678cf 100644
--- a/alSentenceSplitting.py
+++ b/alSentenceSplitting.py
@@ -119,7 +119,7 @@ def altok_split_sentences(s, lang, weak_sbound = 0, less_lists = 0, noxml = 0, a
 
 	s = re.sub(r"(?<!TA_TEXTUAL_PONCT|_META_TEXTUAL_GN)(  ?)(\{[^\}]*\} _META_TEXTUAL[A-Z_]+)", r'</s>\1<s type="li">\2', s) # attention
 
-	if re.search(r"^(ja|zh|th|km)$", lang):
+	if re.search(r"^(ja|zh|th|km)$", lang) is None:
 		match_pattern = re.compile(r"^((?:[^\"“”]*[\"“”\˝][^\"“”]*[\"“”\˝])*[^\"“”]*[\.;\?\!])(  )([\"“”\˝])")
 		match_check = match_pattern.search(s)
 		while match_check is not None: # attention
-- 
GitLab