Mentions légales du service

Skip to content
Snippets Groups Projects
Commit e2b49433 authored by Clémence Laurent
Browse files

debug sentence splitting python

parent 766a09a6
No related branches found
No related tags found
No related merge requests found
......@@ -119,7 +119,7 @@ def altok_split_sentences(s, lang, weak_sbound = 0, less_lists = 0, noxml = 0, a
s = re.sub(r"(?<!TA_TEXTUAL_PONCT|_META_TEXTUAL_GN)( ?)(\{[^\}]*\} _META_TEXTUAL[A-Z_]+)", r'</s>\1<s type="li">\2', s) # attention
- if re.search(r"^(ja|zh|th|km)$", lang):
+ if re.search(r"^(ja|zh|th|km)$", lang) is None:
match_pattern = re.compile(r"^((?:[^\"“”]*[\"“”\˝][^\"“”]*[\"“”\˝])*[^\"“”]*[\.;\?\!])( )([\"“”\˝])")
match_check = match_pattern.search(s)
while match_check is not None: # attention
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment