Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 7cfcd2f9 authored by Clémence Laurent
Browse files

Debug Python sentence splitting: accolades à doubler dans les format strings (literal braces must be doubled inside f-strings)

parent 8a82d73f
No related branches found
No related tags found
No related merge requests found
......@@ -116,10 +116,10 @@ def altok_split_sentences(s, lang, weak_sbound = 0, less_lists = 0, noxml = 0, a
s = re.sub(rf"([…\.:;\?\!])( +)([\"“”\˝] {maj.pattern}[^\"“”\˝<>]*[\.:;\?\!] [\"“”\˝])( +)({maj.pattern})", r"\1</s>\2<s>\3</s>\4<s>\5", s) # detection of sentences entirely surrounded by double quotes
s = re.sub(rf"([…\.:;\?\!])( +)([\"“”\˝] {maj.pattern}[^\"“”\˝<>]*[\.:;\?\!] [\"“”\˝])( +)$", r"\1</s>\2<s>\3</s>\4", s) # detection of sentences entirely surrounded by double quotes
#s = re.sub(rf"(\.(?: \.)*|…)( +)(\( (?:\. \. \.|…) \))( +)({maj.pattern}|[\[_{{\.])", r"\1</s>\2<s>\3</s>\4<s>\5", s)
s = re.sub(rf"(\.(?: \.)*|…)( +)(\( (?:\. \. \.|[…。]) \))( +)({maj.pattern}|[\[_\{\.])", r"\1</s>\2<s>\3</s>\4<s>\5", s)
s = re.sub(rf"(\.(?: \.)*|…)( +)(\( (?:\. \. \.|[…。]) \))( +)({maj.pattern}|[\[_{{\.])", r"\1</s>\2<s>\3</s>\4<s>\5", s)
#s = re.sub(rf"([^\.][0-9}} ](?:\.(?: \.)*|…))( +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE
s = re.sub(rf"([^\.0-9][\} ](?:\.(?: \.)*|[…。]))( +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE 1
s = re.sub(rf"([^\.0-9][}} ](?:\.(?: \.)*|[…。]))( +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE 1
s = re.sub(rf"([^\.][0-9](?:\.(?: \.)+|[…。]))( +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE 2
s = re.sub(rf"^ ([^\.][0-9]\.)( +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE 3
s = re.sub(rf"([\.;\!\?…] [0-9]+ \.)( +)({initialclass.pattern}|{special_split.pattern})", r"\1</s>\2<s>\3", s) # STANDARD CASE 4
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment