Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 766a09a6 authored by Clémence Laurent
Browse files

maj python

parent fbc4a960
No related branches found
No related tags found
No related merge requests found
......@@ -12,11 +12,13 @@ def altok_escape_metacharacters(s):
s = re.sub(r"([\\ {}])", r"\\\1", s)
return s
def altok_escape_xml(s):
s = re.sub(r"^\s*(<[\!\?]?[\w\.:_-]+(?: .*?)?\/?>(?:.*<\/[\w\.:_-]+>)?)\s*$", r"{\1} _XML", s)
s = re.sub(r"^\s*((?:<\/?[\w\.:_-]+[^>]*\/?>)+)\s*$", r"{\1} _XML", s)
s = re.sub(r"^\s*(<\!--[^>]+>)\s*$", r"{\1} _XML", s)
s = re.sub(r"^\s*(<\/[\w\.:_-]+>)\s*$", r"{\1} _XML", s)
def altok_escape_xml(s, do_not_create_XML_special_tokens = 0):
if do_not_create_XML_special_tokens == 0:
s = re.sub(r"^\s*(<[\!\?]?[\w\.:_-]+(?: .*?)?\/?>(?:.*<\/[\w\.:_-]+>)?)\s*$", r"{\1} _XML", s)
s = re.sub(r"^\s*((?:<\/?[\w\.:_-]+[^>]*\/?>)+)\s*$", r"{\1} _XML", s)
s = re.sub(r"^\s*(<\!--[^>]+>)\s*$", r"{\1} _XML", s)
s = re.sub(r"^\s*(<\/[\w\.:_-]+>)\s*$", r"{\1} _XML", s)
s = re.sub(r"&", r"&amp;", s)
s = re.sub(r"<", r"&lt;", s)
s = re.sub(r">", r"&gt;", s)
......@@ -24,6 +26,10 @@ def altok_escape_xml(s):
s = re.sub(r"(?=\\)(\{[^\{}]*)&amp;", r"\1&", s)
s = re.sub(r"(?=\\)(\{[^\{}]*)&lt;", r"\1<", s)
s = re.sub(r"(?=\\)(\{[^\{}]*)&gt;", r"\1>", s)
if do_not_create_XML_special_tokens == 0:
if re.search(r"_XML", s):
s = re.sub(r"\{([^{}]+)\} _XML", lambda x : "{" + _double_whitespaces(x.group(1)) + "} _XML", s)
return s
def altok_interpret_entities(s):
......@@ -34,5 +40,9 @@ def altok_interpret_entities(s):
#s = s.encode("utf-8").decode("utf-8")
return s
def _double_whitespaces(s):
s = re.sub(r" ", r" ", s)
return s
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment