From 380439a3f9ea80f83119cdb26c582c87333ab2c9 Mon Sep 17 00:00:00 2001 From: Benoit Sagot <benoit.sagot@inria.fr> Date: Tue, 11 Apr 2017 13:49:34 +0000 Subject: [PATCH] git-svn-id: https://scm.gforge.inria.fr/authscm/cfourrie/svn/lingwb/MElt/trunk@5739 dc05b511-7f1d-0410-9f1c-d6f32a2df9e4 --- sxpipe-melt/gl_url.pl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sxpipe-melt/gl_url.pl b/sxpipe-melt/gl_url.pl index 6eb618b..b54876b 100755 --- a/sxpipe-melt/gl_url.pl +++ b/sxpipe-melt/gl_url.pl @@ -40,13 +40,13 @@ $a = qr/[a-zà áâäåãßçèéêëìÃÃ®Ã¯Ã²Ã³Ã´Ã¶Ã¸Ã¹ÃºÃ»Ã¼Ã½Ã¿Ä…Ä‡Ä $pre = qr/(?<=[^\/ï¼\{\@\..0-9a-zà áâäåãßçèéêëìÃîïòóôöøùúûüýÿąćÄÄęěĺľłńňŕřśšťůźżñA-ZÀÃÂÄÅÆÃÇÈÉÊËÌÃÃŽÃÒÓÔÖØÙÚÛÜߥĆČĎĘĚĹĽÅŃŇŔŘŚŠŤŮŹŻÑ_<ï¼-])/o; $post = qr/(?=[^\'\/ï¼\}a-zà áâäåãßçèéêëìÃîïòóôöøùúûüýÿąćÄÄęěĺľłńňŕřśšťůźżñA-ZÀÃÂÄÅÆÃÇÈÉÊËÌÃÃŽÃÒÓÔÖØÙÚÛÜߥĆČĎĘĚĹĽÅŃŇŔŘŚŠŤŮŹŻÑ_>ï¼-])/o; $dirname = qr/\s*[\/ï¼]\s*(?:\s(?:_UNDERSCORE|_|\.|.|\~|\?|=)|(?:_UNDERSCORE|_|\.|.|\~|\?|=)\s|[\wï¼\-\..\~_\?=%&#])+/o; -$dom2 = qr/${a}[\wï¼\-_]+[\..]/o; # was: $dom2 = qr/${a}[\w\-_]{2,}\./o; +$dom1 = qr/${a}[\wï¼\-_]*[\..]/o; # was: $dom2 = qr/${a}[\w\-_]{2,}\./o; $dom2_allow_spaces = qr/${a}[\wï¼\-_]+\s*[\..]\s*/o; $domN = qr/${a}[\wï¼\-_]+[\..](?:[\wï¼\-_]{2,}[\..])+/o; $domN_allow_spaces = qr/${a}[\wï¼\-_]+\s*[\..]\s*(?:[\wï¼\-_]{2,}\s*[\..]\s*)+/o; $POSTvar = qr/(?:[a-zA-Z0-9\?=ï¼\-\_]+\s*=(?:\s*[a-zA-Z0-9ï¼\-\?=\_]+)?)/o; -$refOK = qr/(?:$domN(?:$safeExt|$unsafeExt)|$dom2$safeExt)(?:$dirname)*(?:\s*[\/ï¼])?(?:\s*\?\s*$POSTvar(?:\s*\&\s*$POSTvar)*)?/o; -$refOK_allow_spaces = qr/(?:$domN_allow_spaces(?:$safeExt|$unsafeExt)|$dom2$safeExt)/o; +$refOK = qr/(?:$domN(?:$safeExt|$unsafeExt)|$dom1$safeExt)(?:$dirname)*(?:\s*[\/ï¼])?(?:\s*\?\s*$POSTvar(?:\s*\&\s*$POSTvar)*)?/o; +$refOK_allow_spaces = qr/(?:$domN_allow_spaces(?:$safeExt|$unsafeExt)|$dom1$safeExt)/o; $op = qr/(?:\<|\<)/o; $cl = qr/(?:\>|\>)/o; @@ -76,7 +76,7 @@ while (<>) { s/($protocole)(:\/\/$a+\.$a+\.)\s([a-z]{2,})\b/$1$2$3/go; s/([\'\/\.\wà âäãéêèëîïöôùûüÿ-])($protocole):\/\//$1 $2:\/\//go; s/\. *(org|net|com|fr)\b/.$1/go; - s/((?:$dom2$safeExt|$domN(?:$safeExt|$unsafeExt))[\w-\/\~]*)\s([\w-]*(?:\/[\w-\/]*|[\w-\/]*\.html?))\b/$1$2/go; + s/((?:$dom1$safeExt|$domN(?:$safeExt|$unsafeExt))[\w-\/\~]*)\s([\w-]*(?:\/[\w-\/]*|[\w-\/]*\.html?))\b/$1$2/go; } s/($protocole)(:\/\/[^\s,;]*)[âà äã]([^\s,;]*\.$a)/$1$2a$3/go; s/($protocole)(:\/\/[^\s,;]*)[éèêë]([^\s,;]*\.$a)/$1$2e$3/go; @@ -97,9 +97,9 @@ while (<>) { ### urls faciles (deux points dans le nom de domaine ou alors extension autre que .et et .de) s/(^|\s)($op?(?:(?:$protocole)\s*:\s*\/\/\s*)?$refOK$cl?)$post/$1\{$2\}_URL/go; ### urls de type (http://)?nomsanspoint.(et|de)/deschosesobligatoirement - s/(^|\s)($op?(?:(?:$protocole)\s*:\s*\/\/\s*)?$dom2$unsafeExt(?:$dirname)+\/?$cl?)$post/$1\{$2\}_URL/go; + s/(^|\s)($op?(?:(?:$protocole)\s*:\s*\/\/\s*)?$dom1$unsafeExt(?:$dirname)+\/?$cl?)$post/$1\{$2\}_URL/go; ### urls de type http://nomsanspoint.(et|de)(/deschosesfacultatives)? - s/(^|\s)($op?(?:(?:$protocole)\s*:\s*\/\/\s*)$dom2$unsafeExt(?:$dirname)*\/?$cl?)$post/$1\{$2\}_URL/go; + s/(^|\s)($op?(?:(?:$protocole)\s*:\s*\/\/\s*)$dom1$unsafeExt(?:$dirname)*\/?$cl?)$post/$1\{$2\}_URL/go; s/(\s)\.\}_URL/\}_URL$1\./go || s/\.\}_URL/\}_URL\./go; ### twitter s/^ (.)([\@\#][a-zA-Z0-9])/ {$1$2}_URL/go; # so-called "fix-replies" @@ -112,10 +112,10 @@ while (<>) { s/$pre($op?(?:(?:$protocole)\s*:\s*[\/ï¼][\/ï¼]\s*)?$refOK$cl?)$post/\{$1\}_URL/go; s/$pre($op?(?:$protocole)\s*:\s*[\/ï¼][\/ï¼]\s*$refOK_allow_spaces(?: [\/ï¼])?$cl?)$post/\{$1\}_URL/go; ### urls de type (http://)?nomsanspoint.(et|de)/deschosesobligatoirement - s/$pre($op?(?:(?:$protocole)\s*:\s*[\/ï¼][\/ï¼]\s*)?$dom2$unsafeExt(?:$dirname)+[\/ï¼]?$cl?)$post/\{$1\}_URL/go; + s/$pre($op?(?:(?:$protocole)\s*:\s*[\/ï¼][\/ï¼]\s*)?$dom1$unsafeExt(?:$dirname)+[\/ï¼]?$cl?)$post/\{$1\}_URL/go; s/$pre($op?(?:$protocole)\s*:\s*[\/ï¼][\/ï¼]\s*$dom2_allow_spaces$unsafeExt(?: [\/ï¼])?$cl?)$post/\{$1\}_URL/go; ### urls de type http://nomsanspoint.(et|de)(/deschosesfacultatives)? - s/$pre($op?(?:(?:$protocole)\s*:\s*[\/ï¼][\/ï¼]\s*)$dom2$unsafeExt(?:$dirname)*[\/ï¼]?$cl?)$post/\{$1\}_URL/go; + s/$pre($op?(?:(?:$protocole)\s*:\s*[\/ï¼][\/ï¼]\s*)$dom1$unsafeExt(?:$dirname)*[\/ï¼]?$cl?)$post/\{$1\}_URL/go; s/(\s)([\..])\}_URL/\}_URL$1$2/go || s/([\..])\}_URL/\}_URL$1/go; ### twitter s/^ (.)([\@\#][a-zA-Z0-9])/ \1{\2}_URL/go; # so-called "fix-replies" -- GitLab