From 380439a3f9ea80f83119cdb26c582c87333ab2c9 Mon Sep 17 00:00:00 2001
From: Benoit Sagot <benoit.sagot@inria.fr>
Date: Tue, 11 Apr 2017 13:49:34 +0000
Subject: [PATCH] git-svn-id:
 https://scm.gforge.inria.fr/authscm/cfourrie/svn/lingwb/MElt/trunk@5739
 dc05b511-7f1d-0410-9f1c-d6f32a2df9e4

---
 sxpipe-melt/gl_url.pl | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/sxpipe-melt/gl_url.pl b/sxpipe-melt/gl_url.pl
index 6eb618b..b54876b 100755
--- a/sxpipe-melt/gl_url.pl
+++ b/sxpipe-melt/gl_url.pl
@@ -40,13 +40,13 @@ $a         = qr/[a-zàáâäåãßçèéêëìíîïòóôöøùúûüýÿąćč
 $pre       = qr/(?<=[^\//\{\@\..0-9a-zàáâäåãßçèéêëìíîïòóôöøùúûüýÿąćčďęěĺľłńňŕřśšťůźżñA-ZÀÁÂÄÅÆÃÇÈÉÊËÌÍÎÏÒÓÔÖØÙÚÛÜÝŸĄĆČĎĘĚĹĽŁŃŇŔŘŚŠŤŮŹŻÑ_<--])/o;
 $post      = qr/(?=[^\'\//\}a-zàáâäåãßçèéêëìíîïòóôöøùúûüýÿąćčďęěĺľłńňŕřśšťůźżñA-ZÀÁÂÄÅÆÃÇÈÉÊËÌÍÎÏÒÓÔÖØÙÚÛÜÝŸĄĆČĎĘĚĹĽŁŃŇŔŘŚŠŤŮŹŻÑ_>--])/o;
 $dirname   = qr/\s*[\//]\s*(?:\s(?:_UNDERSCORE|_|\.|.|\~|\?|=)|(?:_UNDERSCORE|_|\.|.|\~|\?|=)\s|[\w-\-\..\~_\?=%&#])+/o;
-$dom2      = qr/${a}[\w-\-_]+[\..]/o; # was: $dom2      = qr/${a}[\w\-_]{2,}\./o;
+$dom1      = qr/${a}[\w-\-_]*[\..]/o; # single domain label followed by a dot; renamed from $dom2, and the quantifier is now "*" instead of "+", so (assuming $a matches one character -- TODO confirm) a one-character label is accepted; an older form was: $dom2      = qr/${a}[\w\-_]{2,}\./o;
 $dom2_allow_spaces      = qr/${a}[\w-\-_]+\s*[\..]\s*/o;
 $domN      = qr/${a}[\w-\-_]+[\..](?:[\w-\-_]{2,}[\..])+/o;
 $domN_allow_spaces      = qr/${a}[\w-\-_]+\s*[\..]\s*(?:[\w-\-_]{2,}\s*[\..]\s*)+/o;
 $POSTvar   = qr/(?:[a-zA-Z0-9\?=-\-\_]+\s*=(?:\s*[a-zA-Z0-9-\-\?=\_]+)?)/o;
-$refOK     = qr/(?:$domN(?:$safeExt|$unsafeExt)|$dom2$safeExt)(?:$dirname)*(?:\s*[\//])?(?:\s*\?\s*$POSTvar(?:\s*\&amp;\s*$POSTvar)*)?/o;
-$refOK_allow_spaces     = qr/(?:$domN_allow_spaces(?:$safeExt|$unsafeExt)|$dom2$safeExt)/o;
+$refOK     = qr/(?:$domN(?:$safeExt|$unsafeExt)|$dom1$safeExt)(?:$dirname)*(?:\s*[\//])?(?:\s*\?\s*$POSTvar(?:\s*\&amp;\s*$POSTvar)*)?/o; # host part: multi-label domain ($domN) with either extension class, or single-label domain ($dom1) with $safeExt only; then optional $dirname segments, an optional trailing slash, and an optional "?"-introduced list of $POSTvar items separated by "&amp;"
+$refOK_allow_spaces     = qr/(?:$domN_allow_spaces(?:$safeExt|$unsafeExt)|$dom1$safeExt)/o; # host part only (no path or query); NOTE(review): the single-label branch uses $dom1, which does not tolerate spaces around the dot, rather than $dom2_allow_spaces -- confirm this is intended
 $op        = qr/(?:\<|\&lt;)/o;
 $cl        = qr/(?:\>|\&gt;)/o;
 
@@ -76,7 +76,7 @@ while (<>) {
       s/($protocole)(:\/\/$a+\.$a+\.)\s([a-z]{2,})\b/$1$2$3/go;
       s/([\'\/\.\wàâäãéêèëîïöôùûüÿ-])($protocole):\/\//$1 $2:\/\//go;
       s/\. *(org|net|com|fr)\b/.$1/go;
-      s/((?:$dom2$safeExt|$domN(?:$safeExt|$unsafeExt))[\w-\/\~]*)\s([\w-]*(?:\/[\w-\/]*|[\w-\/]*\.html?))\b/$1$2/go;
+      s/((?:$dom1$safeExt|$domN(?:$safeExt|$unsafeExt))[\w-\/\~]*)\s([\w-]*(?:\/[\w-\/]*|[\w-\/]*\.html?))\b/$1$2/go;
     }
     s/($protocole)(:\/\/[^\s,;]*)[âàäã]([^\s,;]*\.$a)/$1$2a$3/go;
     s/($protocole)(:\/\/[^\s,;]*)[éèêë]([^\s,;]*\.$a)/$1$2e$3/go;
@@ -97,9 +97,9 @@ while (<>) {
     ### urls faciles (deux points dans le nom de domaine ou alors extension autre que .et et .de)
     s/(^|\s)($op?(?:(?:$protocole)\s*:\s*\/\/\s*)?$refOK$cl?)$post/$1\{$2\}_URL/go;
     ### urls de type (http://)?nomsanspoint.(et|de)/deschosesobligatoirement
-    s/(^|\s)($op?(?:(?:$protocole)\s*:\s*\/\/\s*)?$dom2$unsafeExt(?:$dirname)+\/?$cl?)$post/$1\{$2\}_URL/go;
+    s/(^|\s)($op?(?:(?:$protocole)\s*:\s*\/\/\s*)?$dom1$unsafeExt(?:$dirname)+\/?$cl?)$post/$1\{$2\}_URL/go;
     ### urls de type http://nomsanspoint.(et|de)(/deschosesfacultatives)?
-    s/(^|\s)($op?(?:(?:$protocole)\s*:\s*\/\/\s*)$dom2$unsafeExt(?:$dirname)*\/?$cl?)$post/$1\{$2\}_URL/go;
+    s/(^|\s)($op?(?:(?:$protocole)\s*:\s*\/\/\s*)$dom1$unsafeExt(?:$dirname)*\/?$cl?)$post/$1\{$2\}_URL/go;
     s/(\s)\.\}_URL/\}_URL$1\./go || s/\.\}_URL/\}_URL\./go;
     ### twitter
     s/^ (.)([\@\#][a-zA-Z0-9])/ {$1$2}_URL/go; # so-called "fix-replies"
@@ -112,10 +112,10 @@ while (<>) {
     s/$pre($op?(?:(?:$protocole)\s*:\s*[\//][\//]\s*)?$refOK$cl?)$post/\{$1\}_URL/go;
     s/$pre($op?(?:$protocole)\s*:\s*[\//][\//]\s*$refOK_allow_spaces(?: [\//])?$cl?)$post/\{$1\}_URL/go;
     ### urls de type (http://)?nomsanspoint.(et|de)/deschosesobligatoirement
-    s/$pre($op?(?:(?:$protocole)\s*:\s*[\//][\//]\s*)?$dom2$unsafeExt(?:$dirname)+[\//]?$cl?)$post/\{$1\}_URL/go;
+    s/$pre($op?(?:(?:$protocole)\s*:\s*[\//][\//]\s*)?$dom1$unsafeExt(?:$dirname)+[\//]?$cl?)$post/\{$1\}_URL/go;
     s/$pre($op?(?:$protocole)\s*:\s*[\//][\//]\s*$dom2_allow_spaces$unsafeExt(?: [\//])?$cl?)$post/\{$1\}_URL/go;
     ### urls de type http://nomsanspoint.(et|de)(/deschosesfacultatives)?
-    s/$pre($op?(?:(?:$protocole)\s*:\s*[\//][\//]\s*)$dom2$unsafeExt(?:$dirname)*[\//]?$cl?)$post/\{$1\}_URL/go;
+    s/$pre($op?(?:(?:$protocole)\s*:\s*[\//][\//]\s*)$dom1$unsafeExt(?:$dirname)*[\//]?$cl?)$post/\{$1\}_URL/go;
     s/(\s)([\..])\}_URL/\}_URL$1$2/go || s/([\..])\}_URL/\}_URL$1/go;
     ### twitter
     s/^ (.)([\@\#][a-zA-Z0-9])/ \1{\2}_URL/go; # so-called "fix-replies"
-- 
GitLab