Fix regression in handling of non-unicode characters in a plain text message (#9953)

This commit is contained in:
Aleksander Machniak 2025-08-13 19:40:36 +02:00
parent eff91a93ca
commit 2c3b46c1f2
3 changed files with 19 additions and 11 deletions

View File

@ -4,6 +4,7 @@
- Support IPv6 in database DSN (#9937)
- Use `htmleditor` setting also for identity signature (#9954)
- Fix regression in handling of non-unicode characters in a plain text message (#9953)
- Fix parsing of inline styles that aren't well-formatted (#9948)
## Release 1.7-beta

View File

@ -47,24 +47,22 @@ class rcube_string_replacer
// Simplified domain expression for UTF8 characters handling
// Support unicode/punycode in top-level domain part
$utf_domain = '[^?&@"\'\/()<>\s\r\t\n]+\.?([^\x00-\x2f\x3b-\x40\x5b-\x60\x7b-\x7f]{2,}|xn--[a-zA-Z0-9]{2,})';
$utf_domain = '[^?&@"\'\/()<>\s\r\t\n#:,]+\.?([^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]{2,}|xn--[a-zA-Z0-9]{2,})';
// Path regexp based on WHATWG URL spec. (with some modifications)
$url1 = '.:;,';
$url2 = 'a-zA-Z0-9!$&#%\'\(\)\*+\/=?@_~\[\]{}|\x{00A0}-\x{D7FF}\x{F000}-\x{FDCF}\x{FDF0}-\x{10FFFD}-';
$ip_address = '([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})|\[[a-f0-9A-F:]+\]';
// Supported link prefixes
$link_prefix = "([\\w]+:\\/\\/|{$this->noword}[Ww][Ww][Ww]\\.|^[Ww][Ww][Ww]\\.)";
$link_prefix = "[\\w]+:\\/\\/|{$this->noword}[Ww][Ww][Ww]\\.|^[Ww][Ww][Ww]\\.";
$this->options = $options;
$this->linkref_index = '/\[([^<>\]#]+)\](:?\s*' . substr($this->pattern, 1, -1) . ')/';
$this->linkref_pattern = '/\[([^<>\]#]+)\]/';
$this->link_pattern = "/{$link_prefix}({$utf_domain}([{$url1}]*[{$url2}]+)*)/u";
$this->link_pattern = "/({$link_prefix})(({$ip_address}(:[0-9]{1,5})?|{$utf_domain})([\\/?#]\\S*[^\\s.:;,]+)*[\\/?#]?)/";
$this->mailto_pattern = '/('
. '[-\w!\#$%&*+~\/^`|{}=]+(?:\.[-\w!\#$%&*+~\/^`|{}=]+)*' // local-part
. "@{$utf_domain}" // domain-part
. "(\\?[{$url1}{$url2}]+)?" // e.g. ?subject=test...
. ')/u';
. '@' . $utf_domain // domain-part
. '(\?\S+)?' // e.g. ?subject=test...
. ')/';
}
/**
@ -98,8 +96,7 @@ class rcube_string_replacer
*
* @param array $matches Matches result from preg_replace_callback
*
* @return string return valid link for recognized schemes, otherwise
* return the unmodified URL
* @return string A valid link for recognized schemes, otherwise the unmodified URL
*/
protected function link_callback($matches)
{

View File

@ -46,6 +46,10 @@ class StringReplacerTest extends TestCase
['www.domain.tld', '<a href="http://www.domain.tld">www.domain.tld</a>'],
['WWW.DOMAIN.TLD', '<a href="http://WWW.DOMAIN.TLD">WWW.DOMAIN.TLD</a>'],
['[http://link.com]', '[<a href="http://link.com">http://link.com</a>]'],
['http://link.com.', '<a href="http://link.com">http://link.com</a>.'],
["http://link.com\ttest", "<a href=\"http://link.com\">http://link.com</a>\ttest"],
['http://link.com:test', '<a href="http://link.com">http://link.com</a>:test'],
['http://link.com#test End', '<a href="http://link.com#test">http://link.com#test</a> End'],
['http://link.com?a[]=1', '<a href="http://link.com?a[]=1">http://link.com?a[]=1</a>'],
['http://link.com?a[]', '<a href="http://link.com?a[]">http://link.com?a[]</a>'],
['(http://link.com)', '(<a href="http://link.com">http://link.com</a>)'],
@ -64,6 +68,12 @@ class StringReplacerTest extends TestCase
['https://www.google.com/maps/place/New+York,+État+de+New+York/@40.7056308,-73.9780035,11z/data=!3m1!4b1!4m2!3m1!1s0x89c24fa5d33f083b:0xc80b8f06e177fe62',
'<a href="https://www.google.com/maps/place/New+York,+État+de+New+York/@40.7056308,-73.9780035,11z/data=!3m1!4b1!4m2!3m1!1s0x89c24fa5d33f083b:0xc80b8f06e177fe62">https://www.google.com/maps/place/New+York,+État+de+New+York/@40.7056308,-73.9780035,11z/data=!3m1!4b1!4m2!3m1!1s0x89c24fa5d33f083b:0xc80b8f06e177fe62</a>',
],
['https://192.168.56.1.', '<a href="https://192.168.56.1">https://192.168.56.1</a>.'],
['http://192.168.56.1/.', '<a href="http://192.168.56.1/">http://192.168.56.1/</a>.'],
['ftp://1.1.1.101/test.', '<a href="ftp://1.1.1.101/test">ftp://1.1.1.101/test</a>.'],
['http://[::1]:8000/test.', '<a href="http://[::1]:8000/test">http://[::1]:8000/test</a>.'],
// Non-unicode characters should be supported
['http://link.com ' . chr(206) . 'a', '<a href="http://link.com">http://link.com</a> ' . chr(206) . 'a'],
// #9538: unicode Fullwidth Left Parenthesis (U+FF08)
// ['http://www.domain.tld/abc（哇哇）', '<a href="http://www.domain.tld/abc">http://www.domain.tld/abc</a>（哇哇）'],
];