Browse Source

Fixed issue #1902 - Spaces in link URLs are removed.

Thorsten Eckel 7 years ago
parent
commit
0c917e13aa

+ 15 - 7
lib/html_sanitizer.rb

@@ -46,12 +46,15 @@ satinize html string based on whiltelist
 
       # prepare links
       if node['href']
-        href = cleanup_target(node['href'])
-        if external && href.present? && !href.downcase.start_with?('//') && href.downcase !~ %r{^.{1,6}://.+?}
-          node['href'] = "http://#{node['href']}"
-          href = node['href']
+        href                = cleanup_target(node['href'], keep_spaces: true)
+        href_without_spaces = href.gsub(/[[:space:]]/, '')
+        if external && href_without_spaces.present? && !href_without_spaces.downcase.start_with?('//') && href_without_spaces.downcase !~ %r{^.{1,6}://.+?}
+          node['href']        = "http://#{node['href']}"
+          href                = node['href']
+          href_without_spaces = href.gsub(/[[:space:]]/, '')
         end
-        next if !href.downcase.start_with?('http', 'ftp', '//')
+
+        next if !href_without_spaces.downcase.start_with?('http', 'ftp', '//')
         node.set_attribute('href', href)
         node.set_attribute('rel', 'nofollow noreferrer noopener')
         node.set_attribute('target', '_blank')
@@ -372,9 +375,14 @@ cleanup html string:
     string.gsub('&amp;', '&').gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&nbsp;', ' ')
   end
 
-  def self.cleanup_target(string)
+  def self.cleanup_target(string, keep_spaces: false)
     string = CGI.unescape(string).encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
-    string.gsub(/[[:space:]]|\t|\n|\r/, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '').delete("\u0000")
+    blank_regex = if keep_spaces
+                    /\t|\n|\r/
+                  else
+                    /[[:space:]]|\t|\n|\r/
+                  end
+    string.strip.gsub(blank_regex, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '').delete("\u0000")
   end
 
   def self.url_same?(url_new, url_old)

+ 1 - 1
test/unit/aaa_string_test.rb

@@ -648,7 +648,7 @@ Men-----------------------'
     assert_equal(result, html.html2html_strict)
 
     html   = '<a href="http://example.com %22test%22">http://what-different.example.com</a>'
-    result = "<a href=\"http://example.com%22test%22\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title='http://example.com\"test\"'>http://what-different.example.com</a>"
+    result = "<a href=\"http://example.com%20%22test%22\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title='http://example.com \"test\"'>http://what-different.example.com</a>"
     assert_equal(result, html.html2html_strict)
 
     html   = '<a href="http://example.com">http://EXAMPLE.com</a>'

+ 2 - 2
test/unit/email_parser_test.rb

@@ -940,7 +940,7 @@ end
       },
       {
         data: IO.binread('test/fixtures/mail43.box'),
-        body_md5: '23cb094f443f41069b9f347a551e2e4d',
+        body_md5: 'a3b91a8969b54a67dd2154e70f74cc30',
         params: {
           from: 'Paula <databases.en@example.com>',
           from_email: 'databases.en@example.com',
@@ -973,7 +973,7 @@ end
 <li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NMUGVpNGV8fDBm\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NMUGVpNGV8fDBm\"><span style=\"color: rgb(0, 0, 0);\">Polen</span></a></li>
 <li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVUmVpZTV8fGVk\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVUmVpZTV8fGVk\"><span style=\"color: rgb(0, 0, 0);\">Russland</span></a></li>
 <li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NJU2VpN2R8fGYz\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NJU2VpN2R8fGYz\"><span style=\"color: rgb(0, 0, 0);\">Slowenien</span></a></li>
-<li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NLU2VpNjZ8fDQ5\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NLU2VpNjZ8fDQ5\"><span style=\"color: rgb(0, 0, 0);\">Slowakei</span></a></li>
+<li><a href=\"http://busine%20ss-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NLU2VpNjZ8fDQ5\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://busine ss-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NLU2VpNjZ8fDQ5\"><span style=\"color: rgb(0, 0, 0);\">Slowakei</span></a></li>
 <li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NBVWVpYTd8fDNh\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NBVWVpYTd8fDNh\"><span style=\"color: rgb(0, 0, 0);\">Ukraine</span></a></li> </ul> </td> </tr> </tbody> </table> </td> </tr> <tr> <td> </td> </tr> </tbody> </table> <table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"> <tbody> <tr> <td style=\"border-top-style:solid; border-top-width:3px;\"> <table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"> <tbody> <tr> <td> </td> </tr> </tbody> </table> </td> </tr> </tbody> </table> <table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"> <tbody> <tr> <td align=\"left\" valign=\"top\" style=\"font-size:15px;color:#222222;\"> <p>Anwendungsmöglichkeiten für Geschäftskontakte<br> <br> </p><ul> <li>
 <i>Newsletter senden</i> - Senden von Werbung per E-Mail (besonders effizient).</li>
 <li>

+ 5 - 3
test/unit/html_sanitizer_test.rb

@@ -54,9 +54,9 @@ class HtmlSanitizerTest < ActiveSupport::TestCase
     assert_equal(HtmlSanitizer.strict(' <HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-'), '  +ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-')
     assert_equal(HtmlSanitizer.strict('<SCRIPT a=">" SRC="httx://xss.rocks/xss.js"></SCRIPT>'), '')
     assert_equal(HtmlSanitizer.strict('<A HREF="h
-tt  p://6 6.000146.0x7.147/">XSS</A>'), '<a href="http://66.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank" title="http://66.000146.0x7.147/">XSS</a>')
+tt  p://6 6.000146.0x7.147/">XSS</A>'), '<a href="htt%20%20p://6%206.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank" title="htt  p://6 6.000146.0x7.147/">XSS</a>')
     assert_equal(HtmlSanitizer.strict('<A HREF="h
-tt  p://6 6.000146.0x7.147/">XSS</A>', true), '<a href="http://66.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank" title="http://66.000146.0x7.147/">XSS</a>')
+tt  p://6 6.000146.0x7.147/">XSS</A>', true), '<a href="htt%20%20p://6%206.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank" title="htt  p://6 6.000146.0x7.147/">XSS</a>')
     assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>'), '<a href="//www.google.com/" rel="nofollow noreferrer noopener" target="_blank" title="//www.google.com/">XSS</a>')
     assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>', true), '<a href="//www.google.com/" rel="nofollow noreferrer noopener" target="_blank" title="//www.google.com/">XSS</a>')
     assert_equal(HtmlSanitizer.strict('<form id="test"></form><button form="test" formaction="javascript:alert(1)">X</button>'), 'X')
@@ -113,7 +113,9 @@ test 123
     assert_equal(HtmlSanitizer.strict('<table><tr style=" Font-size:0%"><td>123</td></tr></table>'), '<table><tr><td>123</td></tr></table>')
     assert_equal(HtmlSanitizer.strict('<table><tr style="font-size:0%;display: none;"><td>123</td></tr></table>'), '<table><tr><td>123</td></tr></table>')
     assert_equal(HtmlSanitizer.strict('<table><tr style="font-size:0%;visibility:hidden;"><td>123</td></tr></table>'), '<table><tr><td>123</td></tr></table>')
-
+    assert_equal(HtmlSanitizer.strict('<a href="/some/path%20test.pdf">test</a>'), '<a href="/some/path%20test.pdf">test</a>')
+    assert_equal(HtmlSanitizer.strict('<a href="https://somehost.domain/path%20test.pdf">test</a>'), '<a href="https://somehost.domain/path%20test.pdf" rel="nofollow noreferrer noopener" target="_blank" title="https://somehost.domain/path test.pdf">test</a>')
+    assert_equal(HtmlSanitizer.strict('<a href="https://somehost.domain/zaihan%20test">test</a>'), '<a href="https://somehost.domain/zaihan%20test" rel="nofollow noreferrer noopener" target="_blank" title="https://somehost.domain/zaihan test">test</a>')
   end
 
 end