Просмотр исходного кода

Fixed an endless loop in the HTML sanitizer caused by re-processing links it had generated itself. Extended tests.

Martin Edenhofer 7 лет назад
Родитель
Коммит
98a1ba8a62

+ 55 - 52
lib/html_sanitizer.rb

@@ -19,7 +19,58 @@ sanitize html string based on whitelist
     classes_whitelist = ['js-signatureMarker']
     attributes_2_css = %w(width height)
 
-    scrubber = Loofah::Scrubber.new do |node|
+    scrubber_link = Loofah::Scrubber.new do |node|
+
+      # check if href is different to text
+      if external && node.name == 'a' && !url_same?(node['href'], node.text)
+        if node['href'].blank?
+          node.replace node.children.to_s
+          Loofah::Scrubber::STOP
+        elsif (node.children.empty? || node.children.first.class == Nokogiri::XML::Text) && node.text.present?
+          text = Nokogiri::XML::Text.new("#{node['href']} (", node.document)
+          node.add_previous_sibling(text)
+          node['href'] = cleanup_target(node.text)
+          text = Nokogiri::XML::Text.new(')', node.document)
+          node.add_next_sibling(text)
+        else
+          node.content = cleanup_target(node['href'])
+        end
+      end
+
+      # check if text has urls which need to be clickable
+      if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a')
+        if node.class == Nokogiri::XML::Text
+          urls = []
+          node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match|
+            if match[0]
+              urls.push match[0].to_s.strip
+            end
+          }
+          node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each { |match|
+            if match[1]
+              urls.push match[1].to_s.strip
+            end
+          }
+          next if urls.empty?
+          add_link(node.content, urls, node)
+        end
+      end
+
+      # prepare links
+      if node['href']
+        href = cleanup_target(node['href'])
+        if external && href.present? && !href.downcase.start_with?('//') && href.downcase !~ %r{^.{1,6}://.+?}
+          node['href'] = "http://#{node['href']}"
+          href = node['href']
+        end
+        next if !href.downcase.start_with?('http', 'ftp', '//')
+        node.set_attribute('href', href)
+        node.set_attribute('rel', 'nofollow noreferrer noopener')
+        node.set_attribute('target', '_blank')
+      end
+    end
+
+    scrubber_wipe = Loofah::Scrubber.new do |node|
 
       # remove tags with subtree
       if tags_remove_content.include?(node.name)
@@ -128,67 +179,19 @@ sanitize html string based on whitelist
           Loofah::Scrubber::STOP
         end
       end
-
-      # prepare links
-      if node['href']
-        href = cleanup_target(node['href'])
-        if external && href.present? && !href.downcase.start_with?('//') && href.downcase !~ %r{^.{1,6}://.+?}
-          node['href'] = "http://#{node['href']}"
-          href = node['href']
-        end
-        next if !href.downcase.start_with?('http', 'ftp', '//')
-        node.set_attribute('href', href)
-        node.set_attribute('rel', 'nofollow noreferrer noopener')
-        node.set_attribute('target', '_blank')
-      end
-
-      # check if href is different to text
-      if external && node.name == 'a' && !url_same?(node['href'], node.text)
-        if node['href'].blank?
-          node.replace node.children.to_s
-          Loofah::Scrubber::STOP
-        elsif (node.children.empty? || node.children.first.class == Nokogiri::XML::Text) && node.text.present?
-          text = Nokogiri::XML::Text.new("#{node['href']} (", node.document)
-          node.add_previous_sibling(text)
-          node['href'] = cleanup_target(node.text)
-          text = Nokogiri::XML::Text.new(')', node.document)
-          node.add_next_sibling(text)
-        else
-          node.content = cleanup_target(node['href'])
-        end
-      end
-
-      # check if text has urls which need to be clickable
-      if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a')
-        if node.class == Nokogiri::XML::Text
-          urls = []
-          node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match|
-            if match[0]
-              urls.push match[0].to_s.strip
-            end
-          }
-          node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each { |match|
-            if match[1]
-              urls.push match[1].to_s.strip
-            end
-          }
-          next if urls.empty?
-          add_link(node.content, urls, node)
-        end
-      end
-
     end
 
     new_string = ''
     done = true
     while done
-      new_string = Loofah.fragment(string).scrub!(scrubber).to_s
+      new_string = Loofah.fragment(string).scrub!(scrubber_wipe).to_s
       if string == new_string
         done = false
       end
       string = new_string
     end
-    string
+
+    Loofah.fragment(string).scrub!(scrubber_link).to_s
   end
 
 =begin

+ 18 - 15
test/unit/aaa_string_test.rb

@@ -552,8 +552,7 @@ Men-----------------------'
     assert_equal(result, html.html2html_strict)
 
     html   = '<div>https://www.facebook.com/test</div>'
-    result = '<div>
-<a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a>
+    result = '<div><a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a>
 </div>'
     assert_equal(result, html.html2html_strict)
 
@@ -641,11 +640,11 @@ Men-----------------------'
     assert_equal(result, html.html2html_strict)
 
     html   = "<div>http://example.com</div>"
-    result = "<div>\n<a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>\n</div>"
+    result = "<div><a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>\n</div>"
     assert_equal(result, html.html2html_strict)
 
     html   = "<div>http://example.com.</div>"
-    result = "<div>\n<a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>.</div>"
+    result = "<div><a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>.</div>"
     assert_equal(result, html.html2html_strict)
 
     html   = "<div>lala http://example.com.</div>"
@@ -653,11 +652,11 @@ Men-----------------------'
     assert_equal(result, html.html2html_strict)
 
     html   = "<div>http://example.com, and so on</div>"
-    result = "<div>\n<a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>, and so on</div>"
+    result = "<div><a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>, and so on</div>"
     assert_equal(result, html.html2html_strict)
 
     html   = "<div>http://example.com?lala=me, and so on</div>"
-    result = "<div>\n<a href=\"http://example.com?lala=me\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com?lala=me</a>, and so on</div>"
+    result = "<div><a href=\"http://example.com?lala=me\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com?lala=me</a>, and so on</div>"
     assert_equal(result, html.html2html_strict)
 
     html   = "<a href=\"http://facebook.de/examplesrbog\"><span lang=\"EN-US\" style='color:blue'>http://facebook.de/examplesrbog</span></a>"
@@ -665,12 +664,12 @@ Men-----------------------'
     assert_equal(result, html.html2html_strict)
 
     html   = "<span style=\"font-size:10.0pt;font-family:&quot;Cambria&quot;,serif;color:#1F497D;mso-fareast-language:DE\">web&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-<a href=\"http://www.example.de\"><span style=\"color:blue\">www.example.de</span></a><o:p></o:p></span>"
-    result = "web <a href=\"http://www.example.de\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.de</a>"
+<a href=\"http://www.example.com\"><span style=\"color:blue\">www.example.com</span></a><o:p></o:p></span>"
+    result = "web <a href=\"http://www.example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.com</a>"
     assert_equal(result, html.html2html_strict)
 
-    html   = "web <a href=\"www.example.de\"><span style=\"color:blue\">www.example.de</span></a>"
-    result = "web <a href=\"http://www.example.de\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.de</a>"
+    html   = "web <a href=\"www.example.com\"><span style=\"color:blue\">www.example.com</span></a>"
+    result = "web <a href=\"http://www.example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.com</a>"
     assert_equal(result, html.html2html_strict)
 
     html   = "Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href=\"http://newsletters.cylex.de/\" class=\"\">Link des Adventkalenders</a> in<br class=\"\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Ihrer Lesezeichen-Symbolleiste zu ergänzen.</p><div class=\"\">&nbsp;"
@@ -913,9 +912,9 @@ christian.schaefer@example.com'
     result = '<img style="width: 181px; height: 125px;" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...">'
     assert_equal(result, html.html2html_strict)
 
-    html   = '<p class="MsoNormal"><a href="http://www.example.de/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>'
-    #result = '<p>http://www.example.de/ <a href="http://www.example.de/" rel="nofollow noreferrer noopener" target="_blank"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></a></p>'
-    result = '<p><a href="http://www.example.de/" rel="nofollow noreferrer noopener" target="_blank">http://www.example.de/</a></p>'
+    html   = '<p class="MsoNormal"><a href="http://www.example.com/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>'
+    #result = '<p>http://www.example.com/ <a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></a></p>'
+    result = '<p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/</a></p>'
     assert_equal(result, html.html2html_strict)
 
     html   = '<p><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></p>'
@@ -940,6 +939,10 @@ christian.schaefer@example.com'
     result = '<p>oh jeee … Zauberwort vergessen ;-) Können Sie mir <b>bitte</b> noch meine Testphase verlängern?</p><p>&nbsp;</p>'
     assert_equal(result, html.html2html_strict)
 
+    html   = '<div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a></div>'
+    result = '<div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a> (<a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a>)</div>'
+    assert_equal(result, html.html2html_strict)
+
   end
 
   test 'inline attachment replace' do
@@ -1106,10 +1109,10 @@ christian.schaefer@example.com'
     html   = '<div><br>
 <br>
 <br><font size=1 color=#5f5f5f face="sans-serif">Von: &nbsp; &nbsp; &nbsp;
-&nbsp;</font><font size=1 face="sans-serif">Hotel &lt;info@example.de&gt;</font>
+&nbsp;</font><font size=1 face="sans-serif">Hotel &lt;info@example.com&gt;</font>
 <br><font size=1 color=#5f5f5f face="sans-serif">An: &nbsp; &nbsp; &nbsp;
 &nbsp;</font></div>'
-    result = '<span class="js-signatureMarker"></span><div><br>Von: Hotel &lt;info@example.de&gt;
+    result = '<span class="js-signatureMarker"></span><div><br>Von: Hotel &lt;info@example.com&gt;
 <br>An: 
 </div>'
     assert_equal(result, html.html2html_strict)

+ 42 - 105
test/unit/cache_test.rb

@@ -3,112 +3,49 @@ require 'test_helper'
 
 class CacheTest < ActiveSupport::TestCase
   test 'cache' do
-    tests = [
 
-      # test 1
-      {
-        set: {
-          key: '123',
-          data: {
-            key: 'some value',
-          }
-        },
-        verify: {
-          key: '123',
-          data: {
-            key: 'some value',
-          }
-        },
-      },
-
-      # test 2
-      {
-        set: {
-          key: '123',
-          data: {
-            key: 'some valueöäüß',
-          }
-        },
-        verify: {
-          key: '123',
-          data: {
-            key: 'some valueöäüß',
-          }
-        },
-      },
-
-      # test 3
-      {
-        delete: {
-          key: '123',
-        },
-        verify: {
-          key: '123',
-          data: nil
-        },
-      },
-
-      # test 4
-      {
-        set: {
-          key: '123',
-          data: {
-            key: 'some valueöäüß2',
-          }
-        },
-        verify: {
-          key: '123',
-          data: {
-            key: 'some valueöäüß2',
-          }
-        },
-      },
-
-      # test 5
-      {
-        cleanup: true,
-        verify: {
-          key: '123',
-          data: nil
-        },
-      },
-
-      # test 6
-      {
-        set: {
-          key: '123',
-          data: {
-            key: 'some valueöäüß2',
-          },
-          param: {
-            expires_in: 3.seconds,
-          }
-        },
-        sleep: 5,
-        verify: {
-          key: '123',
-          data: nil
-        },
-      },
-    ]
-    tests.each { |test|
-      if test[:set]
-        Cache.write(test[:set], test[:set][:data])
-      end
-      if test[:delete]
-        Cache.delete(test[:delete][:key])
-      end
-      if test[:cleanup]
-        Cache.clear
-      end
-      if test[:sleep]
-        sleep test[:sleep]
-      end
-      if test[:verify]
-        cache = Cache.get(test[:verify])
-        assert_equal(cache, test[:verify][:data], 'verify')
-      end
-    }
+    # test 1
+    Cache.write('123', 'some value')
+    cache = Cache.get('123')
+    assert_equal(cache, 'some value')
+
+    Cache.write('123', { key: 'some value' })
+    cache = Cache.get('123')
+    assert_equal(cache, { key: 'some value' })
+
+    # test 2
+    Cache.write('123', { key: 'some valueöäüß' })
+    cache = Cache.get('123')
+    assert_equal(cache, { key: 'some valueöäüß' })
+
+    # test 3
+    Cache.delete('123')
+    cache = Cache.get('123')
+    assert_nil(cache)
+
+    # test 4
+    Cache.write('123', { key: 'some valueöäüß2' })
+    cache = Cache.get('123')
+    assert_equal(cache, { key: 'some valueöäüß2' })
+
+    Cache.delete('123')
+    cache = Cache.get('123')
+    assert_nil(cache)
+
+    # test 5
+    Cache.clear
+    cache = Cache.get('123')
+    assert_nil(cache)
+
+    Cache.delete('123')
+    cache = Cache.get('123')
+    assert_nil(cache)
+
+    # test 6
+    Cache.write('123', { key: 'some valueöäüß2' }, expires_in: 3.seconds)
+    sleep 5
+    cache = Cache.get('123')
+    assert_nil(cache)
   end
 
   # verify if second cache write overwrite first one

+ 1 - 1
test/unit/email_parser_test.rb

@@ -552,7 +552,7 @@ Newsletter abbestellen (<a href="http://newsletters.cylex.de/ref/www.cylex.de/si
       },
       {
         data: IO.binread('test/fixtures/mail19.box'),
-        body_md5: '29a8a50c2931346296f8b8fe782e115c',
+        body_md5: '0a9da3fd3da7a5779fb711fe04818ccd',
         params: {
           from: '"我" <>',
           from_email: '"我" <>',

+ 2 - 2
test/unit/html_sanitizer_test.rb

@@ -48,7 +48,7 @@ class HtmlSanitizerTest < ActiveSupport::TestCase
     assert_equal(HtmlSanitizer.strict('<DIV STYLE="background-image: url(javascript:alert(\'XSS\'), \'\')">'), '<div></div>')
     assert_equal(HtmlSanitizer.strict('<a href="/some/path">test</a>'), '<a href="/some/path">test</a>')
     assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>'), '<a href="https://some/path" rel="nofollow noreferrer noopener" target="_blank">test</a>')
-    assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>', true), '<a href="https://some/path" rel="nofollow noreferrer noopener" target="_blank">https://some/path</a> (<a href="http://test" rel="nofollow noreferrer noopener" target="_blank">test</a>)')
+    assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>', true), 'https://some/path (<a href="http://test" rel="nofollow noreferrer noopener" target="_blank">test</a>)')
     assert_equal(HtmlSanitizer.strict('<XML ID="xss"><I><B><IMG SRC="javas<!-- -->cript:alert(\'XSS\')"></B></I></XML>'), '<i><b></b></i>')
     assert_equal(HtmlSanitizer.strict('<IMG SRC="javas<!-- -->cript:alert(\'XSS\')">'), '')
     assert_equal(HtmlSanitizer.strict(' <HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-'), '  +ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-')
@@ -56,7 +56,7 @@ class HtmlSanitizerTest < ActiveSupport::TestCase
     assert_equal(HtmlSanitizer.strict('<A HREF="h
 tt  p://6 6.000146.0x7.147/">XSS</A>'), '<a href="http://66.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank">XSS</a>')
     assert_equal(HtmlSanitizer.strict('<A HREF="h
-tt  p://6 6.000146.0x7.147/">XSS</A>', true), '<a href="http://66.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank">http://66.000146.0x7.147/</a> (<a href="http://XSS" rel="nofollow noreferrer noopener" target="_blank">XSS</a>)')
+tt  p://6 6.000146.0x7.147/">XSS</A>', true), 'h%0Att%20%20p://6%206.000146.0x7.147/ (<a href="http://XSS" rel="nofollow noreferrer noopener" target="_blank">XSS</a>)')
     assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>'), '<a href="//www.google.com/" rel="nofollow noreferrer noopener" target="_blank">XSS</a>')
     assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>', true), '//www.google.com/ (<a href="http://XSS" rel="nofollow noreferrer noopener" target="_blank">XSS</a>)')
     assert_equal(HtmlSanitizer.strict('<form id="test"></form><button form="test" formaction="javascript:alert(1)">X</button>'), 'X')