Browse Source

Improved HTML sanitizer handling of <a> tags without href attributes.

Martin Edenhofer 8 years ago
parent
commit
6d808ff3eb
2 changed files with 12 additions and 2 deletions
  1. 8 2
      lib/html_sanitizer.rb
  2. 4 0
      test/unit/aaa_string_test.rb

+ 8 - 2
lib/html_sanitizer.rb

@@ -145,7 +145,10 @@ satinize html string based on whiltelist
 
       # check if href is different to text
       if external && node.name == 'a' && !url_same?(node['href'], node.text)
-        if node.children.empty? || node.children.first.class == Nokogiri::XML::Text
+        if node['href'].blank?
+          node.replace strict(node.children.to_s)
+          Loofah::Scrubber::STOP
+        elsif node.children.empty? || node.children.first.class == Nokogiri::XML::Text
           text = Nokogiri::XML::Text.new("#{node['href']} (", node.document)
           node.add_previous_sibling(text)
           node['href'] = cleanup_target(node.text)
@@ -257,7 +260,10 @@ cleanup html string:
 
       # check if href is different to text
       if node.name == 'a' && !url_same?(node['href'], node.text)
-        if node.children.empty? || node.children.first.class == Nokogiri::XML::Text
+        if node['href'].blank?
+          node.replace cleanup_structure(node.children.to_s)
+          Loofah::Scrubber::STOP
+        elsif node.children.empty? || node.children.first.class == Nokogiri::XML::Text
           text = Nokogiri::XML::Text.new("#{node.text} (", node.document)
           node.add_previous_sibling(text)
           node.content = cleanup_target(node['href'])

+ 4 - 0
test/unit/aaa_string_test.rb

@@ -620,6 +620,10 @@ Men-----------------------'
     result = "Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den Link des Adventkalenders (<a href=\"http://newsletters.cylex.de/\" rel=\"nofollow\" target=\"_blank\">http://newsletters.cylex.de/</a>) in<br> Ihrer Lesezeichen-Symbolleiste zu ergänzen.<div> </div>"
     assert_equal(result, html.html2html_strict)
 
+    html   = '<a name="_MailEndCompose"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#44546A">Hello Mr Smith,<o:p></o:p></span></a>'
+    result = 'Hello Mr Smith,'
+    assert_equal(result, html.html2html_strict)
+
     html   = "<div>
 abc<p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>"
     result = "<div>abc<span class=\"js-signatureMarker\"></span><p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p>\n</div>"