html_sanitizer.rb 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  # Whitelist-based HTML sanitizer built on Loofah.
  #
  # The tag, attribute and CSS property lists are read from
  # +Rails.configuration+ on every call to {strict}.
  class HtmlSanitizer
    # Matches a javascript:, livescript: or vbscript: scheme anywhere in a
    # value that has already been normalized by {cleanup}.
    SCRIPT_SCHEME = /(javascript|livescript|vbscript):/i

    # Prefixes that mark a normalized URL as pointing to an external resource.
    EXTERNAL_PREFIXES = %w(http ftp //).freeze

    # Sanitizes an HTML fragment.
    #
    # For every node, in this order:
    # * tags listed in +html_sanitizer_tags_remove_content+ are removed
    #   together with their subtree,
    # * tags missing from +html_sanitizer_tags_whitelist+ are replaced by
    #   their (scrubbed) children,
    # * nodes whose +src+ carries a script scheme or an external prefix are
    #   replaced by their (scrubbed) children,
    # * +style+ is reduced to whitelisted CSS properties,
    # * +href+ / +style+ values containing a script scheme are dropped,
    # * attributes that are not whitelisted are dropped,
    # * external links get +rel="nofollow"+ and +target="_blank"+.
    #
    # @param string [String] HTML fragment to sanitize
    # @return [String] the serialized, scrubbed fragment
    def self.strict(string)
      config = Rails.configuration
      tags_remove_content = config.html_sanitizer_tags_remove_content
      tags_whitelist = config.html_sanitizer_tags_whitelist
      attributes_whitelist = config.html_sanitizer_attributes_whitelist
      css_properties_whitelist = config.html_sanitizer_css_properties_whitelist

      scrubber = Loofah::Scrubber.new do |node|
        # remove tags with subtree; the node is detached afterwards, so none
        # of the remaining steps may run on it
        if tags_remove_content.include?(node.name)
          node.remove
          next Loofah::Scrubber::STOP
        end

        # replace tags, keep subtree; traversal already scrubbed the children
        # and removed the node, so stop here
        if !tags_whitelist.include?(node.name)
          traversal(node, scrubber)
          next Loofah::Scrubber::STOP
        end

        # drop nodes whose src is a script url or an external resource
        if node['src']
          src = cleanup(node['src'])
          if src =~ SCRIPT_SCHEME || src.start_with?(*EXTERNAL_PREFIXES)
            traversal(node, scrubber)
            next Loofah::Scrubber::STOP
          end
        end

        # clean style / only use allowed style properties
        if node['style']
          style = filter_style(node['style'], css_properties_whitelist)
          if style.empty?
            node.delete('style')
          else
            node['style'] = style
          end
        end

        # scan for invalid link content
        %w(href style).each do |attribute_name|
          value = node[attribute_name]
          node.delete(attribute_name) if value && cleanup(value) =~ SCRIPT_SCHEME
        end

        # remove attributes if not whitelisted (globally or for this tag)
        node.each do |attribute, _value|
          attribute_name = attribute.downcase
          allowed = attributes_whitelist[:all].include?(attribute_name) ||
                    (attributes_whitelist[node.name] && attributes_whitelist[node.name].include?(attribute_name))
          node.delete(attribute) unless allowed
        end

        # prepare links
        if node['href'] && cleanup(node['href']).start_with?(*EXTERNAL_PREFIXES)
          node.set_attribute('rel', 'nofollow')
          node.set_attribute('target', '_blank')
        end

        Loofah::Scrubber::CONTINUE
      end

      Loofah.fragment(string).scrub!(scrubber).to_s
    end

    # Replaces +node+ by its children and removes +node+ itself.
    #
    # CDATA children are inserted as plain text nodes; every other child is
    # serialized, re-parsed as a fragment, scrubbed with +scrubber+ and then
    # inserted in front of +node+.
    #
    # @param node [Nokogiri::XML::Node] node to unwrap
    # @param scrubber [Loofah::Scrubber] scrubber applied to re-parsed children
    def self.traversal(node, scrubber)
      node.children.each do |child|
        if child.instance_of?(Nokogiri::XML::CDATA)
          # use the child's own content: node.content would repeat the text of
          # all children once per CDATA child
          node.before Nokogiri::XML::Text.new(child.content, node.document)
        else
          node.before Loofah.fragment(child.to_s).scrub!(scrubber)
        end
      end
      node.remove
    end

    # Normalizes an attribute value before it is checked for script schemes or
    # url prefixes: lowercases it and strips all whitespace, +/* ... */+
    # comments, +<!-- ... -->+ comments and +[...]+ groups.
    #
    # @param string [String] raw attribute value
    # @return [String] normalized copy of +string+
    def self.cleanup(string)
      string.downcase
            .gsub(/[[:space:]]/, '')
            .gsub(%r{/\*.*?\*/}, '')
            .gsub(/<!--.*?-->/, '')
            .gsub(/\[.+?\]/, '')
    end

    # Keeps only the declarations of a style attribute whose property name is
    # whitelisted. The value is lowercased and tabs / line breaks are removed
    # before it is split on ";".
    #
    # @param style [String] raw value of a style attribute
    # @param css_properties_whitelist [#include?] allowed property names
    # @return [String] kept declarations, each terminated by ";" ('' if none)
    def self.filter_style(style, css_properties_whitelist)
      declarations = style.downcase.gsub(/\t|\n|\r/, '').split(';')
      kept = declarations.select do |declaration|
        property = declaration.split(':').first
        property && css_properties_whitelist.include?(property.strip)
      end
      kept.map { |declaration| "#{declaration};" }.join
    end

    private_class_method :traversal
    private_class_method :cleanup
    private_class_method :filter_style
  end