wipe.rb 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. # Copyright (C) 2012-2025 Zammad Foundation, https://zammad-foundation.org/
  2. class HtmlSanitizer
  3. module Scrubber
  4. class Wipe < Base
  5. attr_reader :remote_content_removed
  6. def initialize # rubocop:disable Lint/MissingSuper
  7. @direction = :bottom_up
  8. @remote_content_removed = false
  9. end
  10. def scrub(node)
  11. return STOP if clear_tags_allowlist(node)
  12. return STOP if remove_unsafe_src(node)
  13. clear_css_classes(node)
  14. move_attrs_to_css(node)
  15. clear_style(node)
  16. remove_invalid_links(node)
  17. remove_attributes_not_in_allowlist(node)
  18. end
  19. private
  20. def remove_attributes_not_in_allowlist(node)
  21. node.each do |attribute, _value| # rubocop:disable Style/HashEachMethods
  22. attribute_name = attribute.downcase
  23. next if attributes_allowlist[:all].include?(attribute_name) || attributes_allowlist[node.name]&.include?(attribute_name)
  24. node.delete(attribute)
  25. end
  26. end
  27. def remove_invalid_links(node)
  28. %w[href style].each do |attribute_name|
  29. next if !node[attribute_name]
  30. href = cleanup_target(node[attribute_name])
  31. next if !href.match?(%r{(javascript|livescript|vbscript):}i)
  32. node.delete(attribute_name)
  33. end
  34. end
  35. def clear_style(node)
  36. return if !node['style']
  37. style = clear_style_pairs(node)
  38. .each_with_object('') do |elem, memo|
  39. memo << "#{elem};" if clear_style_pair_valid?(node, elem)
  40. end
  41. node['style'] = style
  42. node.delete('style') if style.blank?
  43. end
  44. def clear_style_pairs(node)
  45. node['style'].downcase.gsub(%r{\t|\n|\r}, '').split(';')
  46. end
  47. def clear_style_pair_valid?(node, pair)
  48. prop = pair.split(':')
  49. return if prop.first.blank?
  50. return if !clear_style_allowed?(node, prop)
  51. return if clear_style_blocked?(node, pair)
  52. true
  53. end
  54. def clear_style_allowed?(node, prop)
  55. return if css_properties_allowlist.exclude?(node.name)
  56. return if css_properties_allowlist[node.name].exclude?(prop.first.strip)
  57. true
  58. end
  59. def clear_style_blocked?(node, pair)
  60. css_values_blocklist[node.name]&.include?(pair.gsub(%r{[[:space:]]}, '').strip)
  61. end
  62. def move_attrs_to_css(node)
  63. attributes_2_css.each do |key|
  64. next if !node[key]
  65. value = node[key]
  66. node.delete(key)
  67. next if value.blank?
  68. next if node_has_css?(node, key)
  69. node_set_style(node, key, value)
  70. end
  71. end
  72. def node_has_css?(node, key)
  73. return false if node['style'].blank?
  74. return false if node['style'].split(';').blank?
  75. node['style'].split(';').filter_map { |attr| attr.split(':')&.first&.strip }.include?(key)
  76. end
  77. def node_init_style(node)
  78. if node['style'].blank?
  79. node['style'] = ''
  80. else
  81. node['style'] += ';'
  82. end
  83. end
  84. def node_set_style(node, key, value)
  85. node_init_style(node)
  86. value += 'px' if !value.match?(%r{%|px|em}i)
  87. node['style'] += "#{key}:#{value}"
  88. end
  89. def clear_css_classes(node)
  90. return if !node['class']
  91. classes = node['class'].gsub(%r{\t|\n|\r}, '').split
  92. class_new = ''
  93. classes.each do |local_class|
  94. next if classes_allowlist.exclude?(local_class.to_s.strip)
  95. if class_new != ''
  96. class_new += ' '
  97. end
  98. class_new += local_class
  99. end
  100. if class_new == ''
  101. node.delete('class')
  102. else
  103. node['class'] = class_new
  104. end
  105. end
  106. def remove_unsafe_src(node)
  107. return if !node['src']
  108. src = cleanup_target(CGI.unescape(node['src']))
  109. return if src !~ %r{(javascript|livescript|vbscript):}i && !src.downcase.start_with?('http', 'ftp', '//')
  110. node.remove
  111. @remote_content_removed = true if !src.match?(%r{javascript|livescript|vbscript:}i)
  112. true
  113. end
  114. def clear_tags_allowlist(node)
  115. return if tags_allowlist.include?(node.name)
  116. node.before(node.children)
  117. node.remove
  118. true
  119. end
  120. def tags_allowlist
  121. @tags_allowlist ||= Rails.configuration.html_sanitizer_tags_allowlist
  122. end
  123. def attributes_allowlist
  124. @attributes_allowlist ||= Rails.configuration.html_sanitizer_attributes_allowlist
  125. end
  126. def css_properties_allowlist
  127. @css_properties_allowlist ||= Rails.configuration.html_sanitizer_css_properties_allowlist
  128. end
  129. def css_values_blocklist
  130. @css_values_blocklist ||= Rails.application.config.html_sanitizer_css_values_blocklist
  131. end
  132. # We allowlist yahoo_quoted because Yahoo Mail marks quoted email content using
  133. # <div class='yahoo_quoted'> and we rely on this class to identify quoted messages
  134. def classes_allowlist
  135. %w[js-signatureMarker yahoo_quoted]
  136. end
  137. def attributes_2_css
  138. %w[width height]
  139. end
  140. def cleanup_target(string, **options)
  141. cleaned_string = string.utf8_encode(fallback: :read_as_sanitized_binary)
  142. cleaned_string = cleaned_string.gsub(%r{[[:space:]]}, '') if !options[:keep_spaces]
  143. cleaned_string = cleaned_string.strip
  144. .delete("\t\n\r\u0000")
  145. .gsub(%r{/\*.*?\*/}, '')
  146. .gsub(%r{<!--.*?-->}, '')
  147. sanitize_attachment_disposition(cleaned_string)
  148. end
  149. def sanitize_attachment_disposition(url)
  150. @fqdn ||= Setting.get('fqdn')
  151. uri = URI(url)
  152. if uri.host == @fqdn && uri.query.present?
  153. params = CGI.parse(uri.query || '')
  154. .tap { |p| p.merge!('disposition' => 'attachment') if p.include?('disposition') }
  155. uri.query = URI.encode_www_form(params)
  156. end
  157. uri.to_s
  158. rescue
  159. url
  160. end
  161. end
  162. end
  163. end