Просмотр исходного кода

Maintenance: HtmlSanitizer refactoring

Mantas Masalskis 2 года назад
Родитель
Commit
455e6b79b3

+ 5 - 441
lib/html_sanitizer.rb

@@ -1,8 +1,7 @@
 # Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
 
 class HtmlSanitizer
-  LINKABLE_URL_SCHEMES = URI.scheme_list.keys.map(&:downcase) - ['mailto'] + ['tel']
-  PROCESSING_TIMEOUT = 20
+  PROCESSING_TIMEOUT     = 20.seconds
   UNPROCESSABLE_HTML_MSG = __('This message cannot be displayed due to HTML processing issues. Download the raw message below and open it via an Email client if you still wish to view it.').freeze
 
 =begin
@@ -14,201 +13,7 @@ sanitize html string based on whiltelist
 =end
 
   def self.strict(string, external = false, timeout: true)
-    Timeout.timeout(timeout ? PROCESSING_TIMEOUT : nil) do
-      @fqdn              = Setting.get('fqdn')
-      http_type          = Setting.get('http_type')
-      web_app_url_prefix = "#{http_type}://#{@fqdn}/\#".downcase
-
-      # config
-      tags_remove_content = Rails.configuration.html_sanitizer_tags_remove_content
-      tags_quote_content = Rails.configuration.html_sanitizer_tags_quote_content
-      tags_allowlist = Rails.configuration.html_sanitizer_tags_allowlist
-      attributes_allowlist = Rails.configuration.html_sanitizer_attributes_allowlist
-      css_properties_allowlist = Rails.configuration.html_sanitizer_css_properties_allowlist
-      css_values_blocklist = Rails.application.config.html_sanitizer_css_values_blocklist
-
-      # We allowlist yahoo_quoted because Yahoo Mail marks quoted email content using
-      # <div class='yahoo_quoted'> and we rely on this class to identify quoted messages
-      classes_allowlist = %w[js-signatureMarker yahoo_quoted]
-      attributes_2_css = %w[width height]
-
-      # remove tags with subtree
-      scrubber_tag_remove = Loofah::Scrubber.new do |node|
-        next if tags_remove_content.exclude?(node.name)
-
-        node.remove
-        Loofah::Scrubber::STOP
-      end
-      string = Loofah.fragment(string).scrub!(scrubber_tag_remove).to_s
-
-      # remove tag, insert quoted content
-      scrubber_wipe_quote_content = Loofah::Scrubber.new do |node|
-        next if tags_quote_content.exclude?(node.name)
-
-        string = html_decode(node.content)
-        text = Nokogiri::XML::Text.new(string, node.document)
-        node.add_next_sibling(text)
-        node.remove
-        Loofah::Scrubber::STOP
-      end
-      string = Loofah.fragment(string).scrub!(scrubber_wipe_quote_content).to_s
-
-      scrubber_wipe = Loofah::Scrubber.new do |node|
-
-        # replace tags, keep subtree
-        if tags_allowlist.exclude?(node.name)
-          node.replace node.children.to_s
-          Loofah::Scrubber::STOP
-        end
-
-        # prepare src attribute
-        if node['src']
-          src = cleanup_target(CGI.unescape(node['src']))
-          if src =~ %r{(javascript|livescript|vbscript):}i || src.downcase.start_with?('http', 'ftp', '//')
-            node.remove
-            Loofah::Scrubber::STOP
-          end
-        end
-
-        # clean class / only use allowed classes
-        if node['class']
-          classes = node['class'].gsub(%r{\t|\n|\r}, '').split
-          class_new = ''
-          classes.each do |local_class|
-            next if classes_allowlist.exclude?(local_class.to_s.strip)
-
-            if class_new != ''
-              class_new += ' '
-            end
-            class_new += local_class
-          end
-          if class_new == ''
-            node.delete('class')
-          else
-            node['class'] = class_new
-          end
-        end
-
-        # move style attributes to css attributes
-        attributes_2_css.each do |key|
-          next if !node[key]
-
-          if node['style'].blank?
-            node['style'] = ''
-          else
-            node['style'] += ';'
-          end
-          value = node[key]
-          node.delete(key)
-          next if value.blank?
-
-          value += 'px' if !value.match?(%r{%|px|em}i)
-          node['style'] += "#{key}:#{value}"
-        end
-
-        # clean style / only use allowed style properties
-        if node['style']
-          pears = node['style'].downcase.gsub(%r{\t|\n|\r}, '').split(';')
-          style = ''
-          pears.each do |local_pear|
-            prop = local_pear.split(':')
-            next if !prop[0]
-
-            key = prop[0].strip
-            next if css_properties_allowlist.exclude?(node.name)
-            next if css_properties_allowlist[node.name].exclude?(key)
-            next if css_values_blocklist[node.name]&.include?(local_pear.gsub(%r{[[:space:]]}, '').strip)
-
-            style += "#{local_pear};"
-          end
-          node['style'] = style
-          if style == ''
-            node.delete('style')
-          end
-        end
-
-        # scan for invalid link content
-        %w[href style].each do |attribute_name|
-          next if !node[attribute_name]
-
-          href = cleanup_target(node[attribute_name])
-          next if !href.match?(%r{(javascript|livescript|vbscript):}i)
-
-          node.delete(attribute_name)
-        end
-
-        # remove attributes if not allowlisted
-        node.each do |attribute, _value|
-          attribute_name = attribute.downcase
-          next if attributes_allowlist[:all].include?(attribute_name) || attributes_allowlist[node.name]&.include?(attribute_name)
-
-          node.delete(attribute)
-        end
-
-      end
-
-      done = true
-      while done
-        new_string = Loofah.fragment(string).scrub!(scrubber_wipe).to_s
-        if string == new_string
-          done = false
-        end
-        string = new_string
-      end
-
-      scrubber_link = Loofah::Scrubber.new do |node|
-
-        # wrap plain-text URLs in <a> tags
-        if node.is_a?(Nokogiri::XML::Text) && node.content.present? && node.content.include?(':')
-          node_ancestor_names = node.ancestors.map(&:name)
-          if node_ancestor_names.exclude?('a') && node_ancestor_names.exclude?('pre')
-            urls = URI.extract(node.content, LINKABLE_URL_SCHEMES)
-                    .map { |u| u.sub(%r{[,.]$}, '') } # URI::extract captures trailing dots/commas
-                    .grep_v(%r{^[^:]+:$}) # URI::extract will match, e.g., 'tel:'
-
-            next if urls.blank?
-
-            add_link(node.content, urls, node)
-          end
-        end
-
-        # prepare links
-        if node['href']
-          href                = cleanup_target(node['href'], keep_spaces: true)
-          href_without_spaces = href.gsub(%r{[[:space:]]}, '')
-          if external && href_without_spaces.present? && !href_without_spaces.downcase.start_with?('mailto:') && !href_without_spaces.downcase.start_with?('//') && href_without_spaces.downcase !~ %r{^.{1,6}://.+?}
-            node['href']        = "http://#{node['href']}"
-            href                = node['href']
-            href_without_spaces = href.gsub(%r{[[:space:]]}, '')
-          end
-
-          next if !CGI.unescape(href_without_spaces).utf8_encode(fallback: :read_as_sanitized_binary).gsub(%r{[[:space:]]}, '').downcase.start_with?('http', 'ftp', '//')
-
-          node.set_attribute('href', href)
-          node.set_attribute('rel', 'nofollow noreferrer noopener')
-
-          # do not "target=_blank" WebApp URLs (e.g. mentions)
-          if !href.downcase.start_with?(web_app_url_prefix)
-            node.set_attribute('target', '_blank')
-          end
-        end
-
-        if node.name == 'a' && node['href'].blank?
-          node.replace node.children.to_s
-          Loofah::Scrubber::STOP
-        end
-
-        # check if href is different to text
-        if node.name == 'a' && !url_same?(node['href'], node.text) && node['title'].blank?
-          node['title'] = node['href']
-        end
-      end
-
-      Loofah.fragment(string).scrub!(scrubber_link).to_s
-    end
-  rescue Timeout::Error
-    Rails.logger.error "Could not process string via HtmlSanitizer.strict in #{PROCESSING_TIMEOUT} seconds. Current state: #{string}"
-    UNPROCESSABLE_HTML_MSG
+    HtmlSanitizer::Strict.new.sanitize(string, external: external, timeout: timeout)
   end
 
 =begin
@@ -223,191 +28,7 @@ cleanup html string:
 =end
 
   def self.cleanup(string, timeout: true)
-    Timeout.timeout(timeout ? PROCESSING_TIMEOUT : nil) do
-      string.gsub!(%r{<[A-z]:[A-z]>}, '')
-      string.gsub!(%r{</[A-z]:[A-z]>}, '')
-      string.delete!("\t")
-
-      # remove all new lines
-      string.gsub!(%r{(\n\r|\r\r\n|\r\n|\n)}, "\n")
-
-      # remove double multiple empty lines
-      string.gsub!(%r{\n\n\n+}, "\n\n")
-
-      string = cleanup_structure(string, 'pre')
-      string = cleanup_structure(string)
-      string
-    end
-  rescue Timeout::Error
-    Rails.logger.error "Could not process string via HtmlSanitizer.cleanup in #{PROCESSING_TIMEOUT} seconds. Current state: #{string}"
-    UNPROCESSABLE_HTML_MSG
-  end
-
-  def self.remove_last_empty_node(node, remove_empty_nodes, remove_empty_last_nodes)
-    if node.children.present?
-      if node.children.size == 1
-        local_name = node.name
-        child = node.children.first
-
-        # replace not needed node (parent <- child)
-        if local_name == child.name && node.attributes.present? && node.children.first.attributes.blank?
-          local_node_child = node.children.first
-          node.attributes.each do |k|
-            local_node_child.set_attribute(k[0], k[1])
-          end
-          node.replace local_node_child.to_s
-          Loofah::Scrubber::STOP
-
-        # replace not needed node (parent replace with child node)
-        elsif (local_name == 'span' || local_name == child.name) && node.attributes.blank?
-          node.replace node.children.to_s
-          Loofah::Scrubber::STOP
-        end
-      else
-
-        # loop through nodes
-        node.children.each do |local_node|
-          remove_last_empty_node(local_node, remove_empty_nodes, remove_empty_last_nodes)
-        end
-      end
-    # remove empty nodes
-    elsif (remove_empty_nodes.include?(node.name) || remove_empty_last_nodes.include?(node.name)) && node.content.blank? && node.attributes.blank?
-      node.remove
-      Loofah::Scrubber::STOP
-    end
-  end
-
-  def self.cleanup_structure(string, type = 'all')
-    remove_empty_nodes = if type == 'pre'
-                           %w[span]
-                         else
-                           %w[p div span small table]
-                         end
-    remove_empty_last_nodes = %w[b i u small table]
-
-    # remove last empty nodes and empty -not needed- parrent nodes
-    scrubber_structure = Loofah::Scrubber.new do |node|
-      remove_last_empty_node(node, remove_empty_nodes, remove_empty_last_nodes)
-    end
-
-    done = true
-    while done
-      new_string = Loofah.fragment(string).scrub!(scrubber_structure).to_s
-      if string == new_string
-        done = false
-      end
-      string = new_string
-    end
-
-    scrubber_cleanup = Loofah::Scrubber.new do |node|
-
-      # remove not needed new lines
-      if node.instance_of?(Nokogiri::XML::Text)
-        if !node.parent || (node.parent.name != 'pre' && node.parent.name != 'code') # rubocop:disable Style/SoleNestedConditional
-          content = node.content
-          if content
-            if content != ' ' && content != "\n"
-              content.gsub!(%r{[[:space:]]+}, ' ')
-            end
-            if node.previous
-              if node.previous.name == 'div' || node.previous.name == 'p'
-                content.strip!
-              end
-            elsif node.parent && !node.previous && (!node.next || node.next.name == 'div' || node.next.name == 'p' || node.next.name == 'br')
-              if (node.parent.name == 'div' || node.parent.name == 'p') && content != ' ' && content != "\n"
-                content.strip!
-              end
-            end
-            node.content = content
-          end
-        end
-      end
-    end
-    Loofah.fragment(string).scrub!(scrubber_cleanup).to_s
-  end
-
-  def self.add_link(content, urls, node)
-    if urls.blank?
-      text = Nokogiri::XML::Text.new(content, node.document)
-      node.add_next_sibling(text)
-      return
-    end
-    url = urls.shift
-
-    if content =~ %r{^(.*)#{Regexp.quote(url)}(.*)$}mx
-      pre = $1
-      post = $2
-
-      if url.match?(%r{^www}i)
-        url = "http://#{url}"
-      end
-
-      a = Nokogiri::XML::Node.new 'a', node.document
-      a['href'] = url
-      a['rel'] = 'nofollow noreferrer noopener'
-      a['target'] = '_blank'
-      a.content = url
-
-      if node.class != Nokogiri::XML::Text
-        text = Nokogiri::XML::Text.new(pre, node.document)
-        node.add_next_sibling(text).add_next_sibling(a)
-        return if post.blank?
-
-        add_link(post, urls, a)
-        return
-      end
-      node.content = pre
-      node.add_next_sibling(a)
-      return if post.blank?
-
-      add_link(post, urls, a)
-    end
-
-    true
-  end
-
-  def self.html_decode(string)
-    string.gsub('&amp;', '&').gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&nbsp;', ' ')
-  end
-
-  def self.cleanup_target(string, **options)
-    cleaned_string = string.utf8_encode(fallback: :read_as_sanitized_binary)
-    cleaned_string = cleaned_string.gsub(%r{[[:space:]]}, '') if !options[:keep_spaces]
-    cleaned_string = cleaned_string.strip
-                                   .delete("\t\n\r\u0000")
-                                   .gsub(%r{/\*.*?\*/}, '')
-                                   .gsub(%r{<!--.*?-->}, '')
-
-    sanitize_attachment_disposition(cleaned_string)
-  end
-
-  def self.sanitize_attachment_disposition(url)
-    @fqdn ||= Setting.get('fqdn')
-    uri = URI(url)
-
-    if uri.host == @fqdn && uri.query.present?
-      params = CGI.parse(uri.query || '')
-                  .tap { |p| p.merge!('disposition' => 'attachment') if p.include?('disposition') }
-      uri.query = URI.encode_www_form(params)
-    end
-
-    uri.to_s
-  rescue
-    url
-  end
-
-  def self.url_same?(url_new, url_old)
-    url_new = CGI.unescape(url_new.to_s).utf8_encode(fallback: :read_as_sanitized_binary).downcase.delete_suffix('/').gsub(%r{[[:space:]]|\t|\n|\r}, '').strip
-    url_old = CGI.unescape(url_old.to_s).utf8_encode(fallback: :read_as_sanitized_binary).downcase.delete_suffix('/').gsub(%r{[[:space:]]|\t|\n|\r}, '').strip
-    url_new = html_decode(url_new).sub('/?', '?')
-    url_old = html_decode(url_old).sub('/?', '?')
-    return true if url_new == url_old
-    return true if url_old == "http://#{url_new}"
-    return true if url_new == "http://#{url_old}"
-    return true if url_old == "https://#{url_new}"
-    return true if url_new == "https://#{url_old}"
-
-    false
+    HtmlSanitizer::Cleanup.new.sanitize(string, timeout: timeout)
   end
 
 =begin
@@ -419,36 +40,7 @@ replace inline images with cid images
 =end
 
   def self.replace_inline_images(string, prefix = SecureRandom.uuid)
-    fqdn = Setting.get('fqdn')
-    attachments_inline = []
-    filename_counter = 0
-    scrubber = Loofah::Scrubber.new do |node|
-      if node.name == 'img'
-        if node['src'] && node['src'] =~ %r{^(data:image/(jpeg|png);base64,.+?)$}i
-          filename_counter += 1
-          file_attributes = StaticAssets.data_url_attributes($1)
-          cid = "#{prefix}.#{SecureRandom.uuid}@#{fqdn}"
-          filename = cid
-          if file_attributes[:file_extention].present?
-            filename = "image#{filename_counter}.#{file_attributes[:file_extention]}"
-          end
-          attachment = {
-            data:        file_attributes[:content],
-            filename:    filename,
-            preferences: {
-              'Content-Type'        => file_attributes[:mime_type],
-              'Mime-Type'           => file_attributes[:mime_type],
-              'Content-ID'          => cid,
-              'Content-Disposition' => 'inline',
-            },
-          }
-          attachments_inline.push attachment
-          node['src'] = "cid:#{cid}"
-        end
-        Loofah::Scrubber::STOP
-      end
-    end
-    [Loofah.fragment(string).scrub!(scrubber).to_s, attachments_inline]
+    HtmlSanitizer::ReplaceInlineImages.new.sanitize(string, prefix)
   end
 
 =begin
@@ -460,35 +52,7 @@ sanitize style of img tags
 =end
 
   def self.dynamic_image_size(string)
-    scrubber = Loofah::Scrubber.new do |node|
-      if node.name == 'img'
-        if node['src']
-          style = 'max-width:100%;'
-          if node['style']
-            pears = node['style'].downcase.gsub(%r{\t|\n|\r}, '').split(';')
-            pears.each do |local_pear|
-              prop = local_pear.split(':')
-              next if !prop[0]
-
-              key = prop[0].strip
-              if key == 'height'
-                key = 'max-height'
-              end
-              style += "#{key}:#{prop[1]};"
-            end
-          end
-          node['style'] = style
-        end
-        Loofah::Scrubber::STOP
-      end
-    end
-    Loofah.fragment(string).scrub!(scrubber).to_s
+    HtmlSanitizer::DynamicImageSize.new.sanitize(string)
   end
 
-  private_class_method :cleanup_target
-  private_class_method :sanitize_attachment_disposition
-  private_class_method :add_link
-  private_class_method :url_same?
-  private_class_method :html_decode
-
 end

+ 24 - 0
lib/html_sanitizer/base.rb

@@ -0,0 +1,24 @@
+# Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
+
+class HtmlSanitizer
+  # Shared behavior for the sanitizer strategy classes (e.g. Cleanup).
+  class Base
+    # Runs the given block under HtmlSanitizer::PROCESSING_TIMEOUT.
+    # On timeout, logs the failure together with the current state of the
+    # string and returns the UNPROCESSABLE_HTML_MSG placeholder instead of
+    # the (partially) processed markup.
+    def with_timeout(string, &block)
+      Timeout.timeout(PROCESSING_TIMEOUT, &block)
+    rescue Timeout::Error
+      Rails.logger.error "Could not process string via #{self.class.name} in #{PROCESSING_TIMEOUT} seconds. Current state: #{string}"
+      UNPROCESSABLE_HTML_MSG
+    end
+
+    # Re-applies +scrubber+ to +string+ until a fixed point is reached
+    # (scrubbing produces no further change).
+    # NOTE(review): this instance method shadows Kernel#loop for this class
+    # hierarchy — consider a less overloaded name (e.g. scrub_until_stable).
+    def loop(string, scrubber)
+      old_string = nil
+
+      while string != old_string
+        old_string = string
+
+        string = Loofah.fragment(string).scrub!(scrubber).to_html
+      end
+
+      string
+    end
+  end
+end

+ 41 - 0
lib/html_sanitizer/cleanup.rb

@@ -0,0 +1,41 @@
+# Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
+
+class HtmlSanitizer
+  # Normalizes HTML: strips office-namespace tags, collapses whitespace/blank
+  # lines and removes empty or redundant wrapper nodes.
+  class Cleanup < Base
+    # @param string  [String]  HTML fragment to clean up
+    # @param timeout [Boolean] when true, abort via Base#with_timeout after
+    #                          PROCESSING_TIMEOUT and return the placeholder
+    # @return [String] cleaned HTML
+    def sanitize(string, timeout: true)
+      return run_sanitization(string) if !timeout
+
+      with_timeout(string) do
+        run_sanitization(string)
+      end
+    end
+
+    private
+
+    # Pipeline: plain-string cleanup, then a 'pre' structure pass, then the
+    # general structure pass.
+    def run_sanitization(string)
+      string = clean_string(string)
+
+      string = cleanup_structure(string, 'pre')
+
+      cleanup_structure(string)
+    end
+
+    # Removes Word/Office-style namespaced tags (e.g. <o:p>), tabs, and
+    # collapses newline variants and runs of blank lines.
+    # NOTE(review): [A-z] also matches the ASCII characters between 'Z' and
+    # 'a' ([ \ ] ^ _ `) — carried over from the previous implementation.
+    def clean_string(input)
+      output = input.gsub(%r{<(|/)[A-z]:[A-z]>}, '')
+
+      output = output.delete("\t")
+      # remove all new lines
+      output
+        .gsub(%r{(\n\r|\r\r\n|\r\n|\n)}, "\n")
+        .gsub(%r{\n\n\n+}, "\n\n")
+    end
+
+    # Repeats the RemoveLastEmptyNode scrubber until stable (Base#loop),
+    # then does a single whitespace-cleanup pass.
+    # +type+ selects the scrubber's node set ('pre' or 'all').
+    def cleanup_structure(string, type = 'all')
+      empty_node_scrubber = HtmlSanitizer::Scrubber::RemoveLastEmptyNode.new(type)
+
+      string = loop(string, empty_node_scrubber)
+
+      Loofah.fragment(string).scrub!(HtmlSanitizer::Scrubber::Cleanup.new).to_html
+    end
+  end
+end

+ 12 - 0
lib/html_sanitizer/dynamic_image_size.rb

@@ -0,0 +1,12 @@
+# Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
+
+class HtmlSanitizer
+  # Rewrites inline styles of <img> tags so images scale responsively
+  # (see Scrubber::ImageSize for the actual style rewriting).
+  class DynamicImageSize
+    # @param string [String] HTML fragment
+    # @return [String] HTML with img styles adjusted
+    def sanitize(string)
+      Loofah
+        .fragment(string)
+        .scrub!(HtmlSanitizer::Scrubber::ImageSize.new)
+        .to_html
+    end
+  end
+end

+ 15 - 0
lib/html_sanitizer/replace_inline_images.rb

@@ -0,0 +1,15 @@
+# Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
+
+class HtmlSanitizer
+  # Replaces base64 data-URL images with cid: references and collects the
+  # extracted attachments (see Scrubber::InlineImages).
+  class ReplaceInlineImages
+    # @param string [String] HTML fragment
+    # @param prefix [String] Content-ID prefix for generated cids
+    # @return [Array(String, Array<Hash>)] rewritten HTML and the inline
+    #   attachments gathered by the scrubber
+    def sanitize(string, prefix)
+      scrubber = HtmlSanitizer::Scrubber::InlineImages.new(prefix)
+
+      sanitized = Loofah
+        .fragment(string)
+        .scrub!(scrubber)
+
+      [sanitized.to_html, scrubber.attachments_inline]
+    end
+  end
+end

+ 23 - 0
lib/html_sanitizer/scrubber/base.rb

@@ -0,0 +1,23 @@
+# Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
+
+class HtmlSanitizer
+  module Scrubber
+    # Common base for the sanitizer scrubbers; provides minimal HTML-entity
+    # decoding shared by subclasses.
+    class Base < Loofah::Scrubber
+      # Entities decoded by #html_decode (intentionally only this small set).
+      HTML_DECODABLE = {
+        '&amp;'  => '&',
+        '&lt;'   => '<',
+        '&gt;'   => '>',
+        '&quot;' => '"',
+        '&nbsp;' => ' '
+      }.freeze
+
+      # One alternation regexp built from the entity keys, for a single-pass gsub.
+      HTML_DECODABLE_REGEXP = Regexp.union(HTML_DECODABLE.keys).freeze
+
+      protected
+
+      # Replaces the entities above with their literal characters.
+      def html_decode(string)
+        string.gsub HTML_DECODABLE_REGEXP, HTML_DECODABLE
+      end
+    end
+  end
+end

+ 60 - 0
lib/html_sanitizer/scrubber/cleanup.rb

@@ -0,0 +1,60 @@
+# Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
+
+class HtmlSanitizer
+  module Scrubber
+    # Normalizes whitespace in text nodes (outside of <pre>/<code>, where
+    # whitespace is significant).
+    class Cleanup < Base
+      def scrub(node)
+        return if !node.instance_of?(Nokogiri::XML::Text)
+        return if %w[pre code].include? node.parent&.name
+
+        update_node_content(node)
+      end
+
+      private
+
+      # Applies the three normalizations below and writes the result back.
+      def update_node_content(node)
+        content = node.content
+
+        return if !content
+
+        content = remove_space_if_needed(content)
+        content = strip_if_needed_previous(node, content)
+        content = strip_if_needed_next(node, content)
+
+        node.content = content
+      end
+
+      # Collapses runs of whitespace to a single space, but leaves a lone
+      # " " or "\n" node untouched.
+      def remove_space_if_needed(content)
+        return content if space_or_nl?(content)
+
+        content.gsub(%r{[[:space:]]+}, ' ')
+      end
+
+      # Strips the text when it directly follows a block element (div/p).
+      def strip_if_needed_previous(node, content)
+        return content if !node.previous
+        return content if !div_or_p?(node.previous)
+
+        content.strip
+      end
+
+      # Strips the first text child of a div/p when it is followed by
+      # nothing or by a div/p/br sibling.
+      def strip_if_needed_next(node, content)
+        return content if !node.parent
+        return content if node.previous
+        return content if node.next && %w[div p br].exclude?(node.next.name)
+
+        return content if !div_or_p?(node.parent)
+        return content if space_or_nl?(content)
+
+        content.strip
+      end
+
+      # True when the string is exactly a single space or a single newline.
+      def space_or_nl?(string)
+        [' ', "\n"].include?(string)
+      end
+
+      def div_or_p?(node)
+        %w[div p].include? node.name
+      end
+    end
+  end
+end

+ 45 - 0
lib/html_sanitizer/scrubber/image_size.rb

@@ -0,0 +1,45 @@
+# Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
+
+class HtmlSanitizer
+  module Scrubber
+    # Rewrites <img> styles: forces max-width:100% and converts any fixed
+    # height into max-height so images shrink with the viewport.
+    class ImageSize < Base
+      def scrub(node)
+        return CONTINUE if node.name != 'img'
+
+        if node['src']
+          update_style(node)
+        end
+
+        # img children are not descended into
+        STOP
+      end
+
+      private
+
+      def update_style(node)
+        node['style'] = build_style(node['style'])
+      end
+
+      # Rebuilds the style attribute, prepending max-width:100% and
+      # renaming height -> max-height; other properties pass through.
+      def build_style(input)
+        style = 'max-width:100%;'
+
+        return style if input.blank?
+
+        input
+          .downcase
+          .gsub(%r{\t|\n|\r}, '')
+          .split(';')
+          .each_with_object(style) do |elem, memo|
+            key, value = elem.split(':')
+
+            # NOTE(review): a blank segment (e.g. "width:1;;height:2")
+            # makes elem.split(':') return [], so key is nil and
+            # key.strip! raises NoMethodError; the removed implementation
+            # guarded this with `next if !prop[0]`.
+            key.strip!
+
+            next if key.blank?
+
+            key = 'max-height' if key == 'height'
+
+            # value may be nil (e.g. "display;"), producing "display:;"
+            memo << "#{key}:#{value};"
+          end
+      end
+    end
+  end
+end

+ 70 - 0
lib/html_sanitizer/scrubber/inline_images.rb

@@ -0,0 +1,70 @@
+# Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
+
+class HtmlSanitizer
+  module Scrubber
+    # Replaces base64 data-URL <img> sources (jpeg/png) with cid: references
+    # and accumulates the decoded images as attachment hashes in
+    # #attachments_inline for the caller to persist.
+    class InlineImages < Base
+      attr_reader :attachments_inline, :prefix
+
+      def initialize(prefix = SecureRandom.uuid) # rubocop:disable Lint/MissingSuper
+        @direction = :top_down
+
+        @attachments_inline = []
+        @prefix             = prefix
+      end
+
+      def scrub(node)
+        return CONTINUE if node.name != 'img'
+
+        # $1 is the full data URL captured by the regexp below
+        if node['src'] && node['src'] =~ %r{^(data:image/(jpeg|png);base64,.+?)$}i
+          process_inline_image(node, $1)
+        end
+
+        STOP
+      end
+
+      private
+
+      # NOTE(review): this method is not called anywhere in this class —
+      # #scrub matches the same regexp inline. Dead code; remove or use it.
+      def inline_image_data(src)
+        return if src.blank?
+
+        matchdata = src.match %r{^(data:image/(jpeg|png);base64,.+?)$}i
+
+        return if !matchdata
+
+        matchdata[0]
+      end
+
+      # Registers the attachment and points the img src at its new cid.
+      def process_inline_image(node, data)
+        cid        = generate_cid
+        attachment = parse_inline_image(data, cid)
+
+        @attachments_inline.push attachment
+        node['src'] = "cid:#{cid}"
+      end
+
+      # Builds the attachment hash from the decoded data URL.
+      # NOTE(review): unlike the removed implementation there is no fallback
+      # to the cid when file_attributes[:file_extention] is blank, so the
+      # filename can end with a trailing dot — confirm this is intended.
+      def parse_inline_image(data, cid)
+        file_attributes = StaticAssets.data_url_attributes(data)
+        filename        = "image#{@attachments_inline.length + 1}.#{file_attributes[:file_extention]}"
+
+        {
+          data:        file_attributes[:content],
+          filename:    filename,
+          preferences: {
+            'Content-Type'        => file_attributes[:mime_type],
+            'Mime-Type'           => file_attributes[:mime_type],
+            'Content-ID'          => cid,
+            'Content-Disposition' => 'inline',
+          }
+        }
+      end
+
+      # Content-ID: "<prefix>.<uuid>@<fqdn>"
+      def generate_cid
+        "#{prefix}.#{SecureRandom.uuid}@#{fqdn}"
+      end
+
+      def fqdn
+        @fqdn ||= Setting.get('fqdn')
+      end
+    end
+  end
+end

+ 218 - 0
lib/html_sanitizer/scrubber/link.rb

@@ -0,0 +1,218 @@
+# Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
+
+class HtmlSanitizer
+  module Scrubber
+    # Link handling: wraps plain-text URLs in <a> tags, normalizes href
+    # targets (protocol, rel, target=_blank) and drops <a> tags without a
+    # usable href.
+    class Link < Base
+      # URL schemes that get auto-linked; mailto is excluded, tel added.
+      LINKABLE_URL_SCHEMES = URI.scheme_list.keys.map(&:downcase) - ['mailto'] + ['tel']
+
+      attr_reader :external, :web_app_url_prefix
+
+      # @param web_app_url_prefix [String] lowercase prefix of own WebApp
+      #   URLs; those do not get target=_blank
+      # @param external [Boolean] when true, scheme-less hrefs are retried
+      #   with an http:// prefix
+      def initialize(web_app_url_prefix:, external: false) # rubocop:disable Lint/MissingSuper
+        @direction = :top_down
+
+        @external = external
+        @web_app_url_prefix = web_app_url_prefix
+      end
+
+      def scrub(node)
+        # text nodes: auto-link plain-text URLs (node_urls is nil for
+        # non-text nodes and for text inside <a>/<pre>)
+        if (urls = node_urls(node))
+          return if urls.blank?
+
+          add_link(node.content, urls, node)
+        end
+
+        # prepare links
+        return if href_cleanup(node)
+
+        return STOP if ensure_href_present(node)
+
+        update_node_title(node)
+      end
+
+      private
+
+      # Normalizes the href attribute. Returns true (caller aborts) when the
+      # cleaned href does not start with an accepted protocol.
+      def href_cleanup(node)
+        return if !node['href']
+
+        href                = cleanup_target(node['href'], keep_spaces: true)
+        href_without_spaces = href.gsub(%r{[[:space:]]}, '')
+
+        if href_retry_protocol?(href_without_spaces)
+          node['href']        = "http://#{node['href']}"
+          href                = node['href']
+          href_without_spaces = href.gsub(%r{[[:space:]]}, '')
+        end
+
+        return true if !href_starts_with_protocol?(href_without_spaces)
+
+        href_set_values(node, href)
+
+        false
+      end
+
+      # True when an external, scheme-less href should be retried with a
+      # prepended "http://".
+      def href_retry_protocol?(href_without_spaces)
+        return if !external
+        return if href_without_spaces.blank?
+        return if href_without_spaces.downcase.start_with?('mailto:')
+        return if href_without_spaces.downcase.start_with?('//')
+        return if href_without_spaces.downcase.match? %r{^.{1,6}://.+?}
+
+        true
+      end
+
+      # Accepts http(s), ftp and protocol-relative URLs only.
+      def href_starts_with_protocol?(href_without_spaces)
+        CGI
+          .unescape(href_without_spaces)
+          .utf8_encode(fallback: :read_as_sanitized_binary)
+          .gsub(%r{[[:space:]]}, '')
+          .downcase
+          .start_with?('http', 'ftp', '//')
+      end
+
+      # Writes the cleaned href plus security attributes.
+      def href_set_values(node, value)
+        node.set_attribute('href', value)
+        node.set_attribute('rel', 'nofollow noreferrer noopener')
+
+        # do not "target=_blank" WebApp URLs (e.g. mentions)
+        return if value.downcase.start_with?(web_app_url_prefix)
+
+        node.set_attribute('target', '_blank')
+      end
+
+      # Extracts linkable URLs from a text node; nil for non-text nodes or
+      # text inside <a>/<pre> ancestors.
+      def node_urls(node)
+        return if !node.is_a?(Nokogiri::XML::Text)
+        return if node.content.blank?
+        return if node.content.exclude?(':')
+        return if node.ancestors.map(&:name).intersection(%w[a pre]).any?
+
+        URI.extract(node.content, LINKABLE_URL_SCHEMES)
+          .map { |u| u.sub(%r{[,.]$}, '') } # URI::extract captures trailing dots/commas
+          .grep_v(%r{^[^:]+:$}) # URI::extract will match, e.g., 'tel:'
+      end
+
+      # Unwraps <a> tags without an href into their children.
+      # Returns true when the node was replaced.
+      def ensure_href_present(node)
+        return if node.name != 'a'
+        return if node['href'].present?
+
+        node.replace node.children.to_s
+
+        true
+      end
+
+      # When the link text differs from its target, expose the target via
+      # the title attribute.
+      def update_node_title(node)
+        return if node.name != 'a'
+        return if url_same?(node['href'], node.text)
+        return if node['title'].present?
+
+        node['title'] = node['href']
+      end
+
+      # Recursively splits +content+ around each URL in +urls+, inserting
+      # generated <a> nodes as siblings of +node+.
+      def add_link(content, urls, node)
+        return if add_link_blank_text(content, urls, node)
+
+        url = urls.shift
+
+        return if content !~ %r{^(.*)#{Regexp.quote(url)}(.*)$}mx
+
+        pre  = $1
+        post = $2
+
+        a_elem = add_link_build_node(node, url)
+
+        if node.class != Nokogiri::XML::Text
+          text = Nokogiri::XML::Text.new(pre, node.document)
+          node.add_next_sibling(text).add_next_sibling(a_elem)
+          return if post.blank?
+
+          add_link(post, urls, a_elem)
+          return
+        end
+
+        add_link_apply_to_node(node, pre, a_elem)
+        return if post.blank?
+
+        add_link(post, urls, a_elem)
+      end
+
+      # Truncates the text node to the prefix and appends the <a> node.
+      def add_link_apply_to_node(node, pre, a_elem)
+        node.content = pre
+        node.add_next_sibling(a_elem)
+      end
+
+      # Recursion terminator: with no URLs left, emit the remaining text
+      # as a plain sibling. Returns true when it did so.
+      def add_link_blank_text(content, urls, node)
+        return false if urls.present?
+
+        text = Nokogiri::XML::Text.new(content, node.document)
+        node.add_next_sibling(text)
+
+        true
+      end
+
+      # Builds an <a> node for +url+; www.* gets an http:// prefix.
+      def add_link_build_node(node, url)
+        if url.match?(%r{^www}i)
+          url = "http://#{url}"
+        end
+
+        a = Nokogiri::XML::Node.new 'a', node.document
+        a['href'] = url
+        a['rel'] = 'nofollow noreferrer noopener'
+        a['target'] = '_blank'
+        a.content = url
+
+        a
+      end
+
+      # Strips control chars, CSS/HTML comment payloads and (optionally)
+      # all whitespace from a link target before further checks.
+      def cleanup_target(string, **options)
+        cleaned_string = string.utf8_encode(fallback: :read_as_sanitized_binary)
+        cleaned_string = cleaned_string.gsub(%r{[[:space:]]}, '') if !options[:keep_spaces]
+        cleaned_string = cleaned_string.strip
+                                       .delete("\t\n\r\u0000")
+                                       .gsub(%r{/\*.*?\*/}, '')
+                                       .gsub(%r{<!--.*?-->}, '')
+
+        sanitize_attachment_disposition(cleaned_string)
+      end
+
+      # For URLs on our own FQDN, forces disposition=attachment (if a
+      # disposition param is present) to prevent inline rendering.
+      # NOTE(review): the bare rescue swallows any StandardError (e.g.
+      # URI::InvalidURIError) and returns the raw url unchanged.
+      def sanitize_attachment_disposition(url)
+        @fqdn ||= Setting.get('fqdn')
+        uri = URI(url)
+
+        if uri.host == @fqdn && uri.query.present?
+          params = CGI.parse(uri.query || '')
+                      .tap { |p| p.merge!('disposition' => 'attachment') if p.include?('disposition') }
+          uri.query = URI.encode_www_form(params)
+        end
+
+        uri.to_s
+      rescue
+        url
+      end
+
+      # Compares two URLs after normalization, treating http/https-prefixed
+      # variants of the same location as equal.
+      def url_same?(url_new, url_old)
+        url_new = url_same_build(url_new)
+        url_old = url_same_build(url_old)
+
+        return true if url_new == url_old
+        return true if url_old == "http://#{url_new}"
+        return true if url_new == "http://#{url_old}"
+        return true if url_old == "https://#{url_new}"
+        return true if url_new == "https://#{url_old}"
+
+        false
+      end
+
+      # Normalization for url_same?: unescape, downcase, drop trailing
+      # slash and whitespace, decode entities, collapse "/?" into "?".
+      def url_same_build(input)
+        url = CGI
+          .unescape(input.to_s)
+          .utf8_encode(fallback: :read_as_sanitized_binary)
+          .downcase
+          .delete_suffix('/')
+          .gsub(%r{[[:space:]]|\t|\n|\r}, '')
+          .strip
+
+        html_decode(url)
+          .sub('/?', '?')
+      end
+    end
+  end
+end

Некоторые файлы не были показаны из-за большого количества измененных файлов