cleanup.rb 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. # Copyright (C) 2012-2025 Zammad Foundation, https://zammad-foundation.org/
  class HtmlSanitizer
    module Scrubber
      # Scrubber that tidies up the whitespace of text nodes.
      #
      # Only Nokogiri::XML::Text nodes are modified; text whose direct parent
      # is a <pre> or <code> element is left exactly as it is.
      class Cleanup < Base

        # Normalizes the content of +node+ when it is a text node that does
        # not sit directly inside <pre> or <code>.
        #
        # @param node the node to inspect
        # @return [String, nil] the content written back to the node, or nil
        #   when the node was skipped
        def scrub(node)
          return unless node.instance_of?(Nokogiri::XML::Text)

          parent_tag = node.parent&.name
          return if %w[pre code].include?(parent_tag)

          update_node_content(node)
        end

        private

        # Runs the three clean-up steps in order (collapse whitespace, strip
        # relative to the previous sibling, strip relative to the next
        # sibling / parent) and writes the result back to the node.
        # Does nothing when the node has no content.
        def update_node_content(node)
          text = node.content
          return unless text

          collapsed = remove_space_if_needed(text)
          trimmed   = strip_if_needed_previous(node, collapsed)

          node.content = strip_if_needed_next(node, trimmed)
        end

        # Collapses every run of whitespace in +content+.
        #
        # See https://github.com/zammad/zammad/issues/4223:
        # a run containing two consecutive newlines becomes exactly "\n\n",
        # every other run becomes a single space. Content that is just one
        # space or one newline is returned untouched.
        def remove_space_if_needed(content)
          return content if space_or_nl?(content)

          content.gsub(%r{[[:space:]]+}) do |whitespace_run|
            if whitespace_run.include?("\n\n")
              "\n\n"
            else
              ' '
            end
          end
        end

        # Strips +content+ when the node's previous sibling is a <div> or <p>;
        # otherwise returns +content+ unchanged.
        def strip_if_needed_previous(node, content)
          sibling = node.previous
          return content unless sibling && div_or_p?(sibling)

          content.strip
        end

        # Strips +content+ only when all of the following hold:
        # * the node has a parent and no previous sibling,
        # * the next sibling is absent or is a <div>, <p> or <br>,
        # * the parent is a <div> or <p>,
        # * the content is not just a single space or a single newline.
        # In every other case +content+ is returned unchanged.
        def strip_if_needed_next(node, content)
          parent = node.parent
          return content if !parent || node.previous

          successor = node.next
          blocked_by_successor = successor && !%w[div p br].include?(successor.name)
          return content if blocked_by_successor || !div_or_p?(parent) || space_or_nl?(content)

          content.strip
        end

        # True when +string+ is exactly one space or exactly one newline.
        def space_or_nl?(string)
          case string
          when ' ', "\n" then true
          else false
          end
        end

        # True when the element name of +node+ is "div" or "p".
        def div_or_p?(node)
          tag = node.name
          tag == 'div' || tag == 'p'
        end
      end
    end
  end