htmlCleanup.ts 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. // Copyright (C) 2012-2023 Zammad Foundation, https://zammad-foundation.org/
  2. import { wordFilter } from './wordFilter'
  /**
   * Unwraps every descendant of `parent` that matches `selector`: the matching
   * element is replaced in place by its own child nodes, so the element itself
   * disappears while its content stays in the document.
   *
   * @param parent - Element whose descendants are searched.
   * @param selector - CSS selector naming the elements to unwrap.
   */
  const replaceWithContent = (parent: Element, selector: string) => {
    const wrappers = Array.from(parent.querySelectorAll(selector))

    for (const wrapper of wrappers) {
      // Copy the child list first; the spread hands the nodes over individually.
      const contents = Array.from(wrapper.childNodes)
      wrapper.replaceWith(...contents)
    }
  }
  /**
   * Deletes every descendant of `parent` that matches `selector`, together
   * with everything inside it.
   *
   * @param parent - Element whose descendants are searched.
   * @param selector - CSS selector naming the elements to delete.
   */
  const removeElements = (parent: Element, selector: string) => {
    const targets = Array.from(parent.querySelectorAll(selector))

    for (const target of targets) {
      target.remove()
    }
  }
  /**
   * Recursively removes all comment nodes below `parent`.
   *
   * `childNodes` is a live list: removing a node while iterating it directly
   * shifts the remaining entries and makes the loop skip the next sibling
   * (adjacent comments, or an element right after a comment, would be missed).
   * The children are therefore copied into an array before iterating.
   *
   * @param parent - Node whose subtree is stripped of comments (mutated in place).
   */
  const removeComments = (parent: Node) => {
    if (!parent.hasChildNodes()) return

    Array.from(parent.childNodes).forEach((node) => {
      if (node.nodeType === Node.COMMENT_NODE) {
        node.remove()
        // A comment has no children, nothing left to descend into.
        return
      }

      removeComments(node)
    })
  }
  /**
   * Runs `wordFilter` on `parent` when its serialized markup contains an
   * opening or closing tag prefixed with `w:` or `o:` (presumably the
   * namespaces used by pasted MS Word content - confirm against `wordFilter`).
   *
   * @param parent - Element to inspect.
   * @returns The result of `wordFilter(parent)` if such a tag was found,
   *   otherwise `parent` itself, untouched.
   */
  const removeWordMarkup = (parent: Element) => {
    const markup = parent.outerHTML
    const prefixedTagPatterns = [/<(\/w|w):[A-Za-z]/, /<(\/o|o):[A-Za-z]/]
    const hasPrefixedTags = prefixedTagPatterns.some((pattern) =>
      pattern.test(markup),
    )

    return hasPrefixedTags ? wordFilter(parent) : parent
  }
  /**
   * Parses `html` into a detached container, strips unwanted markup and
   * returns the remaining inner HTML.
   *
   * Steps, in order:
   * 1. If the input consists of exactly one element (ignoring comments and
   *    whitespace-only text), that element becomes the working root, so its
   *    own tag is not part of the result.
   * 2. Comments are removed and `removeWordMarkup` is applied.
   * 3. `small`, `time`, `form` and `label` (and `img` when `removeImages` is
   *    set) are replaced by their content.
   * 4. Elements from the removal list below are deleted entirely.
   *
   * @param html - Markup to clean.
   * @param removeImages - When true, `img` elements are dropped as well.
   * @returns The cleaned markup as a string.
   */
  export const htmlCleanup = (html: string, removeImages = false) => {
    const removedSelector =
      'svg, input, select, button, style, applet, embed, noframes, canvas, script, frame, iframe, meta, link, title, head, fieldset'

    let element: Element = document.createElement('div')
    element.innerHTML = html

    // `children` only counts elements, so also make sure no real text sits
    // next to the single element - unwrapping would silently drop that text.
    const soleChild =
      element.children.length === 1 ? element.firstElementChild : null
    const hasLooseText = Array.from(element.childNodes).some(
      (node) =>
        node.nodeType === Node.TEXT_NODE &&
        (node.textContent ?? '').trim() !== '',
    )

    // Never unwrap an element that is itself on the removal list: the selector
    // based removal below only sees descendants of the root, so its raw content
    // (e.g. the text of a <script> or <noframes>) would leak into the result.
    if (soleChild && !hasLooseText && !soleChild.matches(removedSelector)) {
      element = soleChild
    }

    removeComments(element)
    removeWordMarkup(element)
    replaceWithContent(element, 'small, time, form, label')

    if (removeImages) {
      replaceWithContent(element, 'img')
    }

    removeElements(element, removedSelector)

    // We don't need to remove attributes here, because the editor doesn't put
    // unknown attributes on html elements.
    return element.innerHTML
  }