htmlCleanup.ts 3.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. // Copyright (C) 2012-2024 Zammad Foundation, https://zammad-foundation.org/
  2. import { wordFilter } from './wordFilter.ts'
  // Unwraps every descendant of `parent` that matches `selector`: the matched
  // element itself is dropped and its child nodes take its place in the tree.
  const replaceWithContent = (parent: Element, selector: string) => {
    for (const wrapper of Array.from(parent.querySelectorAll(selector))) {
      const innerNodes = Array.from(wrapper.childNodes)
      wrapper.replaceWith(...innerNodes)
    }
  }
  // Detaches every descendant of `parent` that matches `selector`, together with
  // everything nested inside it.
  const removeElements = (parent: Element, selector: string) => {
    const targets = Array.from(parent.querySelectorAll(selector))
    for (const target of targets) {
      target.remove()
    }
  }
  // Recursively removes every comment node found below `parent`.
  const removeComments = (parent: Node) => {
    if (!parent.hasChildNodes()) return

    // `childNodes` is a live collection: removing a node while iterating it
    // directly shifts the next sibling into the current index, so that sibling
    // would be skipped (e.g. the second of two adjacent comments would survive).
    // Iterating over a snapshot makes sure every child is visited exactly once.
    Array.from(parent.childNodes).forEach((node) => {
      if (node.nodeType === Node.COMMENT_NODE) {
        node.remove()
        // a comment has no children, nothing left to descend into
        return
      }

      removeComments(node)
    })
  }
  // Decides, from the node that directly follows a <br>, whether that <br> is
  // redundant and should be dropped:
  //  - nothing follows it (the editor will add its own line break anyway)
  //  - it is followed by a node that is neither text nor another <br>
  //  - it is followed by a whitespace-only text node that is itself the last sibling
  const followerMakesBreakRedundant = (follower: ChildNode | null): boolean => {
    if (!follower) return true

    if (follower.nodeType !== Node.TEXT_NODE) {
      return (follower as Element).tagName !== 'BR'
    }

    return !follower.nextSibling && follower.textContent?.trim().length === 0
  }

  // ProseMirror requires a trailing line break, so the editor always renders one
  // on its own. A <br> already present at the end of a paragraph would therefore
  // show up as two line breaks; this strips such <br> elements so the editor can
  // safely add its "visual" one.
  const removeTrailingLineBreaks = (parent: Element) => {
    for (const lineBreak of Array.from(parent.querySelectorAll('br'))) {
      const container = lineBreak.parentElement

      // a <br> that is the only child of its parent is kept as is, unless the
      // parent is a <div>: then the <div> is swapped for a <p> carrying the
      // same attributes
      if (container && container.childNodes.length === 1) {
        if (container.tagName === 'DIV') {
          const paragraph = document.createElement('p')
          Array.from(container.attributes).forEach(({ name, value }) => {
            paragraph.setAttribute(name, value)
          })
          container.replaceWith(paragraph)
        }
        continue
      }

      if (followerMakesBreakRedundant(lineBreak.nextSibling)) {
        lineBreak.remove()
      }
    }
  }
  // Hands `parent` over to `wordFilter` when its serialized markup contains an
  // opening or closing tag from the `w:` or `o:` namespace; otherwise returns
  // `parent` unchanged.
  const removeWordMarkup = (parent: Element) => {
    const markup = parent.outerHTML
    const wordTagPatterns = [/<(\/w|w):[A-Za-z]/, /<(\/o|o):[A-Za-z]/]
    const hasWordTags = wordTagPatterns.some((pattern) => pattern.test(markup))

    return hasWordTags ? wordFilter(parent) : parent
  }
  /**
   * Cleans up an HTML string and returns the resulting markup.
   *
   * The markup is parsed into a detached `<div>`; comments and Word markup are
   * stripped, a few wrapper elements are replaced by their content, a fixed list
   * of unwanted elements is removed and redundant line breaks are dropped.
   *
   * @param html - markup to clean up
   * @param removeImages - when `true`, `img` elements are replaced by their content as well
   * @returns the cleaned markup, without the newlines surrounding tags
   */
  export const htmlCleanup = (html: string, removeImages = false): string => {
    const container: Element = document.createElement('div')
    container.innerHTML = html

    removeComments(container)
    removeWordMarkup(container)

    replaceWithContent(container, 'small, time, form, label')
    if (removeImages) replaceWithContent(container, 'img')

    removeElements(
      container,
      'svg, input, select, button, style, applet, embed, noframes, canvas, script, frame, iframe, meta, link, title, head, fieldset',
    )

    removeTrailingLineBreaks(container)

    // attributes are left untouched on purpose: the editor doesn't put unknown
    // attributes on html elements

    // the editor treats stray newlines as real new lines, which in lists would
    // turn into additional list items, so newlines around tags are stripped
    const serialized = container.innerHTML
    const withoutNewlinesBeforeTags = serialized.replace(/\n\s*</g, '<')
    return withoutNewlinesBeforeTags.replace(/>\n/g, '>')
  }