html_sanitizer_spec.rb 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. # Copyright (C) 2012-2022 Zammad Foundation, https://zammad-foundation.org/
  2. # frozen_string_literal: true
  3. require 'rails_helper'
  4. RSpec.describe HtmlSanitizer do
  # Specs for HtmlSanitizer.replace_inline_images.
  #
  # Every example reads the first element of the method's return value as the
  # rewritten HTML (`body`) and the last element as the list of extracted
  # attachments (`inline_attachments`).
  describe '.replace_inline_images' do
    let(:body)               { described_class.replace_inline_images(html).first }
    let(:inline_attachments) { described_class.replace_inline_images(html).last }

    # The fqdn is escaped so that its dots (and any other regex
    # metacharacters) are matched literally rather than as wildcards.
    let(:content_id_pattern) { %r{@#{Regexp.escape(Setting.get('fqdn').to_s)}} }

    # Expectations shared by every context whose HTML embeds exactly one
    # base64-encoded JPEG.
    shared_examples 'extracting a single inline JPEG' do
      it 'extracts one attachment' do
        expect(inline_attachments).to be_one
      end

      it 'sets filename to image1.jpeg' do
        expect(inline_attachments.first[:filename]).to eq('image1.jpeg')
      end

      it 'sets Content-Type to image/jpeg' do
        expect(inline_attachments.first[:preferences]['Content-Type']).to eq('image/jpeg')
      end

      it 'sets Content-ID based on Zammad fqdn' do
        expect(inline_attachments.first[:preferences]['Content-ID']).to match(content_id_pattern)
      end

      it 'sets Content-Disposition to inline' do
        expect(inline_attachments.first[:preferences]['Content-Disposition']).to eq('inline')
      end
    end

    context 'for image at absolute path' do
      let(:html) { '<img src="/some_one.png" style="width: 181px; height: 125px" alt="abc">' }

      it 'keeps src attr as-is' do
        # The dot is escaped so only the literal file name satisfies the pattern.
        expect(body).to match(%r{<img src="/some_one\.png" style="width: 181px; height: 125px" alt="abc">})
      end

      it 'extracts no attachments' do
        expect(inline_attachments).to be_empty
      end
    end

    context 'for base64-encoded inline images' do
      context 'with src attr last' do
        let(:html) { '<img style="width: 181px; height: 125px" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...">' }

        it 'converts embedded image to cid' do
          expect(body).to match(%r{<img style="width: 181px; height: 125px" src="cid:.+?">})
        end

        include_examples 'extracting a single inline JPEG'
      end

      context 'with src attr first' do
        let(:html) { '<img src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/..." style="width: 181px; height: 125px" alt="abc">' }

        it 'converts embedded image to cid' do
          expect(body).to match(%r{<img src="cid:.+?" style="width: 181px; height: 125px" alt="abc">})
        end

        include_examples 'extracting a single inline JPEG'
      end

      context 'followed by an incomplete/invalid HTML tag' do
        let(:html) { '<img src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/..." style="width: 181px; height: 125px" alt="abc"><invalid what ever' }

        it 'converts embedded image to cid' do
          expect(body).to match(%r{<img src="cid:.+?" style="width: 181px; height: 125px" alt="abc">})
        end

        include_examples 'extracting a single inline JPEG'
      end

      context 'nested in a <div>, mixed with other HTML elements' do
        let(:html) { '<div><img style="width: 181px; height: 125px" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/..."><p>123</p><img style="width: 181px; height: 125px" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/..."></div>' }

        it 'converts embedded image to cid' do
          expect(body).to match(%r{<div>\s+<img style="width: 181px; height: 125px" src="cid:.+?"><p>123</p>\s+<img style="width: 181px; height: 125px" src="cid:.+?">\s+</div>})
        end

        it 'extracts two attachments' do
          # `eq` compares by value; `be` would assert object identity.
          expect(inline_attachments.length).to eq(2)
        end

        it 'sets filenames sequentially (as imageN.jpeg)' do
          expect(inline_attachments.first[:filename]).to eq('image1.jpeg')
          expect(inline_attachments.second[:filename]).to eq('image2.jpeg')
        end

        it 'sets Content-Types to image/jpeg' do
          expect(inline_attachments.first[:preferences]['Content-Type']).to eq('image/jpeg')
          expect(inline_attachments.second[:preferences]['Content-Type']).to eq('image/jpeg')
        end

        it 'sets Content-IDs based on Zammad fqdn' do
          expect(inline_attachments.first[:preferences]['Content-ID']).to match(content_id_pattern)
          expect(inline_attachments.second[:preferences]['Content-ID']).to match(content_id_pattern)
        end

        it 'sets Content-Dispositions to inline' do
          expect(inline_attachments.first[:preferences]['Content-Disposition']).to eq('inline')
          expect(inline_attachments.second[:preferences]['Content-Disposition']).to eq('inline')
        end
      end
    end

    context 'for <pre> elements' do
      # NOTE(review): assumes the continuation lines of the original literal
      # started at column 0 (no leading whitespace inside the <pre>) - confirm.
      let(:html) do
        <<~HTML.chomp
          <pre><code>apt-get update
          Get:1 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]
          Hit:2 http://de.archive.ubuntu.com/ubuntu focal InRelease
          Building dependency tree...</code></pre>
        HTML
      end

      it 'returns the content unchanged (does not convert links)' do
        expect(body).to eq(html)
      end
    end
  end
  # Specs for HtmlSanitizer.dynamic_image_size.
  #
  # Each example feeds one <img> tag to the method and matches the returned
  # string against a pattern. The expected patterns show a `max-width:100%;`
  # rule being added and `height` becoming `max-height`.
  describe '.dynamic_image_size' do
    # Fixtures in this group use a `data:` URI as the image source.
    context 'for base64-encoded inline images' do
      context 'with src attr last' do
        it 'adds max-width: 100% rule to style attr' do
          expect(described_class.dynamic_image_size(<<~HTML.chomp)).to match(Regexp.new(<<~REGEX.chomp))
            <img style="width: 181px; height: 125px" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...">
          HTML
            <img style="max-width:100%;width: 181px;max-height: 125px;" src="data:image.+?">
          REGEX
        end
      end

      context 'with src attr first' do
        it 'adds max-width: 100% rule to style attr' do
          expect(described_class.dynamic_image_size(<<~HTML.chomp)).to match(Regexp.new(<<~REGEX.chomp))
            <img src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/..." style="width: 181px; height: 125px" alt="abc">
          HTML
            <img src="data:image.+?" style="max-width:100%;width: 181px;max-height: 125px;" alt="abc">
          REGEX
        end
      end
    end

    # Fixtures in this group reference the image by an absolute path.
    context 'for image at absolute path' do
      context 'with an existing style attr' do
        it 'adds max-width: 100% rule to style attr' do
          expect(described_class.dynamic_image_size(<<~HTML.chomp)).to match(Regexp.new(<<~REGEX.chomp))
            <img src="/some_one.png" style="width: 181px; height: 125px" alt="abc">
          HTML
            <img src="/some_one.png" style="max-width:100%;width: 181px;max-height: 125px;" alt="abc">
          REGEX
        end
      end

      context 'without a style attr' do
        it 'adds a style attr containing the max-width: 100% rule' do
          expect(described_class.dynamic_image_size(<<~HTML.chomp)).to match(Regexp.new(<<~REGEX.chomp))
            <img src="/some_one.png" alt="abc">
          HTML
            <img src="/some_one.png" alt="abc" style="max-width:100%;">
          REGEX
        end
      end
    end
  end
  # Issue #2416 - html_sanitizer goes into loop for specific content
  #
  # Specs for HtmlSanitizer.strict. Every example passes `true` as the second
  # positional argument and an unfrozen copy (`+'...'`) of the fixture string.
  describe '.strict' do
    subject(:sanitized) { described_class.strict(html, true) }

    context 'with strings that take a long time (>10s) to parse' do
      let(:html) { +'<img src="/some_one.png">' }

      # Timeout.timeout is stubbed to raise, so the timeout path is exercised
      # without needing an input that is actually slow to parse.
      before do
        allow(Timeout).to receive(:timeout).and_raise(Timeout::Error)
      end

      it 'returns a timeout error message for the user' do
        expect(sanitized).to match(HtmlSanitizer::UNPROCESSABLE_HTML_MSG)
      end
    end

    context 'with href links that contain square brackets' do
      let(:html) { +'<a href="https://example.com/?foo=bar&baz[x]=y">example</a>' }
      let(:expected_html) do
        '<a href="https://example.com/?foo=bar&amp;baz%5Bx%5D=y" rel="nofollow noreferrer noopener" target="_blank" title="https://example.com/?foo=bar&amp;baz%5Bx%5D=y">example</a>'
      end

      it 'correctly URL encodes them' do
        expect(sanitized).to eq(expected_html)
      end
    end

    context 'with href links that contain http urls' do
      let(:html) { +'<a href="https://example.com/?foo=https%3A%2F%2Fexample.com%3Flala%3A123">example</a>' }

      # The already percent-encoded query value is expected to come back
      # unchanged in both the href and the title attribute.
      let(:expected_html) do
        '<a href="https://example.com/?foo=https%3A%2F%2Fexample.com%3Flala%3A123" rel="nofollow noreferrer noopener" target="_blank" title="https://example.com/?foo=https%3A%2F%2Fexample.com%3Flala%3A123">example</a>'
      end

      it 'correctly URL encodes them' do
        expect(sanitized).to eq(expected_html)
      end
    end
  end
  # Specs for HtmlSanitizer.cleanup.
  describe '.cleanup' do
    context 'with strings that take a long time (>10s) to parse' do
      # `+'...'` hands the method an unfrozen copy of the literal.
      subject(:cleaned) { described_class.cleanup(+'<img src="/some_one.png">') }

      # Timeout.timeout is stubbed to raise, so the timeout path is exercised
      # without needing an input that is actually slow to parse.
      before do
        allow(Timeout).to receive(:timeout).and_raise(Timeout::Error)
      end

      it 'returns a timeout error message for the user' do
        expect(cleaned).to match(HtmlSanitizer::UNPROCESSABLE_HTML_MSG)
      end
    end
  end
  192. end