html_sanitizer_spec.rb 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. # frozen_string_literal: true
  2. require 'rails_helper'
  3. RSpec.describe HtmlSanitizer do
  # Specs for HtmlSanitizer.replace_inline_images.
  #
  # As used below, the first element of the return value is the rewritten
  # HTML body and the last element is the list of extracted inline attachments.
  #
  # NOTE(review): the `src="..."` fixtures in the base64 contexts look
  # truncated. The expectations (cid: rewrite, image/jpeg attachment) imply a
  # base64 `data:image/jpeg` URI was intended - confirm against the upstream
  # spec before relying on these examples.
  describe '.replace_inline_images' do
    let(:body) { described_class.replace_inline_images(html).first }
    let(:inline_attachments) { described_class.replace_inline_images(html).last }

    # Expectations shared by every context that embeds exactly one image.
    shared_examples 'extracting a single inline JPEG attachment' do
      it 'extracts one attachment' do
        expect(inline_attachments).to be_one
      end

      it 'sets filename to image1.jpeg' do
        expect(inline_attachments.first[:filename]).to eq('image1.jpeg')
      end

      it 'sets Content-Type to image/jpeg' do
        expect(inline_attachments.first[:preferences]['Content-Type']).to eq('image/jpeg')
      end

      it 'sets Content-ID based on Zammad fqdn' do
        # Literal substring check: interpolating the fqdn into a regex would
        # let its dots match any character.
        expect(inline_attachments.first[:preferences]['Content-ID']).to include("@#{Setting.get('fqdn')}")
      end

      it 'sets Content-Disposition to inline' do
        expect(inline_attachments.first[:preferences]['Content-Disposition']).to eq('inline')
      end
    end

    context 'for image at absolute path' do
      let(:html) { '<img src="/some_one.png" style="width: 181px; height: 125px" alt="abc">' }

      it 'keeps src attr as-is' do
        # Compared as a literal substring so the "." in the file name is not
        # treated as a regex wildcard.
        expect(body).to include('<img src="/some_one.png" style="width: 181px; height: 125px" alt="abc">')
      end

      it 'extracts no attachments' do
        expect(inline_attachments).to be_empty
      end
    end

    context 'for base64-encoded inline images' do
      context 'with src attr last' do
        let(:html) { '<img style="width: 181px; height: 125px" src="...">' }

        it 'converts embedded image to cid' do
          expect(body).to match(/<img style="width: 181px; height: 125px" src="cid:.+?">/)
        end

        include_examples 'extracting a single inline JPEG attachment'
      end

      context 'with src attr first' do
        let(:html) { '<img src="..." style="width: 181px; height: 125px" alt="abc">' }

        it 'converts embedded image to cid' do
          expect(body).to match(/<img src="cid:.+?" style="width: 181px; height: 125px" alt="abc">/)
        end

        include_examples 'extracting a single inline JPEG attachment'
      end

      context 'followed by an incomplete/invalid HTML tag' do
        let(:html) { '<img src="..." style="width: 181px; height: 125px" alt="abc"><invalid what ever' }

        it 'converts embedded image to cid' do
          expect(body).to match(/<img src="cid:.+?" style="width: 181px; height: 125px" alt="abc">/)
        end

        include_examples 'extracting a single inline JPEG attachment'
      end

      context 'nested in a <div>, mixed with other HTML elements' do
        let(:html) { '<div><img style="width: 181px; height: 125px" src="..."><p>123</p><img style="width: 181px; height: 125px" src="..."></div>' }

        it 'converts embedded image to cid' do
          expect(body).to match(%r{<div>\s+<img style="width: 181px; height: 125px" src="cid:.+?"><p>123</p>\s+<img style="width: 181px; height: 125px" src="cid:.+?">\s+</div>})
        end

        it 'extracts two attachments' do
          expect(inline_attachments.length).to be(2)
        end

        it 'sets filenames sequentially (as imageN.jpeg)' do
          expect(inline_attachments.first[:filename]).to eq('image1.jpeg')
          expect(inline_attachments.second[:filename]).to eq('image2.jpeg')
        end

        it 'sets Content-Types to image/jpeg' do
          expect(inline_attachments.first[:preferences]['Content-Type']).to eq('image/jpeg')
          expect(inline_attachments.second[:preferences]['Content-Type']).to eq('image/jpeg')
        end

        it 'sets Content-IDs based on Zammad fqdn' do
          # Literal substring check, see the shared example above.
          expect(inline_attachments.first[:preferences]['Content-ID']).to include("@#{Setting.get('fqdn')}")
          expect(inline_attachments.second[:preferences]['Content-ID']).to include("@#{Setting.get('fqdn')}")
        end

        it 'sets Content-Dispositions to inline' do
          expect(inline_attachments.first[:preferences]['Content-Disposition']).to eq('inline')
          expect(inline_attachments.second[:preferences]['Content-Disposition']).to eq('inline')
        end
      end
    end
  end
  # Specs for HtmlSanitizer.dynamic_image_size.
  #
  # The expected patterns below show a `max-width:100%;` rule being prepended
  # to the style attribute (or a style attribute being added when there is
  # none) and `height` becoming `max-height`.
  #
  # NOTE(review): the `src="..."` fixtures in the base64 contexts look
  # truncated. The expected patterns require a `data:image...` URI - confirm
  # against the upstream spec.
  describe '.dynamic_image_size' do
    context 'for base64-encoded inline images' do
      context 'with src attr last' do
        # `.chomp` drops the heredoc's trailing newline.
        let(:html) { <<~HTML.chomp }
          <img style="width: 181px; height: 125px" src="...">
        HTML

        it 'adds max-width: 100% rule to style attr' do
          expect(described_class.dynamic_image_size(html))
            .to match(/<img style="max-width:100%;width: 181px;max-height: 125px;" src="data:image.+?">/)
        end
      end

      context 'with src attr first' do
        let(:html) { <<~HTML.chomp }
          <img src="..." style="width: 181px; height: 125px" alt="abc">
        HTML

        it 'adds max-width: 100% rule to style attr' do
          expect(described_class.dynamic_image_size(html))
            .to match(/<img src="data:image.+?" style="max-width:100%;width: 181px;max-height: 125px;" alt="abc">/)
        end
      end
    end

    context 'for image at absolute path' do
      context 'with an existing style attr' do
        let(:html) { <<~HTML.chomp }
          <img src="/some_one.png" style="width: 181px; height: 125px" alt="abc">
        HTML

        it 'adds max-width: 100% rule to style attr' do
          expect(described_class.dynamic_image_size(html))
            .to match(%r{<img src="/some_one\.png" style="max-width:100%;width: 181px;max-height: 125px;" alt="abc">})
        end
      end

      context 'without a style attr' do
        let(:html) { <<~HTML.chomp }
          <img src="/some_one.png" alt="abc">
        HTML

        it 'adds a style attr with max-width: 100% rule' do
          expect(described_class.dynamic_image_size(html))
            .to match(%r{<img src="/some_one\.png" alt="abc" style="max-width:100%;">})
        end
      end
    end
  end
  # Issue #2416 - html_sanitizer goes into loop for specific content
  describe '.strict' do
    context 'with strings that take a long time (>10s) to parse' do
      # Make every Timeout.timeout call raise, as if parsing ran out of time.
      before do
        allow(Timeout).to receive(:timeout).and_raise(Timeout::Error)
      end

      it 'returns a timeout error message for the user' do
        sanitized = described_class.strict(+'<img src="/some_one.png">', true)

        expect(sanitized).to match(HtmlSanitizer::UNPROCESSABLE_HTML_MSG)
      end
    end

    context 'with href links that contain square brackets' do
      let(:html) { +'<a href="https://example.com/?foo=bar&baz[x]=y">example</a>' }
      let(:sanitized_html) do
        '<a href="https://example.com/?foo=bar&amp;baz%5Bx%5D=y" rel="nofollow noreferrer noopener" target="_blank" title="https://example.com/?foo=bar&amp;baz%5Bx%5D=y">example</a>'
      end

      it 'correctly URL encodes them' do
        expect(described_class.strict(html, true)).to eq(sanitized_html)
      end
    end

    context 'with href links that contain http urls' do
      let(:html) { +'<a href="https://example.com/?foo=https%3A%2F%2Fexample.com%3Flala%3A123">example</a>' }
      let(:sanitized_html) do
        '<a href="https://example.com/?foo=https%3A%2F%2Fexample.com%3Flala%3A123" rel="nofollow noreferrer noopener" target="_blank" title="https://example.com/?foo=https%3A%2F%2Fexample.com%3Flala%3A123">example</a>'
      end

      it 'correctly URL encodes them' do
        expect(described_class.strict(html, true)).to eq(sanitized_html)
      end
    end
  end
  # Specs for HtmlSanitizer.cleanup.
  describe '.cleanup' do
    context 'with strings that take a long time (>10s) to parse' do
      # Make every Timeout.timeout call raise, as if parsing ran out of time.
      before do
        allow(Timeout).to receive(:timeout).and_raise(Timeout::Error)
      end

      it 'returns a timeout error message for the user' do
        cleaned = described_class.cleanup(+'<img src="/some_one.png">')

        expect(cleaned).to match(HtmlSanitizer::UNPROCESSABLE_HTML_MSG)
      end
    end
  end
  180. end