# Spec for HtmlSanitizer. Covers .replace_inline_images, .dynamic_image_size, .strict and .cleanup.
# NOTE(review): in this copy the file is collapsed onto a handful of physical lines, so the leading `#`
# of the copyright header turns the entire first line into a comment - TODO restore the line breaks.
# NOTE(review): the HTML fixtures inside string literals, heredocs and %r{} patterns look stripped of
# their markup (e.g. `let(:html) { '' }`, `match(%r{})`, empty HTML/REGEX heredocs, and `.strict` examples
# whose input and expected output are both 'example'); presumably the tags were lost during extraction -
# confirm against the upstream file before relying on any of these examples.
# Copyright (C) 2012-2025 Zammad Foundation, https://zammad-foundation.org/ require 'rails_helper' RSpec.describe HtmlSanitizer, :aggregate_failures do describe '.replace_inline_images' do let(:body) { described_class.replace_inline_images(html).first } let(:inline_attachments) { described_class.replace_inline_images(html).last } context 'when called for image at absolute path' do let(:html) { 'abc' } it 'keeps src attr as-is' do expect(body).to match(%r{abc}) end it 'extracts no attachments' do expect(inline_attachments).to be_empty end end context 'when called for base64-encoded inline images' do context 'with src attr last' do let(:html) { '' } it 'converts embedded image to cid' do expect(body).to match(%r{}) end it 'extracts one attachment' do expect(inline_attachments).to be_one end it 'sets filename to image1.jpeg' do expect(inline_attachments.first[:filename]).to eq('image1.jpeg') end it 'sets Content-Type to image/jpeg' do expect(inline_attachments.first[:preferences]['Content-Type']).to eq('image/jpeg') end it 'sets Content-ID based on Zammad fqdn' do expect(inline_attachments.first[:preferences]['Content-ID']).to match(%r{@#{Setting.get('fqdn')}}) end it 'sets Content-Disposition to inline' do expect(inline_attachments.first[:preferences]['Content-Disposition']).to eq('inline') end end context 'with src attr first' do let(:html) { 'abc' } it 'converts embedded image to cid' do expect(body).to match(%r{abc}) end it 'extracts one attachment' do expect(inline_attachments).to be_one end it 'sets filename to image1.jpeg' do expect(inline_attachments.first[:filename]).to eq('image1.jpeg') end it 'sets Content-Type to image/jpeg' do expect(inline_attachments.first[:preferences]['Content-Type']).to eq('image/jpeg') end it 'sets Content-ID based on Zammad fqdn' do expect(inline_attachments.first[:preferences]['Content-ID']).to match(%r{@#{Setting.get('fqdn')}}) end it 'sets Content-Disposition to inline' do 
expect(inline_attachments.first[:preferences]['Content-Disposition']).to eq('inline') end end context 'when followed by an incomplete/invalid HTML tag' do let(:html) { 'abc}) end it 'extracts one attachment' do expect(inline_attachments).to be_one end it 'sets filename to image1.jpeg' do expect(inline_attachments.first[:filename]).to eq('image1.jpeg') end it 'sets Content-Type to image/jpeg' do expect(inline_attachments.first[:preferences]['Content-Type']).to eq('image/jpeg') end it 'sets Content-ID based on Zammad fqdn' do expect(inline_attachments.first[:preferences]['Content-ID']).to match(%r{@#{Setting.get('fqdn')}}) end it 'sets Content-Disposition to inline' do expect(inline_attachments.first[:preferences]['Content-Disposition']).to eq('inline') end end context 'when nested in a
, mixed with other HTML elements' do let(:html) { '

123

' } it 'converts embedded image to cid' do expect(body).to match(%r{
\s+

123

\s+\s+
}) end it 'extracts two attachments' do expect(inline_attachments.length).to be(2) end it 'sets filenames sequentially (as imageN.jpeg)' do expect(inline_attachments.first[:filename]).to eq('image1.jpeg') expect(inline_attachments.second[:filename]).to eq('image2.jpeg') end it 'sets Content-Types to image/jpeg' do expect(inline_attachments.first[:preferences]['Content-Type']).to eq('image/jpeg') expect(inline_attachments.second[:preferences]['Content-Type']).to eq('image/jpeg') end it 'sets Content-IDs based on Zammad fqdn' do expect(inline_attachments.first[:preferences]['Content-ID']).to match(%r{@#{Setting.get('fqdn')}}) expect(inline_attachments.second[:preferences]['Content-ID']).to match(%r{@#{Setting.get('fqdn')}}) end it 'sets Content-Dispositions to inline' do expect(inline_attachments.first[:preferences]['Content-Disposition']).to eq('inline') expect(inline_attachments.second[:preferences]['Content-Disposition']).to eq('inline') end end end context 'when processing pre elements' do let(:html) do '
apt-get update
Get:1 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]
Hit:2 http://de.archive.ubuntu.com/ubuntu focal InRelease
Building dependency tree...
' end it 'does not convert links' do expect(body).to eq(html) end end end describe '.dynamic_image_size' do context 'when called for image at absolute path' do context 'with src attr last' do it 'add max-width: 100% rule to style attr' do expect(described_class.dynamic_image_size(<<~HTML.chomp)).to match(Regexp.new(<<~REGEX.chomp)) HTML REGEX end end context 'with src attr first' do it 'add max-width: 100% rule to style attr' do expect(described_class.dynamic_image_size(<<~HTML.chomp)).to match(Regexp.new(<<~REGEX.chomp)) abc HTML abc REGEX end end end context 'when called for base64-encoded inline images' do context 'with src attr last' do it 'add max-width: 100% rule to style attr' do expect(described_class.dynamic_image_size(<<~HTML.chomp)).to match(Regexp.new(<<~REGEX.chomp)) abc HTML abc REGEX end end context 'with src attr first' do it 'add max-width: 100% rule to style attr' do expect(described_class.dynamic_image_size(<<~HTML.chomp)).to match(Regexp.new(<<~REGEX.chomp)) abc HTML abc REGEX end end end end # Issue #2416 - html_sanitizer goes into loop for specific content describe '.strict' do context 'with strings that take a long time (>10s) to parse' do before { allow(Timeout).to receive(:timeout).and_raise(Timeout::Error) } it 'returns a timeout error message for the user' do expect(described_class.strict(+'', true)) .to match(HtmlSanitizer::UNPROCESSABLE_HTML_MSG) end end context 'with href links that contain square brackets' do it 'correctly URL encodes them' do expect(described_class.strict(+'example', true)) .to eq('example') end end context 'with href links that contain http urls' do it 'correctly URL encodes them' do expect(described_class.strict(+'example', true)) .to eq('example') end end context 'when HTML sanitizer is removing attributes/styles which are white listed. #4605' do it 'does not remove whitelisted attributes width' do expect(described_class.strict('
123
')).to eq('
123
') end end context 'when handling tags' do let(:source) { '<title>some title

actual content

' } let(:target) { '

actual content

' } it 'removes them' do expect(described_class.strict(source)).to eq(target) end end end describe '.cleanup' do context 'with strings that take a long time (>10s) to parse' do before { allow(Timeout).to receive(:timeout).and_raise(Timeout::Error) } it 'returns a timeout error message for the user' do expect(described_class.cleanup(+'')) .to match(HtmlSanitizer::UNPROCESSABLE_HTML_MSG) end end end end