# Copyright (C) 2012-2024 Zammad Foundation, https://zammad-foundation.org/ # frozen_string_literal: true require 'rails_helper' RSpec.describe String do describe '#strip' do context 'default behavior' do it 'removes leading/trailing spaces' do expect(' test '.strip).to eq('test') end it 'removes trailing newlines' do expect("test\n".strip).to eq('test') end it 'does not remove internal spaces / newlines' do expect("test \n test".strip).to eq("test \n test") end end context 'monkey-patched behavior' do it 'removes leading/trailing zero-width spaces, but not internal ones' do expect(" \r\n test \u{200B} \n test\u{200B} \u{200B}".strip) .to eq("test \u{200B} \n test") end it 'does not break on non-unicode strings' do expect(described_class.new("\xC2\xA92011 Z ", encoding: 'ASCII-8BIT').strip) .to eq(described_class.new("\xC2\xA92011 Z", encoding: 'ASCII-8BIT')) end end end describe '#strip!' do context 'default behavior' do it 'removes leading/trailing spaces (in place)' do str = +' test ' expect(str.strip!).to be(str).and eq('test') end it 'removes trailing newlines (in place)' do str = +"test\n" expect(str.strip!).to be(str).and eq('test') end it 'does not remove internal spaces / newlines (in place)' do str = +"test \n test " expect(str.strip!).to be(str).and eq(str) end end context 'monkey-patched behavior' do it 'removes leading/trailing zero-width spaces, but not internal ones (in place)' do str = +" \r\n test \u{200B} \n test\u{200B} \u{200B}" expect(str.strip!).to be(str).and eq("test \u{200B} \n test") end it 'does not break on invalid-unicode strings (in place)' do str = described_class.new("\xC2\xA92011 Z ", encoding: 'ASCII-8BIT') expect(str.strip!) .to be(str).and eq(described_class.new("\xC2\xA92011 Z", encoding: 'ASCII-8BIT')) end end end describe '#to_filename' do it 'does not modify strings in place' do %w[test Some::File].each do |str| expect { str.to_filename }.not_to change { str } end end it 'leaves all-downcase strings as-is' do expect('test'.to_filename).to eq('test') end it 'converts camelcase Ruby constant paths to snakecase file paths' do expect('Some::File'.to_filename).to eq('some/file') end end describe '#to_classname' do it 'does not modify strings in place' do %w[test some/file].each do |str| expect { str.to_classname }.not_to change { str } end end it 'capitalizes all-downcase strings' do expect('test'.to_classname).to eq('Test') end it 'converts snakecase file paths to camelcase Ruby constant paths' do expect('some/file'.to_classname).to eq('Some::File') end context 'unlike ActiveSupport’s #classify' do it 'preserves pluralized names' do expect('some/files'.to_classname).to eq('Some::Files') expect('some_test/files'.to_classname).to eq('SomeTest::Files') end end end describe '#html2text' do it 'does not modify strings in place' do %w[test
test
].each do |str| expect { str.html2text }.not_to change { str } end end it 'leaves human-readable text as-is' do expect('test'.html2text).to eq('test') end it 'strips leading/trailing spaces' do expect(' test '.html2text).to eq('test') end it 'also strips leading/trailing newlines' do expect("\n\n test \n\n\n".html2text).to eq('test') end it 'strips HTML tags around text content' do expect('
test
'.html2text).to eq('test') end it 'strips trailing
inside last
' do expect('
test
'.html2text).to eq('test') end it 'strips trailing
and newlines inside last
' do expect("
test


\n
\n
\n
".html2text).to eq('test') end it 'strips trailing
, newlines, and spaces inside last
' do expect("
test


\n
\n
\n
".html2text).to eq('test') end it 'strips trailing
, newlines, and   inside last
' do expect("
test

 
 \n
 \n
 \n
".html2text).to eq('test') end it 'strips trailing whitespace (including   &
) both inside and after last tag' do expect("
test

 
 \n
 \n
 \n
 ".html2text).to eq('test') end it 'also strips nested HTML tags' do expect("

Was\nsoll verbessert werden:

".html2text) .to eq('Was soll verbessert werden:') end it 'in
 elements, collapses multiple newlines into one' do
      expect("
test\n\ntest
".html2text).to eq("test\ntest") end it 'in elements, collapses multiple newlines into one' do expect("test\n\ntest".html2text).to eq("test\ntest") end it 'converts cells and row to space-separated lines' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
testcol
test4711
HTML test col test 4711 TEXT end it 'strips HTML comments' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
test




HTML test TEXT end it 'converts elements to plain text with numerical references' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
Best Tool of the World some other text
HTML [1] Best Tool of the Worldsome other text [1] https://zammad.org TEXT end it 'converts
elements to separate paragraphs containing only "___"' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
test




HTML test ___ TEXT end it 'converts
elements to newlines (max. 2)' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp) test


--
abc
HTML test -- abc TEXT end it 'strips Microsoft Outlook conditional comments' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp) Ihr RZ-Team

HTML Ihr RZ-Team TEXT end it 'strips elements' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp) Neues Fax von 1234-93900 HTML TEXT end it 'handles sample input 11' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
Dear Bob:Mr/Mrs

We are one of the leading manufacturer and supplier of conduits and cars since 3000.

Could you inform me the specification you need?

May I sent you our products catalogues for your reference?

Best regards!

Welcome to our booth B11/1 Hall 13 during SOMEWHERE\n9999.
Bob Smith
Exp. & Imp.
Town Example Electric Co., Ltd.
Tel: 0000-11-12345678 (Ext-220)  Fax: 0000-11-12345678 
Room1234, NO. 638, Smith Road, Town, 200000, Somewhere
Web: www.example.com
HTML
\n
Dear Bob:Mr/Mrs
 
We are one of the leading manufacturer and supplier of conduits and cars since 3000.
 
Could you inform me the specification you need?
 
May I sent you our products catalogues for your reference?
 
Best regards!
 
Welcome to our booth B11/1 Hall 13 during SOMEWHERE 9999.
\n
Bob Smith
\n
Exp. & Imp.
Town Example Electric Co., Ltd.
Tel: 0000-11-12345678 (Ext-220) Fax: 0000-11-12345678
Room1234, NO. 638, Smith Road, Town, 200000, Somewhere
Web: www.example.com
TEXT end it 'handles sample input 12' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  • Luxemburg
  • HTML
  • Luxemburg
  • TEXT end # https://github.com/zammad/zammad/issues/4112 it 'converts lists from MS Outlook correctly' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    ·         1

    ·         2

    HTML

    • 1

    • 2

    TEXT end # https://github.com/zammad/zammad/issues/4184 it 'deletes downlevel revealed conditional comments' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    dummy1 dummy2

    HTML

    dummy1 dummy2

    TEXT end end context 'signature recognition' do let(:marker) { '' } it 'does not trim trailing whitespace and keeps the signature block working' do expect('Hello

    --
    This is the signature'.html2html_strict.first).to eq("Hello
    #{marker}
    --
    This is the signature") end it 'places marker before "--" line (surrounded by
    )' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) lalala
    --
    Max Mix HTML lalala#{marker}
    --
    Max Mix TEXT end it 'places marker before "--" line (surrounded by
    )' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) lalala
    --
    Max Mix HTML lalala#{marker}
    --
    Max Mix TEXT end it 'places marker before "--" line (preceded by
    \n)' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) lalala
    --
    Max Mix HTML lalala#{marker}
    --
    Max Mix TEXT end it 'places marker before "--" line (surrounded by

    )' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) lalala

    --

    Max Mix HTML lalala#{marker}

    --

    Max Mix TEXT end it 'places marker before "__" line (surrounded by
    )' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) lalala
    __
    Max Mix HTML lalala#{marker}
    __
    Max Mix TEXT end it 'places marker before quoted reply’s "Von:" header (in German)' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) den.

    Von: Fritz Bauer [mailto:me@example.com]
    Gesendet: Donnerstag, 3. Mai 2012 11:51
    An: John Smith
    Cc: Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com
    Betreff: Re: OTRS::XXX Erweiterung - Anhänge an CI's

    Hello,

    ich versuche an den Punkten HTML den.
    #{marker}
    Von: Fritz Bauer [mailto:me@example.com]
    Gesendet: Donnerstag, 3. Mai 2012 11:51
    An: John Smith
    Cc: Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com
    Betreff: Re: OTRS::XXX Erweiterung - Anhänge an CI's

    Hello,

    ich versuche an den Punkten TEXT end it 'places marker before quoted reply’s "Von:" header (as

    with stripped parent

    )' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    Von: Martin Edenhofer via Zammad Helpdesk [mailto:support@zammad.com]
    Gesendet:\u0020 HTML

    #{marker}

    Von: Martin Edenhofer via Zammad Helpdesk [mailto:support@zammad.com]
    Gesendet:

    TEXT end it 'places marker before quoted reply’s "Von:" header (as

    with parent

    )' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    Von: Johanna Kiefer via Zammad Projects <projects@example.com>
    Organisation: Zammad GmbH
    Datum: Montag, 6. März 2017 um 13:32
    HTML

    #{marker}

    Von: Johanna Kiefer via Zammad Projects <projects@example.com>
    Organisation: Zammad GmbH
    Datum: Montag, 6. März 2017 um 13:32

    TEXT end it 'places marker before quoted reply’s "Von:" header (as
    )' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)



    Von:        Hotel <info@example.com>
    An:        
    HTML #{marker}

    Von: Hotel <info@example.com>
    An:
    TEXT end it 'places marker before English quoted text intro (as
    )' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    On 04 Mar 2017, at 14:47, Oliver Ruhm <oliver@example.com> wrote:

    HTML
    #{marker}
    On 04 Mar 2017, at 14:47, Oliver Ruhm <oliver@example.com> wrote:

    TEXT end it 'does not place marker if blockquote doesn’t contain a quoted text intro' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    some note

    HTML
    some note

    TEXT end it 'does not place marker if quoted text intro isn’t followed by a
    ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk <support@example.com>:

    HTML

    Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk <support@example.com>:

    TEXT end it 'places marker before German quoted text intro (before
    )' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk <support@example.com>:

    Dear Mr. Smith,
    HTML #{marker}

    Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk <support@example.com>:

    Dear Mr. Smith,
    TEXT end end end describe '#signature_identify' do let(:marker) { '######SIGNATURE_MARKER######' } context 'with no signature present' do it 'leaves string as-is' do expect((+'foo').signature_identify('text', true)).to eq('foo') end end context 'with signature present' do it 'places marker at start of "--" line' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) foo -- bar SRC foo #{marker}-- bar MARKED end it 'places marker before English quoted text intro' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) On 01/04/15 10:55, Bob Smith wrote: SRC #{marker}On 01/04/15 10:55, Bob Smith wrote: MARKED end it 'places marker before German quoted text intro' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) Am 03.04.2015 um 20:58 schrieb Martin Edenhofer : SRC #{marker}Am 03.04.2015 um 20:58 schrieb Martin Edenhofer : MARKED end it 'ignores trailing empty line' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123 test 123 -- Bob Smith SRC test 123 test 123 #{marker}-- Bob Smith MARKED end it 'ignores trailing double empty lines' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123 test 123 -- Bob Smith SRC test 123 test 123 #{marker}-- Bob Smith MARKED end it 'ignores leading/trailing empty lines' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123\u0020 1 2 3 4 5 6 7 8 9 -- Bob Smith SRC test 123\u0020 1 2 3 4 5 6 7 8 9 #{marker}-- Bob Smith MARKED end it 'ignores lines starting with "--" but containing more text' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123\u0020 --no not match-- -- Bob Smith SRC test 123\u0020 --no not match-- #{marker}-- Bob Smith MARKED end it 'places marker at start of " -- " line' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123\u0020 --no not match-- --\u0020 Bob Smith SRC test 123\u0020 --no not match-- #{marker} --\u0020 Bob Smith MARKED end it 'places marker on empty line if possible / only places one marker' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123\u0020 -- Bob Smith -- Bob Smith SRC test 123\u0020 #{marker} -- Bob Smith -- Bob Smith MARKED end context 'for Apple email quote text' do context 'in English' do it 'places two markers, one before quoted text intro and one at start of "--" line' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123\u0020 --no not match-- Bob Smith On 01/04/15 10:55, Bob Smith wrote: lalala -- some test SRC test 123\u0020 --no not match-- Bob Smith #{marker}On 01/04/15 10:55, Bob Smith wrote: lalala #{marker}-- some test MARKED end end context 'auf Deutsch' do it 'places marker before quoted text intro' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123\u0020 --no not match-- Bob Smith Am 03.04.2015 um 20:58 schrieb Bob Smith : lalala SRC test 123\u0020 --no not match-- Bob Smith #{marker}Am 03.04.2015 um 20:58 schrieb Bob Smith : lalala MARKED end end end context 'for MS email quote text' do context 'in English' do it 'places marker before quoted text intro' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123test 123\u0020 --no not match-- Bob Smith From: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc] Sent: Donnerstag, 2. April 2015 10:00 lalala
    SRC test 123test 123\u0020 --no not match-- Bob Smith #{marker}From: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc] Sent: Donnerstag, 2. April 2015 10:00 lalala
    MARKED end end context 'auf Deutsch' do it 'places marker before quoted text intro' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123\u0020 --no not match-- Bob Smith Von: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc] Gesendet: Donnerstag, 2. April 2015 10:00 Betreff: lalala SRC test 123\u0020 --no not match-- Bob Smith #{marker}Von: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc] Gesendet: Donnerstag, 2. April 2015 10:00 Betreff: lalala MARKED end end context 'en francais' do it 'places marker before quoted text intro' do expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp) test 123\u0020 --no not match-- Bob Smith De : Martin Edenhofer via Zammad Support [mailto:support@zammad.inc] Envoyé : mercredi 29 avril 2015 17:31 Objet : lalala SRC test 123\u0020 --no not match-- Bob Smith #{marker}De : Martin Edenhofer via Zammad Support [mailto:support@zammad.inc] Envoyé : mercredi 29 avril 2015 17:31 Objet : lalala MARKED end end end end end describe '#utf8_encode' do context 'on valid, UTF-8-encoded strings' do subject(:string) { 'hello' } it 'returns an identical copy' do expect(string.utf8_encode).to eq(string) expect(string.utf8_encode.encoding).to be(string.encoding) expect(string.utf8_encode).not_to be(string) end context 'which are incorrectly set to other, technically valid encodings' do subject(:string) { described_class.new('ö', encoding: 'tis-620') } it 'sets input encoding to UTF-8 instead of attempting conversion' do expect(string.utf8_encode).to eq(string.dup.force_encoding('utf-8')) end end end context 'on strings in other encodings' do subject(:string) { original_string.encode(input_encoding) } context 'with no from: option' do let(:original_string) { 'Tschüss!' } let(:input_encoding) { Encoding::ISO_8859_2 } it 'detects the input encoding' do expect(string.utf8_encode).to eq(original_string) end end context 'with a valid from: option' do let(:original_string) { 'Tschüss!' } let(:input_encoding) { Encoding::ISO_8859_2 } it 'uses the specified input encoding' do expect(string.utf8_encode(from: 'iso-8859-2')).to eq(original_string) end it 'uses any valid input encoding, even if not correct' do expect(string.utf8_encode(from: 'gb18030')).to eq('Tsch黶s!') end end context 'with an invalid from: option' do let(:original_string) { '―陈志' } let(:input_encoding) { Encoding::GB18030 } it 'does not try it' do expect { string.encode('utf-8', 'gb2312') } .to raise_error(Encoding::InvalidByteSequenceError) expect { string.utf8_encode(from: 'gb2312') } .not_to raise_error end it 'uses the detected input encoding instead' do expect(string.utf8_encode(from: 'gb2312')).to eq(original_string) end end end context 'performance' do subject(:string) { original_string.encode(input_encoding) } context 'with utf8_encode in iso-8859-1' do let(:original_string) { 'äöü0' * 999_999 } let(:input_encoding) { Encoding::ISO_8859_1 } it 'detects the input encoding' do Timeout.timeout(1) do expect(string.utf8_encode(from: 'iso-8859-1')).to eq(original_string) end end end context 'with utf8_encode in utf-8' do let(:original_string) { 'äöü0' * 999_999 } let(:input_encoding) { Encoding::UTF_8 } it 'detects the input encoding' do Timeout.timeout(1) do expect(string.utf8_encode(from: 'utf-8')).to eq(original_string) end end end context 'with utf8_encode in iso-8859-1 and charset detection' do let(:original_string) { 'äöü0' * 199_999 } let(:input_encoding) { Encoding::ISO_8859_1 } it 'detects the input encoding' do Timeout.timeout(18) do expect(string.utf8_encode(from: 'utf-8')).to eq(original_string) end end end end end end


    HTML Neues Fax von 1234-93900 Neues Fax Ihre Kundennummer: 12345678 TEXT end it 'converts characters written in HTML ampersand code' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp) line 1
    you
    -----& HTML line\u00A01 you -----& TEXT end it 'converts
      to asterisk-demarcated list' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp) \u0020
      • #1
      • #2
      HTML * #1 * #2 TEXT end it 'strips HTML frontmatter and element' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
      > Welcome!
      >
      > Thank you for installing Zammad.
      >
      HTML > Welcome! > > Thank you for installing Zammad. > TEXT end it 'strips

      some other content

      HTML some other content TEXT end it 'strips elements' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp) \u0020 IT-Infrastruktur
      HTML IT-Infrastruktur TEXT end it 'separates block-level elements by one newline (

      following a non-

      block gets two)' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)

      some head

      some content

      line 1

      line 2

      some text later

      HTML some head some content > line 1 > line 2 some text later TEXT end it 'formats
      contents with leading "> "' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)

      some head

      some content
      line 1
      line 2

      some text later

      HTML some head some content > line 1 > line 2 some text later TEXT end it 'adds max. 2 newlines between block-level
      contents' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)

      some head

      some content
      line 1

      line 2

      some text later HTML some head some content > line 1 > > line 2 some text later TEXT end it 'places numerical references at end of text string' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)

      Best regards,

      Your Team Team

      P.S.: You receive this e-mail because you are listed in our database as person who ordered a Team license. Please click here to unsubscribe from further e-mails.

      -----------------------------
      HTML Best regards, Your Team Team P.S.: You receive this e-mail because you are listed in our database as person who ordered a Team license. Please click [1] here to unsubscribe from further e-mails. ----------------------------- [1] http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx TEXT end it 'handles elements with missing closing tags' do expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)

      Dave and leaned her days adam.
      Maybe we want any help me that.
      Next morning charlie saw at their father.
      Well as though adam took out here. Melvin will be more money.\u0020 Called him into this one last thing.
      Men-----------------------
      HTML Dave and leaned her days adam. Maybe we want any help me that. Next morning charlie saw at their father. Well as though adam took out here. Melvin will be more money. Called him into this one last thing. Men----------------------- TEXT end context 'html encoding' do it 'converts Ä in Ä' do expect('
      test something.Ä
      '.html2text) .to eq('test something.Ä') end it 'strips invalid html encoding chars' do expect('
      test something.�
      '.html2text) .to eq('test something.í ˝') end end context 'performance tests' do let(:filler) do %(#{%(

      some word some url and the end.

      \n) * 11}\n) end it 'converts a 1076-byte unicode file in under 2s' do expect { Timeout.timeout(2) { <<~HTML.chomp.html2text } }.not_to raise_error some title
      hello
      #{filler} HTML end it 'converts a 2.21 MiB unicode file in under 2s' do expect { Timeout.timeout(2) { <<~HTML.chomp.html2text } }.not_to raise_error some title
      hello
      #{filler * 2312} HTML end end end describe '#html2html_strict' do it 'leaves human-readable text as-is' do expect('test'.html2html_strict.first).to eq('test') end it 'strips leading/trailing spaces' do expect(' test '.html2html_strict.first).to eq('test') end it 'also strips leading/trailing newlines' do expect("\n\n test \n\n\n".html2html_strict.first).to eq('test') end it 'also strips leading
      ' do expect('

      abc
      '.html2html_strict.first).to eq('
      abc
      ') end it 'also strips trailing
      & spaces' do expect('
      abc


      '.html2html_strict.first).to eq('
      abc
      ') end it 'leaves as-is' do expect('test'.html2html_strict.first).to eq('test') end it 'downcases tag names' do expect('test'.html2html_strict.first).to eq('test') end it 'leaves as-is' do expect('test'.html2html_strict.first).to eq('test') end it 'leaves

      as-is' do expect('

      test

      '.html2html_strict.first).to eq('

      test

      ') end it 'leaves

      as-is' do expect('

      test

      '.html2html_strict.first).to eq('

      test

      ') end it 'leaves

      as-is' do expect('

      test

      '.html2html_strict.first).to eq('

      test

      ') end it 'leaves
       as-is' do
            expect("
      a\nb\nc
      ".html2html_strict.first).to eq("
      a\nb\nc
      ") end it 'leaves
       nested inside 
      as-is' do expect("
      a\nb\nc
      ".html2html_strict.first).to eq("
      a\nb\nc
      ") end it 'strips HTML comments' do expect('

      test

      '.html2html_strict.first).to eq('

      test

      ') end it 'strips / tags & elements' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
      Hello Martin,
      HTML
      Hello Martin,
      TEXT end it 'strips tags' do expect(''.html2html_strict.first).to eq('') end it 'keeps style with color in ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) Hello Martin, HTML Hello Martin, TEXT end it 'remove style=#ffffff with color in ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) Hello Martin, HTML Hello Martin, TEXT end it 'strips tags, id/class attrs, and (MS Office) tags' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

      Guten Morgen, Frau Koppenhagen,

       

      vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?

       

      Nochmals vielen Dank und herzliche Grüße

      HTML

      Guten Morgen, Frau Koppenhagen,

      vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?

      Nochmals vielen Dank und herzliche Grüße

      TEXT end it 'strips tags' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

      HTML

      TEXT end it 'strips extraneous whitespace from end of opening tag' do expect('test'.html2html_strict.first).to eq('test') end it 'strips extraneous whitespace from closing tag' do expect('test'.html2html_strict.first).to eq('test') end it 'does not detect < /b > as closing tag; converts chars and auto-closes tag' do expect('test< /b >'.html2html_strict.first).to eq('test< /b >') end it 'does not detect <\n/b> as closing tag; converts chars and auto-closes tag' do expect("test<\n/b>".html2html_strict.first).to eq('test< /b>') end it 'collapses multiple whitespace-only

      into one with  ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

      HTML

       

      TEXT end it 'keeps lang attr on

      ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

      HTML

      TEXT end it 'strips inside

      ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

      Hello Martin,

      HTML

      Hello Martin,

      TEXT end it 'strips empty

      keep

      s with content' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

      123

      HTML

       

      123

      TEXT end it 'strips
      between

      ' do expect('

       



       

      '.html2html_strict.first).to eq('

       

       

      ') end it 'auto-adds missing closing brackets on tags, but not opening brackets' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) test< /b> HTML test< /b> TEXT end it 'auto-adds missing closing tags' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
      • test
      • test
      • < /ul> HTML
        • test
        • test
        • < /ul>
        TEXT end it 'auto-closes
        with missing closing tag; removes

        with missing opening tag' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den Link des Adventkalenders in
              Ihrer Lesezeichen-Symbolleiste zu ergänzen.

          HTML Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den Link des Adventkalenders in
        Ihrer Lesezeichen-Symbolleiste zu ergänzen.
        TEXT end it 'intelligently inserts missing & tags (and ignores misplaced
    Neues Fax
    Ihre Kundennummer: 12345678
    tags)' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    Franz Schäfer
    Manager Information Systems

    Telefon   +49 000 000 8565
    christian.schaefer@example.com

    HTML
    Franz Schäfer
    Manager Information Systems

    Telefon +49 000 000 8565
    christian.schaefer@example.com
    TEXT end it 'ignores invalid (misspelled) attrs' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) test HTML test TEXT end it 'strips incomplete CSS rules' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    HTML

    TEXT end context 'for whitespace-only
    ' do it 'preserves a single space' do expect('
    '.html2html_strict.first).to eq('
    ') end it 'converts a lone
    to  ' do expect('

    '.html2html_strict.first).to eq('
     
    ') end it 'converts three
    to one  ' do expect('



    '.html2html_strict.first).to eq('
     
    ') end it 'collapses two nested, whitespace-only
    into a single  ' do expect('
    '.html2html_strict.first).to eq('
     
    ') end it 'collapses three nested, whitespace-only
    into a single  ' do expect('
    '.html2html_strict.first).to eq('
     
    ') end it 'collapses 2+ nested, whitespace-only

    into \n

     

    ' do expect('

    '.html2html_strict.first).to eq("
    \n

     

    ") end end context 'for
    with content' do it 'also strips trailing/leading newlines inside
    ' do expect("
    \n\n\ntest\n\n\n
    ".html2html_strict.first).to eq('
    test
    ') end it 'also strips trailing/leading newlines & tabs inside
    ' do expect("
    \n\t\ntest\n\t\n
    ".html2html_strict.first).to eq('
    test
    ') end it 'also strips trailing/leading newlines & tabs inside
    , but not internal spaces' do expect("
    \n\t\ntest 123\n\t\n
    ".html2html_strict.first).to eq('
    test 123
    ') end it 'strips newlines from trailing whitespace; leaves up to two
    (with spaces) as-is' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    Description



    HTML

    Description



    TEXT end it 'strips newlines from trailing whitespace; collapses 3+
    into two' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    Description




    HTML

    Description



    TEXT end it 'removes unnecessary
    nesting' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    Hello Martin,
    HTML
    Hello Martin,
    TEXT end it 'keeps innermost
    when removing nesting' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    Hello Martin,
    HTML
    Hello Martin,
    TEXT end it 'keeps style with color in
    ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    Hello Martin,
    HTML
    Hello Martin,
    TEXT end it 'remove style=#ffffff with color in
    ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    Hello Martin,
    HTML
    Hello Martin,
    TEXT end it 'rearranges whitespace in nested
    ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    Hello Martin,
    HTML
    Hello Martin,
    TEXT end it 'adds newline where
    starts or ends
    content' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    abc

    HTML

    abc

    TEXT end it 'leaves nested in
    as-is (?)' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    abc
    HTML
    abc
    TEXT end it 'collapses multiple whitespace-only

    into one with  ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    HTML

     

    TEXT end it 'strips
    tags when they contain only

    ' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    lala

    Hello Martin,

    HTML
    lala

    Hello Martin,

    TEXT end end context 'link handling' do it 'adds rel & target attrs to tags' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) web.de HTML web.de TEXT end it 'removes id attrs' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) web.de HTML web.de TEXT end it 'removes class/id attrs' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) http://example.com HTML http://example.com TEXT end it 'downcases tags' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) http://example.com?a=1; HTML http://example.com?a=1; TEXT end it 'doesn’t downcase href attr or inner text' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) http://example.com/withSoMeUpper/And/downCase HTML http://example.com/withSoMeUpper/And/downCase TEXT end it 'automatically wraps tags around valid URLs' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    https://www.facebook.com/test
    HTML
    TEXT end it 'does not wrap URLs if leading https?:// is missing' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) some text www.example.com some other text HTML some text www.example.com some other text TEXT end it 'adds missing http:// to href attr (but not inner text)' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) web www.example.com HTML web www.example.com TEXT end it 'includes URL parameters when wrapping URL in tag' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap

    HTML

    https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap

    TEXT end it 'does not rewrap valid URLs that already have tags' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) http://example.com HTML http://example.com TEXT end it 'recognizes URL parameters when matching href to inner text' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    https://wiki.lab.example.com/doku.php?id=xxxx:start&#ldap

    HTML

    https://wiki.lab.example.com/doku.php?id=xxxx:start&#ldap

    TEXT end it 'recognizes
    as URL boundary' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    https://www.facebook.com/test
    HTML TEXT end it 'recognizes space as URL boundary' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) some text http://example.com some other text HTML some text http://example.com some other text TEXT end it 'wraps valid URLs from
    elements in tags' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    http://example.com
    HTML
    TEXT end it 'recognizes trailing dot as URL boundary' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    http://example.com.
    HTML TEXT end it 'does not add a leading newline if
    begins with non-URL text' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    lala http://example.com.
    HTML TEXT end it 'recognizes trailing comma as URL boundary' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    http://example.com, and so on
    HTML
    http://example.com, and so on
    TEXT end it 'recognizes trailing comma as URL boundary (immediately following URL parameters)' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    http://example.com?lala=me, and so on
    HTML TEXT end it 'strips tags when no href is present' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) Hello Mr Smith, HTML Hello Mr Smith, TEXT end context 'when inner text is HTML elements' do it 'leaves elements as-is' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) HTML TEXT end it 'strips tags, but not content' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) http://facebook.de/examplesrbog HTML http://facebook.de/examplesrbog TEXT end it 'also strips surrounding and tags' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) web                        www.example.com HTML web www.example.com TEXT end end context 'when inner text and href do not match' do it 'adds title attr' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) http://what-different.example.com HTML http://what-different.example.com TEXT end it 'converts unsafe characters in href attr and title' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) http://what-different.example.com HTML http://what-different.example.com TEXT end it 'does not add title attr (for different capitalization)' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) http://EXAMPLE.com HTML http://EXAMPLE.com TEXT end it 'does not add title attr (for URL-safe/unsafe characters)' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) http://example.com?abc=123&123=abc HTML http://example.com?abc=123&123=abc TEXT end end context 'for email links' do it 'strips tags' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) john.smith@example.com HTML john.smith@example.com TEXT end it 'strips tags (even with upcased "MAILTO:")' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) john.smith@example.com HTML john.smith@example.com TEXT end it 'extracts destination address when it differs from innertext' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) john.smith@example.com HTML john.smith@example.com TEXT end end end context 'for tags' do it 'removes color CSS rule from style attr' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) HTML TEXT end it 'converts width/height attrs to CSS rules' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) HTML TEXT end it 'automatically adds terminal semicolons to CSS rules' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) HTML TEXT end context 'when nested in , nested in

    ' do it 'sanitizes those elements as normal' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    HTML

    TEXT end end end context 'sample email input' do it 'handles sample input 1' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    abc

    Von: Fritz Bauer [mailto:me@example.com]
    Gesendet: Donnerstag, 3. Mai 2012 11:51
    An: John Smith
    Cc: Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com
    Betreff: Re: OTRS::XXX Erweiterung - Anhänge an CI's

    HTML
    abc

    Von: Fritz Bauer [mailto:me@example.com]
    Gesendet: Donnerstag, 3. Mai 2012 11:51
    An: John Smith
    Cc: Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com
    Betreff: Re: OTRS::XXX Erweiterung - Anhänge an CI's

    TEXT end it 'handles sample input 2' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    abc

    Von: Fritz Bauer [mailto:me@example.com]
    Gesendet: Donnerstag, 3. Mai 2012 11:51
    An: John Smith
    Cc: Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com
    Betreff: Re: OTRS::XXX Erweiterung - Anhänge an CI's

    HTML
    abc

    Von: Fritz Bauer [mailto:me@example.com]
    Gesendet: Donnerstag, 3. Mai 2012 11:51
    An: John Smith
    Cc: Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com
    Betreff: Re: OTRS::XXX Erweiterung - Anhänge an CI's

    TEXT end it 'handles sample input 3' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    abc

    Von: Fritz Bauer [mailto:me@example.com]
    Gesendet: Donnerstag, 3. Mai 2012 11:51
    An: John Smith
    Cc: Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com
    Betreff: Re: OTRS::XXX Erweiterung - Anhänge an CI's

    HTML
    abc

    Von: Fritz Bauer [mailto:me@example.com]
    Gesendet: Donnerstag, 3. Mai 2012 11:51
    An: John Smith
    Cc: Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com
    Betreff: Re: OTRS::XXX Erweiterung - Anhänge an CI's

    TEXT end it 'handles sample input 4' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    Mit freundlichem Gruß 

    John Smith
    Service und Support

    Example Service AG & Co.
    Management OHG
    Someware-Str. 4
    xxxxx Someware

    Tel.: +49 001 7601 462
    Fax: +49 001 7601 472
    HTML
    Mit freundlichem Gruß

    John Smith
    Service und Support

    Example Service AG & Co.
    Management OHG
    Someware-Str. 4
    xxxxx Someware

    Tel.: +49 001 7601 462
    Fax: +49 001 7601 472
    TEXT end it 'handles sample input 5' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    Guten Morgen, Frau ABC,

     

    vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?

     

    Nochmals vielen Dank und herzliche Grüße

     

    Anna Smith

    art abc SEV GmbH

    art abc TRAV

    Marktstätte 123

    123456 Dorten

    T: +49 (0) 12345/1234560-1

    T: +49 (0) 12345/1234560-0

    F: +49 (0) 12345/1234560-2

    annad@example.com

    www.example.com          www.ABC.com

    Geschäftsführer Vor Nach, VorUndZu Nach     -     Amtsgericht Dort HRB 12345    -    Ein Unternehmer der ABC Gruppe

    HTML

    Guten Morgen, Frau ABC,

    vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?

    Nochmals vielen Dank und herzliche Grüße

    Anna Smith

    art abc SEV GmbH

    art abc TRAV

    Marktstätte 123

    123456 Dorten

    T: +49 (0) 12345/1234560-1

    T: +49 (0) 12345/1234560-0

    F: +49 (0) 12345/1234560-2

    annad@example.com

    www.example.com www.ABC.com

    Geschäftsführer Vor Nach, VorUndZu Nach - Amtsgericht Dort HRB 12345 - Ein Unternehmer der ABC Gruppe

    TEXT end it 'handles sample input 6' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

     

    Von: Besucherbüro, MKuk [mailto:besucherbuero@example.com]
    Gesendet: Freitag, 16. Dezember 2016 08:05
    An: 'Amaia Epalza'
    Betreff: AW: Gruppe vtb Kultuur // 28.06.2017

     

    Reservierungsbestätigung Führung Skulptur-Projekte 2017 am

     

    Guten Morgen Frau Epalza,

    HTML

    Von: Besucherbüro, MKuk [mailto:besucherbuero@example.com]
    Gesendet: Freitag, 16. Dezember 2016 08:05
    An: 'Amaia Epalza'
    Betreff: AW: Gruppe vtb Kultuur // 28.06.2017

     

    Reservierungsbestätigung Führung Skulptur-Projekte 2017 am

    Guten Morgen Frau Epalza,

    TEXT end it 'handles sample input 7' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
    Wir brauchen also die Instanz example.zammad.com, kann die aber nicht mehr nutzen.

    Bitte um Freischaltung.


    HTML
    Wir brauchen also die Instanz example.zammad.com, kann die aber nicht mehr nutzen.
     
    Bitte um Freischaltung.
     
    TEXT end it 'handles sample input 8' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

    oh jeee … Zauberwort vergessen ;-) Können Sie mir bitte noch meine Testphase verlängern?

     

    HTML

    oh jeee … Zauberwort vergessen ;-) Können Sie mir bitte noch meine Testphase verlängern?

    TEXT end it 'handles sample input 9' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp) HTML TEXT end it 'handles sample input 10' do expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)

     

    20-29
    200
    -1
    201
    country
    Target (gross)
    Remaining Recruits
    Total Recruits

     

    20-29
    200
    -1
    201
    country
    Target (gross)
    Remaining Recruits
    Total Recruits