string_spec.rb 93 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001
  1. # Copyright (C) 2012-2023 Zammad Foundation, https://zammad-foundation.org/
  2. # frozen_string_literal: true
  3. require 'rails_helper'
  4. RSpec.describe String do
  # String#strip: the stock Ruby whitespace trimming must keep working, and
  # the project's patched version must additionally trim zero-width spaces
  # (U+200B) at the edges and cope with binary (ASCII-8BIT) strings.
  describe '#strip' do
    context 'default behavior' do
      it 'removes leading/trailing spaces' do
        padded = ' test '

        expect(padded.strip).to eq('test')
      end

      it 'removes trailing newlines' do
        with_newline = "test\n"

        expect(with_newline.strip).to eq('test')
      end

      it 'does not remove internal spaces / newlines' do
        multiline = "test \n test"

        expect(multiline.strip).to eq("test \n test")
      end
    end

    context 'monkey-patched behavior' do
      it 'removes leading/trailing zero-width spaces, but not internal ones' do
        input    = " \r\n test \u{200B} \n test\u{200B} \u{200B}"
        expected = "test \u{200B} \n test"

        expect(input.strip).to eq(expected)
      end

      it 'does not break on non-unicode strings' do
        # Both sides are built as binary strings so the comparison stays
        # within a single encoding.
        binary_input    = described_class.new("\xC2\xA92011 Z ", encoding: 'ASCII-8BIT')
        binary_expected = described_class.new("\xC2\xA92011 Z", encoding: 'ASCII-8BIT')

        expect(binary_input.strip).to eq(binary_expected)
      end
    end
  end
  # String#strip!, the in-place counterpart of #strip. Each example asserts
  # two things at once: the call returns the receiver itself (`be(str)`) and
  # the receiver now holds the expected content (`eq(...)`).
  #
  # Fixtures are created with unary plus (`+'...'`) because this file enables
  # frozen string literals and strip! needs a mutable receiver.
  describe '#strip!' do
    context 'default behavior' do
      it 'removes leading/trailing spaces (in place)' do
        str = +' test '
        expect(str.strip!).to be(str).and eq('test')
      end

      it 'removes trailing newlines (in place)' do
        str = +"test\n"
        expect(str.strip!).to be(str).and eq('test')
      end

      it 'does not remove internal spaces / newlines (in place)' do
        # Core strip! returns nil when nothing was removed; presumably that is
        # why the fixture carries a trailing space, so `be(str)` can hold.
        str = +"test \n test "
        # Compare against a literal: `eq(str)` would compare the string with
        # itself and could never detect damage to the internal whitespace.
        expect(str.strip!).to be(str).and eq("test \n test")
      end
    end

    context 'monkey-patched behavior' do
      it 'removes leading/trailing zero-width spaces, but not internal ones (in place)' do
        str = +" \r\n test \u{200B} \n test\u{200B} \u{200B}"
        expect(str.strip!).to be(str).and eq("test \u{200B} \n test")
      end

      it 'does not break on invalid-unicode strings (in place)' do
        # String.new returns a fresh, unfrozen binary (ASCII-8BIT) string.
        str = described_class.new("\xC2\xA92011 Z ", encoding: 'ASCII-8BIT')
        expect(str.strip!)
          .to be(str).and eq(described_class.new("\xC2\xA92011 Z", encoding: 'ASCII-8BIT'))
      end
    end
  end
  # String#to_filename: turns a Ruby constant path into the corresponding
  # snake_case file path and must leave the receiver untouched.
  describe '#to_filename' do
    it 'does not modify strings in place' do
      %w[test Some::File].each do |constant_path|
        expect { constant_path.to_filename }.not_to change { constant_path }
      end
    end

    it 'leaves all-downcase strings as-is' do
      converted = 'test'.to_filename

      expect(converted).to eq('test')
    end

    it 'converts camelcase Ruby constant paths to snakecase file paths' do
      converted = 'Some::File'.to_filename

      expect(converted).to eq('some/file')
    end
  end
  # String#to_classname: the inverse of #to_filename, turning a snake_case
  # file path into a CamelCase constant path without touching the receiver.
  describe '#to_classname' do
    it 'does not modify strings in place' do
      %w[test some/file].each do |file_path|
        expect { file_path.to_classname }.not_to change { file_path }
      end
    end

    it 'capitalizes all-downcase strings' do
      converted = 'test'.to_classname

      expect(converted).to eq('Test')
    end

    it 'converts snakecase file paths to camelcase Ruby constant paths' do
      converted = 'some/file'.to_classname

      expect(converted).to eq('Some::File')
    end

    context 'unlike ActiveSupport’s #classify' do
      it 'preserves pluralized names' do
        # Plural path segments must come back as plural constants.
        {
          'some/files'      => 'Some::Files',
          'some_test/files' => 'SomeTest::Files',
        }.each do |file_path, constant_path|
          expect(file_path.to_classname).to eq(constant_path)
        end
      end
    end
  end
  # Specs for String#html2text (not defined in this file; presumably a project
  # extension of String). The examples pin how HTML is flattened to plain text:
  # edge whitespace/<br>/&nbsp; trimming, tag/comment/<style>/<meta>/<head>
  # removal, tables, lists, <hr>, <blockquote> quoting, entity decoding,
  # numbered link references, and a runtime bound on large inputs.
  #
  # Most fixtures are `<<~HTML` / `<<~TEXT` heredoc pairs with `.chomp`, so the
  # compared strings carry no trailing newline from the heredoc itself.
  87. describe '#html2text' do
  88. it 'does not modify strings in place' do
  89. %w[test <div>test</div>].each do |str|
  90. expect { str.html2text }.not_to change { str }
  91. end
  92. end
  93. it 'leaves human-readable text as-is' do
  94. expect('test'.html2text).to eq('test')
  95. end
  96. it 'strips leading/trailing spaces' do
  97. expect(' test '.html2text).to eq('test')
  98. end
  99. it 'also strips leading/trailing newlines' do
  100. expect("\n\n test \n\n\n".html2text).to eq('test')
  101. end
  102. it 'strips HTML tags around text content' do
  103. expect('<div>test</div>'.html2text).to eq('test')
  104. end
  105. it 'strips trailing <br> inside last <div>' do
  106. expect('<div>test<br></div>'.html2text).to eq('test')
  107. end
  108. it 'strips trailing <br> and newlines inside last <div>' do
  109. expect("<div>test<br><br><br>\n<br>\n<br>\n</div>".html2text).to eq('test')
  110. end
  111. it 'strips trailing <br>, newlines, and spaces inside last <div>' do
  112. expect("<div>test<br><br> <br> \n<br> \n<br> \n</div>".html2text).to eq('test')
  113. end
  114. it 'strips trailing <br>, newlines, and &nbsp; inside last <div>' do
  115. expect("<div>test<br><br>&nbsp;<br>&nbsp;\n<br>&nbsp;\n<br>&nbsp;\n</div>".html2text).to eq('test')
  116. end
  117. it 'strips trailing whitespace (including &nbsp; & <br>) both inside and after last tag' do
  118. expect("<div>test<br><br>&nbsp;<br>&nbsp;\n<br>&nbsp;\n<br>&nbsp;\n</div>&nbsp;".html2text).to eq('test')
  119. end
  120. it 'also strips nested HTML tags' do
  121. expect("<p><span>Was\nsoll verbessert werden:</span></p>".html2text)
  122. .to eq('Was soll verbessert werden:')
  123. end
  124. it 'in <pre> elements, collapses multiple newlines into one' do
  125. expect("<pre>test\n\ntest</pre>".html2text).to eq("test\ntest")
  126. end
  127. it 'in <code> elements, collapses multiple newlines into one' do
  128. expect("<code>test\n\ntest</code>".html2text).to eq("test\ntest")
  129. end
  # NOTE(review): the first table row of this fixture contains a stray extra
  # </td>; presumably deliberate malformed input - confirm before "fixing" it.
  130. it 'converts <table> cells and row to space-separated lines' do
  131. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  132. <table><tr><td>test</td><td>col</td></td></tr><tr><td>test</td><td>4711</td></tr></table>
  133. HTML
  134. test col
  135. test 4711
  136. TEXT
  137. end
  138. it 'strips HTML comments' do
  139. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  140. <!-- some comment -->
  141. <div>
  142. test<br><br><br>
  143. <br>
  144. <br>
  145. </div>
  146. HTML
  147. test
  148. TEXT
  149. end
  # The link text stays inline prefixed with "[n]"; the URL is listed after the
  # text under the same "[n]". The fixture ends with an unclosed <div>
  # (presumably intentional - TODO confirm).
  150. it 'converts <a> elements to plain text with numerical references' do
  151. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  152. <div><a href="https://zammad.org">Best Tool of the World</a>
  153. some other text</div>
  154. <div>
  155. HTML
  156. [1] Best Tool of the Worldsome other text
  157. [1] https://zammad.org
  158. TEXT
  159. end
  160. it 'converts <hr> elements to separate paragraphs containing only "___"' do
  161. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  162. <!-- some comment -->
  163. <div>
  164. test<br><br><br>
  165. <hr/>
  166. <br>
  167. </div>
  168. HTML
  169. test
  170. ___
  171. TEXT
  172. end
  173. it 'converts <br> elements to newlines (max. 2)' do
  174. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  175. test<br><br><br>--<br>abc</div>
  176. HTML
  177. test
  178. --
  179. abc
  180. TEXT
  181. end
  # The fixture is a quoted-printable-style dump (soft line breaks marked by a
  # trailing "=") of an Outlook `<!--[if gte mso 9]>` conditional comment.
  182. it 'strips Microsoft Outlook conditional comments' do
  183. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  184. Ihr RZ-Team<br />
  185. <br />
  186. <!--[if gte mso 9]><xml> <o:DocumentProperties> <o:Author>test</o:Author> =
  187. <o:Template>A75DB76E.dotm</o:Template> <o:LastAuthor>test</o:LastAuthor> =
  188. <o:Revision>5</o:Revision> <o:Created>2011-05-18T07:08:00Z</o:Created> <=
  189. o:LastSaved>2011-07-04T17:59:00Z</o:LastSaved> <o:Pages>1</o:Pages> <o:Wo=
  190. rds>189</o:Words> <o:Characters>1192</o:Characters> <o:Lines>9</o:Lines> =
  191. <o:Paragraphs>2</o:Paragraphs> <o:CharactersWithSpaces>1379</o:Characters=
  192. WithSpaces> <o:Version>11.5606</o:Version> </o:DocumentProperties></xml><!=
  193. [endif]-->
  194. HTML
  195. Ihr RZ-Team
  196. TEXT
  197. end
  # Truncated real-world mail layout (tables are left unclosed); only the
  # <title> text and the visible span texts are expected to survive.
  198. it 'strips <img> elements' do
  199. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  200. <html>
  201. <head>
  202. <title>Neues Fax von 1234-93900</title>
  203. </head>
  204. <body style="margin: 0px;padding: 0px;font-family: Arial, sans-serif;font-size: 12px;">
  205. <table cellpadding="0" cellspacing="0" width="100%" height="100%" bgcolor="#d9e7f0" id="mailbg"
  206. style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;width: 100%;height: 100%;background-color: #d9e7f0;padding: 0px;margin: 0px;">
  207. <tr>
  208. <td valign="top">
  209. <center>
  210. <br><br>
  211. <table width="560" cellpadding="0" cellspacing="0" bgcolor="#FFFFFF" id="mailcontainer"
  212. style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;width: 560px;margin: 0px auto;padding: 0px;background-color: #FFFFFF;">
  213. <tr>
  214. <td colspan="3" width="560" id="mail_header" valign="top" style="width: 560px;background-color: #FFFFFF;font-family: Arial, sans-serif;color: #000000;padding: 0px;margin: 0px;">
  215. <table width="560" cellpadding="0" cellspacing="0" style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;">
  216. <tr>
  217. <td height="10" colspan="4" style="font-size:0px;line-height: 0px;padding:0px;height:10px;">
  218. <img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_header.gif" style="padding: 0px;margin: 0px;">
  219. </td>
  220. </tr>
  221. <tr>
  222. <td height="12" colspan="4"><span style="font-size:0px;line-height:0px;"> </span></td>
  223. </tr>
  224. <tr>
  225. <td height="27" width="30"> </td>
  226. <td height="27" width="397"><span class="mailtitle" style="font-family: Arial, sans-serif;color: #000000;font-size: 18px;line-height: 18px;font-weight: normal;">Neues Fax</span></td>
  227. <td height="27" width="103"><img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_logo-example.gif" style="padding: 0px;margin: 0px;"></td>
  228. <td height="27" width="30"></td>
  229. </tr>
  230. <tr>
  231. <td height="20" colspan="4"><span style="font-size:0px;line-height:0px;"> </span></td>
  232. </tr>
  233. <tr>
  234. <td height="1" colspan="4" style="font-size:0px;line-height: 0px;padding:0px;">
  235. <img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_line-grey.gif" style="padding: 0px;margin: 0px;">
  236. </td>
  237. </tr>
  238. </table>
  239. </td>
  240. </tr>
  241. <tr>
  242. <td colspan="3" width="560"> </td>
  243. </tr>
  244. <tr>
  245. <td width="30"> </td>
  246. <td width="500" height="30" valign="middle" align="right">
  247. <span class="accountno" style="font-family: Arial, sans-serif;font-size: 10px;color: #666666;">Ihre Kundennummer: 12345678</span>
  248. </td>
  249. <td width="30"> </td>
  250. </tr>
  251. HTML
  252. Neues Fax von 1234-93900
  253. Neues Fax
  254. Ihre Kundennummer: 12345678
  255. TEXT
  256. end
  # &nbsp; is expected to decode to U+00A0 (written \u00A0 in the expected
  # text), not to a plain space.
  257. it 'converts characters written in HTML ampersand code' do
  258. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  259. line&nbsp;1<br>
  260. you<br/>
  261. -----&amp;
  262. HTML
  263. line\u00A01
  264. you
  265. -----&
  266. TEXT
  267. end
  # `\u0020` is an escaped space: a squiggly heredoc (<<~) does not count
  # escaped whitespace as indentation, so the fixture keeps a real leading
  # space. The same device is used in later fixtures.
  268. it 'converts <ul> to asterisk-demarcated list' do
  269. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  270. \u0020<ul><li>#1</li><li>#2</li></ul>
  271. HTML
  272. * #1
  273. * #2
  274. TEXT
  275. end
  # NOTE(review): the fixture "closes" <head> with a second opening <head>
  # tag rather than </head>; looks like deliberately sloppy markup - confirm.
  276. it 'strips HTML frontmatter and <head> element' do
  277. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  278. <!DOCTYPE html>
  279. <html>
  280. <head>
  281. <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
  282. <head>
  283. <body style="font-family:Geneva,Helvetica,Arial,sans-serif; font-size: 12px;">
  284. <div>&gt; Welcome!</div><div>&gt;</div><div>&gt; Thank you for installing Zammad.</div><div>&gt;</div>
  285. </body>
  286. </html>
  287. HTML
  288. > Welcome!
  289. >
  290. > Thank you for installing Zammad.
  291. >
  292. TEXT
  293. end
  294. it 'strips <style> elements' do
  295. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  296. \u0020 <style type="text/css">
  297. body {
  298. width:90% !important;
  299. -webkit-text-size-adjust:90%;
  300. -ms-text-size-adjust:90%;
  301. font-family:'helvetica neue', helvetica, arial, geneva, sans-serif; f=
  302. ont-size: 12px;;
  303. }
  304. img {
  305. outline:none; text-decoration:none; -ms-interpolation-mode: bicubic;
  306. }
  307. a img {
  308. border:none;
  309. }
  310. table td {
  311. border-collapse: collapse;
  312. }
  313. table {
  314. border-collapse: collapse; mso-table-lspace:0pt; mso-table-rspace:0pt;
  315. }
  316. p, table, div, td {
  317. max-width: 600px;
  318. }
  319. p {
  320. margin: 0;
  321. }
  322. blockquote, pre {
  323. margin: 0px;
  324. padding: 8px 12px 8px 12px;
  325. }
  326. </style><p>some other content</p>
  327. HTML
  328. some other content
  329. TEXT
  330. end
  # Word-generated mail fragment: <meta> tags, conditional comments and a
  # large <style> block must all vanish, leaving only the leading text.
  331. it 'strips <meta> elements' do
  332. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  333. \u0020 IT-Infrastruktur</span><br>
  334. <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  335. <meta name="Generator" content="Microsoft Word 14 (filtered
  336. medium)">
  337. <!--[if !mso]><style>v:* {behavior:url(#default#VML);}
  338. o:* {behavior:url(#default#VML);}
  339. w:* {behavior:url(#default#VML);}
  340. .shape {behavior:url(#default#VML);}
  341. </style><![endif]-->
  342. <style><!--
  343. @font-face
  344. {font-family:calibri;
  345. panose-1:2 15 5 2 2 2 4 3 2 4;}
  346. @font-face
  347. {font-family:tahoma;
  348. panose-1:2 11 6 4 3 5 4 4 2 4;}
  349. p.msonormal, li.msonormal, div.msonormal
  350. {margin:0cm;
  351. margin-bottom:.0001pt;
  352. font-size:11.0pt;
  353. font-family:"calibri","sans-serif";
  354. mso-fareast-language:en-us;}
  355. a:link, span.msohyperlink
  356. {mso-style-priority:99;
  357. color:blue;
  358. text-decoration:underline;}
  359. a:visited, span.msohyperlinkfollowed
  360. {mso-style-priority:99;
  361. color:purple;
  362. text-decoration:underline;}
  363. p.msoacetate, li.msoacetate, div.msoacetate
  364. {mso-style-priority:99;
  365. mso-style-link:"sprechblasentext zchn";
  366. margin:0cm;
  367. margin-bottom:.0001pt;
  368. font-size:8.0pt;
  369. font-family:"tahoma","sans-serif";
  370. mso-fareast-language:en-us;}
  371. span.e-mailformatvorlage17
  372. {mso-style-type:personal;
  373. font-family:"calibri","sans-serif";
  374. color:windowtext;}
  375. span.sprechblasentextzchn
  376. {mso-style-name:"sprechblasentext zchn";
  377. mso-style-priority:99;
  378. mso-style-link:sprechblasentext;
  379. font-family:"tahoma","sans-serif";}
  380. .msochpdefault
  381. {mso-style-type:export-only;
  382. font-family:"calibri","sans-serif";
  383. mso-fareast-language:en-us;}
  384. @page wordsection1
  385. {size:612.0pt 792.0pt;
  386. margin:70.85pt 70.85pt 2.0cm 70.85pt;}
  387. div.wordsection1
  388. {page:wordsection1;}
  389. --></style><!--[if gte mso 9]><xml>
  390. <o:shapedefaults v:ext="edit" spidmax="1026" />
  391. </xml><![endif]--><!--[if gte mso 9]><xml>
  392. <o:shapelayout v:ext="edit">
  393. <o:idmap v:ext="edit" data="1" />
  394. </o:shapelayout></xml><![endif]-->
  395. HTML
  396. IT-Infrastruktur
  397. TEXT
  398. end
  # NOTE(review): blank lines inside the expected heredocs of the following
  # examples may have been lost in this copy of the file; compare with the
  # repository version before relying on the exact newline counts shown.
  399. it 'separates block-level elements by one newline (<p> following a non-<p> block gets two)' do
  400. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  401. <h1>some head</h1>
  402. some content
  403. <blockquote>
  404. <p>line 1</p>
  405. <p>line 2</p>
  406. </blockquote>
  407. <p>some text later</p>
  408. HTML
  409. some head
  410. some content
  411. > line 1
  412. > line 2
  413. some text later
  414. TEXT
  415. end
  416. it 'formats <blockquote> contents with leading "> "' do
  417. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  418. <h1>some head</h1>
  419. some content
  420. <blockquote>
  421. line 1<br/>
  422. line 2<br>
  423. </blockquote>
  424. <p>some text later</p>
  425. HTML
  426. some head
  427. some content
  428. > line 1
  429. > line 2
  430. some text later
  431. TEXT
  432. end
  433. it 'adds max. 2 newlines between block-level <blockquote> contents' do
  434. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  435. <h1>some head</h1>
  436. some content
  437. <blockquote>
  438. <div><div>line 1</div><br></div>
  439. <div><div>line 2</div><br></div>
  440. </blockquote>
  441. some text later
  442. HTML
  443. some head
  444. some content
  445. > line 1
  446. >
  447. > line 2
  448. some text later
  449. TEXT
  450. end
  # The "[1] <url>" reference is expected after all body text, including text
  # that follows the paragraph containing the link.
  451. it 'places numerical <a> references at end of text string' do
  452. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  453. <p>Best regards,</p>
  454. <p><i>Your Team Team</i></p>
  455. <p>P.S.: You receive this e-mail because you are listed in our database as person who ordered a Team license. Please click
  456. <a href="http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx">here</a> to unsubscribe from further e-mails.</p>
  457. -----------------------------
  458. <br />
  459. HTML
  460. Best regards,
  461. Your Team Team
  462. P.S.: You receive this e-mail because you are listed in our database as person who ordered a Team license. Please click [1] here to unsubscribe from further e-mails.
  463. -----------------------------
  464. [1] http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx
  465. TEXT
  466. end
  # The <span> opened in the fixture is never closed.
  467. it 'handles elements with missing closing tags' do
  468. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  469. <div><br>Dave and leaned her
  470. days adam.</div><span style="color:#F7F3FF; font-size:8px">Maybe we
  471. want any help me that.<br>Next morning charlie saw at their
  472. father.<br>Well as though adam took out here. Melvin will be more money.\u0020
  473. Called him into this one last thing.<br>Men-----------------------
  474. <br />
  475. HTML
  476. Dave and leaned her days adam.
  477. Maybe we want any help me that.
  478. Next morning charlie saw at their father.
  479. Well as though adam took out here. Melvin will be more money. Called him into this one last thing.
  480. Men-----------------------
  481. TEXT
  482. end
  # Entity decoding: named entities and numeric character references.
  483. context 'html encoding' do
  484. it 'converts &Auml; in Ä' do
  485. expect('<div>test something.&Auml;</div>'.html2text)
  486. .to eq('test something.Ä')
  487. end
  # 55357 is 0xD83D, a lone UTF-16 high surrogate (not a valid character on
  # its own). NOTE(review): the title says "strips", yet the expected value
  # still ends in extra characters after the dot - confirm this is intended.
  488. it 'strips invalid html encoding chars' do
  489. expect('<div>test something.&#55357;</div>'.html2text)
  490. .to eq('test something.í ˝')
  491. end
  492. end
  # Runtime guards: Timeout.timeout(2) raises when its block runs longer than
  # two seconds, so `not_to raise_error` fails on a slow conversion (and on
  # any other exception raised by html2text). Timeout is not required in the
  # visible part of this file; presumably it is loaded via rails_helper.
  493. context 'performance tests' do
  # One paragraph containing a link, repeated 11 times, plus a newline.
  494. let(:filler) do
  495. %(#{%(<p>some word <a href="http://example.com?domain?example.com">some url</a> and the end.</p>\n) * 11}\n)
  496. end
  497. it 'converts a 1076-byte unicode file in under 2s' do
  498. expect { Timeout.timeout(2) { <<~HTML.chomp.html2text } }.not_to raise_error
  499. <html>
  500. <title>some title</title>
  501. <body>
  502. <div>hello</div>
  503. #{filler}
  504. </body>
  505. </html>
  506. HTML
  507. end
  # Same document with the filler repeated 2312 times to reach the size named
  # in the example title.
  508. it 'converts a 2.21 MiB unicode file in under 2s' do
  509. expect { Timeout.timeout(2) { <<~HTML.chomp.html2text } }.not_to raise_error
  510. <html>
  511. <title>some title</title>
  512. <body>
  513. <div>hello</div>
  514. #{filler * 2312}
  515. </body>
  516. </html>
  517. HTML
  518. end
  519. end
  520. end
  521. describe '#html2html_strict' do
  522. it 'leaves human-readable text as-is' do
  523. expect('test'.html2html_strict).to eq('test')
  524. end
  525. it 'strips leading/trailing spaces' do
  526. expect(' test '.html2html_strict).to eq('test')
  527. end
  528. it 'also strips leading/trailing newlines' do
  529. expect("\n\n test \n\n\n".html2html_strict).to eq('test')
  530. end
  531. it 'also strips leading <br>' do
  532. expect('<br><br><div>abc</div>'.html2html_strict).to eq('<div>abc</div>')
  533. end
  534. it 'also strips trailing <br> & spaces' do
  535. expect('<div>abc</div><br> <br>'.html2html_strict).to eq('<div>abc</div>')
  536. end
  537. it 'leaves <b> as-is' do
  538. expect('<b>test</b>'.html2html_strict).to eq('<b>test</b>')
  539. end
  540. it 'downcases tag names' do
  541. expect('<B>test</B>'.html2html_strict).to eq('<b>test</b>')
  542. end
  543. it 'leaves <i> as-is' do
  544. expect('<i>test</i>'.html2html_strict).to eq('<i>test</i>')
  545. end
  546. it 'leaves <h1> as-is' do
  547. expect('<h1>test</h1>'.html2html_strict).to eq('<h1>test</h1>')
  548. end
  549. it 'leaves <h2> as-is' do
  550. expect('<h2>test</h2>'.html2html_strict).to eq('<h2>test</h2>')
  551. end
  552. it 'leaves <h3> as-is' do
  553. expect('<h3>test</h3>'.html2html_strict).to eq('<h3>test</h3>')
  554. end
  555. it 'leaves <pre> as-is' do
  556. expect("<pre>a\nb\nc</pre>".html2html_strict).to eq("<pre>a\nb\nc</pre>")
  557. end
  558. it 'leaves <pre> nested inside <div> as-is' do
  559. expect("<div><pre>a\nb\nc</pre></div>".html2html_strict).to eq("<div><pre>a\nb\nc</pre></div>")
  560. end
  561. it 'strips HTML comments' do
  562. expect('<h3>test</h3><!-- some comment -->'.html2html_strict).to eq('<h3>test</h3>')
  563. end
  564. it 'strips <html>/<body> tags & <head> elements' do
  565. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  566. <html><head><base href="x-msg://2849/"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; "><span class="Apple-style-span" style="border-collapse: separate; font-family: Helvetica; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: normal; orphans: 2; text-align: -webkit-auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; font-size: medium; "><div lang="DE" link="blue" vlink="purple"><div class="Section1" style="page: Section1; "><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Hello Martin,<o:p></o:p></span></div>
  567. HTML
  568. <div lang="DE">Hello Martin,</div>
  569. TEXT
  570. end
  571. it 'strips <span> tags' do
  572. expect('<span></span>'.html2html_strict).to eq('')
  573. end
  574. it 'keeps style with color in <span>' do
  575. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  576. <span style="color: red; bgcolor: red">Hello Martin,</span>
  577. HTML
  578. <span style="color: red;">Hello Martin,</span>
  579. TEXT
  580. end
  581. it 'remove style=#ffffff with color in <span>' do
  582. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  583. <span style="color: #ffffff; bgcolor: red">Hello Martin,</span>
  584. HTML
  585. Hello Martin,
  586. TEXT
  587. end
  588. it 'strips <span> tags, id/class attrs, and <o:*> (MS Office) tags' do
  589. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  590. <div id="123" class="WordSection1">
  591. <p class="MsoNormal"><span style="color:#1F497D">Guten Morgen, Frau Koppenhagen,<o:p></o:p></span></p>
  592. <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
  593. <p class="MsoNormal"><span style="color:#1F497D">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?<o:p></o:p></span></p>
  594. <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
  595. <p class="MsoNormal"><span style="color:#1F497D">Nochmals vielen Dank und herzliche Grüße
  596. <o:p></o:p></span></p>
  597. <div>
  598. HTML
  599. <div>
  600. <p><span style="color:#1f497d;">Guten Morgen, Frau Koppenhagen,</span></p><p><span style="color:#1f497d;"><p>&nbsp;</p></span></p><p><span style="color:#1f497d;">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?</span></p><p><span style="color:#1f497d;"><p>&nbsp;</p></span></p><p><span style="color:#1f497d;">Nochmals vielen Dank und herzliche Grüße </span></p></div>
  601. TEXT
  602. end
  603. it 'strips <font> tags' do
  604. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  605. <p><font size="2"><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></font></p>
  606. HTML
  607. <p><a href="http://www.example.com/?wm=mail" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" style="width:101px;height:30px;"></a></p>
  608. TEXT
  609. end
  610. it 'strips extraneous whitespace from end of opening tag' do
  611. expect('<b >test</b>'.html2html_strict).to eq('<b>test</b>')
  612. end
  613. it 'strips extraneous whitespace from closing tag' do
  614. expect('<b >test</b >'.html2html_strict).to eq('<b>test</b>')
  615. end
  # "< /b >" is expected to be escaped as text, with a real </b> appended.
  it 'does not detect < /b > as closing tag; converts chars and auto-closes tag' do
    sanitized = '<b >test< /b >'.html2html_strict

    expect(sanitized).to eq('<b>test&lt; /b &gt;</b>')
  end
  # Same as the "< /b >" case, but the bracket and "/b" are split by a newline.
  it 'does not detect <\n/b> as closing tag; converts chars and auto-closes tag' do
    sanitized = "<b\n>test<\n/b>".html2html_strict

    expect(sanitized).to eq('<b>test&lt; /b&gt;</b>')
  end
  # Three whitespace-only paragraphs are expected to become one <p>&nbsp;</p>.
  it 'collapses multiple whitespace-only <p> into one with &nbsp;' do
    input = <<~HTML.chomp
      <p> </p><p> </p><p> </p>
    HTML
    expected = <<~TEXT.chomp
      <p>&nbsp;</p>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # The empty <b><span> pair is dropped, while lang="DE" stays on the <p>.
  it 'keeps lang attr on <p>' do
    input = <<~HTML.chomp
      <p lang="DE"><b><span></span></b></p>
    HTML
    expected = <<~TEXT.chomp
      <p lang="DE"></p>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # The attribute-less <span> is removed; its text and the <b> wrapper remain.
  it 'strips <span> inside <p>' do
    input = <<~HTML.chomp
      <p lang="DE"><b><span>Hello Martin,</span></b></p>
    HTML
    expected = <<~TEXT.chomp
      <p lang="DE"><b>Hello Martin,</b></p>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # The whitespace-only <p> becomes <p>&nbsp;</p>, the completely empty
  # trailing <p></p> is dropped, and the paragraph with text is untouched.
  it 'strips empty <p> keep <p>s with content' do
    input = <<~HTML.chomp
      <p> </p><p>123</p><p></p>
    HTML
    expected = <<~TEXT.chomp
      <p>&nbsp;</p><p>123</p>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # The two <br> sitting between the paragraphs are expected to vanish.
  it 'strips <br> between <p>' do
    sanitized = '<p>&nbsp;</p><br><br><p>&nbsp;</p>'.html2html_strict

    expect(sanitized).to eq('<p>&nbsp;</p><p>&nbsp;</p>')
  end
  # The opening tag spans several lines and carries unknown attrs; the
  # newline-split "<\n/b>" ends up escaped and a proper </b> is appended.
  it 'auto-adds missing closing brackets on tags, but not opening brackets' do
    input = <<~HTML.chomp
      <b id=123 classs="
      some_class"
      >test<
      /b>
    HTML
    expected = <<~TEXT.chomp
      <b>test&lt; /b&gt;</b>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # The broken "<\n/ul>" is escaped as text and a real </ul> is appended;
  # id/class/"classs" attributes do not appear in the expected output.
  it 'auto-adds missing closing tags' do
    input = <<~HTML.chomp
      <ul id=123 classs="
      some_class"
      ><li>test</li>
      <li class="asasd">test</li><
      /ul>
    HTML
    expected = <<~TEXT.chomp
      <ul>
      <li>test</li>
      <li>test</li>&lt; /ul&gt;</ul>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # The input has a stray </p> and an unclosed <div>; the expected output has
  # no </p> and a closed <div>.
  it 'auto-closes <div> with missing closing tag; removes </p> with missing opening tag' do
    input = <<~HTML.chomp
      Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href="http://newsletters.cylex.de/" class="">Link des Adventkalenders</a> in<br class="">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Ihrer Lesezeichen-Symbolleiste zu ergänzen.</p><div class="">&nbsp;
    HTML
    expected = <<~TEXT.chomp
      Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href="http://newsletters.cylex.de/" rel="nofollow noreferrer noopener" target="_blank" title="http://newsletters.cylex.de/">Link des Adventkalenders</a> in<br> Ihrer Lesezeichen-Symbolleiste zu ergänzen.<div> </div>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # Input tables omit every </td>/</tr> and end with a dangling <table>;
  # the expected output is fully balanced, without <font>/bgcolor and
  # without the trailing <br> and dangling <table>.
  it 'intelligently inserts missing </td> & </tr> tags (and ignores misplaced </table> tags)' do
    input = <<~HTML.chomp
      <table>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif"><b>Franz Schäfer</b></font>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif">Manager Information Systems</font></table>
      <br>
      <table>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif">Telefon &nbsp;</font>
      <td bgcolor=white><font size=2 face="sans-serif">+49 000 000 8565</font>
      <tr>
      <td colspan=2 bgcolor=white><font size=2 face="sans-serif">christian.schaefer@example.com</font></table>
      <br>
      <table>
    HTML
    expected = <<~TEXT.chomp
      <table>
      <tr>
      <td>
      <b>Franz Schäfer</b>
      </td>
      </tr>
      <tr>
      <td>Manager Information Systems</td>
      </tr>
      </table>
      <br>
      <table>
      <tr>
      <td> Telefon </td>
      <td> +49 000 000 8565 </td>
      </tr>
      <tr>
      <td colspan="2">christian.schaefer@example.com</td>
      </tr>
      </table>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # Neither id nor the misspelled, multi-line "classs" attr is in the output.
  it 'ignores invalid (misspelled) attrs' do
    input = <<~HTML.chomp
      <b id=123 classs="
      some_class">test</b>
    HTML
    expected = <<~TEXT.chomp
      <b>test</b>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # style="color: " has no value, so the <a> is expected to come back without
  # a style attribute at all.
  it 'strips incomplete CSS rules' do
    input = <<~HTML.chomp
      <p><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></p>
    HTML
    expected = <<~TEXT.chomp
      <p><a href="http://www.example.com/?wm=mail" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" style="width:101px;height:30px;"></a></p>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # Expectations for <div> elements whose only content is whitespace, <br>
  # tags or other whitespace-only blocks.
  context 'for whitespace-only <div>' do
    it 'preserves a single space' do
      sanitized = '<div> </div>'.html2html_strict

      expect(sanitized).to eq('<div> </div>')
    end

    it 'converts a lone <br> to &nbsp;' do
      sanitized = '<div><br></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    # The max-width style on the <div> is also absent from the expected output.
    it 'converts three <br> to one &nbsp;' do
      sanitized = '<div style="max-width: 600px;"><br><br><br></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    it 'collapses two nested, whitespace-only <div> into a single &nbsp;' do
      sanitized = '<div><div> </div><div> </div></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    it 'collapses three nested, whitespace-only <div> into a single &nbsp;' do
      sanitized = '<div><div> </div><div> </div><div> </div></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    # Unlike nested <div>s, nested <p>s keep one <p>&nbsp;</p> inside the <div>,
    # preceded by a newline.
    it 'collapses 2+ nested, whitespace-only <p> into \n<p>&nbsp;</p>' do
      sanitized = '<div><p> </p><p> </p></div>'.html2html_strict

      expect(sanitized).to eq("<div>\n<p>&nbsp;</p></div>")
    end
  end
  # Expectations for <div> elements that carry real content: whitespace
  # trimming, <br> handling, nesting and style filtering.
  context 'for <div> with content' do
    it 'also strips trailing/leading newlines inside <div>' do
      sanitized = "<div>\n\n\ntest\n\n\n</div>".html2html_strict

      expect(sanitized).to eq('<div>test</div>')
    end

    it 'also strips trailing/leading newlines & tabs inside <div>' do
      sanitized = "<div>\n\t\ntest\n\t\n</div>".html2html_strict

      expect(sanitized).to eq('<div>test</div>')
    end

    it 'also strips trailing/leading newlines & tabs inside <div>, but not internal spaces' do
      sanitized = "<div>\n\t\ntest 123\n\t\n</div>".html2html_strict

      expect(sanitized).to eq('<div>test 123</div>')
    end

    # Two trailing <br> are left alone; only the newline before them goes away.
    it 'strips newlines from trailing whitespace; leaves up to two <br> (with spaces) as-is' do
      input = <<~HTML.chomp
        <div>
        <br> <p><b>Description</b></p>
        <br> <br> </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br> <p><b>Description</b></p><br> <br> </div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # With three trailing <br>, the expected output keeps two and drops the
    # spaces between them.
    it 'strips newlines from trailing whitespace; collapses 3+ <br> into two' do
      input = <<~HTML.chomp
        <div>
        <br> <p><b>Description</b></p>
        <br> <br> <br> </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br> <p><b>Description</b></p><br><br></div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'removes unnecessary <div> nesting' do
      input = <<~HTML.chomp
        <div><div>Hello Martin,</div></div>
      HTML
      expected = <<~TEXT.chomp
        <div>Hello Martin,</div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # Three levels collapse to one <div>, which carries the lang attr of the
    # outermost input <div>.
    it 'keeps innermost <div> when removing nesting' do
      input = <<~HTML.chomp
        <div lang="DE"><div><div>Hello Martin,</div></div></div>
      HTML
      expected = <<~TEXT.chomp
        <div lang="DE">Hello Martin,</div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # "color" survives; the "bgcolor" rule does not.
    it 'keeps style with color in <div>' do
      input = <<~HTML.chomp
        <div style="color: red; bgcolor: red">Hello Martin,</div>
      HTML
      expected = <<~TEXT.chomp
        <div style="color: red;">Hello Martin,</div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # A white (#ffffff) text color is dropped, leaving no style attr at all.
    it 'remove style=#ffffff with color in <div>' do
      input = <<~HTML.chomp
        <div style="color: #ffffff; bgcolor: red">Hello Martin,</div>
      HTML
      expected = <<~TEXT.chomp
        <div>Hello Martin,</div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # The stray space after the inner </div> blocks full un-nesting: one inner
    # <div> remains and a newline follows the outer opening tag.
    it 'rearranges whitespace in nested <div>' do
      input = <<~HTML.chomp
        <div lang="DE"><div><div>Hello Martin,</div> </div></div>
      HTML
      expected = <<~TEXT.chomp
        <div lang="DE">
        <div>Hello Martin,</div></div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'adds newline where <br> starts or ends <div> content' do
      input = <<~HTML.chomp
        <div style="max-width: 600px;"><br>abc<br><br></div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br>abc<br><br>
        </div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'leaves <s> nested in <div> as-is (?)' do
      input = <<~HTML.chomp
        <div><s>abc</s></div>
      HTML
      expected = <<~TEXT.chomp
        <div><s>abc</s></div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'collapses multiple whitespace-only <p> into one with &nbsp;' do
      input = <<~HTML.chomp
        <div><p> </p>
        <p> </p>
        <p> </p>
        </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <p>&nbsp;</p></div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # NOTE(review): the description says <div> tags are stripped, but both
    # <div>s are still present in the expected output (only the <span> is
    # gone) — confirm the intended wording.
    it 'strips <div> tags when they contain only <p>' do
      input = <<~HTML.chomp
        <div>lala<div lang="DE"><p><span>Hello Martin,</span></p></div></div>
      HTML
      expected = <<~TEXT.chomp
        <div>lala<div lang="DE"><p>Hello Martin,</p></div></div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end
  end
  # Expectations for <a> elements and bare URLs: attribute clean-up,
  # rel/target/title injection, auto-linking and mailto handling.
  context 'link handling' do
    it 'adds rel & target attrs to <a> tags' do
      input = <<~HTML.chomp
        <a href="http://web.de">web.de</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://web.de" rel="nofollow noreferrer noopener" target="_blank">web.de</a>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'removes id attrs' do
      input = <<~HTML.chomp
        <a id="123" href="http://web.de">web.de</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://web.de" rel="nofollow noreferrer noopener" target="_blank">web.de</a>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'removes class/id attrs' do
      input = <<~HTML.chomp
        <a href="http://example.com" class="abc" id="123">http://example.com</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'downcases <a> tags' do
      input = <<~HTML.chomp
        <A href="http://example.com?a=1;">http://example.com?a=1;</A>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com?a=1;" rel="nofollow noreferrer noopener" target="_blank">http://example.com?a=1;</a>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'doesn’t downcase href attr or inner text' do
      input = <<~HTML.chomp
        <A href="http://example.com/withSoMeUpper/And/downCase">http://example.com/withSoMeUpper/And/downCase</A>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com/withSoMeUpper/And/downCase" rel="nofollow noreferrer noopener" target="_blank">http://example.com/withSoMeUpper/And/downCase</a>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # The expected heredoc uses "\n" escapes, so the generated link sits on
    # its own line inside the <div>.
    it 'automatically wraps <a> tags around valid URLs' do
      input = <<~HTML.chomp
        <div>https://www.facebook.com/test</div>
      HTML
      expected = <<~TEXT.chomp
        <div>\n<a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a>\n</div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'does not wrap URLs if leading https?:// is missing' do
      input = <<~HTML.chomp
        some text www.example.com some other text
      HTML
      expected = <<~TEXT.chomp
        some text www.example.com some other text
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'adds missing http:// to href attr (but not inner text)' do
      input = <<~HTML.chomp
        web <a href="www.example.com"><span style="color:blue">www.example.com</span></a>
      HTML
      expected = <<~TEXT.chomp
        web <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank"><span style="color:blue;">www.example.com</span></a>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # The raw "&" of the input shows up as "&amp;" in both href and link text.
    it 'includes URL parameters when wrapping URL in <a> tag' do
      input = <<~HTML.chomp
        <p>https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap</p>
      HTML
      expected = <<~TEXT.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;a=1;#ldap" rel="nofollow noreferrer noopener" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;a=1;#ldap</a></p>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'does not rewrap valid URLs that already have <a> tags' do
      input = <<~HTML.chomp
        <a href="http://example.com">http://example.com</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # href and inner text are identical, so no title attr is expected.
    it 'recognizes URL parameters when matching href to inner text' do
      input = <<~HTML.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap</a></p>
      HTML
      expected = <<~TEXT.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap" rel="nofollow noreferrer noopener" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap</a></p>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'recognizes <br> as URL boundary' do
      input = <<~HTML.chomp
        <div><br>https://www.facebook.com/test<br></div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br><a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a><br>\n</div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'recognizes space as URL boundary' do
      input = <<~HTML.chomp
        some text http://example.com some other text
      HTML
      expected = <<~TEXT.chomp
        some text <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a> some other text
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'wraps valid URLs from <div> elements in <a> tags' do
      input = <<~HTML.chomp
        <div>http://example.com</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
        </div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # The trailing "." stays outside the generated link.
    it 'recognizes trailing dot as URL boundary' do
      input = <<~HTML.chomp
        <div>http://example.com.</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>.</div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'does not add a leading newline if <div> begins with non-URL text' do
      input = <<~HTML.chomp
        <div>lala http://example.com.</div>
      HTML
      expected = <<~TEXT.chomp
        <div>lala <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>.</div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'recognizes trailing comma as URL boundary' do
      input = <<~HTML.chomp
        <div>http://example.com, and so on</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>, and so on</div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'recognizes trailing comma as URL boundary (immediately following URL parameters)' do
      input = <<~HTML.chomp
        <div>http://example.com?lala=me, and so on</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com?lala=me" rel="nofollow noreferrer noopener" target="_blank">http://example.com?lala=me</a>, and so on</div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # An anchor that only has a name attr is unwrapped; the inner <span> keeps
    # just its color rule (lower-cased) and the <o:p> element is gone.
    it 'strips <a> tags when no href is present' do
      input = <<~HTML.chomp
        <a name="_MailEndCompose"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#44546A">Hello Mr Smith,<o:p></o:p></span></a>
      HTML
      expected = <<~TEXT.chomp
        <span style="color:#44546a;">Hello Mr Smith,</span>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    context 'when <a> inner text is HTML elements' do
      # The input heredoc contains "\n" escapes inside and after the class
      # attr, i.e. the opening tag is split across lines.
      it 'leaves <img> elements as-is' do
        input = <<~HTML.chomp
          <a href="http://example.com/?abc=123&123=abc" class="abc\n"\n><img src="cid:123"></a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com/?abc=123&amp;123=abc" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com/?abc=123&amp;123=abc"><img src="cid:123"></a>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end

      # NOTE(review): the description says the <span> is stripped, but the
      # expected output still contains it (with lang and a normalised style
      # attr) — confirm the intended wording.
      it 'strips <span> tags, but not content' do
        input = <<~HTML.chomp
          <a href="http://facebook.de/examplesrbog"><span lang="EN-US" style='color:blue'>http://facebook.de/examplesrbog</span></a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://facebook.de/examplesrbog" rel="nofollow noreferrer noopener" target="_blank"><span lang="EN-US" style="color:blue;">http://facebook.de/examplesrbog</span></a>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end

      # NOTE(review): the surrounding <span> is kept in the expected output
      # (reduced to its color rule); only <o:p> and the run of &nbsp; are
      # gone — confirm the intended wording of the description.
      it 'also strips surrounding <span> and <o:p> tags' do
        input = <<~HTML.chomp
          <span style="font-size:10.0pt;font-family:&quot;Cambria&quot;,serif;color:#1F497D;mso-fareast-language:DE">web&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
          <a href="http://www.example.com"><span style="color:blue">www.example.com</span></a><o:p></o:p></span>
        HTML
        expected = <<~TEXT.chomp
          <span style="color:#1f497d;">web <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank"><span style="color:blue;">www.example.com</span></a></span>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end
    end

    context 'when <a> inner text and href do not match' do
      it 'adds title attr' do
        input = <<~HTML.chomp
          <a href="http://example.com">http://what-different.example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com">http://what-different.example.com</a>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end

      # The space inside the href is expected to come back as %20.
      it 'converts unsafe characters in href attr and title' do
        input = <<~HTML.chomp
          <a href="http://example.com %22test%22">http://what-different.example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com%20%22test%22" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com%20%22test%22">http://what-different.example.com</a>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end

      it 'does not add title attr (for different capitalization)' do
        input = <<~HTML.chomp
          <a href="http://example.com">http://EXAMPLE.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://EXAMPLE.com</a>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end

      it 'does not add title attr (for URL-safe/unsafe characters)' do
        input = <<~HTML.chomp
          <a href="http://example.com/?abc=123&123=abc">http://example.com?abc=123&amp;123=abc</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com/?abc=123&amp;123=abc" rel="nofollow noreferrer noopener" target="_blank">http://example.com?abc=123&amp;123=abc</a>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end
    end

    context 'for email links' do
      # NOTE(review): the <a> element itself is kept in the expected output;
      # only the style attr is gone, and no rel/target is added — the
      # description ("strips <a> tags") looks stale, confirm.
      it 'strips <a> tags' do
        input = <<~HTML.chomp
          <a href="mailto:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="mailto:john.smith@example.com">john.smith@example.com</a>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end

      # The upper-case "MAILTO:" scheme is left as written.
      it 'strips <a> tags (even with upcased "MAILTO:")' do
        input = <<~HTML.chomp
          <a href="MAILTO:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="MAILTO:john.smith@example.com">john.smith@example.com</a>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end

      # NOTE(review): href and inner text differ, yet the expected output only
      # loses the style attr; nothing is "extracted" — confirm the intended
      # wording of the description.
      it 'extracts destination address when it differs from <a> innertext' do
        input = <<~HTML.chomp
          <a href="MAILTO:john.smith2@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="MAILTO:john.smith2@example.com">john.smith@example.com</a>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end
    end
  end
  # Expectations for <img> elements: style filtering and conversion of
  # width/height attributes into CSS rules.
  context 'for <img> tags' do
    # The leading space left behind by the removed "color" rule is part of the
    # expected style value.
    it 'removes color CSS rule from style attr' do
      input = <<~HTML.chomp
        <img src="/some.png" style="color: blue; width: 30px; height: 50px">
      HTML
      expected = <<~TEXT.chomp
        <img src="/some.png" style=" width: 30px; height: 50px;">
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'converts width/height attrs to CSS rules' do
      input = <<~HTML.chomp
        <img src="/some.png" width="30px" height="50px">
      HTML
      expected = <<~TEXT.chomp
        <img src="/some.png" style="width:30px;height:50px;">
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    # Only the missing ";" after the last rule is added; the data: URI in src
    # is unchanged.
    it 'automatically adds terminal semicolons to CSS rules' do
      input = <<~HTML.chomp
        <img style="width: 181px; height: 125px" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...">
      HTML
      expected = <<~TEXT.chomp
        <img style="width: 181px; height: 125px;" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...">
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    context 'when <img> nested in <a>, nested in <p>' do
      # class, id, <o:p> and the text-decoration rule are absent from the
      # expected output; the link gains rel/target/title and the image gets
      # width/height as CSS.
      it 'sanitizes those elements as normal' do
        input = <<~HTML.chomp
          <p class="MsoNormal"><a href="http://www.example.com/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>
        HTML
        expected = <<~TEXT.chomp
          <p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/"><span style="color:blue;"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></span></a></p>
        TEXT

        expect(input.html2html_strict).to eq(expected)
      end
    end
  end
  1108. context 'sample email input' do
  # Quoted-mail header ("Von: … Gesendet: …"): the expected output has an empty
  # js-signatureMarker <span> in front of the header paragraph and no newline
  # after the opening <div>.
  it 'handles sample input 1' do
    input = <<~HTML.chomp
      <div>
      abc<p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    HTML
    expected = <<~TEXT.chomp
      <div>abc<span class="js-signatureMarker"></span><p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # Variant of sample 1 with a leading space before "abc" and inside the <p>;
  # the space before "abc" is gone in the expected output, the one inside the
  # <p> is kept.
  it 'handles sample input 2' do
    input = <<~HTML.chomp
      <div> abc<p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    HTML
    expected = <<~TEXT.chomp
      <div>abc<span class="js-signatureMarker"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # Variant of sample 2 with an extra space between </p> and </div>; that
  # trailing space is absent from the expected output.
  it 'handles sample input 3' do
    input = <<~HTML.chomp
      <div> abc<p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p> </div>
    HTML
    expected = <<~TEXT.chomp
      <div>abc<span class="js-signatureMarker"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # Mail signature made of five styled <div>s: margin/font rules and <o:p>
  # elements are gone from the expected output, and the mailto/http links are
  # reduced to their essential attributes.
  # NOTE(review): the mailto link in the input has `style=color: blue; …">`
  # with no opening quote; left as-is because it may be deliberate malformed
  # input — confirm.
  it 'handles sample input 4' do
    input = <<~HTML.chomp
      <div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Mit freundlichem Gruß<span class="Apple-converted-space">&nbsp;</span><br><br>John Smith<br>Service und Support<br><br>Example Service AG &amp; Co.<o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; "><a href="mailto:john.smith@example.com" style=color: blue; text-decoration: underline; ">john.smith@example.com</a></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; "><a href="http://www.example.com" style="color: blue; text-decoration: underline; ">www.example.com</a></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div>
    HTML
    expected = <<~TEXT.chomp
      <div><span>Mit freundlichem Gruß <br><br>John Smith<br>Service und Support<br><br>Example Service AG &amp; Co.</span></div><div>
      <span>Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span>
      </div><div>
      <span>Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span>
      </div><div>
      <a href="mailto:john.smith@example.com">john.smith@example.com</a>
      </div><div>
      <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank">www.example.com</a>
      </div>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # Word-style ("MsoNormal") mail body with an unclosed <body>/<div> structure:
  # the expected output has no <body>, class attrs, font-size rules or <o:p>
  # elements, keeps lower-cased color rules and closes every open <div>.
  it 'handles sample input 5' do
    input = <<~HTML.chomp
      <body lang="DE" link="blue" vlink="purple"><div class="WordSection1">
      <p class="MsoNormal"><span style="color:#1F497D">Guten Morgen, Frau ABC,<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D">Nochmals vielen Dank und herzliche Grüße
      <o:p></o:p></span></p>
      <div>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D"><o:p>&nbsp;</o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">Anna Smith<o:p></o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">art abc SEV GmbH<o:p></o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">art abc TRAV<o:p></o:p></span></b></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">Marktstätte 123<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">123456 Dorten<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">T: &#43;49 (0) 12345/1234560-1<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">T: &#43;49 (0) 12345/1234560-0<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">F: &#43;49 (0) 12345/1234560-2<o:p></o:p></span></p>
      <p class="MsoNormal"><a href="mailto:annad@example.com"><span style="font-size:9.0pt">annad@example.com</span></a><span style="font-size:9.0pt;color:#C00000"><o:p></o:p></span></p>
      <p class="MsoNormal"><a href="http://www.example.com/"><span style="font-size:9.0pt">www.example.com</span></a><span style="font-size:9.0pt;color:#1F497D">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
      </span><a href="http://www.ABC.com/"><span style="font-size:9.0pt">www.ABC.com</span></a><span style="font-size:9.0pt;color:#1F497D"><o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:8.0pt;color:#1F497D">Geschäftsführer Vor Nach, VorUndZu Nach&nbsp;&nbsp;&nbsp;&nbsp; -&nbsp;&nbsp;&nbsp;&nbsp; Amtsgericht Dort HRB 12345&nbsp;&nbsp;&nbsp; -&nbsp;&nbsp;&nbsp; Ein Unternehmer der ABC Gruppe<o:p></o:p></span></p>
    HTML
    expected = <<~TEXT.chomp
      <div>
      <p><span style="color:#1f497d;">Guten Morgen, Frau ABC,</span></p><p><span style="color:#1f497d;"><p>&nbsp;</p></span></p><p><span style="color:#1f497d;">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?</span></p><p><span style="color:#1f497d;"><p>&nbsp;</p></span></p><p><span style="color:#1f497d;">Nochmals vielen Dank und herzliche Grüße </span></p><div>
      <p><b><span style="color:#1f497d;"><p>&nbsp;</p></span></b></p><p><b><span style="color:#1f497d;">Anna Smith</span></b></p><p><b><span style="color:#1f497d;">art abc SEV GmbH</span></b></p><p><b><span style="color:#1f497d;">art abc TRAV</span></b></p><p><span style="color:#1f497d;">Marktstätte 123</span></p><p><span style="color:#1f497d;">123456 Dorten</span></p><p><span style="color:#1f497d;">T: +49 (0) 12345/1234560-1</span></p><p><span style="color:#1f497d;">T: +49 (0) 12345/1234560-0</span></p><p><span style="color:#1f497d;">F: +49 (0) 12345/1234560-2</span></p><p><a href="mailto:annad@example.com">annad@example.com</a><span style="color:#c00000;"></span></p><p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank">www.example.com</a><span style="color:#1f497d;"> </span><a href="http://www.ABC.com/" rel="nofollow noreferrer noopener" target="_blank">www.ABC.com</a><span style="color:#1f497d;"></span></p><p><span style="color:#1f497d;">Geschäftsführer Vor Nach, VorUndZu Nach - Amtsgericht Dort HRB 12345 - Ein Unternehmer der ABC Gruppe</span></p></div></div>
    TEXT

    expect(input.html2html_strict).to eq(expected)
  end
  # Sample 6: an MS Outlook reply header (MsoNormal paragraphs, <o:p> tags, inline
  # font/border styling) passed through html2html_strict.
  # Per the expected markup: class attributes and the font-size/font-family/border
  # styles are gone, the #1F497D colour is kept in lower case, "<o:p>&nbsp;</o:p>"
  # appears as "<p>&nbsp;</p>", and an empty js-signatureMarker span precedes the
  # "Von:" paragraph.
  # NOTE(review): this copy has listing numbers fused onto every line and has lost
  # indentation/blank lines - confirm the fixture text against the upstream spec.
  1175. it 'handles sample input 6' do
  1176. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1177. <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
  1178. <div>
  1179. <div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
  1180. <p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;">Von:</span></b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;"> Besucherbüro, MKuk [<a href="mailto:besucherbuero@example.com">mailto:besucherbuero@example.com</a>] <br>
  1181. <b>Gesendet:</b> Freitag, 16. Dezember 2016 08:05<br>
  1182. <b>An:</b> 'Amaia Epalza'<br>
  1183. <b>Betreff:</b> AW: Gruppe vtb Kultuur // 28.06.2017<o:p></o:p></span></p>
  1184. </div>
  1185. </div>
  1186. <p class="MsoNormal"><o:p>&nbsp;</o:p></p>
  1187. <p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Segoe UI&quot;,&quot;sans-serif&quot;;color:#1F497D">Reservierungsbestätigung Führung Skulptur-Projekte 2017 am
  1188. </span></b><o:p></o:p></p>
  1189. <p class="MsoNormal"><span style="font-size:10.0pt;font-family:&quot;Segoe UI&quot;,&quot;sans-serif&quot;;color:#1F497D">&nbsp;</span><o:p></o:p></p>
  1190. <p class="MsoNormal">Guten Morgen Frau Epalza,<o:p></o:p></p>
  1191. HTML
  1192. <p><span style="color:#1f497d;"><p>&nbsp;</p></span></p><div>
  1193. <div>
  1194. <span class="js-signatureMarker"></span><p><b>Von:</b><span> Besucherbüro, MKuk [<a href="mailto:besucherbuero@example.com">mailto:besucherbuero@example.com</a>] <br>
  1195. <b>Gesendet:</b> Freitag, 16. Dezember 2016 08:05<br>
  1196. <b>An:</b> 'Amaia Epalza'<br>
  1197. <b>Betreff:</b> AW: Gruppe vtb Kultuur // 28.06.2017</span></p></div></div><p>&nbsp;</p><p><b><span style="color:#1f497d;">Reservierungsbestätigung Führung Skulptur-Projekte 2017 am </span></b></p><p><span style="color:#1f497d;"> </span></p><p>Guten Morgen Frau Epalza,</p>
  1198. TEXT
  1199. end
  # Sample 7: <div> blocks carrying empty class attributes, one link, and a
  # trailing run of unclosed <div>/<br> tags.
  # Per the expected markup: class attributes are removed, the link gains
  # rel="nofollow noreferrer noopener" and target="_blank", a <div> holding only
  # <br> becomes <div>&nbsp;</div>, and the unclosed trailing tags are absent.
  # NOTE(review): listing numbers are fused onto each line in this copy.
  1200. it 'handles sample input 7' do
  1201. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1202. <div class="">Wir brauchen also die Instanz <a href="http://example.zammad.com" class="">example.zammad.com</a>, kann die aber nicht mehr nutzen.</div><div class=""><br class=""></div><div class="">Bitte um Freischaltung.</div><div class=""><br class=""></div><div class=""><br class=""><div class="">
  1203. HTML
  1204. <div>Wir brauchen also die Instanz <a href="http://example.zammad.com" rel="nofollow noreferrer noopener" target="_blank">example.zammad.com</a>, kann die aber nicht mehr nutzen.</div><div>&nbsp;</div><div>Bitte um Freischaltung.</div><div>&nbsp;</div>
  1205. TEXT
  1206. end
  # Sample 8: two MsoNormal paragraphs with Calibri font styling and <o:p> tags.
  # Per the expected markup: the line break inside the sentence is rendered as a
  # single space, only the colour declaration survives (lower-cased), <b> is kept,
  # and "<o:p>&nbsp;</o:p>" appears as "<p>&nbsp;</p>".
  # NOTE(review): listing numbers are fused onto each line in this copy.
  1207. it 'handles sample input 8' do
  1208. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1209. <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D;mso-fareast-language:EN-US">oh jeee … Zauberwort vergessen ;-) Können Sie mir
  1210. <b>bitte</b> noch meine Testphase verlängern?<o:p></o:p></span></p>
  1211. <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
  1212. HTML
  1213. <p><span style="color:#1f497d;">oh jeee … Zauberwort vergessen ;-) Können Sie mir <b>bitte</b> noch meine Testphase verlängern?</span></p><p><span style="color:#1f497d;"><p>&nbsp;</p></span></p>
  1214. TEXT
  1215. end
  # Sample 9: a link whose title differs from its href (the title carries an
  # extra %5BextranetHandler%5D segment) and whose rel is just "nofollow".
  # Per the expected markup: href, title and link text are unchanged; only rel is
  # extended to "nofollow noreferrer noopener".
  # NOTE(review): listing numbers are fused onto each line in this copy.
  1216. it 'handles sample input 9' do
  1217. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1218. <div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a></div>
  1219. HTML
  1220. <div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a></div>
  1221. TEXT
  1222. end
  # Sample 10: one heavily styled table row (<tr> with eleven <td> cells).
  # Per the expected markup: valign and the border/background-color/padding
  # declarations stay on each <td>; width/nowrap/class attributes and the width,
  # height, background-position and background-repeat declarations are gone;
  # font-only <span> wrappers are reduced to their text while colour spans remain.
  # NOTE(review): in this copy the single input line is hard-wrapped across three
  # physical lines (two of them without a listing number) - confirm against the
  # upstream spec before editing the fixture.
  1223. it 'handles sample input 10' do
  1224. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1225. <tr style="height: 15pt;" class=""><td width="170" nowrap="" valign="bottom" style="width: 127.5pt; border-style: none none none solid; border-left-width: 1pt; border-left-color: windowtext; padding: 0cm 5.4pt; height: 15pt;" class=""><p class="MsoNormal" align="center" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;"><span style="" class="">&nbsp;</span></p></td><td width="58" nowrap="" valign="bottom" style="width: 43.5pt; padding: 0cm 5.4pt; height: 15pt;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="" class="">20-29</span></div></td><td width="47" nowrap="" valign="bottom" style="width: 35pt; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" class="">200</span></div></td><td width="76" nowrap="" valign="bottom" style="width: 57pt; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" class="">-1</span></div></td><td width="76" nowrap="" valign="bottom" style="width: 57pt; border-style: none solid none none; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" 
class="">201</span></div></td><td width="107" nowrap="" valign="bottom" style="width: 80pt; padding: 0cm 5.4pt; height: 15pt;" class=""></td><td width="85" nowrap="" valign="bottom" style="width: 64pt; padding: 0cm 5.4pt; height: 15pt;" class=""></td><td width="101" nowrap="" valign="bottom" style="width: 76pt; border-style: none solid solid; border-left-width: 1pt; border-left-color: windowtext; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><b class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">country</span></b><span style="font-size: 11pt; font-family: Calibri, sans-serif;" class=""></span></div></td><td width="87" nowrap="" valign="bottom" style="width: 65pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Target (gross)</span></div></td><td width="123" nowrap="" valign="bottom" style="width: 92pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times 
New Roman', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Remaining Recruits</span></div></td><td width="87" nowrap="" valign="bottom" style="width: 65pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Total Recruits</span></div></td></tr>
  1226. HTML
  1227. <tr>
  1228. <td valign="bottom" style=" border-style: none none none solid; border-left-width: 1pt; border-left-color: windowtext; padding: 0cm 5.4pt;"><p>&nbsp;</p></td>
  1229. <td valign="bottom" style=" padding: 0cm 5.4pt;"><div>20-29</div></td>
  1230. <td valign="bottom" style=" background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><div><span style="color: rgb(156, 0, 6);">200</span></div></td>
  1231. <td valign="bottom" style=" background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><div><span style="color: rgb(156, 0, 6);">-1</span></div></td>
  1232. <td valign="bottom" style=" border-style: none solid none none; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><div><span style="color: rgb(156, 0, 6);">201</span></div></td>
  1233. <td valign="bottom" style=" padding: 0cm 5.4pt;"></td>
  1234. <td valign="bottom" style=" padding: 0cm 5.4pt;"></td>
  1235. <td valign="bottom" style=" border-style: none solid solid; border-left-width: 1pt; border-left-color: windowtext; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>
  1236. <b>country</b>
  1237. </div></td>
  1238. <td valign="bottom" style=" border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Target (gross)</div></td>
  1239. <td valign="bottom" style=" border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Remaining Recruits</div></td>
  1240. <td valign="bottom" style=" border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Total Recruits</div></td>
  1241. </tr>
  1242. TEXT
  1243. end
  # Sample 11: a mail body wrapped in a long run of identically styled nested
  # <div> elements, with &nbsp;-separated words and an inline cid: image.
  # Per the expected markup: the wrapper run is reduced to a single outer <div>,
  # &nbsp; between words shows up as plain spaces, "$" in the cid: src is
  # percent-encoded as %24, the <img> keeps only its width/height style, and
  # colours are lower-cased (#17365D -> #17365d).
  # NOTE(review): in this copy the single input line is hard-wrapped across two
  # physical lines (the second without a listing number) - confirm against the
  # upstream spec before editing the fixture.
  1244. it 'handles sample input 11' do
  1245. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1246. <div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div>Dear Bob<span style="line-height: 23.8px;">:</span><span style="color: rgb(255, 255, 255); line-height: 1.7;">Mr/Mrs</span></div><div><br></div><div><span style="line-height: 1.7;">We&nbsp;are&nbsp;one&nbsp;of&nbsp;the&nbsp;leading&nbsp;manufacturer&nbsp;and&nbsp;supplier&nbsp;of&nbsp;</span>conduits and cars since 3000.</div><div><br></div><div>Could you inform me the specification you 
need?</div><div><br></div><div>May I sent you our products catalogues for your reference?</div><div><br></div><div><img src="cid:5cb2783c$1$15ae9b384c8$Coremail$zhanabcdzhao$example.com" orgwidth="1101" orgheight="637" data-image="1" style="width: 722.7px; height: 418px; border: none;"></div><div>Best regards!</div><div><br></div><div><b style="line-height: 1.7;"><i><u><span lang="EL" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#17365D;\nmso-ansi-language:EL">Welcome to our booth B11/1 Hall 13 during SOMEWHERE\n9999.</span></u></i></b></div><div style="position:relative;zoom:1"><div>Bob Smith</div><div><div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) &nbsp;Fax: 0000-11-12345678&nbsp;</div><div><span style="color:#17365d;">Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</span></div><div>Web: www.example.com</div></div><div style="clear:both"></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div>
  1247. HTML
  1248. <div>\n<div>Dear Bob:<span style="color: rgb(255, 255, 255);">Mr/Mrs</span>
  1249. </div><div>&nbsp;</div><div>We are one of the leading manufacturer and supplier of conduits and cars since 3000.</div><div>&nbsp;</div><div>Could you inform me the specification you need?</div><div>&nbsp;</div><div>May I sent you our products catalogues for your reference?</div><div>&nbsp;</div><div><img src="cid:5cb2783c%241%2415ae9b384c8%24Coremail%24zhanabcdzhao%24example.com" style="width: 722.7px; height: 418px;"></div><div>Best regards!</div><div>&nbsp;</div><div><b><i><u><span lang="EL" style="color:#17365d;">Welcome to our booth B11/1 Hall 13 during SOMEWHERE 9999.</span></u></i></b></div><div>\n<div>Bob Smith</div><div>\n<div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) Fax: 0000-11-12345678</div><div><span style="color:#17365d;">Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</span></div><div>Web: www.example.com</div></div></div></div>
  1250. TEXT
  1251. end
  # Sample 12: a list item holding a styled link whose visible text ("Luxemburg")
  # differs from its long href.
  # Per the expected markup: style/class attributes and the inner colour <span>
  # are gone, rel/target are added, and a title attribute equal to the href is
  # present on the link.
  # NOTE(review): listing numbers are fused onto each line in this copy.
  1252. it 'handles sample input 12' do
  1253. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1254. <li><a style="font-size:15px; font-family:Arial;color:#0f7246" class="text_link" href="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh"><span style="color: rgb(0, 0, 0);">Luxemburg</span></a></li>
  1255. HTML
  1256. <li><a href="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh" rel="nofollow noreferrer noopener" target="_blank" title="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh">Luxemburg</a></li>
  1257. TEXT
  1258. end
  1259. # https://github.com/zammad/zammad/issues/4112
  # Two Outlook "list" paragraphs whose bullet is a Symbol-font middle dot wrapped
  # in <![if !supportLists]> ... <![endif]> and padded with &nbsp; runs.
  # Per the expected markup each paragraph collapses to "<p>• N</p>": the
  # conditional wrappers and styling are gone and the bullet is rendered as "•".
  # NOTE(review): listing numbers are fused onto each line in this copy.
  1260. it 'converts lists from MS Outlook correctly' do
  1261. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1262. <p class="MsoPlainText" style="margin-left:36.0pt;text-indent:-18.0pt;mso-list:l0 level1 lfo1">
  1263. <![if !supportLists]><span style="font-family:Symbol;mso-fareast-language:EN-US"><span style="mso-list:Ignore">·<span style="font:7.0pt &quot;Times New Roman&quot;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
  1264. </span></span></span><![endif]><span style="mso-fareast-language:EN-US">1<o:p></o:p></span></p>
  1265. <p class="MsoPlainText" style="margin-left:36.0pt;text-indent:-18.0pt;mso-list:l0 level1 lfo1">
  1266. <![if !supportLists]><span style="font-family:Symbol;mso-fareast-language:EN-US"><span style="mso-list:Ignore">·<span style="font:7.0pt &quot;Times New Roman&quot;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
  1267. </span></span></span><![endif]><span style="mso-fareast-language:EN-US">2<o:p></o:p></span></p>
  1268. HTML
  1269. <p>• 1</p><p>• 2</p>
  1270. TEXT
  1271. end
  1272. # https://github.com/zammad/zammad/issues/4184
  # Images wrapped in <![if !vml]> ... <![endif]> (downlevel-revealed conditional
  # comments) inside one paragraph.
  # Per the expected markup: the conditional markers are removed while the <img>
  # tags stay, their width/height attributes appear as a style
  # ("width:52px;height:52px;"), v:shapes is gone, and the styled spans are
  # reduced to their text.
  # NOTE(review): listing numbers are fused onto each line in this copy.
  1273. it 'deletes downlevel revealed conditional comments' do
  1274. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1275. <p class="MsoPlainText" style="margin-left:36.0pt;text-indent:-18.0pt;mso-list:l0 level1 lfo1">
  1276. <![if !vml]><img width="52" height="52" src="cid:image001.png@01D8AC0A.08251CD0" v:shapes="Picture_x0020_1"><![endif]>
  1277. <span style="font-size:9.0pt;font-family:&quot;Arial&quot;,sans-serif;color:black;mso-fareast-language:EN-AU">dummy1</span>
  1278. <![if !vml]><img width="52" height="52" src="cid:image002.png@01D8AC0A.08251CD1" v:shapes="Picture_x0020_2"><![endif]>
  1279. <span style="font-size:9.0pt;font-family:&quot;Arial&quot;,sans-serif;color:black;mso-fareast-language:EN-AU">dummy2</span>
  1280. </p>
  1281. HTML
  1282. <p>
  1283. <img src="cid:image001.png@01D8AC0A.08251CD0" style="width:52px;height:52px;"> dummy1 <img src="cid:image002.png@01D8AC0A.08251CD1" style="width:52px;height:52px;"> dummy2 </p>
  1284. TEXT
  1285. end
  1286. end
  # Signature / quoted-reply detection as performed by html2html_strict.
  # Every example runs an HTML fragment through html2html_strict and compares the
  # result with markup in which `marker` (an empty js-signatureMarker <span>) has
  # been inserted - or, for the two negative cases, has not.
  #   * A "--" or "__" line (between <br>, <br/>, after "<br/>" + newline, or in
  #     its own <p>): the marker sits ahead of the break/paragraph introducing it.
  #   * German "Von:" reply headers (a bare <br> run, a <p> inside a styled <div>,
  #     <font> tags inside a <div>): the marker sits ahead of the header block.
  #   * An English "On ... wrote:" intro inside a <blockquote>: the marker sits
  #     ahead of the <blockquote>; a blockquote without such an intro gets none.
  #   * A German "Am ... schrieb ...:" intro: marked only when a <blockquote>
  #     follows the enclosing <div>.
  # NOTE(review): this copy has listing numbers fused onto every line and has lost
  # indentation/blank lines - confirm fixture text against the upstream spec.
  1287. context 'signature recognition' do
  1288. let(:marker) { '<span class="js-signatureMarker"></span>' }
  1289. it 'places marker before "--" line (surrounded by <br>)' do
  1290. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1291. lalala<br>--<br>Max Mix
  1292. HTML
  1293. lalala#{marker}<br>--<br>Max Mix
  1294. TEXT
  1295. end
  1296. it 'places marker before "--" line (surrounded by <br/>)' do
  1297. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1298. lalala<br/>--<br/>Max Mix
  1299. HTML
  1300. lalala#{marker}<br>--<br>Max Mix
  1301. TEXT
  1302. end
  1303. it 'places marker before "--" line (preceded by <br/>\n)' do
  1304. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1305. lalala<br/>
  1306. --<br/>Max Mix
  1307. HTML
  1308. lalala#{marker}<br> --<br>Max Mix
  1309. TEXT
  1310. end
  1311. it 'places marker before "--" line (surrounded by <p>)' do
  1312. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1313. lalala<p>--</p>Max Mix
  1314. HTML
  1315. lalala#{marker}<p>--</p>Max Mix
  1316. TEXT
  1317. end
  1318. it 'places marker before "__" line (surrounded by <br>)' do
  1319. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1320. lalala<br>__<br>Max Mix
  1321. HTML
  1322. lalala#{marker}<br>__<br>Max Mix
  1323. TEXT
  1324. end
  1325. it 'places marker before quoted reply’s "Von:" header (in German)' do
  1326. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1327. den.<br><br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hello,<br><br>ich versuche an den Punkten
  1328. HTML
  1329. den.<br>#{marker}<br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hello,<br><br>ich versuche an den Punkten
  1330. TEXT
  1331. end
  1332. it 'places marker before quoted reply’s "Von:" header (as <p> with stripped parent <div>)' do
  1333. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1334. <div><div style="border:none;border-top:solid #e1e1e1 1.0pt;padding:3.0pt 0cm 0cm 0cm"><p class="MsoNormal"><b><span lang="DE" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif">Von:</span></b><span lang="DE" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif"> Martin Edenhofer via Zammad Helpdesk [mailto:<a href="mailto:support@example.com">support@zammad.com</a>] <br><b>Gesendet:</b>\u0020
  1335. HTML
  1336. <div>#{marker}<p><b><span lang="DE">Von:</span></b><span lang="DE"> Martin Edenhofer via Zammad Helpdesk [mailto:<a href="mailto:support@example.com">support@zammad.com</a>] <br><b>Gesendet:</b> </span></p></div>
  1337. TEXT
  1338. end
  1339. it 'places marker before quoted reply’s "Von:" header (as <p> with parent <div>)' do
  1340. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1341. <div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
  1342. <p class="MsoNormal" style="margin-left:35.4pt"><b><span style="font-family:Calibri;color:black">Von:
  1343. </span></b><span style="font-family:Calibri;color:black">Johanna Kiefer via Zammad Projects &lt;projects@example.com&gt;<br>
  1344. <b>Organisation: </b>Zammad GmbH<br>
  1345. <b>Datum: </b>Montag, 6. März 2017 um 13:32<br>
  1346. HTML
  1347. <div>
  1348. #{marker}<p><b>Von: </b><span>Johanna Kiefer via Zammad Projects &lt;projects@example.com&gt;<br>
  1349. <b>Organisation: </b>Zammad GmbH<br>
  1350. <b>Datum: </b>Montag, 6. März 2017 um 13:32<br></span></p></div>
  1351. TEXT
  1352. end
  1353. it 'places marker before quoted reply’s "Von:" header (as <div>)' do
  1354. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1355. <div><br>
  1356. <br>
  1357. <br><font size=1 color=#5f5f5f face="sans-serif">Von: &nbsp; &nbsp; &nbsp;
  1358. &nbsp;</font><font size=1 face="sans-serif">Hotel &lt;info@example.com&gt;</font>
  1359. <br><font size=1 color=#5f5f5f face="sans-serif">An: &nbsp; &nbsp; &nbsp;
  1360. &nbsp;</font></div>
  1361. HTML
  1362. #{marker}<div><br>Von: Hotel &lt;info@example.com&gt; <br>An: </div>
  1363. TEXT
  1364. end
  1365. it 'places marker before English quoted text intro (as <blockquote>)' do
  1366. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1367. <br class=""><div><blockquote type="cite" class=""><div class="">On 04 Mar 2017, at 14:47, Oliver Ruhm &lt;<a href="mailto:oliver@example.com" class="">oliver@example.com</a>&gt; wrote:</div><br class="Apple-interchange-newline">
  1368. HTML
  1369. <div>#{marker}<blockquote type="cite">
  1370. <div>On 04 Mar 2017, at 14:47, Oliver Ruhm &lt;<a href="mailto:oliver@example.com">oliver@example.com</a>&gt; wrote:</div><br>
  1371. </blockquote></div>
  1372. TEXT
  1373. end
  1374. it 'does not place marker if blockquote doesn’t contain a quoted text intro' do
  1375. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1376. <br class=""><div><blockquote type="cite" class=""><div class="">some note</div><br class="Apple-interchange-newline">
  1377. HTML
  1378. <div><blockquote type="cite">
  1379. <div>some note</div><br>
  1380. </blockquote></div>
  1381. TEXT
  1382. end
  1383. it 'does not place marker if quoted text intro isn’t followed by a <blockquote>' do
  1384. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1385. <div>
  1386. <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
  1387. <br>
  1388. </div>
  1389. HTML
  1390. <div>
  1391. <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
  1392. <br>
  1393. </div>
  1394. TEXT
  1395. end
  1396. it 'places marker before German quoted text intro (before <blockquote>)' do
  1397. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  1398. <div>
  1399. <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
  1400. <br>
  1401. </div>
  1402. <blockquote type="cite">
  1403. <div>Dear Mr. Smith,<br></div>
  1404. </blockquote>
  1405. HTML
  1406. #{marker}<div>
  1407. <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
  1408. <br>
  1409. </div><blockquote type="cite">
  1410. <div>Dear Mr. Smith,<br>
  1411. </div></blockquote>
  1412. TEXT
  1413. end
  1414. end
  1415. end
  # Examples for signature_identify on plain-text mail bodies.
  # Every example calls signature_identify('text', true) on a string and compares
  # the result with the same text in which `marker`
  # ('######SIGNATURE_MARKER######') has been inserted.
  #   * No signature: the string comes back unchanged.
  #   * A "--" line: the marker is placed at the start of that line; a line such
  #     as "--no not match--" is not treated as a signature line.
  #   * Reply intros "On ... wrote:" (English) and "Am ... um ... schrieb ...:"
  #     (German): the marker is placed at the start of the intro line. In the
  #     English Apple-style example a later "--" line gets a second marker.
  #   * Outlook-style header blocks starting with "From:", "Von:" or "De :": the
  #     marker is placed at the start of the first header line.
  # NOTE(review): this copy has listing numbers fused onto every line and appears
  # to have lost blank lines and leading spaces inside the heredocs: the
  # 'ignores trailing empty line' and 'ignores trailing double empty lines'
  # fixtures are identical here, the '" -- " line' input shows no leading space,
  # and the 'places marker on empty line' input shows no empty line. Confirm the
  # fixture text against the upstream spec before relying on it.
  1416. describe '#signature_identify' do
  1417. let(:marker) { '######SIGNATURE_MARKER######' }
  1418. context 'with no signature present' do
  1419. it 'leaves string as-is' do
  1420. expect((+'foo').signature_identify('text', true)).to eq('foo')
  1421. end
  1422. end
  1423. context 'with signature present' do
  1424. it 'places marker at start of "--" line' do
  1425. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1426. foo
  1427. --
  1428. bar
  1429. SRC
  1430. foo
  1431. #{marker}--
  1432. bar
  1433. MARKED
  1434. end
  1435. it 'places marker before English quoted text intro' do
  1436. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1437. On 01/04/15 10:55, Bob Smith wrote:
  1438. SRC
  1439. #{marker}On 01/04/15 10:55, Bob Smith wrote:
  1440. MARKED
  1441. end
  1442. it 'places marker before German quoted text intro' do
  1443. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1444. Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@zammad.ink>:
  1445. SRC
  1446. #{marker}Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@zammad.ink>:
  1447. MARKED
  1448. end
  1449. it 'ignores trailing empty line' do
  1450. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1451. test 123
  1452. test 123
  1453. --
  1454. Bob Smith
  1455. SRC
  1456. test 123
  1457. test 123
  1458. #{marker}--
  1459. Bob Smith
  1460. MARKED
  1461. end
  1462. it 'ignores trailing double empty lines' do
  1463. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1464. test 123
  1465. test 123
  1466. --
  1467. Bob Smith
  1468. SRC
  1469. test 123
  1470. test 123
  1471. #{marker}--
  1472. Bob Smith
  1473. MARKED
  1474. end
  1475. it 'ignores leading/trailing empty lines' do
  1476. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1477. test 123\u0020
  1478. 1
  1479. 2
  1480. 3
  1481. 4
  1482. 5
  1483. 6
  1484. 7
  1485. 8
  1486. 9
  1487. --
  1488. Bob Smith
  1489. SRC
  1490. test 123\u0020
  1491. 1
  1492. 2
  1493. 3
  1494. 4
  1495. 5
  1496. 6
  1497. 7
  1498. 8
  1499. 9
  1500. #{marker}--
  1501. Bob Smith
  1502. MARKED
  1503. end
  1504. it 'ignores lines starting with "--" but containing more text' do
  1505. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1506. test 123\u0020
  1507. --no not match--
  1508. --
  1509. Bob Smith
  1510. SRC
  1511. test 123\u0020
  1512. --no not match--
  1513. #{marker}--
  1514. Bob Smith
  1515. MARKED
  1516. end
  1517. it 'places marker at start of " -- " line' do
  1518. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1519. test 123\u0020
  1520. --no not match--
  1521. --\u0020
  1522. Bob Smith
  1523. SRC
  1524. test 123\u0020
  1525. --no not match--
  1526. #{marker} --\u0020
  1527. Bob Smith
  1528. MARKED
  1529. end
  1530. it 'places marker on empty line if possible / only places one marker' do
  1531. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1532. test 123\u0020
  1533. --
  1534. Bob Smith
  1535. --
  1536. Bob Smith
  1537. SRC
  1538. test 123\u0020
  1539. #{marker}
  1540. --
  1541. Bob Smith
  1542. --
  1543. Bob Smith
  1544. MARKED
  1545. end
  1546. context 'for Apple email quote text' do
  1547. context 'in English' do
  1548. it 'places two markers, one before quoted text intro and one at start of "--" line' do
  1549. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1550. test 123\u0020
  1551. --no not match--
  1552. Bob Smith
  1553. On 01/04/15 10:55, Bob Smith wrote:
  1554. lalala
  1555. --
  1556. some test
  1557. SRC
  1558. test 123\u0020
  1559. --no not match--
  1560. Bob Smith
  1561. #{marker}On 01/04/15 10:55, Bob Smith wrote:
  1562. lalala
  1563. #{marker}--
  1564. some test
  1565. MARKED
  1566. end
  1567. end
  1568. context 'auf Deutsch' do
  1569. it 'places marker before quoted text intro' do
  1570. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1571. test 123\u0020
  1572. --no not match--
  1573. Bob Smith
  1574. Am 03.04.2015 um 20:58 schrieb Bob Smith <bob@example.com>:
  1575. lalala
  1576. SRC
  1577. test 123\u0020
  1578. --no not match--
  1579. Bob Smith
  1580. #{marker}Am 03.04.2015 um 20:58 schrieb Bob Smith <bob@example.com>:
  1581. lalala
  1582. MARKED
  1583. end
  1584. end
  1585. end
  1586. context 'for MS email quote text' do
  1587. context 'in English' do
  1588. it 'places marker before quoted text intro' do
  1589. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1590. test 123test 123\u0020
  1591. --no not match--
  1592. Bob Smith
  1593. From: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1594. Sent: Donnerstag, 2. April 2015 10:00
  1595. lalala</div>
  1596. SRC
  1597. test 123test 123\u0020
  1598. --no not match--
  1599. Bob Smith
  1600. #{marker}From: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1601. Sent: Donnerstag, 2. April 2015 10:00
  1602. lalala</div>
  1603. MARKED
  1604. end
  1605. end
  1606. context 'auf Deutsch' do
  1607. it 'places marker before quoted text intro' do
  1608. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1609. test 123\u0020
  1610. --no not match--
  1611. Bob Smith
  1612. Von: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1613. Gesendet: Donnerstag, 2. April 2015 10:00
  1614. Betreff: lalala
  1615. SRC
  1616. test 123\u0020
  1617. --no not match--
  1618. Bob Smith
  1619. #{marker}Von: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1620. Gesendet: Donnerstag, 2. April 2015 10:00
  1621. Betreff: lalala
  1622. MARKED
  1623. end
  1624. end
  1625. context 'en francais' do
  1626. it 'places marker before quoted text intro' do
  1627. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1628. test 123\u0020
  1629. --no not match--
  1630. Bob Smith
  1631. De : Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1632. Envoyé : mercredi 29 avril 2015 17:31
  1633. Objet : lalala
  1634. SRC
  1635. test 123\u0020
  1636. --no not match--
  1637. Bob Smith
  1638. #{marker}De : Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1639. Envoyé : mercredi 29 avril 2015 17:31
  1640. Objet : lalala
  1641. MARKED
  1642. end
  1643. end
  1644. end
  1645. end
  1646. end
  # Examples for `utf8_encode`: behaviour on strings that are already UTF-8,
  # on strings in other encodings (with and without a `from:` hint), and
  # wall-clock limits on large inputs.
  describe '#utf8_encode' do
    context 'on valid, UTF-8-encoded strings' do
      subject(:input) { 'hello' }

      it 'returns an identical copy' do
        # Same content and encoding, but a different object than the receiver.
        expect(input.utf8_encode).to eq(input)
        expect(input.utf8_encode.encoding).to be(input.encoding)
        expect(input.utf8_encode).not_to be(input)
      end

      context 'which are incorrectly set to other, technically valid encodings' do
        subject(:input) { described_class.new('ö', encoding: 'tis-620') }

        it 'sets input encoding to UTF-8 instead of attempting conversion' do
          # The expectation is the same bytes simply re-tagged as UTF-8.
          encoded  = input.utf8_encode
          retagged = input.dup.force_encoding('utf-8')

          expect(encoded).to eq(retagged)
        end
      end
    end

    context 'on strings in other encodings' do
      subject(:input) { original_string.encode(input_encoding) }

      # Defaults for the nested groups; the "invalid from:" group overrides both.
      let(:original_string) { 'Tschüss!' }
      let(:input_encoding) { Encoding::ISO_8859_2 }

      context 'with no from: option' do
        it 'detects the input encoding' do
          expect(input.utf8_encode).to eq(original_string)
        end
      end

      context 'with a valid from: option' do
        it 'uses the specified input encoding' do
          expect(input.utf8_encode(from: 'iso-8859-2')).to eq(original_string)
        end

        it 'uses any valid input encoding, even if not correct' do
          # The ISO-8859-2 bytes are read as gb18030, yielding different text.
          expect(input.utf8_encode(from: 'gb18030')).to eq('Tsch黶s!')
        end
      end

      context 'with an invalid from: option' do
        let(:original_string) { '―陈志' }
        let(:input_encoding) { Encoding::GB18030 }

        it 'does not try it' do
          # Plain String#encode with the wrong source encoding raises ...
          expect { input.encode('utf-8', 'gb2312') }.to raise_error(Encoding::InvalidByteSequenceError)
          # ... whereas utf8_encode given the same hint must not.
          expect { input.utf8_encode(from: 'gb2312') }.not_to raise_error
        end

        it 'uses the detected input encoding instead' do
          expect(input.utf8_encode(from: 'gb2312')).to eq(original_string)
        end
      end
    end

    # Each example wraps its expectation in Timeout.timeout, so it fails with a
    # timeout error when the conversion exceeds the given number of seconds.
    context 'performance' do
      subject(:input) { original_string.encode(input_encoding) }

      context 'with utf8_encode in iso-8859-1' do
        let(:original_string) { 'äöü0' * 999_999 }
        let(:input_encoding) { Encoding::ISO_8859_1 }

        it 'detects the input encoding' do
          Timeout.timeout(1) { expect(input.utf8_encode(from: 'iso-8859-1')).to eq(original_string) }
        end
      end

      context 'with utf8_encode in utf-8' do
        let(:original_string) { 'äöü0' * 999_999 }
        let(:input_encoding) { Encoding::UTF_8 }

        it 'detects the input encoding' do
          Timeout.timeout(1) { expect(input.utf8_encode(from: 'utf-8')).to eq(original_string) }
        end
      end

      context 'with utf8_encode in iso-8859-1 and charset detection' do
        # Smaller input and an 18-second limit; the `from:` hint ('utf-8') does
        # not match the ISO-8859-1 input encoding.
        let(:original_string) { 'äöü0' * 199_999 }
        let(:input_encoding) { Encoding::ISO_8859_1 }

        it 'detects the input encoding' do
          Timeout.timeout(18) { expect(input.utf8_encode(from: 'utf-8')).to eq(original_string) }
        end
      end
    end
  end
  1726. end