string_spec.rb 94 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005
  1. # Copyright (C) 2012-2024 Zammad Foundation, https://zammad-foundation.org/
  2. # frozen_string_literal: true
  3. require 'rails_helper'
  4. RSpec.describe String do
  # Covers String#strip: the stock trimming rules first, then the cases this
  # spec labels as monkey-patched (zero-width spaces, binary-encoded input).
  describe '#strip' do
    context 'default behavior' do
      it 'removes leading/trailing spaces' do
        padded = ' test '

        expect(padded.strip).to eq('test')
      end

      it 'removes trailing newlines' do
        with_newline = "test\n"

        expect(with_newline.strip).to eq('test')
      end

      it 'does not remove internal spaces / newlines' do
        inner_whitespace = "test \n test"

        expect(inner_whitespace.strip).to eq("test \n test")
      end
    end

    context 'monkey-patched behavior' do
      it 'removes leading/trailing zero-width spaces, but not internal ones' do
        # U+200B (zero-width space) occurs both inside the text and at its end.
        input = " \r\n test \u{200B} \n test\u{200B} \u{200B}"

        expect(input.strip).to eq("test \u{200B} \n test")
      end

      it 'does not break on non-unicode strings' do
        # Input and expectation are both ASCII-8BIT; only the trailing space differs.
        binary_input  = described_class.new("\xC2\xA92011 Z ", encoding: 'ASCII-8BIT')
        binary_output = described_class.new("\xC2\xA92011 Z", encoding: 'ASCII-8BIT')

        expect(binary_input.strip).to eq(binary_output)
      end
    end
  end
  # Covers String#strip!, the in-place counterpart of #strip.
  #
  # Every example checks two things on the value returned by strip!:
  #   * `be(str)`  - it is the very same object as the receiver (mutated in place)
  #   * `eq(...)`  - its content equals the expected stripped text
  describe '#strip!' do
    context 'default behavior' do
      it 'removes leading/trailing spaces (in place)' do
        str = +' test '
        expect(str.strip!).to be(str).and eq('test')
      end

      it 'removes trailing newlines (in place)' do
        str = +"test\n"
        expect(str.strip!).to be(str).and eq('test')
      end

      it 'does not remove internal spaces / newlines (in place)' do
        # Core String#strip! returns nil when nothing was removed; presumably the
        # trailing space is here so the call has something to strip and the
        # identity check still applies - confirm against the patched method.
        str = +"test \n test "

        # Compare against a literal. The previous `eq(str)` compared the mutated
        # object with itself and therefore could never fail on content.
        expect(str.strip!).to be(str).and eq("test \n test")
      end
    end

    context 'monkey-patched behavior' do
      it 'removes leading/trailing zero-width spaces, but not internal ones (in place)' do
        # U+200B (zero-width space) occurs both inside the text and at its end.
        str = +" \r\n test \u{200B} \n test\u{200B} \u{200B}"
        expect(str.strip!).to be(str).and eq("test \u{200B} \n test")
      end

      # NOTE(review): the matching #strip example calls this same ASCII-8BIT input
      # "non-unicode"; the two descriptions could be aligned.
      it 'does not break on invalid-unicode strings (in place)' do
        str = described_class.new("\xC2\xA92011 Z ", encoding: 'ASCII-8BIT')
        expect(str.strip!)
          .to be(str).and eq(described_class.new("\xC2\xA92011 Z", encoding: 'ASCII-8BIT'))
      end
    end
  end
  # Covers String#to_filename (Ruby constant path -> file path).
  describe '#to_filename' do
    it 'does not modify strings in place' do
      samples = %w[test Some::File]

      samples.each do |sample|
        expect { sample.to_filename }.not_to change { sample }
      end
    end

    it 'leaves all-downcase strings as-is' do
      converted = 'test'.to_filename

      expect(converted).to eq('test')
    end

    it 'converts camelcase Ruby constant paths to snakecase file paths' do
      converted = 'Some::File'.to_filename

      expect(converted).to eq('some/file')
    end
  end
  # Covers String#to_classname (file path -> Ruby constant path).
  describe '#to_classname' do
    it 'does not modify strings in place' do
      samples = %w[test some/file]

      samples.each do |sample|
        expect { sample.to_classname }.not_to change { sample }
      end
    end

    it 'capitalizes all-downcase strings' do
      converted = 'test'.to_classname

      expect(converted).to eq('Test')
    end

    it 'converts snakecase file paths to camelcase Ruby constant paths' do
      converted = 'some/file'.to_classname

      expect(converted).to eq('Some::File')
    end

    context 'unlike ActiveSupport’s #classify' do
      it 'preserves pluralized names' do
        # Checked in order; a trailing "s" must survive the conversion.
        {
          'some/files'      => 'Some::Files',
          'some_test/files' => 'SomeTest::Files'
        }.each do |path, constant_name|
          expect(path.to_classname).to eq(constant_name)
        end
      end
    end
  end
  87. describe '#html2text' do
  # Basic String#html2text examples: non-mutation, whitespace trimming, tag
  # stripping, and newline handling inside <pre>/<code>.
  it 'does not modify strings in place' do
    samples = %w[test <div>test</div>]

    samples.each do |sample|
      expect { sample.html2text }.not_to change { sample }
    end
  end

  it 'leaves human-readable text as-is' do
    text = 'test'.html2text

    expect(text).to eq('test')
  end

  it 'strips leading/trailing spaces' do
    text = ' test '.html2text

    expect(text).to eq('test')
  end

  it 'also strips leading/trailing newlines' do
    text = "\n\n test \n\n\n".html2text

    expect(text).to eq('test')
  end

  it 'strips HTML tags around text content' do
    text = '<div>test</div>'.html2text

    expect(text).to eq('test')
  end

  it 'strips trailing <br> inside last <div>' do
    text = '<div>test<br></div>'.html2text

    expect(text).to eq('test')
  end

  it 'strips trailing <br> and newlines inside last <div>' do
    html = "<div>test<br><br><br>\n<br>\n<br>\n</div>"

    expect(html.html2text).to eq('test')
  end

  it 'strips trailing <br>, newlines, and spaces inside last <div>' do
    html = "<div>test<br><br> <br> \n<br> \n<br> \n</div>"

    expect(html.html2text).to eq('test')
  end

  it 'strips trailing <br>, newlines, and &nbsp; inside last <div>' do
    html = "<div>test<br><br>&nbsp;<br>&nbsp;\n<br>&nbsp;\n<br>&nbsp;\n</div>"

    expect(html.html2text).to eq('test')
  end

  it 'strips trailing whitespace (including &nbsp; & <br>) both inside and after last tag' do
    html = "<div>test<br><br>&nbsp;<br>&nbsp;\n<br>&nbsp;\n<br>&nbsp;\n</div>&nbsp;"

    expect(html.html2text).to eq('test')
  end

  it 'also strips nested HTML tags' do
    html = "<p><span>Was\nsoll verbessert werden:</span></p>"

    expect(html.html2text).to eq('Was soll verbessert werden:')
  end

  it 'in <pre> elements, collapses multiple newlines into one' do
    html = "<pre>test\n\ntest</pre>"

    expect(html.html2text).to eq("test\ntest")
  end

  it 'in <code> elements, collapses multiple newlines into one' do
    html = "<code>test\n\ntest</code>"

    expect(html.html2text).to eq("test\ntest")
  end
  # Heredoc-driven html2text examples. Each one pipes an HTML fixture (<<~HTML)
  # through #html2text and compares the result with the expected plain text
  # (<<~TEXT); `.chomp` removes the final newline of both heredocs.
  #
  # NOTE(review): the table fixture carries a stray second `</td>` in its first
  # row - presumably deliberate malformed input; confirm.
  130. it 'converts <table> cells and row to space-separated lines' do
  131. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  132. <table><tr><td>test</td><td>col</td></td></tr><tr><td>test</td><td>4711</td></tr></table>
  133. HTML
  134. test col
  135. test 4711
  136. TEXT
  137. end
  138. it 'strips HTML comments' do
  139. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  140. <!-- some comment -->
  141. <div>
  142. test<br><br><br>
  143. <br>
  144. <br>
  145. </div>
  146. HTML
  147. test
  148. TEXT
  149. end
  # The expected text puts a "[1]" marker in front of the link text and lists
  # the URL under the same number afterwards. The fixture ends with an
  # unclosed <div>.
  150. it 'converts <a> elements to plain text with numerical references' do
  151. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  152. <div><a href="https://zammad.org">Best Tool of the World</a>
  153. some other text</div>
  154. <div>
  155. HTML
  156. [1] Best Tool of the Worldsome other text
  157. [1] https://zammad.org
  158. TEXT
  159. end
  160. it 'converts <hr> elements to separate paragraphs containing only "___"' do
  161. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  162. <!-- some comment -->
  163. <div>
  164. test<br><br><br>
  165. <hr/>
  166. <br>
  167. </div>
  168. HTML
  169. test
  170. ___
  171. TEXT
  172. end
  # NOTE(review): the description allows up to two consecutive newlines, yet no
  # blank line is visible in the expected text as shown here; blank lines may
  # be missing from this copy of the file - verify against the repository.
  # The fixture also contains a closing </div> without an opening tag.
  173. it 'converts <br> elements to newlines (max. 2)' do
  174. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  175. test<br><br><br>--<br>abc</div>
  176. HTML
  177. test
  178. --
  179. abc
  180. TEXT
  181. end
  # The conditional-comment fixture lines end in "=", which looks like
  # quoted-printable soft line breaks left in the sample - TODO confirm.
  # Only the text before the comment is expected to remain.
  182. it 'strips Microsoft Outlook conditional comments' do
  183. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  184. Ihr RZ-Team<br />
  185. <br />
  186. <!--[if gte mso 9]><xml> <o:DocumentProperties> <o:Author>test</o:Author> =
  187. <o:Template>A75DB76E.dotm</o:Template> <o:LastAuthor>test</o:LastAuthor> =
  188. <o:Revision>5</o:Revision> <o:Created>2011-05-18T07:08:00Z</o:Created> <=
  189. o:LastSaved>2011-07-04T17:59:00Z</o:LastSaved> <o:Pages>1</o:Pages> <o:Wo=
  190. rds>189</o:Words> <o:Characters>1192</o:Characters> <o:Lines>9</o:Lines> =
  191. <o:Paragraphs>2</o:Paragraphs> <o:CharactersWithSpaces>1379</o:Characters=
  192. WithSpaces> <o:Version>11.5606</o:Version> </o:DocumentProperties></xml><!=
  193. [endif]-->
  194. HTML
  195. Ihr RZ-Team
  196. TEXT
  197. end
  # Fax-notification mail fixture built from nested layout tables with three
  # <img> tags. The fixture stops after a table row, so the outer <table>,
  # <center>, <body> and <html> elements are never closed. The expected text
  # consists of the <title> text and the two non-blank <span> texts only.
  198. it 'strips <img> elements' do
  199. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  200. <html>
  201. <head>
  202. <title>Neues Fax von 1234-93900</title>
  203. </head>
  204. <body style="margin: 0px;padding: 0px;font-family: Arial, sans-serif;font-size: 12px;">
  205. <table cellpadding="0" cellspacing="0" width="100%" height="100%" bgcolor="#d9e7f0" id="mailbg"
  206. style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;width: 100%;height: 100%;background-color: #d9e7f0;padding: 0px;margin: 0px;">
  207. <tr>
  208. <td valign="top">
  209. <center>
  210. <br><br>
  211. <table width="560" cellpadding="0" cellspacing="0" bgcolor="#FFFFFF" id="mailcontainer"
  212. style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;width: 560px;margin: 0px auto;padding: 0px;background-color: #FFFFFF;">
  213. <tr>
  214. <td colspan="3" width="560" id="mail_header" valign="top" style="width: 560px;background-color: #FFFFFF;font-family: Arial, sans-serif;color: #000000;padding: 0px;margin: 0px;">
  215. <table width="560" cellpadding="0" cellspacing="0" style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;">
  216. <tr>
  217. <td height="10" colspan="4" style="font-size:0px;line-height: 0px;padding:0px;height:10px;">
  218. <img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_header.gif" style="padding: 0px;margin: 0px;">
  219. </td>
  220. </tr>
  221. <tr>
  222. <td height="12" colspan="4"><span style="font-size:0px;line-height:0px;"> </span></td>
  223. </tr>
  224. <tr>
  225. <td height="27" width="30"> </td>
  226. <td height="27" width="397"><span class="mailtitle" style="font-family: Arial, sans-serif;color: #000000;font-size: 18px;line-height: 18px;font-weight: normal;">Neues Fax</span></td>
  227. <td height="27" width="103"><img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_logo-example.gif" style="padding: 0px;margin: 0px;"></td>
  228. <td height="27" width="30"></td>
  229. </tr>
  230. <tr>
  231. <td height="20" colspan="4"><span style="font-size:0px;line-height:0px;"> </span></td>
  232. </tr>
  233. <tr>
  234. <td height="1" colspan="4" style="font-size:0px;line-height: 0px;padding:0px;">
  235. <img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_line-grey.gif" style="padding: 0px;margin: 0px;">
  236. </td>
  237. </tr>
  238. </table>
  239. </td>
  240. </tr>
  241. <tr>
  242. <td colspan="3" width="560"> </td>
  243. </tr>
  244. <tr>
  245. <td width="30"> </td>
  246. <td width="500" height="30" valign="middle" align="right">
  247. <span class="accountno" style="font-family: Arial, sans-serif;font-size: 10px;color: #666666;">Ihre Kundennummer: 12345678</span>
  248. </td>
  249. <td width="30"> </td>
  250. </tr>
  251. HTML
  252. Neues Fax von 1234-93900
  253. Neues Fax
  254. Ihre Kundennummer: 12345678
  255. TEXT
  256. end
  # &nbsp; is expected to come out as U+00A0 (written \u00A0 in the expected
  # heredoc) and &amp; as a literal "&".
  257. it 'converts characters written in HTML ampersand code' do
  258. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  259. line&nbsp;1<br>
  260. you<br/>
  261. -----&amp;
  262. HTML
  263. line\u00A01
  264. you
  265. -----&
  266. TEXT
  267. end
  # The fixture starts with the escape \u0020 (a space) - presumably written as
  # an escape so the squiggly heredoc's indentation stripping keeps it; confirm.
  268. it 'converts <ul> to asterisk-demarcated list' do
  269. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  270. \u0020<ul><li>#1</li><li>#2</li></ul>
  271. HTML
  272. * #1
  273. * #2
  274. TEXT
  275. end
  # NOTE(review): the fixture has a second opening <head> where a closing
  # </head> would be expected - possibly intentional malformed input; confirm.
  276. it 'strips HTML frontmatter and <head> element' do
  277. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  278. <!DOCTYPE html>
  279. <html>
  280. <head>
  281. <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
  282. <head>
  283. <body style="font-family:Geneva,Helvetica,Arial,sans-serif; font-size: 12px;">
  284. <div>&gt; Welcome!</div><div>&gt;</div><div>&gt; Thank you for installing Zammad.</div><div>&gt;</div>
  285. </body>
  286. </html>
  287. HTML
  288. > Welcome!
  289. >
  290. > Thank you for installing Zammad.
  291. >
  292. TEXT
  293. end
  # A <style> element with a CSS payload followed by a single <p>; only the
  # paragraph text is expected to remain. The fixture opens with the escape
  # \u0020 (a space). The "f=" / "ont-size" split inside the CSS looks like a
  # quoted-printable soft line break left in the sample - TODO confirm.
  294. it 'strips <style> elements' do
  295. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  296. \u0020 <style type="text/css">
  297. body {
  298. width:90% !important;
  299. -webkit-text-size-adjust:90%;
  300. -ms-text-size-adjust:90%;
  301. font-family:'helvetica neue', helvetica, arial, geneva, sans-serif; f=
  302. ont-size: 12px;;
  303. }
  304. img {
  305. outline:none; text-decoration:none; -ms-interpolation-mode: bicubic;
  306. }
  307. a img {
  308. border:none;
  309. }
  310. table td {
  311. border-collapse: collapse;
  312. }
  313. table {
  314. border-collapse: collapse; mso-table-lspace:0pt; mso-table-rspace:0pt;
  315. }
  316. p, table, div, td {
  317. max-width: 600px;
  318. }
  319. p {
  320. margin: 0;
  321. }
  322. blockquote, pre {
  323. margin: 0px;
  324. padding: 8px 12px 8px 12px;
  325. }
  326. </style><p>some other content</p>
  327. HTML
  328. some other content
  329. TEXT
  330. end
  # Microsoft Word export fragment: a text run ending in a closing </span> that
  # has no opening tag, followed by <meta> tags, conditional comments and
  # <style> blocks. Only the leading text "IT-Infrastruktur" is expected to
  # remain. The fixture opens with the escape \u0020 (a space).
  331. it 'strips <meta> elements' do
  332. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  333. \u0020 IT-Infrastruktur</span><br>
  334. <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  335. <meta name="Generator" content="Microsoft Word 14 (filtered
  336. medium)">
  337. <!--[if !mso]><style>v:* {behavior:url(#default#VML);}
  338. o:* {behavior:url(#default#VML);}
  339. w:* {behavior:url(#default#VML);}
  340. .shape {behavior:url(#default#VML);}
  341. </style><![endif]-->
  342. <style><!--
  343. @font-face
  344. {font-family:calibri;
  345. panose-1:2 15 5 2 2 2 4 3 2 4;}
  346. @font-face
  347. {font-family:tahoma;
  348. panose-1:2 11 6 4 3 5 4 4 2 4;}
  349. p.msonormal, li.msonormal, div.msonormal
  350. {margin:0cm;
  351. margin-bottom:.0001pt;
  352. font-size:11.0pt;
  353. font-family:"calibri","sans-serif";
  354. mso-fareast-language:en-us;}
  355. a:link, span.msohyperlink
  356. {mso-style-priority:99;
  357. color:blue;
  358. text-decoration:underline;}
  359. a:visited, span.msohyperlinkfollowed
  360. {mso-style-priority:99;
  361. color:purple;
  362. text-decoration:underline;}
  363. p.msoacetate, li.msoacetate, div.msoacetate
  364. {mso-style-priority:99;
  365. mso-style-link:"sprechblasentext zchn";
  366. margin:0cm;
  367. margin-bottom:.0001pt;
  368. font-size:8.0pt;
  369. font-family:"tahoma","sans-serif";
  370. mso-fareast-language:en-us;}
  371. span.e-mailformatvorlage17
  372. {mso-style-type:personal;
  373. font-family:"calibri","sans-serif";
  374. color:windowtext;}
  375. span.sprechblasentextzchn
  376. {mso-style-name:"sprechblasentext zchn";
  377. mso-style-priority:99;
  378. mso-style-link:sprechblasentext;
  379. font-family:"tahoma","sans-serif";}
  380. .msochpdefault
  381. {mso-style-type:export-only;
  382. font-family:"calibri","sans-serif";
  383. mso-fareast-language:en-us;}
  384. @page wordsection1
  385. {size:612.0pt 792.0pt;
  386. margin:70.85pt 70.85pt 2.0cm 70.85pt;}
  387. div.wordsection1
  388. {page:wordsection1;}
  389. --></style><!--[if gte mso 9]><xml>
  390. <o:shapedefaults v:ext="edit" spidmax="1026" />
  391. </xml><![endif]--><!--[if gte mso 9]><xml>
  392. <o:shapelayout v:ext="edit">
  393. <o:idmap v:ext="edit" data="1" />
  394. </o:shapelayout></xml><![endif]-->
  395. HTML
  396. IT-Infrastruktur
  397. TEXT
  398. end
  # Block-level layout examples. In all three, content inside <blockquote> is
  # expected with a leading "> ".
  #
  # NOTE(review): the first description says a <p> after a non-<p> block gets
  # two newlines, yet the expected text as shown here has no blank line; blank
  # lines may be missing from this copy of the file - verify against the
  # repository.
  399. it 'separates block-level elements by one newline (<p> following a non-<p> block gets two)' do
  400. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  401. <h1>some head</h1>
  402. some content
  403. <blockquote>
  404. <p>line 1</p>
  405. <p>line 2</p>
  406. </blockquote>
  407. <p>some text later</p>
  408. HTML
  409. some head
  410. some content
  411. > line 1
  412. > line 2
  413. some text later
  414. TEXT
  415. end
  # Same structure, but the quoted lines are separated by <br> instead of <p>.
  416. it 'formats <blockquote> contents with leading "> "' do
  417. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  418. <h1>some head</h1>
  419. some content
  420. <blockquote>
  421. line 1<br/>
  422. line 2<br>
  423. </blockquote>
  424. <p>some text later</p>
  425. HTML
  426. some head
  427. some content
  428. > line 1
  429. > line 2
  430. some text later
  431. TEXT
  432. end
  # Nested <div>s with a trailing <br>: the expected text has one bare ">" line
  # between the two quoted lines.
  433. it 'adds max. 2 newlines between block-level <blockquote> contents' do
  434. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  435. <h1>some head</h1>
  436. some content
  437. <blockquote>
  438. <div><div>line 1</div><br></div>
  439. <div><div>line 2</div><br></div>
  440. </blockquote>
  441. some text later
  442. HTML
  443. some head
  444. some content
  445. > line 1
  446. >
  447. > line 2
  448. some text later
  449. TEXT
  450. end
  # The link is marked "[1]" inline, and its URL is expected on the last line,
  # after the rest of the text (including the dashed separator).
  451. it 'places numerical <a> references at end of text string' do
  452. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  453. <p>Best regards,</p>
  454. <p><i>Your Team Team</i></p>
  455. <p>P.S.: You receive this e-mail because you are listed in our database as person who ordered a Team license. Please click
  456. <a href="http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx">here</a> to unsubscribe from further e-mails.</p>
  457. -----------------------------
  458. <br />
  459. HTML
  460. Best regards,
  461. Your Team Team
  462. P.S.: You receive this e-mail because you are listed in our database as person who ordered a Team license. Please click [1] here to unsubscribe from further e-mails.
  463. -----------------------------
  464. [1] http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx
  465. TEXT
  466. end
  # The <span> opened in the fixture is never closed. Source line breaks inside
  # running text are expected to become single spaces, <br> to become newlines.
  # The \u0020 escape at the end of one fixture line is presumably there to
  # keep a trailing space in the source - confirm.
  467. it 'handles elements with missing closing tags' do
  468. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  469. <div><br>Dave and leaned her
  470. days adam.</div><span style="color:#F7F3FF; font-size:8px">Maybe we
  471. want any help me that.<br>Next morning charlie saw at their
  472. father.<br>Well as though adam took out here. Melvin will be more money.\u0020
  473. Called him into this one last thing.<br>Men-----------------------
  474. <br />
  475. HTML
  476. Dave and leaned her days adam.
  477. Maybe we want any help me that.
  478. Next morning charlie saw at their father.
  479. Well as though adam took out here. Melvin will be more money. Called him into this one last thing.
  480. Men-----------------------
  481. TEXT
  482. end
  # Character-reference handling in html2text.
  context 'html encoding' do
    it 'converts &Auml; in Ä' do
      html = '<div>test something.&Auml;</div>'

      expect(html.html2text).to eq('test something.Ä')
    end

    it 'strips invalid html encoding chars' do
      # &#55357; is the numeric character reference under test.
      html = '<div>test something.&#55357;</div>'

      expect(html.html2text).to eq('test something.í ˝')
    end
  end
  # Timing guards for html2text. Timeout.timeout(2) raises when its block runs
  # longer than two seconds, which makes the `not_to raise_error` expectation
  # fail; the converted text itself is not inspected.
  493. context 'performance tests' do
  # One filler unit: the same <p> line (with a link) repeated 11 times, then a newline.
  494. let(:filler) do
  495. %(#{%(<p>some word <a href="http://example.com?domain?example.com">some url</a> and the end.</p>\n) * 11}\n)
  496. end
  # Small document: a single filler unit inside a minimal HTML skeleton.
  497. it 'converts a 1076-byte unicode file in under 2s' do
  498. expect { Timeout.timeout(2) { <<~HTML.chomp.html2text } }.not_to raise_error
  499. <html>
  500. <title>some title</title>
  501. <body>
  502. <div>hello</div>
  503. #{filler}
  504. </body>
  505. </html>
  506. HTML
  507. end
  # Large document: the same skeleton with the filler unit repeated 2312 times.
  508. it 'converts a 2.21 MiB unicode file in under 2s' do
  509. expect { Timeout.timeout(2) { <<~HTML.chomp.html2text } }.not_to raise_error
  510. <html>
  511. <title>some title</title>
  512. <body>
  513. <div>hello</div>
  514. #{filler * 2312}
  515. </body>
  516. </html>
  517. HTML
  518. end
  519. end
  520. end
  521. describe '#html2html_strict' do
  # Basic String#html2html_strict examples. The method's return value is
  # reduced with `.first`; only that first element is compared.
  it 'leaves human-readable text as-is' do
    sanitized = 'test'.html2html_strict.first

    expect(sanitized).to eq('test')
  end

  it 'strips leading/trailing spaces' do
    sanitized = ' test '.html2html_strict.first

    expect(sanitized).to eq('test')
  end

  it 'also strips leading/trailing newlines' do
    sanitized = "\n\n test \n\n\n".html2html_strict.first

    expect(sanitized).to eq('test')
  end

  it 'also strips leading <br>' do
    sanitized = '<br><br><div>abc</div>'.html2html_strict.first

    expect(sanitized).to eq('<div>abc</div>')
  end

  it 'also strips trailing <br> & spaces' do
    sanitized = '<div>abc</div><br> <br>'.html2html_strict.first

    expect(sanitized).to eq('<div>abc</div>')
  end

  it 'leaves <b> as-is' do
    sanitized = '<b>test</b>'.html2html_strict.first

    expect(sanitized).to eq('<b>test</b>')
  end

  it 'downcases tag names' do
    sanitized = '<B>test</B>'.html2html_strict.first

    expect(sanitized).to eq('<b>test</b>')
  end

  it 'leaves <i> as-is' do
    sanitized = '<i>test</i>'.html2html_strict.first

    expect(sanitized).to eq('<i>test</i>')
  end

  it 'leaves <h1> as-is' do
    sanitized = '<h1>test</h1>'.html2html_strict.first

    expect(sanitized).to eq('<h1>test</h1>')
  end

  it 'leaves <h2> as-is' do
    sanitized = '<h2>test</h2>'.html2html_strict.first

    expect(sanitized).to eq('<h2>test</h2>')
  end

  it 'leaves <h3> as-is' do
    sanitized = '<h3>test</h3>'.html2html_strict.first

    expect(sanitized).to eq('<h3>test</h3>')
  end

  it 'leaves <pre> as-is' do
    sanitized = "<pre>a\nb\nc</pre>".html2html_strict.first

    expect(sanitized).to eq("<pre>a\nb\nc</pre>")
  end

  it 'leaves <pre> nested inside <div> as-is' do
    sanitized = "<div><pre>a\nb\nc</pre></div>".html2html_strict.first

    expect(sanitized).to eq("<div><pre>a\nb\nc</pre></div>")
  end

  it 'strips HTML comments' do
    sanitized = '<h3>test</h3><!-- some comment -->'.html2html_strict.first

    expect(sanitized).to eq('<h3>test</h3>')
  end
  # Document-wrapper and <span> handling. Fixture and expectation are built as
  # chomped heredocs first, then compared in a single expectation.
  it 'strips <html>/<body> tags & <head> elements' do
    html = <<~HTML.chomp
      <html><head><base href="x-msg://2849/"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; "><span class="Apple-style-span" style="border-collapse: separate; font-family: Helvetica; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: normal; orphans: 2; text-align: -webkit-auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; font-size: medium; "><div lang="DE" link="blue" vlink="purple"><div class="Section1" style="page: Section1; "><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Hello Martin,<o:p></o:p></span></div>
    HTML
    expected = <<~TEXT.chomp
      <div lang="DE">Hello Martin,</div>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end

  it 'strips <span> tags' do
    sanitized = '<span></span>'.html2html_strict.first

    expect(sanitized).to eq('')
  end

  it 'keeps style with color in <span>' do
    html = <<~HTML.chomp
      <span style="color: red; bgcolor: red">Hello Martin,</span>
    HTML
    expected = <<~TEXT.chomp
      <span style="color: red;">Hello Martin,</span>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end

  it 'remove style=#ffffff with color in <span>' do
    html = <<~HTML.chomp
      <span style="color: #ffffff; bgcolor: red">Hello Martin,</span>
    HTML
    expected = <<~TEXT.chomp
      Hello Martin,
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  # MS Office (Word) markup sample. In the expected output the id/class
  # attributes and the <o:p> tags are gone, and the colour value is lower-cased
  # with a trailing semicolon (#1F497D -> #1f497d;).
  # NOTE(review): the description says <span> tags are stripped, but the
  # expected output keeps the colour-carrying <span> elements - confirm wording.
  588. it 'strips <span> tags, id/class attrs, and <o:*> (MS Office) tags' do
  589. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  590. <div id="123" class="WordSection1">
  591. <p class="MsoNormal"><span style="color:#1F497D">Guten Morgen, Frau Koppenhagen,<o:p></o:p></span></p>
  592. <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
  593. <p class="MsoNormal"><span style="color:#1F497D">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?<o:p></o:p></span></p>
  594. <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
  595. <p class="MsoNormal"><span style="color:#1F497D">Nochmals vielen Dank und herzliche Grüße
  596. <o:p></o:p></span></p>
  597. <div>
  598. HTML
  599. <div>
  600. <p><span style="color:#1f497d;">Guten Morgen, Frau Koppenhagen,</span></p><p><span style="color:#1f497d;"> </span></p><p><span style="color:#1f497d;">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?</span></p><p><span style="color:#1f497d;"> </span></p><p><span style="color:#1f497d;">Nochmals vielen Dank und herzliche Grüße </span></p></div>
  601. TEXT
  602. end
  603. it 'strips <font> tags' do
  604. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  605. <p><font size="2"><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></font></p>
  606. HTML
  607. <p><a href="http://www.example.com/?wm=mail" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" style="width:101px;height:30px;"></a></p>
  608. TEXT
  609. end
  610. it 'strips extraneous whitespace from end of opening tag' do
  611. expect('<b >test</b>'.html2html_strict.first).to eq('<b>test</b>')
  612. end
  it 'strips extraneous whitespace from closing tag' do
    # Both "<b >" and "</b >" are expected to lose the padding before ">".
    html = '<b >test</b >'

    expect(html.html2html_strict.first).to eq('<b>test</b>')
  end
  it 'does not detect < /b > as closing tag; converts chars and auto-closes tag' do
    # "< /b >" is expected to be kept as escaped text, with a real </b> appended.
    html = '<b >test< /b >'

    expect(html.html2html_strict.first).to eq('<b>test&lt; /b &gt;</b>')
  end
  it 'does not detect <\n/b> as closing tag; converts chars and auto-closes tag' do
    # Same as the "< /b >" case, but the tag is broken up by newlines instead of spaces.
    html = "<b\n>test<\n/b>"

    expect(html.html2html_strict.first).to eq('<b>test&lt; /b&gt;</b>')
  end
  it 'collapses multiple whitespace-only <p> into one with &nbsp;' do
    # Three blank paragraphs in, a single "&nbsp;" paragraph out.
    html = <<~HTML.chomp
      <p> </p><p> </p><p> </p>
    HTML
    expected = <<~TEXT.chomp
      <p>&nbsp;</p>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'keeps lang attr on <p>' do
    # The empty <b><span> pair is dropped, but the paragraph keeps lang="DE".
    html = <<~HTML.chomp
      <p lang="DE"><b><span></span></b></p>
    HTML
    expected = <<~TEXT.chomp
      <p lang="DE"></p>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'strips <span> inside <p>' do
    # The attribute-less <span> is removed; <b> and its text stay.
    html = <<~HTML.chomp
      <p lang="DE"><b><span>Hello Martin,</span></b></p>
    HTML
    expected = <<~TEXT.chomp
      <p lang="DE"><b>Hello Martin,</b></p>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'strips empty <p> keep <p>s with content' do
    # The trailing empty <p></p> is dropped; the whitespace-only one becomes "&nbsp;".
    html = <<~HTML.chomp
      <p> </p><p>123</p><p></p>
    HTML
    expected = <<~TEXT.chomp
      <p>&nbsp;</p><p>123</p>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'strips <br> between <p>' do
    # The two <br> elements sitting between the paragraphs are expected to vanish.
    html = '<p>&nbsp;</p><br><br><p>&nbsp;</p>'

    expect(html.html2html_strict.first).to eq('<p>&nbsp;</p><p>&nbsp;</p>')
  end
  it 'auto-adds missing closing brackets on tags, but not opening brackets' do
    # The opening tag is spread over several lines and carries a misspelled
    # attribute; the closing tag is split after "<" and ends up as escaped text.
    html = <<~HTML.chomp
      <b id=123 classs="
      some_class"
      >test<
      /b>
    HTML
    expected = <<~TEXT.chomp
      <b>test&lt; /b&gt;</b>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'auto-adds missing closing tags' do
    # The broken "<\n/ul>" is expected to survive as escaped text, followed by a
    # generated </ul>; attributes on <ul>/<li> are dropped.
    html = <<~HTML.chomp
      <ul id=123 classs="
      some_class"
      ><li>test</li>
      <li class="asasd">test</li><
      /ul>
    HTML
    expected = <<~TEXT.chomp
      <ul>
      <li>test</li>
      <li>test</li>&lt; /ul&gt;</ul>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'auto-closes <div> with missing closing tag; removes </p> with missing opening tag' do
    # Input has a stray </p> and an unclosed <div>; the expectation has neither problem.
    html = <<~HTML.chomp
      Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href="http://newsletters.cylex.de/" class="">Link des Adventkalenders</a> in<br class="">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Ihrer Lesezeichen-Symbolleiste zu ergänzen.</p><div class="">&nbsp;
    HTML
    expected = <<~TEXT.chomp
      Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href="http://newsletters.cylex.de/" rel="nofollow noreferrer noopener" target="_blank" title="http://newsletters.cylex.de/">Link des Adventkalenders</a> in<br> Ihrer Lesezeichen-Symbolleiste zu ergänzen.<div> </div>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'intelligently inserts missing </td> & </tr> tags (and ignores misplaced </table> tags)' do
    # Input tables never close their rows/cells and end with a dangling <table>;
    # the expectation is two well-formed tables without <font>/bgcolor.
    html = <<~HTML.chomp
      <table>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif"><b>Franz Schäfer</b></font>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif">Manager Information Systems</font></table>
      <br>
      <table>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif">Telefon &nbsp;</font>
      <td bgcolor=white><font size=2 face="sans-serif">+49 000 000 8565</font>
      <tr>
      <td colspan=2 bgcolor=white><font size=2 face="sans-serif">christian.schaefer@example.com</font></table>
      <br>
      <table>
    HTML
    expected = <<~TEXT.chomp
      <table>
      <tr>
      <td>
      <b>Franz Schäfer</b>
      </td>
      </tr>
      <tr>
      <td>Manager Information Systems</td>
      </tr>
      </table>
      <br>
      <table>
      <tr>
      <td> Telefon </td>
      <td> +49 000 000 8565 </td>
      </tr>
      <tr>
      <td colspan="2">christian.schaefer@example.com</td>
      </tr>
      </table>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'ignores invalid (misspelled) attrs' do
    # Neither `id` nor the misspelled `classs` attribute appears in the result.
    html = <<~HTML.chomp
      <b id=123 classs="
      some_class">test</b>
    HTML
    expected = <<~TEXT.chomp
      <b>test</b>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'strips incomplete CSS rules' do
    # `style="color: "` has no value, so no style attribute is expected on the link.
    html = <<~HTML.chomp
      <p><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></p>
    HTML
    expected = <<~TEXT.chomp
      <p><a href="http://www.example.com/?wm=mail" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" style="width:101px;height:30px;"></a></p>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  # Examples for <div> elements whose content is only whitespace, <br>, or
  # equally empty children.
  context 'for whitespace-only <div>' do
    it 'preserves a single space' do
      html = '<div> </div>'

      expect(html.html2html_strict.first).to eq('<div> </div>')
    end

    it 'converts a lone <br> to &nbsp;' do
      html = '<div><br></div>'

      expect(html.html2html_strict.first).to eq('<div>&nbsp;</div>')
    end

    it 'converts three <br> to one &nbsp;' do
      # The max-width style is also absent from the expected <div>.
      html = '<div style="max-width: 600px;"><br><br><br></div>'

      expect(html.html2html_strict.first).to eq('<div>&nbsp;</div>')
    end

    it 'collapses two nested, whitespace-only <div> into a single &nbsp;' do
      html = '<div><div> </div><div> </div></div>'

      expect(html.html2html_strict.first).to eq('<div>&nbsp;</div>')
    end

    it 'collapses three nested, whitespace-only <div> into a single &nbsp;' do
      html = '<div><div> </div><div> </div><div> </div></div>'

      expect(html.html2html_strict.first).to eq('<div>&nbsp;</div>')
    end

    it 'collapses 2+ nested, whitespace-only <p> into \n<p>&nbsp;</p>' do
      # Unlike the nested <div> cases, the expectation starts with a newline
      # after the opening <div>.
      html = '<div><p> </p><p> </p></div>'

      expect(html.html2html_strict.first).to eq("<div>\n<p>&nbsp;</p></div>")
    end
  end
  # Examples for <div> elements that carry real content: whitespace trimming,
  # <br> handling, nesting removal and style filtering.
  context 'for <div> with content' do
    it 'also strips trailing/leading newlines inside <div>' do
      html = "<div>\n\n\ntest\n\n\n</div>"

      expect(html.html2html_strict.first).to eq('<div>test</div>')
    end

    it 'also strips trailing/leading newlines & tabs inside <div>' do
      html = "<div>\n\t\ntest\n\t\n</div>"

      expect(html.html2html_strict.first).to eq('<div>test</div>')
    end

    it 'also strips trailing/leading newlines & tabs inside <div>, but not internal spaces' do
      html = "<div>\n\t\ntest 123\n\t\n</div>"

      expect(html.html2html_strict.first).to eq('<div>test 123</div>')
    end

    it 'strips newlines from trailing whitespace; leaves up to two <br> (with spaces) as-is' do
      html = <<~HTML.chomp
        <div>
        <br> <p><b>Description</b></p>
        <br> <br> </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br> <p><b>Description</b></p><br> <br> </div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'strips newlines from trailing whitespace; collapses 3+ <br> into two' do
      html = <<~HTML.chomp
        <div>
        <br> <p><b>Description</b></p>
        <br> <br> <br> </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br> <p><b>Description</b></p><br><br></div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'removes unnecessary <div> nesting' do
      html = <<~HTML.chomp
        <div><div>Hello Martin,</div></div>
      HTML
      expected = <<~TEXT.chomp
        <div>Hello Martin,</div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'keeps innermost <div> when removing nesting' do
      # The single surviving <div> carries the lang attribute of the outermost one.
      html = <<~HTML.chomp
        <div lang="DE"><div><div>Hello Martin,</div></div></div>
      HTML
      expected = <<~TEXT.chomp
        <div lang="DE">Hello Martin,</div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'keeps style with color in <div>' do
      # `color` survives (with a terminating semicolon); `bgcolor` does not.
      html = <<~HTML.chomp
        <div style="color: red; bgcolor: red">Hello Martin,</div>
      HTML
      expected = <<~TEXT.chomp
        <div style="color: red;">Hello Martin,</div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'remove style=#ffffff with color in <div>' do
      # With color #ffffff the whole style attribute is expected to be gone.
      html = <<~HTML.chomp
        <div style="color: #ffffff; bgcolor: red">Hello Martin,</div>
      HTML
      expected = <<~TEXT.chomp
        <div>Hello Martin,</div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'rearranges whitespace in nested <div>' do
      html = <<~HTML.chomp
        <div lang="DE"><div><div>Hello Martin,</div> </div></div>
      HTML
      expected = <<~TEXT.chomp
        <div lang="DE">
        <div>Hello Martin,</div></div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'adds newline where <br> starts or ends <div> content' do
      html = <<~HTML.chomp
        <div style="max-width: 600px;"><br>abc<br><br></div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br>abc<br><br>
        </div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'leaves <s> nested in <div> as-is (?)' do
      html = <<~HTML.chomp
        <div><s>abc</s></div>
      HTML
      expected = <<~TEXT.chomp
        <div><s>abc</s></div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'collapses multiple whitespace-only <p> into one with &nbsp;' do
      html = <<~HTML.chomp
        <div><p> </p>
        <p> </p>
        <p> </p>
        </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <p>&nbsp;</p></div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'strips <div> tags when they contain only <p>' do
      # NOTE(review): the expected markup still contains both <div> elements;
      # only the <span> is removed. Example name may be outdated - confirm.
      html = <<~HTML.chomp
        <div>lala<div lang="DE"><p><span>Hello Martin,</span></p></div></div>
      HTML
      expected = <<~TEXT.chomp
        <div>lala<div lang="DE"><p>Hello Martin,</p></div></div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end
  end
  # Examples covering <a> elements and bare URLs: attribute rewriting,
  # auto-linking, URL boundary detection, title generation and mailto links.
  context 'link handling' do
    it 'adds rel & target attrs to <a> tags' do
      html = <<~HTML.chomp
        <a href="http://web.de">web.de</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://web.de" rel="nofollow noreferrer noopener" target="_blank">web.de</a>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'removes id attrs' do
      html = <<~HTML.chomp
        <a id="123" href="http://web.de">web.de</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://web.de" rel="nofollow noreferrer noopener" target="_blank">web.de</a>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'removes class/id attrs' do
      html = <<~HTML.chomp
        <a href="http://example.com" class="abc" id="123">http://example.com</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'downcases <a> tags' do
      html = <<~HTML.chomp
        <A href="http://example.com?a=1;">http://example.com?a=1;</A>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com?a=1;" rel="nofollow noreferrer noopener" target="_blank">http://example.com?a=1;</a>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'doesn’t downcase href attr or inner text' do
      html = <<~HTML.chomp
        <A href="http://example.com/withSoMeUpper/And/downCase">http://example.com/withSoMeUpper/And/downCase</A>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com/withSoMeUpper/And/downCase" rel="nofollow noreferrer noopener" target="_blank">http://example.com/withSoMeUpper/And/downCase</a>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'automatically wraps <a> tags around valid URLs' do
      # The "\n" escapes below are expanded by the heredoc into real newlines.
      html = <<~HTML.chomp
        <div>https://www.facebook.com/test</div>
      HTML
      expected = <<~TEXT.chomp
        <div>\n<a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a>\n</div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'does not wrap URLs if leading https?:// is missing' do
      html = <<~HTML.chomp
        some text www.example.com some other text
      HTML
      expected = <<~TEXT.chomp
        some text www.example.com some other text
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'adds missing http:// to href attr (but not inner text)' do
      html = <<~HTML.chomp
        web <a href="www.example.com"><span style="color:blue">www.example.com</span></a>
      HTML
      expected = <<~TEXT.chomp
        web <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank"><span style="color:blue;">www.example.com</span></a>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'includes URL parameters when wrapping URL in <a> tag' do
      # The raw "&" of the input shows up as "&amp;" in both href and link text.
      html = <<~HTML.chomp
        <p>https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap</p>
      HTML
      expected = <<~TEXT.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;a=1;#ldap" rel="nofollow noreferrer noopener" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;a=1;#ldap</a></p>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'does not rewrap valid URLs that already have <a> tags' do
      html = <<~HTML.chomp
        <a href="http://example.com">http://example.com</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'recognizes URL parameters when matching href to inner text' do
      # href and text are identical here, so no title attribute is expected.
      html = <<~HTML.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap</a></p>
      HTML
      expected = <<~TEXT.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap" rel="nofollow noreferrer noopener" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap</a></p>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'recognizes <br> as URL boundary' do
      html = <<~HTML.chomp
        <div><br>https://www.facebook.com/test<br></div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br><a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a><br>\n</div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'recognizes space as URL boundary' do
      html = <<~HTML.chomp
        some text http://example.com some other text
      HTML
      expected = <<~TEXT.chomp
        some text <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a> some other text
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'wraps valid URLs from <div> elements in <a> tags' do
      html = <<~HTML.chomp
        <div>http://example.com</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
        </div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'recognizes trailing dot as URL boundary' do
      html = <<~HTML.chomp
        <div>http://example.com.</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>.</div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'does not add a leading newline if <div> begins with non-URL text' do
      html = <<~HTML.chomp
        <div>lala http://example.com.</div>
      HTML
      expected = <<~TEXT.chomp
        <div>lala <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>.</div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'recognizes trailing comma as URL boundary' do
      html = <<~HTML.chomp
        <div>http://example.com, and so on</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>, and so on</div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'recognizes trailing comma as URL boundary (immediately following URL parameters)' do
      html = <<~HTML.chomp
        <div>http://example.com?lala=me, and so on</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com?lala=me" rel="nofollow noreferrer noopener" target="_blank">http://example.com?lala=me</a>, and so on</div>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'strips <a> tags when no href is present' do
      # Only the colour rule of the span's style is present in the expectation.
      html = <<~HTML.chomp
        <a name="_MailEndCompose"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#44546A">Hello Mr Smith,<o:p></o:p></span></a>
      HTML
      expected = <<~TEXT.chomp
        <span style="color:#44546a;">Hello Mr Smith,</span>
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    context 'when <a> inner text is HTML elements' do
      it 'leaves <img> elements as-is' do
        # The "\n" escapes put real newlines inside the class value and before ">".
        html = <<~HTML.chomp
          <a href="http://example.com/?abc=123&123=abc" class="abc\n"\n><img src="cid:123"></a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com/?abc=123&amp;123=abc" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com/?abc=123&amp;123=abc"><img src="cid:123"></a>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end

      it 'strips <span> tags, but not content' do
        # NOTE(review): the expected markup keeps the <span> (only its style is
        # normalised). Example name may be outdated - confirm.
        html = <<~HTML.chomp
          <a href="http://facebook.de/examplesrbog"><span lang="EN-US" style='color:blue'>http://facebook.de/examplesrbog</span></a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://facebook.de/examplesrbog" rel="nofollow noreferrer noopener" target="_blank"><span lang="EN-US" style="color:blue;">http://facebook.de/examplesrbog</span></a>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end

      it 'also strips surrounding <span> and <o:p> tags' do
        # NOTE(review): the expectation keeps both <span> elements with their
        # colour rule; only <o:p> and the other style rules are gone - confirm
        # the example name.
        html = <<~HTML.chomp
          <span style="font-size:10.0pt;font-family:&quot;Cambria&quot;,serif;color:#1F497D;mso-fareast-language:DE">web&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
          <a href="http://www.example.com"><span style="color:blue">www.example.com</span></a><o:p></o:p></span>
        HTML
        expected = <<~TEXT.chomp
          <span style="color:#1f497d;">web <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank"><span style="color:blue;">www.example.com</span></a></span>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end
    end

    context 'when <a> inner text and href do not match' do
      it 'adds title attr' do
        html = <<~HTML.chomp
          <a href="http://example.com">http://what-different.example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com">http://what-different.example.com</a>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end

      it 'converts unsafe characters in href attr and title' do
        # The literal space in the href is expected to come back as %20.
        html = <<~HTML.chomp
          <a href="http://example.com %22test%22">http://what-different.example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com%20%22test%22" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com%20%22test%22">http://what-different.example.com</a>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end

      it 'does not add title attr (for different capitalization)' do
        html = <<~HTML.chomp
          <a href="http://example.com">http://EXAMPLE.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://EXAMPLE.com</a>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end

      it 'does not add title attr (for URL-safe/unsafe characters)' do
        html = <<~HTML.chomp
          <a href="http://example.com/?abc=123&123=abc">http://example.com?abc=123&amp;123=abc</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com/?abc=123&amp;123=abc" rel="nofollow noreferrer noopener" target="_blank">http://example.com?abc=123&amp;123=abc</a>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end
    end

    context 'for email links' do
      it 'strips <a> tags' do
        # NOTE(review): the expectation keeps the <a href="mailto:..."> element
        # and only drops its style attribute - confirm the example name.
        html = <<~HTML.chomp
          <a href="mailto:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="mailto:john.smith@example.com">john.smith@example.com</a>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end

      it 'strips <a> tags (even with upcased "MAILTO:")' do
        # NOTE(review): as above, the <a> element itself is kept; "MAILTO:"
        # stays upper-case in the expected href.
        html = <<~HTML.chomp
          <a href="MAILTO:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="MAILTO:john.smith@example.com">john.smith@example.com</a>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end

      it 'extracts destination address when it differs from <a> innertext' do
        # NOTE(review): href and inner text are both unchanged in the
        # expectation; only the style attribute is dropped.
        html = <<~HTML.chomp
          <a href="MAILTO:john.smith2@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="MAILTO:john.smith2@example.com">john.smith@example.com</a>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end
    end
  end
  # Examples for <img> elements: style filtering and width/height conversion.
  context 'for <img> tags' do
    it 'removes color CSS rule from style attr' do
      # The expected style keeps its leading space where "color: blue;" used to be.
      html = <<~HTML.chomp
        <img src="/some.png" style="color: blue; width: 30px; height: 50px">
      HTML
      expected = <<~TEXT.chomp
        <img src="/some.png" style=" width: 30px; height: 50px;">
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'converts width/height attrs to CSS rules' do
      html = <<~HTML.chomp
        <img src="/some.png" width="30px" height="50px">
      HTML
      expected = <<~TEXT.chomp
        <img src="/some.png" style="width:30px;height:50px;">
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    it 'automatically adds terminal semicolons to CSS rules' do
      html = <<~HTML.chomp
        <img style="width: 181px; height: 125px" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...">
      HTML
      expected = <<~TEXT.chomp
        <img style="width: 181px; height: 125px;" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...">
      TEXT

      expect(html.html2html_strict.first).to eq(expected)
    end

    context 'when <img> nested in <a>, nested in <p>' do
      it 'sanitizes those elements as normal' do
        # class/id attributes, <o:p> and text-decoration are absent from the
        # expectation; the image's width/height attrs become inline CSS.
        html = <<~HTML.chomp
          <p class="MsoNormal"><a href="http://www.example.com/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>
        HTML
        expected = <<~TEXT.chomp
          <p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/"><span style="color:blue;"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></span></a></p>
        TEXT

        expect(html.html2html_strict.first).to eq(expected)
      end
    end
  end
  1108. context 'sample email input' do
  it 'handles sample input 1' do
    # A quoted-mail header block ("Von: ... Betreff: ...") follows "abc"; the
    # expectation has a js-signatureMarker span inserted right before the <p>.
    html = <<~HTML.chomp
      <div>
      abc<p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    HTML
    expected = <<~TEXT.chomp
      <div>abc<span class="js-signatureMarker"></span><p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'handles sample input 2' do
    # Variant of sample 1 with a space after <div> and after <p>; the space
    # after <div> is gone in the expectation, the one after <p> stays.
    html = <<~HTML.chomp
      <div> abc<p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    HTML
    expected = <<~TEXT.chomp
      <div>abc<span class="js-signatureMarker"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'handles sample input 3' do
    # Variant of sample 2 with an extra space between </p> and </div>; the
    # expectation is the same as for sample 2.
    html = <<~HTML.chomp
      <div> abc<p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p> </div>
    HTML
    expected = <<~TEXT.chomp
      <div>abc<span class="js-signatureMarker"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'handles sample input 4' do
    # A mail signature made of five styled <div>/<span> blocks; the expectation
    # carries no style attributes or <o:p> elements.
    # NOTE(review): the mailto link's style attribute has no opening quote
    # (`style=color: ...`) - presumably deliberate malformed input; confirm.
    html = <<~HTML.chomp
      <div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Mit freundlichem Gruß<span class="Apple-converted-space">&nbsp;</span><br><br>John Smith<br>Service und Support<br><br>Example Service AG &amp; Co.<o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; "><a href="mailto:john.smith@example.com" style=color: blue; text-decoration: underline; ">john.smith@example.com</a></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; "><a href="http://www.example.com" style="color: blue; text-decoration: underline; ">www.example.com</a></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div>
    HTML
    expected = <<~TEXT.chomp
      <div><span>Mit freundlichem Gruß <br><br>John Smith<br>Service und Support<br><br>Example Service AG &amp; Co.</span></div><div>
      <span>Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span>
      </div><div>
      <span>Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span>
      </div><div>
      <a href="mailto:john.smith@example.com">john.smith@example.com</a>
      </div><div>
      <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank">www.example.com</a>
      </div>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  it 'handles sample input 5' do
    # MS-Word-style mail body (MsoNormal paragraphs, <o:p>, unclosed <body> and
    # <div>s); the expectation has no class attributes or <o:p> and keeps only
    # the colour rule of each span's style.
    html = <<~HTML.chomp
      <body lang="DE" link="blue" vlink="purple"><div class="WordSection1">
      <p class="MsoNormal"><span style="color:#1F497D">Guten Morgen, Frau ABC,<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D">Nochmals vielen Dank und herzliche Grüße
      <o:p></o:p></span></p>
      <div>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D"><o:p>&nbsp;</o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">Anna Smith<o:p></o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">art abc SEV GmbH<o:p></o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">art abc TRAV<o:p></o:p></span></b></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">Marktstätte 123<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">123456 Dorten<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">T: &#43;49 (0) 12345/1234560-1<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">T: &#43;49 (0) 12345/1234560-0<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">F: &#43;49 (0) 12345/1234560-2<o:p></o:p></span></p>
      <p class="MsoNormal"><a href="mailto:annad@example.com"><span style="font-size:9.0pt">annad@example.com</span></a><span style="font-size:9.0pt;color:#C00000"><o:p></o:p></span></p>
      <p class="MsoNormal"><a href="http://www.example.com/"><span style="font-size:9.0pt">www.example.com</span></a><span style="font-size:9.0pt;color:#1F497D">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
      </span><a href="http://www.ABC.com/"><span style="font-size:9.0pt">www.ABC.com</span></a><span style="font-size:9.0pt;color:#1F497D"><o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:8.0pt;color:#1F497D">Geschäftsführer Vor Nach, VorUndZu Nach&nbsp;&nbsp;&nbsp;&nbsp; -&nbsp;&nbsp;&nbsp;&nbsp; Amtsgericht Dort HRB 12345&nbsp;&nbsp;&nbsp; -&nbsp;&nbsp;&nbsp; Ein Unternehmer der ABC Gruppe<o:p></o:p></span></p>
    HTML
    expected = <<~TEXT.chomp
      <div>
      <p><span style="color:#1f497d;">Guten Morgen, Frau ABC,</span></p><p><span style="color:#1f497d;"> </span></p><p><span style="color:#1f497d;">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?</span></p><p><span style="color:#1f497d;"> </span></p><p><span style="color:#1f497d;">Nochmals vielen Dank und herzliche Grüße </span></p><div>
      <p><b><span style="color:#1f497d;"> </span></b></p><p><b><span style="color:#1f497d;">Anna Smith</span></b></p><p><b><span style="color:#1f497d;">art abc SEV GmbH</span></b></p><p><b><span style="color:#1f497d;">art abc TRAV</span></b></p><p><span style="color:#1f497d;">Marktstätte 123</span></p><p><span style="color:#1f497d;">123456 Dorten</span></p><p><span style="color:#1f497d;">T: +49 (0) 12345/1234560-1</span></p><p><span style="color:#1f497d;">T: +49 (0) 12345/1234560-0</span></p><p><span style="color:#1f497d;">F: +49 (0) 12345/1234560-2</span></p><p><a href="mailto:annad@example.com">annad@example.com</a><span style="color:#c00000;"></span></p><p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank">www.example.com</a><span style="color:#1f497d;"> </span><a href="http://www.ABC.com/" rel="nofollow noreferrer noopener" target="_blank">www.ABC.com</a><span style="color:#1f497d;"></span></p><p><span style="color:#1f497d;">Geschäftsführer Vor Nach, VorUndZu Nach - Amtsgericht Dort HRB 12345 - Ein Unternehmer der ABC Gruppe</span></p></div></div>
    TEXT

    expect(html.html2html_strict.first).to eq(expected)
  end
  1175. it 'handles sample input 6' do
        # Outlook-style (class="MsoNormal") mail containing a German quoted-reply
        # header ("Von:" / "Gesendet:" / "An:" / "Betreff:"). The expected markup
        # has the class attributes, font-size/font-family/border styling and
        # <o:p> tags removed, keeps the (lower-cased) color declarations, and has a
        # js-signatureMarker span directly before the paragraph starting with "Von:".
  1176. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1177. <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
  1178. <div>
  1179. <div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
  1180. <p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;">Von:</span></b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;"> Besucherbüro, MKuk [<a href="mailto:besucherbuero@example.com">mailto:besucherbuero@example.com</a>] <br>
  1181. <b>Gesendet:</b> Freitag, 16. Dezember 2016 08:05<br>
  1182. <b>An:</b> 'Amaia Epalza'<br>
  1183. <b>Betreff:</b> AW: Gruppe vtb Kultuur // 28.06.2017<o:p></o:p></span></p>
  1184. </div>
  1185. </div>
  1186. <p class="MsoNormal"><o:p>&nbsp;</o:p></p>
  1187. <p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Segoe UI&quot;,&quot;sans-serif&quot;;color:#1F497D">Reservierungsbestätigung Führung Skulptur-Projekte 2017 am
  1188. </span></b><o:p></o:p></p>
  1189. <p class="MsoNormal"><span style="font-size:10.0pt;font-family:&quot;Segoe UI&quot;,&quot;sans-serif&quot;;color:#1F497D">&nbsp;</span><o:p></o:p></p>
  1190. <p class="MsoNormal">Guten Morgen Frau Epalza,<o:p></o:p></p>
  1191. HTML
  1192. <p><span style="color:#1f497d;"> </span></p><div>
  1193. <div>
  1194. <span class="js-signatureMarker"></span><p><b>Von:</b><span> Besucherbüro, MKuk [<a href="mailto:besucherbuero@example.com">mailto:besucherbuero@example.com</a>] <br>
  1195. <b>Gesendet:</b> Freitag, 16. Dezember 2016 08:05<br>
  1196. <b>An:</b> 'Amaia Epalza'<br>
  1197. <b>Betreff:</b> AW: Gruppe vtb Kultuur // 28.06.2017</span></p></div></div><p>&nbsp;</p><p><b><span style="color:#1f497d;">Reservierungsbestätigung Führung Skulptur-Projekte 2017 am </span></b></p><p><span style="color:#1f497d;"> </span></p><p>Guten Morgen Frau Epalza,</p>
  1198. TEXT
  1199. end
  1200. it 'handles sample input 7' do
        # Input uses empty class="" attributes, <br class=""> inside otherwise
        # empty divs, and ends with two unclosed <div> tags. The expected output
        # has no class attributes, adds rel/target to the link, renders each
        # <div><br></div> as <div>&nbsp;</div>, and contains nothing for the
        # trailing unclosed divs.
  1201. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1202. <div class="">Wir brauchen also die Instanz <a href="http://example.zammad.com" class="">example.zammad.com</a>, kann die aber nicht mehr nutzen.</div><div class=""><br class=""></div><div class="">Bitte um Freischaltung.</div><div class=""><br class=""></div><div class=""><br class=""><div class="">
  1203. HTML
  1204. <div>Wir brauchen also die Instanz <a href="http://example.zammad.com" rel="nofollow noreferrer noopener" target="_blank">example.zammad.com</a>, kann die aber nicht mehr nutzen.</div><div>&nbsp;</div><div>Bitte um Freischaltung.</div><div>&nbsp;</div>
  1205. TEXT
  1206. end
  1207. it 'handles sample input 8' do
        # The first paragraph's text is wrapped across two source lines; the
        # expected output joins it with a single space. Of the inline styles only
        # the color declaration remains, and the <o:p> tags are gone.
  1208. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1209. <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D;mso-fareast-language:EN-US">oh jeee … Zauberwort vergessen ;-) Können Sie mir
  1210. <b>bitte</b> noch meine Testphase verlängern?<o:p></o:p></span></p>
  1211. <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
  1212. HTML
  1213. <p><span style="color:#1f497d;">oh jeee … Zauberwort vergessen ;-) Können Sie mir <b>bitte</b> noch meine Testphase verlängern?</span></p><p><span style="color:#1f497d;"> </span></p>
  1214. TEXT
  1215. end
  1216. it 'handles sample input 9' do
        # Link whose href, title and text differ only in the query string
        # (plain vs. %5B...%5D-encoded). The expected output leaves href, title and
        # text unchanged; the only difference is rel="nofollow" becoming
        # rel="nofollow noreferrer noopener".
  1217. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1218. <div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=secret_key" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=secret_key" rel="nofollow" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=secret_key</a></div>
  1219. HTML
  1220. <div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=secret_key" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=secret_key" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=secret_key</a></div>
  1221. TEXT
  1222. end
  1223. it 'handles sample input 10' do
        # A single table row with heavily styled cells. In the expected output the
        # <tr> has no attributes; each <td> keeps valign plus its width, border,
        # background-color and padding declarations, while the width/nowrap/class
        # attributes, the height and background-position/-repeat declarations and
        # the font/margin/text-align styling of the inner elements are absent.
        # Spans that only carried removed styling disappear; the colored spans stay.
  1224. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1225. <tr style="height: 15pt;" class=""><td width="170" nowrap="" valign="bottom" style="width: 127.5pt; border-style: none none none solid; border-left-width: 1pt; border-left-color: windowtext; padding: 0cm 5.4pt; height: 15pt;" class=""><p class="MsoNormal" align="center" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;"><span style="" class="">&nbsp;</span></p></td><td width="58" nowrap="" valign="bottom" style="width: 43.5pt; padding: 0cm 5.4pt; height: 15pt;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="" class="">20-29</span></div></td><td width="47" nowrap="" valign="bottom" style="width: 35pt; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" class="">200</span></div></td><td width="76" nowrap="" valign="bottom" style="width: 57pt; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" class="">-1</span></div></td><td width="76" nowrap="" valign="bottom" style="width: 57pt; border-style: none solid none none; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" 
class="">201</span></div></td><td width="107" nowrap="" valign="bottom" style="width: 80pt; padding: 0cm 5.4pt; height: 15pt;" class=""></td><td width="85" nowrap="" valign="bottom" style="width: 64pt; padding: 0cm 5.4pt; height: 15pt;" class=""></td><td width="101" nowrap="" valign="bottom" style="width: 76pt; border-style: none solid solid; border-left-width: 1pt; border-left-color: windowtext; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><b class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">country</span></b><span style="font-size: 11pt; font-family: Calibri, sans-serif;" class=""></span></div></td><td width="87" nowrap="" valign="bottom" style="width: 65pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Target (gross)</span></div></td><td width="123" nowrap="" valign="bottom" style="width: 92pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times 
New Roman', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Remaining Recruits</span></div></td><td width="87" nowrap="" valign="bottom" style="width: 65pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: 'Times New Roman', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Total Recruits</span></div></td></tr>
  1226. HTML
  1227. <tr>
  1228. <td valign="bottom" style="width: 127.5pt; border-style: none none none solid; border-left-width: 1pt; border-left-color: windowtext; padding: 0cm 5.4pt;"><p>&nbsp;</p></td>
  1229. <td valign="bottom" style="width: 43.5pt; padding: 0cm 5.4pt;"><div>20-29</div></td>
  1230. <td valign="bottom" style="width: 35pt; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><div><span style="color: rgb(156, 0, 6);">200</span></div></td>
  1231. <td valign="bottom" style="width: 57pt; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><div><span style="color: rgb(156, 0, 6);">-1</span></div></td>
  1232. <td valign="bottom" style="width: 57pt; border-style: none solid none none; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><div><span style="color: rgb(156, 0, 6);">201</span></div></td>
  1233. <td valign="bottom" style="width: 80pt; padding: 0cm 5.4pt;"></td>
  1234. <td valign="bottom" style="width: 64pt; padding: 0cm 5.4pt;"></td>
  1235. <td valign="bottom" style="width: 76pt; border-style: none solid solid; border-left-width: 1pt; border-left-color: windowtext; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>
  1236. <b>country</b>
  1237. </div></td>
  1238. <td valign="bottom" style="width: 65pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Target (gross)</div></td>
  1239. <td valign="bottom" style="width: 92pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Remaining Recruits</div></td>
  1240. <td valign="bottom" style="width: 65pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Total Recruits</div></td>
  1241. </tr>
  1242. TEXT
  1243. end
  1244. it 'handles sample input 11' do
        # Mail body wrapped in a long chain of identically styled nested <div>s.
        # The expected output has a single outer <div>, plain spaces where the
        # input used &nbsp; between words, an <img> that keeps only src and the
        # width/height declarations, and spans kept only where a color remains.
        # The heredocs are unquoted, so the "\n" sequences in them are escape
        # sequences (newlines), not literal backslash-n text.
  1245. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1246. <div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div>Dear Bob<span style="line-height: 23.8px;">:</span><span style="color: rgb(255, 255, 255); line-height: 1.7;">Mr/Mrs</span></div><div><br></div><div><span style="line-height: 1.7;">We&nbsp;are&nbsp;one&nbsp;of&nbsp;the&nbsp;leading&nbsp;manufacturer&nbsp;and&nbsp;supplier&nbsp;of&nbsp;</span>conduits and cars since 3000.</div><div><br></div><div>Could you inform me the specification you 
need?</div><div><br></div><div>May I sent you our products catalogues for your reference?</div><div><br></div><div><img src="cid:5cb2783c$1$15ae9b384c8$Coremail$zhanabcdzhao$example.com" orgwidth="1101" orgheight="637" data-image="1" style="width: 722.7px; height: 418px; border: none;"></div><div>Best regards!</div><div><br></div><div><b style="line-height: 1.7;"><i><u><span lang="EL" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#17365D;\nmso-ansi-language:EL">Welcome to our booth B11/1 Hall 13 during SOMEWHERE\n9999.</span></u></i></b></div><div style="position:relative;zoom:1"><div>Bob Smith</div><div><div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) &nbsp;Fax: 0000-11-12345678&nbsp;</div><div><span style="color:#17365d;">Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</span></div><div>Web: www.example.com</div></div><div style="clear:both"></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div>
  1247. HTML
  1248. <div>\n<div>Dear Bob:<span style="color: rgb(255, 255, 255);">Mr/Mrs</span>
  1249. </div><div>&nbsp;</div><div>We are one of the leading manufacturer and supplier of conduits and cars since 3000.</div><div>&nbsp;</div><div>Could you inform me the specification you need?</div><div>&nbsp;</div><div>May I sent you our products catalogues for your reference?</div><div>&nbsp;</div><div><img src="cid:5cb2783c$1$15ae9b384c8$Coremail$zhanabcdzhao$example.com" style="width: 722.7px; height: 418px;"></div><div>Best regards!</div><div>&nbsp;</div><div><b><i><u><span lang="EL" style="color:#17365d;">Welcome to our booth B11/1 Hall 13 during SOMEWHERE 9999.</span></u></i></b></div><div>\n<div>Bob Smith</div><div>\n<div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) Fax: 0000-11-12345678</div><div><span style="color:#17365d;">Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</span></div><div>Web: www.example.com</div></div></div></div>
  1250. TEXT
  1251. end
  1253. it 'handles sample input 12' do
        # List item whose link text ("Luxemburg") differs from its href. The
        # expected link has no style/class, no inner colored span, and gains
        # rel, target and a title attribute whose value equals the href.
  1254. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1255. <li><a style="font-size:15px; font-family:Arial;color:#0f7246" class="text_link" href="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh"><span style="color: rgb(0, 0, 0);">Luxemburg</span></a></li>
  1256. HTML
  1257. <li><a href="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh" rel="nofollow noreferrer noopener" target="_blank" title="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh">Luxemburg</a></li>
  1258. TEXT
  1259. end
  1259. # https://github.com/zammad/zammad/issues/4112
  1260. it 'converts lists from MS Outlook correctly' do
        # Two MsoPlainText paragraphs whose bullet ("·" plus &nbsp; padding) sits
        # inside a <![if !supportLists]> ... <![endif]> section. Each paragraph is
        # expected to come out as "<p>• N</p>" with a single space after the bullet.
  1261. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1262. <p class="MsoPlainText" style="margin-left:36.0pt;text-indent:-18.0pt;mso-list:l0 level1 lfo1">
  1263. <![if !supportLists]><span style="font-family:Symbol;mso-fareast-language:EN-US"><span style="mso-list:Ignore">·<span style="font:7.0pt &quot;Times New Roman&quot;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
  1264. </span></span></span><![endif]><span style="mso-fareast-language:EN-US">1<o:p></o:p></span></p>
  1265. <p class="MsoPlainText" style="margin-left:36.0pt;text-indent:-18.0pt;mso-list:l0 level1 lfo1">
  1266. <![if !supportLists]><span style="font-family:Symbol;mso-fareast-language:EN-US"><span style="mso-list:Ignore">·<span style="font:7.0pt &quot;Times New Roman&quot;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
  1267. </span></span></span><![endif]><span style="mso-fareast-language:EN-US">2<o:p></o:p></span></p>
  1268. HTML
  1269. <p>• 1</p><p>• 2</p>
  1270. TEXT
  1271. end
  1272. # https://github.com/zammad/zammad/issues/4184
  1273. it 'deletes downlevel revealed conditional comments' do
        # Images wrapped in <![if !vml]> ... <![endif]>. The expected output has
        # the wrappers removed but the <img> tags kept, with the width/height
        # attributes expressed as a style and the v:shapes attribute gone; the
        # styled spans around "dummy1"/"dummy2" are reduced to their text.
  1274. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1275. <p class="MsoPlainText" style="margin-left:36.0pt;text-indent:-18.0pt;mso-list:l0 level1 lfo1">
  1276. <![if !vml]><img width="52" height="52" src="cid:image001.png@01D8AC0A.08251CD0" v:shapes="Picture_x0020_1"><![endif]>
  1277. <span style="font-size:9.0pt;font-family:&quot;Arial&quot;,sans-serif;color:black;mso-fareast-language:EN-AU">dummy1</span>
  1278. <![if !vml]><img width="52" height="52" src="cid:image002.png@01D8AC0A.08251CD1" v:shapes="Picture_x0020_2"><![endif]>
  1279. <span style="font-size:9.0pt;font-family:&quot;Arial&quot;,sans-serif;color:black;mso-fareast-language:EN-AU">dummy2</span>
  1280. </p>
  1281. HTML
  1282. <p>
  1283. <img src="cid:image001.png@01D8AC0A.08251CD0" style="width:52px;height:52px;"> dummy1 <img src="cid:image002.png@01D8AC0A.08251CD1" style="width:52px;height:52px;"> dummy2 </p>
  1284. TEXT
  1285. end
  1286. end
  1287. context 'signature recognition' do
        # Examples pinning where html2html_strict places the signature marker
        # span (`marker`) in its first return element for various HTML inputs.
  1288. let(:marker) { '<span class="js-signatureMarker"></span>' }
  1289. it 'does not trim trailing whitespace and keeps the signature block working' do
        # The "-- " line keeps its trailing space; the marker lands between the
        # two <br> tags that precede it.
  1290. expect('Hello<br><br>-- <br>This is the signature'.html2html_strict.first).to eq("Hello<br>#{marker}<br>-- <br>This is the signature")
  1291. end
  1292. it 'places marker before "--" line (surrounded by <br>)' do
        # Marker is expected directly before the <br> that precedes "--".
  1293. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1294. lalala<br>--<br>Max Mix
  1295. HTML
  1296. lalala#{marker}<br>--<br>Max Mix
  1297. TEXT
  1298. end
  1299. it 'places marker before "--" line (surrounded by <br/>)' do
        # Same as above with self-closing <br/>; the expected output uses <br>.
  1300. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1301. lalala<br/>--<br/>Max Mix
  1302. HTML
  1303. lalala#{marker}<br>--<br>Max Mix
  1304. TEXT
  1305. end
  1306. it 'places marker before "--" line (preceded by <br/>\n)' do
        # The line break after <br/> shows up as a single space before "--" in
        # the expected output.
  1307. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1308. lalala<br/>
  1309. --<br/>Max Mix
  1310. HTML
  1311. lalala#{marker}<br> --<br>Max Mix
  1312. TEXT
  1313. end
  1314. it 'places marker before "--" line (surrounded by <p>)' do
        # Marker is expected before the <p> that contains only "--".
  1315. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1316. lalala<p>--</p>Max Mix
  1317. HTML
  1318. lalala#{marker}<p>--</p>Max Mix
  1319. TEXT
  1320. end
  1321. it 'places marker before "__" line (surrounded by <br>)' do
        # A "__" line is treated like "--" here.
  1322. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1323. lalala<br>__<br>Max Mix
  1324. HTML
  1325. lalala#{marker}<br>__<br>Max Mix
  1326. TEXT
  1327. end
  1328. it 'places marker before quoted reply’s "Von:" header (in German)' do
        # Two <br> tags precede the bold "Von:" header; the marker is expected
        # between them and the rest of the input is unchanged.
  1329. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1330. den.<br><br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hello,<br><br>ich versuche an den Punkten
  1331. HTML
  1332. den.<br>#{marker}<br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hello,<br><br>ich versuche an den Punkten
  1333. TEXT
  1334. end
  1335. it 'places marker before quoted reply’s "Von:" header (as <p> with stripped parent <div>)' do
        # The input is truncated (tags left open) and ends in an escaped space
        # (\u0020). The expected output closes the open tags, has only one <div>
        # left around the paragraph, and puts the marker right before the <p>.
  1336. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1337. <div><div style="border:none;border-top:solid #e1e1e1 1.0pt;padding:3.0pt 0cm 0cm 0cm"><p class="MsoNormal"><b><span lang="DE" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif">Von:</span></b><span lang="DE" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif"> Martin Edenhofer via Zammad Helpdesk [mailto:<a href="mailto:support@example.com">support@zammad.com</a>] <br><b>Gesendet:</b>\u0020
  1338. HTML
  1339. <div>#{marker}<p><b><span lang="DE">Von:</span></b><span lang="DE"> Martin Edenhofer via Zammad Helpdesk [mailto:<a href="mailto:support@example.com">support@zammad.com</a>] <br><b>Gesendet:</b> </span></p></div>
  1340. TEXT
  1341. end
  1342. it 'places marker before quoted reply’s "Von:" header (as <p> with parent <div>)' do
        # Truncated input again; the styled parent <div> survives as a bare <div>
        # and the marker is expected directly before the <p> inside it.
  1343. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1344. <div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
  1345. <p class="MsoNormal" style="margin-left:35.4pt"><b><span style="font-family:Calibri;color:black">Von:
  1346. </span></b><span style="font-family:Calibri;color:black">Johanna Kiefer via Zammad Projects &lt;projects@example.com&gt;<br>
  1347. <b>Organisation: </b>Zammad GmbH<br>
  1348. <b>Datum: </b>Montag, 6. März 2017 um 13:32<br>
  1349. HTML
  1350. <div>
  1351. #{marker}<p><b>Von: </b><span>Johanna Kiefer via Zammad Projects &lt;projects@example.com&gt;<br>
  1352. <b>Organisation: </b>Zammad GmbH<br>
  1353. <b>Datum: </b>Montag, 6. März 2017 um 13:32<br></span></p></div>
  1354. TEXT
  1355. end
  1356. it 'places marker before quoted reply’s "Von:" header (as <div>)' do
        # Header built from <font> tags and &nbsp; padding. The expected output
        # has no <font> tags, a single leading <br>, collapsed padding, and the
        # marker in front of the whole <div>.
  1357. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1358. <div><br>
  1359. <br>
  1360. <br><font size=1 color=#5f5f5f face="sans-serif">Von: &nbsp; &nbsp; &nbsp;
  1361. &nbsp;</font><font size=1 face="sans-serif">Hotel &lt;info@example.com&gt;</font>
  1362. <br><font size=1 color=#5f5f5f face="sans-serif">An: &nbsp; &nbsp; &nbsp;
  1363. &nbsp;</font></div>
  1364. HTML
  1365. #{marker}<div><br>Von: Hotel &lt;info@example.com&gt; <br>An: </div>
  1366. TEXT
  1367. end
  1368. it 'places marker before English quoted text intro (as <blockquote>)' do
        # The "On ... wrote:" intro sits inside the <blockquote>; the marker is
        # expected right before the <blockquote>, and the unclosed tags are closed.
  1369. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1370. <br class=""><div><blockquote type="cite" class=""><div class="">On 04 Mar 2017, at 14:47, Oliver Ruhm &lt;<a href="mailto:oliver@example.com" class="">oliver@example.com</a>&gt; wrote:</div><br class="Apple-interchange-newline">
  1371. HTML
  1372. <div>#{marker}<blockquote type="cite">
  1373. <div>On 04 Mar 2017, at 14:47, Oliver Ruhm &lt;<a href="mailto:oliver@example.com">oliver@example.com</a>&gt; wrote:</div><br>
  1374. </blockquote></div>
  1375. TEXT
  1376. end
  1377. it 'does not place marker if blockquote doesn’t contain a quoted text intro' do
        # Same structure as the previous example but with "some note" as the
        # blockquote text; the expected output contains no marker.
  1378. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1379. <br class=""><div><blockquote type="cite" class=""><div class="">some note</div><br class="Apple-interchange-newline">
  1380. HTML
  1381. <div><blockquote type="cite">
  1382. <div>some note</div><br>
  1383. </blockquote></div>
  1384. TEXT
  1385. end
  1386. it 'does not place marker if quoted text intro isn’t followed by a <blockquote>' do
        # German "Am ... schrieb ...:" intro with no <blockquote> after it; the
        # expected output equals the input, without a marker.
  1387. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1388. <div>
  1389. <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
  1390. <br>
  1391. </div>
  1392. HTML
  1393. <div>
  1394. <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
  1395. <br>
  1396. </div>
  1397. TEXT
  1398. end
  1399. it 'places marker before German quoted text intro (before <blockquote>)' do
        # Same intro, now followed by a <blockquote>; the marker is expected in
        # front of the <div> that holds the intro.
  1400. expect(<<~HTML.chomp.html2html_strict.first).to eq(<<~TEXT.chomp)
  1401. <div>
  1402. <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
  1403. <br>
  1404. </div>
  1405. <blockquote type="cite">
  1406. <div>Dear Mr. Smith,<br></div>
  1407. </blockquote>
  1408. HTML
  1409. #{marker}<div>
  1410. <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
  1411. <br>
  1412. </div><blockquote type="cite">
  1413. <div>Dear Mr. Smith,<br>
  1414. </div></blockquote>
  1415. TEXT
  1416. end
  1417. end
  1418. end
  1419. describe '#signature_identify' do
  # Plain-text marker string the examples below expect signature_identify to insert.
  1420. let(:marker) { '######SIGNATURE_MARKER######' }
  1421. context 'with no signature present' do
  1422. it 'leaves string as-is' do
        # +'foo' yields an unfrozen copy of the literal; the call is expected to
        # return the text unchanged, without a marker.
  1423. expect((+'foo').signature_identify('text', true)).to eq('foo')
  1424. end
  1425. end
  1426. context 'with signature present' do
  1427. it 'places marker at start of "--" line' do
        # Three-line input (foo / -- / bar); the marker is expected immediately
        # in front of the "--" on the same line.
  1428. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1429. foo
  1430. --
  1431. bar
  1432. SRC
  1433. foo
  1434. #{marker}--
  1435. bar
  1436. MARKED
  1437. end
  1438. it 'places marker before English quoted text intro' do
        # A single "On <date>, <name> wrote:" line is expected to be prefixed
        # with the marker.
  1439. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1440. On 01/04/15 10:55, Bob Smith wrote:
  1441. SRC
  1442. #{marker}On 01/04/15 10:55, Bob Smith wrote:
  1443. MARKED
  1444. end
  1445. it 'places marker before German quoted text intro' do
        # A single "Am <date> um <time> schrieb <name>:" line is expected to be
        # prefixed with the marker.
  1446. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1447. Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@zammad.ink>:
  1448. SRC
  1449. #{marker}Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@zammad.ink>:
  1450. MARKED
  1451. end
  1452. it 'ignores trailing empty line' do
        # Marker is expected in front of the "--" line.
        # NOTE(review): the description mentions an empty line, but none is
        # visible in the fixture as shown here — confirm the heredoc content
        # against the original file.
  1453. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1454. test 123
  1455. test 123
  1456. --
  1457. Bob Smith
  1458. SRC
  1459. test 123
  1460. test 123
  1461. #{marker}--
  1462. Bob Smith
  1463. MARKED
  1464. end
  1465. it 'ignores trailing double empty lines' do
        # Marker is expected in front of the "--" line.
        # NOTE(review): as shown, this fixture is identical to the one in
        # 'ignores trailing empty line' and contains no empty lines — confirm
        # the heredoc content against the original file.
  1466. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1467. test 123
  1468. test 123
  1469. --
  1470. Bob Smith
  1471. SRC
  1472. test 123
  1473. test 123
  1474. #{marker}--
  1475. Bob Smith
  1476. MARKED
  1477. end
  1478. it 'ignores leading/trailing empty lines' do
        # Longer body (lines "1" to "9") before the "--" line; the marker is
        # expected in front of "--". \u0020 is an escaped space, written that way
        # so the trailing space on the first line is explicit in the source.
        # NOTE(review): no empty lines are visible in the fixture as shown —
        # confirm the heredoc content against the original file.
  1479. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1480. test 123\u0020
  1481. 1
  1482. 2
  1483. 3
  1484. 4
  1485. 5
  1486. 6
  1487. 7
  1488. 8
  1489. 9
  1490. --
  1491. Bob Smith
  1492. SRC
  1493. test 123\u0020
  1494. 1
  1495. 2
  1496. 3
  1497. 4
  1498. 5
  1499. 6
  1500. 7
  1501. 8
  1502. 9
  1503. #{marker}--
  1504. Bob Smith
  1505. MARKED
  1506. end
  1507. it 'ignores lines starting with "--" but containing more text' do
        # "--no not match--" must stay unmarked; only the bare "--" line below it
        # is expected to receive the marker.
  1508. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1509. test 123\u0020
  1510. --no not match--
  1511. --
  1512. Bob Smith
  1513. SRC
  1514. test 123\u0020
  1515. --no not match--
  1516. #{marker}--
  1517. Bob Smith
  1518. MARKED
  1519. end
  1520. it 'places marker at start of " -- " line' do
        # Variant where the separator line has a trailing escaped space (\u0020).
        # The expected line shows a space between the marker and "--".
        # NOTE(review): the matching leading space is not visible on the input
        # "--" line as shown here — confirm against the original file.
  1521. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1522. test 123\u0020
  1523. --no not match--
  1524. --\u0020
  1525. Bob Smith
  1526. SRC
  1527. test 123\u0020
  1528. --no not match--
  1529. #{marker} --\u0020
  1530. Bob Smith
  1531. MARKED
  1532. end
  1533. it 'places marker on empty line if possible / only places one marker' do
        # Input contains two "--" separator lines; the expected output carries a
        # single marker, on its own line before the first "--".
        # NOTE(review): the expected text has one more line than the input as
        # shown, so the input presumably has an empty line before the first "--"
        # that is not visible here — confirm against the original file.
  1534. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1535. test 123\u0020
  1536. --
  1537. Bob Smith
  1538. --
  1539. Bob Smith
  1540. SRC
  1541. test 123\u0020
  1542. #{marker}
  1543. --
  1544. Bob Smith
  1545. --
  1546. Bob Smith
  1547. MARKED
  1548. end
  1549. context 'for Apple email quote text' do
        # Quoted-text intros of the form "On ... wrote:" (English) and
        # "Am ... schrieb ...:" (German).
  1550. context 'in English' do
  1551. it 'places two markers, one before quoted text intro and one at start of "--" line' do
        # Both the "On ... wrote:" line and the later bare "--" line are expected
        # to be prefixed with a marker; "--no not match--" stays untouched.
  1552. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1553. test 123\u0020
  1554. --no not match--
  1555. Bob Smith
  1556. On 01/04/15 10:55, Bob Smith wrote:
  1557. lalala
  1558. --
  1559. some test
  1560. SRC
  1561. test 123\u0020
  1562. --no not match--
  1563. Bob Smith
  1564. #{marker}On 01/04/15 10:55, Bob Smith wrote:
  1565. lalala
  1566. #{marker}--
  1567. some test
  1568. MARKED
  1569. end
  1570. end
  1571. context 'auf Deutsch' do
  1572. it 'places marker before quoted text intro' do
        # Only the "Am ... schrieb ...:" line is expected to receive a marker.
  1573. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1574. test 123\u0020
  1575. --no not match--
  1576. Bob Smith
  1577. Am 03.04.2015 um 20:58 schrieb Bob Smith <bob@example.com>:
  1578. lalala
  1579. SRC
  1580. test 123\u0020
  1581. --no not match--
  1582. Bob Smith
  1583. #{marker}Am 03.04.2015 um 20:58 schrieb Bob Smith <bob@example.com>:
  1584. lalala
  1585. MARKED
  1586. end
  1587. end
  1588. end
  # Quote introductions made of mail-header style lines ("From:/Sent:", "Von:/Gesendet:",
  # "De :/Envoyé :"). In every example exactly one marker is expected, in front of the
  # first header line; the following header lines are expected to come back unchanged.
  1589. context 'for MS email quote text' do
  1590. context 'in English' do
  # English "From:" header; note the "Sent:" line carries a German weekday and the
  # last line ends in a stray "</div>", both of which are expected back verbatim.
  1591. it 'places marker before quoted text intro' do
  1592. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1593. test 123test 123\u0020
  1594. --no not match--
  1595. Bob Smith
  1596. From: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1597. Sent: Donnerstag, 2. April 2015 10:00
  1598. lalala</div>
  1599. SRC
  1600. test 123test 123\u0020
  1601. --no not match--
  1602. Bob Smith
  1603. #{marker}From: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1604. Sent: Donnerstag, 2. April 2015 10:00
  1605. lalala</div>
  1606. MARKED
  1607. end
  1608. end
  1609. context 'auf Deutsch' do
  # German header block: the marker is expected in front of the "Von:" line.
  1610. it 'places marker before quoted text intro' do
  1611. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1612. test 123\u0020
  1613. --no not match--
  1614. Bob Smith
  1615. Von: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1616. Gesendet: Donnerstag, 2. April 2015 10:00
  1617. Betreff: lalala
  1618. SRC
  1619. test 123\u0020
  1620. --no not match--
  1621. Bob Smith
  1622. #{marker}Von: Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1623. Gesendet: Donnerstag, 2. April 2015 10:00
  1624. Betreff: lalala
  1625. MARKED
  1626. end
  1627. end
  1628. context 'en francais' do
  # French header block (space before the colon): the marker is expected in front of
  # the "De :" line.
  1629. it 'places marker before quoted text intro' do
  1630. expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
  1631. test 123\u0020
  1632. --no not match--
  1633. Bob Smith
  1634. De : Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1635. Envoyé : mercredi 29 avril 2015 17:31
  1636. Objet : lalala
  1637. SRC
  1638. test 123\u0020
  1639. --no not match--
  1640. Bob Smith
  1641. #{marker}De : Martin Edenhofer via Zammad Support [mailto:support@zammad.inc]
  1642. Envoyé : mercredi 29 avril 2015 17:31
  1643. Objet : lalala
  1644. MARKED
  1645. end
  1646. end
  1647. end
  1648. end
  1649. end
  # Examples for #utf8_encode: input that is already UTF-8, input in other encodings
  # (with no, a valid, or an invalid from: hint), and time-boxed runs on large strings.
  describe '#utf8_encode' do
    context 'on valid, UTF-8-encoded strings' do
      subject(:string) { 'hello' }

      # The result must compare equal and report the same encoding object,
      # yet must not be the receiver itself.
      it 'returns an identical copy' do
        expect(string.utf8_encode).to eq(string)
        expect(string.utf8_encode.encoding).to equal(string.encoding)
        expect(string.utf8_encode).not_to equal(string)
      end

      context 'which are incorrectly set to other, technically valid encodings' do
        # Bytes of a UTF-8 literal, tagged as tis-620 at construction time.
        subject(:string) { described_class.new('ö', encoding: 'tis-620') }

        # Expected result is the same bytes merely re-tagged as UTF-8.
        it 'sets input encoding to UTF-8 instead of attempting conversion' do
          expect(string.utf8_encode).to eq(string.dup.force_encoding('utf-8'))
        end
      end
    end

    context 'on strings in other encodings' do
      subject(:string) { original_string.encode(input_encoding) }

      # Defaults for the nested contexts; the invalid-from: context overrides both.
      let(:original_string) { 'Tschüss!' }
      let(:input_encoding)  { Encoding::ISO_8859_2 }

      context 'with no from: option' do
        it 'detects the input encoding' do
          expect(string.utf8_encode).to eq(original_string)
        end
      end

      context 'with a valid from: option' do
        it 'uses the specified input encoding' do
          expect(string.utf8_encode(from: 'iso-8859-2')).to eq(original_string)
        end

        # gb18030 can decode these bytes, just not to the text that was encoded.
        it 'uses any valid input encoding, even if not correct' do
          expect(string.utf8_encode(from: 'gb18030')).to eq('Tsch黶s!')
        end
      end

      context 'with an invalid from: option' do
        let(:original_string) { '―陈志' }
        let(:input_encoding)  { Encoding::GB18030 }

        # Plain String#encode with gb2312 as source raises on these bytes,
        # whereas #utf8_encode given the same hint must not.
        it 'does not try it' do
          expect { string.encode('utf-8', 'gb2312') }.to raise_error(Encoding::InvalidByteSequenceError)
          expect { string.utf8_encode(from: 'gb2312') }.not_to raise_error
        end

        it 'uses the detected input encoding instead' do
          expect(string.utf8_encode(from: 'gb2312')).to eq(original_string)
        end
      end
    end

    # Each example runs inside Timeout.timeout, so exceeding the time budget
    # raises and fails the example.
    context 'performance' do
      subject(:string) { original_string.encode(input_encoding) }

      context 'with utf8_encode in iso-8859-1' do
        let(:input_encoding)  { Encoding::ISO_8859_1 }
        let(:original_string) { 'äöü0' * 999_999 }

        it 'detects the input encoding' do
          Timeout.timeout(1) { expect(string.utf8_encode(from: 'iso-8859-1')).to eq(original_string) }
        end
      end

      context 'with utf8_encode in utf-8' do
        let(:input_encoding)  { Encoding::UTF_8 }
        let(:original_string) { 'äöü0' * 999_999 }

        it 'detects the input encoding' do
          Timeout.timeout(1) { expect(string.utf8_encode(from: 'utf-8')).to eq(original_string) }
        end
      end

      # The from: hint ('utf-8') does not match the ISO-8859-1 input here; going by the
      # context title this presumably exercises charset detection, hence the smaller
      # string and the far larger time budget.
      context 'with utf8_encode in iso-8859-1 and charset detection' do
        let(:input_encoding)  { Encoding::ISO_8859_1 }
        let(:original_string) { 'äöü0' * 199_999 }

        it 'detects the input encoding' do
          Timeout.timeout(18) { expect(string.utf8_encode(from: 'utf-8')).to eq(original_string) }
        end
      end
    end
  end
  1729. end