string_spec.rb 89 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935
  1. # frozen_string_literal: true
  2. require 'rails_helper'
  3. RSpec.describe String do
  # String#strip: stock Ruby trimming first, then the behaviour these examples
  # attribute to the project's monkey patch (zero-width spaces, binary input).
  describe '#strip' do
    context 'default behavior' do
      it 'removes leading/trailing spaces' do
        stripped = ' test '.strip

        expect(stripped).to eq('test')
      end

      it 'removes trailing newlines' do
        stripped = "test\n".strip

        expect(stripped).to eq('test')
      end

      it 'does not remove internal spaces / newlines' do
        stripped = "test \n test".strip

        expect(stripped).to eq("test \n test")
      end
    end

    context 'monkey-patched behavior' do
      it 'removes leading/trailing zero-width spaces, but not internal ones' do
        # U+200B at either edge is dropped; the one between the words must stay.
        padded = " \r\n test \u{200B} \n test\u{200B} \u{200B}"

        expect(padded.strip).to eq("test \u{200B} \n test")
      end

      it 'does not break on non-unicode strings' do
        # Same bytes with and without the trailing space, both tagged ASCII-8BIT.
        input    = described_class.new("\xC2\xA92011 Z ", encoding: 'ASCII-8BIT')
        expected = described_class.new("\xC2\xA92011 Z", encoding: 'ASCII-8BIT')

        expect(input.strip).to eq(expected)
      end
    end
  end
  # String#strip!: in-place counterpart of #strip. Every example asserts two
  # things at once: the very same object is returned (`be(str)`) and that
  # object now holds the expected content (`eq(...)`).
  describe '#strip!' do
    context 'default behavior' do
      it 'removes leading/trailing spaces (in place)' do
        # Unary plus yields an unfrozen string despite `frozen_string_literal: true`.
        str = +' test '
        expect(str.strip!).to be(str).and eq('test')
      end

      it 'removes trailing newlines (in place)' do
        str = +"test\n"
        expect(str.strip!).to be(str).and eq('test')
      end

      it 'does not remove internal spaces / newlines (in place)' do
        str = +"test \n test "
        # Compare against a literal: `eq(str)` would compare the mutated
        # receiver with itself and therefore could never fail.
        expect(str.strip!).to be(str).and eq("test \n test")
      end
    end

    context 'monkey-patched behavior' do
      it 'removes leading/trailing zero-width spaces, but not internal ones (in place)' do
        str = +" \r\n test \u{200B} \n test\u{200B} \u{200B}"
        expect(str.strip!).to be(str).and eq("test \u{200B} \n test")
      end

      it 'does not break on invalid-unicode strings (in place)' do
        # Binary-tagged (ASCII-8BIT) input with a trailing space to remove.
        str = described_class.new("\xC2\xA92011 Z ", encoding: 'ASCII-8BIT')
        expect(str.strip!)
          .to be(str).and eq(described_class.new("\xC2\xA92011 Z", encoding: 'ASCII-8BIT'))
      end
    end
  end
  # String#to_filename: turns a Ruby constant path into a snake-case file path
  # and returns a new string rather than touching the receiver.
  describe '#to_filename' do
    it 'does not modify strings in place' do
      %w[test Some::File].each do |original|
        expect { original.to_filename }.not_to(change { original })
      end
    end

    it 'leaves all-downcase strings as-is' do
      filename = 'test'.to_filename

      expect(filename).to eq('test')
    end

    it 'converts camelcase Ruby constant paths to snakecase file paths' do
      filename = 'Some::File'.to_filename

      expect(filename).to eq('some/file')
    end
  end
  # String#to_classname: the reverse direction of #to_filename — file path in,
  # Ruby constant path out, receiver left unchanged.
  describe '#to_classname' do
    it 'does not modify strings in place' do
      %w[test some/file].each do |original|
        expect { original.to_classname }.not_to(change { original })
      end
    end

    it 'capitalizes all-downcase strings' do
      classname = 'test'.to_classname

      expect(classname).to eq('Test')
    end

    it 'converts snakecase file paths to camelcase Ruby constant paths' do
      classname = 'some/file'.to_classname

      expect(classname).to eq('Some::File')
    end

    context 'unlike ActiveSupport’s #classify' do
      it 'preserves pluralized names' do
        # Checked in order; the trailing "s" must survive in both cases.
        {
          'some/files'      => 'Some::Files',
          'some_test/files' => 'SomeTest::Files'
        }.each do |path, constant|
          expect(path.to_classname).to eq(constant)
        end
      end
    end
  end
  # Examples for String#html2text. Each example hands an HTML fragment to the
  # method and asserts on the plain text it returns.
  #
  # Heredoc convention used below: the HTML heredoc is the input, the TEXT
  # heredoc is the expected output, and `.chomp` removes each heredoc's final
  # newline.
  #
  # NOTE(review): every line of this listing carries an "N. " line-number
  # prefix and no blank line or indentation survives anywhere in it, so the
  # heredoc fixtures have presumably lost blank lines and relative indentation.
  # Verify exact whitespace against the upstream spec before trusting the
  # expected TEXT bodies.
  86. describe '#html2text' do
  # The receiver must be left untouched by the conversion.
  87. it 'does not modify strings in place' do
  88. %w[test <div>test</div>].each do |str|
  89. expect { str.html2text }.not_to change { str }
  90. end
  91. end
  # --- Plain text and outer whitespace ---
  92. it 'leaves human-readable text as-is' do
  93. expect('test'.html2text).to eq('test')
  94. end
  95. it 'strips leading/trailing spaces' do
  96. expect(' test '.html2text).to eq('test')
  97. end
  98. it 'also strips leading/trailing newlines' do
  99. expect("\n\n test \n\n\n".html2text).to eq('test')
  100. end
  101. it 'strips HTML tags around text content' do
  102. expect('<div>test</div>'.html2text).to eq('test')
  103. end
  # --- Trailing <br>, newlines, spaces and &nbsp; at the end of the last element ---
  104. it 'strips trailing <br> inside last <div>' do
  105. expect('<div>test<br></div>'.html2text).to eq('test')
  106. end
  107. it 'strips trailing <br> and newlines inside last <div>' do
  108. expect("<div>test<br><br><br>\n<br>\n<br>\n</div>".html2text).to eq('test')
  109. end
  110. it 'strips trailing <br>, newlines, and spaces inside last <div>' do
  111. expect("<div>test<br><br> <br> \n<br> \n<br> \n</div>".html2text).to eq('test')
  112. end
  113. it 'strips trailing <br>, newlines, and &nbsp; inside last <div>' do
  114. expect("<div>test<br><br>&nbsp;<br>&nbsp;\n<br>&nbsp;\n<br>&nbsp;\n</div>".html2text).to eq('test')
  115. end
  116. it 'strips trailing whitespace (including &nbsp; & <br>) both inside and after last tag' do
  117. expect("<div>test<br><br>&nbsp;<br>&nbsp;\n<br>&nbsp;\n<br>&nbsp;\n</div>&nbsp;".html2text).to eq('test')
  118. end
  # A newline inside inline text is expected to come back as a single space.
  119. it 'also strips nested HTML tags' do
  120. expect("<p><span>Was\nsoll verbessert werden:</span></p>".html2text)
  121. .to eq('Was soll verbessert werden:')
  122. end
  # Inside <pre>/<code> a run of newlines is expected to shrink to one newline.
  123. it 'in <pre> elements, collapses multiple newlines into one' do
  124. expect("<pre>test\n\ntest</pre>".html2text).to eq("test\ntest")
  125. end
  126. it 'in <code> elements, collapses multiple newlines into one' do
  127. expect("<code>test\n\ntest</code>".html2text).to eq("test\ntest")
  128. end
  # NOTE(review): the first table row contains a stray </td>; presumably
  # deliberate malformed input — confirm.
  129. it 'converts <table> cells and row to space-separated lines' do
  130. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  131. <table><tr><td>test</td><td>col</td></td></tr><tr><td>test</td><td>4711</td></tr></table>
  132. HTML
  133. test col
  134. test 4711
  135. TEXT
  136. end
  137. it 'strips HTML comments' do
  138. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  139. <!-- some comment -->
  140. <div>
  141. test<br><br><br>
  142. <br>
  143. <br>
  144. </div>
  145. HTML
  146. test
  147. TEXT
  148. end
  # Per the expected TEXT, a link becomes "[n] label" inline and its URL is
  # listed after the text under the same number.
  149. it 'converts <a> elements to plain text with numerical references' do
  150. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  151. <div><a href="https://zammad.org">Best Tool of the World</a>
  152. some other text</div>
  153. <div>
  154. HTML
  155. [1] Best Tool of the Worldsome other text
  156. [1] https://zammad.org
  157. TEXT
  158. end
  159. it 'converts <hr> elements to separate paragraphs containing only "___"' do
  160. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  161. <!-- some comment -->
  162. <div>
  163. test<br><br><br>
  164. <hr/>
  165. <br>
  166. </div>
  167. HTML
  168. test
  169. ___
  170. TEXT
  171. end
  # NOTE(review): the title promises up to two newlines, yet the expected text
  # shows no blank line after "test" — presumably a blank line was lost in
  # this listing; confirm.
  172. it 'converts <br> elements to newlines (max. 2)' do
  173. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  174. test<br><br><br>--<br>abc</div>
  175. HTML
  176. test
  177. --
  178. abc
  179. TEXT
  180. end
  # The "=" at line ends inside the fixture is part of the input data (it
  # looks like soft-wrapped mail source), not Ruby syntax.
  181. it 'strips Microsoft Outlook conditional comments' do
  182. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  183. Ihr RZ-Team<br />
  184. <br />
  185. <!--[if gte mso 9]><xml> <o:DocumentProperties> <o:Author>test</o:Author> =
  186. <o:Template>A75DB76E.dotm</o:Template> <o:LastAuthor>test</o:LastAuthor> =
  187. <o:Revision>5</o:Revision> <o:Created>2011-05-18T07:08:00Z</o:Created> <=
  188. o:LastSaved>2011-07-04T17:59:00Z</o:LastSaved> <o:Pages>1</o:Pages> <o:Wo=
  189. rds>189</o:Words> <o:Characters>1192</o:Characters> <o:Lines>9</o:Lines> =
  190. <o:Paragraphs>2</o:Paragraphs> <o:CharactersWithSpaces>1379</o:Characters=
  191. WithSpaces> <o:Version>11.5606</o:Version> </o:DocumentProperties></xml><!=
  192. [endif]-->
  193. HTML
  194. Ihr RZ-Team
  195. TEXT
  196. end
  # Table-layout mail template (truncated, tags left open): only the <title>
  # and the two visible text nodes are expected in the output.
  197. it 'strips <img> elements' do
  198. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  199. <html>
  200. <head>
  201. <title>Neues Fax von 1234-93900</title>
  202. </head>
  203. <body style="margin: 0px;padding: 0px;font-family: Arial, sans-serif;font-size: 12px;">
  204. <table cellpadding="0" cellspacing="0" width="100%" height="100%" bgcolor="#d9e7f0" id="mailbg"
  205. style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;width: 100%;height: 100%;background-color: #d9e7f0;padding: 0px;margin: 0px;">
  206. <tr>
  207. <td valign="top">
  208. <center>
  209. <br><br>
  210. <table width="560" cellpadding="0" cellspacing="0" bgcolor="#FFFFFF" id="mailcontainer"
  211. style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;width: 560px;margin: 0px auto;padding: 0px;background-color: #FFFFFF;">
  212. <tr>
  213. <td colspan="3" width="560" id="mail_header" valign="top" style="width: 560px;background-color: #FFFFFF;font-family: Arial, sans-serif;color: #000000;padding: 0px;margin: 0px;">
  214. <table width="560" cellpadding="0" cellspacing="0" style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;">
  215. <tr>
  216. <td height="10" colspan="4" style="font-size:0px;line-height: 0px;padding:0px;height:10px;">
  217. <img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_header.gif" style="padding: 0px;margin: 0px;">
  218. </td>
  219. </tr>
  220. <tr>
  221. <td height="12" colspan="4"><span style="font-size:0px;line-height:0px;"> </span></td>
  222. </tr>
  223. <tr>
  224. <td height="27" width="30"> </td>
  225. <td height="27" width="397"><span class="mailtitle" style="font-family: Arial, sans-serif;color: #000000;font-size: 18px;line-height: 18px;font-weight: normal;">Neues Fax</span></td>
  226. <td height="27" width="103"><img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_logo-example.gif" style="padding: 0px;margin: 0px;"></td>
  227. <td height="27" width="30"></td>
  228. </tr>
  229. <tr>
  230. <td height="20" colspan="4"><span style="font-size:0px;line-height:0px;"> </span></td>
  231. </tr>
  232. <tr>
  233. <td height="1" colspan="4" style="font-size:0px;line-height: 0px;padding:0px;">
  234. <img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_line-grey.gif" style="padding: 0px;margin: 0px;">
  235. </td>
  236. </tr>
  237. </table>
  238. </td>
  239. </tr>
  240. <tr>
  241. <td colspan="3" width="560"> </td>
  242. </tr>
  243. <tr>
  244. <td width="30"> </td>
  245. <td width="500" height="30" valign="middle" align="right">
  246. <span class="accountno" style="font-family: Arial, sans-serif;font-size: 10px;color: #666666;">Ihre Kundennummer: 12345678</span>
  247. </td>
  248. <td width="30"> </td>
  249. </tr>
  250. HTML
  251. Neues Fax von 1234-93900
  252. Neues Fax
  253. Ihre Kundennummer: 12345678
  254. TEXT
  255. end
  # &nbsp; is expected as U+00A0 (written \u00A0 in the TEXT heredoc) and
  # &amp; as a literal "&".
  256. it 'converts characters written in HTML ampersand code' do
  257. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  258. line&nbsp;1<br>
  259. you<br/>
  260. -----&amp;
  261. HTML
  262. line\u00A01
  263. you
  264. -----&
  265. TEXT
  266. end
  # \u0020 is an escaped space; presumably used so the fixture keeps a leading
  # space that `<<~` would otherwise remove as indentation — confirm.
  267. it 'converts <ul> to asterisk-demarcated list' do
  268. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  269. \u0020<ul><li>#1</li><li>#2</li></ul>
  270. HTML
  271. * #1
  272. * #2
  273. TEXT
  274. end
  # NOTE(review): the fixture's second <head> is an opening tag, not </head>;
  # presumably intentional malformed input — confirm.
  275. it 'strips HTML frontmatter and <head> element' do
  276. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  277. <!DOCTYPE html>
  278. <html>
  279. <head>
  280. <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
  281. <head>
  282. <body style="font-family:Geneva,Helvetica,Arial,sans-serif; font-size: 12px;">
  283. <div>&gt; Welcome!</div><div>&gt;</div><div>&gt; Thank you for installing Zammad.</div><div>&gt;</div>
  284. </body>
  285. </html>
  286. HTML
  287. > Welcome!
  288. >
  289. > Thank you for installing Zammad.
  290. >
  291. TEXT
  292. end
  # Only the paragraph that follows the <style> element is expected to remain.
  293. it 'strips <style> elements' do
  294. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  295. \u0020 <style type="text/css">
  296. body {
  297. width:90% !important;
  298. -webkit-text-size-adjust:90%;
  299. -ms-text-size-adjust:90%;
  300. font-family:\'helvetica neue\', helvetica, arial, geneva, sans-serif; f=
  301. ont-size: 12px;;
  302. }
  303. img {
  304. outline:none; text-decoration:none; -ms-interpolation-mode: bicubic;
  305. }
  306. a img {
  307. border:none;
  308. }
  309. table td {
  310. border-collapse: collapse;
  311. }
  312. table {
  313. border-collapse: collapse; mso-table-lspace:0pt; mso-table-rspace:0pt;
  314. }
  315. p, table, div, td {
  316. max-width: 600px;
  317. }
  318. p {
  319. margin: 0;
  320. }
  321. blockquote, pre {
  322. margin: 0px;
  323. padding: 8px 12px 8px 12px;
  324. }
  325. </style><p>some other content</p>
  326. HTML
  327. some other content
  328. TEXT
  329. end
  # Fixture combines <meta>, <style> and Outlook conditional comments; only
  # the leading text node is expected in the output.
  330. it 'strips <meta> elements' do
  331. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  332. \u0020 IT-Infrastruktur</span><br>
  333. <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  334. <meta name="Generator" content="Microsoft Word 14 (filtered
  335. medium)">
  336. <!--[if !mso]><style>v\:* {behavior:url(#default#VML);}
  337. o\:* {behavior:url(#default#VML);}
  338. w\:* {behavior:url(#default#VML);}
  339. .shape {behavior:url(#default#VML);}
  340. </style><![endif]-->
  341. <style><!--
  342. @font-face
  343. {font-family:calibri;
  344. panose-1:2 15 5 2 2 2 4 3 2 4;}
  345. @font-face
  346. {font-family:tahoma;
  347. panose-1:2 11 6 4 3 5 4 4 2 4;}
  348. p.msonormal, li.msonormal, div.msonormal
  349. {margin:0cm;
  350. margin-bottom:.0001pt;
  351. font-size:11.0pt;
  352. font-family:"calibri","sans-serif";
  353. mso-fareast-language:en-us;}
  354. a:link, span.msohyperlink
  355. {mso-style-priority:99;
  356. color:blue;
  357. text-decoration:underline;}
  358. a:visited, span.msohyperlinkfollowed
  359. {mso-style-priority:99;
  360. color:purple;
  361. text-decoration:underline;}
  362. p.msoacetate, li.msoacetate, div.msoacetate
  363. {mso-style-priority:99;
  364. mso-style-link:"sprechblasentext zchn";
  365. margin:0cm;
  366. margin-bottom:.0001pt;
  367. font-size:8.0pt;
  368. font-family:"tahoma","sans-serif";
  369. mso-fareast-language:en-us;}
  370. span.e-mailformatvorlage17
  371. {mso-style-type:personal;
  372. font-family:"calibri","sans-serif";
  373. color:windowtext;}
  374. span.sprechblasentextzchn
  375. {mso-style-name:"sprechblasentext zchn";
  376. mso-style-priority:99;
  377. mso-style-link:sprechblasentext;
  378. font-family:"tahoma","sans-serif";}
  379. .msochpdefault
  380. {mso-style-type:export-only;
  381. font-family:"calibri","sans-serif";
  382. mso-fareast-language:en-us;}
  383. @page wordsection1
  384. {size:612.0pt 792.0pt;
  385. margin:70.85pt 70.85pt 2.0cm 70.85pt;}
  386. div.wordsection1
  387. {page:wordsection1;}
  388. --></style><!--[if gte mso 9]><xml>
  389. <o:shapedefaults v:ext="edit" spidmax="1026" />
  390. </xml><![endif]--><!--[if gte mso 9]><xml>
  391. <o:shapelayout v:ext="edit">
  392. <o:idmap v:ext="edit" data="1" />
  393. </o:shapelayout></xml><![endif]-->
  394. HTML
  395. IT-Infrastruktur
  396. TEXT
  397. end
  # --- Block-level elements and <blockquote> ---
  # Per the expected TEXT, quoted lines are prefixed with "> ".
  # NOTE(review): blank separator lines promised by the titles are not visible
  # in the expected TEXT of the next three examples — presumably lost; confirm.
  398. it 'separates block-level elements by one newline (<p> following a non-<p> block gets two)' do
  399. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  400. <h1>some head</h1>
  401. some content
  402. <blockquote>
  403. <p>line 1</p>
  404. <p>line 2</p>
  405. </blockquote>
  406. <p>some text later</p>
  407. HTML
  408. some head
  409. some content
  410. > line 1
  411. > line 2
  412. some text later
  413. TEXT
  414. end
  415. it 'formats <blockquote> contents with leading "> "' do
  416. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  417. <h1>some head</h1>
  418. some content
  419. <blockquote>
  420. line 1<br/>
  421. line 2<br>
  422. </blockquote>
  423. <p>some text later</p>
  424. HTML
  425. some head
  426. some content
  427. > line 1
  428. > line 2
  429. some text later
  430. TEXT
  431. end
  432. it 'adds max. 2 newlines between block-level <blockquote> contents' do
  433. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  434. <h1>some head</h1>
  435. some content
  436. <blockquote>
  437. <div><div>line 1</div><br></div>
  438. <div><div>line 2</div><br></div>
  439. </blockquote>
  440. some text later
  441. HTML
  442. some head
  443. some content
  444. > line 1
  445. >
  446. > line 2
  447. some text later
  448. TEXT
  449. end
  # The URL list is expected after all body text, including the dashed line
  # that follows the linked paragraph.
  450. it 'places numerical <a> references at end of text string' do
  451. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  452. <p>Best regards,</p>
  453. <p><i>Your Team Team</i></p>
  454. <p>P.S.: You receive this e-mail because you are listed in our database as person who ordered a Team license. Please click
  455. <a href="http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx">here</a> to unsubscribe from further e-mails.</p>
  456. -----------------------------
  457. <br />
  458. HTML
  459. Best regards,
  460. Your Team Team
  461. P.S.: You receive this e-mail because you are listed in our database as person who ordered a Team license. Please click [1] here to unsubscribe from further e-mails.
  462. -----------------------------
  463. [1] http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx
  464. TEXT
  465. end
  # The <span> opened in the fixture is never closed. The \u0020 at the end
  # of one input line is an escaped trailing space.
  466. it 'handles elements with missing closing tags' do
  467. expect(<<~HTML.chomp.html2text).to eq(<<~TEXT.chomp)
  468. <div><br>Dave and leaned her
  469. days adam.</div><span style="color:#F7F3FF; font-size:8px">Maybe we
  470. want any help me that.<br>Next morning charlie saw at their
  471. father.<br>Well as though adam took out here. Melvin will be more money.\u0020
  472. Called him into this one last thing.<br>Men-----------------------
  473. <br />
  474. HTML
  475. Dave and leaned her days adam.
  476. Maybe we want any help me that.
  477. Next morning charlie saw at their father.
  478. Well as though adam took out here. Melvin will be more money. Called him into this one last thing.
  479. Men-----------------------
  480. TEXT
  481. end
  # Timing guards: Timeout.timeout(2) raises when the conversion takes longer
  # than two seconds, which makes `not_to raise_error` fail the example.
  482. context 'performance tests' do
  # Eleven copies of a paragraph containing a link, plus one extra newline.
  483. let(:filler) do
  484. %(<p>some word <a href="http://example.com?domain?example.com">some url</a> and the end.</p>\n) * 11 + "\n"
  485. end
  # Document body contains the filler once.
  486. it 'converts a 1076-byte unicode file in under 2s' do
  487. expect { Timeout.timeout(2) { <<~HTML.chomp.html2text } }.not_to raise_error
  488. <html>
  489. <title>some title</title>
  490. <body>
  491. <div>hello</div>
  492. #{filler}
  493. </body>
  494. </html>
  495. HTML
  496. end
  # Same document with the filler repeated 2312 times.
  497. it 'converts a 2.21 MiB unicode file in under 2s' do
  498. expect { Timeout.timeout(2) { <<~HTML.chomp.html2text } }.not_to raise_error
  499. <html>
  500. <title>some title</title>
  501. <body>
  502. <div>hello</div>
  503. #{filler * 2312}
  504. </body>
  505. </html>
  506. HTML
  507. end
  508. end
  509. end
  510. describe '#html2html_strict' do
  # Input without any markup must come back unchanged.
  it 'leaves human-readable text as-is' do
    sanitized = 'test'.html2html_strict

    expect(sanitized).to eq('test')
  end
  # Spaces on either side of the text are removed.
  it 'strips leading/trailing spaces' do
    sanitized = ' test '.html2html_strict

    expect(sanitized).to eq('test')
  end
  # Newlines mixed with spaces around the text are removed as well.
  it 'also strips leading/trailing newlines' do
    sanitized = "\n\n test \n\n\n".html2html_strict

    expect(sanitized).to eq('test')
  end
  # <br> elements ahead of the first block are dropped.
  it 'also strips leading <br>' do
    sanitized = '<br><br><div>abc</div>'.html2html_strict

    expect(sanitized).to eq('<div>abc</div>')
  end
  # <br> elements and spaces after the last block are dropped.
  it 'also strips trailing <br> & spaces' do
    sanitized = '<div>abc</div><br> <br>'.html2html_strict

    expect(sanitized).to eq('<div>abc</div>')
  end
  # <b> markup is returned unchanged.
  it 'leaves <b> as-is' do
    sanitized = '<b>test</b>'.html2html_strict

    expect(sanitized).to eq('<b>test</b>')
  end
  # Upper-case tag names are expected back in lower case.
  it 'downcases tag names' do
    sanitized = '<B>test</B>'.html2html_strict

    expect(sanitized).to eq('<b>test</b>')
  end
  # <i> markup is returned unchanged.
  it 'leaves <i> as-is' do
    sanitized = '<i>test</i>'.html2html_strict

    expect(sanitized).to eq('<i>test</i>')
  end
  # <h1> markup is returned unchanged.
  it 'leaves <h1> as-is' do
    sanitized = '<h1>test</h1>'.html2html_strict

    expect(sanitized).to eq('<h1>test</h1>')
  end
  # <h2> markup is returned unchanged.
  it 'leaves <h2> as-is' do
    sanitized = '<h2>test</h2>'.html2html_strict

    expect(sanitized).to eq('<h2>test</h2>')
  end
  # <h3> markup is returned unchanged.
  it 'leaves <h3> as-is' do
    sanitized = '<h3>test</h3>'.html2html_strict

    expect(sanitized).to eq('<h3>test</h3>')
  end
  # Newlines inside <pre> must be kept exactly as given.
  it 'leaves <pre> as-is' do
    sanitized = "<pre>a\nb\nc</pre>".html2html_strict

    expect(sanitized).to eq("<pre>a\nb\nc</pre>")
  end
  # Same as above, with the <pre> wrapped in a <div>.
  it 'leaves <pre> nested inside <div> as-is' do
    sanitized = "<div><pre>a\nb\nc</pre></div>".html2html_strict

    expect(sanitized).to eq("<div><pre>a\nb\nc</pre></div>")
  end
  # The comment after the heading is removed; the heading stays.
  it 'strips HTML comments' do
    sanitized = '<h3>test</h3><!-- some comment -->'.html2html_strict

    expect(sanitized).to eq('<h3>test</h3>')
  end
  # Mail-client markup with unclosed wrappers: only the innermost <div> and
  # its text are expected to remain.
  it 'strips <html>/<body> tags & <head> elements' do
    html = <<~HTML.chomp
      <html><head><base href="x-msg://2849/"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; "><span class="Apple-style-span" style="border-collapse: separate; font-family: Helvetica; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: normal; orphans: 2; text-align: -webkit-auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; font-size: medium; "><div lang="DE" link="blue" vlink="purple"><div class="Section1" style="page: Section1; "><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Hello Martin,<o:p></o:p></span></div>
    HTML
    expected = <<~TEXT.chomp
      <div>Hello Martin,</div>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  # An empty <span> leaves nothing behind.
  it 'strips <span> tags' do
    sanitized = '<span></span>'.html2html_strict

    expect(sanitized).to eq('')
  end
  # Word-style markup: per the expected TEXT, <span>, <o:p>, id/class/style
  # attributes are removed while the <div>/<p> structure and &nbsp; stay.
  # The HTML heredoc is the input, TEXT the expected output; `.chomp` drops
  # each heredoc's final newline.
  # NOTE(review): this listing has lost indentation/blank lines, so the exact
  # whitespace of both fixtures should be checked against the upstream spec.
  563. it 'strips <span> tags, id/class attrs, and <o:*> (MS Office) tags' do
  564. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  565. <div id="123" class="WordSection1">
  566. <p class="MsoNormal"><span style="color:#1F497D">Guten Morgen, Frau Koppenhagen,<o:p></o:p></span></p>
  567. <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
  568. <p class="MsoNormal"><span style="color:#1F497D">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?<o:p></o:p></span></p>
  569. <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
  570. <p class="MsoNormal"><span style="color:#1F497D">Nochmals vielen Dank und herzliche Grüße
  571. <o:p></o:p></span></p>
  572. <div>
  573. HTML
  574. <div>
  575. <p>Guten Morgen, Frau Koppenhagen,</p><p>&nbsp;</p><p>vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?</p><p>&nbsp;</p><p>Nochmals vielen Dank und herzliche Grüße</p></div>
  576. TEXT
  577. end
  # Per the expected markup: <font> is removed, the link gains rel/target/title
  # attributes, and the image's width/height attributes become an inline style.
  it 'strips <font> tags' do
    html = <<~HTML.chomp
      <p><font size="2"><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></font></p>
    HTML
    expected = <<~TEXT.chomp
      <p><a href="http://www.example.com/?wm=mail" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" style="width:101px;height:30px;"></a></p>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  # "<b >" is expected back as "<b>".
  it 'strips extraneous whitespace from end of opening tag' do
    sanitized = '<b >test</b>'.html2html_strict

    expect(sanitized).to eq('<b>test</b>')
  end
  # "</b >" is expected back as "</b>".
  it 'strips extraneous whitespace from closing tag' do
    sanitized = '<b >test</b >'.html2html_strict

    expect(sanitized).to eq('<b>test</b>')
  end
  # With a space between "<" and "/b" the sequence is treated as text: it is
  # entity-escaped and a real </b> is appended.
  it 'does not detect < /b > as closing tag; converts chars and auto-closes tag' do
    sanitized = '<b >test< /b >'.html2html_strict

    expect(sanitized).to eq('<b>test&lt; /b &gt;</b>')
  end
  # Same as the spaced variant, with a newline after "<"; the expected output
  # shows that newline as a single space.
  it 'does not detect <\n/b> as closing tag; converts chars and auto-closes tag' do
    sanitized = "<b\n>test<\n/b>".html2html_strict

    expect(sanitized).to eq('<b>test&lt; /b&gt;</b>')
  end
  # Three paragraphs holding only a space are expected back as one <p>&nbsp;</p>.
  it 'collapses multiple whitespace-only <p> into one with &nbsp;' do
    html = <<~HTML.chomp
      <p> </p><p> </p><p> </p>
    HTML
    expected = <<~TEXT.chomp
      <p>&nbsp;</p>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  # The empty <b><span> content disappears while the paragraph keeps lang="DE".
  it 'keeps lang attr on <p>' do
    html = <<~HTML.chomp
      <p lang="DE"><b><span></span></b></p>
    HTML
    expected = <<~TEXT.chomp
      <p lang="DE"></p>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  # Only the <span> wrapper is removed; <b> and the text remain.
  it 'strips <span> inside <p>' do
    html = <<~HTML.chomp
      <p lang="DE"><b><span>Hello Martin,</span></b></p>
    HTML
    expected = <<~TEXT.chomp
      <p lang="DE"><b>Hello Martin,</b></p>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  # The <br> pair between the two paragraphs is dropped; both paragraphs stay.
  it 'strips <br> between <p>' do
    sanitized = '<p>&nbsp;</p><br><br><p>&nbsp;</p>'.html2html_strict

    expect(sanitized).to eq('<p>&nbsp;</p><p>&nbsp;</p>')
  end
  # Opening tag spread over several lines with a misspelled "classs"
  # attribute, followed by a "<" / "/b>" pair split across lines. Per the
  # expected TEXT the attributes are dropped, the split sequence is escaped as
  # text and a real </b> is appended.
  # The HTML heredoc is the input, TEXT the expected output.
  # NOTE(review): indentation inside the HTML fixture is not recoverable from
  # this listing — verify against the upstream spec.
  621. it 'auto-adds missing closing brackets on tags, but not opening brackets' do
  622. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  623. <b id=123 classs="
  624. some_class"
  625. >test<
  626. /b>
  627. HTML
  628. <b>test&lt; /b&gt;</b>
  629. TEXT
  630. end
  # List variant of the split-tag case: the "<" / "/ul>" pair is not taken
  # as a closing tag, so per the expected TEXT it is escaped and a real </ul>
  # is appended; id/class attributes are dropped.
  # The HTML heredoc is the input, TEXT the expected output.
  # NOTE(review): indentation inside both fixtures is not recoverable from
  # this listing — verify against the upstream spec.
  631. it 'auto-adds missing closing tags' do
  632. expect(<<~HTML.chomp.html2html_strict).to eq(<<~TEXT.chomp)
  633. <ul id=123 classs="
  634. some_class"
  635. ><li>test</li>
  636. <li class="asasd">test</li><
  637. /ul>
  638. HTML
  639. <ul>
  640. <li>test</li>
  641. <li>test</li>&lt; /ul&gt;</ul>
  642. TEXT
  643. end
  # Input has a stray </p> and an unclosed <div>; output drops the former and closes the latter.
  it 'auto-closes <div> with missing closing tag; removes </p> with missing opening tag' do
    markup = <<~HTML.chomp
      Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href="http://newsletters.cylex.de/" class="">Link des Adventkalenders</a> in<br class="">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Ihrer Lesezeichen-Symbolleiste zu ergänzen.</p><div class="">&nbsp;
    HTML
    expected = <<~TEXT.chomp
      Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href="http://newsletters.cylex.de/" rel="nofollow noreferrer noopener" target="_blank" title="http://newsletters.cylex.de/">Link des Adventkalenders</a> in<br> Ihrer Lesezeichen-Symbolleiste zu ergänzen.<div> </div>
    TEXT

    expect(markup.html2html_strict).to eq(expected)
  end
  # Table markup without </td>/</tr> (and with a dangling trailing <table>) is
  # expected to come back as two well-formed tables separated by <br>.
  it 'intelligently inserts missing </td> & </tr> tags (and ignores misplaced </table> tags)' do
    markup = <<~HTML.chomp
      <table>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif"><b>Franz Schäfer</b></font>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif">Manager Information Systems</font></table>
      <br>
      <table>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif">Telefon &nbsp;</font>
      <td bgcolor=white><font size=2 face="sans-serif">+49 000 000 8565</font>
      <tr>
      <td colspan=2 bgcolor=white><font size=2 face="sans-serif">christian.schaefer@example.com</font></table>
      <br>
      <table>
    HTML
    expected = <<~TEXT.chomp
      <table>
      <tr>
      <td>
      <b>Franz Schäfer</b>
      </td>
      </tr>
      <tr>
      <td>Manager Information Systems</td>
      </tr>
      </table>
      <br>
      <table>
      <tr>
      <td> Telefon </td>
      <td> +49 000 000 8565 </td>
      </tr>
      <tr>
      <td colspan="2">christian.schaefer@example.com</td>
      </tr>
      </table>
    TEXT

    expect(markup.html2html_strict).to eq(expected)
  end
  # Neither the id nor the misspelled "classs" attribute appears in the output.
  it 'ignores invalid (misspelled) attrs' do
    markup = <<~HTML.chomp
      <b id=123 classs="
      some_class">test</b>
    HTML
    expected = <<~TEXT.chomp
      <b>test</b>
    TEXT

    expect(markup.html2html_strict).to eq(expected)
  end
  # The value-less `color: ` rule on the <a> is absent from the output.
  it 'strips incomplete CSS rules' do
    markup = <<~HTML.chomp
      <p><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></p>
    HTML
    expected = <<~TEXT.chomp
      <p><a href="http://www.example.com/?wm=mail" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" style="width:101px;height:30px;"></a></p>
    TEXT

    expect(markup.html2html_strict).to eq(expected)
  end
  # Expectations for <div> elements that hold nothing but whitespace, <br>,
  # or other whitespace-only elements.
  context 'for whitespace-only <div>' do
    it 'preserves a single space' do
      sanitized = '<div> </div>'.html2html_strict

      expect(sanitized).to eq('<div> </div>')
    end

    it 'converts a lone <br> to &nbsp;' do
      sanitized = '<div><br></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    # The style attribute on the <div> is not present in the expected output either.
    it 'converts three <br> to one &nbsp;' do
      sanitized = '<div style="max-width: 600px;"><br><br><br></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    it 'collapses two nested, whitespace-only <div> into a single &nbsp;' do
      sanitized = '<div><div> </div><div> </div></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    it 'collapses three nested, whitespace-only <div> into a single &nbsp;' do
      sanitized = '<div><div> </div><div> </div><div> </div></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    it 'collapses 2+ nested, whitespace-only <p> into \n<p>&nbsp;</p>' do
      sanitized = '<div><p> </p><p> </p></div>'.html2html_strict

      expect(sanitized).to eq("<div>\n<p>&nbsp;</p></div>")
    end
  end
  # Expectations for <div> elements carrying text or child elements:
  # whitespace trimming, <br> handling and removal of redundant nesting.
  context 'for <div> with content' do
    it 'also strips trailing/leading newlines inside <div>' do
      sanitized = "<div>\n\n\ntest\n\n\n</div>".html2html_strict

      expect(sanitized).to eq('<div>test</div>')
    end

    it 'also strips trailing/leading newlines & tabs inside <div>' do
      sanitized = "<div>\n\t\ntest\n\t\n</div>".html2html_strict

      expect(sanitized).to eq('<div>test</div>')
    end

    it 'also strips trailing/leading newlines & tabs inside <div>, but not internal spaces' do
      sanitized = "<div>\n\t\ntest 123\n\t\n</div>".html2html_strict

      expect(sanitized).to eq('<div>test 123</div>')
    end

    it 'strips newlines from trailing whitespace; leaves up to two <br> (with spaces) as-is' do
      markup = <<~HTML.chomp
        <div>
        <br> <p><b>Description</b></p>
        <br> <br> </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br> <p><b>Description</b></p><br> <br> </div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'strips newlines from trailing whitespace; collapses 3+ <br> into two' do
      markup = <<~HTML.chomp
        <div>
        <br> <p><b>Description</b></p>
        <br> <br> <br> </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br> <p><b>Description</b></p><br><br></div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'removes unnecessary <div> nesting' do
      markup = <<~HTML.chomp
        <div><div>Hello Martin,</div></div>
      HTML
      expected = <<~TEXT.chomp
        <div>Hello Martin,</div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    # The lang attribute on the outermost <div> is gone along with that wrapper.
    it 'keeps innermost <div> when removing nesting' do
      markup = <<~HTML.chomp
        <div lang="DE"><div><div>Hello Martin,</div></div></div>
      HTML
      expected = <<~TEXT.chomp
        <div>Hello Martin,</div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'rearranges whitespace in nested <div>' do
      markup = <<~HTML.chomp
        <div lang="DE"><div><div>Hello Martin,</div> </div></div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <div>Hello Martin,</div></div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'adds newline where <br> starts or ends <div> content' do
      markup = <<~HTML.chomp
        <div style="max-width: 600px;"><br>abc<br><br></div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br>abc<br><br>
        </div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'leaves <s> nested in <div> as-is (?)' do
      markup = <<~HTML.chomp
        <div><s>abc</s></div>
      HTML
      expected = <<~TEXT.chomp
        <div><s>abc</s></div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'collapses multiple whitespace-only <p> into one with &nbsp;' do
      markup = <<~HTML.chomp
        <div><p> </p>
        <p> </p>
        <p> </p>
        </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <p>&nbsp;</p></div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'strips <div> tags when they contain only <p>' do
      markup = <<~HTML.chomp
        <div>lala<div lang="DE"><p><span>Hello Martin,</span></p></div></div>
      HTML
      expected = <<~TEXT.chomp
        <div>lala<p>Hello Martin,</p></div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end
  end
  # Expectations for <a> elements and bare URLs: attribute rewriting,
  # auto-linking of http(s) URLs, title generation and mailto handling.
  context 'link handling' do
    it 'adds rel & target attrs to <a> tags' do
      markup = <<~HTML.chomp
        <a href="http://web.de">web.de</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://web.de" rel="nofollow noreferrer noopener" target="_blank">web.de</a>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'removes id attrs' do
      markup = <<~HTML.chomp
        <a id="123" href="http://web.de">web.de</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://web.de" rel="nofollow noreferrer noopener" target="_blank">web.de</a>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'removes class/id attrs' do
      markup = <<~HTML.chomp
        <a href="http://example.com" class="abc" id="123">http://example.com</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'downcases <a> tags' do
      markup = <<~HTML.chomp
        <A href="http://example.com?a=1;">http://example.com?a=1;</A>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com?a=1;" rel="nofollow noreferrer noopener" target="_blank">http://example.com?a=1;</a>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'doesn’t downcase href attr or inner text' do
      markup = <<~HTML.chomp
        <A href="http://example.com/withSoMeUpper/And/downCase">http://example.com/withSoMeUpper/And/downCase</A>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com/withSoMeUpper/And/downCase" rel="nofollow noreferrer noopener" target="_blank">http://example.com/withSoMeUpper/And/downCase</a>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    # The \n sequences in the expected heredoc are escapes, i.e. real newlines.
    it 'automatically wraps <a> tags around valid URLs' do
      markup = <<~HTML.chomp
        <div>https://www.facebook.com/test</div>
      HTML
      expected = <<~TEXT.chomp
        <div>\n<a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a>\n</div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'does not wrap URLs if leading https?:// is missing' do
      markup = <<~HTML.chomp
        some text www.example.com some other text
      HTML
      expected = <<~TEXT.chomp
        some text www.example.com some other text
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'adds missing http:// to href attr (but not inner text)' do
      markup = <<~HTML.chomp
        web <a href="www.example.com"><span style="color:blue">www.example.com</span></a>
      HTML
      expected = <<~TEXT.chomp
        web <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank">www.example.com</a>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'includes URL parameters when wrapping URL in <a> tag' do
      markup = <<~HTML.chomp
        <p>https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap</p>
      HTML
      expected = <<~TEXT.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;a=1;#ldap" rel="nofollow noreferrer noopener" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;a=1;#ldap</a></p>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'does not rewrap valid URLs that already have <a> tags' do
      markup = <<~HTML.chomp
        <a href="http://example.com">http://example.com</a>
      HTML
      expected = <<~TEXT.chomp
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes URL parameters when matching href to inner text' do
      markup = <<~HTML.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap</a></p>
      HTML
      expected = <<~TEXT.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap" rel="nofollow noreferrer noopener" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap</a></p>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes <br> as URL boundary' do
      markup = <<~HTML.chomp
        <div><br>https://www.facebook.com/test<br></div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br><a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a><br>\n</div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes space as URL boundary' do
      markup = <<~HTML.chomp
        some text http://example.com some other text
      HTML
      expected = <<~TEXT.chomp
        some text <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a> some other text
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'wraps valid URLs from <div> elements in <a> tags' do
      markup = <<~HTML.chomp
        <div>http://example.com</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
        </div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes trailing dot as URL boundary' do
      markup = <<~HTML.chomp
        <div>http://example.com.</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>.</div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'does not add a leading newline if <div> begins with non-URL text' do
      markup = <<~HTML.chomp
        <div>lala http://example.com.</div>
      HTML
      expected = <<~TEXT.chomp
        <div>lala <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>.</div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes trailing comma as URL boundary' do
      markup = <<~HTML.chomp
        <div>http://example.com, and so on</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>, and so on</div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes trailing comma as URL boundary (immediately following URL parameters)' do
      markup = <<~HTML.chomp
        <div>http://example.com?lala=me, and so on</div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <a href="http://example.com?lala=me" rel="nofollow noreferrer noopener" target="_blank">http://example.com?lala=me</a>, and so on</div>
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'strips <a> tags when no href is present' do
      markup = <<~HTML.chomp
        <a name="_MailEndCompose"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#44546A">Hello Mr Smith,<o:p></o:p></span></a>
      HTML
      expected = <<~TEXT.chomp
        Hello Mr Smith,
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    # Links whose content is markup rather than plain text.
    context 'when <a> inner text is HTML elements' do
      # The \n sequences in the input heredoc are escapes, i.e. real newlines inside the tag.
      it 'leaves <img> elements as-is' do
        markup = <<~HTML.chomp
          <a href="http://example.com/?abc=123&123=abc" class="abc\n"\n><img src="cid:123"></a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com/?abc=123&amp;123=abc" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com/?abc=123&amp;123=abc"><img src="cid:123"></a>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'strips <span> tags, but not content' do
        markup = <<~HTML.chomp
          <a href="http://facebook.de/examplesrbog"><span lang="EN-US" style='color:blue'>http://facebook.de/examplesrbog</span></a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://facebook.de/examplesrbog" rel="nofollow noreferrer noopener" target="_blank">http://facebook.de/examplesrbog</a>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'also strips surrounding <span> and <o:p> tags' do
        markup = <<~HTML.chomp
          <span style="font-size:10.0pt;font-family:&quot;Cambria&quot;,serif;color:#1F497D;mso-fareast-language:DE">web&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
          <a href="http://www.example.com"><span style="color:blue">www.example.com</span></a><o:p></o:p></span>
        HTML
        expected = <<~TEXT.chomp
          web <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank">www.example.com</a>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end
    end

    # When the visible text differs from the href, a title attribute is expected;
    # the later examples pin down differences that do not trigger one.
    context 'when <a> inner text and href do not match' do
      it 'adds title attr' do
        markup = <<~HTML.chomp
          <a href="http://example.com">http://what-different.example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com">http://what-different.example.com</a>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'converts unsafe characters in href attr and title' do
        markup = <<~HTML.chomp
          <a href="http://example.com %22test%22">http://what-different.example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com%20%22test%22" rel="nofollow noreferrer noopener" target="_blank" title='http://example.com "test"'>http://what-different.example.com</a>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'does not add title attr (for different capitalization)' do
        markup = <<~HTML.chomp
          <a href="http://example.com">http://EXAMPLE.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://EXAMPLE.com</a>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'does not add title attr (for trailing slash)' do
        markup = <<~HTML.chomp
          <a href="http://example.com/" class="abc">http://example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com/" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'does not add title attr (for trailing slash and newline)' do
        markup = <<~HTML.chomp
          <a href="http://example.com/\n" class="abc">http://example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com/" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'does not add title attr (for trailing slash, newline, and space)' do
        markup = <<~HTML.chomp
          <a href="http://example.com/\n " class="abc
          ">http://example.com</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com/" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'does not add title attr (for URL-safe/unsafe characters)' do
        markup = <<~HTML.chomp
          <a href="http://example.com/?abc=123&123=abc">http://example.com?abc=123&amp;123=abc</a>
        HTML
        expected = <<~TEXT.chomp
          <a href="http://example.com/?abc=123&amp;123=abc" rel="nofollow noreferrer noopener" target="_blank">http://example.com?abc=123&amp;123=abc</a>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end
    end

    # mailto links are expected to be flattened to the plain address.
    context 'for email links' do
      it 'strips <a> tags' do
        markup = <<~HTML.chomp
          <a href="mailto:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        HTML
        expected = <<~TEXT.chomp
          john.smith@example.com
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'strips <a> tags (even with upcased "MAILTO:")' do
        markup = <<~HTML.chomp
          <a href="MAILTO:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        HTML
        expected = <<~TEXT.chomp
          john.smith@example.com
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end

      # The address from the href, not the link text, is what the output contains.
      it 'extracts destination address when it differs from <a> innertext' do
        markup = <<~HTML.chomp
          <a href="MAILTO:john.smith2@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        HTML
        expected = <<~TEXT.chomp
          john.smith2@example.com
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end
    end
  end
  # Expectations for <img>: which style rules survive and how width/height
  # attributes are turned into CSS.
  context 'for <img> tags' do
    # Only the color rule disappears; the leading space it leaves behind is part of the expectation.
    it 'removes color CSS rule from style attr' do
      markup = <<~HTML.chomp
        <img src="/some.png" style="color: blue; width: 30px; height: 50px">
      HTML
      expected = <<~TEXT.chomp
        <img src="/some.png" style=" width: 30px; height: 50px;">
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'converts width/height attrs to CSS rules' do
      markup = <<~HTML.chomp
        <img src="/some.png" width="30px" height="50px">
      HTML
      expected = <<~TEXT.chomp
        <img src="/some.png" style="width:30px;height:50px;">
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'automatically adds terminal semicolons to CSS rules' do
      markup = <<~HTML.chomp
        <img style="width: 181px; height: 125px" src="...">
      HTML
      expected = <<~TEXT.chomp
        <img style="width: 181px; height: 125px;" src="...">
      TEXT

      expect(markup.html2html_strict).to eq(expected)
    end

    context 'when <img> nested in <a>, nested in <p>' do
      it 'sanitizes those elements as normal' do
        markup = <<~HTML.chomp
          <p class="MsoNormal"><a href="http://www.example.com/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>
        HTML
        expected = <<~TEXT.chomp
          <p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></a></p>
        TEXT

        expect(markup.html2html_strict).to eq(expected)
      end
    end
  end
  1084. context 'sample email input' do
  # Quoted mail header block ("Von:", "Gesendet:", ...): the expected output has a
  # js-signatureMarker <span> directly before the header paragraph.
  it 'handles sample input 1' do
    markup = <<~HTML.chomp
      <div>
      abc<p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    HTML
    expected = <<~TEXT.chomp
      <div>abc<span class=\"js-signatureMarker\"></span><p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    TEXT

    expect(markup.html2html_strict).to eq(expected)
  end
  # Variant of sample 1 with extra spaces after <div> and <p>; the space after <p> is kept.
  it 'handles sample input 2' do
    markup = <<~HTML.chomp
      <div> abc<p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    HTML
    expected = <<~TEXT.chomp
      <div>abc<span class=\"js-signatureMarker\"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    TEXT

    expect(markup.html2html_strict).to eq(expected)
  end
  # Variant of sample 2 with an additional space before </div>; expected output matches sample 2.
  it 'handles sample input 3' do
    markup = <<~HTML.chomp
      <div> abc<p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p> </div>
    HTML
    expected = <<~TEXT.chomp
      <div>abc<span class=\"js-signatureMarker\"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    TEXT

    expect(markup.html2html_strict).to eq(expected)
  end
  # Signature block made of styled <div>/<span> wrappers.
  # NOTE(review): the mailto link's `style=color: ...` has no opening quote in the
  # fixture; it is reproduced as-is on the assumption that it is deliberate malformed input.
  it 'handles sample input 4' do
    markup = <<~HTML.chomp
      <div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Mit freundlichem Gruß<span class="Apple-converted-space">&nbsp;</span><br><br>John Smith<br>Service und Support<br><br>Example Service AG &amp; Co.<o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; "><a href="mailto:john.smith@example.com" style=color: blue; text-decoration: underline; ">john.smith@example.com</a></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; "><a href="http://www.example.com" style="color: blue; text-decoration: underline; ">www.example.com</a></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div>
    HTML
    expected = <<~TEXT.chomp
      <div>Mit freundlichem Gruß<br><br>John Smith<br>Service und Support<br><br>Example Service AG &amp; Co. </div><div>Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br>
      </div><div>Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472 </div><div>john.smith@example.com</div><div>
      <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank">www.example.com</a>
      </div>
    TEXT

    expect(markup.html2html_strict).to eq(expected)
  end
  # Word-style mail body (MsoNormal paragraphs, <o:p> tags) with unclosed <body>/<div>;
  # the expected output is flattened to plain <div>/<p>/<b>/<a> markup.
  it 'handles sample input 5' do
    markup = <<~HTML.chomp
      <body lang="DE" link="blue" vlink="purple"><div class="WordSection1">
      <p class="MsoNormal"><span style="color:#1F497D">Guten Morgen, Frau ABC,<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D">Nochmals vielen Dank und herzliche Grüße
      <o:p></o:p></span></p>
      <div>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D"><o:p>&nbsp;</o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">Anna Smith<o:p></o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">art abc SEV GmbH<o:p></o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">art abc TRAV<o:p></o:p></span></b></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">Marktstätte 123<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">123456 Dorten<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">T: &#43;49 (0) 12345/1234560-1<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">T: &#43;49 (0) 12345/1234560-0<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">F: &#43;49 (0) 12345/1234560-2<o:p></o:p></span></p>
      <p class="MsoNormal"><a href="mailto:annad@example.com"><span style="font-size:9.0pt">annad@example.com</span></a><span style="font-size:9.0pt;color:#C00000"><o:p></o:p></span></p>
      <p class="MsoNormal"><a href="http://www.example.com/"><span style="font-size:9.0pt">www.example.com</span></a><span style="font-size:9.0pt;color:#1F497D">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
      </span><a href="http://www.ABC.com/"><span style="font-size:9.0pt">www.ABC.com</span></a><span style="font-size:9.0pt;color:#1F497D"><o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:8.0pt;color:#1F497D">Geschäftsführer Vor Nach, VorUndZu Nach&nbsp;&nbsp;&nbsp;&nbsp; -&nbsp;&nbsp;&nbsp;&nbsp; Amtsgericht Dort HRB 12345&nbsp;&nbsp;&nbsp; -&nbsp;&nbsp;&nbsp; Ein Unternehmer der ABC Gruppe<o:p></o:p></span></p>
    HTML
    expected = <<~TEXT.chomp
      <div>
      <p>Guten Morgen, Frau ABC,</p><p>&nbsp;</p><p>vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?</p><p>&nbsp;</p><p>Nochmals vielen Dank und herzliche Grüße</p><div> <p>&nbsp;</p><p><b>Anna Smith</b></p><p><b>art abc SEV GmbH</b></p><p><b>art abc TRAV</b></p><p>Marktstätte 123</p><p>123456 Dorten</p><p>T: +49 (0) 12345/1234560-1</p><p>T: +49 (0) 12345/1234560-0</p><p>F: +49 (0) 12345/1234560-2</p><p>annad@example.com</p><p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank">www.example.com</a> <a href="http://www.ABC.com/" rel="nofollow noreferrer noopener" target="_blank">www.ABC.com</a></p><p>Geschäftsführer Vor Nach, VorUndZu Nach - Amtsgericht Dort HRB 12345 - Ein Unternehmer der ABC Gruppe</p></div></div>
    TEXT

    expect(markup.html2html_strict).to eq(expected)
  end
  # Reply header nested in two <div>s: the expected output places the
  # js-signatureMarker <span> before the "Von:" paragraph and unwraps the mailto link.
  it 'handles sample input 6' do
    markup = <<~HTML.chomp
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <div>
      <div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
      <p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;">Von:</span></b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;"> Besucherbüro, MKuk [<a href="mailto:besucherbuero@example.com">mailto:besucherbuero@example.com</a>] <br>
      <b>Gesendet:</b> Freitag, 16. Dezember 2016 08:05<br>
      <b>An:</b> \'Amaia Epalza\'<br>
      <b>Betreff:</b> AW: Gruppe vtb Kultuur // 28.06.2017<o:p></o:p></span></p>
      </div>
      </div>
      <p class="MsoNormal"><o:p>&nbsp;</o:p></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Segoe UI&quot;,&quot;sans-serif&quot;;color:#1F497D">Reservierungsbestätigung Führung Skulptur-Projekte 2017 am
      </span></b><o:p></o:p></p>
      <p class="MsoNormal"><span style="font-size:10.0pt;font-family:&quot;Segoe UI&quot;,&quot;sans-serif&quot;;color:#1F497D">&nbsp;</span><o:p></o:p></p>
      <p class="MsoNormal">Guten Morgen Frau Epalza,<o:p></o:p></p>
    HTML
    expected = <<~TEXT.chomp
      <p>&nbsp;</p><div>
      <div>
      <span class="js-signatureMarker"></span><p><b>Von:</b> Besucherbüro, MKuk [besucherbuero@example.com] <br>
      <b>Gesendet:</b> Freitag, 16. Dezember 2016 08:05<br>
      <b>An:</b> 'Amaia Epalza'<br>
      <b>Betreff:</b> AW: Gruppe vtb Kultuur // 28.06.2017</p></div></div><p>&nbsp;</p><p><b>Reservierungsbestätigung Führung Skulptur-Projekte 2017 am </b></p><p>&nbsp;</p><p>Guten Morgen Frau Epalza,</p>
    TEXT

    expect(markup.html2html_strict).to eq(expected)
  end
  # Mail-client markup with empty class attributes and unclosed trailing <div>s.
  # The expectation shows the class attributes removed, rel/target added to the
  # link, <div><br></div> rendered as <div>&nbsp;</div>, and the dangling
  # trailing <div>s dropped.
  it 'handles sample input 7' do
    html = <<~HTML.chomp
      <div class="">Wir brauchen also die Instanz <a href="http://example.zammad.com" class="">example.zammad.com</a>, kann die aber nicht mehr nutzen.</div><div class=""><br class=""></div><div class="">Bitte um Freischaltung.</div><div class=""><br class=""></div><div class=""><br class=""><div class="">
    HTML
    expected = <<~TEXT.chomp
      <div>Wir brauchen also die Instanz <a href="http://example.zammad.com" rel="nofollow noreferrer noopener" target="_blank">example.zammad.com</a>, kann die aber nicht mehr nutzen.</div><div>&nbsp;</div><div>Bitte um Freischaltung.</div><div>&nbsp;</div>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  # Word-styled paragraphs: the expectation shows the styled <span>/<o:p>
  # wrappers removed and the line break inside the first paragraph turned into
  # a single space.
  it 'handles sample input 8' do
    html = <<~HTML.chomp
      <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D;mso-fareast-language:EN-US">oh jeee … Zauberwort vergessen ;-) Können Sie mir
      <b>bitte</b> noch meine Testphase verlängern?<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
    HTML
    expected = <<~TEXT.chomp
      <p>oh jeee … Zauberwort vergessen ;-) Können Sie mir <b>bitte</b> noch meine Testphase verlängern?</p><p>&nbsp;</p>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  # Link that already carries title/rel/target: href and title are expected to
  # stay untouched while rel="nofollow" becomes "nofollow noreferrer noopener".
  it 'handles sample input 9' do
    html = <<~HTML.chomp
      <div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a></div>
    HTML
    expected = <<~TEXT.chomp
      <div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a></div>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  # Word/Outlook table row. The expectation shows width/nowrap/class attributes
  # and the width/height/background-position/background-repeat declarations
  # removed, while border, padding and background-color declarations and the
  # coloured <span>s are kept.
  #
  # The input row is deliberately kept on ONE physical line: a line break in
  # the middle of a style="..." value would put a newline into the fixture that
  # the expected output below does not account for.
  it 'handles sample input 10' do
    html = <<~HTML.chomp
      <tr style="height: 15pt;" class=""><td width="170" nowrap="" valign="bottom" style="width: 127.5pt; border-style: none none none solid; border-left-width: 1pt; border-left-color: windowtext; padding: 0cm 5.4pt; height: 15pt;" class=""><p class="MsoNormal" align="center" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;"><span style="" class="">&nbsp;</span></p></td><td width="58" nowrap="" valign="bottom" style="width: 43.5pt; padding: 0cm 5.4pt; height: 15pt;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="" class="">20-29</span></div></td><td width="47" nowrap="" valign="bottom" style="width: 35pt; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" class="">200</span></div></td><td width="76" nowrap="" valign="bottom" style="width: 57pt; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" class="">-1</span></div></td><td width="76" nowrap="" valign="bottom" style="width: 57pt; border-style: none solid none none; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" class="">201</span></div></td><td width="107" nowrap="" valign="bottom" style="width: 80pt; padding: 0cm 5.4pt; height: 15pt;" class=""></td><td width="85" nowrap="" valign="bottom" style="width: 64pt; padding: 0cm 5.4pt; height: 15pt;" class=""></td><td width="101" nowrap="" valign="bottom" style="width: 76pt; border-style: none solid solid; border-left-width: 1pt; border-left-color: windowtext; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><b class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">country</span></b><span style="font-size: 11pt; font-family: Calibri, sans-serif;" class=""></span></div></td><td width="87" nowrap="" valign="bottom" style="width: 65pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Target (gross)</span></div></td><td width="123" nowrap="" valign="bottom" style="width: 92pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Remaining Recruits</span></div></td><td width="87" nowrap="" valign="bottom" style="width: 65pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Total Recruits</span></div></td></tr>
    HTML
    expected = <<~TEXT.chomp
      <tr>
      <td valign="bottom" style=" border-style: none none none solid; border-left-width: 1pt; border-left-color: windowtext; padding: 0cm 5.4pt;"><p>&nbsp;</p></td>
      <td valign="bottom" style=" padding: 0cm 5.4pt;"><div>20-29</div></td>
      <td valign="bottom" style=" background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><span style="color: rgb(156, 0, 6);">200</span></td>
      <td valign="bottom" style=" background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><span style="color: rgb(156, 0, 6);">-1</span></td>
      <td valign="bottom" style=" border-style: none solid none none; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><span style="color: rgb(156, 0, 6);">201</span></td>
      <td valign="bottom" style=" padding: 0cm 5.4pt;"></td>
      <td valign="bottom" style=" padding: 0cm 5.4pt;"></td>
      <td valign="bottom" style=" border-style: none solid solid; border-left-width: 1pt; border-left-color: windowtext; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>
      <b>country</b>
      </div></td>
      <td valign="bottom" style=" border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Target (gross)</div></td>
      <td valign="bottom" style=" border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Remaining Recruits</div></td>
      <td valign="bottom" style=" border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Total Recruits</div></td>
      </tr>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  # Webmail message wrapped in many identically styled nested <div>s. The
  # expectation shows the redundant wrappers collapsed, &nbsp; runs in the text
  # rendered as plain spaces, the "$" characters of the cid: image source
  # percent-encoded (%24), and the non-standard image attributes removed.
  #
  # The input is deliberately kept on ONE physical line: a line break inside
  # the sentence "... the specification you need?" would add a newline to the
  # fixture that is not part of the sample.
  it 'handles sample input 11' do
    html = <<~HTML.chomp
      <div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div>Dear Bob<span style="line-height: 23.8px;">:</span><span style="color: rgb(255, 255, 255); line-height: 1.7;">Mr/Mrs</span></div><div><br></div><div><span style="line-height: 1.7;">We&nbsp;are&nbsp;one&nbsp;of&nbsp;the&nbsp;leading&nbsp;manufacturer&nbsp;and&nbsp;supplier&nbsp;of&nbsp;</span>conduits and cars since 3000.</div><div><br></div><div>Could you inform me the specification you need?</div><div><br></div><div>May I sent you our products catalogues for your reference?</div><div><br></div><div><img src="cid:5cb2783c$1$15ae9b384c8$Coremail$zhanabcdzhao$example.com" orgwidth="1101" orgheight="637" data-image="1" style="width: 722.7px; height: 418px; border: none;"></div><div>Best regards!</div><div><br></div><div><b style="line-height: 1.7;"><i><u><span lang="EL" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#17365D;\nmso-ansi-language:EL">Welcome to our booth B11/1 Hall 13 during SOMEWHERE\n9999.</span></u></i></b></div><div style="position:relative;zoom:1"><div>Bob Smith</div><div><div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) &nbsp;Fax: 0000-11-12345678&nbsp;</div><div>Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</div><div>Web: www.example.com</div></div><div style="clear:both"></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div>
    HTML
    expected = <<~TEXT.chomp
      <div>\n<div>Dear Bob:Mr/Mrs</div><div>&nbsp;</div><div>We are one of the leading manufacturer and supplier of conduits and cars since 3000.</div><div>&nbsp;</div><div>Could you inform me the specification you need?</div><div>&nbsp;</div><div>May I sent you our products catalogues for your reference?</div><div>&nbsp;</div><div><img src="cid:5cb2783c%241%2415ae9b384c8%24Coremail%24zhanabcdzhao%24example.com" style="width: 722.7px; height: 418px;"></div><div>Best regards!</div><div>&nbsp;</div><div><b><i><u>Welcome to our booth B11/1 Hall 13 during SOMEWHERE 9999.</u></i></b></div><div>\n<div>Bob Smith</div><div>\n<div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) Fax: 0000-11-12345678</div><div>Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</div><div>Web: www.example.com</div></div></div></div>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  # Styled newsletter link inside a list item: the expectation shows the
  # style/class attributes and the inner <span> removed, and rel, target and a
  # title equal to the href added.
  it 'handles sample input 12' do
    html = <<~HTML.chomp
      <li><a style="font-size:15px; font-family:Arial;color:#0f7246" class="text_link" href="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh"><span style="color: rgb(0, 0, 0);">Luxemburg</span></a></li>
    HTML
    expected = <<~TEXT.chomp
      <li><a href="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh" rel="nofollow noreferrer noopener" target="_blank" title="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh">Luxemburg</a></li>
    TEXT

    expect(html.html2html_strict).to eq(expected)
  end
  1228. end
  # Examples checking where html2html_strict puts the signature marker span
  # (an empty <span class="js-signatureMarker">): before "--"/"__" separator
  # lines, before quoted-reply mail headers ("Von:") and before quoted-text
  # intros that are followed by / contained in a <blockquote>.
  context 'signature recognition' do
    let(:marker) { '<span class="js-signatureMarker"></span>' }

    it 'places marker before "--" line (surrounded by <br>)' do
      input = <<~HTML.chomp
        lalala<br>--<br>Max Mix
      HTML
      expected = <<~TEXT.chomp
        lalala#{marker}<br>--<br>Max Mix
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'places marker before "--" line (surrounded by <br/>)' do
      input = <<~HTML.chomp
        lalala<br/>--<br/>Max Mix
      HTML
      expected = <<~TEXT.chomp
        lalala#{marker}<br>--<br>Max Mix
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'places marker before "--" line (preceded by <br/>\n)' do
      input = <<~HTML.chomp
        lalala<br/>
        --<br/>Max Mix
      HTML
      expected = <<~TEXT.chomp
        lalala#{marker}<br> --<br>Max Mix
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'places marker before "--" line (surrounded by <p>)' do
      input = <<~HTML.chomp
        lalala<p>--</p>Max Mix
      HTML
      expected = <<~TEXT.chomp
        lalala#{marker}<p>--</p>Max Mix
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'places marker before "__" line (surrounded by <br>)' do
      input = <<~HTML.chomp
        lalala<br>__<br>Max Mix
      HTML
      expected = <<~TEXT.chomp
        lalala#{marker}<br>__<br>Max Mix
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'places marker before quoted reply’s "Von:" header (in German)' do
      input = <<~HTML.chomp
        den.<br><br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hello,<br><br>ich versuche an den Punkten
      HTML
      expected = <<~TEXT.chomp
        den.<br>#{marker}<br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hello,<br><br>ich versuche an den Punkten
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'places marker before quoted reply’s "Von:" header (as <p> with stripped parent <div>)' do
      input = <<~HTML.chomp
        <div><div style="border:none;border-top:solid #e1e1e1 1.0pt;padding:3.0pt 0cm 0cm 0cm"><p class="MsoNormal"><b><span lang="DE" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif">Von:</span></b><span lang="DE" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif"> Martin Edenhofer via Zammad Helpdesk [mailto:<a href="mailto:support@example.com">support@zammad.com</a>] <br><b>Gesendet:</b>\u0020
      HTML
      expected = <<~TEXT.chomp
        #{marker}<p><b>Von:</b> Martin Edenhofer via Zammad Helpdesk [mailto:support@example.com] <br><b>Gesendet:</b> </p>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'places marker before quoted reply’s "Von:" header (as <p> with parent <div>)' do
      input = <<~HTML.chomp
        <div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
        <p class="MsoNormal" style="margin-left:35.4pt"><b><span style="font-family:Calibri;color:black">Von:
        </span></b><span style="font-family:Calibri;color:black">Johanna Kiefer via Znuny Projects &lt;projects@example.com&gt;<br>
        <b>Organisation: </b>Znuny Group<br>
        <b>Datum: </b>Montag, 6. März 2017 um 13:32<br>
      HTML
      expected = <<~TEXT.chomp
        <div>
        #{marker}<p><b>Von: </b>Johanna Kiefer via Znuny Projects &lt;projects@example.com&gt;<br>
        <b>Organisation: </b>Znuny Group<br>
        <b>Datum: </b>Montag, 6. März 2017 um 13:32<br></p></div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'places marker before quoted reply’s "Von:" header (as <div>)' do
      input = <<~HTML.chomp
        <div><br>
        <br>
        <br><font size=1 color=#5f5f5f face="sans-serif">Von: &nbsp; &nbsp; &nbsp;
        &nbsp;</font><font size=1 face="sans-serif">Hotel &lt;info@example.com&gt;</font>
        <br><font size=1 color=#5f5f5f face="sans-serif">An: &nbsp; &nbsp; &nbsp;
        &nbsp;</font></div>
      HTML
      expected = <<~TEXT.chomp
        #{marker}<div><br>Von: Hotel &lt;info@example.com&gt; <br>An: </div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'places marker before English quoted text intro (as <blockquote>)' do
      input = <<~HTML.chomp
        <br class=""><div><blockquote type="cite" class=""><div class="">On 04 Mar 2017, at 14:47, Oliver Ruhm &lt;<a href="mailto:oliver@example.com" class="">oliver@example.com</a>&gt; wrote:</div><br class="Apple-interchange-newline">
      HTML
      expected = <<~TEXT.chomp
        <div>#{marker}<blockquote type="cite">
        <div>On 04 Mar 2017, at 14:47, Oliver Ruhm &lt;oliver@example.com&gt; wrote:</div><br>
        </blockquote></div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'does not place marker if blockquote doesn’t contain a quoted text intro' do
      input = <<~HTML.chomp
        <br class=""><div><blockquote type="cite" class=""><div class="">some note</div><br class="Apple-interchange-newline">
      HTML
      expected = <<~TEXT.chomp
        <div><blockquote type="cite">
        <div>some note</div><br>
        </blockquote></div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'does not place marker if quoted text intro isn’t followed by a <blockquote>' do
      input = <<~HTML.chomp
        <div>
        <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
        <br>
        </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
        <br>
        </div>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end

    it 'places marker before German quoted text intro (before <blockquote>)' do
      input = <<~HTML.chomp
        <div>
        <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
        <br>
        </div>
        <blockquote type="cite">
        <div>Dear Mr. Smith,<br></div>
        </blockquote>
      HTML
      expected = <<~TEXT.chomp
        #{marker}<div>
        <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
        <br>
        </div><blockquote type="cite">
        <div>Dear Mr. Smith,<br>
        </div></blockquote>
      TEXT

      expect(input.html2html_strict).to eq(expected)
    end
  end
  1357. end
  # Examples for String#signature_identify called as
  # signature_identify('text', true): each one compares a plain-text mail body
  # (SRC) with the same body after the literal marker string has been inserted
  # (MARKED).
  describe '#signature_identify' do
    let(:marker) { '######SIGNATURE_MARKER######' }

    context 'with no signature present' do
      it 'leaves string as-is' do
        # Unary plus hands the method an unfrozen copy of the literal.
        expect((+'foo').signature_identify('text', true)).to eq('foo')
      end
    end

    context 'with signature present' do
      it 'places marker at start of "--" line' do
        expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
          foo
          --
          bar
        SRC
          foo
          #{marker}--
          bar
        MARKED
      end

      it 'places marker before English quoted text intro' do
        expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
          On 01/04/15 10:55, Bob Smith wrote:
        SRC
          #{marker}On 01/04/15 10:55, Bob Smith wrote:
        MARKED
      end

      it 'places marker before German quoted text intro' do
        expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
          Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@znuny.ink>:
        SRC
          #{marker}Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@znuny.ink>:
        MARKED
      end

      # NOTE(review): the example name mentions a trailing empty line, but the
      # fixture as it stands contains none — confirm against the upstream file.
      it 'ignores trailing empty line' do
        expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
          test 123
          test 123
          --
          Bob Smith
        SRC
          test 123
          test 123
          #{marker}--
          Bob Smith
        MARKED
      end

      # NOTE(review): the example name mentions trailing double empty lines,
      # but the fixture as it stands contains none — confirm against upstream.
      it 'ignores trailing double empty lines' do
        expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
          test 123
          test 123
          --
          Bob Smith
        SRC
          test 123
          test 123
          #{marker}--
          Bob Smith
        MARKED
      end

      # NOTE(review): the example name mentions leading/trailing empty lines,
      # but the fixture as it stands contains none — confirm against upstream.
      it 'ignores leading/trailing empty lines' do
        expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
          test 123\u0020
          1
          2
          3
          4
          5
          6
          7
          8
          9
          --
          Bob Smith
        SRC
          test 123\u0020
          1
          2
          3
          4
          5
          6
          7
          8
          9
          #{marker}--
          Bob Smith
        MARKED
      end

      it 'ignores lines starting with "--" but containing more text' do
        expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
          test 123\u0020
          --no not match--
          --
          Bob Smith
        SRC
          test 123\u0020
          --no not match--
          #{marker}--
          Bob Smith
        MARKED
      end

      it 'places marker at start of " -- " line' do
        # The separator line in SRC carries one extra leading space (relative to
        # the other heredoc lines) so that it really is " -- ", matching the
        # " --" that follows the marker in MARKED.
        expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
          test 123\u0020
          --no not match--
           --\u0020
          Bob Smith
        SRC
          test 123\u0020
          --no not match--
          #{marker} --\u0020
          Bob Smith
        MARKED
      end

      it 'places marker on empty line if possible / only places one marker' do
        # SRC needs an empty line ahead of the first "--": MARKED shows the
        # marker alone on that line, and no marker before the second "--".
        expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
          test 123\u0020

          --
          Bob Smith
          --
          Bob Smith
        SRC
          test 123\u0020
          #{marker}
          --
          Bob Smith
          --
          Bob Smith
        MARKED
      end

      context 'for Apple email quote text' do
        context 'in English' do
          it 'places two markers, one before quoted text intro and one at start of "--" line' do
            expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
              test 123\u0020
              --no not match--
              Bob Smith
              On 01/04/15 10:55, Bob Smith wrote:
              lalala
              --
              some test
            SRC
              test 123\u0020
              --no not match--
              Bob Smith
              #{marker}On 01/04/15 10:55, Bob Smith wrote:
              lalala
              #{marker}--
              some test
            MARKED
          end
        end

        context 'auf Deutsch' do
          it 'places marker before quoted text intro' do
            expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
              test 123\u0020
              --no not match--
              Bob Smith
              Am 03.04.2015 um 20:58 schrieb Bob Smith <bob@example.com>:
              lalala
            SRC
              test 123\u0020
              --no not match--
              Bob Smith
              #{marker}Am 03.04.2015 um 20:58 schrieb Bob Smith <bob@example.com>:
              lalala
            MARKED
          end
        end
      end

      context 'for MS email quote text' do
        context 'in English' do
          it 'places marker before quoted text intro' do
            expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
              test 123test 123\u0020
              --no not match--
              Bob Smith
              From: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Sent: Donnerstag, 2. April 2015 10:00
              lalala</div>
            SRC
              test 123test 123\u0020
              --no not match--
              Bob Smith
              #{marker}From: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Sent: Donnerstag, 2. April 2015 10:00
              lalala</div>
            MARKED
          end
        end

        context 'auf Deutsch' do
          it 'places marker before quoted text intro' do
            expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
              test 123\u0020
              --no not match--
              Bob Smith
              Von: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Gesendet: Donnerstag, 2. April 2015 10:00
              Betreff: lalala
            SRC
              test 123\u0020
              --no not match--
              Bob Smith
              #{marker}Von: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Gesendet: Donnerstag, 2. April 2015 10:00
              Betreff: lalala
            MARKED
          end
        end

        context 'en francais' do
          it 'places marker before quoted text intro' do
            expect(<<~SRC.chomp.signature_identify('text', true)).to eq(<<~MARKED.chomp)
              test 123\u0020
              --no not match--
              Bob Smith
              De : Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Envoyé : mercredi 29 avril 2015 17:31
              Objet : lalala
            SRC
              test 123\u0020
              --no not match--
              Bob Smith
              #{marker}De : Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Envoyé : mercredi 29 avril 2015 17:31
              Objet : lalala
            MARKED
          end
        end
      end
    end
  end
  # Examples for String#utf8_encode: behaviour on strings that already are
  # UTF-8, conversion from other encodings with and without a from: hint, and
  # wall-clock limits for large inputs.
  #
  # The string under test is declared with a named `subject(:string)` instead
  # of overriding `subject` through `let`.
  describe '#utf8_encode' do
    context 'on valid, UTF-8-encoded strings' do
      subject(:string) { 'hello' }

      it 'returns an identical copy' do
        expect(string.utf8_encode).to eq(string)
        expect(string.utf8_encode.encoding).to be(string.encoding)
        # Equal content, but a different object.
        expect(string.utf8_encode).not_to be(string)
      end

      context 'which are incorrectly set to other, technically valid encodings' do
        subject(:string) { described_class.new('ö', encoding: 'tis-620') }

        it 'sets input encoding to UTF-8 instead of attempting conversion' do
          expect(string.utf8_encode).to eq(string.dup.force_encoding('utf-8'))
        end
      end
    end

    context 'on strings in other encodings' do
      subject(:string) { original_string.encode(input_encoding) }

      context 'with no from: option' do
        let(:original_string) { 'Tschüss!' }
        let(:input_encoding) { Encoding::ISO_8859_2 }

        it 'detects the input encoding' do
          expect(string.utf8_encode).to eq(original_string)
        end
      end

      context 'with a valid from: option' do
        let(:original_string) { 'Tschüss!' }
        let(:input_encoding) { Encoding::ISO_8859_2 }

        it 'uses the specified input encoding' do
          expect(string.utf8_encode(from: 'iso-8859-2')).to eq(original_string)
        end

        it 'uses any valid input encoding, even if not correct' do
          expect(string.utf8_encode(from: 'gb18030')).to eq('Tsch黶s!')
        end
      end

      context 'with an invalid from: option' do
        let(:original_string) { '―陈志' }
        let(:input_encoding) { Encoding::GB18030 }

        it 'does not try it' do
          # A plain String#encode with the wrong source encoding raises ...
          expect { string.encode('utf-8', 'gb2312') }
            .to raise_error(Encoding::InvalidByteSequenceError)
          # ... whereas utf8_encode given the same hint must not.
          expect { string.utf8_encode(from: 'gb2312') }
            .not_to raise_error
        end

        it 'uses the detected input encoding instead' do
          expect(string.utf8_encode(from: 'gb2312')).to eq(original_string)
        end
      end
    end

    context 'performance' do
      subject(:string) { original_string.encode(input_encoding) }

      context 'with utf8_encode in iso-8859-1' do
        let(:original_string) { 'äöü0' * 999_999 }
        let(:input_encoding) { Encoding::ISO_8859_1 }

        # The from: hint matches the real encoding, so this exercises the
        # direct conversion path under a 1 second limit.
        it 'converts using the given encoding within the time limit' do
          Timeout.timeout(1) do
            expect(string.utf8_encode(from: 'iso-8859-1')).to eq(original_string)
          end
        end
      end

      context 'with utf8_encode in utf-8' do
        let(:original_string) { 'äöü0' * 999_999 }
        let(:input_encoding) { Encoding::UTF_8 }

        # Input already is UTF-8 and the hint says so; 1 second limit.
        it 'converts using the given encoding within the time limit' do
          Timeout.timeout(1) do
            expect(string.utf8_encode(from: 'utf-8')).to eq(original_string)
          end
        end
      end

      context 'with utf8_encode in iso-8859-1 and charset detection' do
        let(:original_string) { 'äöü0' * 199_999 }
        let(:input_encoding) { Encoding::ISO_8859_1 }

        # The from: hint ('utf-8') does not match the ISO-8859-1 input, yet the
        # original text is expected back; the limit is a generous 18 seconds.
        it 'detects the input encoding' do
          Timeout.timeout(18) do
            expect(string.utf8_encode(from: 'utf-8')).to eq(original_string)
          end
        end
      end
    end
  end
  1668. end