string_spec.rb 91 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970
  1. # frozen_string_literal: true
  2. require 'rails_helper'
  3. RSpec.describe String do
  # Specs for String#strip. The first context pins the usual whitespace
  # trimming; the second covers the cases the spec itself labels as
  # monkey-patched: zero-width spaces (U+200B) at the edges and ASCII-8BIT input.
  describe '#strip' do
    context 'default behavior' do
      it 'removes leading/trailing spaces' do
        padded = ' test '

        expect(padded.strip).to eq('test')
      end

      it 'removes trailing newlines' do
        with_newline = "test\n"

        expect(with_newline.strip).to eq('test')
      end

      it 'does not remove internal spaces / newlines' do
        inner_whitespace = "test \n test"

        expect(inner_whitespace.strip).to eq("test \n test")
      end
    end

    context 'monkey-patched behavior' do
      it 'removes leading/trailing zero-width spaces, but not internal ones' do
        input = " \r\n test \u{200B} \n test\u{200B} \u{200B}"

        expect(input.strip).to eq("test \u{200B} \n test")
      end

      it 'does not break on non-unicode strings' do
        # Both sides are built as ASCII-8BIT so the comparison happens on binary strings.
        binary   = described_class.new("\xC2\xA92011 Z ", encoding: 'ASCII-8BIT')
        stripped = described_class.new("\xC2\xA92011 Z", encoding: 'ASCII-8BIT')

        expect(binary.strip).to eq(stripped)
      end
    end
  end
  # Specs for the in-place variant String#strip!.
  #
  # Each example starts from an unfrozen string (unary `+`, or a freshly built
  # String) because strip! mutates its receiver. Two things are checked per
  # example: the receiver itself is returned (`be(str)`) and its contents
  # match a literal expected value (`eq(...)`).
  describe '#strip!' do
    context 'default behavior' do
      it 'removes leading/trailing spaces (in place)' do
        str = +' test '
        expect(str.strip!).to be(str).and eq('test')
      end

      it 'removes trailing newlines (in place)' do
        str = +"test\n"
        expect(str.strip!).to be(str).and eq('test')
      end

      it 'does not remove internal spaces / newlines (in place)' do
        str = +"test \n test "
        # Compare against a literal: once strip! has mutated `str`, `eq(str)`
        # would compare the string with itself and could never fail.
        expect(str.strip!).to be(str).and eq("test \n test")
      end
    end

    context 'monkey-patched behavior' do
      it 'removes leading/trailing zero-width spaces, but not internal ones (in place)' do
        str = +" \r\n test \u{200B} \n test\u{200B} \u{200B}"
        expect(str.strip!).to be(str).and eq("test \u{200B} \n test")
      end

      it 'does not break on invalid-unicode strings (in place)' do
        str = described_class.new("\xC2\xA92011 Z ", encoding: 'ASCII-8BIT')
        expect(str.strip!)
          .to be(str).and eq(described_class.new("\xC2\xA92011 Z", encoding: 'ASCII-8BIT'))
      end
    end
  end
  # Specs for String#to_filename: the examples expect a Ruby constant path to
  # be turned into a snake-case file path without touching the receiver.
  describe '#to_filename' do
    it 'does not modify strings in place' do
      samples = %w[test Some::File]

      samples.each do |sample|
        expect { sample.to_filename }.not_to change { sample }
      end
    end

    it 'leaves all-downcase strings as-is' do
      lowercase = 'test'

      expect(lowercase.to_filename).to eq('test')
    end

    it 'converts camelcase Ruby constant paths to snakecase file paths' do
      constant_path = 'Some::File'

      expect(constant_path.to_filename).to eq('some/file')
    end
  end
  # Specs for String#to_classname: the examples expect a snake-case file path
  # to be turned into a camel-case constant path without touching the receiver.
  describe '#to_classname' do
    it 'does not modify strings in place' do
      samples = %w[test some/file]

      samples.each do |sample|
        expect { sample.to_classname }.not_to change { sample }
      end
    end

    it 'capitalizes all-downcase strings' do
      lowercase = 'test'

      expect(lowercase.to_classname).to eq('Test')
    end

    it 'converts snakecase file paths to camelcase Ruby constant paths' do
      file_path = 'some/file'

      expect(file_path.to_classname).to eq('Some::File')
    end

    context 'unlike ActiveSupport’s #classify' do
      it 'preserves pluralized names' do
        # Checked in insertion order; the first mismatch fails the example.
        plural_paths = {
          'some/files'      => 'Some::Files',
          'some_test/files' => 'SomeTest::Files'
        }

        plural_paths.each do |path, constant|
          expect(path.to_classname).to eq(constant)
        end
      end
    end
  end
  # Specs for String#html2text, the HTML-to-plain-text conversion.
  #
  # Multi-line fixtures are squiggly heredocs bound to `html` (input) and
  # `text` (expected output). `.chomp` drops the final newline every heredoc
  # carries, so both sides are compared without it.
  describe '#html2text' do
    it 'does not modify strings in place' do
      %w[test <div>test</div>].each do |markup|
        expect { markup.html2text }.not_to change { markup }
      end
    end

    it 'leaves human-readable text as-is' do
      expect('test'.html2text).to eq('test')
    end

    it 'strips leading/trailing spaces' do
      expect(' test '.html2text).to eq('test')
    end

    it 'also strips leading/trailing newlines' do
      expect("\n\n test \n\n\n".html2text).to eq('test')
    end

    it 'strips HTML tags around text content' do
      expect('<div>test</div>'.html2text).to eq('test')
    end

    it 'strips trailing <br> inside last <div>' do
      expect('<div>test<br></div>'.html2text).to eq('test')
    end

    it 'strips trailing <br> and newlines inside last <div>' do
      expect("<div>test<br><br><br>\n<br>\n<br>\n</div>".html2text).to eq('test')
    end

    it 'strips trailing <br>, newlines, and spaces inside last <div>' do
      expect("<div>test<br><br> <br> \n<br> \n<br> \n</div>".html2text).to eq('test')
    end

    it 'strips trailing <br>, newlines, and &nbsp; inside last <div>' do
      expect("<div>test<br><br>&nbsp;<br>&nbsp;\n<br>&nbsp;\n<br>&nbsp;\n</div>".html2text).to eq('test')
    end

    it 'strips trailing whitespace (including &nbsp; & <br>) both inside and after last tag' do
      expect("<div>test<br><br>&nbsp;<br>&nbsp;\n<br>&nbsp;\n<br>&nbsp;\n</div>&nbsp;".html2text).to eq('test')
    end

    it 'also strips nested HTML tags' do
      html = "<p><span>Was\nsoll verbessert werden:</span></p>"

      expect(html.html2text).to eq('Was soll verbessert werden:')
    end

    it 'in <pre> elements, collapses multiple newlines into one' do
      expect("<pre>test\n\ntest</pre>".html2text).to eq("test\ntest")
    end

    it 'in <code> elements, collapses multiple newlines into one' do
      expect("<code>test\n\ntest</code>".html2text).to eq("test\ntest")
    end

    it 'converts <table> cells and row to space-separated lines' do
      html = <<~HTML.chomp
        <table><tr><td>test</td><td>col</td></td></tr><tr><td>test</td><td>4711</td></tr></table>
      HTML
      text = <<~TEXT.chomp
        test col
        test 4711
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'strips HTML comments' do
      html = <<~HTML.chomp
        <!-- some comment -->
        <div>
        test<br><br><br>
        <br>
        <br>
        </div>
      HTML
      text = <<~TEXT.chomp
        test
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'converts <a> elements to plain text with numerical references' do
      html = <<~HTML.chomp
        <div><a href="https://zammad.org">Best Tool of the World</a>
        some other text</div>
        <div>
      HTML
      text = <<~TEXT.chomp
        [1] Best Tool of the Worldsome other text
        [1] https://zammad.org
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'converts <hr> elements to separate paragraphs containing only "___"' do
      html = <<~HTML.chomp
        <!-- some comment -->
        <div>
        test<br><br><br>
        <hr/>
        <br>
        </div>
      HTML
      text = <<~TEXT.chomp
        test
        ___
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'converts <br> elements to newlines (max. 2)' do
      html = <<~HTML.chomp
        test<br><br><br>--<br>abc</div>
      HTML
      text = <<~TEXT.chomp
        test
        --
        abc
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'strips Microsoft Outlook conditional comments' do
      # The trailing "=" characters are part of the fixture and must stay as they are.
      html = <<~HTML.chomp
        Ihr RZ-Team<br />
        <br />
        <!--[if gte mso 9]><xml> <o:DocumentProperties> <o:Author>test</o:Author> =
        <o:Template>A75DB76E.dotm</o:Template> <o:LastAuthor>test</o:LastAuthor> =
        <o:Revision>5</o:Revision> <o:Created>2011-05-18T07:08:00Z</o:Created> <=
        o:LastSaved>2011-07-04T17:59:00Z</o:LastSaved> <o:Pages>1</o:Pages> <o:Wo=
        rds>189</o:Words> <o:Characters>1192</o:Characters> <o:Lines>9</o:Lines> =
        <o:Paragraphs>2</o:Paragraphs> <o:CharactersWithSpaces>1379</o:Characters=
        WithSpaces> <o:Version>11.5606</o:Version> </o:DocumentProperties></xml><!=
        [endif]-->
      HTML
      text = <<~TEXT.chomp
        Ihr RZ-Team
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'strips <img> elements' do
      html = <<~HTML.chomp
        <html>
        <head>
        <title>Neues Fax von 1234-93900</title>
        </head>
        <body style="margin: 0px;padding: 0px;font-family: Arial, sans-serif;font-size: 12px;">
        <table cellpadding="0" cellspacing="0" width="100%" height="100%" bgcolor="#d9e7f0" id="mailbg"
        style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;width: 100%;height: 100%;background-color: #d9e7f0;padding: 0px;margin: 0px;">
        <tr>
        <td valign="top">
        <center>
        <br><br>
        <table width="560" cellpadding="0" cellspacing="0" bgcolor="#FFFFFF" id="mailcontainer"
        style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;width: 560px;margin: 0px auto;padding: 0px;background-color: #FFFFFF;">
        <tr>
        <td colspan="3" width="560" id="mail_header" valign="top" style="width: 560px;background-color: #FFFFFF;font-family: Arial, sans-serif;color: #000000;padding: 0px;margin: 0px;">
        <table width="560" cellpadding="0" cellspacing="0" style="empty-cells:show;font-size: 12px;line-height: 18px;color: #000000;font-family: Arial, sans-serif;">
        <tr>
        <td height="10" colspan="4" style="font-size:0px;line-height: 0px;padding:0px;height:10px;">
        <img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_header.gif" style="padding: 0px;margin: 0px;">
        </td>
        </tr>
        <tr>
        <td height="12" colspan="4"><span style="font-size:0px;line-height:0px;"> </span></td>
        </tr>
        <tr>
        <td height="27" width="30"> </td>
        <td height="27" width="397"><span class="mailtitle" style="font-family: Arial, sans-serif;color: #000000;font-size: 18px;line-height: 18px;font-weight: normal;">Neues Fax</span></td>
        <td height="27" width="103"><img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_logo-example.gif" style="padding: 0px;margin: 0px;"></td>
        <td height="27" width="30"></td>
        </tr>
        <tr>
        <td height="20" colspan="4"><span style="font-size:0px;line-height:0px;"> </span></td>
        </tr>
        <tr>
        <td height="1" colspan="4" style="font-size:0px;line-height: 0px;padding:0px;">
        <img src="http://www.example.docm/static/example.docm/mailtemplates/de_DE/team/img/tpl_line-grey.gif" style="padding: 0px;margin: 0px;">
        </td>
        </tr>
        </table>
        </td>
        </tr>
        <tr>
        <td colspan="3" width="560"> </td>
        </tr>
        <tr>
        <td width="30"> </td>
        <td width="500" height="30" valign="middle" align="right">
        <span class="accountno" style="font-family: Arial, sans-serif;font-size: 10px;color: #666666;">Ihre Kundennummer: 12345678</span>
        </td>
        <td width="30"> </td>
        </tr>
      HTML
      text = <<~TEXT.chomp
        Neues Fax von 1234-93900
        Neues Fax
        Ihre Kundennummer: 12345678
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'converts characters written in HTML ampersand code' do
      html = <<~HTML.chomp
        line&nbsp;1<br>
        you<br/>
        -----&amp;
      HTML
      text = <<~TEXT.chomp
        line\u00A01
        you
        -----&
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'converts <ul> to asterisk-demarcated list' do
      # \u0020 is an escaped leading space; it is not removed as heredoc indentation.
      html = <<~HTML.chomp
        \u0020<ul><li>#1</li><li>#2</li></ul>
      HTML
      text = <<~TEXT.chomp
        * #1
        * #2
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'strips HTML frontmatter and <head> element' do
      html = <<~HTML.chomp
        <!DOCTYPE html>
        <html>
        <head>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
        <head>
        <body style="font-family:Geneva,Helvetica,Arial,sans-serif; font-size: 12px;">
        <div>&gt; Welcome!</div><div>&gt;</div><div>&gt; Thank you for installing Zammad.</div><div>&gt;</div>
        </body>
        </html>
      HTML
      text = <<~TEXT.chomp
        > Welcome!
        >
        > Thank you for installing Zammad.
        >
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'strips <style> elements' do
      html = <<~HTML.chomp
        \u0020 <style type="text/css">
        body {
        width:90% !important;
        -webkit-text-size-adjust:90%;
        -ms-text-size-adjust:90%;
        font-family:\'helvetica neue\', helvetica, arial, geneva, sans-serif; f=
        ont-size: 12px;;
        }
        img {
        outline:none; text-decoration:none; -ms-interpolation-mode: bicubic;
        }
        a img {
        border:none;
        }
        table td {
        border-collapse: collapse;
        }
        table {
        border-collapse: collapse; mso-table-lspace:0pt; mso-table-rspace:0pt;
        }
        p, table, div, td {
        max-width: 600px;
        }
        p {
        margin: 0;
        }
        blockquote, pre {
        margin: 0px;
        padding: 8px 12px 8px 12px;
        }
        </style><p>some other content</p>
      HTML
      text = <<~TEXT.chomp
        some other content
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'strips <meta> elements' do
      html = <<~HTML.chomp
        \u0020 IT-Infrastruktur</span><br>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
        <meta name="Generator" content="Microsoft Word 14 (filtered
        medium)">
        <!--[if !mso]><style>v\:* {behavior:url(#default#VML);}
        o\:* {behavior:url(#default#VML);}
        w\:* {behavior:url(#default#VML);}
        .shape {behavior:url(#default#VML);}
        </style><![endif]-->
        <style><!--
        @font-face
        {font-family:calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
        @font-face
        {font-family:tahoma;
        panose-1:2 11 6 4 3 5 4 4 2 4;}
        p.msonormal, li.msonormal, div.msonormal
        {margin:0cm;
        margin-bottom:.0001pt;
        font-size:11.0pt;
        font-family:"calibri","sans-serif";
        mso-fareast-language:en-us;}
        a:link, span.msohyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
        a:visited, span.msohyperlinkfollowed
        {mso-style-priority:99;
        color:purple;
        text-decoration:underline;}
        p.msoacetate, li.msoacetate, div.msoacetate
        {mso-style-priority:99;
        mso-style-link:"sprechblasentext zchn";
        margin:0cm;
        margin-bottom:.0001pt;
        font-size:8.0pt;
        font-family:"tahoma","sans-serif";
        mso-fareast-language:en-us;}
        span.e-mailformatvorlage17
        {mso-style-type:personal;
        font-family:"calibri","sans-serif";
        color:windowtext;}
        span.sprechblasentextzchn
        {mso-style-name:"sprechblasentext zchn";
        mso-style-priority:99;
        mso-style-link:sprechblasentext;
        font-family:"tahoma","sans-serif";}
        .msochpdefault
        {mso-style-type:export-only;
        font-family:"calibri","sans-serif";
        mso-fareast-language:en-us;}
        @page wordsection1
        {size:612.0pt 792.0pt;
        margin:70.85pt 70.85pt 2.0cm 70.85pt;}
        div.wordsection1
        {page:wordsection1;}
        --></style><!--[if gte mso 9]><xml>
        <o:shapedefaults v:ext="edit" spidmax="1026" />
        </xml><![endif]--><!--[if gte mso 9]><xml>
        <o:shapelayout v:ext="edit">
        <o:idmap v:ext="edit" data="1" />
        </o:shapelayout></xml><![endif]-->
      HTML
      text = <<~TEXT.chomp
        IT-Infrastruktur
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'separates block-level elements by one newline (<p> following a non-<p> block gets two)' do
      html = <<~HTML.chomp
        <h1>some head</h1>
        some content
        <blockquote>
        <p>line 1</p>
        <p>line 2</p>
        </blockquote>
        <p>some text later</p>
      HTML
      text = <<~TEXT.chomp
        some head
        some content
        > line 1
        > line 2
        some text later
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'formats <blockquote> contents with leading "> "' do
      html = <<~HTML.chomp
        <h1>some head</h1>
        some content
        <blockquote>
        line 1<br/>
        line 2<br>
        </blockquote>
        <p>some text later</p>
      HTML
      text = <<~TEXT.chomp
        some head
        some content
        > line 1
        > line 2
        some text later
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'adds max. 2 newlines between block-level <blockquote> contents' do
      html = <<~HTML.chomp
        <h1>some head</h1>
        some content
        <blockquote>
        <div><div>line 1</div><br></div>
        <div><div>line 2</div><br></div>
        </blockquote>
        some text later
      HTML
      text = <<~TEXT.chomp
        some head
        some content
        > line 1
        >
        > line 2
        some text later
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'places numerical <a> references at end of text string' do
      html = <<~HTML.chomp
        <p>Best regards,</p>
        <p><i>Your Team Team</i></p>
        <p>P.S.: You receive this e-mail because you are listed in our database as person who ordered a Team license. Please click
        <a href="http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx">here</a> to unsubscribe from further e-mails.</p>
        -----------------------------
        <br />
      HTML
      text = <<~TEXT.chomp
        Best regards,
        Your Team Team
        P.S.: You receive this e-mail because you are listed in our database as person who ordered a Team license. Please click [1] here to unsubscribe from further e-mails.
        -----------------------------
        [1] http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx
      TEXT

      expect(html.html2text).to eq(text)
    end

    it 'handles elements with missing closing tags' do
      html = <<~HTML.chomp
        <div><br>Dave and leaned her
        days adam.</div><span style="color:#F7F3FF; font-size:8px">Maybe we
        want any help me that.<br>Next morning charlie saw at their
        father.<br>Well as though adam took out here. Melvin will be more money.\u0020
        Called him into this one last thing.<br>Men-----------------------
        <br />
      HTML
      text = <<~TEXT.chomp
        Dave and leaned her days adam.
        Maybe we want any help me that.
        Next morning charlie saw at their father.
        Well as though adam took out here. Melvin will be more money. Called him into this one last thing.
        Men-----------------------
      TEXT

      expect(html.html2text).to eq(text)
    end

    context 'html encoding' do
      it 'converts &Auml; in Ä' do
        html = '<div>test something.&Auml;</div>'

        expect(html.html2text).to eq('test something.Ä')
      end

      it 'strips invalid html encoding chars' do
        html = '<div>test something.&#55357;</div>'

        expect(html.html2text).to eq('test something.í ˝')
      end
    end

    context 'performance tests' do
      # Eleven copies of one paragraph with a link, followed by an extra newline.
      let(:filler) do
        %(#{%(<p>some word <a href="http://example.com?domain?example.com">some url</a> and the end.</p>\n) * 11}\n)
      end

      # The heredoc stays inside the Timeout block so that building the
      # fixture is timed together with the conversion.
      it 'converts a 1076-byte unicode file in under 2s' do
        expect { Timeout.timeout(2) { <<~HTML.chomp.html2text } }.not_to raise_error
          <html>
          <title>some title</title>
          <body>
          <div>hello</div>
          #{filler}
          </body>
          </html>
        HTML
      end

      it 'converts a 2.21 MiB unicode file in under 2s' do
        expect { Timeout.timeout(2) { <<~HTML.chomp.html2text } }.not_to raise_error
          <html>
          <title>some title</title>
          <body>
          <div>hello</div>
          #{filler * 2312}
          </body>
          </html>
        HTML
      end
    end
  end
  520. describe '#html2html_strict' do
  # Text without any markup is expected to come back unchanged.
  it 'leaves human-readable text as-is' do
    plain = 'test'

    expect(plain.html2html_strict).to eq('test')
  end
  # Surrounding spaces are expected to be dropped from the result.
  it 'strips leading/trailing spaces' do
    padded = ' test '

    expect(padded.html2html_strict).to eq('test')
  end
  # Surrounding newlines (mixed with spaces) are expected to be dropped as well.
  it 'also strips leading/trailing newlines' do
    padded = "\n\n test \n\n\n"

    expect(padded.html2html_strict).to eq('test')
  end
  # <br> elements in front of the first block are expected to be removed.
  it 'also strips leading <br>' do
    html = '<br><br><div>abc</div>'

    expect(html.html2html_strict).to eq('<div>abc</div>')
  end
  # <br> elements and spaces after the last block are expected to be removed.
  it 'also strips trailing <br> & spaces' do
    html = '<div>abc</div><br> <br>'

    expect(html.html2html_strict).to eq('<div>abc</div>')
  end
  # <b> is expected to pass through untouched.
  it 'leaves <b> as-is' do
    html = '<b>test</b>'

    expect(html.html2html_strict).to eq('<b>test</b>')
  end
  # Upper-case tag names are expected to come back in lower case.
  it 'downcases tag names' do
    html = '<B>test</B>'

    expect(html.html2html_strict).to eq('<b>test</b>')
  end
  # <i> is expected to pass through untouched.
  it 'leaves <i> as-is' do
    html = '<i>test</i>'

    expect(html.html2html_strict).to eq('<i>test</i>')
  end
  # <h1> is expected to pass through untouched.
  it 'leaves <h1> as-is' do
    html = '<h1>test</h1>'

    expect(html.html2html_strict).to eq('<h1>test</h1>')
  end
  # <h2> is expected to pass through untouched.
  it 'leaves <h2> as-is' do
    html = '<h2>test</h2>'

    expect(html.html2html_strict).to eq('<h2>test</h2>')
  end
  # <h3> is expected to pass through untouched.
  it 'leaves <h3> as-is' do
    html = '<h3>test</h3>'

    expect(html.html2html_strict).to eq('<h3>test</h3>')
  end
  # Newlines inside <pre> are expected to be kept.
  it 'leaves <pre> as-is' do
    html = "<pre>a\nb\nc</pre>"

    expect(html.html2html_strict).to eq("<pre>a\nb\nc</pre>")
  end
  # Same as the plain <pre> example, with the <pre> wrapped in a <div>.
  it 'leaves <pre> nested inside <div> as-is' do
    html = "<div><pre>a\nb\nc</pre></div>"

    expect(html.html2html_strict).to eq("<div><pre>a\nb\nc</pre></div>")
  end
  # An HTML comment after the content is expected to be removed.
  it 'strips HTML comments' do
    html = '<h3>test</h3><!-- some comment -->'

    expect(html.html2html_strict).to eq('<h3>test</h3>')
  end
  # A mail-client style document (head, body, styled span and nested divs) is
  # expected to be reduced to a single <div> that only keeps its lang attribute.
  it 'strips <html>/<body> tags & <head> elements' do
    html = <<~HTML.chomp
      <html><head><base href="x-msg://2849/"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; "><span class="Apple-style-span" style="border-collapse: separate; font-family: Helvetica; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: normal; orphans: 2; text-align: -webkit-auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; font-size: medium; "><div lang="DE" link="blue" vlink="purple"><div class="Section1" style="page: Section1; "><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Hello Martin,<o:p></o:p></span></div>
    HTML
    text = <<~TEXT.chomp
      <div lang="DE">Hello Martin,</div>
    TEXT

    expect(html.html2html_strict).to eq(text)
  end
  # An empty <span> is expected to disappear completely.
  it 'strips <span> tags' do
    html = '<span></span>'

    expect(html.html2html_strict).to eq('')
  end
  # Of the two style declarations only `color` is expected to survive.
  it 'keeps style with color in <span>' do
    html = <<~HTML.chomp
      <span style="color: red; bgcolor: red">Hello Martin,</span>
    HTML
    text = <<~TEXT.chomp
      <span style="color: red;">Hello Martin,</span>
    TEXT

    expect(html.html2html_strict).to eq(text)
  end
  # With color #ffffff the whole <span> is expected to go, leaving only the text.
  it 'remove style=#ffffff with color in <span>' do
    html = <<~HTML.chomp
      <span style="color: #ffffff; bgcolor: red">Hello Martin,</span>
    HTML
    text = <<~TEXT.chomp
      Hello Martin,
    TEXT

    expect(html.html2html_strict).to eq(text)
  end
  # MS Office style markup: the expected output has no id/class attributes and
  # no <o:p> tags, and the color values are written in lower case.
  it 'strips <span> tags, id/class attrs, and <o:*> (MS Office) tags' do
    html = <<~HTML.chomp
      <div id="123" class="WordSection1">
      <p class="MsoNormal"><span style="color:#1F497D">Guten Morgen, Frau Koppenhagen,<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D">Nochmals vielen Dank und herzliche Grüße
      <o:p></o:p></span></p>
      <div>
    HTML
    text = <<~TEXT.chomp
      <div>
      <p><span style="color:#1f497d;">Guten Morgen, Frau Koppenhagen,</span></p><p><span style="color:#1f497d;"><p>&nbsp;</p></span></p><p><span style="color:#1f497d;">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?</span></p><p><span style="color:#1f497d;"><p>&nbsp;</p></span></p><p><span style="color:#1f497d;">Nochmals vielen Dank und herzliche Grüße </span></p></div>
    TEXT

    expect(html.html2html_strict).to eq(text)
  end
  # The <font> wrapper is expected to go; the expected output also shows extra
  # link attributes on <a> and the image size expressed as a style.
  it 'strips <font> tags' do
    html = <<~HTML.chomp
      <p><font size="2"><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></font></p>
    HTML
    text = <<~TEXT.chomp
      <p><a href="http://www.example.com/?wm=mail" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" style="width:101px;height:30px;"></a></p>
    TEXT

    expect(html.html2html_strict).to eq(text)
  end
  # A space before ">" in the opening tag is expected to be dropped.
  it 'strips extraneous whitespace from end of opening tag' do
    html = '<b >test</b>'

    expect(html.html2html_strict).to eq('<b>test</b>')
  end
  # A space before ">" in the closing tag is expected to be dropped as well.
  it 'strips extraneous whitespace from closing tag' do
    html = '<b >test</b >'

    expect(html.html2html_strict).to eq('<b>test</b>')
  end
  # "< /b >" is expected to be escaped as text, with a real </b> appended.
  it 'does not detect < /b > as closing tag; converts chars and auto-closes tag' do
    html = '<b >test< /b >'

    expect(html.html2html_strict).to eq('<b>test&lt; /b &gt;</b>')
  end
  # Same as the "< /b >" example, with newlines inside the tags; the expected
  # output shows a space where the inner newline was.
  it 'does not detect <\n/b> as closing tag; converts chars and auto-closes tag' do
    html = "<b\n>test<\n/b>"

    expect(html.html2html_strict).to eq('<b>test&lt; /b&gt;</b>')
  end
  # Three consecutive whitespace-only paragraphs yield one <p>&nbsp;</p>.
  it 'collapses multiple whitespace-only <p> into one with &nbsp;' do
    markup = <<~MARKUP.chomp
      <p> </p><p> </p><p> </p>
    MARKUP
    expected = <<~EXPECTED.chomp
      <p>&nbsp;</p>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # The empty <b>/<span> children disappear from the expected output, but
  # the paragraph keeps its lang attribute.
  it 'keeps lang attr on <p>' do
    markup = <<~MARKUP.chomp
      <p lang="DE"><b><span></span></b></p>
    MARKUP
    expected = <<~EXPECTED.chomp
      <p lang="DE"></p>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # An attribute-less <span> is unwrapped; its text stays inside <b>.
  it 'strips <span> inside <p>' do
    markup = <<~MARKUP.chomp
      <p lang="DE"><b><span>Hello Martin,</span></b></p>
    MARKUP
    expected = <<~EXPECTED.chomp
      <p lang="DE"><b>Hello Martin,</b></p>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # The whitespace-only <p> becomes <p>&nbsp;</p>, the <p> with text is kept,
  # and the completely empty trailing <p> is dropped.
  it 'strips empty <p> keep <p>s with content' do
    markup = <<~MARKUP.chomp
      <p> </p><p>123</p><p></p>
    MARKUP
    expected = <<~EXPECTED.chomp
      <p>&nbsp;</p><p>123</p>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # <br> elements sitting between two paragraphs are removed.
  it 'strips <br> between <p>' do
    sanitized = '<p>&nbsp;</p><br><br><p>&nbsp;</p>'.html2html_strict

    expect(sanitized).to eq('<p>&nbsp;</p><p>&nbsp;</p>')
  end
  # The opening tag spans several lines and carries unknown attrs; the
  # "<\n/b>" sequence is escaped as text and a real </b> is appended.
  it 'auto-adds missing closing brackets on tags, but not opening brackets' do
    markup = <<~MARKUP.chomp
      <b id=123 classs="
      some_class"
      >test<
      /b>
    MARKUP
    expected = <<~EXPECTED.chomp
      <b>test&lt; /b&gt;</b>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # The broken "<\n/ul>" is escaped as text inside the list and a real </ul>
  # is appended; attributes on <ul>/<li> are absent from the expected output.
  it 'auto-adds missing closing tags' do
    markup = <<~MARKUP.chomp
      <ul id=123 classs="
      some_class"
      ><li>test</li>
      <li class="asasd">test</li><
      /ul>
    MARKUP
    expected = <<~EXPECTED.chomp
      <ul>
      <li>test</li>
      <li>test</li>&lt; /ul&gt;</ul>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # The input has a </p> without an opener and a <div> without a closer; the
  # expected output drops the former and closes the latter.
  it 'auto-closes <div> with missing closing tag; removes </p> with missing opening tag' do
    markup = <<~MARKUP.chomp
      Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href="http://newsletters.cylex.de/" class="">Link des Adventkalenders</a> in<br class="">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Ihrer Lesezeichen-Symbolleiste zu ergänzen.</p><div class="">&nbsp;
    MARKUP
    expected = <<~EXPECTED.chomp
      Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href="http://newsletters.cylex.de/" rel="nofollow noreferrer noopener" target="_blank" title="http://newsletters.cylex.de/">Link des Adventkalenders</a> in<br> Ihrer Lesezeichen-Symbolleiste zu ergänzen.<div> </div>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # Table rows and cells in the input are never closed and the input ends with
  # a dangling <table>; the expected output has balanced <tr>/<td> pairs, no
  # <font>/bgcolor, and no trailing empty table.
  it 'intelligently inserts missing </td> & </tr> tags (and ignores misplaced </table> tags)' do
    markup = <<~MARKUP.chomp
      <table>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif"><b>Franz Schäfer</b></font>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif">Manager Information Systems</font></table>
      <br>
      <table>
      <tr>
      <td bgcolor=white><font size=2 face="sans-serif">Telefon &nbsp;</font>
      <td bgcolor=white><font size=2 face="sans-serif">+49 000 000 8565</font>
      <tr>
      <td colspan=2 bgcolor=white><font size=2 face="sans-serif">christian.schaefer@example.com</font></table>
      <br>
      <table>
    MARKUP
    expected = <<~EXPECTED.chomp
      <table>
      <tr>
      <td>
      <b>Franz Schäfer</b>
      </td>
      </tr>
      <tr>
      <td>Manager Information Systems</td>
      </tr>
      </table>
      <br>
      <table>
      <tr>
      <td> Telefon </td>
      <td> +49 000 000 8565 </td>
      </tr>
      <tr>
      <td colspan="2">christian.schaefer@example.com</td>
      </tr>
      </table>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # Neither the id nor the misspelled, multi-line "classs" attribute appears
  # in the expected output.
  it 'ignores invalid (misspelled) attrs' do
    markup = <<~MARKUP.chomp
      <b id=123 classs="
      some_class">test</b>
    MARKUP
    expected = <<~EXPECTED.chomp
      <b>test</b>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # The link's style attribute holds a rule without a value ("color: ") and is
  # missing from the expected output.
  it 'strips incomplete CSS rules' do
    markup = <<~MARKUP.chomp
      <p><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></p>
    MARKUP
    expected = <<~EXPECTED.chomp
      <p><a href="http://www.example.com/?wm=mail" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" style="width:101px;height:30px;"></a></p>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # Expectations for <div> elements whose content is only whitespace, <br>
  # elements, or nested whitespace-only elements.
  context 'for whitespace-only <div>' do
    it 'preserves a single space' do
      sanitized = '<div> </div>'.html2html_strict

      expect(sanitized).to eq('<div> </div>')
    end

    it 'converts a lone <br> to &nbsp;' do
      sanitized = '<div><br></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    # The max-width style on the <div> is also absent from the expected output.
    it 'converts three <br> to one &nbsp;' do
      sanitized = '<div style="max-width: 600px;"><br><br><br></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    it 'collapses two nested, whitespace-only <div> into a single &nbsp;' do
      sanitized = '<div><div> </div><div> </div></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    it 'collapses three nested, whitespace-only <div> into a single &nbsp;' do
      sanitized = '<div><div> </div><div> </div><div> </div></div>'.html2html_strict

      expect(sanitized).to eq('<div>&nbsp;</div>')
    end

    # Unlike nested <div>s, nested <p>s leave one <p>&nbsp;</p> behind,
    # preceded by a newline.
    it 'collapses 2+ nested, whitespace-only <p> into \n<p>&nbsp;</p>' do
      sanitized = '<div><p> </p><p> </p></div>'.html2html_strict

      expect(sanitized).to eq("<div>\n<p>&nbsp;</p></div>")
    end
  end
  # Expectations for <div> elements that carry real content: whitespace
  # trimming, <br> handling, un-nesting, and style filtering.
  context 'for <div> with content' do
    it 'also strips trailing/leading newlines inside <div>' do
      sanitized = "<div>\n\n\ntest\n\n\n</div>".html2html_strict

      expect(sanitized).to eq('<div>test</div>')
    end

    it 'also strips trailing/leading newlines & tabs inside <div>' do
      sanitized = "<div>\n\t\ntest\n\t\n</div>".html2html_strict

      expect(sanitized).to eq('<div>test</div>')
    end

    it 'also strips trailing/leading newlines & tabs inside <div>, but not internal spaces' do
      sanitized = "<div>\n\t\ntest 123\n\t\n</div>".html2html_strict

      expect(sanitized).to eq('<div>test 123</div>')
    end

    it 'strips newlines from trailing whitespace; leaves up to two <br> (with spaces) as-is' do
      markup = <<~MARKUP.chomp
        <div>
        <br> <p><b>Description</b></p>
        <br> <br> </div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>
        <br> <p><b>Description</b></p><br> <br> </div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'strips newlines from trailing whitespace; collapses 3+ <br> into two' do
      markup = <<~MARKUP.chomp
        <div>
        <br> <p><b>Description</b></p>
        <br> <br> <br> </div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>
        <br> <p><b>Description</b></p><br><br></div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'removes unnecessary <div> nesting' do
      markup = <<~MARKUP.chomp
        <div><div>Hello Martin,</div></div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>Hello Martin,</div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    # The single remaining <div> in the expected output carries the lang
    # attribute that the input had on its outermost <div>.
    it 'keeps innermost <div> when removing nesting' do
      markup = <<~MARKUP.chomp
        <div lang="DE"><div><div>Hello Martin,</div></div></div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div lang="DE">Hello Martin,</div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    # Only the color rule survives; the "bgcolor" rule is dropped.
    it 'keeps style with color in <div>' do
      markup = <<~MARKUP.chomp
        <div style="color: red; bgcolor: red">Hello Martin,</div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div style="color: red;">Hello Martin,</div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    # With color #ffffff the whole style attribute is gone in the expected output.
    it 'remove style=#ffffff with color in <div>' do
      markup = <<~MARKUP.chomp
        <div style="color: #ffffff; bgcolor: red">Hello Martin,</div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>Hello Martin,</div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'rearranges whitespace in nested <div>' do
      markup = <<~MARKUP.chomp
        <div lang="DE"><div><div>Hello Martin,</div> </div></div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div lang="DE">
        <div>Hello Martin,</div></div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'adds newline where <br> starts or ends <div> content' do
      markup = <<~MARKUP.chomp
        <div style="max-width: 600px;"><br>abc<br><br></div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>
        <br>abc<br><br>
        </div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'leaves <s> nested in <div> as-is (?)' do
      markup = <<~MARKUP.chomp
        <div><s>abc</s></div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div><s>abc</s></div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'collapses multiple whitespace-only <p> into one with &nbsp;' do
      markup = <<~MARKUP.chomp
        <div><p> </p>
        <p> </p>
        <p> </p>
        </div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>
        <p>&nbsp;</p></div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    # NOTE(review): the description does not match the fixture - both <div>
    # wrappers are still present in the expected output; only the <span>
    # around the text is removed. Consider rewording the description.
    it 'strips <div> tags when they contain only <p>' do
      markup = <<~MARKUP.chomp
        <div>lala<div lang="DE"><p><span>Hello Martin,</span></p></div></div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>lala<div lang="DE"><p>Hello Martin,</p></div></div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end
  end
  # Expectations for <a> elements and bare URLs: attribute rewriting,
  # auto-linking, URL boundary detection, title generation and mailto links.
  context 'link handling' do
    it 'adds rel & target attrs to <a> tags' do
      markup = <<~MARKUP.chomp
        <a href="http://web.de">web.de</a>
      MARKUP
      expected = <<~EXPECTED.chomp
        <a href="http://web.de" rel="nofollow noreferrer noopener" target="_blank">web.de</a>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'removes id attrs' do
      markup = <<~MARKUP.chomp
        <a id="123" href="http://web.de">web.de</a>
      MARKUP
      expected = <<~EXPECTED.chomp
        <a href="http://web.de" rel="nofollow noreferrer noopener" target="_blank">web.de</a>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'removes class/id attrs' do
      markup = <<~MARKUP.chomp
        <a href="http://example.com" class="abc" id="123">http://example.com</a>
      MARKUP
      expected = <<~EXPECTED.chomp
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'downcases <a> tags' do
      markup = <<~MARKUP.chomp
        <A href="http://example.com?a=1;">http://example.com?a=1;</A>
      MARKUP
      expected = <<~EXPECTED.chomp
        <a href="http://example.com?a=1;" rel="nofollow noreferrer noopener" target="_blank">http://example.com?a=1;</a>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'doesn’t downcase href attr or inner text' do
      markup = <<~MARKUP.chomp
        <A href="http://example.com/withSoMeUpper/And/downCase">http://example.com/withSoMeUpper/And/downCase</A>
      MARKUP
      expected = <<~EXPECTED.chomp
        <a href="http://example.com/withSoMeUpper/And/downCase" rel="nofollow noreferrer noopener" target="_blank">http://example.com/withSoMeUpper/And/downCase</a>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    # The "\n" sequences in the expected heredoc are escape sequences, i.e.
    # real newlines around the generated link.
    it 'automatically wraps <a> tags around valid URLs' do
      markup = <<~MARKUP.chomp
        <div>https://www.facebook.com/test</div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>\n<a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a>\n</div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'does not wrap URLs if leading https?:// is missing' do
      markup = <<~MARKUP.chomp
        some text www.example.com some other text
      MARKUP
      expected = <<~EXPECTED.chomp
        some text www.example.com some other text
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'adds missing http:// to href attr (but not inner text)' do
      markup = <<~MARKUP.chomp
        web <a href="www.example.com"><span style="color:blue">www.example.com</span></a>
      MARKUP
      expected = <<~EXPECTED.chomp
        web <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank"><span style="color:blue;">www.example.com</span></a>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'includes URL parameters when wrapping URL in <a> tag' do
      markup = <<~MARKUP.chomp
        <p>https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap</p>
      MARKUP
      expected = <<~EXPECTED.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;a=1;#ldap" rel="nofollow noreferrer noopener" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;a=1;#ldap</a></p>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'does not rewrap valid URLs that already have <a> tags' do
      markup = <<~MARKUP.chomp
        <a href="http://example.com">http://example.com</a>
      MARKUP
      expected = <<~EXPECTED.chomp
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes URL parameters when matching href to inner text' do
      markup = <<~MARKUP.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap</a></p>
      MARKUP
      expected = <<~EXPECTED.chomp
        <p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap" rel="nofollow noreferrer noopener" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&amp;#ldap</a></p>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes <br> as URL boundary' do
      markup = <<~MARKUP.chomp
        <div><br>https://www.facebook.com/test<br></div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>
        <br><a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a><br>\n</div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes space as URL boundary' do
      markup = <<~MARKUP.chomp
        some text http://example.com some other text
      MARKUP
      expected = <<~EXPECTED.chomp
        some text <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a> some other text
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'wraps valid URLs from <div> elements in <a> tags' do
      markup = <<~MARKUP.chomp
        <div>http://example.com</div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>
        </div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes trailing dot as URL boundary' do
      markup = <<~MARKUP.chomp
        <div>http://example.com.</div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>.</div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'does not add a leading newline if <div> begins with non-URL text' do
      markup = <<~MARKUP.chomp
        <div>lala http://example.com.</div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>lala <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>.</div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes trailing comma as URL boundary' do
      markup = <<~MARKUP.chomp
        <div>http://example.com, and so on</div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>
        <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://example.com</a>, and so on</div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'recognizes trailing comma as URL boundary (immediately following URL parameters)' do
      markup = <<~MARKUP.chomp
        <div>http://example.com?lala=me, and so on</div>
      MARKUP
      expected = <<~EXPECTED.chomp
        <div>
        <a href="http://example.com?lala=me" rel="nofollow noreferrer noopener" target="_blank">http://example.com?lala=me</a>, and so on</div>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    # The anchor only has a name attribute; the expected output keeps the
    # inner <span> (color rule only) and drops the <a> and <o:p> elements.
    it 'strips <a> tags when no href is present' do
      markup = <<~MARKUP.chomp
        <a name="_MailEndCompose"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#44546A">Hello Mr Smith,<o:p></o:p></span></a>
      MARKUP
      expected = <<~EXPECTED.chomp
        <span style="color:#44546a;">Hello Mr Smith,</span>
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    context 'when <a> inner text is HTML elements' do
      # The "\n" sequences in the input heredoc are escape sequences, so the
      # class attribute and the tag's closing bracket span several lines.
      it 'leaves <img> elements as-is' do
        markup = <<~MARKUP.chomp
          <a href="http://example.com/?abc=123&123=abc" class="abc\n"\n><img src="cid:123"></a>
        MARKUP
        expected = <<~EXPECTED.chomp
          <a href="http://example.com/?abc=123&amp;123=abc" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com/?abc=123&amp;123=abc"><img src="cid:123"></a>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end

      # NOTE(review): the description looks stale - the expected output still
      # contains the <span> (lang kept, style normalized to double quotes with
      # a trailing semicolon).
      it 'strips <span> tags, but not content' do
        markup = <<~MARKUP.chomp
          <a href="http://facebook.de/examplesrbog"><span lang="EN-US" style='color:blue'>http://facebook.de/examplesrbog</span></a>
        MARKUP
        expected = <<~EXPECTED.chomp
          <a href="http://facebook.de/examplesrbog" rel="nofollow noreferrer noopener" target="_blank"><span lang="EN-US" style="color:blue;">http://facebook.de/examplesrbog</span></a>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end

      # NOTE(review): the description looks stale - both <span> elements are
      # still present in the expected output (reduced to their color rule);
      # what disappears is <o:p>, the non-color rules and the &nbsp; run.
      it 'also strips surrounding <span> and <o:p> tags' do
        markup = <<~MARKUP.chomp
          <span style="font-size:10.0pt;font-family:&quot;Cambria&quot;,serif;color:#1F497D;mso-fareast-language:DE">web&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
          <a href="http://www.example.com"><span style="color:blue">www.example.com</span></a><o:p></o:p></span>
        MARKUP
        expected = <<~EXPECTED.chomp
          <span style="color:#1f497d;">web <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank"><span style="color:blue;">www.example.com</span></a></span>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end
    end

    context 'when <a> inner text and href do not match' do
      it 'adds title attr' do
        markup = <<~MARKUP.chomp
          <a href="http://example.com">http://what-different.example.com</a>
        MARKUP
        expected = <<~EXPECTED.chomp
          <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com">http://what-different.example.com</a>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end

      # The space inside the href shows up as %20 in both href and title.
      it 'converts unsafe characters in href attr and title' do
        markup = <<~MARKUP.chomp
          <a href="http://example.com %22test%22">http://what-different.example.com</a>
        MARKUP
        expected = <<~EXPECTED.chomp
          <a href="http://example.com%20%22test%22" rel="nofollow noreferrer noopener" target="_blank" title="http://example.com%20%22test%22">http://what-different.example.com</a>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'does not add title attr (for different capitalization)' do
        markup = <<~MARKUP.chomp
          <a href="http://example.com">http://EXAMPLE.com</a>
        MARKUP
        expected = <<~EXPECTED.chomp
          <a href="http://example.com" rel="nofollow noreferrer noopener" target="_blank">http://EXAMPLE.com</a>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'does not add title attr (for URL-safe/unsafe characters)' do
        markup = <<~MARKUP.chomp
          <a href="http://example.com/?abc=123&123=abc">http://example.com?abc=123&amp;123=abc</a>
        MARKUP
        expected = <<~EXPECTED.chomp
          <a href="http://example.com/?abc=123&amp;123=abc" rel="nofollow noreferrer noopener" target="_blank">http://example.com?abc=123&amp;123=abc</a>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end
    end

    context 'for email links' do
      # NOTE(review): the descriptions in this context look stale - in every
      # expected output the <a href="mailto:..."> element is kept and only its
      # style attribute is removed (no rel/target attrs are added either).
      it 'strips <a> tags' do
        markup = <<~MARKUP.chomp
          <a href="mailto:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        MARKUP
        expected = <<~EXPECTED.chomp
          <a href="mailto:john.smith@example.com">john.smith@example.com</a>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'strips <a> tags (even with upcased "MAILTO:")' do
        markup = <<~MARKUP.chomp
          <a href="MAILTO:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        MARKUP
        expected = <<~EXPECTED.chomp
          <a href="MAILTO:john.smith@example.com">john.smith@example.com</a>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end

      it 'extracts destination address when it differs from <a> innertext' do
        markup = <<~MARKUP.chomp
          <a href="MAILTO:john.smith2@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>
        MARKUP
        expected = <<~EXPECTED.chomp
          <a href="MAILTO:john.smith2@example.com">john.smith@example.com</a>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end
    end
  end
  # Expectations for <img> elements: style filtering and conversion of
  # width/height attributes into CSS rules.
  context 'for <img> tags' do
    # The expected style keeps the leading space left behind by the removed
    # color rule and gains a terminal semicolon.
    it 'removes color CSS rule from style attr' do
      markup = <<~MARKUP.chomp
        <img src="/some.png" style="color: blue; width: 30px; height: 50px">
      MARKUP
      expected = <<~EXPECTED.chomp
        <img src="/some.png" style=" width: 30px; height: 50px;">
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'converts width/height attrs to CSS rules' do
      markup = <<~MARKUP.chomp
        <img src="/some.png" width="30px" height="50px">
      MARKUP
      expected = <<~EXPECTED.chomp
        <img src="/some.png" style="width:30px;height:50px;">
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    it 'automatically adds terminal semicolons to CSS rules' do
      markup = <<~MARKUP.chomp
        <img style="width: 181px; height: 125px" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...">
      MARKUP
      expected = <<~EXPECTED.chomp
        <img style="width: 181px; height: 125px;" src="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/...">
      EXPECTED

      expect(markup.html2html_strict).to eq(expected)
    end

    context 'when <img> nested in <a>, nested in <p>' do
      # Class and id attrs, <o:p> and the text-decoration rule are absent from
      # the expected output; the link gets rel/target/title and the image's
      # width/height become CSS rules.
      it 'sanitizes those elements as normal' do
        markup = <<~MARKUP.chomp
          <p class="MsoNormal"><a href="http://www.example.com/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>
        MARKUP
        expected = <<~EXPECTED.chomp
          <p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank" title="http://www.example.com/"><span style="color:blue;"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></span></a></p>
        EXPECTED

        expect(markup.html2html_strict).to eq(expected)
      end
    end
  end
  1107. context 'sample email input' do
  # Quoted mail header block (Von/Gesendet/An/Cc/Betreff) following the text
  # "abc"; the expected output has a js-signatureMarker <span> inserted in
  # front of the <p> and no newline after the opening <div>.
  it 'handles sample input 1' do
    markup = <<~MARKUP.chomp
      <div>
      abc<p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    MARKUP
    expected = <<~EXPECTED.chomp
      <div>abc<span class=\"js-signatureMarker\"></span><p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # Variant of sample 1 with a space after <div> and after <p>; the expected
  # output drops the former and keeps the latter.
  it 'handles sample input 2' do
    markup = <<~MARKUP.chomp
      <div> abc<p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    MARKUP
    expected = <<~EXPECTED.chomp
      <div>abc<span class=\"js-signatureMarker\"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # Variant of sample 2 with an extra space between </p> and </div>; the
  # expected output is the same as for sample 2.
  it 'handles sample input 3' do
    markup = <<~MARKUP.chomp
      <div> abc<p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p> </div>
    MARKUP
    expected = <<~EXPECTED.chomp
      <div>abc<span class=\"js-signatureMarker\"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # Mail signature made of styled <div>/<span> pairs. The expected output has
  # no margin/font style attrs, no <o:p> and no Apple-converted-space span.
  # Note that the mailto link's style attribute in the input is missing its
  # opening quote; the fixture keeps that malformed markup on purpose or by
  # accident - either way it must stay byte-identical.
  it 'handles sample input 4' do
    markup = <<~MARKUP.chomp
      <div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Mit freundlichem Gruß<span class="Apple-converted-space">&nbsp;</span><br><br>John Smith<br>Service und Support<br><br>Example Service AG &amp; Co.<o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; "><a href="mailto:john.smith@example.com" style=color: blue; text-decoration: underline; ">john.smith@example.com</a></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; "><a href="http://www.example.com" style="color: blue; text-decoration: underline; ">www.example.com</a></span><span style="font-size: 10pt; font-family: Arial, sans-serif; "><o:p></o:p></span></div>
    MARKUP
    expected = <<~EXPECTED.chomp
      <div><span>Mit freundlichem Gruß <br><br>John Smith<br>Service und Support<br><br>Example Service AG &amp; Co.</span></div><div>
      <span>Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span>
      </div><div>
      <span>Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span>
      </div><div>
      <a href="mailto:john.smith@example.com">john.smith@example.com</a>
      </div><div>
      <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank">www.example.com</a>
      </div>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # Word-generated mail body (MsoNormal paragraphs inside WordSection1) with
  # unclosed <body>/<div> elements. In the expected output the class attrs,
  # font-size rules and <o:p> elements are gone, colors are lower-cased, and
  # the open <div>s are closed.
  it 'handles sample input 5' do
    markup = <<~MARKUP.chomp
      <body lang="DE" link="blue" vlink="purple"><div class="WordSection1">
      <p class="MsoNormal"><span style="color:#1F497D">Guten Morgen, Frau ABC,<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <p class="MsoNormal"><span style="color:#1F497D">Nochmals vielen Dank und herzliche Grüße
      <o:p></o:p></span></p>
      <div>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D"><o:p>&nbsp;</o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">Anna Smith<o:p></o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">art abc SEV GmbH<o:p></o:p></span></b></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">art abc TRAV<o:p></o:p></span></b></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">Marktstätte 123<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">123456 Dorten<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">T: &#43;49 (0) 12345/1234560-1<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">T: &#43;49 (0) 12345/1234560-0<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">F: &#43;49 (0) 12345/1234560-2<o:p></o:p></span></p>
      <p class="MsoNormal"><a href="mailto:annad@example.com"><span style="font-size:9.0pt">annad@example.com</span></a><span style="font-size:9.0pt;color:#C00000"><o:p></o:p></span></p>
      <p class="MsoNormal"><a href="http://www.example.com/"><span style="font-size:9.0pt">www.example.com</span></a><span style="font-size:9.0pt;color:#1F497D">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
      </span><a href="http://www.ABC.com/"><span style="font-size:9.0pt">www.ABC.com</span></a><span style="font-size:9.0pt;color:#1F497D"><o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:8.0pt;color:#1F497D">Geschäftsführer Vor Nach, VorUndZu Nach&nbsp;&nbsp;&nbsp;&nbsp; -&nbsp;&nbsp;&nbsp;&nbsp; Amtsgericht Dort HRB 12345&nbsp;&nbsp;&nbsp; -&nbsp;&nbsp;&nbsp; Ein Unternehmer der ABC Gruppe<o:p></o:p></span></p>
    MARKUP
    expected = <<~EXPECTED.chomp
      <div>
      <p><span style="color:#1f497d;">Guten Morgen, Frau ABC,</span></p><p><span style="color:#1f497d;"><p>&nbsp;</p></span></p><p><span style="color:#1f497d;">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?</span></p><p><span style="color:#1f497d;"><p>&nbsp;</p></span></p><p><span style="color:#1f497d;">Nochmals vielen Dank und herzliche Grüße </span></p><div>
      <p><b><span style="color:#1f497d;"><p>&nbsp;</p></span></b></p><p><b><span style="color:#1f497d;">Anna Smith</span></b></p><p><b><span style="color:#1f497d;">art abc SEV GmbH</span></b></p><p><b><span style="color:#1f497d;">art abc TRAV</span></b></p><p><span style="color:#1f497d;">Marktstätte 123</span></p><p><span style="color:#1f497d;">123456 Dorten</span></p><p><span style="color:#1f497d;">T: +49 (0) 12345/1234560-1</span></p><p><span style="color:#1f497d;">T: +49 (0) 12345/1234560-0</span></p><p><span style="color:#1f497d;">F: +49 (0) 12345/1234560-2</span></p><p><a href="mailto:annad@example.com">annad@example.com</a><span style="color:#c00000;"></span></p><p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank">www.example.com</a><span style="color:#1f497d;"> </span><a href="http://www.ABC.com/" rel="nofollow noreferrer noopener" target="_blank">www.ABC.com</a><span style="color:#1f497d;"></span></p><p><span style="color:#1f497d;">Geschäftsführer Vor Nach, VorUndZu Nach - Amtsgericht Dort HRB 12345 - Ein Unternehmer der ABC Gruppe</span></p></div></div>
    EXPECTED

    expect(markup.html2html_strict).to eq(expected)
  end
  # Outlook-style mail with a quoted reply header ("Von:/Gesendet:/An:/Betreff:")
  # wrapped in nested <div>s. The expected output drops the Office-specific
  # markup and carries a signature marker <span> right before the paragraph
  # that holds the "Von:" header.
  it 'handles sample input 6' do
    outlook_html = <<~HTML.chomp
      <p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
      <div>
      <div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
      <p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;">Von:</span></b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;"> Besucherbüro, MKuk [<a href="mailto:besucherbuero@example.com">mailto:besucherbuero@example.com</a>] <br>
      <b>Gesendet:</b> Freitag, 16. Dezember 2016 08:05<br>
      <b>An:</b> \'Amaia Epalza\'<br>
      <b>Betreff:</b> AW: Gruppe vtb Kultuur // 28.06.2017<o:p></o:p></span></p>
      </div>
      </div>
      <p class="MsoNormal"><o:p>&nbsp;</o:p></p>
      <p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Segoe UI&quot;,&quot;sans-serif&quot;;color:#1F497D">Reservierungsbestätigung Führung Skulptur-Projekte 2017 am
      </span></b><o:p></o:p></p>
      <p class="MsoNormal"><span style="font-size:10.0pt;font-family:&quot;Segoe UI&quot;,&quot;sans-serif&quot;;color:#1F497D">&nbsp;</span><o:p></o:p></p>
      <p class="MsoNormal">Guten Morgen Frau Epalza,<o:p></o:p></p>
    HTML
    sanitized = <<~TEXT.chomp
      <p><span style="color:#1f497d;"><p>&nbsp;</p></span></p><div>
      <div>
      <span class="js-signatureMarker"></span><p><b>Von:</b><span> Besucherbüro, MKuk [<a href="mailto:besucherbuero@example.com">mailto:besucherbuero@example.com</a>] <br>
      <b>Gesendet:</b> Freitag, 16. Dezember 2016 08:05<br>
      <b>An:</b> 'Amaia Epalza'<br>
      <b>Betreff:</b> AW: Gruppe vtb Kultuur // 28.06.2017</span></p></div></div><p>&nbsp;</p><p><b><span style="color:#1f497d;">Reservierungsbestätigung Führung Skulptur-Projekte 2017 am </span></b></p><p><span style="color:#1f497d;"> </span></p><p>Guten Morgen Frau Epalza,</p>
    TEXT

    expect(outlook_html.html2html_strict).to eq(sanitized)
  end
  # Mail with empty class attributes, <br>-only <div>s and unclosed trailing
  # <div>s. The expected output has no class attributes, adds rel/target to the
  # link, turns the <br>-only <div>s into "&nbsp;" and omits the dangling tail.
  it 'handles sample input 7' do
    raw_html = <<~HTML.chomp
      <div class="">Wir brauchen also die Instanz <a href="http://example.zammad.com" class="">example.zammad.com</a>, kann die aber nicht mehr nutzen.</div><div class=""><br class=""></div><div class="">Bitte um Freischaltung.</div><div class=""><br class=""></div><div class=""><br class=""><div class="">
    HTML
    sanitized = <<~TEXT.chomp
      <div>Wir brauchen also die Instanz <a href="http://example.zammad.com" rel="nofollow noreferrer noopener" target="_blank">example.zammad.com</a>, kann die aber nicht mehr nutzen.</div><div>&nbsp;</div><div>Bitte um Freischaltung.</div><div>&nbsp;</div>
    TEXT

    expect(raw_html.html2html_strict).to eq(sanitized)
  end
  # MS Office paragraph whose text is wrapped across two source lines. The
  # expected output keeps only the colour style and joins the wrapped text
  # with a single space.
  it 'handles sample input 8' do
    office_html = <<~HTML.chomp
      <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D;mso-fareast-language:EN-US">oh jeee … Zauberwort vergessen ;-) Können Sie mir
      <b>bitte</b> noch meine Testphase verlängern?<o:p></o:p></span></p>
      <p class="MsoNormal"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#1F497D;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
    HTML
    sanitized = <<~TEXT.chomp
      <p><span style="color:#1f497d;">oh jeee … Zauberwort vergessen ;-) Können Sie mir <b>bitte</b> noch meine Testphase verlängern?</span></p><p><span style="color:#1f497d;"><p>&nbsp;</p></span></p>
    TEXT

    expect(office_html.html2html_strict).to eq(sanitized)
  end
  # Link that already carries rel="nofollow", a title and target="_blank".
  # The expected output only differs in the rel attribute, which becomes
  # "nofollow noreferrer noopener".
  it 'handles sample input 9' do
    link_html = <<~HTML.chomp
      <div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a></div>
    HTML
    sanitized = <<~TEXT.chomp
      <div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a></div>
    TEXT

    expect(link_html.html2html_strict).to eq(sanitized)
  end
  # Single table row with heavily styled cells. In the expected output each
  # <td> keeps valign plus its border/padding/background styles, while
  # width, nowrap, class and height information is gone.
  it 'handles sample input 10' do
    table_row_html = <<~HTML.chomp
      <tr style="height: 15pt;" class=""><td width="170" nowrap="" valign="bottom" style="width: 127.5pt; border-style: none none none solid; border-left-width: 1pt; border-left-color: windowtext; padding: 0cm 5.4pt; height: 15pt;" class=""><p class="MsoNormal" align="center" style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;"><span style="" class="">&nbsp;</span></p></td><td width="58" nowrap="" valign="bottom" style="width: 43.5pt; padding: 0cm 5.4pt; height: 15pt;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="" class="">20-29</span></div></td><td width="47" nowrap="" valign="bottom" style="width: 35pt; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" class="">200</span></div></td><td width="76" nowrap="" valign="bottom" style="width: 57pt; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" class="">-1</span></div></td><td width="76" nowrap="" valign="bottom" style="width: 57pt; border-style: none solid none none; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="color: rgb(156, 0, 6);" class="">201</span></div></td><td width="107" nowrap="" valign="bottom" style="width: 80pt; padding: 0cm 5.4pt; height: 15pt;" class=""></td><td width="85" nowrap="" valign="bottom" style="width: 64pt; padding: 0cm 5.4pt; height: 15pt;" class=""></td><td width="101" nowrap="" valign="bottom" style="width: 76pt; border-style: none solid solid; border-left-width: 1pt; border-left-color: windowtext; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><b class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">country</span></b><span style="font-size: 11pt; font-family: Calibri, sans-serif;" class=""></span></div></td><td width="87" nowrap="" valign="bottom" style="width: 65pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Target (gross)</span></div></td><td width="123" nowrap="" valign="bottom" style="width: 92pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Remaining Recruits</span></div></td><td width="87" nowrap="" valign="bottom" style="width: 65pt; border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt; height: 15pt; background-position: initial initial; background-repeat: initial initial;" class=""><div style="margin: 0cm 0cm 0.0001pt; font-size: 12pt; font-family: \'Times New Roman\', serif; text-align: center;" class=""><span style="font-size: 10pt; font-family: Arial, sans-serif;" class="">Total Recruits</span></div></td></tr>
    HTML
    sanitized = <<~TEXT.chomp
      <tr>
      <td valign="bottom" style=" border-style: none none none solid; border-left-width: 1pt; border-left-color: windowtext; padding: 0cm 5.4pt;"><p>&nbsp;</p></td>
      <td valign="bottom" style=" padding: 0cm 5.4pt;"><div>20-29</div></td>
      <td valign="bottom" style=" background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><div><span style="color: rgb(156, 0, 6);">200</span></div></td>
      <td valign="bottom" style=" background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><div><span style="color: rgb(156, 0, 6);">-1</span></div></td>
      <td valign="bottom" style=" border-style: none solid none none; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(255, 199, 206); padding: 0cm 5.4pt;"><div><span style="color: rgb(156, 0, 6);">201</span></div></td>
      <td valign="bottom" style=" padding: 0cm 5.4pt;"></td>
      <td valign="bottom" style=" padding: 0cm 5.4pt;"></td>
      <td valign="bottom" style=" border-style: none solid solid; border-left-width: 1pt; border-left-color: windowtext; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>
      <b>country</b>
      </div></td>
      <td valign="bottom" style=" border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Target (gross)</div></td>
      <td valign="bottom" style=" border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: gray; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Remaining Recruits</div></td>
      <td valign="bottom" style=" border-style: none solid solid none; border-bottom-width: 1pt; border-bottom-color: gray; border-right-width: 1pt; border-right-color: windowtext; background-color: rgb(242, 242, 242); padding: 0cm 5.4pt;"><div>Total Recruits</div></td>
      </tr>
    TEXT

    expect(table_row_html.html2html_strict).to eq(sanitized)
  end
  # Mail body buried under many identically styled wrapper <div>s, with an
  # inline "cid:" image. In the expected output the wrappers are collapsed,
  # "&nbsp;" runs become plain spaces and the "$" characters of the image
  # src appear percent-encoded as "%24".
  it 'handles sample input 11' do
    nested_html = <<~HTML.chomp
      <div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:Arial"><div>Dear Bob<span style="line-height: 23.8px;">:</span><span style="color: rgb(255, 255, 255); line-height: 1.7;">Mr/Mrs</span></div><div><br></div><div><span style="line-height: 1.7;">We&nbsp;are&nbsp;one&nbsp;of&nbsp;the&nbsp;leading&nbsp;manufacturer&nbsp;and&nbsp;supplier&nbsp;of&nbsp;</span>conduits and cars since 3000.</div><div><br></div><div>Could you inform me the specification you need?</div><div><br></div><div>May I sent you our products catalogues for your reference?</div><div><br></div><div><img src="cid:5cb2783c$1$15ae9b384c8$Coremail$zhanabcdzhao$example.com" orgwidth="1101" orgheight="637" data-image="1" style="width: 722.7px; height: 418px; border: none;"></div><div>Best regards!</div><div><br></div><div><b style="line-height: 1.7;"><i><u><span lang="EL" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#17365D;\nmso-ansi-language:EL">Welcome to our booth B11/1 Hall 13 during SOMEWHERE\n9999.</span></u></i></b></div><div style="position:relative;zoom:1"><div>Bob Smith</div><div><div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) &nbsp;Fax: 0000-11-12345678&nbsp;</div><div><span style="color:#17365d;">Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</span></div><div>Web: www.example.com</div></div><div style="clear:both"></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div>
    HTML
    sanitized = <<~TEXT.chomp
      <div>\n<div>Dear Bob:<span style="color: rgb(255, 255, 255);">Mr/Mrs</span>
      </div><div>&nbsp;</div><div>We are one of the leading manufacturer and supplier of conduits and cars since 3000.</div><div>&nbsp;</div><div>Could you inform me the specification you need?</div><div>&nbsp;</div><div>May I sent you our products catalogues for your reference?</div><div>&nbsp;</div><div><img src="cid:5cb2783c%241%2415ae9b384c8%24Coremail%24zhanabcdzhao%24example.com" style="width: 722.7px; height: 418px;"></div><div>Best regards!</div><div>&nbsp;</div><div><b><i><u><span lang="EL" style="color:#17365d;">Welcome to our booth B11/1 Hall 13 during SOMEWHERE 9999.</span></u></i></b></div><div>\n<div>Bob Smith</div><div>\n<div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) Fax: 0000-11-12345678</div><div><span style="color:#17365d;">Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</span></div><div>Web: www.example.com</div></div></div></div>
    TEXT

    expect(nested_html.html2html_strict).to eq(sanitized)
  end
  # Styled link wrapping a coloured <span>. The expected output drops the
  # style/class attributes and the inner <span>, and carries rel, target and
  # a title equal to the href.
  it 'handles sample input 12' do
    list_item_html = <<~HTML.chomp
      <li><a style="font-size:15px; font-family:Arial;color:#0f7246" class="text_link" href="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh"><span style="color: rgb(0, 0, 0);">Luxemburg</span></a></li>
    HTML
    sanitized = <<~TEXT.chomp
      <li><a href="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh" rel="nofollow noreferrer noopener" target="_blank" title="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh">Luxemburg</a></li>
    TEXT

    expect(list_item_html.html2html_strict).to eq(sanitized)
  end
  1258. end
  # Examples checking where #html2html_strict puts the signature marker
  # <span>: before "--"/"__" separator lines, before quoted-reply headers
  # ("Von:") and before quoted-text intros that are followed by, or contained
  # in, a <blockquote>.
  context 'signature recognition' do
    let(:marker) { '<span class="js-signatureMarker"></span>' }

    it 'places marker before "--" line (surrounded by <br>)' do
      html = <<~HTML.chomp
        lalala<br>--<br>Max Mix
      HTML
      expected = <<~TEXT.chomp
        lalala#{marker}<br>--<br>Max Mix
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'places marker before "--" line (surrounded by <br/>)' do
      html = <<~HTML.chomp
        lalala<br/>--<br/>Max Mix
      HTML
      expected = <<~TEXT.chomp
        lalala#{marker}<br>--<br>Max Mix
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'places marker before "--" line (preceded by <br/>\n)' do
      html = <<~HTML.chomp
        lalala<br/>
        --<br/>Max Mix
      HTML
      expected = <<~TEXT.chomp
        lalala#{marker}<br> --<br>Max Mix
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'places marker before "--" line (surrounded by <p>)' do
      html = <<~HTML.chomp
        lalala<p>--</p>Max Mix
      HTML
      expected = <<~TEXT.chomp
        lalala#{marker}<p>--</p>Max Mix
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'places marker before "__" line (surrounded by <br>)' do
      html = <<~HTML.chomp
        lalala<br>__<br>Max Mix
      HTML
      expected = <<~TEXT.chomp
        lalala#{marker}<br>__<br>Max Mix
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'places marker before quoted reply’s "Von:" header (in German)' do
      html = <<~HTML.chomp
        den.<br><br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hello,<br><br>ich versuche an den Punkten
      HTML
      expected = <<~TEXT.chomp
        den.<br>#{marker}<br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hello,<br><br>ich versuche an den Punkten
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'places marker before quoted reply’s "Von:" header (as <p> with stripped parent <div>)' do
      html = <<~HTML.chomp
        <div><div style="border:none;border-top:solid #e1e1e1 1.0pt;padding:3.0pt 0cm 0cm 0cm"><p class="MsoNormal"><b><span lang="DE" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif">Von:</span></b><span lang="DE" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif"> Martin Edenhofer via Zammad Helpdesk [mailto:<a href="mailto:support@example.com">support@zammad.com</a>] <br><b>Gesendet:</b>\u0020
      HTML
      expected = <<~TEXT.chomp
        <div>#{marker}<p><b><span lang="DE">Von:</span></b><span lang="DE"> Martin Edenhofer via Zammad Helpdesk [mailto:<a href="mailto:support@example.com">support@zammad.com</a>] <br><b>Gesendet:</b> </span></p></div>
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'places marker before quoted reply’s "Von:" header (as <p> with parent <div>)' do
      html = <<~HTML.chomp
        <div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
        <p class="MsoNormal" style="margin-left:35.4pt"><b><span style="font-family:Calibri;color:black">Von:
        </span></b><span style="font-family:Calibri;color:black">Johanna Kiefer via Znuny Projects &lt;projects@example.com&gt;<br>
        <b>Organisation: </b>Znuny Group<br>
        <b>Datum: </b>Montag, 6. März 2017 um 13:32<br>
      HTML
      expected = <<~TEXT.chomp
        <div>
        #{marker}<p><b>Von: </b><span>Johanna Kiefer via Znuny Projects &lt;projects@example.com&gt;<br>
        <b>Organisation: </b>Znuny Group<br>
        <b>Datum: </b>Montag, 6. März 2017 um 13:32<br></span></p></div>
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'places marker before quoted reply’s "Von:" header (as <div>)' do
      html = <<~HTML.chomp
        <div><br>
        <br>
        <br><font size=1 color=#5f5f5f face="sans-serif">Von: &nbsp; &nbsp; &nbsp;
        &nbsp;</font><font size=1 face="sans-serif">Hotel &lt;info@example.com&gt;</font>
        <br><font size=1 color=#5f5f5f face="sans-serif">An: &nbsp; &nbsp; &nbsp;
        &nbsp;</font></div>
      HTML
      expected = <<~TEXT.chomp
        #{marker}<div><br>Von: Hotel &lt;info@example.com&gt; <br>An: </div>
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'places marker before English quoted text intro (as <blockquote>)' do
      html = <<~HTML.chomp
        <br class=""><div><blockquote type="cite" class=""><div class="">On 04 Mar 2017, at 14:47, Oliver Ruhm &lt;<a href="mailto:oliver@example.com" class="">oliver@example.com</a>&gt; wrote:</div><br class="Apple-interchange-newline">
      HTML
      expected = <<~TEXT.chomp
        <div>#{marker}<blockquote type="cite">
        <div>On 04 Mar 2017, at 14:47, Oliver Ruhm &lt;<a href="mailto:oliver@example.com">oliver@example.com</a>&gt; wrote:</div><br>
        </blockquote></div>
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'does not place marker if blockquote doesn’t contain a quoted text intro' do
      html = <<~HTML.chomp
        <br class=""><div><blockquote type="cite" class=""><div class="">some note</div><br class="Apple-interchange-newline">
      HTML
      expected = <<~TEXT.chomp
        <div><blockquote type="cite">
        <div>some note</div><br>
        </blockquote></div>
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'does not place marker if quoted text intro isn’t followed by a <blockquote>' do
      html = <<~HTML.chomp
        <div>
        <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
        <br>
        </div>
      HTML
      expected = <<~TEXT.chomp
        <div>
        <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
        <br>
        </div>
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end

    it 'places marker before German quoted text intro (before <blockquote>)' do
      html = <<~HTML.chomp
        <div>
        <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
        <br>
        </div>
        <blockquote type="cite">
        <div>Dear Mr. Smith,<br></div>
        </blockquote>
      HTML
      expected = <<~TEXT.chomp
        #{marker}<div>
        <br> Am 17.03.2017 um 17:03 schrieb Martin Edenhofer via Zammad Helpdesk &lt;support@example.com&gt;:<br>
        <br>
        </div><blockquote type="cite">
        <div>Dear Mr. Smith,<br>
        </div></blockquote>
      TEXT

      expect(html.html2html_strict).to eq(expected)
    end
  end
  1387. end
  # Examples for String#signature_identify, always called as
  # signature_identify('text', true). Every example feeds a plain-text mail
  # body and expects the same text back with the marker string inserted in
  # front of the detected signature separator or quoted-text intro line.
  describe '#signature_identify' do
    let(:marker) { '######SIGNATURE_MARKER######' }

    context 'with no signature present' do
      it 'leaves string as-is' do
        expect((+'foo').signature_identify('text', true)).to eq('foo')
      end
    end

    context 'with signature present' do
      it 'places marker at start of "--" line' do
        source = <<~SRC.chomp
          foo
          --
          bar
        SRC
        marked = <<~MARKED.chomp
          foo
          #{marker}--
          bar
        MARKED

        expect(source.signature_identify('text', true)).to eq(marked)
      end

      it 'places marker before English quoted text intro' do
        source = <<~SRC.chomp
          On 01/04/15 10:55, Bob Smith wrote:
        SRC
        marked = <<~MARKED.chomp
          #{marker}On 01/04/15 10:55, Bob Smith wrote:
        MARKED

        expect(source.signature_identify('text', true)).to eq(marked)
      end

      it 'places marker before German quoted text intro' do
        source = <<~SRC.chomp
          Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@znuny.ink>:
        SRC
        marked = <<~MARKED.chomp
          #{marker}Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@znuny.ink>:
        MARKED

        expect(source.signature_identify('text', true)).to eq(marked)
      end

      # NOTE(review): as written this fixture contains no trailing empty line and
      # is identical to the "double empty lines" example below — confirm where
      # the blank line(s) were meant to go.
      it 'ignores trailing empty line' do
        source = <<~SRC.chomp
          test 123
          test 123
          --
          Bob Smith
        SRC
        marked = <<~MARKED.chomp
          test 123
          test 123
          #{marker}--
          Bob Smith
        MARKED

        expect(source.signature_identify('text', true)).to eq(marked)
      end

      # NOTE(review): identical to the example above as written; the two trailing
      # empty lines named in the description are not present — confirm.
      it 'ignores trailing double empty lines' do
        source = <<~SRC.chomp
          test 123
          test 123
          --
          Bob Smith
        SRC
        marked = <<~MARKED.chomp
          test 123
          test 123
          #{marker}--
          Bob Smith
        MARKED

        expect(source.signature_identify('text', true)).to eq(marked)
      end

      # NOTE(review): the fixture shows no leading/trailing empty lines either —
      # confirm against the intended input.
      it 'ignores leading/trailing empty lines' do
        source = <<~SRC.chomp
          test 123\u0020
          1
          2
          3
          4
          5
          6
          7
          8
          9
          --
          Bob Smith
        SRC
        marked = <<~MARKED.chomp
          test 123\u0020
          1
          2
          3
          4
          5
          6
          7
          8
          9
          #{marker}--
          Bob Smith
        MARKED

        expect(source.signature_identify('text', true)).to eq(marked)
      end

      it 'ignores lines starting with "--" but containing more text' do
        source = <<~SRC.chomp
          test 123\u0020
          --no not match--
          --
          Bob Smith
        SRC
        marked = <<~MARKED.chomp
          test 123\u0020
          --no not match--
          #{marker}--
          Bob Smith
        MARKED

        expect(source.signature_identify('text', true)).to eq(marked)
      end

      it 'places marker at start of " -- " line' do
        # The separator line carries one leading space (hence the extra indent
        # inside the heredoc) so the input matches both the description and the
        # expected "#{marker} -- " line.
        source = <<~SRC.chomp
          test 123\u0020
          --no not match--
           --\u0020
          Bob Smith
        SRC
        marked = <<~MARKED.chomp
          test 123\u0020
          --no not match--
          #{marker} --\u0020
          Bob Smith
        MARKED

        expect(source.signature_identify('text', true)).to eq(marked)
      end

      it 'places marker on empty line if possible / only places one marker' do
        # The empty line before the first "--" is what the marker is expected to
        # land on; without it the expected text has one line more than the input.
        source = <<~SRC.chomp
          test 123\u0020

          --
          Bob Smith
          --
          Bob Smith
        SRC
        marked = <<~MARKED.chomp
          test 123\u0020
          #{marker}
          --
          Bob Smith
          --
          Bob Smith
        MARKED

        expect(source.signature_identify('text', true)).to eq(marked)
      end

      context 'for Apple email quote text' do
        context 'in English' do
          it 'places two markers, one before quoted text intro and one at start of "--" line' do
            source = <<~SRC.chomp
              test 123\u0020
              --no not match--
              Bob Smith
              On 01/04/15 10:55, Bob Smith wrote:
              lalala
              --
              some test
            SRC
            marked = <<~MARKED.chomp
              test 123\u0020
              --no not match--
              Bob Smith
              #{marker}On 01/04/15 10:55, Bob Smith wrote:
              lalala
              #{marker}--
              some test
            MARKED

            expect(source.signature_identify('text', true)).to eq(marked)
          end
        end

        context 'auf Deutsch' do
          it 'places marker before quoted text intro' do
            source = <<~SRC.chomp
              test 123\u0020
              --no not match--
              Bob Smith
              Am 03.04.2015 um 20:58 schrieb Bob Smith <bob@example.com>:
              lalala
            SRC
            marked = <<~MARKED.chomp
              test 123\u0020
              --no not match--
              Bob Smith
              #{marker}Am 03.04.2015 um 20:58 schrieb Bob Smith <bob@example.com>:
              lalala
            MARKED

            expect(source.signature_identify('text', true)).to eq(marked)
          end
        end
      end

      context 'for MS email quote text' do
        context 'in English' do
          it 'places marker before quoted text intro' do
            source = <<~SRC.chomp
              test 123test 123\u0020
              --no not match--
              Bob Smith
              From: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Sent: Donnerstag, 2. April 2015 10:00
              lalala</div>
            SRC
            marked = <<~MARKED.chomp
              test 123test 123\u0020
              --no not match--
              Bob Smith
              #{marker}From: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Sent: Donnerstag, 2. April 2015 10:00
              lalala</div>
            MARKED

            expect(source.signature_identify('text', true)).to eq(marked)
          end
        end

        context 'auf Deutsch' do
          it 'places marker before quoted text intro' do
            source = <<~SRC.chomp
              test 123\u0020
              --no not match--
              Bob Smith
              Von: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Gesendet: Donnerstag, 2. April 2015 10:00
              Betreff: lalala
            SRC
            marked = <<~MARKED.chomp
              test 123\u0020
              --no not match--
              Bob Smith
              #{marker}Von: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Gesendet: Donnerstag, 2. April 2015 10:00
              Betreff: lalala
            MARKED

            expect(source.signature_identify('text', true)).to eq(marked)
          end
        end

        context 'en francais' do
          it 'places marker before quoted text intro' do
            source = <<~SRC.chomp
              test 123\u0020
              --no not match--
              Bob Smith
              De : Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Envoyé : mercredi 29 avril 2015 17:31
              Objet : lalala
            SRC
            marked = <<~MARKED.chomp
              test 123\u0020
              --no not match--
              Bob Smith
              #{marker}De : Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
              Envoyé : mercredi 29 avril 2015 17:31
              Objet : lalala
            MARKED

            expect(source.signature_identify('text', true)).to eq(marked)
          end
        end
      end
    end
  end
  # Examples for String#utf8_encode: handling of strings that already are
  # UTF-8, conversion from other encodings with and without a from: hint, and
  # coarse timing bounds for large inputs.
  describe '#utf8_encode' do
    context 'on valid, UTF-8-encoded strings' do
      # Declared with `subject` instead of `let(:subject)` so RSpec's own
      # subject helper is not shadowed by a same-named `let`.
      subject(:string) { 'hello' }

      it 'returns an identical copy' do
        expect(string.utf8_encode).to eq(string)
        expect(string.utf8_encode.encoding).to be(string.encoding)
        expect(string.utf8_encode).not_to be(string)
      end

      context 'which are incorrectly set to other, technically valid encodings' do
        subject(:string) { described_class.new('ö', encoding: 'tis-620') }

        it 'sets input encoding to UTF-8 instead of attempting conversion' do
          expect(string.utf8_encode).to eq(string.dup.force_encoding('utf-8'))
        end
      end
    end

    context 'on strings in other encodings' do
      subject(:string) { original_string.encode(input_encoding) }

      context 'with no from: option' do
        let(:original_string) { 'Tschüss!' }
        let(:input_encoding) { Encoding::ISO_8859_2 }

        it 'detects the input encoding' do
          expect(string.utf8_encode).to eq(original_string)
        end
      end

      context 'with a valid from: option' do
        let(:original_string) { 'Tschüss!' }
        let(:input_encoding) { Encoding::ISO_8859_2 }

        it 'uses the specified input encoding' do
          expect(string.utf8_encode(from: 'iso-8859-2')).to eq(original_string)
        end

        it 'uses any valid input encoding, even if not correct' do
          expect(string.utf8_encode(from: 'gb18030')).to eq('Tsch黶s!')
        end
      end

      context 'with an invalid from: option' do
        let(:original_string) { '―陈志' }
        let(:input_encoding) { Encoding::GB18030 }

        it 'does not try it' do
          # Plain String#encode raises for this from: value, utf8_encode must not.
          expect { string.encode('utf-8', 'gb2312') }
            .to raise_error(Encoding::InvalidByteSequenceError)

          expect { string.utf8_encode(from: 'gb2312') }
            .not_to raise_error
        end

        it 'uses the detected input encoding instead' do
          expect(string.utf8_encode(from: 'gb2312')).to eq(original_string)
        end
      end
    end

    context 'performance' do
      subject(:string) { original_string.encode(input_encoding) }

      context 'with utf8_encode in iso-8859-1' do
        let(:original_string) { 'äöü0' * 999_999 }
        let(:input_encoding) { Encoding::ISO_8859_1 }

        it 'detects the input encoding' do
          # Timeout raises if the conversion takes longer than one second.
          Timeout.timeout(1) do
            expect(string.utf8_encode(from: 'iso-8859-1')).to eq(original_string)
          end
        end
      end

      context 'with utf8_encode in utf-8' do
        let(:original_string) { 'äöü0' * 999_999 }
        let(:input_encoding) { Encoding::UTF_8 }

        it 'detects the input encoding' do
          Timeout.timeout(1) do
            expect(string.utf8_encode(from: 'utf-8')).to eq(original_string)
          end
        end
      end

      context 'with utf8_encode in iso-8859-1 and charset detection' do
        let(:original_string) { 'äöü0' * 199_999 }
        let(:input_encoding) { Encoding::ISO_8859_1 }

        it 'detects the input encoding' do
          # The from: hint does not match the ISO-8859-1 bytes of the input;
          # this path is given a much larger budget (18 seconds).
          Timeout.timeout(18) do
            expect(string.utf8_encode(from: 'utf-8')).to eq(original_string)
          end
        end
      end
    end
  end
  1698. end