string.rb 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. class String
  # Quotes the string e-mail style by prefixing every line with "> ".
  #
  #   quoted = "a\nb".message_quote
  #
  # returns
  #
  #   "> a\n> b\n"
  #
  # Every quoted line is terminated with "\n"; an empty string yields ''.
  def message_quote
    # append to one buffer instead of re-allocating the whole result for every line
    split("\n").each_with_object(''.dup) do |line, quoted|
      quoted << '> ' << line << "\n"
    end
  end
  # Wraps every line that is longer than the configured width at whitespace.
  #
  #   wrapped = text.word_wrap(40)
  #   wrapped = text.word_wrap(line_width: 40)
  #
  # The width is taken from the first positional argument or from the
  # :line_width option and defaults to 82. Lines that already fit are
  # returned unchanged.
  def word_wrap(*args)
    options = args.extract_options!
    options[:line_width] = args[0] || 82 unless args.blank?
    options.reverse_merge!(line_width: 82)
    width = options[:line_width]

    wrapped = split("\n").map do |line|
      if line.length > width
        line.gsub(/(.{1,#{width}})(\s+|$)/, "\\1\n").strip
      else
        line
      end
    end
    wrapped.join("\n")
  end
  # Turns a class/module name into the matching file path.
  #
  #   filename = 'Some::Module'.to_filename
  #
  # returns
  #
  #   'some/module'
  def to_filename
    path = "#{self}" # rubocop:disable Style/UnneededInterpolation
    path = path.gsub(/::/, '/')                       # namespace separator -> directory separator
    path = path.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') # "HTMLParser" -> "HTML_Parser"
    path = path.gsub(/([a-z\d])([A-Z])/, '\1_\2')     # "someModule" -> "some_Module"
    path.tr('-', '_').downcase
  end
  # Turns a file path into the matching class/module name.
  #
  #   classname = 'some/module.rb'.to_classname
  #
  # returns
  #
  #   'Some::Module'
  def to_classname
    # drop the ".rb" extension, then camelize every path segment
    segments = "#{self}".gsub(/\.rb$/, '').split('/') # rubocop:disable Style/UnneededInterpolation
    segments.map { |segment| segment.camelize }.join('::')
  end
  # because of mysql inno_db limitations, strip 4 bytes utf8 chars (e. g. emojis)
  # unfortunately utf8mb4 will raise other limitations of max varchar and lower index sizes
  # More details: http://pjambet.github.io/blog/emojis-and-mysql/
  #
  # Returns self untouched if Rails.application.config.db_4bytes_utf8 is set,
  # otherwise a new string without the characters that take more than 3 bytes.
  # Every removed character is logged as a warning.
  def utf8_to_3bytesutf8
    return self if Rails.application.config.db_4bytes_utf8

    kept = each_char.reject do |char|
      oversized = char.bytes.count > 3
      Rails.logger.warn "strip out 4 bytes utf8 chars '#{char}' of '#{self}'" if oversized
      oversized
    end
    kept.join('')
  end
  # Converts HTML into plain text.
  #
  #   text = html_string.html2text
  #
  # returns
  #
  #   'string with text only'
  #
  # string_only - false (default): every <a ... href="..."> tag is replaced by a
  #               numbered "[n] " reference and the targets are appended as a
  #               list at the end of the text.
  #               true: complete <a>...</a> elements are replaced inline by
  #               their text/target or by ######LINKEXT:...###### /
  #               ######LINKRAW:...###### markers (see html2html_strict).
  # strict      - true: b, i, ul, ol, li, u, h1, h2, h3 and hr tags are kept as
  #               ######tag###### markers and plain URLs in the text are wrapped
  #               in ######LINKRAW:...###### markers.
  def html2text(string_only = false, strict = false)
    string = "#{self}" # rubocop:disable Style/UnneededInterpolation

    # in case of invalid encoding, strip invalid chars
    # see also test/fixtures/mail21.box
    # note: string.encode!('UTF-8', 'UTF-8', :invalid => :replace, :replace => '?') was not detecting invalid chars
    string = string.chars.select(&:valid_encoding?).join unless string.valid_encoding?

    # remove html comments
    string.gsub!(/<!--.+?-->/m, '')

    link_list = ''.dup
    counter = 0
    if !string_only
      # find <a href=....> and replace it with [x]
      string.gsub!(/<a[[:space:]].*?href=("|')(.+?)("|').*?>/ix) {
        link = Regexp.last_match(2)
        counter += 1
        link_list << "[#{counter}] #{link}\n"
        "[#{counter}] "
      }
    else
      # replace complete <a href=....>text</a> elements inline
      string.gsub!(%r{<a[[:space:]]+(|\S+[[:space:]]+)href=("|')(.+?)("|')([[:space:]]*|[[:space:]]+[^>]*)>(.+?)<[[:space:]]*/a[[:space:]]*>}mxi) {
        # read all captures first, the regexp calls below overwrite the match data
        link = Regexp.last_match(3).strip
        text = Regexp.last_match(6).gsub(/\<.+?\>/, '').strip

        # normalized variants, only used to compare link and text
        link_compare = link.downcase.sub(%r{/$}, '')
        text_compare = text.downcase.sub(%r{/$}, '')

        if !link_compare.empty? && text_compare.empty?
          link
        elsif link_compare.empty? && !text_compare.empty?
          text
        elsif link_compare && link_compare =~ /^mailto/i
          text
        elsif !link_compare.empty? && !text_compare.empty? && (link_compare == text_compare || link_compare == "mailto:#{text}".downcase || link_compare == "http://#{text}".downcase)
          "######LINKEXT:#{link}/TEXT:#{text}######"
        elsif text !~ /^http/
          "#{text} (######LINKRAW:#{link}######)"
        else
          "#{link} (######LINKRAW:#{text}######)"
        end
      }
    end

    # remove style tags with content
    string.gsub!(%r{<style(|[[:space:]].+?)>(.+?)</style>}im, '')

    # remove empty lines
    string.gsub!(/^[[:space:]]*/m, '')

    # keep a small set of formatting tags as markers
    if strict
      string.gsub!(%r{< [[:space:]]* (/*) [[:space:]]* (b|i|ul|ol|li|u|h1|h2|h3|hr) ([[:space:]]*|[[:space:]]+[^>]*) >}mxi, '######\1\2######')
    end

    # pre/code handling 1/2 - protect the new lines inside of <pre>/<code>
    string.gsub!(%r{<pre>(.+?)</pre>}m) { |block| block.gsub(/\n/, '###BR###') }
    string.gsub!(%r{<code>(.+?)</code>}m) { |block| block.gsub(/\n/, '###BR###') }

    # insert spaces on [A-z]\n[A-z]
    # NOTE: the range A-z also covers the ASCII chars between "Z" and "a" ([ \ ] ^ _ `)
    string.gsub!(/([A-z])[[:space:]]([A-z])/m, '\1 \2')

    # remove all new lines
    string.gsub!(/(\n\r|\r\r\n|\r\n|\n)/, '')

    # blockquote handling
    string.gsub!(%r{<blockquote(| [^>]*)>(.+?)</blockquote>}m) {
      "\n" + Regexp.last_match(2).html2text(true).gsub(/^(.*)$/, '&gt; \1') + "\n"
    }

    # pre/code handling 2/2 - restore the protected new lines
    string.gsub!(/###BR###/, "\n")

    # add counting
    string.gsub!(/<li(| [^>]*)>/i, "\n* ")

    # add hr
    string.gsub!(%r{<hr(|/| [^>]*)>}i, "\n___\n")

    # add h\d
    string.gsub!(%r{</h\d>}i, "\n")

    # add new lines
    string.gsub!(%r{</div><div(|[[:space:]].+?)>}im, "\n")
    string.gsub!(%r{</p><p(|[[:space:]].+?)>}im, "\n")
    string.gsub!(%r{<(div|p|pre|br|table|tr|h)(|/| [^>]*)>}i, "\n")
    string.gsub!(%r{</(p|br|div)(|[[:space:]].+?)>}i, "\n")
    string.gsub!(%r{</td>}i, ' ')

    # strip all other tags
    string.gsub!(/\<.+?\>/, '')

    # replace multiple spaces with one
    string.gsub!(/ {2,}/, ' ')

    # add hyperlinks
    if strict
      string.gsub!(%r{([[:space:]])((http|https|ftp|tel)://.+?|(www\..+?))([[:space:]]|\.[[:space:]]|,[[:space:]])}mxi) {
        # read all captures first, the regexp call below overwrites the match data
        pre     = Regexp.last_match(1)
        content = Regexp.last_match(2)
        post    = Regexp.last_match(5)
        content = "http://#{content}" if content =~ /^www/i
        "#{pre}######LINKRAW:#{content}#######{post}"
      }
    end

    # try HTMLEntities, if it fails on invalid signs, use manual way
    # (the bare rescue also covers a NameError if HTMLEntities is not loaded)
    begin
      coder = HTMLEntities.new
      string = coder.decode(string)
    rescue
      named_entities = { 'amp' => '&', 'lt' => '<', 'gt' => '>', 'quot' => '"', 'nbsp' => ' ' }

      # code point -> char in the encoding of the string, '?' if that is not possible
      to_char = lambda { |codepoint|
        begin
          codepoint.chr(Encoding::UTF_8).encode(string.encoding)
        rescue RangeError, EncodingError
          '?'
        end
      }

      # decode &amp; &lt; &gt; &quot; &nbsp; and numeric entities like "&#8211;" or "&#x3d;"
      # in one single pass, so that e. g. "&amp;lt;" ends up as "&lt;" and is not decoded twice
      string.gsub!(/&(?:(amp|lt|gt|quot|nbsp);|\#(\d+);?|\#[xX]([0-9a-fA-F]+);?)/) {
        name    = Regexp.last_match(1)
        decimal = Regexp.last_match(2)
        hex     = Regexp.last_match(3)
        if name
          named_entities[name]
        elsif decimal
          to_char.call(decimal.to_i)
        else
          to_char.call(hex.hex)
        end
      }
    end

    # remove tailing empty spaces
    string.gsub!(/[[:blank:]]+$/, '')

    # remove double multiple empty lines
    string.gsub!(/\n\n\n+/, "\n\n")

    # add extracted links
    string << "\n\n\n" << link_list unless link_list.empty?

    # remove double multiple empty lines
    string.gsub!(/\n\n\n+/, "\n\n")

    string.strip
  end
  # Converts plain text into HTML.
  #
  #   html = text_string.text2html
  #
  # HTML special characters are escaped and every "\n" becomes '<br>'.
  def text2html
    CGI.escapeHTML(self).gsub(/\n/, '<br>').chomp
  end
  # Reduces HTML to a strict subset of markup.
  #
  #   html = html_string.html2html_strict
  #
  # The string is converted to text with html2text(true, true), passed through
  # signature_identify(force), converted back with text2html, and finally the
  # ######...###### markers left by those steps are turned into tags.
  def html2html_strict(force = false)
    text = html2text(true, true)
    text.signature_identify(force) # inserts the markers in place, return value is not needed
    html = text.text2html

    # link markers -> anchors
    html.gsub!(%r{######LINKEXT:(.+?)/TEXT:(.+?)######}, '<a href="\1" target="_blank">\2</a>')
    html.gsub!(/######LINKRAW:(.+?)######/, '<a href="\1" target="_blank">\1</a>')

    # only the first signature marker becomes a span, all further ones are dropped
    html.sub!(/######SIGNATURE_MARKER######/, '<span class="js-signatureMarker"></span>')
    html.gsub!(/######SIGNATURE_MARKER######/, '')

    # all remaining markers are turned back into tags
    html.gsub!(/######(.+?)######/, '<\1>')
    html.chomp
  end
  # Marks the places where a signature or a quoted reply header starts.
  #
  # The string is modified in place: '######SIGNATURE_MARKER######' is inserted
  # in front of the first "--" separator line and in front of the first match
  # of every known reply header pattern.
  #
  # force - false (default): strings with less than 10 lines and less than
  #         300 chars are left untouched and nil is returned.
  #
  # Returns the string itself, or nil if it was skipped.
  def signature_identify(force = false)
    # if we do have less then 10 lines and less then 300 chars ignore this
    return if !force && split("\n").count < 10 && length < 300

    marker = '######SIGNATURE_MARKER######'

    # search for signature separator "--\n"
    sub!(/^\s{0,2}--\s{0,2}$/) { |separator| "#{marker}#{separator}" }

    reply_headers = {
      # Apple Mail
      # On 01/04/15 10:55, Bob Smith wrote:
      'apple-en' => '^(On)[[:space:]].{6,20}[[:space:]].{3,10}[[:space:]].{1,250}[[:space:]](wrote):',

      # Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@znuny.ink>:
      'apple-de' => '^(Am)[[:space:]].{6,20}[[:space:]](um)[[:space:]].{3,10}[[:space:]](schrieb)[[:space:]].{1,250}:',

      # Thunderbird
      # Am 04.03.2015 um 12:47 schrieb Alf Aardvark:
      'thunderbird-de' => '^(Am)[[:space:]].{6,20}[[:space:]](um)[[:space:]].{3,10}[[:space:]](schrieb)[[:space:]].{1,250}:',

      # Thunderbird default - http://kb.mozillazine.org/Reply_header_settings
      # On 01-01-2007 11:00 AM, Alf Aardvark wrote:
      'thunderbird-en-default' => '^(On)[[:space:]].{6,20}[[:space:]].{3,10},[[:space:]].{1,250}(wrote):',

      # http://kb.mozillazine.org/Reply_header_settings
      # Alf Aardvark wrote, on 01-01-2007 11:00 AM:
      'thunderbird-en' => '^.{1,250}[[:space:]](wrote),[[:space:]]on[[:space:]].{3,20}:',

      # otrs
      # 25.02.2015 10:26 - edv hotline wrote:
      # 25.02.2015 10:26 - edv hotline schrieb:
      'otrs-en-de' => '^.{6,10}[[:space:]].{3,10}[[:space:]]-[[:space:]].{1,250}[[:space:]](wrote|schrieb):',

      # Ms
      # rubocop:disable Style/AsciiComments
      # From: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]
      # Send: Donnerstag, 2. April 2015 10:00
      # To/Cc/Bcc: xxx
      # Subject: xxx
      # - or -
      # From: xxx
      # To/Cc/Bcc: xxx
      # Date: 01.04.2015 12:41
      # Subject: xxx
      # - or -
      # De : xxx
      # À/?/?: xxx
      # Envoyé : mercredi 29 avril 2015 17:31
      # Objet : xxx
      # rubocop:enable Style/AsciiComments
      # en/de/fr | sometimes ms adds a space to "xx : value"
      'ms-en-de-fr_from'      => '^(From|Von|De)( ?):[[:space:]].+?',
      'ms-en-de-fr_from_html' => "\n######b######(From|Von|De)([[:space:]]?):([[:space:]]?)(######\/b######)[[:space:]].+?",

      # word 14 (disabled)
      # edv hotline wrote:
      # edv hotline schrieb:
      # 'word-en-de' => "[^#{marker}].{1,250}\s(wrote|schrieb):",
    }

    # put the marker in front of the first match of every pattern
    reply_headers.each_value do |pattern|
      sub!(/#{pattern}/) { |header| "#{marker}#{header}" }
    end

    self
  end
  309. end