# string.rb
  # Core extensions for String: mail quoting, word wrapping, file/class name
  # conversion, MySQL-safe UTF-8 filtering and HTML <-> plain text conversion.
  class String

    # Quotes a text (e. g. for a mail reply) by prefixing every line with "> ".
    #
    #   "a\nb".message_quote # => "> a\n> b\n"
    #
    # @return [String] quoted text, every line terminated by "\n"
    #   (an empty receiver yields an empty string)
    def message_quote
      split("\n").map { |line| "> #{line}\n" }.join
    end

    # Wraps every line that is longer than the line width. Breaks are only
    # inserted at whitespace, words longer than the width are not split.
    #
    #   text.word_wrap                 # default width of 82
    #   text.word_wrap(40)             # positional width
    #   text.word_wrap(line_width: 40) # width as option
    #
    # A positional width takes precedence over the :line_width option; a nil
    # width falls back to 82.
    #
    # @return [String] wrapped text, lines joined by "\n"
    def word_wrap(*args)
      # plain Ruby replacement for extract_options!, the caller's hash is not modified
      options = args.last.is_a?(Hash) ? args.pop : {}
      width   = (args.empty? ? options[:line_width] : args.first) || 82
      pattern = /(.{1,#{width}})(\s+|$)/

      wrapped = split("\n").map do |line|
        line.length > width ? line.gsub(pattern, "\\1\n").strip : line
      end
      wrapped.join("\n")
    end

    # Converts a class name into a file name.
    #
    #   filename = 'Some::Module'.to_filename
    #
    # returns
    #
    #   'some/module'
    def to_filename
      gsub(/::/, '/').downcase
    end

    # Converts a file name into a class name (relies on String#camelize).
    #
    #   classname = 'some/module.rb'.to_classname
    #
    # returns
    #
    #   'Some::Module'
    def to_classname
      gsub(/\.rb$/, '').split('/').map(&:camelize).join('::')
    end

    # Because of MySQL InnoDB limitations, strip 4 byte UTF-8 chars (e. g. emojis).
    # Unfortunately utf8mb4 brings other limitations (max varchar length and
    # smaller index sizes).
    # More details: http://pjambet.github.io/blog/emojis-and-mysql/
    #
    # @return [String] the receiver itself if the database adapter is not
    #   mysql2, otherwise a copy without chars longer than 3 bytes
    def utf8_to_3bytesutf8
      # always hand back a string, so callers can use the result on every adapter
      return self if ActiveRecord::Base.connection_config[:adapter] != 'mysql2'

      kept = each_char.reject do |char|
        too_long = char.bytesize > 3
        Rails.logger.warn "strip out 4 bytes utf8 chars '#{char}' of '#{self}'" if too_long
        too_long
      end
      kept.join
    end

    # Converts HTML into plain text.
    #
    #   text = html_string.html2text
    #
    # returns
    #
    #   'string with text only'
    #
    # Links are replaced by "[n] " markers and listed at the end of the text.
    #
    # @param string_only [Boolean] if true, links are neither numbered nor
    #   listed (used internally for the content of blockquotes)
    # @return [String] plain text
    def html2text(string_only = false)
      # work on a plain, unfrozen copy - the receiver is never modified
      string = String.new(self)

      # in case of invalid encoding, strip invalid chars
      # see also test/fixtures/mail21.box
      # note: string.encode!('UTF-8', 'UTF-8', :invalid => :replace, :replace => '?') was not detecting invalid chars
      string = string.chars.select(&:valid_encoding?).join unless string.valid_encoding?

      # find <a href=....> and replace it with [x]
      links = []
      unless string_only
        string.gsub!(/<a\s.*?href=("|')(.+?)("|').*?>/ix) do
          links << Regexp.last_match(2)
          "[#{links.size}] "
        end
      end

      # remove style tags with content
      string.gsub!(%r{<style(|\s.+?)>(.+?)</style>}im, '')

      # remove empty lines (and leading whitespace of lines)
      string.gsub!(/^\s*/m, '')

      # pre/code handling 1/2 - protect line breaks from the removal below
      string.gsub!(%r{<pre>(.+?)</pre>}m) { |block| block.gsub(/\n/, '###BR###') }
      string.gsub!(%r{<code>(.+?)</code>}m) { |block| block.gsub(/\n/, '###BR###') }

      # remove all new lines
      string.gsub!(/(\n\r|\r\r\n|\r\n|\n)/, '')

      # blockquote handling - convert the content on its own and quote it
      string.gsub!(%r{<blockquote(| [^>]*)>(.+?)</blockquote>}m) do
        quoted = Regexp.last_match(2).html2text(true)
        "\n#{quoted.gsub(/^(.*)$/, '&gt; \1')}\n"
      end

      # pre/code handling 2/2 - restore protected line breaks
      string.gsub!(/###BR###/, "\n")

      # list items
      string.gsub!(/<li(| [^>]*)>/i, "\n* ")

      # add hr
      string.gsub!(%r{<hr(|/| [^>]*)>}i, "\n___\n")

      # add h\d
      string.gsub!(%r{</h\d>}i, "\n")

      # add new lines
      string.gsub!(%r{</div><div(|\s.+?)>}im, "\n")
      string.gsub!(%r{</p><p(|\s.+?)>}im, "\n")
      string.gsub!(%r{<(div|p|pre|br|table|h)(|/| [^>]*)>}i, "\n")
      string.gsub!(%r{</(tr|p|br|div)(|\s.+?)>}i, "\n")
      string.gsub!(%r{</td>}i, ' ')

      # strip all other tags
      string.gsub!(/\<.+?\>/, '')

      # replace multiple spaces with one
      string.gsub!(/ {2,}/, ' ')

      # decode &amp; &lt; &gt; &quot; &nbsp;
      string.gsub!('&amp;', '&')
      string.gsub!('&lt;', '<')
      string.gsub!('&gt;', '>')
      string.gsub!('&quot;', '"')
      string.gsub!('&nbsp;', ' ')

      # Numeric entities are decoded explicitly as UTF-8, so the result does
      # not depend on Encoding.default_internal; invalid code points become "?".
      decode_codepoint = lambda do |codepoint|
        begin
          char = codepoint.chr(Encoding::UTF_8)
          char.valid_encoding? ? char : '?'
        rescue RangeError
          '?'
        end
      end

      # decode html entities like "&#8211;"
      string.gsub!(/(&\#(\d+);?)/x) { decode_codepoint.call(Regexp.last_match(2).to_i) }

      # decode html entities like "&#x3d;"
      string.gsub!(/(&\#[xX]([0-9a-fA-F]+);?)/x) { decode_codepoint.call(Regexp.last_match(2).hex) }

      # remove trailing whitespace
      string.gsub!(/\s+\n$/, "\n")

      # remove multiple empty lines
      string.gsub!(/\n{3,}/, "\n\n")

      string.strip!

      # add extracted links
      unless links.empty?
        link_list = links.each_with_index.map { |link, index| "[#{index + 1}] #{link}\n" }.join
        string = "#{string}\n\n\n#{link_list}"
      end

      string.strip
    end

    # Converts plain text into HTML: escapes HTML special chars and turns
    # line breaks into <br>.
    #
    #   html = text_string.text2html
    #
    # @return [String] html
    def text2html
      CGI.escapeHTML(self).gsub(/\n/, '<br>').chomp
    end
  end