string.rb 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  class String
    # Prefixes every line of the string with "> " (e-mail style quoting).
    #
    #   "a\nb".message_quote # => "> a\n> b\n"
    #
    # @return [String] the quoted text; every quoted line ends with "\n"
    def message_quote
      split("\n").map { |line| "> #{line}\n" }.join
    end

    # Wraps every line that is longer than the given width at whitespace.
    #
    # The width may be given positionally (`word_wrap(40)`) or via the
    # `:line_width` option (`word_wrap(line_width: 40)`); a positional value
    # takes precedence. The default width is 82.
    #
    # @return [String] the wrapped text, lines joined with "\n"
    def word_wrap(*args)
      options = args.extract_options!
      line_width = args.empty? ? options.fetch(:line_width, 82) : (args.first || 82)

      # the pattern only depends on the width, so build it once for all lines
      wrap_pattern = /(.{1,#{line_width}})(\s+|$)/

      split("\n").map do |line|
        line.length > line_width ? line.gsub(wrap_pattern, "\\1\n").strip : line
      end.join("\n")
    end

    # Converts a class/module name into the matching file name.
    #
    #   'Some::Module'.to_filename # => 'some/module'
    #
    # @return [String]
    def to_filename
      gsub(/::/, '/')
        .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
        .gsub(/([a-z\d])([A-Z])/, '\1_\2')
        .tr('-', '_')
        .downcase
    end

    # Converts a file name into the matching class/module name.
    #
    #   'some/module.rb'.to_classname # => 'Some::Module'
    #
    # @return [String]
    def to_classname
      gsub(/\.rb$/, '').split('/').map(&:camelize).join('::')
    end

    # Because of MySQL InnoDB limitations, strip 4 byte UTF-8 chars (e. g. emojis).
    # Unfortunately utf8mb4 brings other limitations (max varchar length and
    # smaller index sizes).
    # More details: http://pjambet.github.io/blog/emojis-and-mysql/
    #
    # @return [String] self if the application is configured for 4 byte UTF-8
    #   (`Rails.application.config.db_4bytes_utf8`), otherwise a copy without
    #   the characters that need more than 3 bytes
    def utf8_to_3bytesutf8
      return self if Rails.application.config.db_4bytes_utf8

      each_char.reject do |char|
        too_wide = char.bytesize > 3
        Rails.logger.warn "strip out 4 bytes utf8 chars '#{char}' of '#{self}'" if too_wide
        too_wide
      end.join
    end

    # Converts an HTML string into plain text.
    #
    #   text = html_string.html2text
    #
    # Links are replaced by "[n] " markers and listed at the end of the text,
    # block level tags become new lines, blockquotes are prefixed with "> " and
    # HTML entities are decoded.
    #
    # @param string_only [Boolean] when true, links are not extracted into the
    #   trailing link list (used for the recursive call on blockquote content)
    # @return [String] 'string with text only'
    def html2text(string_only = false)
      # work on a plain, unfrozen String copy - the receiver is never modified
      string = String.new(self)

      # in case of invalid encoding, strip invalid chars
      # see also test/fixtures/mail21.box
      # note: string.encode!('UTF-8', 'UTF-8', :invalid => :replace, :replace => '?') was not detecting invalid chars
      unless string.valid_encoding?
        string = string.chars.select(&:valid_encoding?).join
      end

      # find <a href=....> and replace it with [x]
      link_list = ''
      counter = 0
      unless string_only
        string.gsub!(/<a\s.*?href=("|')(.+?)("|').*?>/ix) do
          link = Regexp.last_match(2)
          counter += 1
          link_list += "[#{counter}] #{link}\n"
          "[#{counter}] "
        end
      end

      # remove style tags with content
      string.gsub!(%r{<style(|\s.+?)>(.+?)</style>}im, '')

      # remove empty lines
      string.gsub!(/^\s*/m, '')

      # pre/code handling 1/2: protect new lines inside of <pre>/<code>
      string.gsub!(%r{<pre>(.+?)</pre>}m) { |block| block.gsub(/\n/, '###BR###') }
      string.gsub!(%r{<code>(.+?)</code>}m) { |block| block.gsub(/\n/, '###BR###') }

      # insert spaces on [A-z]\n[A-z]
      # NOTE(review): the range A-z also covers the ASCII punctuation between
      # "Z" and "a" ([ \ ] ^ _ `); kept as is to not change existing output
      string.gsub!(/([A-z])\n([A-z])/m, '\1 \2')

      # remove all new lines
      string.gsub!(/(\n\r|\r\r\n|\r\n|\n)/, '')

      # blockquote handling ("&gt; " is decoded to "> " by the entity step below)
      string.gsub!(%r{<blockquote(| [^>]*)>(.+?)</blockquote>}m) do
        "\n" + Regexp.last_match(2).html2text(true).gsub(/^(.*)$/, '&gt; \1') + "\n"
      end

      # pre/code handling 2/2: restore the protected new lines
      string.gsub!(/###BR###/, "\n")

      # add counting
      string.gsub!(/<li(| [^>]*)>/i, "\n* ")

      # add hr
      string.gsub!(%r{<hr(|/| [^>]*)>}i, "\n___\n")

      # add h\d
      string.gsub!(%r{</h\d>}i, "\n")

      # add new lines
      string.gsub!(%r{</div><div(|\s.+?)>}im, "\n")
      string.gsub!(%r{</p><p(|\s.+?)>}im, "\n")
      string.gsub!(%r{<(div|p|pre|br|table|h)(|/| [^>]*)>}i, "\n")
      string.gsub!(%r{</(tr|p|br|div)(|\s.+?)>}i, "\n")
      string.gsub!(%r{</td>}i, ' ')

      # strip all other tags
      string.gsub!(/\<.+?\>/, '')

      # replace multiple spaces with one
      string.gsub!(/ {2,}/, ' ')

      # try HTMLEntities, if it fails on invalid signs, use manual way
      begin
        string = HTMLEntities.new.decode(string)
      rescue StandardError
        # Decode &amp; &lt; &gt; &quot; &nbsp; and numeric entities like
        # "&#8211;" or "&#x3d;" in ONE pass, so already decoded text is never
        # decoded a second time (e. g. "&amp;lt;" has to become "&lt;").
        named_entities = { 'amp' => '&', 'lt' => '<', 'gt' => '>', 'quot' => '"', 'nbsp' => ' ' }
        string.gsub!(/&(?:(amp|lt|gt|quot|nbsp);|\#(\d+);?|\#[xX]([0-9a-fA-F]+);?)/) do
          entity = Regexp.last_match
          if entity[1]
            named_entities[entity[1]]
          else
            codepoint = entity[2] ? entity[2].to_i : entity[3].hex
            begin
              # convert to the encoding of the text, so the replacement is
              # always compatible with it
              codepoint.chr(Encoding::UTF_8).encode(string.encoding)
            rescue RangeError, EncodingError
              # invalid code point or not representable in the text's encoding
              '?'
            end
          end
        end
      end

      # remove trailing empty spaces
      string.gsub!(/\s+\n$/, "\n")

      # remove multiple empty lines
      string.gsub!(/\n\n\n/, "\n\n")

      string.strip!

      # add extracted links
      string += "\n\n\n" + link_list unless link_list.empty?

      string.strip
    end

    # Converts plain text into HTML: escapes HTML special characters and
    # replaces every new line with "<br>".
    #
    #   html = text_string.text2html
    #
    # @return [String]
    def text2html
      text = CGI.escapeHTML(self)
      text.gsub!(/\n/, '<br>')
      # new lines are already replaced at this point, so chomp only drops a
      # trailing "\r"
      text.chomp
    end
  end