string.rb 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  # Core extensions for String: mail-style quoting, word wrapping and
  # lightweight HTML <-> plain text conversion helpers.
  class String

    # Quotes the text mail-style by prefixing every line with "> ".
    #
    #   "a\nb".message_quote # => "> a\n> b\n"
    #
    # @return [String] the quoted text; every line is terminated by "\n"
    def message_quote
      split("\n").each_with_object(''.dup) do |line, quoted|
        quoted << '> ' << line << "\n"
      end
    end

    # Wraps every line that is longer than the line width at whitespace.
    #
    #   text.word_wrap                 # wrap at 82 chars (default)
    #   text.word_wrap(40)             # positional width
    #   text.word_wrap(line_width: 40) # width via options hash
    #
    # A positional width takes precedence over the :line_width option.
    # A nil width falls back to the default of 82.
    #
    # @return [String] the wrapped text, lines joined by "\n"
    def word_wrap(*args)
      # the options hash is only read, never modified, so a hash owned by
      # the caller stays untouched
      options    = args.last.is_a?(Hash) ? args.pop : {}
      line_width = (args.empty? ? options[:line_width] : args.first) || 82

      # build the pattern once instead of once per line
      wrap_pattern = /(.{1,#{line_width}})(\s+|$)/

      split("\n").map do |line|
        line.length > line_width ? line.gsub(wrap_pattern, "\\1\n").strip : line
      end.join("\n")
    end

    # Converts a namespaced name into a lowercase path.
    #
    #   'Foo::BarBaz'.to_filename # => "foo/barbaz"
    #
    # @return [String]
    def to_filename
      gsub('::', '/').downcase
    end

    # Because of MySQL InnoDB limitations, strip 4 byte UTF-8 chars (e. g. emojis).
    # Unfortunately utf8mb4 brings other limitations (max varchar length and
    # lower index sizes).
    # More details: http://pjambet.github.io/blog/emojis-and-mysql/
    #
    # Prints a warning to stdout for every character that is stripped.
    #
    # @return [String] self (unchanged) if the configured adapter is not
    #   'mysql2', otherwise a new string without chars longer than 3 bytes
    def utf8_to_3bytesutf8
      # hand back the string itself (not nil), so callers can always assign the result
      return self if ActiveRecord::Base.connection_config[:adapter] != 'mysql2'

      each_char.reject do |char|
        four_bytes = char.bytesize > 3
        puts "WARNING: strip out 4 bytes utf8 chars '#{char}' of '#{self}'" if four_bytes
        four_bytes
      end.join
    end

    # Converts HTML into plain text.
    #
    #   text = html_string.html2text
    #
    # - <a href="..."> tags are replaced by "[n] " markers, the link targets
    #   are appended as a numbered list at the end of the text
    # - line breaks inside <pre>/<code> are kept, all other line breaks are
    #   derived from the markup (br, p, div, li, hr, table, ...)
    # - the entities &amp; &lt; &gt; &quot; &nbsp; and numeric entities
    #   (&#8211; / &#x2013;) are decoded; code points which are invalid or
    #   not representable in the string's encoding become "?"
    #
    # The receiver is not modified.
    #
    # @return [String] the plain text, stripped of surrounding whitespace
    def html2text
      # Always work on a copy, so the receiver stays untouched (and may be frozen).
      # In case of invalid encoding, strip the invalid chars,
      # see also test/fixtures/mail21.box
      # note: string.encode!('UTF-8', 'UTF-8', :invalid => :replace, :replace => '?')
      # was not detecting invalid chars
      string = valid_encoding? ? dup : chars.select(&:valid_encoding?).join

      # find <a href=....> and replace it with [x]
      links = []
      string.gsub!(/<a\s.*?href=("|')(.+?)("|').*?>/ix) do
        links << Regexp.last_match(2)
        "[#{links.size}] "
      end

      # remove empty lines (and leading whitespace of every line)
      string.gsub!(/^\s*/m, '')

      # pre/code handling 1/2: protect the line breaks with a placeholder
      string.gsub!(%r{<pre>(.+?)</pre>}m)   { |block| block.gsub("\n", '###BR###') }
      string.gsub!(%r{<code>(.+?)</code>}m) { |block| block.gsub("\n", '###BR###') }

      # remove all new lines
      string.gsub!(/(\n\r|\r\r\n|\r\n|\n)/, '')

      # pre/code handling 2/2: restore the protected line breaks
      string.gsub!('###BR###', "\n")

      # add counting
      string.gsub!(/<li(| [^>]*)>/i, "\n* ")

      # add quoting
      string.gsub!(/<blockquote(| [^>]*)>/i, '> ')

      # add hr
      string.gsub!(%r{<hr(|/| [^>]*)>}i, "___\n")

      # add new lines
      string.gsub!(%r{<(br|table)(|/| [^>]*)>}i, "\n")
      string.gsub!(%r{</(div|p|pre|blockquote|table|tr)(|\s.+?)>}i, "\n")
      string.gsub!(%r{</td>}i, ' ')

      # strip all other tags
      string.gsub!(/<.+?>/, '')

      # Decode &amp; &lt; &gt; &quot; &nbsp; and numeric entities like
      # "&#8211;" or "&#x2013;". This is done in ONE pass, so already decoded
      # text is never decoded a second time ("&amp;lt;" => "&lt;", not "<").
      named_entities  = { 'amp' => '&', 'lt' => '<', 'gt' => '>', 'quot' => '"', 'nbsp' => ' ' }
      target_encoding = string.encoding
      decode_codepoint = lambda do |codepoint|
        begin
          codepoint.chr(Encoding::UTF_8).encode(target_encoding)
        rescue RangeError, EncodingError
          # invalid code point or not representable in the target encoding
          '?'
        end
      end
      string.gsub!(/&(?:(amp|lt|gt|quot|nbsp);|\#(\d+);?|\#[xX]([0-9a-fA-F]+);?)/) do
        entity = Regexp.last_match
        if entity[1]
          named_entities[entity[1]]
        elsif entity[2]
          decode_codepoint.call(entity[2].to_i)
        else
          decode_codepoint.call(entity[3].hex)
        end
      end

      # remove trailing empty spaces
      string.gsub!(/\s+\n$/, "\n")

      # remove multiple empty lines
      string.gsub!(/\n\n\n/, "\n\n")

      # add extracted links
      unless links.empty?
        link_list = links.each_with_index.map { |link, index| "[#{index + 1}] #{link}\n" }.join
        string += "\n\n#{link_list}"
      end

      string.strip
    end

    # Converts plain text into HTML: escapes HTML special chars and replaces
    # line breaks by <br>.
    #
    #   html = text_string.text2html
    #
    # @return [String]
    def text2html
      CGI.escapeHTML(self).gsub("\n", '<br>').chomp
    end
  end