# encode.rb 1010 B

# require 'iconv'  # legacy dependency, left disabled
  # Helpers for normalising strings of various charsets to UTF-8.
  module Encode
    # Convert +string+ from +charset+ to UTF-8.
    #
    # A non-frozen +string+ is converted in place and the same object is
    # returned. A frozen +string+ is left untouched and a converted copy is
    # returned instead (previously this raised FrozenError).
    #
    # @param charset [String, nil] name of the source encoding. +nil+,
    #   'US-ASCII' and 'ASCII-8BIT' are treated as 'ISO-8859-15'.
    # @param string [String, nil, false] text to convert.
    # @return [String, nil, false] the UTF-8 string, or +string+ itself when
    #   it is +nil+ or +false+.
    # @raise [EncodingError] for a non-UTF-8 +charset+ when the charset is
    #   unknown or the data cannot be converted from it. The UTF-8 branch
    #   never raises this: it falls back to replacing bad bytes with '?'.
    def self.conv(charset, string)
      # nil/false pass straight through
      return string unless string

      # force_encoding/encode! below mutate their receiver, which is not
      # possible for frozen strings; work on a copy in that case only, so
      # callers relying on in-place conversion keep that behaviour.
      string = string.dup if string.frozen?

      # If no usable charset is given, default to ISO-8859-15 (Latin-9).
      if !charset || charset == 'US-ASCII' || charset == 'ASCII-8BIT'
        charset = 'ISO-8859-15'
      end

      # Data that claims to be UTF-8 is validated rather than transcoded.
      if %w[utf8 utf-8].include?(charset.downcase)
        begin
          # Return immediately when the bytes are already valid UTF-8.
          utf8 = string.force_encoding('UTF-8')
          return utf8 if utf8.valid_encoding?

          # Otherwise try reading the bytes as Windows-1252.
          string.encode!('UTF-8', 'Windows-1252')
        rescue EncodingError
          # Some bytes have no Windows-1252 mapping: report the string and
          # replace whatever is not valid UTF-8 with '?'.
          puts "Bad encoding: #{string.inspect}"
          string.encode!('UTF-8', invalid: :replace, undef: :replace, replace: '?')
        end
        return string
      end

      # Any other charset: plain transcoding to UTF-8.
      string.encode!('UTF-8', charset)
    end
  end