Browse Source

Because of MySQL InnoDB limitations, strip 4-byte UTF-8 characters (e.g. emojis). Unfortunately, utf8mb4 brings other limitations (maximum varchar length and lower index sizes). More details: http://pjambet.github.io/blog/emojis-and-mysql/

Martin Edenhofer 10 years ago
parent
commit
bf42e989bb
4 changed files with 28 additions and 4 deletions
  1. 6 1
      app/models/application_model.rb
  2. 15 0
      lib/core_ext/string.rb
  3. 3 1
      test/unit/ticket_test.rb
  4. 4 2
      test/unit/twitter_test.rb

+ 6 - 1
app/models/application_model.rb

@@ -863,9 +863,14 @@ check string/varchar size and cut them if needed
         current_length = attribute[1].to_s.length
         if limit < current_length
           puts "WARNING: cut string because of database length #{self.class.to_s}.#{attribute[0]}(#{limit} but is #{current_length}:#{attribute[1].to_s})"
-          self[attribute[0]] = attribute[1][ 0, limit ]
+          self[ attribute[0] ] = attribute[1][ 0, limit ]
         end
       end
+
+      # strip 4 bytes utf8 chars if needed
+      if column && self[ attribute[0] ]
+        self[attribute[0]] = self[ attribute[0] ].utf8_to_3bytesutf8
+      end
     }
   end
 

+ 15 - 0
lib/core_ext/string.rb

@@ -23,4 +23,19 @@ class String
     camel_cased_word = self.to_s
     camel_cased_word.gsub(/::/, '/').downcase
   end
+
+  # because of mysql inno_db limitations, strip 4 bytes utf8 chars (e. g. emojis)
+  # unfortunaly UTF8mb4 will raise other limitaions of max varchar and lower index sizes
+  # More details: http://pjambet.github.io/blog/emojis-and-mysql/
+  def utf8_to_3bytesutf8
+    return if ActiveRecord::Base.connection_config[:adapter] != 'mysql2'
+    self.each_char.select {|c|
+      if c.bytes.count > 3
+        puts "WARNING: strip out 4 bytes utf8 chars '#{c}' of '#{ self }'"
+        next
+      end
+      c
+    }
+    .join('')
+  end
 end

+ 3 - 1
test/unit/ticket_test.rb

@@ -25,13 +25,15 @@ class TicketTest < ActiveSupport::TestCase
       :to                     => 'some_recipient@example.com',
       :subject                => 'some subject',
       :message_id             => 'some@id',
-      :body                   => 'some message',
+      :body                   => 'some message article_inbound 😍😍😍',
       :internal               => false,
       :ticket_article_sender  => Ticket::Article::Sender.where(:name => 'Customer').first,
       :ticket_article_type    => Ticket::Article::Type.where(:name => 'email').first,
       :updated_by_id          => 1,
       :created_by_id          => 1,
     )
+    assert_equal( article_inbound.body, 'some message article_inbound 😍😍😍'.utf8_to_3bytesutf8, 'article_inbound.body verify - inbound' )
+
     ticket = Ticket.find(ticket.id)
     assert_equal( ticket.article_count, 1, 'ticket.article_count verify - inbound' )
     assert_equal( ticket.last_contact.to_s, article_inbound.created_at.to_s, 'ticket.last_contact verify - inbound' )

+ 4 - 2
test/unit/twitter_test.rb

@@ -96,7 +96,7 @@ class TwitterTest < ActiveSupport::TestCase
     end
 
     reply_hash = '#weather' + rand(9999).to_s
-    reply_text = '@armin_theo on my side the weather is also nice! ' + reply_hash
+    reply_text = '@armin_theo on my side the weather is also nice! 😍😍😍 ' + reply_hash
     tweet = client.update(
       reply_text,
       {
@@ -109,10 +109,12 @@ class TwitterTest < ActiveSupport::TestCase
     # fetch check system account
     Channel.fetch
 
+    reply_text = reply_text.utf8_to_3bytesutf8
+
     # check if follow up article has been created
     assert_equal( article.ticket.articles.count, 2 )
     reply_article = article.ticket.articles.last
-    assert_equal( reply_article.body, reply_text )
+    assert_equal( reply_article.body, reply_text )
 
   end