Browse Source

Improved email parsing (charset handling).

Martin Edenhofer 13 years ago
parent
commit
2a1e9dd65d

+ 79 - 50
app/models/channel/email_parser.rb

@@ -1,24 +1,79 @@
 require 'mail'
-
+require 'iconv'
 class Channel::EmailParser
-
-  def parse (channel, msg)
+  def conv (charset, string) # Convert +string+ from the encoding named by +charset+ to UTF-8 via Iconv.
+    if charset == 'US-ASCII' then # exact, case-sensitive match only; such input is converted as LATIN1 instead
+      charset = 'LATIN1' # presumably so that stray 8-bit bytes in mislabelled mails still convert — TODO confirm
+    end
+    Iconv.conv("UTF8", charset, string) # NOTE(review): presumably raises on unknown charsets or invalid byte sequences — confirm callers expect that
+  end
+  
+  def parse (msg) # Parse a raw mail (+msg+) into a Hash with header values, the plain body and the attachments.
+    data = {}
     mail = Mail.new( msg )
+
+    # headers: sender address and display name are taken from the From header value
+    data[:from_email]        = Mail::Address.new( mail[:from].value ).address # NOTE(review): mail[:from] is not nil-checked here, unlike mail[key] in the loop below
+    data[:from_display_name] = Mail::Address.new( mail[:from].value ).display_name
+    ['from', 'to', 'cc', 'subject'].each {|key| # stored under symbol keys; nil when the header is absent
+      data[key.to_sym] = mail[key] ? conv( mail[key].charset || 'LATIN1', mail[key].to_s) : nil # LATIN1 is the fallback when the header reports no charset
+    }
+
+    # message id (as string, nil when absent; not passed through conv)
+    data[:message_id] = mail['message_id'] ? mail['message_id'].to_s : nil
+
+    # body: for multipart mails the decoded text part (nil if there is none), otherwise the decoded body
+   #   plain_part = mail.multipart? ? (mail.text_part ? mail.text_part.body.decoded : nil) : mail.body.decoded
+  #    html_part = message.html_part ? message.html_part.body.decoded : nil
+    data[:plain_part] = mail.multipart? ? (mail.text_part ? mail.text_part.body.decoded : nil) : mail.body.decoded
+    data[:plain_part] = conv( mail.body.charset || 'LATIN1', data[:plain_part] ) # NOTE(review): charset is read from mail.body, not from the text part, and data[:plain_part] may be nil here — confirm both are intended
+
+    # attachments: each one is stored as a Hash with :data, :filename and :preferences
+    if mail.attachments
+      data[:attachments] = []
+      mail.attachments.each do |attachment|
+        
+        # get file preferences: first collect all header fields of the attachment by name
+        headers = {}
+        attachment.header.fields.each do |f|
+          headers[f.name] = f.value
+        end
+        headers_store = {} # the subset of metadata that is kept with the attachment
+        headers_store['Mime-Type'] = attachment.mime_type
+        if attachment.charset
+          headers_store['Charset'] = attachment.charset
+        end
+        ['Content-ID', 'Content-Type'].each do |item| # copied only when the attachment has that header
+          if headers[item]
+            headers_store[item] = headers[item]
+          end
+        end
+        attachment = { # rebinds the block variable to the plain Hash that is pushed below
+          :data        => attachment.body.decoded,
+          :filename    => attachment.filename,
+          :preferences => headers_store          
+        }
+        data[:attachments].push attachment
+      end
+    end
+    return data # Hash with symbol keys; process reads it as mail[:from_email], mail[:subject], mail[:attachments], ...
+  end
+
+  def process(channel, msg)
+    mail = parse( msg )
 
     # use transaction
     ActiveRecord::Base.transaction do
 
-      user = User.where( :email => from_email ).first
+      user = User.where( :email => mail[:from_email] ).first
       if !user then
         puts 'create user...'
         roles = Role.where( :name => 'Customer' )
         user = User.create(
-          :login          => from_email,
-          :firstname      => from_display_name,
+          :login          => mail[:from_email],
+          :firstname      => mail[:from_display_name],
           :lastname       => '',
-          :email          => from_email,
+          :email          => mail[:from_email],
           :password       => '',
           :active         => true,
           :roles          => roles,
@@ -29,16 +84,9 @@ class Channel::EmailParser
       # set current user
       UserInfo.current_user_id = user.id
   
-      def conv (charset, string)
-        if charset == 'US-ASCII' then
-          charset = 'LATIN1'
-        end
-        Iconv.conv("UTF8", charset, string)
-      end
-
       # get ticket# from subject
-      ticket = Ticket.number_check( mail[:subject].value )
-      
+      ticket = Ticket.number_check( mail[:subject] )
+
       # set ticket state to open if not new
       if ticket
         ticket_state      = Ticket::State.find( ticket.ticket_state_id )
@@ -54,7 +102,7 @@ class Channel::EmailParser
         ticket = Ticket.create(
           :group_id           => channel[:group_id],
           :customer_id        => user.id,
-          :title              => conv(mail['subject'].charset || 'LATIN1', mail['subject'].to_s),
+          :title              => mail[:subject],
           :ticket_state_id    => Ticket::State.where(:name => 'new').first.id,
           :ticket_priority_id => Ticket::Priority.where(:name => '2 normal').first.id,
           :created_by_id      => user.id
@@ -62,19 +110,17 @@ class Channel::EmailParser
       end
   
       # import mail
-      plain_part = mail.multipart? ? (mail.text_part ? mail.text_part.body.decoded : nil) : mail.body.decoded
-  #    html_part = message.html_part ? message.html_part.body.decoded : nil
       article = Ticket::Article.create(
         :created_by_id            => user.id,
         :ticket_id                => ticket.id, 
         :ticket_article_type_id   => Ticket::Article::Type.where(:name => 'email').first.id,
         :ticket_article_sender_id => Ticket::Article::Sender.where(:name => 'Customer').first.id,
-        :body                     => conv(mail.body.charset || 'LATIN1', plain_part), 
-        :from                     => mail['from']       ? conv(mail['from'].charset    || 'LATIN1', mail['from'].to_s) : nil,
-        :to                       => mail['to']         ? conv(mail['to'].charset      || 'LATIN1', mail['to'].to_s) : nil,
-        :cc                       => mail['cc']         ? conv(mail['cc'].charset      || 'LATIN1', mail['cc'].to_s) : nil,
-        :subject                  => mail['subject']    ? conv(mail['subject'].charset || 'LATIN1', mail['subject'].to_s) : nil,
-        :message_id               => mail['message_id'] ? mail['message_id'].to_s : nil,
+        :body                     => mail[:plain_part], 
+        :from                     => mail[:from],
+        :to                       => mail[:to],
+        :cc                       => mail[:cc],
+        :subject                  => mail[:subject],
+        :message_id               => mail[:message_id],
         :internal                 => false 
       )
 
@@ -88,35 +134,18 @@ class Channel::EmailParser
       )
 
       # store attachments
-      if mail.attachments
-        mail.attachments.each do |attachment|
-          
-          # get file preferences
-          headers = {}
-          attachment.header.fields.each do |f|
-            headers[f.name] = f.value
-          end
-          headers_store = {}
-          headers_store['Mime-Type'] = attachment.mime_type
-          if attachment.charset
-            headers_store['Charset'] = attachment.charset
-          end
-          ['Content-ID', 'Content-Type'].each do |item|
-            if headers[item]
-              headers_store[item] = headers[item]
-            end
-          end
-          
-          # store file
+      if mail[:attachments]
+        mail[:attachments].each do |attachment|
           Store.add(
             :object      => 'Ticket::Article',
             :o_id        => article.id,
-            :data        => attachment.body.decoded,
-            :filename    => attachment.filename,
-            :preferences => headers_store
+            :data        => attachment[:data],
+            :filename    => attachment[:filename],
+            :preferences => attachment[:preferences]
           )
         end
       end
+      return ticket, article, user
     end
 
     # execute ticket events      

+ 2 - 2
app/models/channel/imap.rb

@@ -6,7 +6,7 @@ class Channel::IMAP < Channel::EmailParser
   def fetch (channel)
     puts "fetching imap (#{channel[:options][:host]}/#{channel[:options][:user]})"
 
-    imap = Net::IMAP.new(channel[:options][:host], 993, true )
+    imap = Net::IMAP.new(channel[:options][:host], 993, true, nil, false )
     imap.authenticate('LOGIN', channel[:options][:user], channel[:options][:password])
     imap.select('INBOX')
     count     = 0
@@ -18,7 +18,7 @@ class Channel::IMAP < Channel::EmailParser
 #      puts msg.to_s
 
       # delete email from server after article was created      
-      if parse(channel, msg)
+      if process(channel, msg)
         imap.store(message_id, "+FLAGS", [:Deleted])
       end
     end

+ 1 - 1
app/models/channel/pop3.rb

@@ -16,7 +16,7 @@ class Channel::POP3 < Channel::EmailParser
       puts " - message #{count.to_s}/#{count_all.to_s}"
 
       # delete email from server after article was created
-      if parse(channel, m.pop)
+      if process(channel, m.pop)
         m.delete
       end
     end

+ 194 - 0
test/fixtures/mail1.box

@@ -0,0 +1,194 @@
+From martin@example.com  Thu May  3 12:04:29 2012
+Return-Path: <martin@example.com>
+X-Original-To: info@example.com
+Delivered-To: box@samba.example.com
+Received: from me.home (1-2-1-1.adsl.highway.example.com [1.2.1.1])
+	by samba.example.com (Postfix) with ESMTPSA id C96F8500D3D
+	for <info@example.com>; Thu,  3 May 2012 12:04:28 +0100 (BST)
+Subject: =?iso-8859-1?Q?CI_Daten_f=FCr_PublicView_?=
+Mime-Version: 1.0 (Apple Message framework v1257)
+Content-Type: multipart/alternative; boundary="Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394"
+From: <John.Smith@example.com>
+Resent-From: Martin Test <martin@example.com>
+Date: Thu, 3 May 2012 11:36:43 +0200
+Resent-Date: Thu, 3 May 2012 13:04:31 +0200
+Resent-To: info@example.com
+Message-Id: <053EA3703574649ABDAF24D43A05604F327A130@MEMASFRK004.example.com>
+To: <martin@example.com>
+X-Mailer: Apple Mail (2.1257)
+Status: RO
+X-Status: 
+X-Keywords:                 
+X-UID: 82
+
+
+--Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394
+Content-Transfer-Encoding: quoted-printable
+Content-Type: text/plain;
+	charset=iso-8859-1
+
+Hallo Martin,
+=20
+wie besprochen hier noch die Daten f=FCr die Intranetseite:
+=20
+Schriftart/-gr=F6=DFe: Verdana 11 Pt wenn von Browser nicht unterst=FCtzt =
+oder nicht vorhanden wird Arial 11 Pt genommen
+Schriftfarbe: Schwarz
+Farbe f=FCr die Balken in der Grafik: D7DDE9 (Blau)
+=20
+Wenn noch was fehlt oder du was brauchst sag mir Bescheid.
+=20
+Mit freundlichem Gru=DF=20
+
+John Smith
+Service und Support
+
+Example Service AG & Co.
+Management OHG
+Someware-Str. 4
+xxxxx Someware
+
+Tel.: +49 001 000 46
+Fax: +49 001 000 47
+john.smith@example.com
+www.example.com
+
+OHG mit Sitz in Someware
+AG: Someware - HRA XXX 
+Gesch=E4ftsf=FChrung: Tilman Test, Klaus J=FCrgen Test,
+Bernhard Test, Ulrich Test
+USt-IdNr. DE 1010101010
+
+Pers=F6nlich haftende gesch=E4ftsf=FChrende Gesellschafterin:
+Marie Test Example Stiftung, Someware
+Vorstand: Rolf Test
+
+Pers=F6nlich haftende Gesellschafterin:
+Example Service AG, Someware
+AG: Someware - HRB xxx
+Vorstand: Marie Test
+=20=
+
+--Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394
+Content-Transfer-Encoding: quoted-printable
+Content-Type: text/html;
+	charset=iso-8859-1
+
+<html><head><base href=3D"x-msg://2849/"></head><body style=3D"word-wrap: =
+break-word; -webkit-nbsp-mode: space; -webkit-line-break: =
+after-white-space; "><span class=3D"Apple-style-span" =
+style=3D"border-collapse: separate; font-family: Helvetica; font-style: =
+normal; font-variant: normal; font-weight: normal; letter-spacing: =
+normal; line-height: normal; orphans: 2; text-align: -webkit-auto; =
+text-indent: 0px; text-transform: none; white-space: normal; widows: 2; =
+word-spacing: 0px; -webkit-border-horizontal-spacing: 0px; =
+-webkit-border-vertical-spacing: 0px; =
+-webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: =
+auto; -webkit-text-stroke-width: 0px; font-size: medium; "><div =
+lang=3D"DE" link=3D"blue" vlink=3D"purple"><div class=3D"Section1" =
+style=3D"page: Section1; "><div style=3D"margin-top: 0cm; margin-right: =
+0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; =
+font-family: Calibri, sans-serif; "><span style=3D"font-size: 10pt; =
+font-family: Arial, sans-serif; ">Hallo =
+Martin,<o:p></o:p></span></div><div style=3D"margin-top: 0cm; =
+margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
+11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
+10pt; font-family: Arial, sans-serif; =
+"><o:p>&nbsp;</o:p></span></div><div style=3D"margin-top: 0cm; =
+margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
+11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
+10pt; font-family: Arial, sans-serif; ">wie besprochen hier noch die =
+Daten f=FCr die Intranetseite:<o:p></o:p></span></div><div =
+style=3D"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; =
+margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, =
+sans-serif; "><span style=3D"font-size: 10pt; font-family: Arial, =
+sans-serif; "><o:p>&nbsp;</o:p></span></div><div style=3D"margin-top: =
+0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; =
+font-size: 11pt; font-family: Calibri, sans-serif; "><span =
+style=3D"font-size: 10pt; font-family: Arial, sans-serif; =
+">Schriftart/-gr=F6=DFe: Verdana 11 Pt wenn von Browser nicht =
+unterst=FCtzt oder nicht vorhanden wird Arial 11 Pt =
+genommen<o:p></o:p></span></div><div style=3D"margin-top: 0cm; =
+margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
+11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
+10pt; font-family: Arial, sans-serif; ">Schriftfarbe: =
+Schwarz<o:p></o:p></span></div><div style=3D"margin-top: 0cm; =
+margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
+11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
+10pt; font-family: Arial, sans-serif; ">Farbe f=FCr die Balken in der =
+Grafik: D7DDE9 (Blau)<o:p></o:p></span></div><div style=3D"margin-top: =
+0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; =
+font-size: 11pt; font-family: Calibri, sans-serif; "><span =
+style=3D"font-size: 10pt; font-family: Arial, sans-serif; =
+"><o:p>&nbsp;</o:p></span></div><div style=3D"margin-top: 0cm; =
+margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
+11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
+10pt; font-family: Arial, sans-serif; ">Wenn noch was fehlt oder du was =
+brauchst sag mir Bescheid.<o:p></o:p></span></div><div =
+style=3D"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; =
+margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, =
+sans-serif; "><span style=3D"font-size: 10pt; font-family: Arial, =
+sans-serif; "><o:p>&nbsp;</o:p></span></div><div style=3D"margin-top: =
+0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; =
+font-size: 11pt; font-family: Calibri, sans-serif; "><span =
+style=3D"font-size: 10pt; font-family: Arial, sans-serif; ">Mit =
+freundlichem Gru=DF<span =
+class=3D"Apple-converted-space">&nbsp;</span><br><br>John =
+Smith<br>Service und Support<br><br>Example Service AG &amp; =
+Co.<o:p></o:p></span></div><div style=3D"margin-top: 0cm; margin-right: =
+0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; =
+font-family: Calibri, sans-serif; "><span style=3D"font-size: 10pt; =
+font-family: Arial, sans-serif; ">Management OHG<br>Someware-Str. =
+4<br>xxxxx Someware<br><br></span><span style=3D"font-size: 10pt; =
+font-family: Arial, sans-serif; "><o:p></o:p></span></div><div =
+style=3D"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; =
+margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, =
+sans-serif; "><span style=3D"font-size: 10pt; font-family: Arial, =
+sans-serif; ">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 =
+472</span><span style=3D"font-size: 10pt; font-family: Arial, =
+sans-serif; "><o:p></o:p></span></div><div style=3D"margin-top: 0cm; =
+margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
+11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
+10pt; font-family: Arial, sans-serif; "><a =
+href=3D"mailto:john.smith@example.com" style=3D"color: blue; =
+text-decoration: underline; ">john.smith@example.com</a></span><span =
+style=3D"font-size: 10pt; font-family: Arial, sans-serif; =
+"><o:p></o:p></span></div><div style=3D"margin-top: 0cm; margin-right: =
+0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; =
+font-family: Calibri, sans-serif; "><span style=3D"font-size: 10pt; =
+font-family: Arial, sans-serif; "><a href=3D"http://www.example.com" =
+style=3D"color: blue; text-decoration: underline; =
+">www.example.com</a></span><span style=3D"font-size: 10pt; font-family: =
+Arial, sans-serif; "><o:p></o:p></span></div><div style=3D"margin-top: =
+0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; =
+font-size: 11pt; font-family: Calibri, sans-serif; "><span =
+style=3D"font-size: 8pt; font-family: Arial, sans-serif; "><br>OHG mit =
+Sitz in Someware<br>AG: Someware - HRA 4158<br>Gesch=E4ftsf=FChrung: =
+Tilman Test, Klaus J=FCrgen Test,</span><span style=3D"font-size: 8pt; =
+font-family: Arial, sans-serif; "><o:p></o:p></span></div><div =
+style=3D"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; =
+margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, =
+sans-serif; "><span style=3D"font-size: 8pt; font-family: Arial, =
+sans-serif; ">Bernhard Test, Ulrich Test<br>USt-IdNr. DE =
+1010101010<br><br>Pers=F6nlich haftende gesch=E4ftsf=FChrende =
+Gesellschafterin:</span><span style=3D"font-size: 8pt; font-family: =
+Arial, sans-serif; "><o:p></o:p></span></div><div style=3D"margin-top: =
+0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; =
+font-size: 11pt; font-family: Calibri, sans-serif; "><span =
+style=3D"font-size: 8pt; font-family: Arial, sans-serif; ">Marie =
+Test Example Stiftung, Someware<br>Vorstand: Rolf =
+Test<br><br>Pers=F6nlich haftende Gesellschafterin:</span><span =
+style=3D"font-size: 8pt; font-family: Arial, sans-serif; =
+"><o:p></o:p></span></div><div style=3D"margin-top: 0cm; margin-right: =
+0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; =
+font-family: Calibri, sans-serif; "><span style=3D"font-size: 8pt; =
+font-family: Arial, sans-serif; ">Example Service AG, =
+Someware<br>AG: Someware - HRB xxx<br>Vorstand: Marie =
+Test</span><span style=3D"font-size: 8pt; font-family: Arial, =
+sans-serif; "><o:p></o:p></span></div><div style=3D"margin-top: 0cm; =
+margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
+11pt; font-family: Calibri, sans-serif; =
+"><o:p>&nbsp;</o:p></div></div></div></span></body></html>=
+
+--Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394--
+

+ 32 - 0
test/unit/email_parser_test.rb

@@ -0,0 +1,32 @@
+# encoding: utf-8
+require 'test_helper'
+ 
+class EmailParserTest < ActiveSupport::TestCase # Checks Channel::EmailParser#parse against stored mail fixtures.
+  test 'parse' do
+    files = [ # one entry per fixture: raw mail, expected MD5 of the parsed plain part, expected parsed values
+      {
+        :data     => IO.read('test/fixtures/mail1.box'),
+        :body_md5 => 'fb6ed5070ffbb821b67b15b83239e1db',
+        :params   => {
+          :from               => 'John.Smith@example.com',
+          :from_email         => 'John.Smith@example.com',
+          :from_display_name  => nil, # the From header in mail1.box carries only an address
+          :subject            => 'CI Daten für PublicView ', # header is iso-8859-1 Q-encoded in the fixture
+        },
+      },
+    ]
+
+    files.each { |file|
+ 
+      parser = Channel::EmailParser.new
+      data = parser.parse( file[:data] )
+      
+      # create md5 of body (the plain part returned by parse) and compare it with the expected digest
+      md5 = Digest::MD5.hexdigest( data[:plain_part] ) # NOTE(review): Digest::MD5 is not required in this file — presumably loaded via test_helper; confirm
+      assert_equal( file[:body_md5], md5 )      
+      file[:params].each { |key, value| # value is unused; the expectation is looked up again by key
+        assert_equal( file[:params][key.to_sym], data[key.to_sym] )      
+      }
+    }
+  end
+end