Added proving backend to detect signatures by diff module 'diffy' to reduce redundancy in article views.

Rolf Schmidt 9 years ago

 gem 'eventmachine'
 gem 'em-websocket'
+gem 'diffy'
 # Gems used only for develop/test and not required
 # in production environments by default.
 group :development, :test do

     delayed_job_active_record (4.1.0)
       activerecord (>= 3.0, < 5)
       delayed_job (>= 3.0, < 5)
+    diffy (3.0.7)
     docile (1.1.5)
     eco (1.0.0)
+  diffy

+module SignatureDetection
+  # Try to detect the signature in a list of articles, for example:
+  #
+  #   signature = SignatureDetection.find_signature(string_list)
+  #
+  # returns
+  #
+  #   signature = '...signature possible match...'
+  def self.find_signature(string_list)
+    # hash with possible signature and count of matches in string list
+    possible_signatures = {}
+    # loop all strings in array
+    #for main_string_index in 0 .. string_list.length - 1
+    ( 0..string_list.length - 1 ).each {|main_string_index|
+      break if main_string_index + 1 > string_list.length - 1
+      # loop all all strings in array except of the previous index
+      ( main_string_index + 1..string_list.length - 1 ).each {|second_string_index|
+        # get content of string 1
+        string1_content = string_list[main_string_index]
+        # get content of string 2
+        string2_content = string_list[second_string_index]
+        # diff strings
+        diff_result =, string2_content)
+        # split diff result by new line
+        diff_result_array = diff_result.to_s.split("\n")
+        # define start index for blocks with no difference
+        match_block = nil
+        # loop of lines of the diff result
+        ( 0..diff_result_array.length - 1 ).each {|diff_string_index|
+          # if no block with difference is defined then we try to find a string block without a difference
+          if !match_block
+            match_block = diff_string_index
+          end
+          # get line of diff result with current loop inde
+          line = diff_result_array[diff_string_index]
+          # check if the line starts with
+          # + = new content incoming
+          # - = removed content
+          # \ = end of file
+          # or if the current line is the last line of the diff result
+          next if line !~ /^(\\|\+|\-)/i && diff_string_index != diff_result_array.length - 1
+          # if the count of the lines without any difference is higher than 5 lines
+          if diff_string_index - match_block > 5
+            # define the block size without any difference
+            # except "-" because in this case 1 line is removed to much
+            match_block_total = diff_string_index + (line =~ /^(\\|\+)/i ? -1 : 0)
+            # get string of possible signature
+            match_content = ''
+            ( match_block..match_block_total ).each {|match_block_index|
+              match_content += "#{diff_result_array[match_block_index][1..-1]}\n"
+            }
+            # count the match of the signature in string list to rank
+            # the signature
+            possible_signatures[match_content] ||= 0
+            possible_signatures[match_content] += 1
+          end
+          match_block = nil
+        }
+      }
+    }
+    # loop all possible signature by rating and return highest rating
+    possible_signatures.sort { |a1, a2| a2[1].to_i <=> a1[1].to_i }.map do |content, _score|
+      return content.chomp
+    end
+    nil
+  end
+  # Searches for a signature string within a string (e.g. an article) and
+  # returns the line number at which the signature starts, for example:
+  #
+  #   signature_line = SignatureDetection.find_signature_line(signature, string)
+  #
+  # returns
+  #
+  #   signature_line = 123
+  #
+  # or
+  #
+  #   signature_line = nil
+  def self.find_signature_line(signature, string)
+    # try to find the char position of the signature
+    search_position = string.index(signature)
+    return if search_position.nil?
+    # count new lines up to signature
+    search_newlines  = string[0..search_position].split("\n").length + 1
+    search_newlines
+  end

 uns liegt die fachliche Anforderung vor, dass eine Agent-AddNote-Benachrichtigung für die beiden o. g. TicketHistory-Typen versendet werden soll.
 Das Modul Custom/Kernel/System/Ticket/ sieht diese Benachrichtigungen nach meinem Verständnis bisher nicht vor. Dafür wäre doch eine Codeerweiterung erforderlich, oder?

 Hi Martin,
 ich benötige von Dir eine Aufwandschätzung für ein Upgrade von x.1 auf x.5 (wir hatten schon mal diesbezüglich informiert, jetzt wollen die Entscheider Zahlen sehen).
 Vielen Dank!
 Mit freundlichen Grüßen

   test 'test case I - sender a' do
     # fixtures of sender a
-    fixture_files = [
-      'email_signature_detection/client_a_1.txt',
-      'email_signature_detection/client_a_2.txt',
-      'email_signature_detection/client_a_3.txt',
-    ]
-    # detect signature
-    match_structure = ''
-    # tests
-    # 'email_signature_detection/client_a_1.txt'
-    result_should = {
-      line: 9
+    fixture_files = {
+      'email_signature_detection/client_a_1.txt' => { line: 10 },
+      'email_signature_detection/client_a_2.txt' => { line: 20 },
+      'email_signature_detection/client_a_3.txt' => { line: 6 },
-    # 'email_signature_detection/client_a_2.txt'
-    result_should = {
-      line: 7
-    }
+    fixture_files_string_list = []
-    # 'email_signature_detection/client_a_3.txt'
-    result_should = {
-      line: 7
-    }
-    assert(true)
+    fixture_files.keys.each do |filepath|
+      file_content = ''
+      file = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r')
+      while (line = file.gets)
+        file_content += line
+      end
+      file.close
+      fixture_files[filepath][:content] = file_content
+      fixture_files_string_list.push(file_content)
+    end
+    signature = SignatureDetection.find_signature(fixture_files_string_list)
+    expected_signature = "\nMit freundlichen Grüßen\n\nBob Smith\nBerechtigungen und dez. Department\n________________________________\n\nMusik AG\nBerechtigungen und dez. Department (ITPBM)\nKastanien 2\n12345 Hornhausen\nTel.: +49 911 6760\nFax: +49 911 85 6760\nMobil: +49 173 911\nE-Mail:\n\n\nMusik AG | Kastanien 2 | 12345 Hornhausen\nSitz der AG: Hornhausen, HRB xxxxx | USt.-ID: DE 111222333444\nVorstand: Marc Smith, Weber Huber\nAufsichtsrat: Max Mix (Vors.)"
+    assert_equal(expected_signature, signature)
+    fixture_files.keys.each do |filepath|
+      expected_signature_position = fixture_files[filepath][:line]
+      assert_equal(expected_signature_position, SignatureDetection.find_signature_line(signature, fixture_files[filepath][:content]))
+    end
   test 'test case II - sender b' do
-    # fixtures of sender a
-    fixture_files = [
-      'email_signature_detection/client_b_1.txt',
-      'email_signature_detection/client_b_2.txt',
-      'email_signature_detection/client_b_3.txt',
-    ]
-    # detect signature
-    match_structure = ''
-    # tests
-    # 'email_signature_detection/client_b_1.txt'
-    result_should = {
-      line: 27
+    fixture_files = {
+      'email_signature_detection/client_b_1.txt' => { line: 26 },
+      'email_signature_detection/client_b_2.txt' => { line: 4 },
+      'email_signature_detection/client_b_3.txt' => { line: 6 },
-    # 'email_signature_detection/client_b_2.txt'
-    result_should = {
-      line: 5
-    }
+    fixture_files_string_list = []
-    # 'email_signature_detection/client_b_3.txt'
-    result_should = {
-      line: 7
-    }
-    assert(true)
+    fixture_files.keys.each do |filepath|
+      file_content = ''
+      file = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r')
+      while (line = file.gets)
+        file_content += line
+      end
+      file.close
+      fixture_files[filepath][:content] = file_content
+      fixture_files_string_list.push(file_content)
+    end
+    signature = SignatureDetection.find_signature(fixture_files_string_list)
+    expected_signature = "\nFreundliche Grüße\n\nGünter Lässig\nLokale Daten\n\nMusic GmbH\nBaustraße 123, 12345 Max City\nTelefon 0123 5432114\nTelefax 0123 5432139\nE-Mail Günter.Lä<mailto:Günter.Lä>\n\nExample. Zusammen für eine bessere Welt.\n[cid:image001.png@01CE92A6.EC495B60]<>\n\n[cid:image002.png@01CE92A6.EC495B60]<>\n\n[cid:image003.png@01CE92A6.EC495B60]<>\n\n[cid:image004.png@01CE92A6.EC495B60]<>\n\n[cid:image005.jpg@01CE92A6.EC495B60]<>\n\n[cid:image006.png@01CE92A6.EC495B60]<>\n\nSitz der Gesellschaft: Max City, Amtsgericht Max City HRB Nr. 1234\nGeschäftsführer: Bob Smith\nVorsitzender des Aufsichtsrats: Alex Marx"
+    assert_equal(expected_signature, signature)
+    fixture_files.keys.each do |filepath|
+      expected_signature_position = fixture_files[filepath][:line]
+      assert_equal(expected_signature_position, SignatureDetection.find_signature_line(signature, fixture_files[filepath][:content]))
+    end