signature_detection.rb 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. module SignatureDetection
  2. =begin
  3. try to detect the signature in list of articles for example
  4. messages = [
  5. {
  6. content: 'some content',
  7. content_type: 'text/plain',
  8. },
  9. ]
  10. signature = SignatureDetection.find_signature(messages)
  11. returns
  12. signature = '...signature possible match...'
  13. =end
  14. def self.find_signature(messages)
  15. string_list = []
  16. messages.each do |message|
  17. if message[:content_type].match?(%r{text/html}i)
  18. string_list.push message[:content].html2text(true)
  19. next
  20. end
  21. string_list.push message[:content]
  22. end
  23. # hash with possible signature and count of matches in string list
  24. possible_signatures = {}
  25. # loop all strings in array
  26. string_list.each_with_index do |_main_string, main_string_index|
  27. break if main_string_index + 1 > string_list.length - 1
  28. # loop all all strings in array except of the previous index
  29. ( main_string_index + 1..string_list.length - 1 ).each do |second_string_index|
  30. # get content of string 1
  31. string1_content = string_list[main_string_index]
  32. # get content of string 2
  33. string2_content = string_list[second_string_index]
  34. # diff strings
  35. diff_result = Diffy::Diff.new(string1_content, string2_content)
  36. # split diff result by new line
  37. diff_result_array = diff_result.to_s.split("\n")
  38. # define start index for blocks with no difference
  39. match_block = nil
  40. # loop of lines of the diff result
  41. ( 0..diff_result_array.length - 1 ).each do |diff_string_index|
  42. # if no block with difference is defined then we try to find a string block without a difference
  43. if !match_block
  44. match_block = diff_string_index
  45. end
  46. # get line of diff result with current loop inde
  47. line = diff_result_array[diff_string_index]
  48. # check if the line starts with
  49. # + = new content incoming
  50. # - = removed content
  51. # \ = end of file
  52. # or if the current line is the last line of the diff result
  53. next if line !~ /^(\\|\+|\-)/i && diff_string_index != diff_result_array.length - 1
  54. # if the count of the lines without any difference is higher than 4 lines
  55. if diff_string_index - match_block > 4
  56. # define the block size without any difference
  57. # except "-" because in this case 1 line is removed to much
  58. match_block_total = diff_string_index + (line.match?(/^(\\|\+)/i) ? -1 : 0)
  59. # get string of possible signature, use only the first 10 lines
  60. match_max_content = 0
  61. match_content = ''
  62. ( match_block..match_block_total ).each do |match_block_index|
  63. break if match_max_content == 10
  64. match_max_content += 1
  65. match_content += "#{diff_result_array[match_block_index][1..-1]}\n"
  66. end
  67. # count the match of the signature in string list to rank
  68. # the signature
  69. possible_signatures[match_content] ||= 0
  70. possible_signatures[match_content] += 1
  71. break
  72. end
  73. match_block = nil
  74. end
  75. end
  76. end
  77. # loop all possible signature by rating and return highest rating
  78. possible_signatures.sort { |a1, a2| a2[1].to_i <=> a1[1].to_i }.map do |content, _score|
  79. return content.chomp
  80. end
  81. nil
  82. end
  83. =begin
  84. this function will search for a signature string in a string (e.g. article) and return the line number of the signature start
  85. signature_line = SignatureDetection.find_signature_line(signature, message, content_type)
  86. returns
  87. signature_line = 123
  88. or
  89. signature_line = nil
  90. =end
  91. def self.find_signature_line(signature, string, content_type)
  92. if content_type.match?(%r{text/html}i)
  93. string = string.html2text(true)
  94. end
  95. # try to find the char position of the signature
  96. search_position = string.index(signature)
  97. return if search_position.nil?
  98. # count new lines up to signature
  99. string[0..search_position].split("\n").length + 1
  100. end
  101. =begin
  102. find signature line of message by user and article
  103. signature_line = SignatureDetection.find_signature_line_by_article(user, article)
  104. returns
  105. signature_line = 123
  106. or
  107. signature_line = nil
  108. =end
  109. def self.find_signature_line_by_article(user, article)
  110. return if !user.preferences[:signature_detection]
  111. SignatureDetection.find_signature_line(
  112. user.preferences[:signature_detection],
  113. article.body,
  114. article.content_type,
  115. )
  116. end
  117. =begin
  118. this function will search for a signature string in all articles of a given user_id
  119. signature = SignatureDetection.by_user_id(user_id)
  120. returns
  121. signature = '...signature possible match...'
  122. =end
  123. def self.by_user_id(user_id)
  124. type = Ticket::Article::Type.lookup(name: 'email')
  125. sender = Ticket::Article::Sender.lookup(name: 'Customer')
  126. tickets = Ticket.where(
  127. created_by_id: user_id,
  128. create_article_type_id: type.id,
  129. create_article_sender_id: sender.id
  130. ).limit(5).order(id: :desc)
  131. article_bodies = []
  132. tickets.each do |ticket|
  133. article = ticket.articles.first
  134. next if !article
  135. data = {
  136. content: article.body,
  137. content_type: article.content_type,
  138. }
  139. article_bodies.push data
  140. end
  141. find_signature(article_bodies)
  142. end
  143. =begin
  144. rebuild signature for each user
  145. SignatureDetection.rebuild_all_user
  146. returns
  147. true/false
  148. =end
  149. def self.rebuild_all_user
  150. User.select('id').where(active: true).order(id: :desc).each do |local_user|
  151. rebuild_user(local_user.id)
  152. end
  153. true
  154. end
  155. =begin
  156. rebuild signature detection for user
  157. SignatureDetection.rebuild_user(user_id)
  158. returns
  159. true/false
  160. =end
  161. def self.rebuild_user(user_id)
  162. signature_detection = by_user_id(user_id)
  163. return if !signature_detection
  164. user = User.find(user_id)
  165. return if user.preferences[:signature_detection] == signature_detection
  166. user.preferences[:signature_detection] = signature_detection
  167. user.save
  168. true
  169. end
  170. =begin
  171. rebuild signature for all articles
  172. SignatureDetection.rebuild_all_articles
  173. returns
  174. true/false
  175. =end
  176. def self.rebuild_all_articles
  177. article_type = Ticket::Article::Type.lookup(name: 'email')
  178. Ticket::Article.select('id').where(type_id: article_type.id).order(id: :desc).each do |local_article|
  179. article = Ticket::Article.find(local_article.id)
  180. user = User.find(article.created_by_id)
  181. next if !user.preferences[:signature_detection]
  182. signature_line = find_signature_line(
  183. user.preferences[:signature_detection],
  184. article.body,
  185. article.content_type,
  186. )
  187. next if !signature_line
  188. next if article.preferences[:signature_detection] == signature_line
  189. article.preferences[:signature_detection] = signature_line
  190. article.save
  191. end
  192. true
  193. end
  194. end