# Copyright (C) 2012-2024 Zammad Foundation, https://zammad-foundation.org/
  2. module SignatureDetection
  3. =begin
  4. try to detect the signature in list of articles for example
  5. messages = [
  6. {
  7. content: 'some content',
  8. content_type: 'text/plain',
  9. },
  10. ]
  11. signature = SignatureDetection.find_signature(messages)
  12. returns
  13. signature = '...signature possible match...'
  14. =end
  15. def self.find_signature(messages)
  16. signature_candidates = Hash.new(0) # <potential_signature>: <score>
  17. messages = messages.map { |m| m[:content_type].match?(%r{text/html}i) ? m[:content].html2text(true) : m[:content] }
  18. message_pairs = messages.each_cons(2).to_a
  19. diffs = message_pairs.map { |msg_pair| Diffy::Diff.new(*msg_pair).to_s }
  20. # Find the first 5- to 10-line common substring in each diff
  21. diffs.map { |d| d.split("\n") }.each do |diff_lines|
  22. # Get line numbers in diff representing changes (those starting with +, -, \)
  23. delta_indices = diff_lines.map.with_index { |l, i| l.start_with?(' ') ? nil : i }.compact
  24. # Add boundaries at start and end
  25. delta_indices.unshift(-1).push(diff_lines.length)
  26. # Find first gap of 5+ lines between deltas (i.e., the common substring's location)
  27. sig_range = delta_indices.each_cons(2)
  28. .map { |head, tail| [head + 1, tail - 1] }
  29. .find { |head, tail| tail > head + 4 }
  30. next if sig_range.nil?
  31. # Take up to 10 lines from this "gap" (i.e., the common substring)
  32. match_content = diff_lines[sig_range.first..sig_range.last]
  33. .map { |l| l.sub(%r{^.}, '') }
  34. .first(10).join("\n")
  35. # Invalid html signature detection for exchange warning boxes #3571
  36. next if match_content.include?('CAUTION:')
  37. # Add this substring to the signature_candidates hash and increment its match score
  38. signature_candidates[match_content] += 1
  39. end
  40. signature_candidates.max_by { |_, score| score }&.first
  41. end
  42. =begin
  43. this function will search for a signature string in a string (e.g. article) and return the line number of the signature start
  44. signature_line = SignatureDetection.find_signature_line(signature, message, content_type)
  45. returns
  46. signature_line = 123
  47. or
  48. signature_line = nil
  49. =end
  50. def self.find_signature_line(signature, string, content_type)
  51. string = string.html2text(true) if content_type.match?(%r{text/html}i)
  52. # try to find the char position of the signature
  53. search_position = string.index(signature)
  54. # count new lines up to signature
  55. string[0..search_position].split("\n").length + 1 if search_position.present?
  56. end
  57. =begin
  58. find signature line of message by user and article
  59. signature_line = SignatureDetection.find_signature_line_by_article(user, article)
  60. returns
  61. signature_line = 123
  62. or
  63. signature_line = nil
  64. =end
  65. def self.find_signature_line_by_article(user, article)
  66. return if !user.preferences[:signature_detection]
  67. SignatureDetection.find_signature_line(
  68. user.preferences[:signature_detection],
  69. article.body,
  70. article.content_type,
  71. )
  72. end
  73. =begin
  74. this function will search for a signature string in all articles of a given user_id
  75. signature = SignatureDetection.by_user_id(user_id)
  76. returns
  77. signature = '...signature possible match...'
  78. =end
  79. def self.by_user_id(user_id)
  80. type = Ticket::Article::Type.lookup(name: 'email')
  81. sender = Ticket::Article::Sender.lookup(name: 'Customer')
  82. tickets = Ticket.where(
  83. created_by_id: user_id,
  84. create_article_type_id: type.id,
  85. create_article_sender_id: sender.id
  86. ).limit(5).reorder(id: :desc)
  87. article_bodies = []
  88. tickets.each do |ticket|
  89. article = ticket.articles.first
  90. next if !article
  91. data = {
  92. content: article.body,
  93. content_type: article.content_type,
  94. }
  95. article_bodies.push data
  96. end
  97. find_signature(article_bodies)
  98. end
  99. =begin
  100. rebuild signature for each user
  101. SignatureDetection.rebuild_all_user
  102. returns
  103. true/false
  104. =end
  105. def self.rebuild_all_user
  106. User.select('id').where(active: true).reorder(id: :desc).each do |local_user|
  107. rebuild_user(local_user.id)
  108. end
  109. true
  110. end
  111. =begin
  112. rebuild signature detection for user
  113. SignatureDetection.rebuild_user(user_id)
  114. returns
  115. true/false
  116. =end
  117. def self.rebuild_user(user_id)
  118. signature_detection = by_user_id(user_id)
  119. return if !signature_detection
  120. user = User.find(user_id)
  121. return if user.preferences[:signature_detection] == signature_detection
  122. user.preferences[:signature_detection] = signature_detection
  123. user.save
  124. true
  125. end
  126. =begin
  127. rebuild signature for all articles
  128. SignatureDetection.rebuild_all_articles
  129. returns
  130. true/false
  131. =end
  132. def self.rebuild_all_articles
  133. article_type = Ticket::Article::Type.lookup(name: 'email')
  134. Ticket::Article.where(type_id: article_type.id)
  135. .reorder(id: :desc)
  136. .find_each(batch_size: 10) do |article|
  137. user = User.lookup(id: article.created_by_id)
  138. next if !user.preferences[:signature_detection]
  139. signature_line = find_signature_line(
  140. user.preferences[:signature_detection],
  141. article.body,
  142. article.content_type,
  143. )
  144. next if !signature_line
  145. article.preferences[:signature_detection] = signature_line
  146. article.save if article.changed?
  147. end
  148. true
  149. end
  150. end