search_index_backend.rb 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820
  1. # Copyright (C) 2012-2016 Zammad Foundation, http://zammad-foundation.org/
  2. class SearchIndexBackend
  3. =begin
  4. info about used search index machine
  5. SearchIndexBackend.info
  6. =end
  # Ask the configured search index machine for its info document.
  #
  # Returns nil when the 'es_url' setting is blank, otherwise the parsed
  # response data. Raises when the request is not successful, when the
  # response carries no version number, or when the reported version is
  # not lower than 5.7.
  def self.info
    url = Setting.get('es_url').to_s
    return if url.blank?

    Rails.logger.info "# curl -X GET \"#{url}\""
    request_options = {
      json: true,
      open_timeout: 8,
      read_timeout: 12,
      user: Setting.get('es_user'),
      password: Setting.get('es_password'),
    }
    response = UserAgent.get(url, {}, request_options)
    Rails.logger.info "# #{response.code}"

    unless response.success?
      raise humanized_error(verb: 'GET', url: url, response: response)
    end

    installed_version = response.data.dig('version', 'number')
    if installed_version.blank?
      raise "Unable to get elasticsearch version from response: #{response.inspect}"
    end

    # only versions below 5.7 are accepted
    if Gem::Version.new(installed_version) >= Gem::Version.new('5.7')
      raise "Version #{installed_version} of configured elasticsearch is not supported"
    end

    response.data
  end
  36. =begin
  37. update processors
  38. SearchIndexBackend.processors(
  39. _ingest/pipeline/attachment: {
  40. description: 'Extract attachment information from arrays',
  41. processors: [
  42. {
  43. foreach: {
  44. field: 'ticket.articles.attachments',
  45. processor: {
  46. attachment: {
  47. target_field: '_ingest._value.attachment',
  48. field: '_ingest._value.data'
  49. }
  50. }
  51. }
  52. }
  53. ]
  54. }
  55. )
  56. =end
  # Create, update or delete processors (e.g. ingest pipelines).
  #
  # data maps a path below the 'es_url' setting to a list of items. An item
  # whose :action is 'delete' triggers a DELETE request for that path; a
  # 404 answer to it is tolerated. Every other item is sent via PUT with
  # the :action key stripped from the payload.
  #
  # The caller's item hashes are left untouched (the payload is a copy
  # without :action) and the debug log shows the payload that is really
  # sent.
  #
  # Returns true. Raises with a humanized error message when a request
  # fails.
  def self.processors(data)
    data.each do |key, items|
      url = "#{Setting.get('es_url')}/#{key}"

      items.each do |item|
        if item[:action] == 'delete'
          Rails.logger.info "# curl -X DELETE \"#{url}\""
          response = UserAgent.delete(
            url,
            {
              json: true,
              open_timeout: 8,
              read_timeout: 12,
              user: Setting.get('es_user'),
              password: Setting.get('es_password'),
            }
          )
          Rails.logger.info "# #{response.code}"
          next if response.success?
          # nothing to delete is not an error
          next if response.code.to_s == '404'

          raise humanized_error(
            verb: 'DELETE',
            url: url,
            response: response,
          )
        end

        # do not mutate the caller's hash, send a copy without :action
        payload = item.reject { |name, _value| name == :action }

        Rails.logger.info "# curl -X PUT \"#{url}\" \\"
        Rails.logger.debug { "-d '#{payload.to_json}'" }
        response = UserAgent.put(
          url,
          payload,
          {
            json: true,
            open_timeout: 8,
            read_timeout: 12,
            user: Setting.get('es_user'),
            password: Setting.get('es_password'),
          }
        )
        Rails.logger.info "# #{response.code}"
        next if response.success?

        raise humanized_error(
          verb: 'PUT',
          url: url,
          payload: payload,
          response: response,
        )
      end
    end
    true
  end
  108. =begin
  109. create/update/delete index
  110. SearchIndexBackend.index(
  111. :action => 'create', # create/update/delete
  112. :data => {
  113. :mappings => {
  114. :Ticket => {
  115. :properties => {
  116. :articles => {
  117. :type => 'nested',
  118. :properties => {
  119. 'attachment' => { :type => 'attachment' }
  120. }
  121. }
  122. }
  123. }
  124. }
  125. }
  126. )
  127. SearchIndexBackend.index(
  128. :action => 'delete', # create/update/delete
  129. :name => 'Ticket', # optional
  130. )
  131. SearchIndexBackend.index(
  132. :action => 'delete', # create/update/delete
  133. )
  134. =end
  # Create/update an index, or delegate to remove for action 'delete'.
  #
  # data[:name] selects the type the URL is built for, data[:action] the
  # operation and data[:data] the payload sent via PUT.
  #
  # Returns nil when no URL can be built, the result of remove for a
  # delete, and true after a successful PUT. Raises with a humanized
  # error message when the PUT fails.
  def self.index(data)
    url = build_url(data[:name])
    return if url.blank?

    return SearchIndexBackend.remove(data[:name]) if data[:action] == 'delete'

    payload = data[:data]
    Rails.logger.info "# curl -X PUT \"#{url}\" \\"
    Rails.logger.debug { "-d '#{payload.to_json}'" }

    # The read timeout is deliberately high: with a short one the request
    # gets retried behind the scenes, which ends in an
    # "index_already_exists_exception" (HTTP 400).
    # see: https://github.com/ankane/the-ultimate-guide-to-ruby-timeouts/issues/8
    # Improving the Elasticsearch config is probably the proper solution.
    request_options = {
      json: true,
      open_timeout: 8,
      read_timeout: 30,
      user: Setting.get('es_user'),
      password: Setting.get('es_password'),
    }
    response = UserAgent.put(url, payload, request_options)
    Rails.logger.info "# #{response.code}"
    return true if response.success?

    raise humanized_error(verb: 'PUT', url: url, payload: payload, response: response)
  end
  169. =begin
  170. add new object to search index
  171. SearchIndexBackend.add('Ticket', some_data_object)
  172. =end
  # Add a new object to the search index.
  #
  # The URL is built from type and data['id'], data is POSTed as payload.
  #
  # Returns nil when no URL can be built and true on success. Raises with
  # a humanized error message when the request fails.
  def self.add(type, data)
    url = build_url(type, data['id'])
    return if url.blank?

    Rails.logger.info "# curl -X POST \"#{url}\" \\"
    Rails.logger.debug { "-d '#{data.to_json}'" }

    request_options = {
      json: true,
      open_timeout: 8,
      read_timeout: 16,
      user: Setting.get('es_user'),
      password: Setting.get('es_password'),
    }
    response = UserAgent.post(url, data, request_options)
    Rails.logger.info "# #{response.code}"

    unless response.success?
      raise humanized_error(verb: 'POST', url: url, payload: data, response: response)
    end

    true
  end
  198. =begin
  199. remove whole data from index
  200. SearchIndexBackend.remove('Ticket', 123)
  201. SearchIndexBackend.remove('Ticket')
  202. =end
  # Remove one object (type and o_id given) or everything of a type from
  # the index.
  #
  # Returns nil when no URL can be built, true when the request succeeds
  # or is answered with code 400, and false otherwise. A failure is only
  # logged, it is not raised.
  def self.remove(type, o_id = nil)
    url = build_url(type, o_id)
    return if url.blank?

    Rails.logger.info "# curl -X DELETE \"#{url}\""
    request_options = {
      open_timeout: 8,
      read_timeout: 16,
      user: Setting.get('es_user'),
      password: Setting.get('es_password'),
    }
    response = UserAgent.delete(url, request_options)
    Rails.logger.info "# #{response.code}"

    return true if response.success? || response.code.to_s == '400'

    failure = humanized_error(verb: 'DELETE', url: url, response: response)
    Rails.logger.info "NOTICE: can't delete index: #{failure}"
    false
  end
  227. =begin
  228. @param query [String] search query
  229. @param index [String, Array<String>, nil] indexes to search in (see search_by_index)
  230. @param options [Hash] search options (see build_query)
  231. @return search result
  232. @example Sample queries
  233. result = SearchIndexBackend.search('search query', ['User', 'Organization'], limit: limit)
  234. result = SearchIndexBackend.search('search query', 'User', limit: limit)
  235. result = SearchIndexBackend.search('search query', 'User', limit: limit, sort_by: ['updated_at'], order_by: ['desc'])
  236. result = [
  237. {
  238. :id => 123,
  239. :type => 'User',
  240. },
  241. {
  242. :id => 125,
  243. :type => 'User',
  244. },
  245. {
  246. :id => 15,
  247. :type => 'Organization',
  248. }
  249. ]
  250. =end
  # Search in one index or in a list of indexes.
  #
  # A non-Array index is handed to search_by_index as is. For an Array,
  # search_by_index runs once per entry, nil results are dropped and the
  # remaining result lists are joined into one list.
  def self.search(query, index = nil, options = {})
    return search_by_index(query, index, options) unless index.is_a?(Array)

    per_index_results = index.map do |local_index|
      search_by_index(query, local_index, options)
    end
    per_index_results.reject(&:nil?).flatten(1)
  end
  260. =begin
  261. @param query [String] search query
  262. @param index [String, Array<String>, nil] index name or list of index names. If index is nil or not present, the search will be performed globally
  263. @param options [Hash] search options (see build_query)
  264. @return search result
  265. =end
  # Run a query_string search against one index, a list of indexes or
  # (index nil) all indexes.
  #
  # Returns [] for a blank query, nil when no URL can be built, [] when
  # the request fails (the failure is logged) or the response has no
  # hits, and otherwise a list of { id:, type: } hashes.
  def self.search_by_index(query, index = nil, options = {})
    return [] if query.blank?

    url = build_url
    return if url.blank?

    search_path = if !index
                    '/_search'
                  elsif index.is_a?(Array)
                    "/#{index.join(',')}/_search"
                  else
                    "/#{index}/_search"
                  end
    url += search_path

    # real search condition
    query_string = {
      'query' => append_wildcard_to_simple_query(query),
      'default_operator' => 'AND',
      'analyze_wildcard' => true,
    }
    query_data = build_query({ 'query_string' => query_string }, options)

    Rails.logger.info "# curl -X POST \"#{url}\" \\"
    Rails.logger.debug { " -d'#{query_data.to_json}'" }

    request_options = {
      json: true,
      open_timeout: 5,
      read_timeout: 14,
      user: Setting.get('es_user'),
      password: Setting.get('es_password'),
    }
    response = UserAgent.get(url, query_data, request_options)
    Rails.logger.info "# #{response.code}"

    unless response.success?
      Rails.logger.error humanized_error(
        verb: 'GET',
        url: url,
        payload: query_data,
        response: response,
      )
      return []
    end

    hits = response.data&.dig('hits', 'hits')
    return [] if !hits

    hits.map do |hit|
      Rails.logger.info "... #{hit['_type']} #{hit['_id']}"
      { id: hit['_id'], type: hit['_type'] }
    end
  end
  # Build the sort section of a search request.
  #
  # sort_by and order_by are parallel lists (field name / direction).
  # Entries with a blank field or a blank direction are skipped. When
  # nothing usable remains, sorting falls back to updated_at desc. An
  # 'active desc' entry is put in front unless an entry with the key
  # :active exists, and '_score' is always appended.
  def self.search_by_index_sort(sort_by = nil, order_by = nil)
    sorting = []

    sort_by&.each_with_index do |field, position|
      next if field.blank? || order_by&.at(position).blank?

      # for sorting values use .raw values (no analyzer is used - plain values)
      plain_field = field.match?(/\./) || field.match?(/_(time|date|till|id|ids|at)$/)
      field += '.raw' unless plain_field

      sorting << { field => { order: order_by[position] } }
    end

    sorting << { updated_at: { order: 'desc' } } if sorting.empty?

    # add sorting by active if active is not part of the query
    if sorting.none? { |entry| entry.key?(:active) }
      sorting.unshift({ active: { order: 'desc' } })
    end

    sorting << '_score'
    sorting
  end
  354. =begin
  355. get the count of tickets and the ids of the tickets which match the selector
  356. result = SearchIndexBackend.selectors(index, selector)
  357. example with a simple search:
  358. result = SearchIndexBackend.selectors('Ticket', { category: { operator: 'is', value: 'aa::ab' } })
  359. result = [
  360. { id: 1, type: 'Ticket' },
  361. { id: 2, type: 'Ticket' },
  362. { id: 3, type: 'Ticket' },
  363. ]
  364. you also can get aggregations
  365. result = SearchIndexBackend.selectors(index, selector, options, aggs_interval)
  366. example for aggregations within one year
  367. aggs_interval = {
  368. from: '2015-01-01',
  369. to: '2015-12-31',
  370. interval: 'month', # year, quarter, month, week, day, hour, minute, second
  371. field: 'created_at',
  372. }
  373. options = {
  374. limit: 123,
  375. current_user: User.find(123),
  376. }
  377. result = SearchIndexBackend.selectors('Ticket', { category: { operator: 'is', value: 'aa::ab' } }, options, aggs_interval)
  378. result = {
  379. hits:{
  380. total:4819,
  381. },
  382. aggregations:{
  383. time_buckets:{
  384. buckets:[
  385. {
  386. key_as_string:"2014-10-01T00:00:00.000Z",
  387. key:1412121600000,
  388. doc_count:420
  389. },
  390. {
  391. key_as_string:"2014-11-01T00:00:00.000Z",
  392. key:1414800000000,
  393. doc_count:561
  394. },
  395. ...
  396. ]
  397. }
  398. }
  399. }
  400. =end
  # Search by selector, optionally with date histogram aggregations.
  #
  # Returns nil when no URL can be built. Without an aggregation interval
  # the result is { count:, ticket_ids: } taken from the response hits,
  # with an interval the response data is returned as is.
  #
  # Raises 'no selectors given' when selectors is missing and a humanized
  # error message when the request fails.
  def self.selectors(index = nil, selectors = nil, options = {}, aggs_interval = nil)
    raise 'no selectors given' unless selectors

    url = build_url
    return if url.blank?

    search_path = if !index
                    '/_search'
                  elsif index.is_a?(Array)
                    "/#{index.join(',')}/_search"
                  else
                    "/#{index}/_search"
                  end
    url += search_path

    data = selector2query(selectors, options, aggs_interval)

    Rails.logger.info "# curl -X POST \"#{url}\" \\"
    Rails.logger.debug { " -d'#{data.to_json}'" }

    request_options = {
      json: true,
      open_timeout: 5,
      read_timeout: 14,
      user: Setting.get('es_user'),
      password: Setting.get('es_password'),
    }
    response = UserAgent.get(url, data, request_options)
    Rails.logger.info "# #{response.code}"

    unless response.success?
      raise humanized_error(verb: 'GET', url: url, payload: data, response: response)
    end
    Rails.logger.debug { response.data.to_json }

    # with an interval the caller gets the response data including aggregations
    return response.data if aggs_interval.present? && aggs_interval[:interval].present?

    ticket_ids = response.data['hits']['hits'].map { |item| item['_id'] }
    {
      count: response.data['hits']['total'],
      ticket_ids: ticket_ids,
    }
  end
  # Fallback options merged below the options handed to selector2query.
  DEFAULT_SELECTOR_OPTIONS = {
    limit: 10
  }.freeze

  # Translate a selector into a search request body.
  #
  # selector maps an attribute name (everything up to and including the
  # first dot is cut off) to a hash with the string keys 'operator',
  # 'value' and, for relative operators, 'range'. options[:limit] becomes
  # the request size. aggs_interval (optional) may carry :field, :from,
  # :to, :interval and :timezone.
  #
  # Returns a hash with :query and :size, plus :aggs when an interval is
  # given and 'sort' when an aggregation field but no interval is given.
  #
  # Raises for an unknown operator and for a missing or unknown relative
  # range. The range is looked up by its string form, so a missing range
  # produces the "Invalid relative_map" error instead of a NoMethodError
  # and caller input is never turned into symbols.
  def self.selector2query(selector, options, aggs_interval)
    options = DEFAULT_SELECTOR_OPTIONS.merge(options.deep_symbolize_keys)

    query_must = []
    query_must_not = []

    relative_map = {
      'day' => 'd',
      'year' => 'y',
      'month' => 'M',
      'hour' => 'h',
      'minute' => 'm',
    }.freeze

    if selector.present?
      selector.each do |key, data|
        field = key.sub(/^.+?\./, '')
        operator = data['operator']
        value = data['value']

        # use .raw in cases where query contains ::
        candidates = value.is_a?(Array) ? value : [value]
        field += '.raw' if candidates.any? { |candidate| candidate.is_a?(String) && candidate.include?('::') }

        case operator
        when 'is', 'is not', 'contains', 'contains not'
          clause = if value.is_a?(Array)
                     { terms: { field => value } }
                   else
                     { term: { field => value } }
                   end
          (operator.end_with?(' not') ? query_must_not : query_must) << clause

        when 'contains all', 'contains one', 'contains all not', 'contains one not'
          values = value.split(',').map(&:strip)
          glue = operator.start_with?('contains all') ? '" AND "' : '" OR "'
          clause = { query_string: { query: "#{field}:\"#{values.join(glue)}\"" } }
          (operator.end_with?(' not') ? query_must_not : query_must) << clause

        when 'within last (relative)', 'within next (relative)', 'before (relative)', 'after (relative)'
          unit = relative_map[data['range'].to_s]
          raise "Invalid relative_map for range '#{data['range']}'." if unit.blank?

          bounds = case operator
                   when 'within last (relative)' then { gte: "now-#{value}#{unit}" }
                   when 'within next (relative)' then { lt: "now+#{value}#{unit}" }
                   when 'before (relative)' then { lt: "now-#{value}#{unit}" }
                   else { gt: "now+#{value}#{unit}" }
                   end
          query_must << { range: { field => bounds } }

        when 'before (absolute)', 'after (absolute)'
          bound = operator == 'before (absolute)' ? :lt : :gt
          query_must << { range: { field => { bound => value.to_s } } }

        else
          raise "unknown operator '#{operator}' for #{key}"
        end
      end
    end

    data = {
      query: {},
      size: options[:limit],
    }

    # add aggs to filter
    if aggs_interval.present?
      if aggs_interval[:interval].present?
        # only the buckets are of interest, no hits
        data[:size] = 0
        histogram = {
          field: aggs_interval[:field],
          interval: aggs_interval[:interval],
        }
        histogram[:time_zone] = aggs_interval[:timezone] if aggs_interval[:timezone].present?
        data[:aggs] = { time_buckets: { date_histogram: histogram } }
      end

      query_must << {
        range: {
          aggs_interval[:field] => {
            from: aggs_interval[:from],
            to: aggs_interval[:to],
          },
        },
      }
    end

    bool = {}
    bool[:must] = query_must if query_must.present?
    bool[:must_not] = query_must_not if query_must_not.present?
    data[:query][:bool] = bool

    # add sort
    if aggs_interval.present? && aggs_interval[:field].present? && aggs_interval[:interval].blank?
      data['sort'] = [
        { aggs_interval[:field] => { order: 'desc' } },
        '_score',
      ]
    end

    data
  end
  601. =begin
  602. return true if backend is configured
  603. result = SearchIndexBackend.enabled?
  604. =end
  # True when the 'es_url' setting is not blank, false otherwise.
  def self.enabled?
    Setting.get('es_url').present?
  end
  # Build the request URL for the index, a type or a single object.
  #
  # The index name is the 'es_index' setting followed by '_' and the
  # Rails environment. With a type the 'es_pipeline' setting, if present,
  # is appended as '?pipeline=...'.
  #
  # Returns nil when the backend is not enabled.
  def self.build_url(type = nil, o_id = nil)
    return if !SearchIndexBackend.enabled?

    index_name = "#{Setting.get('es_index')}_#{Rails.env}"
    base_url = "#{Setting.get('es_url')}/#{index_name}"
    return base_url if !type

    pipeline = Setting.get('es_pipeline')
    pipeline = "?pipeline=#{pipeline}" if pipeline.present?

    path = o_id ? "#{type}/#{o_id}" : type
    "#{base_url}/#{path}#{pipeline}"
  end
  # Compose (and log) a readable error message for a failed request.
  #
  # verb and url describe the request, payload (optional) is what was
  # sent and response is the result object. The message names the likely
  # cause when the response error mentions 'Connection refused' or when a
  # request to the zammad-attachment pipeline was answered with code 400.
  #
  # The response code is compared in its string form, as everywhere else
  # in this class, so the hint is given for both '400' and 400. A nil
  # response no longer raises in that comparison.
  #
  # The first 40,000 characters of the message are written to the error
  # log. Returns the complete message.
  def self.humanized_error(verb:, url:, payload: nil, response:)
    prefix = "Unable to process #{verb} request to elasticsearch URL '#{url}'."
    suffix = "\n\nResponse:\n#{response.inspect}\n\nPayload:\n#{payload.inspect}"

    if payload.respond_to?(:to_json)
      suffix += "\n\nPayload size: #{payload.to_json.bytesize / 1024 / 1024}M"
    end

    message = if response&.error&.match?('Connection refused')
                "Elasticsearch is not reachable, probably because it's not running or even installed."
              elsif url.end_with?('pipeline/zammad-attachment', 'pipeline=zammad-attachment') && response&.code.to_s == '400'
                'The installed attachment plugin could not handle the request payload. Ensure that the correct attachment plugin is installed (5.6 => ingest-attachment, 2.4 - 5.5 => mapper-attachments).'
              else
                'Check the response and payload for detailed information: '
              end

    result = "#{prefix} #{message}#{suffix}"
    # keep the log entry at a bounded size
    Rails.logger.error result[0, 40_000]
    result
  end
  # Add * on a simple query like "somephrase23".
  #
  # Surrounding whitespace is removed and '*' is appended unless the query
  # contains a colon. Works on a copy, so the caller's string is never
  # modified and frozen strings are accepted.
  #
  # Returns the resulting query string.
  def self.append_wildcard_to_simple_query(query)
    query = query.strip
    query += '*' unless query.include?(':')
    query
  end
  651. =begin
  652. @param condition [Hash] search condition
  653. @param options [Hash] search options
  654. @option options [Integer] :from
  655. @option options [Integer] :limit
  656. @option options [Hash] :query_extension applied to ElasticSearch query
  657. @option options [Array<String>] :order_by ordering directions, desc or asc
  658. @option options [Array<String>] :sort_by fields to sort by
  659. =end
  # Fallback options merged below the options handed to build_query.
  DEFAULT_QUERY_OPTIONS = {
    from: 0,
    limit: 10
  }.freeze

  # Assemble the request body for a search.
  #
  # options are symbolized and merged over DEFAULT_QUERY_OPTIONS. :from
  # and :limit become from/size, :sort_by and :order_by go through
  # search_by_index_sort, and a copy of :query_extension (if given) is
  # deep merged into the query before condition is added to the bool
  # must list.
  def self.build_query(condition, options = {})
    settings = DEFAULT_QUERY_OPTIONS.merge(options.deep_symbolize_keys)

    sort = search_by_index_sort(settings[:sort_by], settings[:order_by])
    query = { bool: { must: [] } }

    # merge a copy so that the caller's extension is not modified
    extension = settings[:query_extension]
    query.deep_merge!(extension.deep_dup) if extension

    query[:bool][:must] << condition

    {
      from: settings[:from],
      size: settings[:limit],
      sort: sort,
      query: query,
    }
  end
  682. end