# elasticsearch.conf

  # You can disable an alarm notification by setting its 'to' line to: silent

  # Cluster health status is red -> CRITICAL.
  # 'red' is a threshold, so the 'red' dimension cannot be looked up by name;
  # the simple pattern '*ed' is used as a workaround to select it.
  # Checked every 10s against the average of the last 5s; the alarm is raised
  # when that value equals 1.
  # 'delay: down ...' holds back notifications when the status decreases
  # (see the Netdata health reference for the exact multiplier/max semantics).
  template: elasticsearch_cluster_health_status_red
        on: elasticsearch.cluster_health_status
     class: Errors
      type: SearchEngine
 component: Elasticsearch
    lookup: average -5s unaligned of *ed
     every: 10s
     units: status
      crit: $this == 1
     delay: down 5m multiplier 1.5 max 1h
   summary: Elasticsearch cluster ${label:cluster_name} status
      info: Elasticsearch cluster ${label:cluster_name} health status is red.
        to: sysadmin

  # Cluster health status is yellow -> WARNING.
  # The idea of the '-10m' lookup window is to tolerate a yellow status after
  # a node restart: (usually) no action is required because Elasticsearch will
  # automatically restore the green status.
  # Checked every 1m; the alarm is raised when the 10-minute average of the
  # 'yellow' dimension equals 1 (presumably only when the status stayed yellow
  # for the whole window - assumes the dimension is 0/1, confirm).
  template: elasticsearch_cluster_health_status_yellow
        on: elasticsearch.cluster_health_status
     class: Errors
      type: SearchEngine
 component: Elasticsearch
    lookup: average -10m unaligned of yellow
     every: 1m
     units: status
      warn: $this == 1
     delay: down 5m multiplier 1.5 max 1h
   summary: Elasticsearch cluster ${label:cluster_name} status
      info: Elasticsearch cluster ${label:cluster_name} health status is yellow.
        to: sysadmin

  # Index health status is red -> WARNING (per index, via the 'index' label).
  # The dimension is selected with the simple pattern '*ed' instead of the
  # literal name 'red', because 'red' cannot be used directly in a lookup.
  # Checked every 10s against the average of the last 5s; the alarm is raised
  # when that value equals 1.
  template: elasticsearch_node_index_health_red
        on: elasticsearch.node_index_health
     class: Errors
      type: SearchEngine
 component: Elasticsearch
    lookup: average -5s unaligned of *ed
     every: 10s
     units: status
      warn: $this == 1
     delay: down 5m multiplier 1.5 max 1h
   summary: Elasticsearch cluster ${label:cluster_name} index ${label:index} status
      info: Elasticsearch cluster ${label:cluster_name} index ${label:index} health status is red.
        to: sysadmin

  # Slow search queries on a node -> WARNING.
  # Don't convert the 'lookup' value to seconds in 'calc': the UI shows
  # seconds as hh:mm:ss (0 as now), so the value is kept in milliseconds.
  # Hysteresis on the 10-minute average of the 'query' dimension:
  #   - raised when the value exceeds 30 * 1000 ms;
  #   - once in WARNING (or higher), kept until it is no longer above
  #     20 * 1000 ms.
  template: elasticsearch_node_indices_search_time_query
        on: elasticsearch.node_indices_search_time
     class: Workload
      type: SearchEngine
 component: Elasticsearch
    lookup: average -10m unaligned of query
     every: 10s
     units: milliseconds
      warn: $this > (($status >= $WARNING) ? (20 * 1000) : (30 * 1000))
     delay: down 5m multiplier 1.5 max 1h
   summary: Elasticsearch cluster ${label:cluster_name} node ${label:node_name} query performance
      info: Elasticsearch cluster ${label:cluster_name} node ${label:node_name} search performance is degraded, queries run slowly.
        to: sysadmin

  # Slow search fetches on a node -> WARNING / CRITICAL.
  # Values stay in milliseconds (no conversion to seconds), matching 'units'.
  # Hysteresis on the 10-minute average of the 'fetch' dimension:
  #   - WARNING:  raised above 5 * 1000 ms; once in WARNING (or higher), kept
  #               until the value is no longer above 3 * 1000 ms.
  #   - CRITICAL: raised above 30 * 1000 ms; once CRITICAL, kept until the
  #               value is no longer above 5 * 1000 ms.
  template: elasticsearch_node_indices_search_time_fetch
        on: elasticsearch.node_indices_search_time
     class: Workload
      type: SearchEngine
 component: Elasticsearch
    lookup: average -10m unaligned of fetch
     every: 10s
     units: milliseconds
      warn: $this > (($status >= $WARNING) ? (3 * 1000) : (5 * 1000))
      crit: $this > (($status == $CRITICAL) ? (5 * 1000) : (30 * 1000))
     delay: down 5m multiplier 1.5 max 1h
   summary: Elasticsearch cluster ${label:cluster_name} node ${label:node_name} fetch performance
      info: Elasticsearch cluster ${label:cluster_name} node ${label:node_name} search performance is degraded, fetches run slowly.
        to: sysadmin