riakkv.conf 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. # Warn whenever at least one list-keys finite state machine is running (list_fsm_active > 0).
  2. template: riakkv_list_keys_active
  3. on: riak.core.fsm_active
  4. class: Utilization
  5. type: Database
  6. component: Riak KV
  7. calc: $list_fsm_active
  8. units: state machines
  9. every: 10s
  10. warn: $list_fsm_active > 0
  11. info: number of currently running list keys finite state machines
  12. to: dba
  13. ## Timing healthchecks
  14. # KV GET: 1h baseline of the mean GET latency, then a 3m average compared against that baseline.
  15. template: riakkv_1h_kv_get_mean_latency
  16. on: riak.kv.latency.get
  17. class: Latency
  18. type: Database
  19. component: Riak KV
  20. # No calc on purpose: a calc runs after the lookup and would replace the 1h average with the latest sample.
  21. lookup: average -1h unaligned of node_get_fsm_time_mean
  22. every: 30s
  23. units: ms
  24. info: average time between reception of client GET request and \
  25. subsequent response to client over the last hour
  26. template: riakkv_kv_get_slow
  27. on: riak.kv.latency.get
  28. class: Latency
  29. type: Database
  30. component: Riak KV
  31. # No calc on purpose: $this must stay the 3m average of the mean dimension produced by the lookup.
  32. lookup: average -3m unaligned of mean
  33. units: ms
  34. every: 10s
  35. warn: ($this > ($riakkv_1h_kv_get_mean_latency * 2) )
  36. crit: ($this > ($riakkv_1h_kv_get_mean_latency * 3) )
  37. info: average time between reception of client GET request and \
  38. subsequent response to the client over the last 3 minutes, \
  39. compared to the average over the last hour
  40. delay: down 5m multiplier 1.5 max 1h
  41. to: dba
  42. # KV PUT: 1h baseline of the mean PUT latency, then a 3m average compared against that baseline.
  43. template: riakkv_1h_kv_put_mean_latency
  44. on: riak.kv.latency.put
  45. class: Latency
  46. type: Database
  47. component: Riak KV
  48. # No calc on purpose: a calc runs after the lookup and would replace the 1h average with the latest sample.
  49. lookup: average -1h unaligned of node_put_fsm_time_mean
  50. every: 30s
  51. units: ms
  52. info: average time between reception of client PUT request and \
  53. subsequent response to the client over the last hour
  54. template: riakkv_kv_put_slow
  55. on: riak.kv.latency.put
  56. class: Latency
  57. type: Database
  58. component: Riak KV
  59. # No calc on purpose: $this must stay the 3m average of the mean dimension produced by the lookup.
  60. lookup: average -3m unaligned of mean
  61. units: ms
  62. every: 10s
  63. warn: ($this > ($riakkv_1h_kv_put_mean_latency * 2) )
  64. crit: ($this > ($riakkv_1h_kv_put_mean_latency * 3) )
  65. info: average time between reception of client PUT request and \
  66. subsequent response to the client over the last 3 minutes, \
  67. compared to the average over the last hour
  68. delay: down 5m multiplier 1.5 max 1h
  69. to: dba
  70. ## VM healthchecks
  71. # The default Erlang VM process limit is 262144; this alarm warns above 10000 and goes critical above 100000.
  72. # On the systems observed the process count stays below 2000, but it may grow with load.
  73. template: riakkv_vm_high_process_count
  74. on: riak.vm
  75. class: Utilization
  76. type: Database
  77. component: Riak KV
  78. calc: $sys_process_count
  79. units: processes
  80. every: 10s
  81. warn: $this > 10000
  82. crit: $this > 100000
  83. info: number of processes running in the Erlang VM
  84. to: dba