riakkv.conf 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # Warn if a list keys operation is running.
  2. template: riakkv_list_keys_active
  3. on: riak.core.fsm_active
  4. class: Utilization
  5. type: Database
  6. component: Riak KV
  7. calc: $list_fsm_active
  8. units: state machines
  9. every: 10s
  10. warn: $list_fsm_active > 0
  11. summary: Riak KV active list keys
  12. info: Number of currently running list keys finite state machines
  13. to: dba
  14. ## Timing healthchecks
  15. # KV GET
  16. template: riakkv_1h_kv_get_mean_latency
  17. on: riak.kv.latency.get
  18. class: Latency
  19. type: Database
  20. component: Riak KV
  21. calc: $node_get_fsm_time_mean
  22. lookup: average -1h unaligned of time
  23. every: 30s
  24. units: ms
  25. info: Average time between reception of client GET request and \
  26. subsequent response to the client over the last hour
  27. template: riakkv_kv_get_slow
  28. on: riak.kv.latency.get
  29. class: Latency
  30. type: Database
  31. component: Riak KV
  32. calc: $mean
  33. lookup: average -3m unaligned of time
  34. units: ms
  35. every: 10s
  36. warn: ($this > ($riakkv_1h_kv_get_mean_latency * 2) )
  37. crit: ($this > ($riakkv_1h_kv_get_mean_latency * 3) )
  38. summary: Riak KV GET latency
  39. info: Average time between reception of client GET request and \
  40. subsequent response to the client over the last 3 minutes, \
  41. compared to the average over the last hour
  42. delay: down 5m multiplier 1.5 max 1h
  43. to: dba
  44. # KV PUT
  45. template: riakkv_1h_kv_put_mean_latency
  46. on: riak.kv.latency.put
  47. class: Latency
  48. type: Database
  49. component: Riak KV
  50. calc: $node_put_fsm_time_mean
  51. lookup: average -1h unaligned of time
  52. every: 30s
  53. units: ms
  54. summary: Riak KV PUT mean latency
  55. info: Average time between reception of client PUT request and \
  56. subsequent response to the client over the last hour
  57. template: riakkv_kv_put_slow
  58. on: riak.kv.latency.put
  59. class: Latency
  60. type: Database
  61. component: Riak KV
  62. calc: $mean
  63. lookup: average -3m unaligned of time
  64. units: ms
  65. every: 10s
  66. warn: ($this > ($riakkv_1h_kv_put_mean_latency * 2) )
  67. crit: ($this > ($riakkv_1h_kv_put_mean_latency * 3) )
  68. summary: Riak KV PUT latency
  69. info: Average time between reception of client PUT request and \
  70. subsequent response to the client over the last 3 minutes, \
  71. compared to the average over the last hour
  72. delay: down 5m multiplier 1.5 max 1h
  73. to: dba
  74. ## VM healthchecks
  75. # Default Erlang VM process limit: 262144
  76. # On systems observed, this is < 2000, but may grow depending on load.
  77. template: riakkv_vm_high_process_count
  78. on: riak.vm
  79. class: Utilization
  80. type: Database
  81. component: Riak KV
  82. calc: $sys_process_count
  83. units: processes
  84. every: 10s
  85. warn: $this > 10000
  86. crit: $this > 100000
  87. summary: Riak KV number of processes
  88. info: Number of processes running in the Erlang VM
  89. to: dba