dbengine.conf 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. # You can disable an alarm's notifications by setting its 'to' line to: silent
  # Alarm: DB engine filesystem errors.
  # Sums the 'fs_errors' dimension of chart netdata.dbengine_global_errors over
  # the last 10 minutes, re-evaluated every 10s. Any sum above zero is CRITICAL.
  # Notifications are sent to the 'sysadmin' recipient role.
  # NOTE(review): the 'delay: down ...' line presumably postpones the
  # notification sent when the alarm clears (15m, growing 1.5x, capped at 1h)
  # -- confirm against the Netdata health configuration reference.
  2. alarm: 10min_dbengine_global_fs_errors
  3. on: netdata.dbengine_global_errors
  4. class: Errors
  5. type: Netdata
  6. component: DB engine
  7. os: linux freebsd macos
  8. hosts: *
  9. lookup: sum -10m unaligned of fs_errors
  10. units: errors
  11. every: 10s
  12. crit: $this > 0
  13. delay: down 15m multiplier 1.5 max 1h
  14. summary: Netdata DBengine filesystem errors
  15. info: Number of filesystem errors in the last 10 minutes (too many open files, wrong permissions, etc)
  16. to: sysadmin
  # Alarm: DB engine I/O errors.
  # Sums the 'io_errors' dimension of chart netdata.dbengine_global_errors over
  # the last 10 minutes, re-evaluated every 10s. Any sum above zero is CRITICAL.
  # Notifications are sent to the 'sysadmin' recipient role.
  # NOTE(review): the 'delay: down ...' line presumably postpones the
  # notification sent when the alarm clears (1h, growing 1.5x, capped at 3h)
  # -- confirm against the Netdata health configuration reference.
  17. alarm: 10min_dbengine_global_io_errors
  18. on: netdata.dbengine_global_errors
  19. class: Errors
  20. type: Netdata
  21. component: DB engine
  22. os: linux freebsd macos
  23. hosts: *
  24. lookup: sum -10m unaligned of io_errors
  25. units: errors
  26. every: 10s
  27. crit: $this > 0
  28. delay: down 1h multiplier 1.5 max 3h
  29. summary: Netdata DBengine IO errors
  30. info: Number of IO errors in the last 10 minutes (CRC errors, out of space, bad disk, etc)
  31. to: sysadmin
  # Alarm: DB engine flushing pressure (warning only).
  # Sums the 'pg_cache_over_half_dirty_events' dimension of chart
  # netdata.dbengine_global_errors over the last 10 minutes, re-evaluated every
  # 10s. Any sum above zero raises WARNING; this alarm has no critical threshold.
  # Notifications are sent to the 'sysadmin' recipient role.
  # The 'info' text spans two lines; the trailing backslash continues it, so
  # nothing may be inserted between those two lines.
  32. alarm: 10min_dbengine_global_flushing_warnings
  33. on: netdata.dbengine_global_errors
  34. class: Errors
  35. type: Netdata
  36. component: DB engine
  37. os: linux freebsd macos
  38. hosts: *
  39. lookup: sum -10m unaligned of pg_cache_over_half_dirty_events
  40. units: errors
  41. every: 10s
  42. warn: $this > 0
  43. delay: down 1h multiplier 1.5 max 3h
  44. summary: Netdata DBengine global flushing warnings
  45. info: Number of times when dbengine dirty pages were over 50% of the instance's page cache in the last 10 minutes. \
  46. Metric data are at risk of not being stored in the database. To remedy, reduce disk load or use faster disks.
  47. to: sysadmin
  # Alarm: DB engine pages dropped under flushing pressure.
  # Unlike the other alarms in this file, this one reads chart
  # netdata.dbengine_long_term_page_stats. It sums the
  # 'flushing_pressure_deletions' dimension over the last 10 minutes,
  # re-evaluated every 10s, and is CRITICAL whenever the sum is non-zero
  # (the test is '!= 0', not '> 0').
  # Notifications are sent to the 'sysadmin' recipient role.
  # The 'info' text spans two lines; the trailing backslash continues it, so
  # nothing may be inserted between those two lines.
  48. alarm: 10min_dbengine_global_flushing_errors
  49. on: netdata.dbengine_long_term_page_stats
  50. class: Errors
  51. type: Netdata
  52. component: DB engine
  53. os: linux freebsd macos
  54. hosts: *
  55. lookup: sum -10m unaligned of flushing_pressure_deletions
  56. units: pages
  57. every: 10s
  58. crit: $this != 0
  59. delay: down 1h multiplier 1.5 max 3h
  60. summary: Netdata DBengine global flushing errors
  61. info: Number of pages deleted due to failure to flush data to disk in the last 10 minutes. \
  62. Metric data were lost to unblock data collection. To fix, reduce disk load or use faster disks.
  63. to: sysadmin