# memory.conf
# Health alarms for memory hardware errors: ECC correctable errors,
# ECC uncorrectable errors, and hardware-corrupted memory.
  # You can disable the notification of any alarm by setting its 'to' line to: silent
  # Alarm: correctable ECC memory errors.
  # Sums the mem.ecc_ce chart over the last 10 minutes, re-evaluates the sum
  # every minute, and raises a WARNING as soon as the sum is above zero.
  # NOTE(review): the "1hour" in the alarm name does not match the 10-minute
  # lookup window (it may refer to the 1h recovery delay below). The name is
  # kept as-is because it is the identifier other configuration may refer to.
  alarm: 1hour_ecc_memory_correctable
  on: mem.ecc_ce
  class: Errors
  type: System
  component: Memory
  os: linux
  hosts: *
  # 'unaligned': do not snap the 10-minute window to round clock boundaries.
  lookup: sum -10m unaligned
  units: errors
  every: 1m
  warn: $this > 0
  # Hold back the notification for a move to a lower status by 1h; with
  # 'max 1h' the 1.5 multiplier can never stretch the delay beyond that.
  delay: down 1h multiplier 1.5 max 1h
  info: number of ECC correctable errors in the last 10 minutes
  to: sysadmin
  # Alarm: uncorrectable ECC memory errors.
  # Sums the mem.ecc_ue chart over the last 10 minutes, re-evaluates the sum
  # every minute, and raises a CRITICAL alarm (there is no warning level here)
  # as soon as the sum is above zero.
  # NOTE(review): the "1hour" in the alarm name does not match the 10-minute
  # lookup window (it may refer to the 1h recovery delay below). The name is
  # kept as-is because it is the identifier other configuration may refer to.
  alarm: 1hour_ecc_memory_uncorrectable
  on: mem.ecc_ue
  class: Errors
  type: System
  component: Memory
  os: linux
  hosts: *
  # 'unaligned': do not snap the 10-minute window to round clock boundaries.
  lookup: sum -10m unaligned
  units: errors
  every: 1m
  crit: $this > 0
  # Hold back the notification for a move to a lower status by 1h; with
  # 'max 1h' the 1.5 multiplier can never stretch the delay beyond that.
  delay: down 1h multiplier 1.5 max 1h
  info: number of ECC uncorrectable errors in the last 10 minutes
  to: sysadmin
  # Alarm: memory marked as corrupted by a hardware failure.
  # Unlike the ECC alarms in this file, this one has no lookup window: every
  # 10 seconds it reads the current value of the HardwareCorrupted dimension
  # of the mem.hwcorrupt chart and raises a WARNING while it is above zero.
  # NOTE(review): the "1hour" in the alarm name presumably refers to the 1h
  # recovery delay below - confirm. The name is kept as-is because it is the
  # identifier other configuration may refer to.
  alarm: 1hour_memory_hw_corrupted
  on: mem.hwcorrupt
  class: Errors
  type: System
  component: Memory
  os: linux
  hosts: *
  calc: $HardwareCorrupted
  units: MB
  every: 10s
  warn: $this > 0
  # Hold back the notification for a move to a lower status by 1h; with
  # 'max 1h' the 1.5 multiplier can never stretch the delay beyond that.
  delay: down 1h multiplier 1.5 max 1h
  info: amount of memory corrupted due to a hardware failure
  to: sysadmin