hdfs.conf 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. # Common
  2. template: hdfs_capacity_usage
  3. on: hdfs.capacity
  4. class: Utilization
  5. type: Storage
  6. component: HDFS
  7. calc: ($used) * 100 / ($used + $remaining)
  8. units: %
  9. every: 10s
  10. warn: $this > (($status >= $WARNING) ? (70) : (80))
  11. crit: $this > (($status == $CRITICAL) ? (80) : (98))
  12. delay: down 15m multiplier 1.5 max 1h
  13. info: overall space utilization across all DataNodes
  14. to: sysadmin
  15. # NameNode
  16. template: hdfs_missing_blocks
  17. on: hdfs.blocks
  18. class: Errors
  19. type: Storage
  20. component: HDFS
  21. calc: $missing
  22. units: missing blocks
  23. every: 10s
  24. warn: $this > 0
  25. delay: down 15m multiplier 1.5 max 1h
  26. info: number of missing blocks
  27. to: sysadmin
  28. template: hdfs_stale_nodes
  29. on: hdfs.data_nodes
  30. class: Errors
  31. type: Storage
  32. component: HDFS
  33. calc: $stale
  34. units: stale nodes
  35. every: 10s
  36. warn: $this > 0
  37. delay: down 15m multiplier 1.5 max 1h
  38. info: number of datanodes marked stale due to delayed heartbeat
  39. to: sysadmin
  40. template: hdfs_dead_nodes
  41. on: hdfs.data_nodes
  42. class: Errors
  43. type: Storage
  44. component: HDFS
  45. calc: $dead
  46. units: dead nodes
  47. every: 10s
  48. crit: $this > 0
  49. delay: down 15m multiplier 1.5 max 1h
  50. info: number of datanodes which are currently dead
  51. to: sysadmin
  52. # DataNode
  53. template: hdfs_num_failed_volumes
  54. on: hdfs.num_failed_volumes
  55. class: Errors
  56. type: Storage
  57. component: HDFS
  58. calc: $fsds_num_failed_volumes
  59. units: failed volumes
  60. every: 10s
  61. warn: $this > 0
  62. delay: down 15m multiplier 1.5 max 1h
  63. info: number of failed volumes
  64. to: sysadmin