metadata.yaml 171 KB


  1. plugin_name: proc.plugin
  2. modules:
  3. - meta:
  4. plugin_name: proc.plugin
  5. module_name: /proc/stat
  6. monitored_instance:
  7. name: System statistics
  8. link: ""
  9. categories:
  10. - data-collection.linux-systems.system-metrics
  11. icon_filename: "linuxserver.svg"
  12. related_resources:
  13. integrations:
  14. list: []
  15. info_provided_to_referring_integrations:
  16. description: ""
  17. keywords:
  18. - cpu utilization
  19. - process counts
  20. most_popular: false
  21. overview:
  22. data_collection:
  23. metrics_description: |
  24. CPU utilization, states, and frequencies, along with key Linux system performance metrics.
  25. The `/proc/stat` file provides various types of system statistics:
  26. - The overall system CPU usage statistics
  27. - Per CPU core statistics
  28. - The total context switching of the system
  29. - The total number of processes running
  30. - The total CPU interrupts
  31. - The total CPU softirqs
  32. The collector also reads:
  33. - `/proc/schedstat` for statistics about the process scheduler in the Linux kernel.
  34. - `/sys/devices/system/cpu/[X]/thermal_throttle/core_throttle_count` to get the count of thermal throttling events for a specific CPU core on Linux systems.
  35. - `/sys/devices/system/cpu/[X]/thermal_throttle/package_throttle_count` to get the count of thermal throttling events for a specific CPU package on a Linux system.
  36. - `/sys/devices/system/cpu/[X]/cpufreq/scaling_cur_freq` to get the current operating frequency of a specific CPU core.
  37. - `/sys/devices/system/cpu/[X]/cpufreq/stats/time_in_state` to get the amount of time the CPU has spent in each of its available frequency states.
  38. - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/name` to get the names of the idle states for each CPU core in a Linux system.
  39. - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/time` to get the total time each specific CPU core has spent in each idle state since the system was started.
  40. method_description: ""
  41. supported_platforms:
  42. include: ["linux"]
  43. exclude: []
  44. multi_instance: false
  45. additional_permissions:
  46. description: ""
  47. default_behavior:
  48. auto_detection:
  49. description: |
  50. The collector auto-detects all metrics. No configuration is needed.
  51. limits:
  52. description: ""
  53. performance_impact:
  54. description: |
  55. The collector disables CPU frequency and idle state monitoring when there are more than 128 CPU cores available.
  56. setup:
  57. prerequisites:
  58. list: []
  59. configuration:
  60. file:
  61. section_name: "plugin:proc:/proc/stat"
  62. name: "netdata.conf"
  63. description: ""
  64. options:
  65. description: ""
  66. folding:
  67. title: ""
  68. enabled: true
  69. list: []
  70. examples:
  71. folding:
  72. enabled: true
  73. title: ""
  74. list: []
  75. troubleshooting:
  76. problems:
  77. list: []
  78. alerts:
  79. - name: 10min_cpu_usage
  80. link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf
  81. metric: system.cpu
  82. info: average CPU utilization over the last 10 minutes (excluding iowait, nice and steal)
  83. os: "linux"
  84. - name: 10min_cpu_iowait
  85. link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf
  86. metric: system.cpu
  87. info: average CPU iowait time over the last 10 minutes
  88. os: "linux"
  89. - name: 20min_steal_cpu
  90. link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf
  91. metric: system.cpu
  92. info: average CPU steal time over the last 20 minutes
  93. os: "linux"
  94. metrics:
  95. folding:
  96. title: Metrics
  97. enabled: false
  98. description: ""
  99. availability: []
  100. scopes:
  101. - name: global
  102. description: ""
  103. labels: []
  104. metrics:
  105. - name: system.cpu
  106. description: Total CPU utilization
  107. unit: "percentage"
  108. chart_type: stacked
  109. dimensions:
  110. - name: guest_nice
  111. - name: guest
  112. - name: steal
  113. - name: softirq
  114. - name: irq
  115. - name: user
  116. - name: system
  117. - name: nice
  118. - name: iowait
  119. - name: idle
  120. - name: system.intr
  121. description: CPU Interrupts
  122. unit: "interrupts/s"
  123. chart_type: line
  124. dimensions:
  125. - name: interrupts
  126. - name: system.ctxt
  127. description: CPU Context Switches
  128. unit: "context switches/s"
  129. chart_type: line
  130. dimensions:
  131. - name: switches
  132. - name: system.forks
  133. description: Started Processes
  134. unit: "processes/s"
  135. chart_type: line
  136. dimensions:
  137. - name: started
  138. - name: system.processes
  139. description: System Processes
  140. unit: "processes"
  141. chart_type: line
  142. dimensions:
  143. - name: running
  144. - name: blocked
  145. - name: cpu.core_throttling
  146. description: Core Thermal Throttling Events
  147. unit: "events/s"
  148. chart_type: line
  149. dimensions:
  150. - name: a dimension per cpu core
  151. - name: cpu.package_throttling
  152. description: Package Thermal Throttling Events
  153. unit: "events/s"
  154. chart_type: line
  155. dimensions:
  156. - name: a dimension per package
  157. - name: cpu.cpufreq
  158. description: Current CPU Frequency
  159. unit: "MHz"
  160. chart_type: line
  161. dimensions:
  162. - name: a dimension per cpu core
  163. - name: cpu core
  164. description: ""
  165. labels:
  166. - name: cpu
  167. description: TBD
  168. metrics:
  169. - name: cpu.cpu
  170. description: Core utilization
  171. unit: "percentage"
  172. chart_type: stacked
  173. dimensions:
  174. - name: guest_nice
  175. - name: guest
  176. - name: steal
  177. - name: softirq
  178. - name: irq
  179. - name: user
  180. - name: system
  181. - name: nice
  182. - name: iowait
  183. - name: idle
  184. - name: cpuidle.cpu_cstate_residency_time
  185. description: C-state residency time
  186. unit: "percentage"
  187. chart_type: stacked
  188. dimensions:
  189. - name: a dimension per c-state
  190. - meta:
  191. plugin_name: proc.plugin
  192. module_name: /proc/sys/kernel/random/entropy_avail
  193. monitored_instance:
  194. name: Entropy
  195. link: ""
  196. categories:
  197. - data-collection.linux-systems.system-metrics
  198. icon_filename: "syslog.png"
  199. related_resources:
  200. integrations:
  201. list: []
  202. info_provided_to_referring_integrations:
  203. description: ""
  204. keywords:
  205. - entropy
  206. most_popular: false
  207. overview:
  208. data_collection:
  209. metrics_description: |
  210. Entropy is a measure of the randomness or unpredictability of data.
  211. In the context of cryptography, entropy is used to generate random numbers or keys that are essential for
  212. secure communication and encryption. Without a good source of entropy, cryptographic protocols can become
  213. vulnerable to attacks that exploit the predictability of the generated keys.
  214. In most operating systems, entropy is generated by collecting random events from various sources, such as
  215. hardware interrupts, mouse movements, keyboard presses, and disk activity. These events are fed into a pool
  216. of entropy, which is then used to generate random numbers when needed.
  217. The `/dev/random` device in Linux is one such source of entropy, and it provides an interface for programs
  218. to access the pool of entropy. When a program requests random numbers, it reads from the `/dev/random` device,
  219. which blocks until enough entropy is available to generate the requested numbers. This ensures that the
  220. generated numbers are truly random and not predictable.
  221. However, if the pool of entropy gets depleted, the `/dev/random` device may block indefinitely, causing
  222. programs that rely on random numbers to slow down or even freeze. This is especially problematic for
  223. cryptographic protocols that require a continuous stream of random numbers, such as SSL/TLS and SSH.
  224. To avoid this issue, some systems use a hardware random number generator (RNG) to generate high-quality
  225. entropy. A hardware RNG generates random numbers by measuring physical phenomena, such as thermal noise or
  226. radioactive decay. These sources of randomness are considered to be more reliable and unpredictable than
  227. software-based sources.
  228. One such hardware RNG is the Trusted Platform Module (TPM), which is a dedicated hardware chip that is used
  229. for cryptographic operations and secure boot. The TPM contains a built-in hardware RNG that generates
  230. high-quality entropy, which can be used to seed the pool of entropy in the operating system.
  231. Alternatively, software-based solutions such as `Haveged` can be used to generate additional entropy by
  232. exploiting sources of randomness in the system, such as CPU utilization and network traffic. These solutions
  233. can help to mitigate the risk of entropy depletion, but they may not be as reliable as hardware-based solutions.
  234. method_description: ""
  235. supported_platforms:
  236. include: ["linux"]
  237. exclude: []
  238. multi_instance: false
  239. additional_permissions:
  240. description: ""
  241. default_behavior:
  242. auto_detection:
  243. description: ""
  244. limits:
  245. description: ""
  246. performance_impact:
  247. description: ""
  248. setup:
  249. prerequisites:
  250. list: []
  251. configuration:
  252. file:
  253. name: ""
  254. description: ""
  255. options:
  256. description: ""
  257. folding:
  258. title: ""
  259. enabled: true
  260. list: []
  261. examples:
  262. folding:
  263. enabled: true
  264. title: ""
  265. list: []
  266. troubleshooting:
  267. problems:
  268. list: []
  269. alerts:
  270. - name: lowest_entropy
  271. link: https://github.com/netdata/netdata/blob/master/health/health.d/entropy.conf
  272. metric: system.entropy
  273. info: minimum number of bits of entropy available for the kernel’s random number generator
  274. metrics:
  275. folding:
  276. title: Metrics
  277. enabled: false
  278. description: ""
  279. availability: []
  280. scopes:
  281. - name: global
  282. description: ""
  283. labels: []
  284. metrics:
  285. - name: system.entropy
  286. description: Available Entropy
  287. unit: "entropy"
  288. chart_type: line
  289. dimensions:
  290. - name: entropy
  291. - meta:
  292. plugin_name: proc.plugin
  293. module_name: /proc/uptime
  294. monitored_instance:
  295. name: System Uptime
  296. link: ""
  297. categories:
  298. - data-collection.linux-systems.system-metrics
  299. icon_filename: "linuxserver.svg"
  300. related_resources:
  301. integrations:
  302. list: []
  303. info_provided_to_referring_integrations:
  304. description: ""
  305. keywords:
  306. - uptime
  307. most_popular: false
  308. overview:
  309. data_collection:
  310. metrics_description: |
  311. The amount of time the system has been up (running).
  312. Uptime is a critical aspect of overall system performance:
  313. - **Availability**: Uptime monitoring can show whether a server is consistently available or experiences frequent downtimes.
  314. - **Performance Monitoring**: While server uptime alone doesn't provide detailed performance data, analyzing the duration and frequency of downtimes can help identify patterns or trends.
  315. - **Proactive problem detection**: If server uptime monitoring reveals unexpected downtimes or a decreasing uptime trend, it can serve as an early warning sign of potential problems.
  316. - **Root cause analysis**: When investigating server downtime, the uptime metric alone may not provide enough information to pinpoint the exact cause.
  317. - **Load balancing**: Uptime data can indirectly indicate load balancing issues if certain servers have significantly lower uptimes than others.
  318. - **Optimize maintenance efforts**: Servers with consistently low uptimes or frequent downtimes may require more attention.
  319. - **Compliance requirements**: Server uptime data can be used to demonstrate compliance with regulatory requirements or SLAs that mandate a minimum level of server availability.
  320. method_description: ""
  321. supported_platforms:
  322. include: ["linux"]
  323. exclude: []
  324. multi_instance: false
  325. additional_permissions:
  326. description: ""
  327. default_behavior:
  328. auto_detection:
  329. description: ""
  330. limits:
  331. description: ""
  332. performance_impact:
  333. description: ""
  334. setup:
  335. prerequisites:
  336. list: []
  337. configuration:
  338. file:
  339. name: ""
  340. description: ""
  341. options:
  342. description: ""
  343. folding:
  344. title: ""
  345. enabled: true
  346. list: []
  347. examples:
  348. folding:
  349. enabled: true
  350. title: ""
  351. list: []
  352. troubleshooting:
  353. problems:
  354. list: []
  355. alerts: []
  356. metrics:
  357. folding:
  358. title: Metrics
  359. enabled: false
  360. description: ""
  361. availability: []
  362. scopes:
  363. - name: global
  364. description: ""
  365. labels: []
  366. metrics:
  367. - name: system.uptime
  368. description: System Uptime
  369. unit: "seconds"
  370. chart_type: line
  371. dimensions:
  372. - name: uptime
  373. - meta:
  374. plugin_name: proc.plugin
  375. module_name: /proc/vmstat
  376. monitored_instance:
  377. name: Memory Statistics
  378. link: ""
  379. categories:
  380. - data-collection.linux-systems.memory-metrics
  381. icon_filename: "linuxserver.svg"
  382. related_resources:
  383. integrations:
  384. list: []
  385. info_provided_to_referring_integrations:
  386. description: ""
  387. keywords:
  388. - swap
  389. - page faults
  390. - oom
  391. - numa
  392. most_popular: false
  393. overview:
  394. data_collection:
  395. metrics_description: |
  396. Linux Virtual memory subsystem.
  397. Information about memory management, indicating how effectively the kernel allocates and frees
  398. memory resources in response to system demands.
  399. Monitors page faults, which occur when a process requests a portion of its memory that isn't
  400. immediately available. Monitoring these events can help diagnose inefficiencies in memory management and
  401. provide insights into application behavior.
  402. Tracks swapping activity — a vital aspect of memory management where the kernel moves data from RAM to
  403. swap space, and vice versa, based on memory demand and usage. It also monitors the utilization of zswap,
  404. a compressed cache for swap pages, and provides insights into its usage and performance implications.
  405. In the context of virtualized environments, it tracks the ballooning mechanism which is used to balance
  406. memory resources between host and guest systems.
  407. For systems using NUMA architecture, it provides insights into the local and remote memory accesses, which
  408. can impact the performance based on the memory access times.
  409. The collector also watches for 'Out of Memory' kills, a drastic measure taken by the system when it runs out
  410. of memory resources.
  411. method_description: ""
  412. supported_platforms:
  413. include: ["linux"]
  414. exclude: []
  415. multi_instance: false
  416. additional_permissions:
  417. description: ""
  418. default_behavior:
  419. auto_detection:
  420. description: ""
  421. limits:
  422. description: ""
  423. performance_impact:
  424. description: ""
  425. setup:
  426. prerequisites:
  427. list: []
  428. configuration:
  429. file:
  430. name: ""
  431. description: ""
  432. options:
  433. description: ""
  434. folding:
  435. title: ""
  436. enabled: true
  437. list: []
  438. examples:
  439. folding:
  440. enabled: true
  441. title: ""
  442. list: []
  443. troubleshooting:
  444. problems:
  445. list: []
  446. alerts:
  447. - name: 30min_ram_swapped_out
  448. link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf
  449. metric: mem.swapio
  450. info: percentage of the system RAM swapped in the last 30 minutes
  451. os: "linux freebsd"
  452. - name: oom_kill
  453. link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
  454. metric: mem.oom_kill
  455. info: number of out of memory kills in the last 30 minutes
  456. os: "linux"
  457. metrics:
  458. folding:
  459. title: Metrics
  460. enabled: false
  461. description: ""
  462. availability: []
  463. scopes:
  464. - name: global
  465. description: ""
  466. labels: []
  467. metrics:
  468. - name: mem.swapio
  469. description: Swap I/O
  470. unit: "KiB/s"
  471. chart_type: area
  472. dimensions:
  473. - name: in
  474. - name: out
  475. - name: system.pgpgio
  476. description: Memory Paged from/to disk
  477. unit: "KiB/s"
  478. chart_type: area
  479. dimensions:
  480. - name: in
  481. - name: out
  482. - name: system.pgfaults
  483. description: Memory Page Faults
  484. unit: "faults/s"
  485. chart_type: line
  486. dimensions:
  487. - name: minor
  488. - name: major
  489. - name: mem.balloon
  490. description: Memory Ballooning Operations
  491. unit: "KiB/s"
  492. chart_type: line
  493. dimensions:
  494. - name: inflate
  495. - name: deflate
  496. - name: migrate
  497. - name: mem.zswapio
  498. description: ZSwap I/O
  499. unit: "KiB/s"
  500. chart_type: area
  501. dimensions:
  502. - name: in
  503. - name: out
  504. - name: mem.ksm_cow
  505. description: KSM Copy On Write Operations
  506. unit: "KiB/s"
  507. chart_type: line
  508. dimensions:
  509. - name: swapin
  510. - name: write
  511. - name: mem.thp_faults
  512. description: Transparent Huge Page Fault Allocations
  513. unit: "events/s"
  514. chart_type: line
  515. dimensions:
  516. - name: alloc
  517. - name: fallback
  518. - name: fallback_charge
  519. - name: mem.thp_file
  520. description: Transparent Huge Page File Allocations
  521. unit: "events/s"
  522. chart_type: line
  523. dimensions:
  524. - name: alloc
  525. - name: fallback
  526. - name: mapped
  527. - name: fallback_charge
  528. - name: mem.thp_zero
  529. description: Transparent Huge Zero Page Allocations
  530. unit: "events/s"
  531. chart_type: line
  532. dimensions:
  533. - name: alloc
  534. - name: failed
  535. - name: mem.thp_collapse
  536. description: Transparent Huge Pages Collapsed by khugepaged
  537. unit: "events/s"
  538. chart_type: line
  539. dimensions:
  540. - name: alloc
  541. - name: failed
  542. - name: mem.thp_split
  543. description: Transparent Huge Page Splits
  544. unit: "events/s"
  545. chart_type: line
  546. dimensions:
  547. - name: split
  548. - name: failed
  549. - name: split_pmd
  550. - name: split_deferred
  551. - name: mem.thp_swapout
  552. description: Transparent Huge Pages Swap Out
  553. unit: "events/s"
  554. chart_type: line
  555. dimensions:
  556. - name: swapout
  557. - name: fallback
  558. - name: mem.thp_compact
  559. description: Transparent Huge Pages Compaction
  560. unit: "events/s"
  561. chart_type: line
  562. dimensions:
  563. - name: success
  564. - name: fail
  565. - name: stall
  566. - name: mem.oom_kill
  567. description: Out of Memory Kills
  568. unit: "kills/s"
  569. chart_type: line
  570. dimensions:
  571. - name: kills
  572. - name: mem.numa
  573. description: NUMA events
  574. unit: "events/s"
  575. chart_type: line
  576. dimensions:
  577. - name: local
  578. - name: foreign
  579. - name: interleave
  580. - name: other
  581. - name: pte_updates
  582. - name: huge_pte_updates
  583. - name: hint_faults
  584. - name: hint_faults_local
  585. - name: pages_migrated
  586. - meta:
  587. plugin_name: proc.plugin
  588. module_name: /proc/interrupts
  589. monitored_instance:
  590. name: Interrupts
  591. link: ""
  592. categories:
  593. - data-collection.linux-systems.cpu-metrics
  594. icon_filename: "linuxserver.svg"
  595. related_resources:
  596. integrations:
  597. list: []
  598. info_provided_to_referring_integrations:
  599. description: ""
  600. keywords:
  601. - interrupts
  602. most_popular: false
  603. overview:
  604. data_collection:
  605. metrics_description: |
  606. Monitors `/proc/interrupts`, a file organized by CPU and then by the type of interrupt.
  607. The numbers reported are the counts of the interrupts that have occurred of each type.
  608. An interrupt is a signal to the processor emitted by hardware or software indicating an event that needs
  609. immediate attention. The processor then interrupts its current activities and executes the interrupt handler
  610. to deal with the event. This is part of the way a computer multitasks and handles concurrent processing.
  611. The types of interrupts include:
  612. - **I/O interrupts**: These are caused by I/O devices like the keyboard, mouse, printer, etc. For example, when
  613. you type something on the keyboard, an interrupt is triggered so the processor can handle the new input.
  614. - **Timer interrupts**: These are generated at regular intervals by the system's timer circuit. It's primarily
  615. used to switch the CPU among different tasks.
  616. - **Software interrupts**: These are generated by a program requiring disk I/O operations, or other system resources.
  617. - **Hardware interrupts**: These are caused by hardware conditions such as power failure, overheating, etc.
  618. Monitoring `/proc/interrupts` can be used for:
  619. - **Performance tuning**: If an interrupt is happening very frequently, it could be a sign that a device is not
  620. configured correctly, or there is a software bug causing unnecessary interrupts. This could lead to system
  621. performance degradation.
  622. - **System troubleshooting**: If you're seeing a lot of unexpected interrupts, it could be a sign of a hardware problem.
  623. - **Understanding system behavior**: More generally, keeping an eye on what interrupts are occurring can help you
  624. understand what your system is doing. It can provide insights into the system's interaction with hardware,
  625. drivers, and other parts of the kernel.
  626. method_description: ""
  627. supported_platforms:
  628. include: ["linux"]
  629. exclude: []
  630. multi_instance: true
  631. additional_permissions:
  632. description: ""
  633. default_behavior:
  634. auto_detection:
  635. description: ""
  636. limits:
  637. description: ""
  638. performance_impact:
  639. description: ""
  640. setup:
  641. prerequisites:
  642. list: []
  643. configuration:
  644. file:
  645. name: ""
  646. description: ""
  647. options:
  648. description: ""
  649. folding:
  650. title: ""
  651. enabled: true
  652. list: []
  653. examples:
  654. folding:
  655. enabled: true
  656. title: ""
  657. list: []
  658. troubleshooting:
  659. problems:
  660. list: []
  661. alerts: []
  662. metrics:
  663. folding:
  664. title: Metrics
  665. enabled: false
  666. description: ""
  667. availability: []
  668. scopes:
  669. - name: global
  670. description: ""
  671. labels: []
  672. metrics:
  673. - name: system.interrupts
  674. description: System interrupts
  675. unit: "interrupts/s"
  676. chart_type: stacked
  677. dimensions:
  678. - name: a dimension per device
  679. - name: cpu core
  680. description: ""
  681. labels:
  682. - name: cpu
  683. description: TBD
  684. metrics:
  685. - name: cpu.interrupts
  686. description: CPU interrupts
  687. unit: "interrupts/s"
  688. chart_type: stacked
  689. dimensions:
  690. - name: a dimension per device
  691. - meta:
  692. plugin_name: proc.plugin
  693. module_name: /proc/loadavg
  694. monitored_instance:
  695. name: System Load Average
  696. link: ""
  697. categories:
  698. - data-collection.linux-systems.system-metrics
  699. icon_filename: "linuxserver.svg"
  700. related_resources:
  701. integrations:
  702. list: []
  703. info_provided_to_referring_integrations:
  704. description: ""
  705. keywords:
  706. - load
  707. - load average
  708. most_popular: false
  709. overview:
  710. data_collection:
  711. metrics_description: |
  712. The `/proc/loadavg` file provides information about the system load average.
  713. The load average is a measure of the amount of computational work that a system performs. It is a
  714. representation of the average system load over a period of time.
  715. This file contains three numbers representing the system load averages for the last 1, 5, and 15 minutes,
  716. respectively. It also includes the number of currently running processes and the total number of processes.
  717. Monitoring the load average can be used for:
  718. - **System performance**: If the load average is too high, it may indicate that your system is overloaded.
  719. On a system with a single CPU, if the load average is 1, it means the single CPU is fully utilized. If the
  720. load averages are consistently higher than the number of CPUs/cores, it may indicate that your system is
  721. overloaded and tasks are waiting for CPU time.
  722. - **Troubleshooting**: If the load average is unexpectedly high, it can be a sign of a problem. This could be
  723. due to a runaway process, a software bug, or a hardware issue.
  724. - **Capacity planning**: By monitoring the load average over time, you can understand the trends in your
  725. system's workload. This can help with capacity planning and scaling decisions.
  726. Remember that load average not only considers CPU usage, but also includes processes waiting for disk I/O.
  727. Therefore, high load averages could be due to I/O contention as well as CPU contention.
  728. method_description: ""
  729. supported_platforms:
  730. include: ["linux"]
  731. exclude: []
  732. multi_instance: false
  733. additional_permissions:
  734. description: ""
  735. default_behavior:
  736. auto_detection:
  737. description: ""
  738. limits:
  739. description: ""
  740. performance_impact:
  741. description: ""
  742. setup:
  743. prerequisites:
  744. list: []
  745. configuration:
  746. file:
  747. name: ""
  748. description: ""
  749. options:
  750. description: ""
  751. folding:
  752. title: ""
  753. enabled: true
  754. list: []
  755. examples:
  756. folding:
  757. enabled: true
  758. title: ""
  759. list: []
  760. troubleshooting:
  761. problems:
  762. list: []
  763. alerts:
  764. - name: load_cpu_number
  765. link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
  766. metric: system.load
  767. info: number of active CPU cores in the system
  768. os: "linux"
  769. - name: load_average_15
  770. link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
  771. metric: system.load
  772. info: system fifteen-minute load average
  773. os: "linux"
  774. - name: load_average_5
  775. link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
  776. metric: system.load
  777. info: system five-minute load average
  778. os: "linux"
  779. - name: load_average_1
  780. link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
  781. metric: system.load
  782. info: system one-minute load average
  783. os: "linux"
  784. - name: active_processes
  785. link: https://github.com/netdata/netdata/blob/master/health/health.d/processes.conf
  786. metric: system.active_processes
  787. info: system process IDs (PID) space utilization
  788. metrics:
  789. folding:
  790. title: Metrics
  791. enabled: false
  792. description: ""
  793. availability: []
  794. scopes:
  795. - name: global
  796. description: ""
  797. labels: []
  798. metrics:
  799. - name: system.load
  800. description: System Load Average
  801. unit: "load"
  802. chart_type: line
  803. dimensions:
  804. - name: load1
  805. - name: load5
  806. - name: load15
  807. - name: system.active_processes
  808. description: System Active Processes
  809. unit: "processes"
  810. chart_type: line
  811. dimensions:
  812. - name: active
  813. - meta:
  814. plugin_name: proc.plugin
  815. module_name: /proc/pressure
  816. monitored_instance:
  817. name: Pressure Stall Information
  818. link: ""
  819. categories:
  820. - data-collection.linux-systems.pressure-metrics
  821. icon_filename: "linuxserver.svg"
  822. related_resources:
  823. integrations:
  824. list: []
  825. info_provided_to_referring_integrations:
  826. description: ""
  827. keywords:
  828. - pressure
  829. most_popular: false
  830. overview:
  831. data_collection:
  832. metrics_description: |
  833. Introduced in Linux kernel 4.20, `/proc/pressure` exposes Pressure Stall Information (PSI).
  834. PSI is a kernel feature that tracks the amount of time tasks are stalled due to
  835. resource contention, such as CPU, memory, or I/O.
  836. The collector monitors 4 separate files for CPU, memory, I/O, and IRQ:
  837. - **cpu**: Tracks the amount of time tasks are stalled due to CPU contention.
  838. - **memory**: Tracks the amount of time tasks are stalled due to memory contention.
  839. - **io**: Tracks the amount of time tasks are stalled due to I/O contention.
  840. - **irq**: Tracks the amount of time tasks are stalled due to IRQ contention.
  841. Each of them provides metrics for stall time over the last 10 seconds, 1 minute, and 5 minutes.
  842. Monitoring the /proc/pressure files can provide important insights into system performance and capacity planning:
  843. - **Identifying resource contention**: If these metrics are consistently high, it indicates that tasks are
  844. frequently being stalled due to lack of resources, which can significantly degrade system performance.
  845. - **Troubleshooting performance issues**: If a system is experiencing performance issues, these metrics can
  846. help identify whether resource contention is the cause.
  847. - **Capacity planning**: By monitoring these metrics over time, you can understand trends in resource
  848. utilization and make informed decisions about when to add more resources to your system.
  849. method_description: ""
  850. supported_platforms:
  851. include: ["linux"]
  852. exclude: []
  853. multi_instance: false
  854. additional_permissions:
  855. description: ""
  856. default_behavior:
  857. auto_detection:
  858. description: ""
  859. limits:
  860. description: ""
  861. performance_impact:
  862. description: ""
  863. setup:
  864. prerequisites:
  865. list: []
  866. configuration:
  867. file:
  868. name: ""
  869. description: ""
  870. options:
  871. description: ""
  872. folding:
  873. title: ""
  874. enabled: true
  875. list: []
  876. examples:
  877. folding:
  878. enabled: true
  879. title: ""
  880. list: []
  881. troubleshooting:
  882. problems:
  883. list: []
  884. alerts: []
  885. metrics:
  886. folding:
  887. title: Metrics
  888. enabled: false
  889. description: ""
  890. availability: []
  891. scopes:
  892. - name: global
  893. description: ""
  894. labels: []
  895. metrics:
  896. - name: system.cpu_some_pressure
  897. description: CPU some pressure
  898. unit: "percentage"
  899. chart_type: line
  900. dimensions:
  901. - name: some10
  902. - name: some60
  903. - name: some300
  904. - name: system.cpu_some_pressure_stall_time
  905. description: CPU some pressure stall time
  906. unit: "ms"
  907. chart_type: line
  908. dimensions:
  909. - name: time
  910. - name: system.cpu_full_pressure
  911. description: CPU full pressure
  912. unit: "percentage"
  913. chart_type: line
  914. dimensions:
  915. - name: full10
  916. - name: full60
  917. - name: full300
  918. - name: system.cpu_full_pressure_stall_time
  919. description: CPU full pressure stall time
  920. unit: "ms"
  921. chart_type: line
  922. dimensions:
  923. - name: time
  924. - name: system.memory_some_pressure
  925. description: Memory some pressure
  926. unit: "percentage"
  927. chart_type: line
  928. dimensions:
  929. - name: some10
  930. - name: some60
  931. - name: some300
  932. - name: system.memory_some_pressure_stall_time
  933. description: Memory some pressure stall time
  934. unit: "ms"
  935. chart_type: line
  936. dimensions:
  937. - name: time
  938. - name: system.memory_full_pressure
  939. description: Memory full pressure
  940. unit: "percentage"
  941. chart_type: line
  942. dimensions:
  943. - name: full10
  944. - name: full60
  945. - name: full300
  946. - name: system.memory_full_pressure_stall_time
  947. description: Memory full pressure stall time
  948. unit: "ms"
  949. chart_type: line
  950. dimensions:
  951. - name: time
  952. - name: system.io_some_pressure
  953. description: I/O some pressure
  954. unit: "percentage"
  955. chart_type: line
  956. dimensions:
  957. - name: some10
  958. - name: some60
  959. - name: some300
  960. - name: system.io_some_pressure_stall_time
  961. description: I/O some pressure stall time
  962. unit: "ms"
  963. chart_type: line
  964. dimensions:
  965. - name: time
  966. - name: system.io_full_pressure
  967. description: I/O full pressure
  968. unit: "percentage"
  969. chart_type: line
  970. dimensions:
  971. - name: full10
  972. - name: full60
  973. - name: full300
  974. - name: system.io_full_pressure_stall_time
  975. description: I/O full pressure stall time
  976. unit: "ms"
  977. chart_type: line
  978. dimensions:
  979. - name: time
  980. - meta:
  981. plugin_name: proc.plugin
  982. module_name: /proc/softirqs
  983. monitored_instance:
  984. name: SoftIRQ statistics
  985. link: ""
  986. categories:
  987. - data-collection.linux-systems.cpu-metrics
  988. icon_filename: "linuxserver.svg"
  989. related_resources:
  990. integrations:
  991. list: []
  992. info_provided_to_referring_integrations:
  993. description: ""
  994. keywords:
  995. - softirqs
  996. - interrupts
  997. most_popular: false
  998. overview:
  999. data_collection:
  1000. metrics_description: |
  1001. In the Linux kernel, handling of hardware interrupts is split into two halves: the top half and the bottom half.
  1002. The top half is the routine that responds immediately to an interrupt, while the bottom half is deferred to be processed later.
  1003. Softirqs are a mechanism in the Linux kernel used to handle the bottom halves of interrupts, which can be
  1004. deferred and processed later in a context where it's safe to enable interrupts.
  1005. The actual work of handling the interrupt is offloaded to a softirq and executed later when the system
  1006. decides it's a good time to process them. This helps to keep the system responsive by not blocking the top
  1007. half for too long, which could lead to missed interrupts.
  1008. Monitoring `/proc/softirqs` is useful for:
  1009. - **Performance tuning**: A high rate of softirqs could indicate a performance issue. For instance, a high
  1010. rate of network softirqs (`NET_RX` and `NET_TX`) could indicate a network performance issue.
  1011. - **Troubleshooting**: If a system is behaving unexpectedly, checking the softirqs could provide clues about
  1012. what is going on. For example, a sudden increase in block device softirqs (`BLOCK`) might indicate a problem
  1013. with a disk.
  1014. - **Understanding system behavior**: Knowing what types of softirqs are happening can help you understand what
  1015. your system is doing, particularly in terms of how it's interacting with hardware and how it's handling
  1016. interrupts.
  1017. method_description: ""
  1018. supported_platforms:
  1019. include: []
  1020. exclude: []
  1021. multi_instance: true
  1022. additional_permissions:
  1023. description: ""
  1024. default_behavior:
  1025. auto_detection:
  1026. description: ""
  1027. limits:
  1028. description: ""
  1029. performance_impact:
  1030. description: ""
  1031. setup:
  1032. prerequisites:
  1033. list: []
  1034. configuration:
  1035. file:
  1036. name: ""
  1037. description: ""
  1038. options:
  1039. description: ""
  1040. folding:
  1041. title: ""
  1042. enabled: true
  1043. list: []
  1044. examples:
  1045. folding:
  1046. enabled: true
  1047. title: ""
  1048. list: []
  1049. troubleshooting:
  1050. problems:
  1051. list: []
  1052. alerts: []
  1053. metrics:
  1054. folding:
  1055. title: Metrics
  1056. enabled: false
  1057. description: ""
  1058. availability: []
  1059. scopes:
  1060. - name: global
  1061. description: ""
  1062. labels: []
  1063. metrics:
  1064. - name: system.softirqs
  1065. description: System softirqs
  1066. unit: "softirqs/s"
  1067. chart_type: stacked
  1068. dimensions:
  1069. - name: a dimension per softirq
  1070. - name: cpu core
  1071. description: ""
  1072. labels:
  1073. - name: cpu
  1074. description: TBD
  1075. metrics:
  1076. - name: cpu.softirqs
  1077. description: CPU softirqs
  1078. unit: "softirqs/s"
  1079. chart_type: stacked
  1080. dimensions:
  1081. - name: a dimension per softirq
  1082. - meta:
  1083. plugin_name: proc.plugin
  1084. module_name: /proc/net/softnet_stat
  1085. monitored_instance:
  1086. name: Softnet Statistics
  1087. link: ""
  1088. categories:
  1089. - data-collection.linux-systems.network-metrics
  1090. icon_filename: "linuxserver.svg"
  1091. related_resources:
  1092. integrations:
  1093. list: []
  1094. info_provided_to_referring_integrations:
  1095. description: ""
  1096. keywords:
  1097. - softnet
  1098. most_popular: false
  1099. overview:
  1100. data_collection:
  1101. metrics_description: |
  1102. `/proc/net/softnet_stat` provides statistics that relate to the handling of network packets by softirq.
  1103. It provides information about:
  1104. - Total number of processed packets (`processed`).
  1105. - Times ksoftirq ran out of quota (`squeezed`).
  1106. - Times net_rx_action was rescheduled.
  1107. - Number of times processed all lists before quota.
  1108. - Number of times did not process all lists due to quota.
  1109. - Number of times net_rx_action was rescheduled for GRO (Generic Receive Offload) cells.
  1110. - Number of times GRO cells were processed.
  1111. Monitoring the `/proc/net/softnet_stat` file can be useful for:
  1112. - **Network performance monitoring**: By tracking the total number of processed packets and how many packets
  1113. were dropped, you can gain insights into your system's network performance.
  1114. - **Troubleshooting**: If you're experiencing network-related issues, this collector can provide valuable clues.
  1115. For instance, a high number of dropped packets may indicate a network problem.
  1116. - **Capacity planning**: If your system is consistently processing near its maximum capacity of network
  1117. packets, it might be time to consider upgrading your network infrastructure.
  1118. method_description: ""
  1119. supported_platforms:
  1120. include: []
  1121. exclude: []
  1122. multi_instance: true
  1123. additional_permissions:
  1124. description: ""
  1125. default_behavior:
  1126. auto_detection:
  1127. description: ""
  1128. limits:
  1129. description: ""
  1130. performance_impact:
  1131. description: ""
  1132. setup:
  1133. prerequisites:
  1134. list: []
  1135. configuration:
  1136. file:
  1137. name: ""
  1138. description: ""
  1139. options:
  1140. description: ""
  1141. folding:
  1142. title: ""
  1143. enabled: true
  1144. list: []
  1145. examples:
  1146. folding:
  1147. enabled: true
  1148. title: ""
  1149. list: []
  1150. troubleshooting:
  1151. problems:
  1152. list: []
  1153. alerts:
  1154. - name: 1min_netdev_backlog_exceeded
  1155. link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf
  1156. metric: system.softnet_stat
  1157. info: average number of dropped packets in the last minute due to exceeded net.core.netdev_max_backlog
  1158. os: "linux"
  1159. - name: 1min_netdev_budget_ran_outs
  1160. link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf
  1161. metric: system.softnet_stat
  1162. info:
  1163. average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last
  1164. minute (this can be a cause for dropped packets)
  1165. os: "linux"
  1166. metrics:
  1167. folding:
  1168. title: Metrics
  1169. enabled: false
  1170. description: ""
  1171. availability: []
  1172. scopes:
  1173. - name: global
  1174. description: ""
  1175. labels: []
  1176. metrics:
  1177. - name: system.softnet_stat
  1178. description: System softnet_stat
  1179. unit: "events/s"
  1180. chart_type: line
  1181. dimensions:
  1182. - name: processed
  1183. - name: dropped
  1184. - name: squeezed
  1185. - name: received_rps
  1186. - name: flow_limit_count
  1187. - name: cpu core
  1188. description: ""
  1189. labels: []
  1190. metrics:
  1191. - name: cpu.softnet_stat
  1192. description: CPU softnet_stat
  1193. unit: "events/s"
  1194. chart_type: line
  1195. dimensions:
  1196. - name: processed
  1197. - name: dropped
  1198. - name: squeezed
  1199. - name: received_rps
  1200. - name: flow_limit_count
  1201. - meta:
  1202. plugin_name: proc.plugin
  1203. module_name: /proc/meminfo
  1204. monitored_instance:
  1205. name: Memory Usage
  1206. link: ""
  1207. categories:
  1208. - data-collection.linux-systems.memory-metrics
  1209. icon_filename: "linuxserver.svg"
  1210. related_resources:
  1211. integrations:
  1212. list: []
  1213. info_provided_to_referring_integrations:
  1214. description: ""
  1215. keywords:
  1216. - memory
  1217. - ram
  1218. - available
  1219. - committed
  1220. most_popular: false
  1221. overview:
  1222. data_collection:
  1223. metrics_description: |
  1224. `/proc/meminfo` provides detailed information about the system's current memory usage. It includes information
  1225. about different types of memory, RAM, Swap, ZSwap, HugePages, Transparent HugePages (THP), Kernel memory,
  1226. SLAB memory, memory mappings, and more.
  1227. Monitoring `/proc/meminfo` can be useful for:
  1228. - **Performance Tuning**: Understanding your system's memory usage can help you make decisions about system
  1229. tuning and optimization. For example, if your system is frequently low on free memory, it might benefit
  1230. from more RAM.
  1231. - **Troubleshooting**: If your system is experiencing problems, `/proc/meminfo` can provide clues about
  1232. whether memory usage is a factor. For example, if your system is slow and cached swap is high, it could
  1233. mean that your system is swapping out a lot of memory to disk, which can degrade performance.
  1234. - **Capacity Planning**: By monitoring memory usage over time, you can understand trends and make informed
  1235. decisions about future capacity needs.
  1236. method_description: ""
  1237. supported_platforms:
  1238. include: []
  1239. exclude: []
  1240. multi_instance: false
  1241. additional_permissions:
  1242. description: ""
  1243. default_behavior:
  1244. auto_detection:
  1245. description: ""
  1246. limits:
  1247. description: ""
  1248. performance_impact:
  1249. description: ""
  1250. setup:
  1251. prerequisites:
  1252. list: []
  1253. configuration:
  1254. file:
  1255. name: ""
  1256. description: ""
  1257. options:
  1258. description: ""
  1259. folding:
  1260. title: ""
  1261. enabled: true
  1262. list: []
  1263. examples:
  1264. folding:
  1265. enabled: true
  1266. title: ""
  1267. list: []
  1268. troubleshooting:
  1269. problems:
  1270. list: []
  1271. alerts:
  1272. - name: ram_in_use
  1273. link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
  1274. metric: system.ram
  1275. info: system memory utilization
  1276. os: "linux"
  1277. - name: ram_available
  1278. link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
  1279. metric: mem.available
  1280. info: percentage of estimated amount of RAM available for userspace processes, without causing swapping
  1281. os: "linux"
  1282. - name: used_swap
  1283. link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf
  1284. metric: mem.swap
  1285. info: swap memory utilization
  1286. os: "linux freebsd"
  1287. - name: 1hour_memory_hw_corrupted
  1288. link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
  1289. metric: mem.hwcorrupt
  1290. info: amount of memory corrupted due to a hardware failure
  1291. os: "linux"
  1292. metrics:
  1293. folding:
  1294. title: Metrics
  1295. enabled: false
  1296. description: ""
  1297. availability: []
  1298. scopes:
  1299. - name: global
  1300. description: ""
  1301. labels: []
  1302. metrics:
  1303. - name: system.ram
  1304. description: System RAM
  1305. unit: "MiB"
  1306. chart_type: stacked
  1307. dimensions:
  1308. - name: free
  1309. - name: used
  1310. - name: cached
  1311. - name: buffers
  1312. - name: mem.available
  1313. description: Available RAM for applications
  1314. unit: "MiB"
  1315. chart_type: area
  1316. dimensions:
  1317. - name: avail
  1318. - name: mem.swap
  1319. description: System Swap
  1320. unit: "MiB"
  1321. chart_type: stacked
  1322. dimensions:
  1323. - name: free
  1324. - name: used
  1325. - name: mem.swap_cached
  1326. description: Swap Memory Cached in RAM
  1327. unit: "MiB"
  1328. chart_type: stacked
  1329. dimensions:
  1330. - name: cached
  1331. - name: mem.zswap
  1332. description: Zswap Usage
  1333. unit: "MiB"
  1334. chart_type: stacked
  1335. dimensions:
  1336. - name: in-ram
  1337. - name: on-disk
  1338. - name: mem.hwcorrupt
  1339. description: Corrupted Memory detected by ECC
  1340. unit: "MiB"
  1341. chart_type: line
  1342. dimensions:
  1343. - name: HardwareCorrupted
  1344. - name: mem.committed
  1345. description: Committed (Allocated) Memory
  1346. unit: "MiB"
  1347. chart_type: area
  1348. dimensions:
  1349. - name: Committed_AS
  1350. - name: mem.writeback
  1351. description: Writeback Memory
  1352. unit: "MiB"
  1353. chart_type: line
  1354. dimensions:
  1355. - name: Dirty
  1356. - name: Writeback
  1357. - name: FuseWriteback
  1358. - name: NfsWriteback
  1359. - name: Bounce
  1360. - name: mem.kernel
  1361. description: Memory Used by Kernel
  1362. unit: "MiB"
  1363. chart_type: stacked
  1364. dimensions:
  1365. - name: Slab
  1366. - name: KernelStack
  1367. - name: PageTables
  1368. - name: VmallocUsed
  1369. - name: Percpu
  1370. - name: mem.slab
  1371. description: Reclaimable Kernel Memory
  1372. unit: "MiB"
  1373. chart_type: stacked
  1374. dimensions:
  1375. - name: reclaimable
  1376. - name: unreclaimable
  1377. - name: mem.hugepages
  1378. description: Dedicated HugePages Memory
  1379. unit: "MiB"
  1380. chart_type: stacked
  1381. dimensions:
  1382. - name: free
  1383. - name: used
  1384. - name: surplus
  1385. - name: reserved
  1386. - name: mem.thp
  1387. description: Transparent HugePages Memory
  1388. unit: "MiB"
  1389. chart_type: stacked
  1390. dimensions:
  1391. - name: anonymous
  1392. - name: shmem
  1393. - name: mem.thp_details
  1394. description: Details of Transparent HugePages Usage
  1395. unit: "MiB"
  1396. chart_type: line
  1397. dimensions:
  1398. - name: ShmemPmdMapped
  1399. - name: FileHugePages
  1400. - name: FilePmdMapped
  1401. - name: mem.reclaiming
  1402. description: Memory Reclaiming
  1403. unit: "MiB"
  1404. chart_type: line
  1405. dimensions:
  1406. - name: Active
  1407. - name: Inactive
  1408. - name: Active(anon)
  1409. - name: Inactive(anon)
  1410. - name: Active(file)
  1411. - name: Inactive(file)
  1412. - name: Unevictable
  1413. - name: Mlocked
  1414. - name: mem.high_low
  1415. description: High and Low Used and Free Memory Areas
  1416. unit: "MiB"
  1417. chart_type: stacked
  1418. dimensions:
  1419. - name: high_used
  1420. - name: low_used
  1421. - name: high_free
  1422. - name: low_free
  1423. - name: mem.cma
  1424. description: Contiguous Memory Allocator (CMA) Memory
  1425. unit: "MiB"
  1426. chart_type: stacked
  1427. dimensions:
  1428. - name: used
  1429. - name: free
  1430. - name: mem.directmaps
  1431. description: Direct Memory Mappings
  1432. unit: "MiB"
  1433. chart_type: stacked
  1434. dimensions:
  1435. - name: 4k
  1436. - name: 2m
  1437. - name: 4m
  1438. - name: 1g
  1439. - meta:
  1440. plugin_name: proc.plugin
  1441. module_name: /proc/pagetypeinfo
  1442. monitored_instance:
  1443. name: Page types
  1444. link: ""
  1445. categories:
  1446. - data-collection.linux-systems.memory-metrics
  1447. icon_filename: "microchip.svg"
  1448. related_resources:
  1449. integrations:
  1450. list: []
  1451. info_provided_to_referring_integrations:
  1452. description: ""
  1453. keywords:
  1454. - memory page types
  1455. most_popular: false
  1456. overview:
  1457. data_collection:
  1458. metrics_description: "This integration provides metrics about the system's memory page types"
  1459. method_description: ""
  1460. supported_platforms:
  1461. include: []
  1462. exclude: []
  1463. multi_instance: false
  1464. additional_permissions:
  1465. description: ""
  1466. default_behavior:
  1467. auto_detection:
  1468. description: ""
  1469. limits:
  1470. description: ""
  1471. performance_impact:
  1472. description: ""
  1473. setup:
  1474. prerequisites:
  1475. list: []
  1476. configuration:
  1477. file:
  1478. name: ""
  1479. description: ""
  1480. options:
  1481. description: ""
  1482. folding:
  1483. title: ""
  1484. enabled: true
  1485. list: []
  1486. examples:
  1487. folding:
  1488. enabled: true
  1489. title: ""
  1490. list: []
  1491. troubleshooting:
  1492. problems:
  1493. list: []
  1494. alerts: []
  1495. metrics:
  1496. folding:
  1497. title: Metrics
  1498. enabled: false
  1499. description: ""
  1500. availability: []
  1501. scopes:
  1502. - name: global
  1503. description: ""
  1504. labels: []
  1505. metrics:
  1506. - name: mem.pagetype_global
  1507. description: System orders available
  1508. unit: "B"
  1509. chart_type: stacked
  1510. dimensions:
  1511. - name: a dimension per pagesize
  1512. - name: node, zone, type
  1513. description: ""
  1514. labels:
  1515. - name: node_id
  1516. description: TBD
  1517. - name: node_zone
  1518. description: TBD
  1519. - name: node_type
  1520. description: TBD
  1521. metrics:
  1522. - name: mem.pagetype
  1523. description: pagetype_Node{node}_{zone}_{type}
  1524. unit: "B"
  1525. chart_type: stacked
  1526. dimensions:
  1527. - name: a dimension per pagesize
  1528. - meta:
  1529. plugin_name: proc.plugin
  1530. module_name: /sys/devices/system/edac/mc
  1531. monitored_instance:
  1532. name: Memory modules (DIMMs)
  1533. link: ""
  1534. categories:
  1535. - data-collection.linux-systems.memory-metrics
  1536. icon_filename: "microchip.svg"
  1537. related_resources:
  1538. integrations:
  1539. list: []
  1540. info_provided_to_referring_integrations:
  1541. description: ""
  1542. keywords:
  1543. - edac
  1544. - ecc
  1545. - dimm
  1546. - ram
  1547. - hardware
  1548. most_popular: false
  1549. overview:
  1550. data_collection:
  1551. metrics_description: |
  1552. The Error Detection and Correction (EDAC) subsystem detects and reports errors in the system's memory,
  1553. primarily ECC (Error-Correcting Code) memory errors.
  1554. The collector provides data for:
  1555. - Per memory controller (MC): correctable and uncorrectable errors. These can be of 2 kinds:
  1556. - errors related to a DIMM
  1557. - errors that cannot be associated with a DIMM
  1558. - Per memory DIMM: correctable and uncorrectable errors. There are 2 kinds:
  1559. - memory controllers that can identify the physical DIMMs and report errors directly for them,
  1560. - memory controllers that report errors for memory address ranges that can be linked to DIMMs.
  1561. In this case the DIMMs reported may be more than the physical DIMMs installed.
  1562. method_description: ""
  1563. supported_platforms:
  1564. include: []
  1565. exclude: []
  1566. multi_instance: true
  1567. additional_permissions:
  1568. description: ""
  1569. default_behavior:
  1570. auto_detection:
  1571. description: ""
  1572. limits:
  1573. description: ""
  1574. performance_impact:
  1575. description: ""
  1576. setup:
  1577. prerequisites:
  1578. list: []
  1579. configuration:
  1580. file:
  1581. name: ""
  1582. description: ""
  1583. options:
  1584. description: ""
  1585. folding:
  1586. title: ""
  1587. enabled: true
  1588. list: []
  1589. examples:
  1590. folding:
  1591. enabled: true
  1592. title: ""
  1593. list: []
  1594. troubleshooting:
  1595. problems:
  1596. list: []
  1597. alerts:
  1598. - name: ecc_memory_mc_noinfo_correctable
  1599. metric: mem.edac_mc
  1600. info: memory controller ${label:controller} ECC correctable errors (unknown DIMM slot) in the last 10 minutes
  1601. link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
  1602. - name: ecc_memory_mc_noinfo_uncorrectable
  1603. metric: mem.edac_mc
  1604. info: memory controller ${label:controller} ECC uncorrectable errors (unknown DIMM slot) in the last 10 minutes
  1605. link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
  1606. - name: ecc_memory_dimm_correctable
  1607. metric: mem.edac_mc_dimm
  1608. info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC correctable errors in the last 10 minutes
  1609. link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
  1610. - name: ecc_memory_dimm_uncorrectable
  1611. metric: mem.edac_mc_dimm
  1612. info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC uncorrectable errors in the last 10 minutes
  1613. link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
  1614. metrics:
  1615. folding:
  1616. title: Metrics
  1617. enabled: false
  1618. description: ""
  1619. availability: []
  1620. scopes:
  1621. - name: memory controller
  1622. description: These metrics refer to the memory controller.
  1623. labels:
  1624. - name: controller
  1625. description: "[mcX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#mcx-directories) directory name of this memory controller."
  1626. - name: mc_name
  1627. description: Memory controller type.
  1628. - name: size_mb
  1629. description: The amount of memory in megabytes that this memory controller manages.
  1630. - name: max_location
  1631. description: Last available memory slot in this memory controller.
  1632. metrics:
  1633. - name: mem.edac_mc
  1634. description: Memory Controller (MC) Error Detection And Correction (EDAC) Errors
  1635. unit: errors/s
  1636. chart_type: line
  1637. dimensions:
  1638. - name: correctable
  1639. - name: uncorrectable
  1640. - name: correctable_noinfo
  1641. - name: uncorrectable_noinfo
  1642. - name: memory module
  1643. description: These metrics refer to the memory module (or rank, [depends on the memory controller](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#f5)).
  1644. labels:
  1645. - name: controller
  1646. description: "[mcX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#mcx-directories) directory name of this memory controller."
  1647. - name: dimm
  1648. description: "[dimmX or rankX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#dimmx-or-rankx-directories) directory name of this memory module."
  1649. - name: dimm_dev_type
  1650. description: Type of DRAM device used in this memory module. For example, x1, x2, x4, x8.
  1651. - name: dimm_edac_mode
  1652. description: Used type of error detection and correction. For example, S4ECD4ED would mean a Chipkill with x4 DRAM.
  1653. - name: dimm_label
  1654. description: Label assigned to this memory module.
  1655. - name: dimm_location
  1656. description: Location of the memory module.
  1657. - name: dimm_mem_type
  1658. description: Type of the memory module.
  1659. - name: size
  1660. description: The amount of memory in megabytes that this memory module manages.
  1661. metrics:
  1662. - name: mem.edac_mc_dimm
  1663. description: DIMM Error Detection And Correction (EDAC) Errors
  1664. unit: errors/s
  1665. chart_type: line
  1666. dimensions:
  1667. - name: correctable
  1668. - name: uncorrectable
  1669. - meta:
  1670. plugin_name: proc.plugin
  1671. module_name: /sys/devices/system/node
  1672. monitored_instance:
  1673. name: Non-Uniform Memory Access
  1674. link: ""
  1675. categories:
  1676. - data-collection.linux-systems.memory-metrics
  1677. icon_filename: "linuxserver.svg"
  1678. related_resources:
  1679. integrations:
  1680. list: []
  1681. info_provided_to_referring_integrations:
  1682. description: ""
  1683. keywords:
  1684. - numa
  1685. most_popular: false
  1686. overview:
  1687. data_collection:
  1688. metrics_description: |
  1689. Information about NUMA (Non-Uniform Memory Access) nodes on the system.
  1690. NUMA is a method of configuring a cluster of microprocessors in a multiprocessing system so that they can
  1691. share memory locally, improving performance and the ability of the system to be expanded. NUMA is used in a
  1692. symmetric multiprocessing (SMP) system.
  1693. In a NUMA system, processors, memory, and I/O devices are grouped together into cells, also known as nodes.
  1694. Each node has its own memory and set of I/O devices, and one or more processors. While a processor can access
  1695. memory in any of the nodes, it does so faster when accessing memory within its own node.
  1696. The collector provides statistics on memory allocations for processes running on the NUMA nodes, revealing the
  1697. efficiency of memory allocations in multi-node systems.
  1698. method_description: ""
  1699. supported_platforms:
  1700. include: []
  1701. exclude: []
  1702. multi_instance: true
  1703. additional_permissions:
  1704. description: ""
  1705. default_behavior:
  1706. auto_detection:
  1707. description: ""
  1708. limits:
  1709. description: ""
  1710. performance_impact:
  1711. description: ""
  1712. setup:
  1713. prerequisites:
  1714. list: []
  1715. configuration:
  1716. file:
  1717. name: ""
  1718. description: ""
  1719. options:
  1720. description: ""
  1721. folding:
  1722. title: ""
  1723. enabled: true
  1724. list: []
  1725. examples:
  1726. folding:
  1727. enabled: true
  1728. title: ""
  1729. list: []
  1730. troubleshooting:
  1731. problems:
  1732. list: []
  1733. alerts: []
  1734. metrics:
  1735. folding:
  1736. title: Metrics
  1737. enabled: false
  1738. description: ""
  1739. availability: []
  1740. scopes:
  1741. - name: numa node
  1742. description: ""
  1743. labels:
  1744. - name: numa_node
  1745. description: TBD
  1746. metrics:
  1747. - name: mem.numa_nodes
  1748. description: NUMA events
  1749. unit: "events/s"
  1750. chart_type: line
  1751. dimensions:
  1752. - name: hit
  1753. - name: miss
  1754. - name: local
  1755. - name: foreign
  1756. - name: interleave
  1757. - name: other
  1758. - meta:
  1759. plugin_name: proc.plugin
  1760. module_name: /sys/kernel/mm/ksm
  1761. monitored_instance:
  1762. name: Kernel Same-Page Merging
  1763. link: ""
  1764. categories:
  1765. - data-collection.linux-systems.memory-metrics
  1766. icon_filename: "microchip.svg"
  1767. related_resources:
  1768. integrations:
  1769. list: []
  1770. info_provided_to_referring_integrations:
  1771. description: ""
  1772. keywords:
  1773. - ksm
  1774. - samepage
  1775. - merging
  1776. most_popular: false
  1777. overview:
  1778. data_collection:
  1779. metrics_description: |
  1780. Kernel Samepage Merging (KSM) is a memory-saving feature in Linux that enables the kernel to examine the
  1781. memory of different processes and identify identical pages. It then merges these identical pages into a
  1782. single page that the processes share. This is particularly useful for virtualization, where multiple virtual
  1783. machines might be running the same operating system or applications and have many identical pages.
  1784. The collector provides information about the operation and effectiveness of KSM on your system.
  1785. method_description: ""
  1786. supported_platforms:
  1787. include: []
  1788. exclude: []
  1789. multi_instance: false
  1790. additional_permissions:
  1791. description: ""
  1792. default_behavior:
  1793. auto_detection:
  1794. description: ""
  1795. limits:
  1796. description: ""
  1797. performance_impact:
  1798. description: ""
  1799. setup:
  1800. prerequisites:
  1801. list: []
  1802. configuration:
  1803. file:
  1804. name: ""
  1805. description: ""
  1806. options:
  1807. description: ""
  1808. folding:
  1809. title: ""
  1810. enabled: true
  1811. list: []
  1812. examples:
  1813. folding:
  1814. enabled: true
  1815. title: ""
  1816. list: []
  1817. troubleshooting:
  1818. problems:
  1819. list: []
  1820. alerts: []
  1821. metrics:
  1822. folding:
  1823. title: Metrics
  1824. enabled: false
  1825. description: ""
  1826. availability: []
  1827. scopes:
  1828. - name: global
  1829. description: ""
  1830. labels: []
  1831. metrics:
  1832. - name: mem.ksm
  1833. description: Kernel Same Page Merging
  1834. unit: "MiB"
  1835. chart_type: stacked
  1836. dimensions:
  1837. - name: shared
  1838. - name: unshared
  1839. - name: sharing
  1840. - name: volatile
  1841. - name: mem.ksm_savings
  1842. description: Kernel Same Page Merging Savings
  1843. unit: "MiB"
  1844. chart_type: area
  1845. dimensions:
  1846. - name: savings
  1847. - name: offered
  1848. - name: mem.ksm_ratios
  1849. description: Kernel Same Page Merging Effectiveness
  1850. unit: "percentage"
  1851. chart_type: line
  1852. dimensions:
  1853. - name: savings
  1854. - meta:
  1855. plugin_name: proc.plugin
  1856. module_name: /sys/block/zram
  1857. monitored_instance:
  1858. name: ZRAM
  1859. link: ""
  1860. categories:
  1861. - data-collection.linux-systems.memory-metrics
  1862. icon_filename: "microchip.svg"
  1863. related_resources:
  1864. integrations:
  1865. list: []
  1866. info_provided_to_referring_integrations:
  1867. description: ""
  1868. keywords:
  1869. - zram
  1870. most_popular: false
  1871. overview:
  1872. data_collection:
  1873. metrics_description: |
  1874. zRAM, or compressed RAM, is a block device that uses a portion of your system's RAM as a block device.
  1875. The data written to this block device is compressed and stored in memory.
  1876. The collector provides information about the operation and the effectiveness of zRAM on your system.
  1877. method_description: ""
  1878. supported_platforms:
  1879. include: []
  1880. exclude: []
  1881. multi_instance: true
  1882. additional_permissions:
  1883. description: ""
  1884. default_behavior:
  1885. auto_detection:
  1886. description: ""
  1887. limits:
  1888. description: ""
  1889. performance_impact:
  1890. description: ""
  1891. setup:
  1892. prerequisites:
  1893. list: []
  1894. configuration:
  1895. file:
  1896. name: ""
  1897. description: ""
  1898. options:
  1899. description: ""
  1900. folding:
  1901. title: ""
  1902. enabled: true
  1903. list: []
  1904. examples:
  1905. folding:
  1906. enabled: true
  1907. title: ""
  1908. list: []
  1909. troubleshooting:
  1910. problems:
  1911. list: []
  1912. alerts: []
  1913. metrics:
  1914. folding:
  1915. title: Metrics
  1916. enabled: false
  1917. description: ""
  1918. availability: []
  1919. scopes:
  1920. - name: zram device
  1921. description: ""
  1922. labels:
  1923. - name: device
  1924. description: TBD
  1925. metrics:
  1926. - name: mem.zram_usage
  1927. description: ZRAM Memory Usage
  1928. unit: "MiB"
  1929. chart_type: area
  1930. dimensions:
  1931. - name: compressed
  1932. - name: metadata
  1933. - name: mem.zram_savings
  1934. description: ZRAM Memory Savings
  1935. unit: "MiB"
  1936. chart_type: area
  1937. dimensions:
  1938. - name: savings
  1939. - name: original
  1940. - name: mem.zram_ratio
  1941. description: ZRAM Compression Ratio (original to compressed)
  1942. unit: "ratio"
  1943. chart_type: line
  1944. dimensions:
  1945. - name: ratio
  1946. - name: mem.zram_efficiency
  1947. description: ZRAM Efficiency
  1948. unit: "percentage"
  1949. chart_type: line
  1950. dimensions:
  1951. - name: percent
  1952. - meta:
  1953. plugin_name: proc.plugin
  1954. module_name: ipc
  1955. monitored_instance:
  1956. name: Inter Process Communication
  1957. link: ""
  1958. categories:
  1959. - data-collection.linux-systems.ipc-metrics
  1960. icon_filename: "network-wired.svg"
  1961. related_resources:
  1962. integrations:
  1963. list: []
  1964. info_provided_to_referring_integrations:
  1965. description: ""
  1966. keywords:
  1967. - ipc
  1968. - semaphores
  1969. - shared memory
  1970. most_popular: false
  1971. overview:
  1972. data_collection:
  1973. metrics_description: |
  1974. IPC stands for Inter-Process Communication. It is a mechanism which allows processes to communicate with each
  1975. other and synchronize their actions.
  1976. This collector exposes information about:
  1977. - Message Queues: This allows messages to be exchanged between processes. It's a more flexible method that
  1978. allows messages to be placed onto a queue and read at a later time.
  1979. - Shared Memory: This method allows for the fastest form of IPC because processes can exchange data by
  1980. reading/writing into shared memory segments.
  1981. - Semaphores: They are used to synchronize the operations performed by independent processes. So, if multiple
  1982. processes are trying to access a single shared resource, semaphores can ensure that only one process
  1983. accesses the resource at a given time.
  1984. method_description: ""
  1985. supported_platforms:
  1986. include: []
  1987. exclude: []
  1988. multi_instance: false
  1989. additional_permissions:
  1990. description: ""
  1991. default_behavior:
  1992. auto_detection:
  1993. description: ""
  1994. limits:
  1995. description: ""
  1996. performance_impact:
  1997. description: ""
  1998. setup:
  1999. prerequisites:
  2000. list: []
  2001. configuration:
  2002. file:
  2003. name: ""
  2004. description: ""
  2005. options:
  2006. description: ""
  2007. folding:
  2008. title: ""
  2009. enabled: true
  2010. list: []
  2011. examples:
  2012. folding:
  2013. enabled: true
  2014. title: ""
  2015. list: []
  2016. troubleshooting:
  2017. problems:
  2018. list: []
  2019. alerts:
  2020. - name: semaphores_used
  2021. link: https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf
  2022. metric: system.ipc_semaphores
  2023. info: IPC semaphore utilization
  2024. os: "linux"
  2025. - name: semaphore_arrays_used
  2026. link: https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf
  2027. metric: system.ipc_semaphore_arrays
  2028. info: IPC semaphore arrays utilization
  2029. os: "linux"
  2030. metrics:
  2031. folding:
  2032. title: Metrics
  2033. enabled: false
  2034. description: ""
  2035. availability: []
  2036. scopes:
  2037. - name: global
  2038. description: ""
  2039. labels: []
  2040. metrics:
  2041. - name: system.ipc_semaphores
  2042. description: IPC Semaphores
  2043. unit: "semaphores"
  2044. chart_type: area
  2045. dimensions:
  2046. - name: semaphores
  2047. - name: system.ipc_semaphore_arrays
  2048. description: IPC Semaphore Arrays
  2049. unit: "arrays"
  2050. chart_type: area
  2051. dimensions:
  2052. - name: arrays
  2053. - name: system.message_queue_message
  2054. description: IPC Message Queue Number of Messages
  2055. unit: "messages"
  2056. chart_type: stacked
  2057. dimensions:
  2058. - name: a dimension per queue
  2059. - name: system.message_queue_bytes
  2060. description: IPC Message Queue Used Bytes
  2061. unit: "bytes"
  2062. chart_type: stacked
  2063. dimensions:
  2064. - name: a dimension per queue
  2065. - name: system.shared_memory_segments
  2066. description: IPC Shared Memory Number of Segments
  2067. unit: "segments"
  2068. chart_type: stacked
  2069. dimensions:
  2070. - name: segments
  2071. - name: system.shared_memory_bytes
  2072. description: IPC Shared Memory Used Bytes
  2073. unit: "bytes"
  2074. chart_type: stacked
  2075. dimensions:
  2076. - name: bytes
  2077. - meta:
  2078. plugin_name: proc.plugin
  2079. module_name: /proc/diskstats
  2080. monitored_instance:
  2081. name: Disk Statistics
  2082. link: ""
  2083. categories:
  2084. - data-collection.linux-systems.disk-metrics
  2085. icon_filename: "hard-drive.svg"
  2086. related_resources:
  2087. integrations:
  2088. list: []
  2089. info_provided_to_referring_integrations:
  2090. description: ""
  2091. keywords:
  2092. - disk
  2093. - disks
  2094. - io
  2095. - bcache
  2096. - block devices
  2097. most_popular: false
  2098. overview:
  2099. data_collection:
  2100. metrics_description: |
  2101. Detailed statistics for each of your system's disk devices and partitions.
  2102. The data is reported by the kernel and can be used to monitor disk activity on a Linux system.
  2103. Get valuable insight into how your disks are performing and where potential bottlenecks might be.
  2104. method_description: ""
  2105. supported_platforms:
  2106. include: []
  2107. exclude: []
  2108. multi_instance: true
  2109. additional_permissions:
  2110. description: ""
  2111. default_behavior:
  2112. auto_detection:
  2113. description: ""
  2114. limits:
  2115. description: ""
  2116. performance_impact:
  2117. description: ""
  2118. setup:
  2119. prerequisites:
  2120. list: []
  2121. configuration:
  2122. file:
  2123. name: ""
  2124. description: ""
  2125. options:
  2126. description: ""
  2127. folding:
  2128. title: ""
  2129. enabled: true
  2130. list: []
  2131. examples:
  2132. folding:
  2133. enabled: true
  2134. title: ""
  2135. list: []
  2136. troubleshooting:
  2137. problems:
  2138. list: []
  2139. alerts:
  2140. - name: 10min_disk_backlog
  2141. link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf
  2142. metric: disk.backlog
  2143. info: average backlog size of the ${label:device} disk over the last 10 minutes
  2144. os: "linux"
  2145. - name: 10min_disk_utilization
  2146. link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf
  2147. metric: disk.util
  2148. info: average percentage of time ${label:device} disk was busy over the last 10 minutes
  2149. os: "linux freebsd"
  2150. - name: bcache_cache_dirty
  2151. link: https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf
  2152. metric: disk.bcache_cache_alloc
  2153. info: percentage of cache space used for dirty data and metadata (this usually means your SSD cache is too small)
  2154. - name: bcache_cache_errors
  2155. link: https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf
  2156. metric: disk.bcache_cache_read_races
  2157. info:
  2158. number of times data was read from the cache, the bucket was reused and invalidated in the last 10 minutes (when this occurs the data is
  2159. reread from the backing device)
  2160. metrics:
  2161. folding:
  2162. title: Metrics
  2163. enabled: false
  2164. description: ""
  2165. availability: []
  2166. scopes:
  2167. - name: global
  2168. description: ""
  2169. labels: []
  2170. metrics:
  2171. - name: system.io
  2172. description: Disk I/O
  2173. unit: "KiB/s"
  2174. chart_type: area
  2175. dimensions:
  2176. - name: in
  2177. - name: out
  2178. - name: disk
  2179. description: ""
  2180. labels:
  2181. - name: device
  2182. description: TBD
  2183. - name: mount_point
  2184. description: TBD
  2185. - name: device_type
  2186. description: TBD
  2187. metrics:
  2188. - name: disk.io
  2189. description: Disk I/O Bandwidth
  2190. unit: "KiB/s"
  2191. chart_type: area
  2192. dimensions:
  2193. - name: reads
  2194. - name: writes
  2195. - name: disk_ext.io
  2196. description: Amount of Discarded Data
  2197. unit: "KiB/s"
  2198. chart_type: area
  2199. dimensions:
  2200. - name: discards
  2201. - name: disk.ops
  2202. description: Disk Completed I/O Operations
  2203. unit: "operations/s"
  2204. chart_type: line
  2205. dimensions:
  2206. - name: reads
  2207. - name: writes
  2208. - name: disk_ext.ops
  2209. description: Disk Completed Extended I/O Operations
  2210. unit: "operations/s"
  2211. chart_type: line
  2212. dimensions:
  2213. - name: discards
  2214. - name: flushes
  2215. - name: disk.qops
  2216. description: Disk Current I/O Operations
  2217. unit: "operations"
  2218. chart_type: line
  2219. dimensions:
  2220. - name: operations
  2221. - name: disk.backlog
  2222. description: Disk Backlog
  2223. unit: "milliseconds"
  2224. chart_type: area
  2225. dimensions:
  2226. - name: backlog
  2227. - name: disk.busy
  2228. description: Disk Busy Time
  2229. unit: "milliseconds"
  2230. chart_type: area
  2231. dimensions:
  2232. - name: busy
  2233. - name: disk.util
  2234. description: Disk Utilization Time
  2235. unit: "% of time working"
  2236. chart_type: area
  2237. dimensions:
  2238. - name: utilization
  2239. - name: disk.mops
  2240. description: Disk Merged Operations
  2241. unit: "merged operations/s"
  2242. chart_type: line
  2243. dimensions:
  2244. - name: reads
  2245. - name: writes
  2246. - name: disk_ext.mops
  2247. description: Disk Merged Discard Operations
  2248. unit: "merged operations/s"
  2249. chart_type: line
  2250. dimensions:
  2251. - name: discards
  2252. - name: disk.iotime
  2253. description: Disk Total I/O Time
  2254. unit: "milliseconds/s"
  2255. chart_type: line
  2256. dimensions:
  2257. - name: reads
  2258. - name: writes
  2259. - name: disk_ext.iotime
  2260. description: Disk Total I/O Time for Extended Operations
  2261. unit: "milliseconds/s"
  2262. chart_type: line
  2263. dimensions:
  2264. - name: discards
  2265. - name: flushes
  2266. - name: disk.await
  2267. description: Average Completed I/O Operation Time
  2268. unit: "milliseconds/operation"
  2269. chart_type: line
  2270. dimensions:
  2271. - name: reads
  2272. - name: writes
  2273. - name: disk_ext.await
  2274. description: Average Completed Extended I/O Operation Time
  2275. unit: "milliseconds/operation"
  2276. chart_type: line
  2277. dimensions:
  2278. - name: discards
  2279. - name: flushes
  2280. - name: disk.avgsz
  2281. description: Average Completed I/O Operation Bandwidth
  2282. unit: "KiB/operation"
  2283. chart_type: area
  2284. dimensions:
  2285. - name: reads
  2286. - name: writes
  2287. - name: disk_ext.avgsz
  2288. description: Average Amount of Discarded Data
  2289. unit: "KiB/operation"
  2290. chart_type: area
  2291. dimensions:
  2292. - name: discards
  2293. - name: disk.svctm
  2294. description: Average Service Time
  2295. unit: "milliseconds/operation"
  2296. chart_type: line
  2297. dimensions:
  2298. - name: svctm
  2299. - name: disk.bcache_cache_alloc
  2300. description: BCache Cache Allocations
  2301. unit: "percentage"
  2302. chart_type: stacked
  2303. dimensions:
  2304. - name: ununsed
  2305. - name: dirty
  2306. - name: clean
  2307. - name: metadata
  2308. - name: undefined
  2309. - name: disk.bcache_hit_ratio
  2310. description: BCache Cache Hit Ratio
  2311. unit: "percentage"
  2312. chart_type: line
  2313. dimensions:
  2314. - name: 5min
  2315. - name: 1hour
  2316. - name: 1day
  2317. - name: ever
  2318. - name: disk.bcache_rates
  2319. description: BCache Rates
  2320. unit: "KiB/s"
  2321. chart_type: area
  2322. dimensions:
  2323. - name: congested
  2324. - name: writeback
  2325. - name: disk.bcache_size
  2326. description: BCache Cache Sizes
  2327. unit: "MiB"
  2328. chart_type: area
  2329. dimensions:
  2330. - name: dirty
  2331. - name: disk.bcache_usage
  2332. description: BCache Cache Usage
  2333. unit: "percentage"
  2334. chart_type: area
  2335. dimensions:
  2336. - name: avail
  2337. - name: disk.bcache_cache_read_races
  2338. description: BCache Cache Read Races
  2339. unit: "operations/s"
  2340. chart_type: line
  2341. dimensions:
  2342. - name: races
  2343. - name: errors
  2344. - name: disk.bcache
  2345. description: BCache Cache I/O Operations
  2346. unit: "operations/s"
  2347. chart_type: line
  2348. dimensions:
  2349. - name: hits
  2350. - name: misses
  2351. - name: collisions
  2352. - name: readaheads
  2353. - name: disk.bcache_bypass
  2354. description: BCache Cache Bypass I/O Operations
  2355. unit: "operations/s"
  2356. chart_type: line
  2357. dimensions:
  2358. - name: hits
  2359. - name: misses
  2360. - meta:
  2361. plugin_name: proc.plugin
  2362. module_name: /proc/mdstat
  2363. monitored_instance:
  2364. name: MD RAID
  2365. link: ""
  2366. categories:
  2367. - data-collection.linux-systems.disk-metrics
  2368. icon_filename: "hard-drive.svg"
  2369. related_resources:
  2370. integrations:
  2371. list: []
  2372. info_provided_to_referring_integrations:
  2373. description: ""
  2374. keywords:
  2375. - raid
  2376. - mdadm
  2377. - mdstat
  2378. - raid
  2379. most_popular: false
  2380. overview:
  2381. data_collection:
  2382. metrics_description: "This integration monitors the status of MD RAID devices."
  2383. method_description: ""
  2384. supported_platforms:
  2385. include: []
  2386. exclude: []
  2387. multi_instance: true
  2388. additional_permissions:
  2389. description: ""
  2390. default_behavior:
  2391. auto_detection:
  2392. description: ""
  2393. limits:
  2394. description: ""
  2395. performance_impact:
  2396. description: ""
  2397. setup:
  2398. prerequisites:
  2399. list: []
  2400. configuration:
  2401. file:
  2402. name: ""
  2403. description: ""
  2404. options:
  2405. description: ""
  2406. folding:
  2407. title: ""
  2408. enabled: true
  2409. list: []
  2410. examples:
  2411. folding:
  2412. enabled: true
  2413. title: ""
  2414. list: []
  2415. troubleshooting:
  2416. problems:
  2417. list: []
  2418. alerts:
  2419. - name: mdstat_last_collected
  2420. link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
  2421. metric: md.disks
  2422. info: number of seconds since the last successful data collection
  2423. - name: mdstat_disks
  2424. link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
  2425. metric: md.disks
  2426. info:
  2427. number of devices in the down state for the ${label:device} ${label:raid_level} array. Any number > 0 indicates that the array is degraded.
  2428. - name: mdstat_mismatch_cnt
  2429. link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
  2430. metric: md.mismatch_cnt
  2431. info: number of unsynchronized blocks for the ${label:device} ${label:raid_level} array
  2432. - name: mdstat_nonredundant_last_collected
  2433. link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
  2434. metric: md.nonredundant
  2435. info: number of seconds since the last successful data collection
  2436. metrics:
  2437. folding:
  2438. title: Metrics
  2439. enabled: false
  2440. description: ""
  2441. availability: []
  2442. scopes:
  2443. - name: global
  2444. description: ""
  2445. labels: []
  2446. metrics:
  2447. - name: md.health
  2448. description: Faulty Devices In MD
  2449. unit: "failed disks"
  2450. chart_type: line
  2451. dimensions:
  2452. - name: a dimension per md array
  2453. - name: md array
  2454. description: ""
  2455. labels:
  2456. - name: device
  2457. description: TBD
  2458. - name: raid_level
  2459. description: TBD
  2460. metrics:
  2461. - name: md.disks
  2462. description: Disks Stats
  2463. unit: "disks"
  2464. chart_type: stacked
  2465. dimensions:
  2466. - name: inuse
  2467. - name: down
  2468. - name: md.mismatch_cnt
  2469. description: Mismatch Count
  2470. unit: "unsynchronized blocks"
  2471. chart_type: line
  2472. dimensions:
  2473. - name: count
  2474. - name: md.status
  2475. description: Current Status
  2476. unit: "percent"
  2477. chart_type: line
  2478. dimensions:
  2479. - name: check
  2480. - name: resync
  2481. - name: recovery
  2482. - name: reshape
  2483. - name: md.expected_time_until_operation_finish
  2484. description: Approximate Time Until Finish
  2485. unit: "seconds"
  2486. chart_type: line
  2487. dimensions:
  2488. - name: finish_in
  2489. - name: md.operation_speed
  2490. description: Operation Speed
  2491. unit: "KiB/s"
  2492. chart_type: line
  2493. dimensions:
  2494. - name: speed
  2495. - name: md.nonredundant
  2496. description: Nonredundant Array Availability
  2497. unit: "boolean"
  2498. chart_type: line
  2499. dimensions:
  2500. - name: available
  2501. - meta:
  2502. plugin_name: proc.plugin
  2503. module_name: /proc/net/dev
  2504. monitored_instance:
  2505. name: Network interfaces
  2506. link: ""
  2507. categories:
  2508. - data-collection.linux-systems.network-metrics
  2509. icon_filename: "network-wired.svg"
  2510. related_resources:
  2511. integrations:
  2512. list: []
  2513. info_provided_to_referring_integrations:
  2514. description: ""
  2515. keywords:
  2516. - network interfaces
  2517. most_popular: false
  2518. overview:
  2519. data_collection:
  2520. metrics_description: "Monitor network interface metrics about bandwidth, state, errors and more."
  2521. method_description: ""
  2522. supported_platforms:
  2523. include: []
  2524. exclude: []
  2525. multi_instance: true
  2526. additional_permissions:
  2527. description: ""
  2528. default_behavior:
  2529. auto_detection:
  2530. description: ""
  2531. limits:
  2532. description: ""
  2533. performance_impact:
  2534. description: ""
  2535. setup:
  2536. prerequisites:
  2537. list: []
  2538. configuration:
  2539. file:
  2540. name: ""
  2541. description: ""
  2542. options:
  2543. description: ""
  2544. folding:
  2545. title: ""
  2546. enabled: true
  2547. list: []
  2548. examples:
  2549. folding:
  2550. enabled: true
  2551. title: ""
  2552. list: []
  2553. troubleshooting:
  2554. problems:
  2555. list: []
  2556. alerts:
  2557. - name: interface_speed
  2558. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2559. metric: net.net
  2560. info: network interface ${label:device} current speed
  2561. os: "*"
  2562. - name: 1m_received_traffic_overflow
  2563. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2564. metric: net.net
  2565. info: average inbound utilization for the network interface ${label:device} over the last minute
  2566. os: "linux"
  2567. - name: 1m_sent_traffic_overflow
  2568. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2569. metric: net.net
  2570. info: average outbound utilization for the network interface ${label:device} over the last minute
  2571. os: "linux"
  2572. - name: inbound_packets_dropped_ratio
  2573. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2574. metric: net.packets
  2575. info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
  2576. os: "linux"
  2577. - name: outbound_packets_dropped_ratio
  2578. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2579. metric: net.packets
  2580. info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
  2581. os: "linux"
  2582. - name: wifi_inbound_packets_dropped_ratio
  2583. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2584. metric: net.packets
  2585. info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
  2586. os: "linux"
  2587. - name: wifi_outbound_packets_dropped_ratio
  2588. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2589. metric: net.packets
  2590. info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
  2591. os: "linux"
  2592. - name: 1m_received_packets_rate
  2593. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2594. metric: net.packets
  2595. info: average number of packets received by the network interface ${label:device} over the last minute
  2596. os: "linux freebsd"
  2597. - name: 10s_received_packets_storm
  2598. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2599. metric: net.packets
  2600. info:
  2601. ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over
  2602. the last minute
  2603. os: "linux freebsd"
  2604. - name: inbound_packets_dropped
  2605. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2606. metric: net.drops
  2607. info: number of inbound dropped packets for the network interface ${label:device} in the last 10 minutes
  2608. os: "linux"
  2609. - name: outbound_packets_dropped
  2610. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2611. metric: net.drops
  2612. info: number of outbound dropped packets for the network interface ${label:device} in the last 10 minutes
  2613. os: "linux"
  2614. - name: 10min_fifo_errors
  2615. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2616. metric: net.fifo
  2617. info: number of FIFO errors for the network interface ${label:device} in the last 10 minutes
  2618. os: "linux"
  2619. metrics:
  2620. folding:
  2621. title: Metrics
  2622. enabled: false
  2623. description: ""
  2624. availability: []
  2625. scopes:
  2626. - name: global
  2627. description: ""
  2628. labels: []
  2629. metrics:
  2630. - name: system.net
  2631. description: Physical Network Interfaces Aggregated Bandwidth
  2632. unit: "kilobits/s"
  2633. chart_type: area
  2634. dimensions:
  2635. - name: received
  2636. - name: sent
  2637. - name: network device
  2638. description: ""
  2639. labels:
  2640. - name: interface_type
  2641. description: TBD
  2642. - name: device
  2643. description: TBD
  2644. metrics:
  2645. - name: net.net
  2646. description: Bandwidth
  2647. unit: "kilobits/s"
  2648. chart_type: area
  2649. dimensions:
  2650. - name: received
  2651. - name: sent
  2652. - name: net.speed
  2653. description: Interface Speed
  2654. unit: "kilobits/s"
  2655. chart_type: line
  2656. dimensions:
  2657. - name: speed
  2658. - name: net.duplex
  2659. description: Interface Duplex State
  2660. unit: "state"
  2661. chart_type: line
  2662. dimensions:
  2663. - name: full
  2664. - name: half
  2665. - name: unknown
  2666. - name: net.operstate
  2667. description: Interface Operational State
  2668. unit: "state"
  2669. chart_type: line
  2670. dimensions:
  2671. - name: up
  2672. - name: down
  2673. - name: notpresent
  2674. - name: lowerlayerdown
  2675. - name: testing
  2676. - name: dormant
  2677. - name: unknown
  2678. - name: net.carrier
  2679. description: Interface Physical Link State
  2680. unit: "state"
  2681. chart_type: line
  2682. dimensions:
  2683. - name: up
  2684. - name: down
  2685. - name: net.mtu
  2686. description: Interface MTU
  2687. unit: "octets"
  2688. chart_type: line
  2689. dimensions:
  2690. - name: mtu
  2691. - name: net.packets
  2692. description: Packets
  2693. unit: "packets/s"
  2694. chart_type: line
  2695. dimensions:
  2696. - name: received
  2697. - name: sent
  2698. - name: multicast
  2699. - name: net.errors
  2700. description: Interface Errors
  2701. unit: "errors/s"
  2702. chart_type: line
  2703. dimensions:
  2704. - name: inbound
  2705. - name: outbound
  2706. - name: net.drops
  2707. description: Interface Drops
  2708. unit: "drops/s"
  2709. chart_type: line
  2710. dimensions:
  2711. - name: inbound
  2712. - name: outbound
  2713. - name: net.fifo
  2714. description: Interface FIFO Buffer Errors
  2715. unit: "errors"
  2716. chart_type: line
  2717. dimensions:
  2718. - name: receive
  2719. - name: transmit
  2720. - name: net.compressed
  2721. description: Compressed Packets
  2722. unit: "packets/s"
  2723. chart_type: line
  2724. dimensions:
  2725. - name: received
  2726. - name: sent
  2727. - name: net.events
  2728. description: Network Interface Events
  2729. unit: "events/s"
  2730. chart_type: line
  2731. dimensions:
  2732. - name: frames
  2733. - name: collisions
  2734. - name: carrier
  2735. - meta:
  2736. plugin_name: proc.plugin
  2737. module_name: /proc/net/wireless
  2738. monitored_instance:
  2739. name: Wireless network interfaces
  2740. link: ""
  2741. categories:
  2742. - data-collection.linux-systems.network-metrics
  2743. icon_filename: "network-wired.svg"
  2744. related_resources:
  2745. integrations:
  2746. list: []
  2747. info_provided_to_referring_integrations:
  2748. description: ""
  2749. keywords:
  2750. - wireless devices
  2751. most_popular: false
  2752. overview:
  2753. data_collection:
  2754. metrics_description: "Monitor wireless devices with metrics about status, link quality, signal level, noise level and more."
  2755. method_description: ""
  2756. supported_platforms:
  2757. include: []
  2758. exclude: []
  2759. multi_instance: true
  2760. additional_permissions:
  2761. description: ""
  2762. default_behavior:
  2763. auto_detection:
  2764. description: ""
  2765. limits:
  2766. description: ""
  2767. performance_impact:
  2768. description: ""
  2769. setup:
  2770. prerequisites:
  2771. list: []
  2772. configuration:
  2773. file:
  2774. name: ""
  2775. description: ""
  2776. options:
  2777. description: ""
  2778. folding:
  2779. title: ""
  2780. enabled: true
  2781. list: []
  2782. examples:
  2783. folding:
  2784. enabled: true
  2785. title: ""
  2786. list: []
  2787. troubleshooting:
  2788. problems:
  2789. list: []
  2790. alerts: []
  2791. metrics:
  2792. folding:
  2793. title: Metrics
  2794. enabled: false
  2795. description: ""
  2796. availability: []
  2797. scopes:
  2798. - name: wireless device
  2799. description: ""
  2800. labels: []
  2801. metrics:
  2802. - name: wireless.status
  2803. description: Internal status reported by interface.
  2804. unit: "status"
  2805. chart_type: line
  2806. dimensions:
  2807. - name: status
  2808. - name: wireless.link_quality
  2809. description: Overall quality of the link. This is an aggregate value, and depends on the driver and hardware.
  2810. unit: "value"
  2811. chart_type: line
  2812. dimensions:
  2813. - name: link_quality
  2814. - name: wireless.signal_level
  2815. description:
  2816. The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the
  2817. signal.
  2818. unit: "dBm"
  2819. chart_type: line
  2820. dimensions:
  2821. - name: signal_level
  2822. - name: wireless.noise_level
  2823. description:
  2824. The noise level indicates the amount of background noise in your environment. The closer the value is to 0, the greater the noise level.
  2825. unit: "dBm"
  2826. chart_type: line
  2827. dimensions:
  2828. - name: noise_level
  2829. - name: wireless.discarded_packets
  2830. description: Packets discarded in the wireless adapter due to wireless-specific problems.
  2831. unit: "packets/s"
  2832. chart_type: line
  2833. dimensions:
  2834. - name: nwid
  2835. - name: crypt
  2836. - name: frag
  2837. - name: retry
  2838. - name: misc
  2839. - name: wireless.missed_beacons
  2840. description: Number of missed beacons.
  2841. unit: "frames/s"
  2842. chart_type: line
  2843. dimensions:
  2844. - name: missed_beacons
  2845. - meta:
  2846. plugin_name: proc.plugin
  2847. module_name: /sys/class/infiniband
  2848. monitored_instance:
  2849. name: InfiniBand
  2850. link: ""
  2851. categories:
  2852. - data-collection.linux-systems.network-metrics
  2853. icon_filename: "network-wired.svg"
  2854. related_resources:
  2855. integrations:
  2856. list: []
  2857. info_provided_to_referring_integrations:
  2858. description: ""
  2859. keywords:
  2860. - infiniband
  2861. - rdma
  2862. most_popular: false
  2863. overview:
  2864. data_collection:
  2865. metrics_description: "This integration monitors InfiniBand network interface statistics."
  2866. method_description: ""
  2867. supported_platforms:
  2868. include: []
  2869. exclude: []
  2870. multi_instance: true
  2871. additional_permissions:
  2872. description: ""
  2873. default_behavior:
  2874. auto_detection:
  2875. description: ""
  2876. limits:
  2877. description: ""
  2878. performance_impact:
  2879. description: ""
  2880. setup:
  2881. prerequisites:
  2882. list: []
  2883. configuration:
  2884. file:
  2885. name: ""
  2886. description: ""
  2887. options:
  2888. description: ""
  2889. folding:
  2890. title: ""
  2891. enabled: true
  2892. list: []
  2893. examples:
  2894. folding:
  2895. enabled: true
  2896. title: ""
  2897. list: []
  2898. troubleshooting:
  2899. problems:
  2900. list: []
  2901. alerts: []
  2902. metrics:
  2903. folding:
  2904. title: Metrics
  2905. enabled: false
  2906. description: ""
  2907. availability: []
  2908. scopes:
  2909. - name: infiniband port
  2910. description: ""
  2911. labels: []
  2912. metrics:
  2913. - name: ib.bytes
  2914. description: Bandwidth usage
  2915. unit: "kilobits/s"
  2916. chart_type: area
  2917. dimensions:
  2918. - name: Received
  2919. - name: Sent
  2920. - name: ib.packets
  2921. description: Packets Statistics
  2922. unit: "packets/s"
  2923. chart_type: area
  2924. dimensions:
  2925. - name: Received
  2926. - name: Sent
  2927. - name: Mcast_rcvd
  2928. - name: Mcast_sent
  2929. - name: Ucast_rcvd
  2930. - name: Ucast_sent
  2931. - name: ib.errors
  2932. description: Error Counters
  2933. unit: "errors/s"
  2934. chart_type: line
  2935. dimensions:
  2936. - name: Pkts_malformated
  2937. - name: Pkts_rcvd_discarded
  2938. - name: Pkts_sent_discarded
  2939. - name: Tick_Wait_to_send
  2940. - name: Pkts_missed_resource
  2941. - name: Buffer_overrun
  2942. - name: Link_Downed
  2943. - name: Link_recovered
  2944. - name: Link_integrity_err
  2945. - name: Link_minor_errors
  2946. - name: Pkts_rcvd_with_EBP
  2947. - name: Pkts_rcvd_discarded_by_switch
  2948. - name: Pkts_sent_discarded_by_switch
  2949. - name: ib.hwerrors
  2950. description: Hardware Errors
  2951. unit: "errors/s"
  2952. chart_type: line
  2953. dimensions:
  2954. - name: Duplicated_packets
  2955. - name: Pkt_Seq_Num_gap
  2956. - name: Ack_timer_expired
  2957. - name: Drop_missing_buffer
  2958. - name: Drop_out_of_sequence
  2959. - name: NAK_sequence_rcvd
  2960. - name: CQE_err_Req
  2961. - name: CQE_err_Resp
  2962. - name: CQE_Flushed_err_Req
  2963. - name: CQE_Flushed_err_Resp
  2964. - name: Remote_access_err_Req
  2965. - name: Remote_access_err_Resp
  2966. - name: Remote_invalid_req
  2967. - name: Local_length_err_Resp
  2968. - name: RNR_NAK_Packets
  2969. - name: CNP_Pkts_ignored
  2970. - name: RoCE_ICRC_Errors
  2971. - name: ib.hwpackets
  2972. description: Hardware Packets Statistics
  2973. unit: "packets/s"
  2974. chart_type: line
  2975. dimensions:
  2976. - name: RoCEv2_Congestion_sent
  2977. - name: RoCEv2_Congestion_rcvd
  2978. - name: IB_Congestion_handled
  2979. - name: ATOMIC_req_rcvd
  2980. - name: Connection_req_rcvd
  2981. - name: Read_req_rcvd
  2982. - name: Write_req_rcvd
  2983. - name: RoCE_retrans_adaptive
  2984. - name: RoCE_retrans_timeout
  2985. - name: RoCE_slow_restart
  2986. - name: RoCE_slow_restart_congestion
  2987. - name: RoCE_slow_restart_count
  2988. - meta:
  2989. plugin_name: proc.plugin
  2990. module_name: /proc/net/netstat
  2991. monitored_instance:
  2992. name: Network statistics
  2993. link: ""
  2994. categories:
  2995. - data-collection.linux-systems.network-metrics
  2996. icon_filename: "network-wired.svg"
  2997. related_resources:
  2998. integrations:
  2999. list: []
  3000. info_provided_to_referring_integrations:
  3001. description: ""
  3002. keywords:
  3003. - ip
  3004. - udp
  3005. - udplite
  3006. - icmp
  3007. - netstat
  3008. - snmp
  3009. most_popular: false
  3010. overview:
  3011. data_collection:
  3012. metrics_description: "This integration provides metrics from the `netstat`, `snmp` and `snmp6` modules."
  3013. method_description: ""
  3014. supported_platforms:
  3015. include: []
  3016. exclude: []
  3017. multi_instance: true
  3018. additional_permissions:
  3019. description: ""
  3020. default_behavior:
  3021. auto_detection:
  3022. description: ""
  3023. limits:
  3024. description: ""
  3025. performance_impact:
  3026. description: ""
  3027. setup:
  3028. prerequisites:
  3029. list: []
  3030. configuration:
  3031. file:
  3032. name: ""
  3033. description: ""
  3034. options:
  3035. description: ""
  3036. folding:
  3037. title: ""
  3038. enabled: true
  3039. list: []
  3040. examples:
  3041. folding:
  3042. enabled: true
  3043. title: ""
  3044. list: []
  3045. troubleshooting:
  3046. problems:
  3047. list: []
  3048. alerts:
  3049. - name: 1m_tcp_syn_queue_drops
  3050. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf
  3051. metric: ip.tcp_syn_queue
  3052. info: average number of SYN requests dropped due to a full TCP SYN queue over the last minute (SYN cookies were not enabled)
  3053. os: "linux"
  3054. - name: 1m_tcp_syn_queue_cookies
  3055. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf
  3056. metric: ip.tcp_syn_queue
  3057. info: average number of sent SYN cookies due to the full TCP SYN queue over the last minute
  3058. os: "linux"
  3059. - name: 1m_tcp_accept_queue_overflows
  3060. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf
  3061. metric: ip.tcp_accept_queue
  3062. info: average number of overflows in the TCP accept queue over the last minute
  3063. os: "linux"
  3064. - name: 1m_tcp_accept_queue_drops
  3065. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf
  3066. metric: ip.tcp_accept_queue
  3067. info: average number of dropped packets in the TCP accept queue over the last minute
  3068. os: "linux"
  3069. - name: tcp_connections
  3070. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_conn.conf
  3071. metric: ipv4.tcpsock
  3072. info: IPv4 TCP connections utilization
  3073. os: "linux"
  3074. - name: 1m_ipv4_tcp_resets_sent
  3075. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
  3076. metric: ipv4.tcphandshake
  3077. info: average number of sent TCP RESETS over the last minute
  3078. os: "linux"
  3079. - name: 10s_ipv4_tcp_resets_sent
  3080. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
  3081. metric: ipv4.tcphandshake
  3082. info:
  3083. average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has
  3084. crashed. Netdata will not send a clear notification for this alarm.
  3085. os: "linux"
  3086. - name: 1m_ipv4_tcp_resets_received
  3087. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
  3088. metric: ipv4.tcphandshake
  3089. info: average number of received TCP RESETS over the last minute
  3090. os: "linux freebsd"
  3091. - name: 10s_ipv4_tcp_resets_received
  3092. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
  3093. metric: ipv4.tcphandshake
  3094. info:
  3095. average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed.
  3096. Netdata will not send a clear notification for this alarm.
  3097. os: "linux freebsd"
  3098. - name: 1m_ipv4_udp_receive_buffer_errors
  3099. link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf
  3100. metric: ipv4.udperrors
  3101. info: average number of UDP receive buffer errors over the last minute
  3102. os: "linux freebsd"
  3103. - name: 1m_ipv4_udp_send_buffer_errors
  3104. link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf
  3105. metric: ipv4.udperrors
  3106. info: average number of UDP send buffer errors over the last minute
  3107. os: "linux"
  3108. metrics:
  3109. folding:
  3110. title: Metrics
  3111. enabled: false
  3112. description: ""
  3113. availability: []
  3114. scopes:
  3115. - name: global
  3116. description: ""
  3117. labels: []
  3118. metrics:
  3119. - name: system.ip
  3120. description: IP Bandwidth
  3121. unit: "kilobits/s"
  3122. chart_type: area
  3123. dimensions:
  3124. - name: received
  3125. - name: sent
  3126. - name: ip.inerrors
  3127. description: IP Input Errors
  3128. unit: "packets/s"
  3129. chart_type: line
  3130. dimensions:
  3131. - name: noroutes
  3132. - name: truncated
  3133. - name: checksum
  3134. - name: ip.mcast
  3135. description: IP Multicast Bandwidth
  3136. unit: "kilobits/s"
  3137. chart_type: area
  3138. dimensions:
  3139. - name: received
  3140. - name: sent
  3141. - name: ip.bcast
  3142. description: IP Broadcast Bandwidth
  3143. unit: "kilobits/s"
  3144. chart_type: area
  3145. dimensions:
  3146. - name: received
  3147. - name: sent
  3148. - name: ip.mcastpkts
  3149. description: IP Multicast Packets
  3150. unit: "packets/s"
  3151. chart_type: line
  3152. dimensions:
  3153. - name: received
  3154. - name: sent
  3155. - name: ip.bcastpkts
  3156. description: IP Broadcast Packets
  3157. unit: "packets/s"
  3158. chart_type: line
  3159. dimensions:
  3160. - name: received
  3161. - name: sent
  3162. - name: ip.ecnpkts
  3163. description: IP ECN Statistics
  3164. unit: "packets/s"
  3165. chart_type: line
  3166. dimensions:
  3167. - name: CEP
  3168. - name: NoECTP
  3169. - name: ECTP0
  3170. - name: ECTP1
  3171. - name: ip.tcpmemorypressures
  3172. description: TCP Memory Pressures
  3173. unit: "events/s"
  3174. chart_type: line
  3175. dimensions:
  3176. - name: pressures
  3177. - name: ip.tcpconnaborts
  3178. description: TCP Connection Aborts
  3179. unit: "connections/s"
  3180. chart_type: line
  3181. dimensions:
  3182. - name: baddata
  3183. - name: userclosed
  3184. - name: nomemory
  3185. - name: timeout
  3186. - name: linger
  3187. - name: failed
  3188. - name: ip.tcpreorders
  3189. description: TCP Reordered Packets by Detection Method
  3190. unit: "packets/s"
  3191. chart_type: line
  3192. dimensions:
  3193. - name: timestamp
  3194. - name: sack
  3195. - name: fack
  3196. - name: reno
  3197. - name: ip.tcpofo
  3198. description: TCP Out-Of-Order Queue
  3199. unit: "packets/s"
  3200. chart_type: line
  3201. dimensions:
  3202. - name: inqueue
  3203. - name: dropped
  3204. - name: merged
  3205. - name: pruned
  3206. - name: ip.tcpsyncookies
  3207. description: TCP SYN Cookies
  3208. unit: "packets/s"
  3209. chart_type: line
  3210. dimensions:
  3211. - name: received
  3212. - name: sent
  3213. - name: failed
  3214. - name: ip.tcp_syn_queue
  3215. description: TCP SYN Queue Issues
  3216. unit: "packets/s"
  3217. chart_type: line
  3218. dimensions:
  3219. - name: drops
  3220. - name: cookies
  3221. - name: ip.tcp_accept_queue
  3222. description: TCP Accept Queue Issues
  3223. unit: "packets/s"
  3224. chart_type: line
  3225. dimensions:
  3226. - name: overflows
  3227. - name: drops
  3228. - name: ipv4.packets
  3229. description: IPv4 Packets
  3230. unit: "packets/s"
  3231. chart_type: line
  3232. dimensions:
  3233. - name: received
  3234. - name: sent
  3235. - name: forwarded
  3236. - name: delivered
  3237. - name: ipv4.fragsout
  3238. description: IPv4 Fragments Sent
  3239. unit: "packets/s"
  3240. chart_type: line
  3241. dimensions:
  3242. - name: ok
  3243. - name: failed
  3244. - name: created
  3245. - name: ipv4.fragsin
  3246. description: IPv4 Fragments Reassembly
  3247. unit: "packets/s"
  3248. chart_type: line
  3249. dimensions:
  3250. - name: ok
  3251. - name: failed
  3252. - name: all
  3253. - name: ipv4.errors
  3254. description: IPv4 Errors
  3255. unit: "packets/s"
  3256. chart_type: line
  3257. dimensions:
  3258. - name: InDiscards
  3259. - name: OutDiscards
  3260. - name: InHdrErrors
  3261. - name: OutNoRoutes
  3262. - name: InAddrErrors
  3263. - name: InUnknownProtos
  3264. - name: ipv4.icmp
  3265. description: IPv4 ICMP Packets
  3266. unit: "packets/s"
  3267. chart_type: line
  3268. dimensions:
  3269. - name: received
  3270. - name: sent
  3271. - name: ipv4.icmp_errors
  3272. description: IPv4 ICMP Errors
  3273. unit: "packets/s"
  3274. chart_type: line
  3275. dimensions:
  3276. - name: InErrors
  3277. - name: OutErrors
  3278. - name: InCsumErrors
  3279. - name: ipv4.icmpmsg
  3280. description: IPv4 ICMP Messages
  3281. unit: "packets/s"
  3282. chart_type: line
  3283. dimensions:
  3284. - name: InEchoReps
  3285. - name: OutEchoReps
  3286. - name: InDestUnreachs
  3287. - name: OutDestUnreachs
  3288. - name: InRedirects
  3289. - name: OutRedirects
  3290. - name: InEchos
  3291. - name: OutEchos
  3292. - name: InRouterAdvert
  3293. - name: OutRouterAdvert
  3294. - name: InRouterSelect
  3295. - name: OutRouterSelect
  3296. - name: InTimeExcds
  3297. - name: OutTimeExcds
  3298. - name: InParmProbs
  3299. - name: OutParmProbs
  3300. - name: InTimestamps
  3301. - name: OutTimestamps
  3302. - name: InTimestampReps
  3303. - name: OutTimestampReps
  3304. - name: ipv4.tcpsock
  3305. description: IPv4 TCP Connections
  3306. unit: "active connections"
  3307. chart_type: line
  3308. dimensions:
  3309. - name: connections
  3310. - name: ipv4.tcppackets
  3311. description: IPv4 TCP Packets
  3312. unit: "packets/s"
  3313. chart_type: line
  3314. dimensions:
  3315. - name: received
  3316. - name: sent
  3317. - name: ipv4.tcperrors
  3318. description: IPv4 TCP Errors
  3319. unit: "packets/s"
  3320. chart_type: line
  3321. dimensions:
  3322. - name: InErrs
  3323. - name: InCsumErrors
  3324. - name: RetransSegs
  3325. - name: ipv4.tcpopens
  3326. description: IPv4 TCP Opens
  3327. unit: "connections/s"
  3328. chart_type: line
  3329. dimensions:
  3330. - name: active
  3331. - name: passive
  3332. - name: ipv4.tcphandshake
  3333. description: IPv4 TCP Handshake Issues
  3334. unit: "events/s"
  3335. chart_type: line
  3336. dimensions:
  3337. - name: EstabResets
  3338. - name: OutRsts
  3339. - name: AttemptFails
  3340. - name: SynRetrans
  3341. - name: ipv4.udppackets
  3342. description: IPv4 UDP Packets
  3343. unit: "packets/s"
  3344. chart_type: line
  3345. dimensions:
  3346. - name: received
  3347. - name: sent
  3348. - name: ipv4.udperrors
  3349. description: IPv4 UDP Errors
  3350. unit: "events/s"
  3351. chart_type: line
  3352. dimensions:
  3353. - name: RcvbufErrors
  3354. - name: SndbufErrors
  3355. - name: InErrors
  3356. - name: NoPorts
  3357. - name: InCsumErrors
  3358. - name: IgnoredMulti
  3359. - name: ipv4.udplite
  3360. description: IPv4 UDPLite Packets
  3361. unit: "packets/s"
  3362. chart_type: line
  3363. dimensions:
  3364. - name: received
  3365. - name: sent
  3366. - name: ipv4.udplite_errors
  3367. description: IPv4 UDPLite Errors
  3368. unit: "packets/s"
  3369. chart_type: line
  3370. dimensions:
  3371. - name: RcvbufErrors
  3372. - name: SndbufErrors
  3373. - name: InErrors
  3374. - name: NoPorts
  3375. - name: InCsumErrors
  3376. - name: IgnoredMulti
  3377. - name: system.ipv6
  3378. description: IPv6 Bandwidth
  3379. unit: "kilobits/s"
  3380. chart_type: area
  3381. dimensions:
  3382. - name: received
  3383. - name: sent
  3384. - name: ipv6.packets
  3385. description: IPv6 Packets
  3386. unit: "packets/s"
  3387. chart_type: line
  3388. dimensions:
  3389. - name: received
  3390. - name: sent
  3391. - name: forwarded
  3392. - name: delivers
  3393. - name: ipv6.fragsout
  3394. description: IPv6 Fragments Sent
  3395. unit: "packets/s"
  3396. chart_type: line
  3397. dimensions:
  3398. - name: ok
  3399. - name: failed
  3400. - name: all
  3401. - name: ipv6.fragsin
  3402. description: IPv6 Fragments Reassembly
  3403. unit: "packets/s"
  3404. chart_type: line
  3405. dimensions:
  3406. - name: ok
  3407. - name: failed
  3408. - name: timeout
  3409. - name: all
  3410. - name: ipv6.errors
  3411. description: IPv6 Errors
  3412. unit: "packets/s"
  3413. chart_type: line
  3414. dimensions:
  3415. - name: InDiscards
  3416. - name: OutDiscards
  3417. - name: InHdrErrors
  3418. - name: InAddrErrors
  3419. - name: InUnknownProtos
  3420. - name: InTooBigErrors
  3421. - name: InTruncatedPkts
  3422. - name: InNoRoutes
  3423. - name: OutNoRoutes
  3424. - name: ipv6.udppackets
  3425. description: IPv6 UDP Packets
  3426. unit: "packets/s"
  3427. chart_type: line
  3428. dimensions:
  3429. - name: received
  3430. - name: sent
  3431. - name: ipv6.udperrors
  3432. description: IPv6 UDP Errors
  3433. unit: "events/s"
  3434. chart_type: line
  3435. dimensions:
  3436. - name: RcvbufErrors
  3437. - name: SndbufErrors
  3438. - name: InErrors
  3439. - name: NoPorts
  3440. - name: InCsumErrors
  3441. - name: IgnoredMulti
  3442. - name: ipv6.udplitepackets
  3443. description: IPv6 UDPlite Packets
  3444. unit: "packets/s"
  3445. chart_type: line
  3446. dimensions:
  3447. - name: received
  3448. - name: sent
  3449. - name: ipv6.udpliteerrors
  3450. description: IPv6 UDP Lite Errors
  3451. unit: "events/s"
  3452. chart_type: line
  3453. dimensions:
  3454. - name: RcvbufErrors
  3455. - name: SndbufErrors
  3456. - name: InErrors
  3457. - name: NoPorts
  3458. - name: InCsumErrors
  3459. - name: ipv6.mcast
  3460. description: IPv6 Multicast Bandwidth
  3461. unit: "kilobits/s"
  3462. chart_type: area
  3463. dimensions:
  3464. - name: received
  3465. - name: sent
  3466. - name: ipv6.bcast
  3467. description: IPv6 Broadcast Bandwidth
  3468. unit: "kilobits/s"
  3469. chart_type: area
  3470. dimensions:
  3471. - name: received
  3472. - name: sent
  3473. - name: ipv6.mcastpkts
  3474. description: IPv6 Multicast Packets
  3475. unit: "packets/s"
  3476. chart_type: line
  3477. dimensions:
  3478. - name: received
  3479. - name: sent
  3480. - name: ipv6.icmp
  3481. description: IPv6 ICMP Messages
  3482. unit: "messages/s"
  3483. chart_type: line
  3484. dimensions:
  3485. - name: received
  3486. - name: sent
  3487. - name: ipv6.icmpredir
  3488. description: IPv6 ICMP Redirects
  3489. unit: "redirects/s"
  3490. chart_type: line
  3491. dimensions:
  3492. - name: received
  3493. - name: sent
  3494. - name: ipv6.icmperrors
  3495. description: IPv6 ICMP Errors
  3496. unit: "errors/s"
  3497. chart_type: line
  3498. dimensions:
  3499. - name: InErrors
  3500. - name: OutErrors
  3501. - name: InCsumErrors
  3502. - name: InDestUnreachs
  3503. - name: InPktTooBigs
  3504. - name: InTimeExcds
  3505. - name: InParmProblems
  3506. - name: OutDestUnreachs
  3507. - name: OutPktTooBigs
  3508. - name: OutTimeExcds
  3509. - name: OutParmProblems
  3510. - name: ipv6.icmpechos
  3511. description: IPv6 ICMP Echo
  3512. unit: "messages/s"
  3513. chart_type: line
  3514. dimensions:
  3515. - name: InEchos
  3516. - name: OutEchos
  3517. - name: InEchoReplies
  3518. - name: OutEchoReplies
  3519. - name: ipv6.groupmemb
  3520. description: IPv6 ICMP Group Membership
  3521. unit: "messages/s"
  3522. chart_type: line
  3523. dimensions:
  3524. - name: InQueries
  3525. - name: OutQueries
  3526. - name: InResponses
  3527. - name: OutResponses
  3528. - name: InReductions
  3529. - name: OutReductions
  3530. - name: ipv6.icmprouter
  3531. description: IPv6 Router Messages
  3532. unit: "messages/s"
  3533. chart_type: line
  3534. dimensions:
  3535. - name: InSolicits
  3536. - name: OutSolicits
  3537. - name: InAdvertisements
  3538. - name: OutAdvertisements
  3539. - name: ipv6.icmpneighbor
  3540. description: IPv6 Neighbor Messages
  3541. unit: "messages/s"
  3542. chart_type: line
  3543. dimensions:
  3544. - name: InSolicits
  3545. - name: OutSolicits
  3546. - name: InAdvertisements
  3547. - name: OutAdvertisements
  3548. - name: ipv6.icmpmldv2
  3549. description: IPv6 ICMP MLDv2 Reports
  3550. unit: "reports/s"
  3551. chart_type: line
  3552. dimensions:
  3553. - name: received
  3554. - name: sent
  3555. - name: ipv6.icmptypes
  3556. description: IPv6 ICMP Types
  3557. unit: "messages/s"
  3558. chart_type: line
  3559. dimensions:
  3560. - name: InType1
  3561. - name: InType128
  3562. - name: InType129
  3563. - name: InType136
  3564. - name: OutType1
  3565. - name: OutType128
  3566. - name: OutType129
  3567. - name: OutType133
  3568. - name: OutType135
  3569. - name: OutType143
  3570. - name: ipv6.ect
  3571. description: IPv6 ECT Packets
  3572. unit: "packets/s"
  3573. chart_type: line
  3574. dimensions:
  3575. - name: InNoECTPkts
  3576. - name: InECT1Pkts
  3577. - name: InECT0Pkts
  3578. - name: InCEPkts
  3579. - name: ipv6.ect
  3580. description: IPv6 ECT Packets
  3581. unit: "packets/s"
  3582. chart_type: line
  3583. dimensions:
  3584. - name: InNoECTPkts
  3585. - name: InECT1Pkts
  3586. - name: InECT0Pkts
  3587. - name: InCEPkts
  3588. - meta:
  3589. plugin_name: proc.plugin
  3590. module_name: /proc/net/sockstat
  3591. monitored_instance:
  3592. name: Socket statistics
  3593. link: ""
  3594. categories:
  3595. - data-collection.linux-systems.network-metrics
  3596. icon_filename: "network-wired.svg"
  3597. related_resources:
  3598. integrations:
  3599. list: []
  3600. info_provided_to_referring_integrations:
  3601. description: ""
  3602. keywords:
  3603. - sockets
  3604. most_popular: false
  3605. overview:
  3606. data_collection:
  3607. metrics_description: "This integration provides socket statistics."
  3608. method_description: ""
  3609. supported_platforms:
  3610. include: []
  3611. exclude: []
  3612. multi_instance: true
  3613. additional_permissions:
  3614. description: ""
  3615. default_behavior:
  3616. auto_detection:
  3617. description: ""
  3618. limits:
  3619. description: ""
  3620. performance_impact:
  3621. description: ""
  3622. setup:
  3623. prerequisites:
  3624. list: []
  3625. configuration:
  3626. file:
  3627. name: ""
  3628. description: ""
  3629. options:
  3630. description: ""
  3631. folding:
  3632. title: ""
  3633. enabled: true
  3634. list: []
  3635. examples:
  3636. folding:
  3637. enabled: true
  3638. title: ""
  3639. list: []
  3640. troubleshooting:
  3641. problems:
  3642. list: []
  3643. alerts:
  3644. - name: tcp_orphans
  3645. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_orphans.conf
  3646. metric: ipv4.sockstat_tcp_sockets
  3647. info: orphan IPv4 TCP sockets utilization
  3648. os: "linux"
  3649. - name: tcp_memory
  3650. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_mem.conf
  3651. metric: ipv4.sockstat_tcp_mem
  3652. info: TCP memory utilization
  3653. os: "linux"
  3654. metrics:
  3655. folding:
  3656. title: Metrics
  3657. enabled: false
  3658. description: ""
  3659. availability: []
  3660. scopes:
  3661. - name: global
  3662. description: ""
  3663. labels: []
  3664. metrics:
  3665. - name: ipv4.sockstat_sockets
  3666. description: IPv4 Sockets Used
  3667. unit: "sockets"
  3668. chart_type: line
  3669. dimensions:
  3670. - name: used
  3671. - name: ipv4.sockstat_tcp_sockets
  3672. description: IPv4 TCP Sockets
  3673. unit: "sockets"
  3674. chart_type: line
  3675. dimensions:
  3676. - name: alloc
  3677. - name: orphan
  3678. - name: inuse
  3679. - name: timewait
  3680. - name: ipv4.sockstat_tcp_mem
  3681. description: IPv4 TCP Sockets Memory
  3682. unit: "KiB"
  3683. chart_type: area
  3684. dimensions:
  3685. - name: mem
  3686. - name: ipv4.sockstat_udp_sockets
  3687. description: IPv4 UDP Sockets
  3688. unit: "sockets"
  3689. chart_type: line
  3690. dimensions:
  3691. - name: inuse
  3692. - name: ipv4.sockstat_udp_mem
  3693. description: IPv4 UDP Sockets Memory
  3694. unit: "KiB"
  3695. chart_type: line
  3696. dimensions:
  3697. - name: mem
  3698. - name: ipv4.sockstat_udplite_sockets
  3699. description: IPv4 UDPLITE Sockets
  3700. unit: "sockets"
  3701. chart_type: line
  3702. dimensions:
  3703. - name: inuse
  3704. - name: ipv4.sockstat_raw_sockets
  3705. description: IPv4 RAW Sockets
  3706. unit: "sockets"
  3707. chart_type: line
  3708. dimensions:
  3709. - name: inuse
  3710. - name: ipv4.sockstat_frag_sockets
  3711. description: IPv4 FRAG Sockets
  3712. unit: "fragments"
  3713. chart_type: line
  3714. dimensions:
  3715. - name: inuse
  3716. - name: ipv4.sockstat_frag_mem
  3717. description: IPv4 FRAG Sockets Memory
  3718. unit: "KiB"
  3719. chart_type: area
  3720. dimensions:
  3721. - name: mem
  3722. - meta:
  3723. plugin_name: proc.plugin
  3724. module_name: /proc/net/sockstat6
  3725. monitored_instance:
  3726. name: IPv6 Socket Statistics
  3727. link: ""
  3728. categories:
  3729. - data-collection.linux-systems.network-metrics
  3730. icon_filename: "network-wired.svg"
  3731. related_resources:
  3732. integrations:
  3733. list: []
  3734. info_provided_to_referring_integrations:
  3735. description: ""
  3736. keywords:
  3737. - ipv6 sockets
  3738. most_popular: false
  3739. overview:
  3740. data_collection:
  3741. metrics_description: "This integration provides IPv6 socket statistics."
  3742. method_description: ""
  3743. supported_platforms:
  3744. include: []
  3745. exclude: []
  3746. multi_instance: true
  3747. additional_permissions:
  3748. description: ""
  3749. default_behavior:
  3750. auto_detection:
  3751. description: ""
  3752. limits:
  3753. description: ""
  3754. performance_impact:
  3755. description: ""
  3756. setup:
  3757. prerequisites:
  3758. list: []
  3759. configuration:
  3760. file:
  3761. name: ""
  3762. description: ""
  3763. options:
  3764. description: ""
  3765. folding:
  3766. title: ""
  3767. enabled: true
  3768. list: []
  3769. examples:
  3770. folding:
  3771. enabled: true
  3772. title: ""
  3773. list: []
  3774. troubleshooting:
  3775. problems:
  3776. list: []
  3777. alerts: []
  3778. metrics:
  3779. folding:
  3780. title: Metrics
  3781. enabled: false
  3782. description: ""
  3783. availability: []
  3784. scopes:
  3785. - name: global
  3786. description: ""
  3787. labels: []
  3788. metrics:
  3789. - name: ipv6.sockstat6_tcp_sockets
  3790. description: IPv6 TCP Sockets
  3791. unit: "sockets"
  3792. chart_type: line
  3793. dimensions:
  3794. - name: inuse
  3795. - name: ipv6.sockstat6_udp_sockets
  3796. description: IPv6 UDP Sockets
  3797. unit: "sockets"
  3798. chart_type: line
  3799. dimensions:
  3800. - name: inuse
  3801. - name: ipv6.sockstat6_udplite_sockets
  3802. description: IPv6 UDPLITE Sockets
  3803. unit: "sockets"
  3804. chart_type: line
  3805. dimensions:
  3806. - name: inuse
  3807. - name: ipv6.sockstat6_raw_sockets
  3808. description: IPv6 RAW Sockets
  3809. unit: "sockets"
  3810. chart_type: line
  3811. dimensions:
  3812. - name: inuse
  3813. - name: ipv6.sockstat6_frag_sockets
  3814. description: IPv6 FRAG Sockets
  3815. unit: "fragments"
  3816. chart_type: line
  3817. dimensions:
  3818. - name: inuse
  3819. - meta:
  3820. plugin_name: proc.plugin
  3821. module_name: /proc/net/ip_vs_stats
  3822. monitored_instance:
  3823. name: IP Virtual Server
  3824. link: ""
  3825. categories:
  3826. - data-collection.linux-systems.network-metrics
  3827. icon_filename: "network-wired.svg"
  3828. related_resources:
  3829. integrations:
  3830. list: []
  3831. info_provided_to_referring_integrations:
  3832. description: ""
  3833. keywords:
  3834. - ip virtual server
  3835. most_popular: false
  3836. overview:
  3837. data_collection:
  3838. metrics_description: "This integration monitors IP Virtual Server statistics."
  3839. method_description: ""
  3840. supported_platforms:
  3841. include: []
  3842. exclude: []
  3843. multi_instance: true
  3844. additional_permissions:
  3845. description: ""
  3846. default_behavior:
  3847. auto_detection:
  3848. description: ""
  3849. limits:
  3850. description: ""
  3851. performance_impact:
  3852. description: ""
  3853. setup:
  3854. prerequisites:
  3855. list: []
  3856. configuration:
  3857. file:
  3858. name: ""
  3859. description: ""
  3860. options:
  3861. description: ""
  3862. folding:
  3863. title: ""
  3864. enabled: true
  3865. list: []
  3866. examples:
  3867. folding:
  3868. enabled: true
  3869. title: ""
  3870. list: []
  3871. troubleshooting:
  3872. problems:
  3873. list: []
  3874. alerts: []
  3875. metrics:
  3876. folding:
  3877. title: Metrics
  3878. enabled: false
  3879. description: ""
  3880. availability: []
  3881. scopes:
  3882. - name: global
  3883. description: ""
  3884. labels: []
  3885. metrics:
  3886. - name: ipvs.sockets
  3887. description: IPVS New Connections
  3888. unit: "connections/s"
  3889. chart_type: line
  3890. dimensions:
  3891. - name: connections
  3892. - name: ipvs.packets
  3893. description: IPVS Packets
  3894. unit: "packets/s"
  3895. chart_type: line
  3896. dimensions:
  3897. - name: received
  3898. - name: sent
  3899. - name: ipvs.net
  3900. description: IPVS Bandwidth
  3901. unit: "kilobits/s"
  3902. chart_type: area
  3903. dimensions:
  3904. - name: received
  3905. - name: sent
  3906. - meta:
  3907. plugin_name: proc.plugin
  3908. module_name: /proc/net/rpc/nfs
  3909. monitored_instance:
  3910. name: NFS Client
  3911. link: ""
  3912. categories:
  3913. - data-collection.linux-systems.filesystem-metrics.nfs
  3914. icon_filename: "nfs.png"
  3915. related_resources:
  3916. integrations:
  3917. list: []
  3918. info_provided_to_referring_integrations:
  3919. description: ""
  3920. keywords:
  3921. - nfs client
  3922. - filesystem
  3923. most_popular: false
  3924. overview:
  3925. data_collection:
  3926. metrics_description: "This integration provides statistics from the Linux kernel's NFS Client."
  3927. method_description: ""
  3928. supported_platforms:
  3929. include: []
  3930. exclude: []
  3931. multi_instance: true
  3932. additional_permissions:
  3933. description: ""
  3934. default_behavior:
  3935. auto_detection:
  3936. description: ""
  3937. limits:
  3938. description: ""
  3939. performance_impact:
  3940. description: ""
  3941. setup:
  3942. prerequisites:
  3943. list: []
  3944. configuration:
  3945. file:
  3946. name: ""
  3947. description: ""
  3948. options:
  3949. description: ""
  3950. folding:
  3951. title: ""
  3952. enabled: true
  3953. list: []
  3954. examples:
  3955. folding:
  3956. enabled: true
  3957. title: ""
  3958. list: []
  3959. troubleshooting:
  3960. problems:
  3961. list: []
  3962. alerts: []
  3963. metrics:
  3964. folding:
  3965. title: Metrics
  3966. enabled: false
  3967. description: ""
  3968. availability: []
  3969. scopes:
  3970. - name: global
  3971. description: ""
  3972. labels: []
  3973. metrics:
  3974. - name: nfs.net
  3975. description: NFS Client Network
  3976. unit: "operations/s"
  3977. chart_type: stacked
  3978. dimensions:
  3979. - name: udp
  3980. - name: tcp
  3981. - name: nfs.rpc
  3982. description: NFS Client Remote Procedure Calls Statistics
  3983. unit: "calls/s"
  3984. chart_type: line
  3985. dimensions:
  3986. - name: calls
  3987. - name: retransmits
  3988. - name: auth_refresh
  3989. - name: nfs.proc2
  3990. description: NFS v2 Client Remote Procedure Calls
  3991. unit: "calls/s"
  3992. chart_type: stacked
  3993. dimensions:
  3994. - name: a dimension per proc2 call
  3995. - name: nfs.proc3
  3996. description: NFS v3 Client Remote Procedure Calls
  3997. unit: "calls/s"
  3998. chart_type: stacked
  3999. dimensions:
  4000. - name: a dimension per proc3 call
  4001. - name: nfs.proc4
  4002. description: NFS v4 Client Remote Procedure Calls
  4003. unit: "calls/s"
  4004. chart_type: stacked
  4005. dimensions:
  4006. - name: a dimension per proc4 call
  4007. - meta:
  4008. plugin_name: proc.plugin
  4009. module_name: /proc/net/rpc/nfsd
  4010. monitored_instance:
  4011. name: NFS Server
  4012. link: ""
  4013. categories:
  4014. - data-collection.linux-systems.filesystem-metrics.nfs
  4015. icon_filename: "nfs.png"
  4016. related_resources:
  4017. integrations:
  4018. list: []
  4019. info_provided_to_referring_integrations:
  4020. description: ""
  4021. keywords:
  4022. - nfs server
  4023. - filesystem
  4024. most_popular: false
  4025. overview:
  4026. data_collection:
  4027. metrics_description: "This integration provides statistics from the Linux kernel's NFS Server."
  4028. method_description: ""
  4029. supported_platforms:
  4030. include: []
  4031. exclude: []
  4032. multi_instance: true
  4033. additional_permissions:
  4034. description: ""
  4035. default_behavior:
  4036. auto_detection:
  4037. description: ""
  4038. limits:
  4039. description: ""
  4040. performance_impact:
  4041. description: ""
  4042. setup:
  4043. prerequisites:
  4044. list: []
  4045. configuration:
  4046. file:
  4047. name: ""
  4048. description: ""
  4049. options:
  4050. description: ""
  4051. folding:
  4052. title: ""
  4053. enabled: true
  4054. list: []
  4055. examples:
  4056. folding:
  4057. enabled: true
  4058. title: ""
  4059. list: []
  4060. troubleshooting:
  4061. problems:
  4062. list: []
  4063. alerts: []
  4064. metrics:
  4065. folding:
  4066. title: Metrics
  4067. enabled: false
  4068. description: ""
  4069. availability: []
  4070. scopes:
  4071. - name: global
  4072. description: ""
  4073. labels: []
  4074. metrics:
  4075. - name: nfsd.readcache
  4076. description: NFS Server Read Cache
  4077. unit: "reads/s"
  4078. chart_type: stacked
  4079. dimensions:
  4080. - name: hits
  4081. - name: misses
  4082. - name: nocache
  4083. - name: nfsd.filehandles
  4084. description: NFS Server File Handles
  4085. unit: "handles/s"
  4086. chart_type: line
  4087. dimensions:
  4088. - name: stale
  4089. - name: nfsd.io
  4090. description: NFS Server I/O
  4091. unit: "kilobytes/s"
  4092. chart_type: area
  4093. dimensions:
  4094. - name: read
  4095. - name: write
  4096. - name: nfsd.threads
  4097. description: NFS Server Threads
  4098. unit: "threads"
  4099. chart_type: line
  4100. dimensions:
  4101. - name: threads
  4102. - name: nfsd.net
  4103. description: NFS Server Network Statistics
  4104. unit: "packets/s"
  4105. chart_type: line
  4106. dimensions:
  4107. - name: udp
  4108. - name: tcp
  4109. - name: nfsd.rpc
  4110. description: NFS Server Remote Procedure Calls Statistics
  4111. unit: "calls/s"
  4112. chart_type: line
  4113. dimensions:
  4114. - name: calls
  4115. - name: bad_format
  4116. - name: bad_auth
  4117. - name: nfsd.proc2
  4118. description: NFS v2 Server Remote Procedure Calls
  4119. unit: "calls/s"
  4120. chart_type: stacked
  4121. dimensions:
  4122. - name: a dimension per proc2 call
  4123. - name: nfsd.proc3
  4124. description: NFS v3 Server Remote Procedure Calls
  4125. unit: "calls/s"
  4126. chart_type: stacked
  4127. dimensions:
  4128. - name: a dimension per proc3 call
  4129. - name: nfsd.proc4
  4130. description: NFS v4 Server Remote Procedure Calls
  4131. unit: "calls/s"
  4132. chart_type: stacked
  4133. dimensions:
  4134. - name: a dimension per proc4 call
  4135. - name: nfsd.proc4ops
  4136. description: NFS v4 Server Operations
  4137. unit: "operations/s"
  4138. chart_type: stacked
  4139. dimensions:
  4140. - name: a dimension per proc4 operation
  4141. - meta:
  4142. plugin_name: proc.plugin
  4143. module_name: /proc/net/sctp/snmp
  4144. monitored_instance:
  4145. name: SCTP Statistics
  4146. link: ""
  4147. categories:
  4148. - data-collection.linux-systems.network-metrics
  4149. icon_filename: "network-wired.svg"
  4150. related_resources:
  4151. integrations:
  4152. list: []
  4153. info_provided_to_referring_integrations:
  4154. description: ""
  4155. keywords:
  4156. - sctp
  4157. - stream control transmission protocol
  4158. most_popular: false
  4159. overview:
  4160. data_collection:
  4161. metrics_description: "This integration provides statistics about the Stream Control Transmission Protocol (SCTP)."
  4162. method_description: ""
  4163. supported_platforms:
  4164. include: []
  4165. exclude: []
  4166. multi_instance: true
  4167. additional_permissions:
  4168. description: ""
  4169. default_behavior:
  4170. auto_detection:
  4171. description: ""
  4172. limits:
  4173. description: ""
  4174. performance_impact:
  4175. description: ""
  4176. setup:
  4177. prerequisites:
  4178. list: []
  4179. configuration:
  4180. file:
  4181. name: ""
  4182. description: ""
  4183. options:
  4184. description: ""
  4185. folding:
  4186. title: ""
  4187. enabled: true
  4188. list: []
  4189. examples:
  4190. folding:
  4191. enabled: true
  4192. title: ""
  4193. list: []
  4194. troubleshooting:
  4195. problems:
  4196. list: []
  4197. alerts: []
  4198. metrics:
  4199. folding:
  4200. title: Metrics
  4201. enabled: false
  4202. description: ""
  4203. availability: []
  4204. scopes:
  4205. - name: global
  4206. description: ""
  4207. labels: []
  4208. metrics:
  4209. - name: sctp.established
  4210. description: SCTP current total number of established associations
  4211. unit: "associations"
  4212. chart_type: line
  4213. dimensions:
  4214. - name: established
  4215. - name: sctp.transitions
  4216. description: SCTP Association Transitions
  4217. unit: "transitions/s"
  4218. chart_type: line
  4219. dimensions:
  4220. - name: active
  4221. - name: passive
  4222. - name: aborted
  4223. - name: shutdown
  4224. - name: sctp.packets
  4225. description: SCTP Packets
  4226. unit: "packets/s"
  4227. chart_type: line
  4228. dimensions:
  4229. - name: received
  4230. - name: sent
  4231. - name: sctp.packet_errors
  4232. description: SCTP Packet Errors
  4233. unit: "packets/s"
  4234. chart_type: line
  4235. dimensions:
  4236. - name: invalid
  4237. - name: checksum
  4238. - name: sctp.fragmentation
  4239. description: SCTP Fragmentation
  4240. unit: "packets/s"
  4241. chart_type: line
  4242. dimensions:
  4243. - name: reassembled
  4244. - name: fragmented
  4245. - meta:
  4246. plugin_name: proc.plugin
  4247. module_name: /proc/net/stat/nf_conntrack
  4248. monitored_instance:
  4249. name: Conntrack
  4250. link: ""
  4251. categories:
  4252. - data-collection.linux-systems.firewall-metrics
  4253. icon_filename: "firewall.svg"
  4254. related_resources:
  4255. integrations:
  4256. list: []
  4257. info_provided_to_referring_integrations:
  4258. description: ""
  4259. keywords:
  4260. - connection tracking mechanism
  4261. - netfilter
  4262. - conntrack
  4263. most_popular: false
  4264. overview:
  4265. data_collection:
  4266. metrics_description: "This integration monitors the connection tracking mechanism of Netfilter in the Linux Kernel."
  4267. method_description: ""
  4268. supported_platforms:
  4269. include: []
  4270. exclude: []
  4271. multi_instance: true
  4272. additional_permissions:
  4273. description: ""
  4274. default_behavior:
  4275. auto_detection:
  4276. description: ""
  4277. limits:
  4278. description: ""
  4279. performance_impact:
  4280. description: ""
  4281. setup:
  4282. prerequisites:
  4283. list: []
  4284. configuration:
  4285. file:
  4286. name: ""
  4287. description: ""
  4288. options:
  4289. description: ""
  4290. folding:
  4291. title: ""
  4292. enabled: true
  4293. list: []
  4294. examples:
  4295. folding:
  4296. enabled: true
  4297. title: ""
  4298. list: []
  4299. troubleshooting:
  4300. problems:
  4301. list: []
  4302. alerts:
  4303. - name: netfilter_conntrack_full
  4304. link: https://github.com/netdata/netdata/blob/master/health/health.d/netfilter.conf
  4305. metric: netfilter.conntrack_sockets
  4306. info: netfilter connection tracker table size utilization
  4307. os: "linux"
  4308. metrics:
  4309. folding:
  4310. title: Metrics
  4311. enabled: false
  4312. description: ""
  4313. availability: []
  4314. scopes:
  4315. - name: global
  4316. description: ""
  4317. labels: []
  4318. metrics:
  4319. - name: netfilter.conntrack_sockets
  4320. description: Connection Tracker Connections
  4321. unit: "active connections"
  4322. chart_type: line
  4323. dimensions:
  4324. - name: connections
  4325. - name: netfilter.conntrack_new
  4326. description: Connection Tracker New Connections
  4327. unit: "connections/s"
  4328. chart_type: line
  4329. dimensions:
  4330. - name: new
  4331. - name: ignore
  4332. - name: invalid
  4333. - name: netfilter.conntrack_changes
  4334. description: Connection Tracker Changes
  4335. unit: "changes/s"
  4336. chart_type: line
  4337. dimensions:
  4338. - name: inserted
  4339. - name: deleted
  4340. - name: delete_list
  4341. - name: netfilter.conntrack_expect
  4342. description: Connection Tracker Expectations
  4343. unit: "expectations/s"
  4344. chart_type: line
  4345. dimensions:
  4346. - name: created
  4347. - name: deleted
  4348. - name: new
  4349. - name: netfilter.conntrack_search
  4350. description: Connection Tracker Searches
  4351. unit: "searches/s"
  4352. chart_type: line
  4353. dimensions:
  4354. - name: searched
  4355. - name: restarted
  4356. - name: found
  4357. - name: netfilter.conntrack_errors
  4358. description: Connection Tracker Errors
  4359. unit: "events/s"
  4360. chart_type: line
  4361. dimensions:
  4362. - name: icmp_error
  4363. - name: error_failed
  4364. - name: drop
  4365. - name: early_drop
  4366. - meta:
  4367. plugin_name: proc.plugin
  4368. module_name: /proc/net/stat/synproxy
  4369. monitored_instance:
  4370. name: Synproxy
  4371. link: ""
  4372. categories:
  4373. - data-collection.linux-systems.firewall-metrics
  4374. icon_filename: "firewall.svg"
  4375. related_resources:
  4376. integrations:
  4377. list: []
  4378. info_provided_to_referring_integrations:
  4379. description: ""
  4380. keywords:
  4381. - synproxy
  4382. most_popular: false
  4383. overview:
  4384. data_collection:
  4385. metrics_description: "This integration provides statistics about the Synproxy netfilter module."
  4386. method_description: ""
  4387. supported_platforms:
  4388. include: []
  4389. exclude: []
  4390. multi_instance: true
  4391. additional_permissions:
  4392. description: ""
  4393. default_behavior:
  4394. auto_detection:
  4395. description: ""
  4396. limits:
  4397. description: ""
  4398. performance_impact:
  4399. description: ""
  4400. setup:
  4401. prerequisites:
  4402. list: []
  4403. configuration:
  4404. file:
  4405. name: ""
  4406. description: ""
  4407. options:
  4408. description: ""
  4409. folding:
  4410. title: ""
  4411. enabled: true
  4412. list: []
  4413. examples:
  4414. folding:
  4415. enabled: true
  4416. title: ""
  4417. list: []
  4418. troubleshooting:
  4419. problems:
  4420. list: []
  4421. alerts: []
  4422. metrics:
  4423. folding:
  4424. title: Metrics
  4425. enabled: false
  4426. description: ""
  4427. availability: []
  4428. scopes:
  4429. - name: global
  4430. description: ""
  4431. labels: []
  4432. metrics:
  4433. - name: netfilter.synproxy_syn_received
  4434. description: SYNPROXY SYN Packets received
  4435. unit: "packets/s"
  4436. chart_type: line
  4437. dimensions:
  4438. - name: received
  4439. - name: netfilter.synproxy_conn_reopened
  4440. description: SYNPROXY Connections Reopened
  4441. unit: "connections/s"
  4442. chart_type: line
  4443. dimensions:
  4444. - name: reopened
  4445. - name: netfilter.synproxy_cookies
  4446. description: SYNPROXY TCP Cookies
  4447. unit: "cookies/s"
  4448. chart_type: line
  4449. dimensions:
  4450. - name: valid
  4451. - name: invalid
  4452. - name: retransmits
  4453. - meta:
  4454. plugin_name: proc.plugin
  4455. module_name: /proc/spl/kstat/zfs
  4456. monitored_instance:
  4457. name: ZFS Pools
  4458. link: ""
  4459. categories:
  4460. - data-collection.linux-systems.filesystem-metrics.zfs
  4461. icon_filename: "filesystem.svg"
  4462. related_resources:
  4463. integrations:
  4464. list: []
  4465. info_provided_to_referring_integrations:
  4466. description: ""
  4467. keywords:
  4468. - zfs pools
  4469. - pools
  4470. - zfs
  4471. - filesystem
  4472. most_popular: false
  4473. overview:
  4474. data_collection:
  4475. metrics_description: "This integration provides metrics about the state of ZFS pools."
  4476. method_description: ""
  4477. supported_platforms:
  4478. include: []
  4479. exclude: []
  4480. multi_instance: true
  4481. additional_permissions:
  4482. description: ""
  4483. default_behavior:
  4484. auto_detection:
  4485. description: ""
  4486. limits:
  4487. description: ""
  4488. performance_impact:
  4489. description: ""
  4490. setup:
  4491. prerequisites:
  4492. list: []
  4493. configuration:
  4494. file:
  4495. name: ""
  4496. description: ""
  4497. options:
  4498. description: ""
  4499. folding:
  4500. title: ""
  4501. enabled: true
  4502. list: []
  4503. examples:
  4504. folding:
  4505. enabled: true
  4506. title: ""
  4507. list: []
  4508. troubleshooting:
  4509. problems:
  4510. list: []
  4511. alerts:
  4512. - name: zfs_pool_state_warn
  4513. link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf
  4514. metric: zfspool.state
  4515. info: ZFS pool ${label:pool} state is degraded
  4516. - name: zfs_pool_state_crit
  4517. link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf
  4518. metric: zfspool.state
  4519. info: ZFS pool ${label:pool} state is faulted or unavail
  4520. metrics:
  4521. folding:
  4522. title: Metrics
  4523. enabled: false
  4524. description: ""
  4525. availability: []
  4526. scopes:
  4527. - name: zfs pool
  4528. description: ""
  4529. labels:
  4530. - name: pool
  4531. description: TBD
  4532. metrics:
  4533. - name: zfspool.state
  4534. description: ZFS pool state
  4535. unit: "boolean"
  4536. chart_type: line
  4537. dimensions:
  4538. - name: online
  4539. - name: degraded
  4540. - name: faulted
  4541. - name: offline
  4542. - name: removed
  4543. - name: unavail
  4544. - name: suspended
  4545. - meta:
  4546. plugin_name: proc.plugin
  4547. module_name: /proc/spl/kstat/zfs/arcstats
  4548. monitored_instance:
  4549. name: ZFS Adaptive Replacement Cache
  4550. link: ""
  4551. categories:
  4552. - data-collection.linux-systems.filesystem-metrics.zfs
  4553. icon_filename: "filesystem.svg"
  4554. related_resources:
  4555. integrations:
  4556. list: []
  4557. info_provided_to_referring_integrations:
  4558. description: ""
  4559. keywords:
  4560. - zfs arc
  4561. - arc
  4562. - zfs
  4563. - filesystem
  4564. most_popular: false
  4565. overview:
  4566. data_collection:
  4567. metrics_description: "This integration monitors ZFS Adaptive Replacement Cache (ARC) statistics."
  4568. method_description: ""
  4569. supported_platforms:
  4570. include: []
  4571. exclude: []
  4572. multi_instance: true
  4573. additional_permissions:
  4574. description: ""
  4575. default_behavior:
  4576. auto_detection:
  4577. description: ""
  4578. limits:
  4579. description: ""
  4580. performance_impact:
  4581. description: ""
  4582. setup:
  4583. prerequisites:
  4584. list: []
  4585. configuration:
  4586. file:
  4587. name: ""
  4588. description: ""
  4589. options:
  4590. description: ""
  4591. folding:
  4592. title: ""
  4593. enabled: true
  4594. list: []
  4595. examples:
  4596. folding:
  4597. enabled: true
  4598. title: ""
  4599. list: []
  4600. troubleshooting:
  4601. problems:
  4602. list: []
  4603. alerts:
  4604. - name: zfs_memory_throttle
  4605. link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf
  4606. metric: zfs.memory_ops
  4607. info: number of times ZFS had to limit the ARC growth in the last 10 minutes
  4608. metrics:
  4609. folding:
  4610. title: Metrics
  4611. enabled: false
  4612. description: ""
  4613. availability: []
  4614. scopes:
  4615. - name: global
  4616. description: ""
  4617. labels: []
  4618. metrics:
  4619. - name: zfs.arc_size
  4620. description: ZFS ARC Size
  4621. unit: "MiB"
  4622. chart_type: area
  4623. dimensions:
  4624. - name: arcsz
  4625. - name: target
  4626. - name: min
  4627. - name: max
  4628. - name: zfs.l2_size
  4629. description: ZFS L2 ARC Size
  4630. unit: "MiB"
  4631. chart_type: area
  4632. dimensions:
  4633. - name: actual
  4634. - name: size
  4635. - name: zfs.reads
  4636. description: ZFS Reads
  4637. unit: "reads/s"
  4638. chart_type: area
  4639. dimensions:
  4640. - name: arc
  4641. - name: demand
  4642. - name: prefetch
  4643. - name: metadata
  4644. - name: l2
  4645. - name: zfs.bytes
  4646. description: ZFS ARC L2 Read/Write Rate
  4647. unit: "KiB/s"
  4648. chart_type: area
  4649. dimensions:
  4650. - name: read
  4651. - name: write
  4652. - name: zfs.hits
  4653. description: ZFS ARC Hits
  4654. unit: "percentage"
  4655. chart_type: stacked
  4656. dimensions:
  4657. - name: hits
  4658. - name: misses
  4659. - name: zfs.hits_rate
  4660. description: ZFS ARC Hits Rate
  4661. unit: "events/s"
  4662. chart_type: stacked
  4663. dimensions:
  4664. - name: hits
  4665. - name: misses
  4666. - name: zfs.dhits
  4667. description: ZFS Demand Hits
  4668. unit: "percentage"
  4669. chart_type: stacked
  4670. dimensions:
  4671. - name: hits
  4672. - name: misses
  4673. - name: zfs.dhits_rate
  4674. description: ZFS Demand Hits Rate
  4675. unit: "events/s"
  4676. chart_type: stacked
  4677. dimensions:
  4678. - name: hits
  4679. - name: misses
  4680. - name: zfs.phits
  4681. description: ZFS Prefetch Hits
  4682. unit: "percentage"
  4683. chart_type: stacked
  4684. dimensions:
  4685. - name: hits
  4686. - name: misses
  4687. - name: zfs.phits_rate
  4688. description: ZFS Prefetch Hits Rate
  4689. unit: "events/s"
  4690. chart_type: stacked
  4691. dimensions:
  4692. - name: hits
  4693. - name: misses
  4694. - name: zfs.mhits
  4695. description: ZFS Metadata Hits
  4696. unit: "percentage"
  4697. chart_type: stacked
  4698. dimensions:
  4699. - name: hits
  4700. - name: misses
  4701. - name: zfs.mhits_rate
  4702. description: ZFS Metadata Hits Rate
  4703. unit: "events/s"
  4704. chart_type: stacked
  4705. dimensions:
  4706. - name: hits
  4707. - name: misses
  4708. - name: zfs.l2hits
  4709. description: ZFS L2 Hits
  4710. unit: "percentage"
  4711. chart_type: stacked
  4712. dimensions:
  4713. - name: hits
  4714. - name: misses
  4715. - name: zfs.l2hits_rate
  4716. description: ZFS L2 Hits Rate
  4717. unit: "events/s"
  4718. chart_type: stacked
  4719. dimensions:
  4720. - name: hits
  4721. - name: misses
  4722. - name: zfs.list_hits
  4723. description: ZFS List Hits
  4724. unit: "hits/s"
  4725. chart_type: area
  4726. dimensions:
  4727. - name: mfu
  4728. - name: mfu_ghost
  4729. - name: mru
  4730. - name: mru_ghost
  4731. - name: zfs.arc_size_breakdown
  4732. description: ZFS ARC Size Breakdown
  4733. unit: "percentage"
  4734. chart_type: stacked
  4735. dimensions:
  4736. - name: recent
  4737. - name: frequent
  4738. - name: zfs.memory_ops
  4739. description: ZFS Memory Operations
  4740. unit: "operations/s"
  4741. chart_type: line
  4742. dimensions:
  4743. - name: direct
  4744. - name: throttled
  4745. - name: indirect
  4746. - name: zfs.important_ops
  4747. description: ZFS Important Operations
  4748. unit: "operations/s"
  4749. chart_type: line
  4750. dimensions:
  4751. - name: evict_skip
  4752. - name: deleted
  4753. - name: mutex_miss
  4754. - name: hash_collisions
  4755. - name: zfs.actual_hits
  4756. description: ZFS Actual Cache Hits
  4757. unit: "percentage"
  4758. chart_type: stacked
  4759. dimensions:
  4760. - name: hits
  4761. - name: misses
  4762. - name: zfs.actual_hits_rate
  4763. description: ZFS Actual Cache Hits Rate
  4764. unit: "events/s"
  4765. chart_type: stacked
  4766. dimensions:
  4767. - name: hits
  4768. - name: misses
  4769. - name: zfs.demand_data_hits
  4770. description: ZFS Data Demand Efficiency
  4771. unit: "percentage"
  4772. chart_type: stacked
  4773. dimensions:
  4774. - name: hits
  4775. - name: misses
  4776. - name: zfs.demand_data_hits_rate
  4777. description: ZFS Data Demand Efficiency Rate
  4778. unit: "events/s"
  4779. chart_type: stacked
  4780. dimensions:
  4781. - name: hits
  4782. - name: misses
  4783. - name: zfs.prefetch_data_hits
  4784. description: ZFS Data Prefetch Efficiency
  4785. unit: "percentage"
  4786. chart_type: stacked
  4787. dimensions:
  4788. - name: hits
  4789. - name: misses
  4790. - name: zfs.prefetch_data_hits_rate
  4791. description: ZFS Data Prefetch Efficiency Rate
  4792. unit: "events/s"
  4793. chart_type: stacked
  4794. dimensions:
  4795. - name: hits
  4796. - name: misses
  4797. - name: zfs.hash_elements
  4798. description: ZFS ARC Hash Elements
  4799. unit: "elements"
  4800. chart_type: line
  4801. dimensions:
  4802. - name: current
  4803. - name: max
  4804. - name: zfs.hash_chains
  4805. description: ZFS ARC Hash Chains
  4806. unit: "chains"
  4807. chart_type: line
  4808. dimensions:
  4809. - name: current
  4810. - name: max
  4811. - meta:
  4812. plugin_name: proc.plugin
  4813. module_name: /sys/fs/btrfs
  4814. monitored_instance:
  4815. name: BTRFS
  4816. link: ""
  4817. categories:
  4818. - data-collection.linux-systems.filesystem-metrics.btrfs
  4819. icon_filename: "filesystem.svg"
  4820. related_resources:
  4821. integrations:
  4822. list: []
  4823. info_provided_to_referring_integrations:
  4824. description: ""
  4825. keywords:
  4826. - btrfs
  4827. - filesystem
  4828. most_popular: false
  4829. overview:
  4830. data_collection:
  4831. metrics_description: "This integration provides usage and error statistics from the BTRFS filesystem."
  4832. method_description: ""
  4833. supported_platforms:
  4834. include: []
  4835. exclude: []
  4836. multi_instance: true
  4837. additional_permissions:
  4838. description: ""
  4839. default_behavior:
  4840. auto_detection:
  4841. description: ""
  4842. limits:
  4843. description: ""
  4844. performance_impact:
  4845. description: ""
  4846. setup:
  4847. prerequisites:
  4848. list: []
  4849. configuration:
  4850. file:
  4851. name: ""
  4852. description: ""
  4853. options:
  4854. description: ""
  4855. folding:
  4856. title: ""
  4857. enabled: true
  4858. list: []
  4859. examples:
  4860. folding:
  4861. enabled: true
  4862. title: ""
  4863. list: []
  4864. troubleshooting:
  4865. problems:
  4866. list: []
  4867. alerts:
  4868. - name: btrfs_allocated
  4869. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4870. metric: btrfs.disk
  4871. info: percentage of allocated BTRFS physical disk space
  4872. os: "*"
  4873. - name: btrfs_data
  4874. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4875. metric: btrfs.data
  4876. info: utilization of BTRFS data space
  4877. os: "*"
  4878. - name: btrfs_metadata
  4879. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4880. metric: btrfs.metadata
  4881. info: utilization of BTRFS metadata space
  4882. os: "*"
  4883. - name: btrfs_system
  4884. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4885. metric: btrfs.system
  4886. info: utilization of BTRFS system space
  4887. os: "*"
  4888. - name: btrfs_device_read_errors
  4889. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4890. metric: btrfs.device_errors
  4891. info: number of encountered BTRFS read errors
  4892. os: "*"
  4893. - name: btrfs_device_write_errors
  4894. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4895. metric: btrfs.device_errors
  4896. info: number of encountered BTRFS write errors
  4897. os: "*"
  4898. - name: btrfs_device_flush_errors
  4899. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4900. metric: btrfs.device_errors
  4901. info: number of encountered BTRFS flush errors
  4902. os: "*"
  4903. - name: btrfs_device_corruption_errors
  4904. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4905. metric: btrfs.device_errors
  4906. info: number of encountered BTRFS corruption errors
  4907. os: "*"
  4908. - name: btrfs_device_generation_errors
  4909. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4910. metric: btrfs.device_errors
  4911. info: number of encountered BTRFS generation errors
  4912. os: "*"
  4913. metrics:
  4914. folding:
  4915. title: Metrics
  4916. enabled: false
  4917. description: ""
  4918. availability: []
  4919. scopes:
  4920. - name: btrfs filesystem
  4921. description: ""
  4922. labels:
  4923. - name: filesystem_uuid
  4924. description: TBD
  4925. - name: filesystem_label
  4926. description: TBD
  4927. metrics:
  4928. - name: btrfs.disk
  4929. description: BTRFS Physical Disk Allocation
  4930. unit: "MiB"
  4931. chart_type: stacked
  4932. dimensions:
  4933. - name: unallocated
  4934. - name: data_free
  4935. - name: data_used
  4936. - name: meta_free
  4937. - name: meta_used
  4938. - name: sys_free
  4939. - name: sys_used
  4940. - name: btrfs.data
  4941. description: BTRFS Data Allocation
  4942. unit: "MiB"
  4943. chart_type: stacked
  4944. dimensions:
  4945. - name: free
  4946. - name: used
  4947. - name: btrfs.metadata
  4948. description: BTRFS Metadata Allocation
  4949. unit: "MiB"
  4950. chart_type: stacked
  4951. dimensions:
  4952. - name: free
  4953. - name: used
  4954. - name: reserved
  4955. - name: btrfs.system
  4956. description: BTRFS System Allocation
  4957. unit: "MiB"
  4958. chart_type: stacked
  4959. dimensions:
  4960. - name: free
  4961. - name: used
  4962. - name: btrfs.commits
  4963. description: BTRFS Commits
  4964. unit: "commits"
  4965. chart_type: line
  4966. dimensions:
  4967. - name: commits
  4968. - name: btrfs.commits_perc_time
  4969. description: BTRFS Commits Time Share
  4970. unit: "percentage"
  4971. chart_type: line
  4972. dimensions:
  4973. - name: commits
  4974. - name: btrfs.commit_timings
  4975. description: BTRFS Commit Timings
  4976. unit: "ms"
  4977. chart_type: line
  4978. dimensions:
  4979. - name: last
  4980. - name: max
  4981. - name: btrfs device
  4982. description: ""
  4983. labels:
  4984. - name: device_id
  4985. description: TBD
  4986. - name: filesystem_uuid
  4987. description: TBD
  4988. - name: filesystem_label
  4989. description: TBD
  4990. metrics:
  4991. - name: btrfs.device_errors
  4992. description: BTRFS Device Errors
  4993. unit: "errors"
  4994. chart_type: line
  4995. dimensions:
  4996. - name: write_errs
  4997. - name: read_errs
  4998. - name: flush_errs
  4999. - name: corruption_errs
  5000. - name: generation_errs
  5001. - meta:
  5002. plugin_name: proc.plugin
  5003. module_name: /sys/class/power_supply
  5004. monitored_instance:
  5005. name: Power Supply
  5006. link: ""
  5007. categories:
  5008. - data-collection.linux-systems.power-supply-metrics
  5009. icon_filename: "powersupply.svg"
  5010. related_resources:
  5011. integrations:
  5012. list: []
  5013. info_provided_to_referring_integrations:
  5014. description: ""
  5015. keywords:
  5016. - psu
  5017. - power supply
  5018. most_popular: false
  5019. overview:
  5020. data_collection:
  5021. metrics_description: "This integration monitors power supply metrics, such as battery status, AC power status and more."
  5022. method_description: ""
  5023. supported_platforms:
  5024. include: []
  5025. exclude: []
  5026. multi_instance: true
  5027. additional_permissions:
  5028. description: ""
  5029. default_behavior:
  5030. auto_detection:
  5031. description: ""
  5032. limits:
  5033. description: ""
  5034. performance_impact:
  5035. description: ""
  5036. setup:
  5037. prerequisites:
  5038. list: []
  5039. configuration:
  5040. file:
  5041. name: ""
  5042. description: ""
  5043. options:
  5044. description: ""
  5045. folding:
  5046. title: ""
  5047. enabled: true
  5048. list: []
  5049. examples:
  5050. folding:
  5051. enabled: true
  5052. title: ""
  5053. list: []
  5054. troubleshooting:
  5055. problems:
  5056. list: []
  5057. alerts:
  5058. - name: linux_power_supply_capacity
  5059. link: https://github.com/netdata/netdata/blob/master/health/health.d/linux_power_supply.conf
  5060. metric: powersupply.capacity
  5061. info: percentage of remaining power supply capacity
  5062. metrics:
  5063. folding:
  5064. title: Metrics
  5065. enabled: false
  5066. description: ""
  5067. availability: []
  5068. scopes:
  5069. - name: power device
  5070. description: ""
  5071. labels:
  5072. - name: device
  5073. description: TBD
  5074. metrics:
  5075. - name: powersupply.capacity
  5076. description: Battery capacity
  5077. unit: "percentage"
  5078. chart_type: line
  5079. dimensions:
  5080. - name: capacity
  5081. - name: powersupply.charge
  5082. description: Battery charge
  5083. unit: "Ah"
  5084. chart_type: line
  5085. dimensions:
  5086. - name: empty_design
  5087. - name: empty
  5088. - name: now
  5089. - name: full
  5090. - name: full_design
  5091. - name: powersupply.energy
  5092. description: Battery energy
  5093. unit: "Wh"
  5094. chart_type: line
  5095. dimensions:
  5096. - name: empty_design
  5097. - name: empty
  5098. - name: now
  5099. - name: full
  5100. - name: full_design
  5101. - name: powersupply.voltage
  5102. description: Power supply voltage
  5103. unit: "V"
  5104. chart_type: line
  5105. dimensions:
  5106. - name: min_design
  5107. - name: min
  5108. - name: now
  5109. - name: max
  5110. - name: max_design
  5111. - meta:
  5112. plugin_name: proc.plugin
  5113. module_name: /sys/class/drm
  5114. monitored_instance:
  5115. name: AMD GPU
  5116. link: "https://www.amd.com"
  5117. categories:
  5118. - data-collection.hardware-devices-and-sensors
  5119. icon_filename: amd.svg
  5120. related_resources:
  5121. integrations:
  5122. list: []
  5123. info_provided_to_referring_integrations:
  5124. description: ""
  5125. keywords:
  5126. - amd
  5127. - gpu
  5128. - hardware
  5129. most_popular: false
  5130. overview:
  5131. data_collection:
  5132. metrics_description: "This integration monitors AMD GPU metrics, such as utilization, clock frequency and memory usage."
  5133. method_description: "It reads `/sys/class/drm` to collect metrics for every AMD GPU card instance it encounters."
  5134. supported_platforms:
  5135. include:
  5136. - Linux
  5137. exclude: []
  5138. multi_instance: true
  5139. additional_permissions:
  5140. description: ""
  5141. default_behavior:
  5142. auto_detection:
  5143. description: ""
  5144. limits:
  5145. description: ""
  5146. performance_impact:
  5147. description: ""
  5148. setup:
  5149. prerequisites:
  5150. list: []
  5151. configuration:
  5152. file:
  5153. name: ""
  5154. description: ""
  5155. options:
  5156. description: ""
  5157. folding:
  5158. title: ""
  5159. enabled: true
  5160. list: []
  5161. examples:
  5162. folding:
  5163. enabled: true
  5164. title: ""
  5165. list: []
  5166. troubleshooting:
  5167. problems:
  5168. list: []
  5169. alerts: []
  5170. metrics:
  5171. folding:
  5172. title: Metrics
  5173. enabled: false
  5174. description: ""
  5175. availability: []
  5176. scopes:
  5177. - name: gpu
  5178. description: "These metrics refer to the GPU."
  5179. labels:
  5180. - name: product_name
  5181. description: GPU product name (e.g. AMD RX 6600)
  5182. metrics:
  5183. - name: amdgpu.gpu_utilization
  5184. description: GPU utilization
  5185. unit: "percentage"
  5186. chart_type: line
  5187. dimensions:
  5188. - name: utilization
  5189. - name: amdgpu.gpu_mem_utilization
  5190. description: GPU memory utilization
  5191. unit: "percentage"
  5192. chart_type: line
  5193. dimensions:
  5194. - name: utilization
  5195. - name: amdgpu.gpu_clk_frequency
  5196. description: GPU clock frequency
  5197. unit: "MHz"
  5198. chart_type: line
  5199. dimensions:
  5200. - name: frequency
  5201. - name: amdgpu.gpu_mem_clk_frequency
  5202. description: GPU memory clock frequency
  5203. unit: "MHz"
  5204. chart_type: line
  5205. dimensions:
  5206. - name: frequency
  5207. - name: amdgpu.gpu_mem_vram_usage_perc
  5208. description: VRAM memory usage percentage
  5209. unit: "percentage"
  5210. chart_type: line
  5211. dimensions:
  5212. - name: usage
  5213. - name: amdgpu.gpu_mem_vram_usage
  5214. description: VRAM memory usage
  5215. unit: "bytes"
  5216. chart_type: area
  5217. dimensions:
  5218. - name: free
  5219. - name: used
  5220. - name: amdgpu.gpu_mem_vis_vram_usage_perc
  5221. description: visible VRAM memory usage percentage
  5222. unit: "percentage"
  5223. chart_type: line
  5224. dimensions:
  5225. - name: usage
  5226. - name: amdgpu.gpu_mem_vis_vram_usage
  5227. description: visible VRAM memory usage
  5228. unit: "bytes"
  5229. chart_type: area
  5230. dimensions:
  5231. - name: free
  5232. - name: used
  5233. - name: amdgpu.gpu_mem_gtt_usage_perc
  5234. description: GTT memory usage percentage
  5235. unit: "percentage"
  5236. chart_type: line
  5237. dimensions:
  5238. - name: usage
  5239. - name: amdgpu.gpu_mem_gtt_usage
  5240. description: GTT memory usage
  5241. unit: "bytes"
  5242. chart_type: area
  5243. dimensions:
  5244. - name: free
  5245. - name: used